feat: drop embedded webui (#27)

Signed-off-by: mudler <mudler@c3os.io>
2 years ago · 0b330d90ad
parent 63601fabd1
commit 0b330d90ad
3 changed files with 3 additions and 148 deletions
--- a/README.md
+++ b/README.md
@ -38,7 +38,9 @@ curl http://localhost:8080/v1/completions -H "Content-Type: application/json" -d
 ```
-Note: You can use a default template for every model in your model path, by creating a corresponding file with the `.tmpl` suffix next to your model. For instance, if the model is called `foo.bin`, you can create a sibiling file, `foo.bin.tmpl` which will be used as a default prompt, for instance this can be used with alpaca:
+Note: The API doesn't inject a default prompt for talking to the model, while the CLI does. You have to use a prompt similar to what's described in the stanford-alpaca docs: https://github.com/tatsu-lab/stanford_alpaca#data-release.
 You can use a default template for every model present in your model path by creating a corresponding file with the `.tmpl` suffix next to your model. For instance, if the model is called `foo.bin`, you can create a sibling file, `foo.bin.tmpl`, which will be used as the default prompt. For example, this template can be used with alpaca:
 ```
 Below is an instruction that describes a task. Write a response that appropriately completes the request.
@ -187,22 +189,6 @@ You can list all the models available with:
 curl http://localhost:8080/v1/models
 ```
 ## Web interface
 A simple web interface is also available (for instance, at http://localhost:8080/) and can be used as a playground.
 Note: The API doesn't inject a template for talking to the instance, while the CLI does. You have to use a prompt similar to what's described in the stanford-alpaca docs: https://github.com/tatsu-lab/stanford_alpaca#data-release, for instance:
 ```
 Below is an instruction that describes a task. Write a response that appropriately completes the request.
 ### Instruction:
 {instruction}
 ### Response:
 ```
 ## Using other models
 gpt4all (https://github.com/nomic-ai/gpt4all) works as well, however the original model needs to be converted (same applies for old alpaca models, too):
--- a/api/api.go
+++ b/api/api.go
@ -1,9 +1,7 @@
 package api
 import (
 	"embed"
 	"fmt"
 	"net/http"
 	"strings"
 	"sync"
@ -12,7 +10,6 @@ import (
 	llama "github.com/go-skynet/go-llama.cpp"
 	"github.com/gofiber/fiber/v2"
 	"github.com/gofiber/fiber/v2/middleware/cors"
 	"github.com/gofiber/fiber/v2/middleware/filesystem"
 	"github.com/gofiber/fiber/v2/middleware/recover"
 )
@ -65,9 +62,6 @@ type OpenAIRequest struct {
 	IgnoreEOS bool `json:"ignore_eos"`
 }
 //go:embed index.html
 var indexHTML embed.FS
 // https://platform.openai.com/docs/api-reference/completions
 func openAIEndpoint(chat bool, loader *model.ModelLoader, threads int, defaultMutex *sync.Mutex, mutexMap *sync.Mutex, mutexes map[string]*sync.Mutex) func(c *fiber.Ctx) error {
 	return func(c *fiber.Ctx) error {
@ -234,11 +228,6 @@ func Start(loader *model.ModelLoader, listenAddr string, threads int) error {
 		})
 	})
 	app.Use("/", filesystem.New(filesystem.Config{
 		Root:         http.FS(indexHTML),
 		NotFoundFile: "index.html",
 	}))
 	// Start the server
 	app.Listen(listenAddr)
 	return nil
--- a/api/index.html
+++ b/api/index.html
@ -1,120 +0,0 @@
 <!DOCTYPE html>
 <html>
 <head>
    <title>llama-cli</title>
    <meta charset="UTF-8">
    <meta name="viewport" content="width=device-width, initial-scale=1">
    <link rel="stylesheet" href="https://cdnjs.cloudflare.com/ajax/libs/font-awesome/5.15.3/css/all.min.css" crossorigin="anonymous" referrerpolicy="no-referrer" />
    <link rel="stylesheet" href="https://stackpath.bootstrapcdn.com/bootstrap/4.3.1/css/bootstrap.min.css">
 </head>
 <style>
    /* One full clockwise turn, from 0deg to 360deg. */
    @keyframes rotating {
    from {
        transform: rotate(0deg);
    }
    to {
        transform: rotate(360deg);
    }
 }
 /* Spins the element once per second, forever, at constant speed.
    submitRequest() adds this class to the paper-plane icon while a
    request is in flight and removes it when the request settles. */
 .waiting {
    animation: rotating 1s linear infinite;
 }
 </style>
 <body>
 <div class="container mt-5" x-data="{ templates:[
    {
      name: 'Alpaca: Instruction without input',
      text: `Below is an instruction that describes a task. Write a response that appropriately completes the request.
 ### Instruction:
 {{.Instruction}}
 ### Response:`,
    },
    {
      name: 'Alpaca: Instruction with input',
      text: `Below is an instruction that describes a task, paired with an input that provides further context. Write a response that appropriately completes the request.
 ### Instruction:
 {{.Instruction}}
 ### Input:
 {{.Input}}
 ### Response:`,
    }
  ], selectedTemplate: '', selectedTemplateText: '' }">
    <h1>llama-cli API</h1>
    <div class="form-group">
        <label for="inputText">Input Text:</label>
        <!-- Bind the value property instead of using x-text: x-text only sets textContent,
             which the browser stops reflecting once the user has typed in the textarea,
             so picking a template afterwards would no longer update the visible text. -->
        <textarea class="form-control" id="inputText" rows="6" placeholder="Your text input here..." :value="selectedTemplateText"></textarea>
    </div>
    <div class="form-group">
        <label for="templateSelect">Select Template:</label>
        <select class="form-control" id="templateSelect" x-model="selectedTemplateText">
            <option value="">None</option>
            <template x-for="(template, index) in templates" :key="index">
                <option :value="template.text" x-text="template.name"></option>
            </template>
        </select>
    </div>
    <div class="form-group">
        <label for="topP">Top P:</label>
        <input type="range" step="0.01" min="0" max="1" class="form-control" id="topP" value="0.20" name="topP" onchange="this.nextElementSibling.value = this.value" required>
        <output>0.20</output>
    </div>
    <div class="form-group">
        <label for="topK">Top K:</label>
        <input type="number" class="form-control" id="topK" value="10000" name="topK"  required>
    </div>
    <div class="form-group">
        <label for="temperature">Temperature:</label>
        <input type="range" step="0.01" min="0" max="1" value="0.9" class="form-control" id="temperature" name="temperature" onchange="this.nextElementSibling.value = this.value"  required>
        <output>0.9</output>
    </div>
    <div class="form-group">
        <label for="tokens">Tokens:</label>
        <input type="number" class="form-control" id="tokens" name="tokens" value="128" required>
    </div>
    <button class="btn btn-primary" x-on:click="submitRequest()">Submit <i class="fas fa-paper-plane"></i></button>
    <hr>
    <div class="form-group">
        <label for="outputText">Output Text:</label>
        <textarea class="form-control" id="outputText" rows="5" readonly></textarea>
    </div>
 </div>
 <script defer src="https://cdn.jsdelivr.net/npm/alpinejs@3.x.x/dist/cdn.min.js"></script>
 <script>
    /**
     * Posts the prompt and sampling parameters from the form to the /predict
     * endpoint and writes the returned `prediction` into the output textarea.
     *
     * The paper-plane icon gets the "waiting" class (spinner) while the request
     * is in flight; the class is removed on both success and failure.
     * Non-2xx responses are treated as errors and logged to the console instead
     * of being parsed as a successful prediction.
     */
    function submitRequest() {
        var icon = document.querySelector("i.fa-paper-plane");
        // Single place that stops the spinner, shared by the success and error paths.
        var stopSpinner = function () {
            icon.classList.remove("waiting");
        };
        icon.classList.add("waiting");

        var payload = {
            "text": document.getElementById("inputText").value,
            "topP": document.getElementById("topP").value,
            "topK": document.getElementById("topK").value,
            "temperature": document.getElementById("temperature").value,
            "tokens": document.getElementById("tokens").value
        };

        fetch("/predict", {
            method: "POST",
            headers: {
                "Content-Type": "application/json"
            },
            body: JSON.stringify(payload)
        })
        .then(response => {
            // fetch() only rejects on network failures; HTTP errors must be checked explicitly,
            // otherwise an error body would be rendered as an "undefined" prediction.
            if (!response.ok) {
                throw new Error("request to /predict failed with HTTP status " + response.status);
            }
            return response.json();
        })
        .then(result => {
            document.getElementById("outputText").value = result.prediction;
            stopSpinner();
        })
        .catch(error => {
            console.error(error);
            stopSpinner();
        });
    }
 </script>
 </body>
 </html>