examples(telegram): add

examples(flowise): add
examples: use gallery in chatbot-ui, add flowise
155 changed files with 2839 additions and 10664 deletions
--- a/.dockerignore
+++ b/.dockerignore
@ -1,4 +1,3 @@
 .idea
 models
 examples/chatbot-ui/models
 examples/rwkv/models
--- a/.env
+++ b/.env
@ -7,33 +7,20 @@
 ## Default models context size
 # CONTEXT_SIZE=512
 #
 ## Define galleries.
 ## models available to install will be visible in `/models/available`
 # GALLERIES=[{"name":"model-gallery", "url":"github:go-skynet/model-gallery/index.yaml"}]
 ## CORS settings
 # CORS=true
 # CORS_ALLOW_ORIGINS=*
 ## Default path for models
 #
 MODELS_PATH=/models
 ## Enable debug mode
 # DEBUG=true
-## Specify a build type. Available: cublas, openblas, clblas.
+## Specify a build type. Available: cublas, openblas.
 # BUILD_TYPE=openblas
-## Uncomment and set to true to enable rebuilding from source
+## Uncomment and set to false to disable rebuilding from source
-# REBUILD=true
+# REBUILD=false
-## Enable go tags, available: stablediffusion, tts
+## Enable image generation with stablediffusion (requires REBUILD=true)
 ## stablediffusion: image generation with stablediffusion
 ## tts: enables text-to-speech with go-piper 
 ## (requires REBUILD=true)
 #
 # GO_TAGS=stablediffusion
 ## Path where to store generated images
--- a/.github/FUNDING.yml
+++ b/.github/FUNDING.yml
@ -1,5 +0,0 @@
 # These are supported funding model platforms
 github: [mudler]
 custom: 
 - https://www.buymeacoffee.com/mudler
--- a/.github/workflows/bump_deps.yaml
+++ b/.github/workflows/bump_deps.yaml
@ -12,9 +12,6 @@ jobs:
          - repository: "go-skynet/go-llama.cpp"
            variable: "GOLLAMA_VERSION"
            branch: "master"
          - repository: "go-skynet/go-llama.cpp"
            variable: "GOLLAMA_GRAMMAR_VERSION"
            branch: "master"
          - repository: "go-skynet/go-ggml-transformers.cpp"
            variable: "GOGGMLTRANSFORMERS_VERSION"
            branch: "master"
@ -33,15 +30,6 @@ jobs:
          - repository: "nomic-ai/gpt4all"
            variable: "GPT4ALL_VERSION"
            branch: "main"
          - repository: "mudler/go-ggllm.cpp"
            variable: "GOGGLLM_VERSION"
            branch: "master"
          - repository: "mudler/go-stable-diffusion"
            variable: "STABLEDIFFUSION_VERSION"
            branch: "master"
          - repository: "mudler/go-piper"
            variable: "PIPER_VERSION"
            branch: "master"
    runs-on: ubuntu-latest
    steps:
      - uses: actions/checkout@v3
--- a/.github/workflows/image.yml
+++ b/.github/workflows/image.yml
@ -15,97 +15,34 @@ concurrency:
 jobs:
  docker:
    strategy:
      matrix:
        include:
          - build-type: ''
            platforms: 'linux/amd64,linux/arm64'
            tag-latest: 'auto'
            tag-suffix: ''
            ffmpeg: ''
          - build-type: 'cublas'
            cuda-major-version: 11
            cuda-minor-version: 7
            platforms: 'linux/amd64'
            tag-latest: 'false'
            tag-suffix: '-cublas-cuda11'
            ffmpeg: ''
          - build-type: 'cublas'
            cuda-major-version: 12
            cuda-minor-version: 1
            platforms: 'linux/amd64'
            tag-latest: 'false'
            tag-suffix: '-cublas-cuda12'
            ffmpeg: ''
          - build-type: ''
            platforms: 'linux/amd64,linux/arm64'
            tag-latest: 'false'
            tag-suffix: '-ffmpeg'
            ffmpeg: 'true'
          - build-type: 'cublas'
            cuda-major-version: 11
            cuda-minor-version: 7
            platforms: 'linux/amd64'
            tag-latest: 'false'
            tag-suffix: '-cublas-cuda11-ffmpeg'
            ffmpeg: 'true'
          - build-type: 'cublas'
            cuda-major-version: 12
            cuda-minor-version: 1
            platforms: 'linux/amd64'
            tag-latest: 'false'
            tag-suffix: '-cublas-cuda12-ffmpeg'
            ffmpeg: 'true'
    runs-on: ubuntu-latest
    steps:
      - name: Release space from worker
        run: |
          echo "Listing top largest packages"
          pkgs=$(dpkg-query -Wf '${Installed-Size}\t${Package}\t${Status}\n' | awk '$NF == "installed"{print $1 "\t" $2}' | sort -nr)
          head -n 30 <<< "${pkgs}"
          echo
          df -h
          echo
          sudo apt-get remove -y '^llvm-.*|^libllvm.*' || true
          sudo apt-get remove --auto-remove android-sdk-platform-tools || true
          sudo apt-get purge --auto-remove android-sdk-platform-tools || true
          sudo rm -rf /usr/local/lib/android
          sudo apt-get remove -y '^dotnet-.*|^aspnetcore-.*' || true
          sudo rm -rf /usr/share/dotnet
          sudo apt-get remove -y '^mono-.*' || true
          sudo apt-get remove -y '^ghc-.*' || true
          sudo apt-get remove -y '.*jdk.*|.*jre.*' || true
          sudo apt-get remove -y 'php.*' || true
          sudo apt-get remove -y hhvm powershell firefox monodoc-manual msbuild || true
          sudo apt-get remove -y '^google-.*' || true
          sudo apt-get remove -y azure-cli || true
          sudo apt-get remove -y '^mongo.*-.*|^postgresql-.*|^mysql-.*|^mssql-.*' || true
          sudo apt-get remove -y '^gfortran-.*' || true
          sudo apt-get autoremove -y
          sudo apt-get clean
          echo
          echo "Listing top largest packages"
          pkgs=$(dpkg-query -Wf '${Installed-Size}\t${Package}\t${Status}\n' | awk '$NF == "installed"{print $1 "\t" $2}' | sort -nr)
          head -n 30 <<< "${pkgs}"
          echo
          sudo rm -rfv build || true
          df -h
      - name: Checkout
        uses: actions/checkout@v3
-      - name: Docker meta
+      - name: Prepare
-        id: meta
+        id: prep
-        uses: docker/metadata-action@v4
+        run: |
-        with:
+          DOCKER_IMAGE=quay.io/go-skynet/local-ai
-          images: quay.io/go-skynet/local-ai
+          VERSION=master
-          tags: |
+          SHORTREF=${GITHUB_SHA::8}
-            type=ref,event=branch
+
-            type=semver,pattern={{raw}}
+          # If this is a git tag, use the tag name as a docker tag
-            type=sha
+          if [[ $GITHUB_REF == refs/tags/* ]]; then
-          flavor: |
+            VERSION=${GITHUB_REF#refs/tags/}
-            latest=${{ matrix.tag-latest }}
+          fi
-            suffix=${{ matrix.tag-suffix }}
+          TAGS="${DOCKER_IMAGE}:${VERSION},${DOCKER_IMAGE}:${SHORTREF}"
          # If the VERSION looks like a version number, assume that
          # this is the most recent version of the image and also
          # tag it 'latest'.
          if [[ $VERSION =~ ^v[0-9]{1,3}\.[0-9]{1,3}\.[0-9]{1,3}$ ]]; then
            TAGS="$TAGS,${DOCKER_IMAGE}:latest"
          fi
          # Set output parameters.
          echo ::set-output name=tags::${TAGS}
          echo ::set-output name=docker_image::${DOCKER_IMAGE}
      - name: Set up QEMU
        uses: docker/setup-qemu-action@master
@ -123,19 +60,23 @@ jobs:
          registry: quay.io
          username: ${{ secrets.LOCALAI_REGISTRY_USERNAME }}
          password: ${{ secrets.LOCALAI_REGISTRY_PASSWORD }}
-
+      - name: Build
-      - name: Build and push
+        if: github.event_name != 'pull_request'
        uses: docker/build-push-action@v4
        with:
          builder: ${{ steps.buildx.outputs.name }}
          context: .
          file: ./Dockerfile
          platforms: linux/amd64,linux/arm64
          push: true
          tags: ${{ steps.prep.outputs.tags }}
      - name: Build PRs
        if: github.event_name == 'pull_request'
        uses: docker/build-push-action@v4
        with:
          builder: ${{ steps.buildx.outputs.name }}
          build-args: |
            BUILD_TYPE=${{ matrix.build-type }}
            CUDA_MAJOR_VERSION=${{ matrix.cuda-major-version }}
            CUDA_MINOR_VERSION=${{ matrix.cuda-minor-version }}
            FFMPEG=${{ matrix.ffmpeg }}
          context: .
          file: ./Dockerfile
-          platforms: ${{ matrix.platforms }}
+          platforms: linux/amd64
-          push: ${{ github.event_name != 'pull_request' }}
+          push: false
-          tags: ${{ steps.meta.outputs.tags }}
+          tags: ${{ steps.prep.outputs.tags }}
          labels: ${{ steps.meta.outputs.labels }}
--- a/.github/workflows/release.yaml
+++ b/.github/workflows/release.yaml
@ -32,7 +32,7 @@ jobs:
          CMAKE_ARGS: "${{ matrix.defines }}"
          BUILD_ID: "${{ matrix.build }}"
        run: |
-          STATIC=true make dist
+          make dist
      - uses: actions/upload-artifact@v3
        with:
          name: ${{ matrix.build }}
@ -60,6 +60,11 @@ jobs:
        uses: actions/checkout@v3
        with:
          submodules: true
      - name: Dependencies
        run: |
          brew update
          brew install sdl2 ffmpeg
      - name: Build
        id: build
        env:
--- a/.github/workflows/test.yml
+++ b/.github/workflows/test.yml
@ -26,29 +26,9 @@ jobs:
        run: |
          sudo apt-get update
          sudo apt-get install build-essential ffmpeg
          sudo apt-get install -y ca-certificates cmake curl patch
          sudo apt-get install -y libopencv-dev && sudo ln -s /usr/include/opencv4/opencv2 /usr/include/opencv2
          sudo pip install -r extra/requirements.txt
          sudo mkdir /build && sudo chmod -R 777 /build && cd /build && \
          curl -L "https://github.com/gabime/spdlog/archive/refs/tags/v1.11.0.tar.gz" | \
          tar -xzvf - && \
          mkdir -p "spdlog-1.11.0/build" && \
          cd "spdlog-1.11.0/build" && \
          cmake ..  && \
          make -j8 && \
          sudo cmake --install . --prefix /usr && mkdir -p "lib/Linux-$(uname -m)" && \
          cd /build && \
          mkdir -p "lib/Linux-$(uname -m)/piper_phonemize" && \
          curl -L "https://github.com/rhasspy/piper-phonemize/releases/download/v1.0.0/libpiper_phonemize-amd64.tar.gz" | \
          tar -C "lib/Linux-$(uname -m)/piper_phonemize" -xzvf - && ls -liah /build/lib/Linux-$(uname -m)/piper_phonemize/ && \
          sudo cp -rfv /build/lib/Linux-$(uname -m)/piper_phonemize/lib/. /lib64/ && \
          sudo cp -rfv /build/lib/Linux-$(uname -m)/piper_phonemize/lib/. /usr/lib/ && \
          sudo cp -rfv /build/lib/Linux-$(uname -m)/piper_phonemize/include/. /usr/include/
      - name: Test
        run: |
-          ESPEAK_DATA="/build/lib/Linux-$(uname -m)/piper_phonemize/lib/espeak-ng-data" GO_TAGS="tts stablediffusion" make test
+          make test
  macOS-latest:
    runs-on: macOS-latest
@ -59,6 +39,10 @@ jobs:
        with: 
          submodules: true
      - name: Dependencies
        run: |
          brew update
          brew install sdl2 ffmpeg
      - name: Test
        run: |
-          CMAKE_ARGS="-DLLAMA_F16C=OFF -DLLAMA_AVX512=OFF -DLLAMA_AVX2=OFF -DLLAMA_FMA=OFF" make test
+          make test
--- a/.gitignore
+++ b/.gitignore
@ -1,20 +1,12 @@
 # go-llama build artifacts
 go-llama
-/gpt4all
+gpt4all
 go-stable-diffusion
 go-piper
 /go-bert
 go-ggllm
 /piper
 __pycache__/
 *.a
 get-sources
 go-ggml-transformers
 go-gpt2
 go-rwkv
 whisper.cpp
-/bloomz
+bloomz
 go-bert
 # LocalAI build binary
@ -32,9 +24,3 @@ release/
 # just in case
 .DS_Store
 .idea
 # Generated during build
 backend-assets/
 /ggml-metal.metal
--- a/119
+++ b/119
@ -1,25 +1,24 @@
-ARG GO_VERSION=1.20-bullseye
+ARG GO_VERSION=1.20
-FROM golang:$GO_VERSION as requirements
+FROM golang:$GO_VERSION as builder
-ARG BUILD_TYPE
+ARG BUILD_TYPE=
 ARG GO_TAGS=
 ARG CUDA_MAJOR_VERSION=11
 ARG CUDA_MINOR_VERSION=7
 ARG SPDLOG_VERSION="1.11.0"
 ARG PIPER_PHONEMIZE_VERSION='1.0.0'
 ARG TARGETARCH
 ARG TARGETVARIANT
 ENV BUILD_TYPE=${BUILD_TYPE}
-ENV EXTERNAL_GRPC_BACKENDS="huggingface-embeddings:/build/extra/grpc/huggingface/huggingface.py"
+ENV GO_TAGS=${GO_TAGS}
-ARG GO_TAGS="stablediffusion tts"
+ENV NVIDIA_DRIVER_CAPABILITIES=compute,utility
 ENV NVIDIA_REQUIRE_CUDA="cuda>=${CUDA_MAJOR_VERSION}.0"
 ENV NVIDIA_VISIBLE_DEVICES=all
 ENV HEALTHCHECK_ENDPOINT=http://localhost:8080/readyz
 ENV REBUILD=true
-RUN apt-get update && \
+WORKDIR /build
    apt-get install -y ca-certificates cmake curl patch pip
-# Extras requirements
+RUN apt-get update && \
-COPY extra/requirements.txt /build/extra/requirements.txt
+    apt-get install -y ca-certificates cmake curl
 RUN pip install -r /build/extra/requirements.txt && rm -rf /build/extra/requirements.txt
 # CuBLAS requirements
 RUN if [ "${BUILD_TYPE}" = "cublas" ]; then \
@ -33,8 +32,6 @@ RUN if [ "${BUILD_TYPE}" = "cublas" ]; then \
    ; fi
 ENV PATH /usr/local/cuda/bin:${PATH}
 WORKDIR /build
 # OpenBLAS requirements
 RUN apt-get install -y libopenblas-dev
@ -42,77 +39,49 @@ RUN apt-get install -y libopenblas-dev
 RUN apt-get install -y libopencv-dev && \
    ln -s /usr/include/opencv4/opencv2 /usr/include/opencv2
-# Use the variables in subsequent instructions
+COPY . .
-RUN echo "Target Architecture: $TARGETARCH"
+RUN make build
-RUN echo "Target Variant: $TARGETVARIANT"
+
-
+FROM golang:$GO_VERSION
-# piper requirements
+
-# Use pre-compiled Piper phonemization library (includes onnxruntime)
+ARG BUILD_TYPE=
-#RUN if echo "${GO_TAGS}" | grep -q "tts"; then \
+ARG GO_TAGS=
-RUN test -n "$TARGETARCH" \
+ARG CUDA_MAJOR_VERSION=11
-    || (echo 'warn: missing $TARGETARCH, either set this `ARG` manually, or run using `docker buildkit`')
+ARG CUDA_MINOR_VERSION=7
 RUN curl -L "https://github.com/gabime/spdlog/archive/refs/tags/v${SPDLOG_VERSION}.tar.gz" | \
    tar -xzvf - && \
    mkdir -p "spdlog-${SPDLOG_VERSION}/build" && \
    cd "spdlog-${SPDLOG_VERSION}/build" && \
    cmake ..  && \
    make -j8 && \
    cmake --install . --prefix /usr && mkdir -p "lib/Linux-$(uname -m)" && \
    cd /build && \
    mkdir -p "lib/Linux-$(uname -m)/piper_phonemize" && \
    curl -L "https://github.com/rhasspy/piper-phonemize/releases/download/v${PIPER_PHONEMIZE_VERSION}/libpiper_phonemize-${TARGETARCH:-$(go env GOARCH)}${TARGETVARIANT}.tar.gz" | \
    tar -C "lib/Linux-$(uname -m)/piper_phonemize" -xzvf - && ls -liah /build/lib/Linux-$(uname -m)/piper_phonemize/ && \
    cp -rfv /build/lib/Linux-$(uname -m)/piper_phonemize/lib/. /lib64/ && \
    cp -rfv /build/lib/Linux-$(uname -m)/piper_phonemize/lib/. /usr/lib/ && \
    cp -rfv /build/lib/Linux-$(uname -m)/piper_phonemize/include/. /usr/include/
 # \
 #    ; fi
 ###################################
 ###################################
 FROM requirements as builder
 ARG GO_TAGS="stablediffusion tts"
 ENV BUILD_TYPE=${BUILD_TYPE}
 ENV GO_TAGS=${GO_TAGS}
 ENV NVIDIA_DRIVER_CAPABILITIES=compute,utility
 ENV NVIDIA_REQUIRE_CUDA="cuda>=${CUDA_MAJOR_VERSION}.0"
 ENV NVIDIA_VISIBLE_DEVICES=all
 ENV HEALTHCHECK_ENDPOINT=http://localhost:8080/readyz
-WORKDIR /build
+ENV REBUILD=true
 COPY Makefile .
 RUN make get-sources
 COPY go.mod .
 RUN make prepare
 COPY . .
 COPY .git .
 RUN ESPEAK_DATA=/build/lib/Linux-$(uname -m)/piper_phonemize/lib/espeak-ng-data make build
 ###################################
 ###################################
 FROM requirements
-ARG FFMPEG
+WORKDIR /build
-ENV REBUILD=false
+RUN apt-get update && \
-ENV HEALTHCHECK_ENDPOINT=http://localhost:8080/readyz
+    apt-get install -y ca-certificates cmake curl
-# Add FFmpeg
+# CuBLAS requirements
-RUN if [ "${FFMPEG}" = "true" ]; then \
+RUN if [ "${BUILD_TYPE}" = "cublas" ]; then \
-    apt-get install -y ffmpeg \
+    apt-get install -y software-properties-common && \
    apt-add-repository contrib && \
    curl -O https://developer.download.nvidia.com/compute/cuda/repos/debian11/x86_64/cuda-keyring_1.0-1_all.deb && \
    dpkg -i cuda-keyring_1.0-1_all.deb && \
    rm -f cuda-keyring_1.0-1_all.deb && \
    apt-get update && \
    apt-get install -y cuda-nvcc-${CUDA_MAJOR_VERSION}-${CUDA_MINOR_VERSION} libcublas-dev-${CUDA_MAJOR_VERSION}-${CUDA_MINOR_VERSION} \
    ; fi
 ENV PATH /usr/local/cuda/bin:${PATH}
-WORKDIR /build
+# OpenBLAS requirements
 RUN apt-get install -y libopenblas-dev
 # Stable Diffusion requirements
 RUN apt-get install -y libopencv-dev && \
    ln -s /usr/include/opencv4/opencv2 /usr/include/opencv2
 # we start fresh & re-copy all assets because `make build` does not clean up nicely after itself
 # so when `entrypoint.sh` runs `make build` again (which it does by default), the build would fail
 # see https://github.com/go-skynet/LocalAI/pull/658#discussion_r1241971626 and
 # https://github.com/go-skynet/LocalAI/pull/434
 COPY . .
 RUN make prepare-sources
 COPY --from=builder /build/local-ai ./
--- a/333
+++ b/333
@ -3,62 +3,23 @@ GOTEST=$(GOCMD) test
 GOVET=$(GOCMD) vet
 BINARY_NAME=local-ai
-# llama.cpp versions
+GOLLAMA_VERSION?=4bd3910005a593a6db237bc82c506d6d9fb81b18
 # Temporarily pinned to https://github.com/go-skynet/go-llama.cpp/pull/124
 GOLLAMA_VERSION?=f3a6ee0ef53d667f110d28fcf9b808bdca741c07
 GOLLAMA_GRAMMAR_VERSION?=cb8d7cd4cb95725a04504a9e3a26dd72a12b69ac
 # Temporarily set a specific version of llama.cpp
 # containing: https://github.com/ggerganov/llama.cpp/pull/1773 and
 # rebased on top of master.
 # This pin can be dropped when the PR above is merged, and go-llama has merged changes as well
 # Set empty to use the version pinned by go-llama
 LLAMA_CPP_GRAMMAR_REPO?=https://github.com/mudler/llama.cpp
 LLAMA_CPP_GRAMMAR_VERSION?=48ce8722a05a018681634af801fd0fd45b3a87cc
 # gpt4all version
 GPT4ALL_REPO?=https://github.com/nomic-ai/gpt4all
-GPT4ALL_VERSION?=5f0aaf8bdb166ea3b5bfd578c2b19f61b583e6a9
+GPT4ALL_VERSION?=73db20ba85fbbdc66a56e2619394c0eea40dc72b
-
+GOGGMLTRANSFORMERS_VERSION?=695f97befe14f0107d8da1c11f5b84912e0754b6
 # go-ggml-transformers version
 GOGGMLTRANSFORMERS_VERSION?=ffb09d7dd71e2cbc6c5d7d05357d230eea6f369a
 # go-rwkv version
 RWKV_REPO?=https://github.com/donomii/go-rwkv.cpp
-RWKV_VERSION?=c898cd0f62df8f2a7830e53d1d513bef4f6f792b
+RWKV_VERSION?=ccb05c3e1c6efd098017d114dcb58ab3262b40b2
-
+WHISPER_CPP_VERSION?=9b926844e3ae0ca6a0d13573b2e0349be1a4b573
-# whisper.cpp version
+BERT_VERSION?=cea1ed76a7f48ef386a8e369f6c82c48cdf2d551
 WHISPER_CPP_VERSION?=85ed71aaec8e0612a84c0b67804bde75aa75a273
 # bert.cpp version
 BERT_VERSION?=6abe312cded14042f6b7c3cd8edf082713334a4d
 # go-piper version
 PIPER_VERSION?=56b8a81b4760a6fbee1a82e62f007ae7e8f010a7
 # go-bloomz version
 BLOOMZ_VERSION?=1834e77b83faafe912ad4092ccf7f77937349e2f
-
+BUILD_TYPE?=
 # stablediffusion version
 STABLEDIFFUSION_VERSION?=d89260f598afb809279bc72aa0107b4292587632
 # Go-ggllm
 GOGGLLM_VERSION?=862477d16eefb0805261c19c9b0d053e3b2b684b
 export BUILD_TYPE?=
 CGO_LDFLAGS?=
 CUDA_LIBPATH?=/usr/local/cuda/lib64/
 STABLEDIFFUSION_VERSION?=c0748eca3642d58bcf9521108bcee46959c647dc
 GO_TAGS?=
 BUILD_ID?=git
-
+LD_FLAGS=?=
 VERSION?=$(shell git describe --always --tags || echo "dev" )
 # go tool nm ./local-ai | grep Commit
 LD_FLAGS?=
 override LD_FLAGS += -X "github.com/go-skynet/LocalAI/internal.Version=$(VERSION)"
 override LD_FLAGS += -X "github.com/go-skynet/LocalAI/internal.Commit=$(shell git rev-parse HEAD)"
 OPTIONAL_TARGETS?=
 ESPEAK_DATA?=
 OS := $(shell uname -s)
 ARCH := $(shell uname -m)
@ -68,14 +29,8 @@ WHITE  := $(shell tput -Txterm setaf 7)
 CYAN   := $(shell tput -Txterm setaf 6)
 RESET  := $(shell tput -Txterm sgr0)
-ifndef UNAME_S
+C_INCLUDE_PATH=$(shell pwd)/go-llama:$(shell pwd)/go-stable-diffusion/:$(shell pwd)/gpt4all/gpt4all-bindings/golang/:$(shell pwd)/go-ggml-transformers:$(shell pwd)/go-rwkv:$(shell pwd)/whisper.cpp:$(shell pwd)/go-bert:$(shell pwd)/bloomz
-UNAME_S := $(shell uname -s)
+LIBRARY_PATH=$(shell pwd)/go-llama:$(shell pwd)/go-stable-diffusion/:$(shell pwd)/gpt4all/gpt4all-bindings/golang/:$(shell pwd)/go-ggml-transformers:$(shell pwd)/go-rwkv:$(shell pwd)/whisper.cpp:$(shell pwd)/go-bert:$(shell pwd)/bloomz
 endif
 # workaround for rwkv.cpp
 ifeq ($(UNAME_S),Darwin)
        CGO_LDFLAGS += -lcblas -framework Accelerate 
 endif
 ifeq ($(BUILD_TYPE),openblas)
 	CGO_LDFLAGS+=-lopenblas
@ -86,11 +41,6 @@ ifeq ($(BUILD_TYPE),cublas)
 	export LLAMA_CUBLAS=1
 endif
 ifeq ($(BUILD_TYPE),metal)
 	CGO_LDFLAGS+=-framework Foundation -framework Metal -framework MetalKit -framework MetalPerformanceShaders
 	export LLAMA_METAL=1
 endif
 ifeq ($(BUILD_TYPE),clblas)
 	CGO_LDFLAGS+=-lOpenCL -lclblast
 endif
@ -100,15 +50,8 @@ ifeq ($(STATIC),true)
 	LD_FLAGS=-linkmode external -extldflags -static
 endif
-ifeq ($(findstring stablediffusion,$(GO_TAGS)),stablediffusion)
+ifeq ($(GO_TAGS),stablediffusion)
-#	OPTIONAL_TARGETS+=go-stable-diffusion/libstablediffusion.a
+	OPTIONAL_TARGETS+=go-stable-diffusion/libstablediffusion.a
 	OPTIONAL_GRPC+=backend-assets/grpc/stablediffusion
 endif
 ifeq ($(findstring tts,$(GO_TAGS)),tts)
 #	OPTIONAL_TARGETS+=go-piper/libpiper_binding.a
 #	OPTIONAL_TARGETS+=backend-assets/espeak-ng-data
 	OPTIONAL_GRPC+=backend-assets/grpc/piper
 endif
 .PHONY: all test build vendor
@ -119,24 +62,31 @@ all: help
 gpt4all:
 	git clone --recurse-submodules $(GPT4ALL_REPO) gpt4all
 	cd gpt4all && git checkout -b build $(GPT4ALL_VERSION) && git submodule update --init --recursive --depth 1
-
+	# This is hackish, but needed as both go-llama and go-gpt4allj have their own version of ggml..
-## go-ggllm
+	@find ./gpt4all -type f -name "*.c" -exec sed -i'' -e 's/ggml_/ggml_gptj_/g' {} +
-go-ggllm:
+	@find ./gpt4all -type f -name "*.cpp" -exec sed -i'' -e 's/ggml_/ggml_gptj_/g' {} +
-	git clone --recurse-submodules https://github.com/mudler/go-ggllm.cpp go-ggllm
+	@find ./gpt4all -type f -name "*.h" -exec sed -i'' -e 's/ggml_/ggml_gptj_/g' {} +
-	cd go-ggllm && git checkout -b build $(GOGGLLM_VERSION) && git submodule update --init --recursive --depth 1
+	@find ./gpt4all -type f -name "*.cpp" -exec sed -i'' -e 's/gpt_/gptj_/g' {} +
-
+	@find ./gpt4all -type f -name "*.h" -exec sed -i'' -e 's/gpt_/gptj_/g' {} +
-go-ggllm/libggllm.a: go-ggllm
+	@find ./gpt4all -type f -name "*.h" -exec sed -i'' -e 's/set_console_color/set_gptj_console_color/g' {} +
-	$(MAKE) -C go-ggllm BUILD_TYPE=$(BUILD_TYPE) libggllm.a
+	@find ./gpt4all -type f -name "*.cpp" -exec sed -i'' -e 's/set_console_color/set_gptj_console_color/g' {} +
-
+	@find ./gpt4all -type f -name "*.cpp" -exec sed -i'' -e 's/llama_/gptjllama_/g' {} +
-## go-piper
+	@find ./gpt4all -type f -name "*.go" -exec sed -i'' -e 's/llama_/gptjllama_/g' {} +
-go-piper:
+	@find ./gpt4all -type f -name "*.h" -exec sed -i'' -e 's/llama_/gptjllama_/g' {} +
-	git clone --recurse-submodules https://github.com/mudler/go-piper go-piper
+	@find ./gpt4all -type f -name "*.txt" -exec sed -i'' -e 's/llama_/gptjllama_/g' {} +
-	cd go-piper && git checkout -b build $(PIPER_VERSION) && git submodule update --init --recursive --depth 1
+	@find ./gpt4all -type f -name "*.cpp" -exec sed -i'' -e 's/json_/json_gptj_/g' {} +
 	@find ./gpt4all -type f -name "*.cpp" -exec sed -i'' -e 's/void replace/void json_gptj_replace/g' {} +
 	@find ./gpt4all -type f -name "*.cpp" -exec sed -i'' -e 's/::replace/::json_gptj_replace/g' {} +
 	@find ./gpt4all -type f -name "*.cpp" -exec sed -i'' -e 's/regex_escape/gpt4allregex_escape/g' {} +
 	mv ./gpt4all/gpt4all-backend/llama.cpp/llama_util.h ./gpt4all/gpt4all-backend/llama.cpp/gptjllama_util.h
 ## BERT embeddings
 go-bert:
 	git clone --recurse-submodules https://github.com/go-skynet/go-bert.cpp go-bert
 	cd go-bert && git checkout -b build $(BERT_VERSION) && git submodule update --init --recursive --depth 1
 	@find ./go-bert -type f -name "*.c" -exec sed -i'' -e 's/ggml_/ggml_bert_/g' {} +
 	@find ./go-bert -type f -name "*.cpp" -exec sed -i'' -e 's/ggml_/ggml_bert_/g' {} +
 	@find ./go-bert -type f -name "*.h" -exec sed -i'' -e 's/ggml_/ggml_bert_/g' {} +
 ## stable diffusion
 go-stable-diffusion:
@ -150,14 +100,23 @@ go-stable-diffusion/libstablediffusion.a:
 go-rwkv:
 	git clone --recurse-submodules $(RWKV_REPO) go-rwkv
 	cd go-rwkv && git checkout -b build $(RWKV_VERSION) && git submodule update --init --recursive --depth 1
 	@find ./go-rwkv -type f -name "*.c" -exec sed -i'' -e 's/ggml_/ggml_rwkv_/g' {} +
 	@find ./go-rwkv -type f -name "*.cpp" -exec sed -i'' -e 's/ggml_/ggml_rwkv_/g' {} +
 	@find ./go-rwkv -type f -name "*.h" -exec sed -i'' -e 's/ggml_/ggml_rwkv_/g' {} +
 go-rwkv/librwkv.a: go-rwkv
-	cd go-rwkv && cd rwkv.cpp &&	cmake . -DRWKV_BUILD_SHARED_LIBRARY=OFF &&	cmake --build . && 	cp librwkv.a ..
+	cd go-rwkv && cd rwkv.cpp &&	cmake . -DRWKV_BUILD_SHARED_LIBRARY=OFF &&	cmake --build . && 	cp librwkv.a .. && cp ggml/src/libggml.a ..
 ## bloomz
 bloomz:
 	git clone --recurse-submodules https://github.com/go-skynet/bloomz.cpp bloomz
-	cd bloomz && git checkout -b build $(BLOOMZ_VERSION) && git submodule update --init --recursive --depth 1
+	@find ./bloomz -type f -name "*.c" -exec sed -i'' -e 's/ggml_/ggml_bloomz_/g' {} +
 	@find ./bloomz -type f -name "*.cpp" -exec sed -i'' -e 's/ggml_/ggml_bloomz_/g' {} +
 	@find ./bloomz -type f -name "*.h" -exec sed -i'' -e 's/ggml_/ggml_bloomz_/g' {} +
 	@find ./bloomz -type f -name "*.cpp" -exec sed -i'' -e 's/gpt_/gpt_bloomz_/g' {} +
 	@find ./bloomz -type f -name "*.h" -exec sed -i'' -e 's/gpt_/gpt_bloomz_/g' {} +
 	@find ./bloomz -type f -name "*.cpp" -exec sed -i'' -e 's/void replace/void json_bloomz_replace/g' {} +
 	@find ./bloomz -type f -name "*.cpp" -exec sed -i'' -e 's/::replace/::json_bloomz_replace/g' {} +
 bloomz/libbloomz.a: bloomz
 	cd bloomz && make libbloomz.a
@ -165,21 +124,6 @@ bloomz/libbloomz.a: bloomz
 go-bert/libgobert.a: go-bert
 	$(MAKE) -C go-bert libgobert.a
 backend-assets/gpt4all: gpt4all/gpt4all-bindings/golang/libgpt4all.a
 	mkdir -p backend-assets/gpt4all
 	@cp gpt4all/gpt4all-bindings/golang/buildllm/*.so backend-assets/gpt4all/ || true
 	@cp gpt4all/gpt4all-bindings/golang/buildllm/*.dylib backend-assets/gpt4all/ || true
 	@cp gpt4all/gpt4all-bindings/golang/buildllm/*.dll backend-assets/gpt4all/ || true
 backend-assets/espeak-ng-data:
 	mkdir -p backend-assets/espeak-ng-data
 ifdef ESPEAK_DATA
 	@cp -rf $(ESPEAK_DATA)/. backend-assets/espeak-ng-data
 else
 	@echo "ESPEAK_DATA not set, skipping tts. Note that this will break the tts functionality."
 	@touch backend-assets/espeak-ng-data/keep
 endif
 gpt4all/gpt4all-bindings/golang/libgpt4all.a: gpt4all
 	$(MAKE) -C gpt4all/gpt4all-bindings/golang/ libgpt4all.a
@ -187,13 +131,27 @@ gpt4all/gpt4all-bindings/golang/libgpt4all.a: gpt4all
 go-ggml-transformers: 
 	git clone --recurse-submodules https://github.com/go-skynet/go-ggml-transformers.cpp go-ggml-transformers
 	cd go-ggml-transformers && git checkout -b build $(GOGPT2_VERSION) && git submodule update --init --recursive --depth 1
 	# This is hackish, but needed as both go-llama and go-gpt4allj have their own version of ggml..
 	@find ./go-ggml-transformers -type f -name "*.c" -exec sed -i'' -e 's/ggml_/ggml_gpt2_/g' {} +
 	@find ./go-ggml-transformers -type f -name "*.cpp" -exec sed -i'' -e 's/ggml_/ggml_gpt2_/g' {} +
 	@find ./go-ggml-transformers -type f -name "*.h" -exec sed -i'' -e 's/ggml_/ggml_gpt2_/g' {} +
 	@find ./go-ggml-transformers -type f -name "*.cpp" -exec sed -i'' -e 's/gpt_print_usage/gpt2_print_usage/g' {} +
 	@find ./go-ggml-transformers -type f -name "*.h" -exec sed -i'' -e 's/gpt_print_usage/gpt2_print_usage/g' {} +
 	@find ./go-ggml-transformers -type f -name "*.cpp" -exec sed -i'' -e 's/gpt_params_parse/gpt2_params_parse/g' {} +
 	@find ./go-ggml-transformers -type f -name "*.h" -exec sed -i'' -e 's/gpt_params_parse/gpt2_params_parse/g' {} +
 	@find ./go-ggml-transformers -type f -name "*.cpp" -exec sed -i'' -e 's/gpt_random_prompt/gpt2_random_prompt/g' {} +
 	@find ./go-ggml-transformers -type f -name "*.h" -exec sed -i'' -e 's/gpt_random_prompt/gpt2_random_prompt/g' {} +
 	@find ./go-ggml-transformers -type f -name "*.cpp" -exec sed -i'' -e 's/json_/json_gpt2_/g' {} +
 go-ggml-transformers/libtransformers.a: go-ggml-transformers
-	$(MAKE) -C go-ggml-transformers BUILD_TYPE=$(BUILD_TYPE) libtransformers.a
+	$(MAKE) -C go-ggml-transformers libtransformers.a
 whisper.cpp:
 	git clone https://github.com/ggerganov/whisper.cpp.git
 	cd whisper.cpp && git checkout -b build $(WHISPER_CPP_VERSION) && git submodule update --init --recursive --depth 1
 	@find ./whisper.cpp -type f -name "*.c" -exec sed -i'' -e 's/ggml_/ggml_whisper_/g' {} +
 	@find ./whisper.cpp -type f -name "*.cpp" -exec sed -i'' -e 's/ggml_/ggml_whisper_/g' {} +
 	@find ./whisper.cpp -type f -name "*.h" -exec sed -i'' -e 's/ggml_/ggml_whisper_/g' {} +
 whisper.cpp/libwhisper.a: whisper.cpp
 	cd whisper.cpp && make libwhisper.a
@ -202,28 +160,11 @@ go-llama:
 	git clone --recurse-submodules https://github.com/go-skynet/go-llama.cpp go-llama
 	cd go-llama && git checkout -b build $(GOLLAMA_VERSION) && git submodule update --init --recursive --depth 1
 go-llama-grammar:
 	git clone --recurse-submodules https://github.com/go-skynet/go-llama.cpp go-llama-grammar
 	cd go-llama-grammar && git checkout -b build $(GOLLAMA_GRAMMAR_VERSION) && git submodule update --init --recursive --depth 1
 ifneq ($(LLAMA_CPP_GRAMMAR_REPO),)
 	cd go-llama-grammar && rm -rf llama.cpp && git clone $(LLAMA_CPP_GRAMMAR_REPO) llama.cpp && cd llama.cpp && git checkout -b build $(LLAMA_CPP_GRAMMAR_VERSION) && git submodule update --init --recursive --depth 1
 endif
 go-llama/libbinding.a: go-llama 
 	$(MAKE) -C go-llama BUILD_TYPE=$(BUILD_TYPE) libbinding.a
 go-llama-grammar/libbinding.a: go-llama-grammar
 	$(MAKE) -C go-llama-grammar BUILD_TYPE=$(BUILD_TYPE) libbinding.a
 go-piper/libpiper_binding.a:
 	$(MAKE) -C go-piper libpiper_binding.a example/main
 get-sources: go-llama go-ggllm go-llama-grammar go-ggml-transformers gpt4all go-piper go-rwkv whisper.cpp go-bert bloomz go-stable-diffusion
 	touch $@
 replace:
 	$(GOCMD) mod edit -replace github.com/go-skynet/go-llama.cpp=$(shell pwd)/go-llama
 	$(GOCMD) mod edit -replace github.com/go-skynet/go-llama.cpp-grammar=$(shell pwd)/go-llama-grammar
 	$(GOCMD) mod edit -replace github.com/nomic-ai/gpt4all/gpt4all-bindings/golang=$(shell pwd)/gpt4all/gpt4all-bindings/golang
 	$(GOCMD) mod edit -replace github.com/go-skynet/go-ggml-transformers.cpp=$(shell pwd)/go-ggml-transformers
 	$(GOCMD) mod edit -replace github.com/donomii/go-rwkv.cpp=$(shell pwd)/go-rwkv
@ -231,17 +172,13 @@ replace:
 	$(GOCMD) mod edit -replace github.com/go-skynet/go-bert.cpp=$(shell pwd)/go-bert
 	$(GOCMD) mod edit -replace github.com/go-skynet/bloomz.cpp=$(shell pwd)/bloomz
 	$(GOCMD) mod edit -replace github.com/mudler/go-stable-diffusion=$(shell pwd)/go-stable-diffusion
 	$(GOCMD) mod edit -replace github.com/mudler/go-piper=$(shell pwd)/go-piper
 	$(GOCMD) mod edit -replace github.com/mudler/go-ggllm.cpp=$(shell pwd)/go-ggllm
-prepare-sources: get-sources replace
+prepare-sources: go-llama go-ggml-transformers gpt4all go-rwkv whisper.cpp go-bert bloomz go-stable-diffusion replace
 	$(GOCMD) mod download
 ## GENERIC
 rebuild: ## Rebuilds the project
 	$(GOCMD) clean -cache
 	$(MAKE) -C go-llama clean
 	$(MAKE) -C go-llama-grammar clean
 	$(MAKE) -C gpt4all/gpt4all-bindings/golang/ clean
 	$(MAKE) -C go-ggml-transformers clean
 	$(MAKE) -C go-rwkv clean
@ -249,47 +186,41 @@ rebuild: ## Rebuilds the project
 	$(MAKE) -C go-stable-diffusion clean
 	$(MAKE) -C go-bert clean
 	$(MAKE) -C bloomz clean
 	$(MAKE) -C go-piper clean
 	$(MAKE) -C go-ggllm clean
 	$(MAKE) build
-prepare: prepare-sources $(OPTIONAL_TARGETS) 
+prepare: prepare-sources gpt4all/gpt4all-bindings/golang/libgpt4all.a $(OPTIONAL_TARGETS) go-llama/libbinding.a go-bert/libgobert.a go-ggml-transformers/libtransformers.a go-rwkv/librwkv.a whisper.cpp/libwhisper.a bloomz/libbloomz.a  ## Prepares for building
 	touch $@
 clean: ## Remove build related file
 	$(GOCMD) clean -cache
 	rm -fr ./go-llama
 	rm -rf ./gpt4all
 	rm -rf ./go-gpt2
 	rm -rf ./go-stable-diffusion
 	rm -rf ./go-ggml-transformers
 	rm -rf ./backend-assets
 	rm -rf ./go-rwkv
 	rm -rf ./go-bert
 	rm -rf ./bloomz
 	rm -rf ./whisper.cpp
 	rm -rf ./go-piper
 	rm -rf ./go-ggllm
 	rm -rf $(BINARY_NAME)
 	rm -rf release/
 ## Build:
-build: grpcs prepare ## Build the project
+build: prepare ## Build the project
 	$(info ${GREEN}I local-ai build info:${RESET})
 	$(info ${GREEN}I BUILD_TYPE: ${YELLOW}$(BUILD_TYPE)${RESET})
 	$(info ${GREEN}I GO_TAGS: ${YELLOW}$(GO_TAGS)${RESET})
-	$(info ${GREEN}I LD_FLAGS: ${YELLOW}$(LD_FLAGS)${RESET})
+	CGO_LDFLAGS="$(CGO_LDFLAGS)" C_INCLUDE_PATH=${C_INCLUDE_PATH} LIBRARY_PATH=${LIBRARY_PATH} $(GOCMD) build -ldflags "$(LD_FLAGS)" -tags "$(GO_TAGS)" -o $(BINARY_NAME) ./
 	CGO_LDFLAGS="$(CGO_LDFLAGS)" $(GOCMD) build -ldflags "$(LD_FLAGS)" -tags "$(GO_TAGS)" -o $(BINARY_NAME) ./
 dist: build
 	mkdir -p release
 	cp $(BINARY_NAME) release/$(BINARY_NAME)-$(BUILD_ID)-$(OS)-$(ARCH)
 generic-build: ## Build the project using generic
 	BUILD_TYPE="generic" $(MAKE) build
 ## Run
 run: prepare ## run local-ai
-	CGO_LDFLAGS="$(CGO_LDFLAGS)" $(GOCMD) run ./
+	CGO_LDFLAGS="$(CGO_LDFLAGS)" C_INCLUDE_PATH=${C_INCLUDE_PATH} LIBRARY_PATH=${LIBRARY_PATH} $(GOCMD) run ./main.go
 test-models/testmodel:
 	mkdir test-models
@ -302,40 +233,9 @@ test-models/testmodel:
 	wget https://raw.githubusercontent.com/saharNooby/rwkv.cpp/5eb8f09c146ea8124633ab041d9ea0b1f1db4459/rwkv/20B_tokenizer.json -O test-models/rwkv.tokenizer.json
 	cp tests/models_fixtures/* test-models
-prepare-test: grpcs
+test: prepare test-models/testmodel
 	cp -rf backend-assets api
 	cp tests/models_fixtures/* test-models
-
+	C_INCLUDE_PATH=${C_INCLUDE_PATH} LIBRARY_PATH=${LIBRARY_PATH} TEST_DIR=$(abspath ./)/test-dir/ FIXTURES=$(abspath ./)/tests/fixtures CONFIG_FILE=$(abspath ./)/test-models/config.yaml MODELS_PATH=$(abspath ./)/test-models $(GOCMD) run github.com/onsi/ginkgo/v2/ginkgo --flakeAttempts 5 -v -r ./api ./pkg
 # test: full test run. Prints a banner, re-invokes make for prepare-test, runs
 # ginkgo over ./api and ./pkg excluding the specs labelled gpt4all or llama,
 # then runs each labelled suite through its own sub-make.
 # NOTE(review): make runs every recipe line in its own shell (unless .ONESHELL
 # is set elsewhere in this Makefile), so the `export GO_TAGS=...` line below
 # does not reach the commands that follow it — confirm whether GO_TAGS is
 # meant to be passed to the sub-makes instead.
 test: prepare test-models/testmodel grpcs
 	@echo 'Running tests'
 	export GO_TAGS="tts stablediffusion"
 	$(MAKE) prepare-test
 	HUGGINGFACE_GRPC=$(abspath ./)/extra/grpc/huggingface/huggingface.py TEST_DIR=$(abspath ./)/test-dir/ FIXTURES=$(abspath ./)/tests/fixtures CONFIG_FILE=$(abspath ./)/test-models/config.yaml MODELS_PATH=$(abspath ./)/test-models \
 	$(GOCMD) run github.com/onsi/ginkgo/v2/ginkgo --label-filter="!gpt4all && !llama" --flake-attempts 5 -v -r ./api ./pkg
 	$(MAKE) test-gpt4all
 	$(MAKE) test-llama
 	$(MAKE) test-tts
 	$(MAKE) test-stablediffusion
 # test-gpt4all: runs only the specs labelled "gpt4all" (--flake-attempts 5).
 test-gpt4all: prepare-test
 	TEST_DIR=$(abspath ./)/test-dir/ FIXTURES=$(abspath ./)/tests/fixtures CONFIG_FILE=$(abspath ./)/test-models/config.yaml MODELS_PATH=$(abspath ./)/test-models \
 	$(GOCMD) run github.com/onsi/ginkgo/v2/ginkgo --label-filter="gpt4all" --flake-attempts 5 -v -r ./api ./pkg
 # test-llama: runs only the specs labelled "llama" (--flake-attempts 5).
 test-llama: prepare-test
 	TEST_DIR=$(abspath ./)/test-dir/ FIXTURES=$(abspath ./)/tests/fixtures CONFIG_FILE=$(abspath ./)/test-models/config.yaml MODELS_PATH=$(abspath ./)/test-models \
 	$(GOCMD) run github.com/onsi/ginkgo/v2/ginkgo --label-filter="llama" --flake-attempts 5 -v -r ./api ./pkg
 # test-tts: runs only the specs labelled "tts" (--flake-attempts 1).
 test-tts: prepare-test
 	TEST_DIR=$(abspath ./)/test-dir/ FIXTURES=$(abspath ./)/tests/fixtures CONFIG_FILE=$(abspath ./)/test-models/config.yaml MODELS_PATH=$(abspath ./)/test-models \
 	$(GOCMD) run github.com/onsi/ginkgo/v2/ginkgo --label-filter="tts" --flake-attempts 1 -v -r ./api ./pkg
 # test-stablediffusion: runs only the specs labelled "stablediffusion"
 # (--flake-attempts 1).
 test-stablediffusion: prepare-test
 	TEST_DIR=$(abspath ./)/test-dir/ FIXTURES=$(abspath ./)/tests/fixtures CONFIG_FILE=$(abspath ./)/test-models/config.yaml MODELS_PATH=$(abspath ./)/test-models \
 	$(GOCMD) run github.com/onsi/ginkgo/v2/ginkgo --label-filter="stablediffusion" --flake-attempts 1 -v -r ./api ./pkg
 # test-container: builds the image up to the `requirements` stage and opens an
 # interactive bash shell in it, with the repository bind-mounted at /build.
 # The container is removed on exit (--rm).
 test-container:
 	docker build --target requirements -t local-ai-test-container .
 	docker run --rm -ti --entrypoint /bin/bash -v $(abspath ./):/build local-ai-test-container
 ## Help:
 help: ## Show this help.
@ -348,98 +248,3 @@ help: ## Show this help.
 		if (/^[a-zA-Z_-]+:.*?##.*$$/) {printf "    ${YELLOW}%-20s${GREEN}%s${RESET}\n", $$1, $$2} \
 		else if (/^## .*$$/) {printf "  ${CYAN}%s${RESET}\n", substr($$1,4)} \
 		}' $(MAKEFILE_LIST)
 # protogen: regenerates the gRPC stubs for both languages from
 # pkg/grpc/proto/backend.proto.
 protogen: protogen-go protogen-python
 # protogen-go: runs protoc with the Go and Go-gRPC plugins; with
 # paths=source_relative the output is written relative to the .proto path.
 protogen-go:
 	protoc --go_out=. --go_opt=paths=source_relative --go-grpc_out=. --go-grpc_opt=paths=source_relative \
    pkg/grpc/proto/backend.proto
 # protogen-python: runs grpc_tools.protoc and writes the Python message and
 # gRPC modules into extra/grpc/huggingface/.
 protogen-python:
 	python -m grpc_tools.protoc -Ipkg/grpc/proto/ --python_out=extra/grpc/huggingface/ --grpc_python_out=extra/grpc/huggingface/ pkg/grpc/proto/backend.proto
 ## GRPC
 # Each backend-assets/grpc/<name> target below compiles ./cmd/grpc/<name>/ into
 # a binary of the same name under backend-assets/grpc/. The cgo-based targets
 # point C_INCLUDE_PATH and LIBRARY_PATH at the directory that holds the static
 # library listed as their prerequisite; exceptions are noted on the target.

 # Output directory shared by all backend binaries.
 backend-assets/grpc:
 	mkdir -p backend-assets/grpc
 backend-assets/grpc/falcon: backend-assets/grpc go-ggllm/libggllm.a
 	CGO_LDFLAGS="$(CGO_LDFLAGS)" C_INCLUDE_PATH=$(shell pwd)/go-ggllm LIBRARY_PATH=$(shell pwd)/go-ggllm \
 	$(GOCMD) build -ldflags "$(LD_FLAGS)" -tags "$(GO_TAGS)" -o backend-assets/grpc/falcon ./cmd/grpc/falcon/
 # llama: when BUILD_TYPE is metal, the recipe additionally copies
 # ggml-metal.metal from the go-llama build output next to the binaries.
 backend-assets/grpc/llama: backend-assets/grpc go-llama/libbinding.a
 	CGO_LDFLAGS="$(CGO_LDFLAGS)" C_INCLUDE_PATH=$(shell pwd)/go-llama LIBRARY_PATH=$(shell pwd)/go-llama \
 	$(GOCMD) build -ldflags "$(LD_FLAGS)" -tags "$(GO_TAGS)" -o backend-assets/grpc/llama ./cmd/grpc/llama/
 # TODO: every binary should have its own folder instead, so can have different metal implementations
 ifeq ($(BUILD_TYPE),metal)
 	cp go-llama/build/bin/ggml-metal.metal backend-assets/grpc/
 endif
 backend-assets/grpc/llama-grammar: backend-assets/grpc go-llama-grammar/libbinding.a
 	CGO_LDFLAGS="$(CGO_LDFLAGS)" C_INCLUDE_PATH=$(shell pwd)/go-llama-grammar LIBRARY_PATH=$(shell pwd)/go-llama-grammar \
 	$(GOCMD) build -ldflags "$(LD_FLAGS)" -tags "$(GO_TAGS)" -o backend-assets/grpc/llama-grammar ./cmd/grpc/llama-grammar/
 # gpt4all: also requires the backend-assets/gpt4all target.
 backend-assets/grpc/gpt4all: backend-assets/grpc backend-assets/gpt4all gpt4all/gpt4all-bindings/golang/libgpt4all.a
 	CGO_LDFLAGS="$(CGO_LDFLAGS)" C_INCLUDE_PATH=$(shell pwd)/gpt4all/gpt4all-bindings/golang/ LIBRARY_PATH=$(shell pwd)/gpt4all/gpt4all-bindings/golang/ \
 	$(GOCMD) build -ldflags "$(LD_FLAGS)" -tags "$(GO_TAGS)" -o backend-assets/grpc/gpt4all ./cmd/grpc/gpt4all/
 # dolly, gpt2, gptj, gptneox, mpt, replit, falcon-ggml and starcoder all link
 # against go-ggml-transformers/libtransformers.a.
 backend-assets/grpc/dolly: backend-assets/grpc go-ggml-transformers/libtransformers.a
 	CGO_LDFLAGS="$(CGO_LDFLAGS)" C_INCLUDE_PATH=$(shell pwd)/go-ggml-transformers LIBRARY_PATH=$(shell pwd)/go-ggml-transformers \
 	$(GOCMD) build -ldflags "$(LD_FLAGS)" -tags "$(GO_TAGS)" -o backend-assets/grpc/dolly ./cmd/grpc/dolly/
 backend-assets/grpc/gpt2: backend-assets/grpc go-ggml-transformers/libtransformers.a
 	CGO_LDFLAGS="$(CGO_LDFLAGS)" C_INCLUDE_PATH=$(shell pwd)/go-ggml-transformers LIBRARY_PATH=$(shell pwd)/go-ggml-transformers \
 	$(GOCMD) build -ldflags "$(LD_FLAGS)" -tags "$(GO_TAGS)" -o backend-assets/grpc/gpt2 ./cmd/grpc/gpt2/
 backend-assets/grpc/gptj: backend-assets/grpc go-ggml-transformers/libtransformers.a
 	CGO_LDFLAGS="$(CGO_LDFLAGS)" C_INCLUDE_PATH=$(shell pwd)/go-ggml-transformers LIBRARY_PATH=$(shell pwd)/go-ggml-transformers \
 	$(GOCMD) build -ldflags "$(LD_FLAGS)" -tags "$(GO_TAGS)" -o backend-assets/grpc/gptj ./cmd/grpc/gptj/
 backend-assets/grpc/gptneox: backend-assets/grpc go-ggml-transformers/libtransformers.a
 	CGO_LDFLAGS="$(CGO_LDFLAGS)" C_INCLUDE_PATH=$(shell pwd)/go-ggml-transformers LIBRARY_PATH=$(shell pwd)/go-ggml-transformers \
 	$(GOCMD) build -ldflags "$(LD_FLAGS)" -tags "$(GO_TAGS)" -o backend-assets/grpc/gptneox ./cmd/grpc/gptneox/
 backend-assets/grpc/mpt: backend-assets/grpc go-ggml-transformers/libtransformers.a
 	CGO_LDFLAGS="$(CGO_LDFLAGS)" C_INCLUDE_PATH=$(shell pwd)/go-ggml-transformers LIBRARY_PATH=$(shell pwd)/go-ggml-transformers \
 	$(GOCMD) build -ldflags "$(LD_FLAGS)" -tags "$(GO_TAGS)" -o backend-assets/grpc/mpt ./cmd/grpc/mpt/
 backend-assets/grpc/replit: backend-assets/grpc go-ggml-transformers/libtransformers.a
 	CGO_LDFLAGS="$(CGO_LDFLAGS)" C_INCLUDE_PATH=$(shell pwd)/go-ggml-transformers LIBRARY_PATH=$(shell pwd)/go-ggml-transformers \
 	$(GOCMD) build -ldflags "$(LD_FLAGS)" -tags "$(GO_TAGS)" -o backend-assets/grpc/replit ./cmd/grpc/replit/
 backend-assets/grpc/falcon-ggml: backend-assets/grpc go-ggml-transformers/libtransformers.a
 	CGO_LDFLAGS="$(CGO_LDFLAGS)" C_INCLUDE_PATH=$(shell pwd)/go-ggml-transformers LIBRARY_PATH=$(shell pwd)/go-ggml-transformers \
 	$(GOCMD) build -ldflags "$(LD_FLAGS)" -tags "$(GO_TAGS)" -o backend-assets/grpc/falcon-ggml ./cmd/grpc/falcon-ggml/
 backend-assets/grpc/starcoder: backend-assets/grpc go-ggml-transformers/libtransformers.a
 	CGO_LDFLAGS="$(CGO_LDFLAGS)" C_INCLUDE_PATH=$(shell pwd)/go-ggml-transformers LIBRARY_PATH=$(shell pwd)/go-ggml-transformers \
 	$(GOCMD) build -ldflags "$(LD_FLAGS)" -tags "$(GO_TAGS)" -o backend-assets/grpc/starcoder ./cmd/grpc/starcoder/
 backend-assets/grpc/rwkv: backend-assets/grpc go-rwkv/librwkv.a
 	CGO_LDFLAGS="$(CGO_LDFLAGS)" C_INCLUDE_PATH=$(shell pwd)/go-rwkv LIBRARY_PATH=$(shell pwd)/go-rwkv \
 	$(GOCMD) build -ldflags "$(LD_FLAGS)" -tags "$(GO_TAGS)" -o backend-assets/grpc/rwkv ./cmd/grpc/rwkv/
 backend-assets/grpc/bloomz: backend-assets/grpc bloomz/libbloomz.a
 	CGO_LDFLAGS="$(CGO_LDFLAGS)" C_INCLUDE_PATH=$(shell pwd)/bloomz LIBRARY_PATH=$(shell pwd)/bloomz \
 	$(GOCMD) build -ldflags "$(LD_FLAGS)" -tags "$(GO_TAGS)" -o backend-assets/grpc/bloomz ./cmd/grpc/bloomz/
 backend-assets/grpc/bert-embeddings: backend-assets/grpc go-bert/libgobert.a
 	CGO_LDFLAGS="$(CGO_LDFLAGS)" C_INCLUDE_PATH=$(shell pwd)/go-bert LIBRARY_PATH=$(shell pwd)/go-bert \
 	$(GOCMD) build -ldflags "$(LD_FLAGS)" -tags "$(GO_TAGS)" -o backend-assets/grpc/bert-embeddings ./cmd/grpc/bert-embeddings/
 # langchain-huggingface: built without any cgo environment variables; its only
 # prerequisite is the output directory.
 backend-assets/grpc/langchain-huggingface: backend-assets/grpc
 	$(GOCMD) build -ldflags "$(LD_FLAGS)" -tags "$(GO_TAGS)" -o backend-assets/grpc/langchain-huggingface ./cmd/grpc/langchain-huggingface/
 backend-assets/grpc/stablediffusion: backend-assets/grpc go-stable-diffusion/libstablediffusion.a
 	CGO_LDFLAGS="$(CGO_LDFLAGS)" C_INCLUDE_PATH=$(shell pwd)/go-stable-diffusion/ LIBRARY_PATH=$(shell pwd)/go-stable-diffusion/ \
 	$(GOCMD) build -ldflags "$(LD_FLAGS)" -tags "$(GO_TAGS)" -o backend-assets/grpc/stablediffusion ./cmd/grpc/stablediffusion/
 # piper: sets only LIBRARY_PATH (no C_INCLUDE_PATH) and also requires the
 # backend-assets/espeak-ng-data target.
 backend-assets/grpc/piper: backend-assets/grpc backend-assets/espeak-ng-data go-piper/libpiper_binding.a
 	CGO_LDFLAGS="$(CGO_LDFLAGS)" LIBRARY_PATH=$(shell pwd)/go-piper \
 	$(GOCMD) build -ldflags "$(LD_FLAGS)" -tags "$(GO_TAGS)" -o backend-assets/grpc/piper ./cmd/grpc/piper/
 backend-assets/grpc/whisper: backend-assets/grpc whisper.cpp/libwhisper.a
 	CGO_LDFLAGS="$(CGO_LDFLAGS)" C_INCLUDE_PATH=$(shell pwd)/whisper.cpp LIBRARY_PATH=$(shell pwd)/whisper.cpp \
 	$(GOCMD) build -ldflags "$(LD_FLAGS)" -tags "$(GO_TAGS)" -o backend-assets/grpc/whisper ./cmd/grpc/whisper/
 # grpcs: aggregate target with no recipe of its own — requires `prepare`, every
 # backend binary listed here, and whatever $(OPTIONAL_GRPC) expands to.
 # stablediffusion and piper are not listed explicitly; presumably they are
 # added through OPTIONAL_GRPC — verify where that variable is defined.
 grpcs: prepare backend-assets/grpc/langchain-huggingface backend-assets/grpc/llama-grammar backend-assets/grpc/falcon-ggml backend-assets/grpc/bert-embeddings backend-assets/grpc/falcon backend-assets/grpc/bloomz backend-assets/grpc/llama backend-assets/grpc/gpt4all backend-assets/grpc/dolly backend-assets/grpc/gpt2 backend-assets/grpc/gptj backend-assets/grpc/gptneox backend-assets/grpc/mpt backend-assets/grpc/replit backend-assets/grpc/starcoder backend-assets/grpc/rwkv backend-assets/grpc/whisper $(OPTIONAL_GRPC)
--- a/README.md
+++ b/README.md
@ -1,8 +1,93 @@
-# LOCAL AI
+<h1 align="center">
  <br>
  <img height="300" src="https://user-images.githubusercontent.com/2420543/233147843-88697415-6dbf-4368-a862-ab217f9f7342.jpeg"> <br>
    LocalAI
 <br>
 </h1>
-## USAGE
+[![tests](https://github.com/go-skynet/LocalAI/actions/workflows/test.yml/badge.svg)](https://github.com/go-skynet/LocalAI/actions/workflows/test.yml) [![build container images](https://github.com/go-skynet/LocalAI/actions/workflows/image.yml/badge.svg)](https://github.com/go-skynet/LocalAI/actions/workflows/image.yml)
- Installation et démarrage:
+[![](https://dcbadge.vercel.app/api/server/uJAeKSAGDy?style=flat-square&theme=default-inverted)](https://discord.gg/uJAeKSAGDy) 
 **LocalAI** is a drop-in replacement REST API that’s compatible with OpenAI API specifications for local inferencing. It allows you to run LLMs (and more) locally or on-prem with consumer-grade hardware, supporting multiple model families that are compatible with the ggml format. It does not require a GPU.
 For a list of the supported model families, please see [the model compatibility table](https://localai.io/model-compatibility/index.html#model-compatibility-table).
 In a nutshell:
 - Local, OpenAI drop-in alternative REST API. You own your data.
 - NO GPU required. NO Internet access is required either. Optionally, GPU acceleration is available for `llama.cpp`-compatible LLMs. [See the build instructions](https://localai.io/basics/build/index.html).
 - Supports multiple models, Audio transcription, Text generation with GPTs, Image generation with stable diffusion (experimental)
 - Once loaded the first time, it keeps models loaded in memory for faster inference
 - Doesn't shell out, but uses C++ bindings for faster inference and better performance.
 LocalAI was created by [Ettore Di Giacinto](https://github.com/mudler/) and is a community-driven project, focused on making AI accessible to anyone. Contributions, feedback, and PRs are all welcome!
 | [ChatGPT OSS alternative](https://github.com/go-skynet/LocalAI/tree/master/examples/chatbot-ui)                                                                                                                | [Image generation](https://localai.io/api-endpoints/index.html#image-generation)                                                                                                              |
 |------------------------------------------------------------------------------------------------------------------------|------------------------------------------------------------------------------------------------------------------------|
 |  ![Screenshot from 2023-04-26 23-59-55](https://user-images.githubusercontent.com/2420543/234715439-98d12e03-d3ce-4f94-ab54-2b256808e05e.png)            | ![b6441997879](https://github.com/go-skynet/LocalAI/assets/2420543/d50af51c-51b7-4f39-b6c2-bf04c403894c)                  |
 See the [Getting started](https://localai.io/basics/getting_started/index.html) and [examples](https://github.com/go-skynet/LocalAI/tree/master/examples/) sections to learn how to use LocalAI. For a list of curated models check out the [model gallery](https://github.com/go-skynet/model-gallery).
 ## News
 - 29-05-2023: LocalAI now has a website, [https://localai.io](https://localai.io)! Check the news in the [dedicated section](https://localai.io/basics/news/index.html)!
 For the latest news, also follow [@LocalAI_API](https://twitter.com/LocalAI_API) and [@mudler_it](https://twitter.com/mudler_it) on Twitter.
 ## Contribute and help
 To help the project you can:
 - Upvote the [Reddit post](https://www.reddit.com/r/selfhosted/comments/12w4p2f/localai_openai_compatible_api_to_run_llm_models/) about LocalAI.
 - [Hacker news post](https://news.ycombinator.com/item?id=35726934) - help us out by voting if you like this project.
 - If you have technical skills and want to contribute to development, have a look at the open issues. If you are new, you can start with the [good-first-issue](https://github.com/go-skynet/LocalAI/issues?q=is%3Aissue+is%3Aopen+label%3A%22good+first+issue%22) and [help-wanted](https://github.com/go-skynet/LocalAI/issues?q=is%3Aissue+is%3Aopen+label%3A%22help+wanted%22) labels.
 - If you don't have technical skills, you can still help by improving the documentation, adding examples, or sharing your user stories with our community. Any help and contribution is welcome!
 ## Usage
 Check out the [Getting started](https://localai.io/basics/getting_started/index.html) section. Below you will find generic, quick instructions to get started with LocalAI.
 The easiest way to run LocalAI is by using `docker-compose` (to build locally, see [building LocalAI](https://localai.io/basics/build/index.html)):
 ```bash
 git clone https://github.com/go-skynet/LocalAI
 cd LocalAI
 # (optional) Checkout a specific LocalAI tag
 # git checkout -b build <TAG>
 # copy your models to models/
 cp your-model.bin models/
 # (optional) Edit the .env file to set things like context size and threads
 # vim .env
 # start with docker-compose
 docker-compose up -d --pull always
 # or you can build the images with:
 # docker-compose up -d --build
 # Now API is accessible at localhost:8080
 curl http://localhost:8080/v1/models
 # {"object":"list","data":[{"id":"your-model.bin","object":"model"}]}
 curl http://localhost:8080/v1/completions -H "Content-Type: application/json" -d '{
     "model": "your-model.bin",            
     "prompt": "A long time ago in a galaxy far, far away",
     "temperature": 0.7
   }'
 ```
 ### Example: Use GPT4ALL-J model
 <details>
 ```bash
 # Clone LocalAI
@ -23,9 +108,9 @@ cp -rf prompt-templates/ggml-gpt4all-j.tmpl models/
 # vim .env
 # start with docker-compose
-# docker-compose up -d --pull always
+docker-compose up -d --pull always
 # or you can build the images with:
-docker-compose up -d --build
+# docker-compose up -d --build
 # Now API is accessible at localhost:8080
 curl http://localhost:8080/v1/models
 # {"object":"list","data":[{"id":"ggml-gpt4all-j","object":"model"}]}
@ -38,23 +123,95 @@ curl http://localhost:8080/v1/chat/completions -H "Content-Type: application/jso
 # {"model":"ggml-gpt4all-j","choices":[{"message":{"role":"assistant","content":"I'm doing well, thanks. How about you?"}}]}
 ```
 </details>
- Python implementation:
+### Build locally
-```python
+<details>
 import openai
-openai.api_base = "http://localhost:8080/v1"
+In order to build the `LocalAI` container image locally you can use `docker`:
-# create a chat completion
+```
-chat_completion = openai.ChatCompletion.create(model="gpt-3.5-turbo", messages=[{"role": "user", "content": "Hello world"}])
+# build the image
 docker build -t localai .
 docker run localai
 ```
 Or you can build the binary with `make`:
 # print the completion
 print(completion.choices[0].message.content)
 ```
 make build
 ```
 </details>
 See the [build section](https://localai.io/basics/build/index.html) in our documentation for detailed instructions.
 ### Run LocalAI in Kubernetes
 LocalAI can be installed inside Kubernetes with helm. See [installation instructions](https://localai.io/basics/getting_started/index.html#run-localai-in-kubernetes).
 ## Supported API endpoints
 See the [list of the supported API endpoints](https://localai.io/api-endpoints/index.html) and how to configure image generation and audio transcription.
 ## Frequently asked questions
 See [the FAQ](https://localai.io/faq/index.html) section for a list of common questions.
 ## Projects already using LocalAI to run local models
 Feel free to open up a PR to get your project listed!
 - [Kairos](https://github.com/kairos-io/kairos)
 - [k8sgpt](https://github.com/k8sgpt-ai/k8sgpt#running-local-models)
 - [Spark](https://github.com/cedriking/spark)
 - [autogpt4all](https://github.com/aorumbayev/autogpt4all)
 - [Mods](https://github.com/charmbracelet/mods)
 - [Flowise](https://github.com/FlowiseAI/Flowise)
 ## Short-term roadmap
 - [x] Mimic OpenAI API (https://github.com/go-skynet/LocalAI/issues/10)
 - [ ] Binary releases (https://github.com/go-skynet/LocalAI/issues/6)
 - [ ] Upstream our golang bindings to llama.cpp (https://github.com/ggerganov/llama.cpp/issues/351) and [gpt4all](https://github.com/go-skynet/LocalAI/issues/85)
 - [x] Multi-model support
 - [x] Have a webUI!
 - [x] Allow configuration of defaults for models.
 - [x] Support for embeddings
 - [x] Support for audio transcription with https://github.com/ggerganov/whisper.cpp
 - [ ] GPU/CUDA support ( https://github.com/go-skynet/LocalAI/issues/69 )
 - [ ] Enable automatic downloading of models from a curated gallery, with only free-licensed models, directly from the webui.
 ## Star history
 [![LocalAI Star history Chart](https://api.star-history.com/svg?repos=go-skynet/LocalAI&type=Date)](https://star-history.com/#go-skynet/LocalAI&Date)
 ## License
 LocalAI is a community-driven project created by [Ettore Di Giacinto](https://github.com/mudler/).
 MIT
 ## Author
 Ettore Di Giacinto and others
 ## Acknowledgements
 LocalAI couldn't have been built without the help of great software already available from the community. Thank you!
 - [llama.cpp](https://github.com/ggerganov/llama.cpp)
 - https://github.com/tatsu-lab/stanford_alpaca
 - https://github.com/cornelk/llama-go for the initial ideas
 - https://github.com/antimatter15/alpaca.cpp
 - https://github.com/EdVince/Stable-Diffusion-NCNN
 - https://github.com/ggerganov/whisper.cpp
 - https://github.com/saharNooby/rwkv.cpp
-## TO DO 
+## Contributors
- [ ] Flask app frontend
+<a href="https://github.com/go-skynet/LocalAI/graphs/contributors">
- [ ] Keycloak auth
+  <img src="https://contrib.rocks/image?repo=go-skynet/LocalAI" />
- [ ] speech to text avec openVINO
+</a>
--- a/api/api.go
+++ b/api/api.go
@ -3,13 +3,6 @@ package api
 import (
 	"errors"
 	config "github.com/go-skynet/LocalAI/api/config"
 	"github.com/go-skynet/LocalAI/api/localai"
 	"github.com/go-skynet/LocalAI/api/openai"
 	"github.com/go-skynet/LocalAI/api/options"
 	"github.com/go-skynet/LocalAI/internal"
 	"github.com/go-skynet/LocalAI/pkg/assets"
 	"github.com/gofiber/fiber/v2"
 	"github.com/gofiber/fiber/v2/middleware/cors"
 	"github.com/gofiber/fiber/v2/middleware/logger"
@ -18,18 +11,18 @@ import (
 	"github.com/rs/zerolog/log"
 )
-func App(opts ...options.AppOption) (*fiber.App, error) {
+func App(opts ...AppOption) (*fiber.App, error) {
-	options := options.NewOptions(opts...)
+	options := newOptions(opts...)
 	zerolog.SetGlobalLevel(zerolog.InfoLevel)
-	if options.Debug {
+	if options.debug {
 		zerolog.SetGlobalLevel(zerolog.DebugLevel)
 	}
 	// Return errors as JSON responses
 	app := fiber.New(fiber.Config{
-		BodyLimit:             options.UploadLimitMB * 1024 * 1024, // this is the default limit of 4MB
+		BodyLimit:             options.uploadLimitMB * 1024 * 1024, // this is the default limit of 4MB
-		DisableStartupMessage: options.DisableMessage,
+		DisableStartupMessage: options.disableMessage,
 		// Override default error handler
 		ErrorHandler: func(ctx *fiber.Ctx, err error) error {
 			// Status code defaults to 500
@ -43,122 +36,94 @@ func App(opts ...options.AppOption) (*fiber.App, error) {
 			// Send custom error page
 			return ctx.Status(code).JSON(
-				openai.ErrorResponse{
+				ErrorResponse{
-					Error: &openai.APIError{Message: err.Error(), Code: code},
+					Error: &APIError{Message: err.Error(), Code: code},
 				},
 			)
 		},
 	})
-	if options.Debug {
+	if options.debug {
 		app.Use(logger.New(logger.Config{
 			Format: "[${ip}]:${port} ${status} - ${method} ${path}\n",
 		}))
 	}
-	log.Info().Msgf("Starting LocalAI using %d threads, with models path: %s", options.Threads, options.Loader.ModelPath)
+	cm := NewConfigMerger()
-	log.Info().Msgf("LocalAI version: %s", internal.PrintableVersion())
+	if err := cm.LoadConfigs(options.loader.ModelPath); err != nil {
 	cm := config.NewConfigLoader()
 	if err := cm.LoadConfigs(options.Loader.ModelPath); err != nil {
 		log.Error().Msgf("error loading config files: %s", err.Error())
 	}
-	if options.ConfigFile != "" {
+	if options.configFile != "" {
-		if err := cm.LoadConfigFile(options.ConfigFile); err != nil {
+		if err := cm.LoadConfigFile(options.configFile); err != nil {
 			log.Error().Msgf("error loading config file: %s", err.Error())
 		}
 	}
-	if options.Debug {
+	if options.debug {
 		for _, v := range cm.ListConfigs() {
 			cfg, _ := cm.GetConfig(v)
 			log.Debug().Msgf("Model: %s (config: %+v)", v, cfg)
 		}
 	}
 	if options.AssetsDestination != "" {
 		// Extract files from the embedded FS
 		err := assets.ExtractFiles(options.BackendAssets, options.AssetsDestination)
 		log.Debug().Msgf("Extracting backend assets files to %s", options.AssetsDestination)
 		if err != nil {
 			log.Warn().Msgf("Failed extracting backend assets files: %s (might be required for some backends to work properly, like gpt4all)", err)
 		}
 	}
 	// Default middleware config
 	app.Use(recover.New())
-	if options.PreloadJSONModels != "" {
+	if options.preloadJSONModels != "" {
-		if err := localai.ApplyGalleryFromString(options.Loader.ModelPath, options.PreloadJSONModels, cm, options.Galleries); err != nil {
+		if err := ApplyGalleryFromString(options.loader.ModelPath, options.preloadJSONModels, cm); err != nil {
 			return nil, err
 		}
 	}
-	if options.PreloadModelsFromPath != "" {
+	if options.preloadModelsFromPath != "" {
-		if err := localai.ApplyGalleryFromFile(options.Loader.ModelPath, options.PreloadModelsFromPath, cm, options.Galleries); err != nil {
+		if err := ApplyGalleryFromFile(options.loader.ModelPath, options.preloadModelsFromPath, cm); err != nil {
 			return nil, err
 		}
 	}
-	if options.CORS {
+	if options.cors {
-		var c func(ctx *fiber.Ctx) error
+		if options.corsAllowOrigins == "" {
-		if options.CORSAllowOrigins == "" {
+			app.Use(cors.New())
 			c = cors.New()
 		} else {
-			c = cors.New(cors.Config{AllowOrigins: options.CORSAllowOrigins})
+			app.Use(cors.New(cors.Config{
 				AllowOrigins: options.corsAllowOrigins,
 			}))
 		}
 		app.Use(c)
 	}
 	// LocalAI API endpoints
-	galleryService := localai.NewGalleryService(options.Loader.ModelPath)
+	applier := newGalleryApplier(options.loader.ModelPath)
-	galleryService.Start(options.Context, cm)
+	applier.start(options.context, cm)
-
+	app.Post("/models/apply", applyModelGallery(options.loader.ModelPath, cm, applier.C))
-	app.Get("/version", func(c *fiber.Ctx) error {
+	app.Get("/models/jobs/:uuid", getOpStatus(applier))
 		return c.JSON(struct {
 			Version string `json:"version"`
 		}{Version: internal.PrintableVersion()})
 	})
 	app.Post("/models/apply", localai.ApplyModelGalleryEndpoint(options.Loader.ModelPath, cm, galleryService.C, options.Galleries))
 	app.Get("/models/available", localai.ListModelFromGalleryEndpoint(options.Galleries, options.Loader.ModelPath))
 	app.Get("/models/jobs/:uuid", localai.GetOpStatusEndpoint(galleryService))
 	// openAI compatible API endpoint
 	// chat
-	app.Post("/v1/chat/completions", openai.ChatEndpoint(cm, options))
+	app.Post("/v1/chat/completions", chatEndpoint(cm, options))
-	app.Post("/chat/completions", openai.ChatEndpoint(cm, options))
+	app.Post("/chat/completions", chatEndpoint(cm, options))
 	// edit
-	app.Post("/v1/edits", openai.EditEndpoint(cm, options))
+	app.Post("/v1/edits", editEndpoint(cm, options))
-	app.Post("/edits", openai.EditEndpoint(cm, options))
+	app.Post("/edits", editEndpoint(cm, options))
 	// completion
-	app.Post("/v1/completions", openai.CompletionEndpoint(cm, options))
+	app.Post("/v1/completions", completionEndpoint(cm, options))
-	app.Post("/completions", openai.CompletionEndpoint(cm, options))
+	app.Post("/completions", completionEndpoint(cm, options))
 	app.Post("/v1/engines/:model/completions", openai.CompletionEndpoint(cm, options))
 	// embeddings
-	app.Post("/v1/embeddings", openai.EmbeddingsEndpoint(cm, options))
+	app.Post("/v1/embeddings", embeddingsEndpoint(cm, options))
-	app.Post("/embeddings", openai.EmbeddingsEndpoint(cm, options))
+	app.Post("/embeddings", embeddingsEndpoint(cm, options))
-	app.Post("/v1/engines/:model/embeddings", openai.EmbeddingsEndpoint(cm, options))
+	app.Post("/v1/engines/:model/embeddings", embeddingsEndpoint(cm, options))
 	// audio
-	app.Post("/v1/audio/transcriptions", openai.TranscriptEndpoint(cm, options))
+	app.Post("/v1/audio/transcriptions", transcriptEndpoint(cm, options))
 	app.Post("/tts", localai.TTSEndpoint(cm, options))
 	// images
-	app.Post("/v1/images/generations", openai.ImageEndpoint(cm, options))
+	app.Post("/v1/images/generations", imageEndpoint(cm, options))
 	if options.ImageDir != "" {
 		app.Static("/generated-images", options.ImageDir)
 	}
-	if options.AudioDir != "" {
+	if options.imageDir != "" {
-		app.Static("/generated-audio", options.AudioDir)
+		app.Static("/generated-images", options.imageDir)
 	}
 	ok := func(c *fiber.Ctx) error {
@ -170,15 +135,8 @@ func App(opts ...options.AppOption) (*fiber.App, error) {
 	app.Get("/readyz", ok)
 	// models
-	app.Get("/v1/models", openai.ListModelsEndpoint(options.Loader, cm))
+	app.Get("/v1/models", listModels(options.loader, cm))
-	app.Get("/models", openai.ListModelsEndpoint(options.Loader, cm))
+	app.Get("/models", listModels(options.loader, cm))
 	// turn off any process that was started by GRPC if the context is canceled
 	go func() {
 		<-options.Context.Done()
 		log.Debug().Msgf("Context canceled, shutting down")
 		options.Loader.StopGRPC()
 	}()
 	return app, nil
 }
--- a/api/api_test.go
+++ b/api/api_test.go
@ -3,11 +3,8 @@ package api_test
 import (
 	"bytes"
 	"context"
 	"embed"
 	"encoding/json"
 	"errors"
 	"fmt"
 	"io"
 	"io/ioutil"
 	"net/http"
 	"os"
@ -15,10 +12,7 @@ import (
 	"runtime"
 	. "github.com/go-skynet/LocalAI/api"
 	"github.com/go-skynet/LocalAI/api/options"
 	"github.com/go-skynet/LocalAI/pkg/gallery"
 	"github.com/go-skynet/LocalAI/pkg/model"
 	"github.com/go-skynet/LocalAI/pkg/utils"
 	"github.com/gofiber/fiber/v2"
 	. "github.com/onsi/ginkgo/v2"
 	. "github.com/onsi/gomega"
@ -26,11 +20,9 @@ import (
 	openaigo "github.com/otiai10/openaigo"
 	"github.com/sashabaranov/go-openai"
 	"github.com/sashabaranov/go-openai/jsonschema"
 )
 type modelApplyRequest struct {
 	ID        string            `json:"id"`
 	URL       string            `json:"url"`
 	Name      string            `json:"name"`
 	Overrides map[string]string `json:"overrides"`
@ -59,15 +51,6 @@ func getModelStatus(url string) (response map[string]interface{}) {
 	}
 	return
 }
 // getModels passes url to utils.GetURI and, in the callback, decodes the
 // received bytes as JSON into a slice of gallery.GalleryModel, which is
 // returned through the named result.
 // NOTE(review): whatever utils.GetURI returns is discarded, so a failed fetch
 // or decode cannot be told apart from an empty list by the caller.
 func getModels(url string) (response []gallery.GalleryModel) {
 	utils.GetURI(url, func(url string, i []byte) error {
 		// Decode the JSON payload into the named return value.
 		return json.Unmarshal(i, &response)
 	})
 	return
 }
 func postModelApplyRequest(url string, request modelApplyRequest) (response map[string]interface{}) {
 	//url := "http://localhost:AI/models/apply"
@ -112,9 +95,6 @@ func postModelApplyRequest(url string, request modelApplyRequest) (response map[
 	return
 }
 //go:embed backend-assets/*
 var backendAssets embed.FS
 var _ = Describe("API test", func() {
 	var app *fiber.App
@ -125,11 +105,6 @@ var _ = Describe("API test", func() {
 	var cancel context.CancelFunc
 	var tmpdir string
 	commonOpts := []options.AppOption{
 		options.WithDebug(true),
 		options.WithDisableMessage(true),
 	}
 	Context("API with ephemeral models", func() {
 		BeforeEach(func() {
 			var err error
@ -139,35 +114,7 @@ var _ = Describe("API test", func() {
 			modelLoader = model.NewModelLoader(tmpdir)
 			c, cancel = context.WithCancel(context.Background())
-			g := []gallery.GalleryModel{
+			app, err = App(WithContext(c), WithModelLoader(modelLoader))
 				{
 					Name: "bert",
 					URL:  "https://raw.githubusercontent.com/go-skynet/model-gallery/main/bert-embeddings.yaml",
 				},
 				{
 					Name:            "bert2",
 					URL:             "https://raw.githubusercontent.com/go-skynet/model-gallery/main/bert-embeddings.yaml",
 					Overrides:       map[string]interface{}{"foo": "bar"},
 					AdditionalFiles: []gallery.File{{Filename: "foo.yaml", URI: "https://raw.githubusercontent.com/go-skynet/model-gallery/main/bert-embeddings.yaml"}},
 				},
 			}
 			out, err := yaml.Marshal(g)
 			Expect(err).ToNot(HaveOccurred())
 			err = ioutil.WriteFile(filepath.Join(tmpdir, "gallery_simple.yaml"), out, 0644)
 			Expect(err).ToNot(HaveOccurred())
 			galleries := []gallery.Gallery{
 				{
 					Name: "test",
 					URL:  "file://" + filepath.Join(tmpdir, "gallery_simple.yaml"),
 				},
 			}
 			app, err = App(
 				append(commonOpts,
 					options.WithContext(c),
 					options.WithGalleries(galleries),
 					options.WithModelLoader(modelLoader), options.WithBackendAssets(backendAssets), options.WithBackendAssetsOutput(tmpdir))...)
 			Expect(err).ToNot(HaveOccurred())
 			go app.Listen("127.0.0.1:9090")
@ -192,53 +139,6 @@ var _ = Describe("API test", func() {
 		})
 		Context("Applying models", func() {
 			It("applies models from a gallery", func() {
 				models := getModels("http://127.0.0.1:9090/models/available")
 				Expect(len(models)).To(Equal(2), fmt.Sprint(models))
 				Expect(models[0].Installed).To(BeFalse(), fmt.Sprint(models))
 				Expect(models[1].Installed).To(BeFalse(), fmt.Sprint(models))
 				response := postModelApplyRequest("http://127.0.0.1:9090/models/apply", modelApplyRequest{
 					ID: "test@bert2",
 				})
 				Expect(response["uuid"]).ToNot(BeEmpty(), fmt.Sprint(response))
 				uuid := response["uuid"].(string)
 				resp := map[string]interface{}{}
 				Eventually(func() bool {
 					response := getModelStatus("http://127.0.0.1:9090/models/jobs/" + uuid)
 					fmt.Println(response)
 					resp = response
 					return response["processed"].(bool)
 				}, "360s", "10s").Should(Equal(true))
 				Expect(resp["message"]).ToNot(ContainSubstring("error"))
 				dat, err := os.ReadFile(filepath.Join(tmpdir, "bert2.yaml"))
 				Expect(err).ToNot(HaveOccurred())
 				_, err = os.ReadFile(filepath.Join(tmpdir, "foo.yaml"))
 				Expect(err).ToNot(HaveOccurred())
 				content := map[string]interface{}{}
 				err = yaml.Unmarshal(dat, &content)
 				Expect(err).ToNot(HaveOccurred())
 				Expect(content["backend"]).To(Equal("bert-embeddings"))
 				Expect(content["foo"]).To(Equal("bar"))
 				models = getModels("http://127.0.0.1:9090/models/available")
 				Expect(len(models)).To(Equal(2), fmt.Sprint(models))
 				Expect(models[0].Name).To(Or(Equal("bert"), Equal("bert2")))
 				Expect(models[1].Name).To(Or(Equal("bert"), Equal("bert2")))
 				for _, m := range models {
 					if m.Name == "bert2" {
 						Expect(m.Installed).To(BeTrue())
 					} else {
 						Expect(m.Installed).To(BeFalse())
 					}
 				}
 			})
 			It("overrides models", func() {
 				response := postModelApplyRequest("http://127.0.0.1:9090/models/apply", modelApplyRequest{
 					URL:  "https://raw.githubusercontent.com/go-skynet/model-gallery/main/bert-embeddings.yaml",
@ -254,8 +154,9 @@ var _ = Describe("API test", func() {
 				Eventually(func() bool {
 					response := getModelStatus("http://127.0.0.1:9090/models/jobs/" + uuid)
 					fmt.Println(response)
 					return response["processed"].(bool)
-				}, "360s", "10s").Should(Equal(true))
+				}, "360s").Should(Equal(true))
 				dat, err := os.ReadFile(filepath.Join(tmpdir, "bert.yaml"))
 				Expect(err).ToNot(HaveOccurred())
@ -278,8 +179,9 @@ var _ = Describe("API test", func() {
 				Eventually(func() bool {
 					response := getModelStatus("http://127.0.0.1:9090/models/jobs/" + uuid)
 					fmt.Println(response)
 					return response["processed"].(bool)
-				}, "360s", "10s").Should(Equal(true))
+				}, "360s").Should(Equal(true))
 				dat, err := os.ReadFile(filepath.Join(tmpdir, "bert.yaml"))
 				Expect(err).ToNot(HaveOccurred())
@ -289,215 +191,6 @@ var _ = Describe("API test", func() {
 				Expect(err).ToNot(HaveOccurred())
 				Expect(content["backend"]).To(Equal("bert-embeddings"))
 			})
 			// Installs openllama_3b with the backend overridden to "llama-grammar", then
 			// exercises plain completion and OpenAI-style function calling. Linux only.
 			It("runs openllama", Label("llama"), func() {
 				if runtime.GOOS != "linux" {
 					Skip("test supported only on linux")
 				}
 				response := postModelApplyRequest("http://127.0.0.1:9090/models/apply", modelApplyRequest{
 					URL:       "github:go-skynet/model-gallery/openllama_3b.yaml",
 					Name:      "openllama_3b",
 					Overrides: map[string]string{"backend": "llama-grammar"},
 				})
 				Expect(response["uuid"]).ToNot(BeEmpty(), fmt.Sprint(response))
 				uuid := response["uuid"].(string)
 				// Poll the install job every 10s, for up to 360s, until it is processed.
 				Eventually(func() bool {
 					response := getModelStatus("http://127.0.0.1:9090/models/jobs/" + uuid)
 					return response["processed"].(bool)
 				}, "360s", "10s").Should(Equal(true))
 				By("testing completion")
 				resp, err := client.CreateCompletion(context.TODO(), openai.CompletionRequest{Model: "openllama_3b", Prompt: "Count up to five: one, two, three, four, "})
 				Expect(err).ToNot(HaveOccurred())
 				Expect(len(resp.Choices)).To(Equal(1))
 				Expect(resp.Choices[0].Text).To(ContainSubstring("five"))
 				By("testing functions")
 				// Offer a single function; the reply is expected to be a call to it.
 				resp2, err := client.CreateChatCompletion(
 					context.TODO(),
 					openai.ChatCompletionRequest{
 						Model: "openllama_3b",
 						Messages: []openai.ChatCompletionMessage{
 							{
 								Role:    "user",
 								Content: "What is the weather like in San Francisco (celsius)?",
 							},
 						},
 						Functions: []openai.FunctionDefinition{
 							openai.FunctionDefinition{
 								Name:        "get_current_weather",
 								Description: "Get the current weather",
 								Parameters: jsonschema.Definition{
 									Type: jsonschema.Object,
 									Properties: map[string]jsonschema.Definition{
 										"location": {
 											Type:        jsonschema.String,
 											Description: "The city and state, e.g. San Francisco, CA",
 										},
 										"unit": {
 											Type: jsonschema.String,
 											// NOTE(review): the enum spells "celcius" while the prompt says
 											// "celsius"; the assertion below matches the enum spelling.
 											Enum: []string{"celcius", "fahrenheit"},
 										},
 									},
 									Required: []string{"location"},
 								},
 							},
 						},
 					})
 				Expect(err).ToNot(HaveOccurred())
 				Expect(len(resp2.Choices)).To(Equal(1))
 				Expect(resp2.Choices[0].Message.FunctionCall).ToNot(BeNil())
 				Expect(resp2.Choices[0].Message.FunctionCall.Name).To(Equal("get_current_weather"), resp2.Choices[0].Message.FunctionCall.Name)
 				// The function-call arguments arrive as a JSON string; decode and check them.
 				var res map[string]string
 				err = json.Unmarshal([]byte(resp2.Choices[0].Message.FunctionCall.Arguments), &res)
 				Expect(err).ToNot(HaveOccurred())
 				Expect(res["location"]).To(Equal("San Francisco"), fmt.Sprint(res))
 				Expect(res["unit"]).To(Equal("celcius"), fmt.Sprint(res))
 				Expect(string(resp2.Choices[0].FinishReason)).To(Equal("function_call"), fmt.Sprint(resp2.Choices[0].FinishReason))
 			})
 			// Installs gpt4all-j from the gallery URL without overrides and checks that
 			// a chat completion comes back with the expected wording. Linux only.
 			It("runs gpt4all", Label("gpt4all"), func() {
 				if runtime.GOOS != "linux" {
 					Skip("test supported only on linux")
 				}
 				response := postModelApplyRequest("http://127.0.0.1:9090/models/apply", modelApplyRequest{
 					URL:       "github:go-skynet/model-gallery/gpt4all-j.yaml",
 					Name:      "gpt4all-j",
 					Overrides: map[string]string{},
 				})
 				Expect(response["uuid"]).ToNot(BeEmpty(), fmt.Sprint(response))
 				uuid := response["uuid"].(string)
 				// Poll the install job every 10s, for up to 360s, until it is processed.
 				Eventually(func() bool {
 					response := getModelStatus("http://127.0.0.1:9090/models/jobs/" + uuid)
 					return response["processed"].(bool)
 				}, "360s", "10s").Should(Equal(true))
 				resp, err := client.CreateChatCompletion(context.TODO(), openai.ChatCompletionRequest{Model: "gpt4all-j", Messages: []openai.ChatCompletionMessage{openai.ChatCompletionMessage{Role: "user", Content: "How are you?"}}})
 				Expect(err).ToNot(HaveOccurred())
 				Expect(len(resp.Choices)).To(Equal(1))
 				// The assertion depends on the model answering with the word "well".
 				Expect(resp.Choices[0].Message.Content).To(ContainSubstring("well"))
 			})
 		})
 	})
 	Context("Model gallery", func() {
 		// Starts a fresh API instance on 127.0.0.1:9090 for every spec, backed by a
 		// new temporary directory and the public "model-gallery" index, and blocks
 		// until the server answers a ListModels call.
 		BeforeEach(func() {
 			var err error
 			// tmpdir serves as model path, audio dir, image dir and backend-assets output.
 			tmpdir, err = os.MkdirTemp("", "")
 			Expect(err).ToNot(HaveOccurred())
 			modelLoader = model.NewModelLoader(tmpdir)
 			c, cancel = context.WithCancel(context.Background())
 			galleries := []gallery.Gallery{
 				{
 					Name: "model-gallery",
 					URL:  "https://raw.githubusercontent.com/go-skynet/model-gallery/main/index.yaml",
 				},
 			}
 			app, err = App(
 				append(commonOpts,
 					options.WithContext(c),
 					options.WithAudioDir(tmpdir),
 					options.WithImageDir(tmpdir),
 					options.WithGalleries(galleries),
 					options.WithModelLoader(modelLoader),
 					options.WithBackendAssets(backendAssets),
 					options.WithBackendAssetsOutput(tmpdir))...,
 			)
 			Expect(err).ToNot(HaveOccurred())
 			// Listen blocks, so it runs in its own goroutine.
 			go app.Listen("127.0.0.1:9090")
 			defaultConfig := openai.DefaultConfig("")
 			defaultConfig.BaseURL = "http://127.0.0.1:9090/v1"
 			client2 = openaigo.NewClient("")
 			client2.BaseURL = defaultConfig.BaseURL
 			// Build the main client, then wait (up to 2m) for the API to be ready.
 			client = openai.NewClientWithConfig(defaultConfig)
 			Eventually(func() error {
 				_, err := client.ListModels(context.TODO())
 				return err
 			}, "2m").ShouldNot(HaveOccurred())
 		})
 		// Tears down what BeforeEach created: cancels the app context, shuts the
 		// server down and removes the temporary directory.
 		AfterEach(func() {
 			cancel()
 			app.Shutdown()
 			os.RemoveAll(tmpdir)
 		})
 		// Installs the voice-en-us-kathleen-low voice from the "model-gallery"
 		// gallery and checks that POST /tts answers 200 with a WAV payload. Linux only.
 		It("installs and is capable to run tts", Label("tts"), func() {
 			if runtime.GOOS != "linux" {
 				Skip("test supported only on linux")
 			}
 			response := postModelApplyRequest("http://127.0.0.1:9090/models/apply", modelApplyRequest{
 				ID: "model-gallery@voice-en-us-kathleen-low",
 			})
 			Expect(response["uuid"]).ToNot(BeEmpty(), fmt.Sprint(response))
 			uuid := response["uuid"].(string)
 			// Poll the install job every 10s, for up to 360s, until it is processed.
 			Eventually(func() bool {
 				response := getModelStatus("http://127.0.0.1:9090/models/jobs/" + uuid)
 				fmt.Println(response)
 				return response["processed"].(bool)
 			}, "360s", "10s").Should(Equal(true))
 			// An HTTP Post to the /tts endpoint should return a wav audio file
 			resp, err := http.Post("http://127.0.0.1:9090/tts", "application/json", bytes.NewBuffer([]byte(`{"input": "Hello world", "model": "en-us-kathleen-low.onnx"}`)))
 			Expect(err).ToNot(HaveOccurred(), fmt.Sprint(resp))
 			// Close the body so the connection is released even if an assertion fails.
 			defer resp.Body.Close()
 			dat, err := io.ReadAll(resp.Body)
 			Expect(err).ToNot(HaveOccurred(), fmt.Sprint(resp))
 			Expect(resp.StatusCode).To(Equal(200), fmt.Sprint(string(dat)))
 			Expect(resp.Header.Get("Content-Type")).To(Equal("audio/x-wav"))
 		})
 		// Installs the stablediffusion gallery entry and checks that the image
 		// generation endpoint answers with a URL pointing at a generated .png.
 		// Linux only.
 		It("installs and is capable to generate images", Label("stablediffusion"), func() {
 			if runtime.GOOS != "linux" {
 				Skip("test supported only on linux")
 			}
 			response := postModelApplyRequest("http://127.0.0.1:9090/models/apply", modelApplyRequest{
 				ID: "model-gallery@stablediffusion",
 			})
 			Expect(response["uuid"]).ToNot(BeEmpty(), fmt.Sprint(response))
 			uuid := response["uuid"].(string)
 			// Poll the install job every 10s, for up to 360s, until it is processed.
 			Eventually(func() bool {
 				response := getModelStatus("http://127.0.0.1:9090/models/jobs/" + uuid)
 				fmt.Println(response)
 				return response["processed"].(bool)
 			}, "360s", "10s").Should(Equal(true))
 			// The assertions below only look at the returned URL, so a short neutral
 			// prompt is enough. NOTE(review): the "|" is kept from the previous
 			// fixture, where it presumably separated positive and negative prompt
 			// parts; confirm against the endpoint.
 			resp, err := http.Post(
 				"http://127.0.0.1:9090/v1/images/generations",
 				"application/json",
 				bytes.NewBuffer([]byte(`{
 					"prompt": "a lighthouse on a rocky coast at sunset, highly detailed, (((masterpiece))), ((best quality)), colorful|deformed, blurry, lowres, out of focus, text",
 					"mode": 2, "seed": 9000,
 					"size": "256x256", "n": 2}`)))
 			// The response should contain an URL
 			Expect(err).ToNot(HaveOccurred(), fmt.Sprint(resp))
 			// Close the body so the connection is released even if an assertion fails.
 			defer resp.Body.Close()
 			dat, err := io.ReadAll(resp.Body)
 			Expect(err).ToNot(HaveOccurred(), string(dat))
 			Expect(string(dat)).To(ContainSubstring("http://127.0.0.1:9090/"), string(dat))
 			Expect(string(dat)).To(ContainSubstring(".png"), string(dat))
 		})
 	})
@ -507,12 +200,7 @@ var _ = Describe("API test", func() {
 			c, cancel = context.WithCancel(context.Background())
 			var err error
-			app, err = App(
+			app, err = App(WithContext(c), WithModelLoader(modelLoader))
 				append(commonOpts,
 					options.WithExternalBackend("huggingface", os.Getenv("HUGGINGFACE_GRPC")),
 					options.WithContext(c),
 					options.WithModelLoader(modelLoader),
 				)...)
 			Expect(err).ToNot(HaveOccurred())
 			go app.Listen("127.0.0.1:9090")
@ -536,7 +224,7 @@ var _ = Describe("API test", func() {
 		It("returns the models list", func() {
 			models, err := client.ListModels(context.TODO())
 			Expect(err).ToNot(HaveOccurred())
-			Expect(len(models.Models)).To(Equal(11))
+			Expect(len(models.Models)).To(Equal(10))
 		})
 		It("can generate completions", func() {
 			resp, err := client.CreateCompletion(context.TODO(), openai.CompletionRequest{Model: "testmodel", Prompt: "abcdedfghikl"})
@ -567,10 +255,9 @@ var _ = Describe("API test", func() {
 		})
 		It("returns errors", func() {
 			backends := len(model.AutoLoadBackends) + 1 // +1 for huggingface
 			_, err := client.CreateCompletion(context.TODO(), openai.CompletionRequest{Model: "foomodel", Prompt: "abcdedfghikl"})
 			Expect(err).To(HaveOccurred())
-			Expect(err.Error()).To(ContainSubstring(fmt.Sprintf("error, status code: 500, message: could not load model - all backends returned error: %d errors occurred:", backends)))
+			Expect(err.Error()).To(ContainSubstring("error, status code: 500, message: could not load model - all backends returned error: 12 errors occurred:"))
 		})
 		It("transcribes audio", func() {
 			if runtime.GOOS != "linux" {
@ -614,98 +301,15 @@ var _ = Describe("API test", func() {
 			Expect(resp2.Data[0].Embedding).To(Equal(sunEmbedding))
 		})
 		Context("External gRPC calls", func() {
 			// Requests embeddings for two inputs, then for the first input alone, and
 			// checks vector length, repeatability and that different inputs produce
 			// different vectors. Linux only.
 			It("calculate embeddings with huggingface", func() {
 				if runtime.GOOS != "linux" {
 					Skip("test supported only on linux")
 				}
 				resp, err := client.CreateEmbeddings(
 					context.Background(),
 					openai.EmbeddingRequest{
 						Model: openai.AdaCodeSearchCode,
 						Input: []string{"sun", "cat"},
 					},
 				)
 				Expect(err).ToNot(HaveOccurred())
 				// Both vectors are expected to have exactly 384 components.
 				Expect(len(resp.Data[0].Embedding)).To(BeNumerically("==", 384))
 				Expect(len(resp.Data[1].Embedding)).To(BeNumerically("==", 384))
 				sunEmbedding := resp.Data[0].Embedding
 				resp2, err := client.CreateEmbeddings(
 					context.Background(),
 					openai.EmbeddingRequest{
 						Model: openai.AdaCodeSearchCode,
 						Input: []string{"sun"},
 					},
 				)
 				Expect(err).ToNot(HaveOccurred())
 				// Same input gives the same vector; "sun" and "cat" must differ.
 				Expect(resp2.Data[0].Embedding).To(Equal(sunEmbedding))
 				Expect(resp2.Data[0].Embedding).ToNot(Equal(resp.Data[1].Embedding))
 			})
 		})
 		Context("backends", func() {
-			It("runs rwkv completion", func() {
+			It("runs rwkv", func() {
 				if runtime.GOOS != "linux" {
 					Skip("test supported only on linux")
 				}
 				resp, err := client.CreateCompletion(context.TODO(), openai.CompletionRequest{Model: "rwkv_test", Prompt: "Count up to five: one, two, three, four,"})
 				Expect(err).ToNot(HaveOccurred())
 				Expect(len(resp.Choices) > 0).To(BeTrue())
-				Expect(resp.Choices[0].Text).To(ContainSubstring("five"))
+				Expect(resp.Choices[0].Text).To(Equal(" five."))
 				stream, err := client.CreateCompletionStream(context.TODO(), openai.CompletionRequest{
 					Model: "rwkv_test", Prompt: "Count up to five: one, two, three, four,", Stream: true,
 				})
 				Expect(err).ToNot(HaveOccurred())
 				defer stream.Close()
 				tokens := 0
 				text := ""
 				for {
 					response, err := stream.Recv()
 					if errors.Is(err, io.EOF) {
 						break
 					}
 					Expect(err).ToNot(HaveOccurred())
 					text += response.Choices[0].Text
 					tokens++
 				}
 				Expect(text).ToNot(BeEmpty())
 				Expect(text).To(ContainSubstring("five"))
 				Expect(tokens).ToNot(Or(Equal(1), Equal(0)))
 			})
 			// Sends the same chat prompt to rwkv_test twice, once as a blocking request
 			// and once as a stream, and checks that both answers contain "Sure" or
 			// "five". Linux only.
 			It("runs rwkv chat completion", func() {
 				if runtime.GOOS != "linux" {
 					Skip("test supported only on linux")
 				}
 				resp, err := client.CreateChatCompletion(context.TODO(),
 					openai.ChatCompletionRequest{Model: "rwkv_test", Messages: []openai.ChatCompletionMessage{{Content: "Can you count up to five?", Role: "user"}}})
 				Expect(err).ToNot(HaveOccurred())
 				Expect(len(resp.Choices) > 0).To(BeTrue())
 				Expect(resp.Choices[0].Message.Content).To(Or(ContainSubstring("Sure"), ContainSubstring("five")))
 				stream, err := client.CreateChatCompletionStream(context.TODO(), openai.ChatCompletionRequest{Model: "rwkv_test", Messages: []openai.ChatCompletionMessage{{Content: "Can you count up to five?", Role: "user"}}})
 				Expect(err).ToNot(HaveOccurred())
 				defer stream.Close()
 				// Concatenate the streamed deltas and count the chunks until io.EOF.
 				tokens := 0
 				text := ""
 				for {
 					response, err := stream.Recv()
 					if errors.Is(err, io.EOF) {
 						break
 					}
 					Expect(err).ToNot(HaveOccurred())
 					text += response.Choices[0].Delta.Content
 					tokens++
 				}
 				Expect(text).ToNot(BeEmpty())
 				Expect(text).To(Or(ContainSubstring("Sure"), ContainSubstring("five")))
 				// More than one chunk must have been received for this to count as streaming.
 				Expect(tokens).ToNot(Or(Equal(1), Equal(0)))
 			})
 		})
 	})
@ -716,12 +320,7 @@ var _ = Describe("API test", func() {
 			c, cancel = context.WithCancel(context.Background())
 			var err error
-			app, err = App(
+			app, err = App(WithContext(c), WithModelLoader(modelLoader), WithConfigFile(os.Getenv("CONFIG_FILE")))
 				append(commonOpts,
 					options.WithContext(c),
 					options.WithModelLoader(modelLoader),
 					options.WithConfigFile(os.Getenv("CONFIG_FILE")))...,
 			)
 			Expect(err).ToNot(HaveOccurred())
 			go app.Listen("127.0.0.1:9090")
@ -743,7 +342,7 @@ var _ = Describe("API test", func() {
 		It("can generate chat completions from config file", func() {
 			models, err := client.ListModels(context.TODO())
 			Expect(err).ToNot(HaveOccurred())
-			Expect(len(models.Models)).To(Equal(13))
+			Expect(len(models.Models)).To(Equal(12))
 		})
 		It("can generate chat completions from config file", func() {
 			resp, err := client.CreateChatCompletion(context.TODO(), openai.ChatCompletionRequest{Model: "list1", Messages: []openai.ChatCompletionMessage{openai.ChatCompletionMessage{Role: "user", Content: "abcdedfghikl"}}})
--- a/api/backend/embeddings.go
+++ b/api/backend/embeddings.go
@ -1,109 +0,0 @@
 package backend
 import (
 	"fmt"
 	"sync"
 	config "github.com/go-skynet/LocalAI/api/config"
 	"github.com/go-skynet/LocalAI/api/options"
 	"github.com/go-skynet/LocalAI/pkg/grpc"
 	model "github.com/go-skynet/LocalAI/pkg/model"
 )
 // ModelEmbedding prepares an embedding computation for the model described by c.
 //
 // The backend is loaded up front: through the greedy loader when c.Backend is
 // empty, otherwise through the named backend. The returned closure performs the
 // actual request. It embeds the token IDs when tokens is non-empty and the
 // string s otherwise, runs under a per-model-file mutex, and strips trailing
 // zero components from the result.
 //
 // An error is returned immediately when embeddings are disabled in c or the
 // backend cannot be loaded.
 func ModelEmbedding(s string, tokens []int, loader *model.ModelLoader, c config.Config, o *options.Option) (func() ([]float32, error), error) {
 	if !c.Embeddings {
 		return nil, fmt.Errorf("endpoint disabled for this model by API configuration")
 	}
 	modelFile := c.Model
 	loadOpts := []model.Option{
 		model.WithLoadGRPCLLMModelOpts(gRPCModelOpts(c)),
 		model.WithThreads(uint32(c.Threads)),
 		model.WithAssetDir(o.AssetsDestination),
 		model.WithModelFile(modelFile),
 		model.WithContext(o.Context),
 	}
 	for name, uri := range o.ExternalGRPCBackends {
 		loadOpts = append(loadOpts, model.WithExternalBackend(name, uri))
 	}
 	var (
 		backend interface{}
 		err     error
 	)
 	if c.Backend != "" {
 		loadOpts = append(loadOpts, model.WithBackendString(c.Backend))
 		backend, err = loader.BackendLoader(loadOpts...)
 	} else {
 		backend, err = loader.GreedyLoader(loadOpts...)
 	}
 	if err != nil {
 		return nil, err
 	}
 	// Fallback used when the loaded backend is not a gRPC client.
 	compute := func() ([]float32, error) {
 		return nil, fmt.Errorf("embeddings not supported by the backend")
 	}
 	if client, ok := backend.(*grpc.Client); ok {
 		compute = func() ([]float32, error) {
 			request := gRPCPredictOpts(c, loader.ModelPath)
 			if len(tokens) == 0 {
 				request.Embeddings = s
 			} else {
 				ids := make([]int32, 0, len(tokens))
 				for _, t := range tokens {
 					ids = append(ids, int32(t))
 				}
 				request.EmbeddingTokens = ids
 			}
 			res, err := client.Embeddings(o.Context, request)
 			if err != nil {
 				return nil, err
 			}
 			return res.Embeddings, nil
 		}
 	}
 	return func() ([]float32, error) {
 		// This is still needed, see: https://github.com/ggerganov/llama.cpp/discussions/784
 		mutexMap.Lock()
 		modelLock, found := mutexes[modelFile]
 		if !found {
 			modelLock = &sync.Mutex{}
 			mutexes[modelFile] = modelLock
 		}
 		mutexMap.Unlock()
 		modelLock.Lock()
 		defer modelLock.Unlock()
 		vec, err := compute()
 		if err != nil {
 			return vec, err
 		}
 		// Remove trailing 0s
 		end := len(vec)
 		for end > 0 && vec[end-1] == 0.0 {
 			end--
 		}
 		return vec[:end], nil
 	}, nil
 }
--- a/api/backend/image.go
+++ b/api/backend/image.go
@ -1,68 +0,0 @@
 package backend
 import (
 	"fmt"
 	"sync"
 	config "github.com/go-skynet/LocalAI/api/config"
 	"github.com/go-skynet/LocalAI/api/options"
 	"github.com/go-skynet/LocalAI/pkg/grpc/proto"
 	model "github.com/go-skynet/LocalAI/pkg/model"
 )
 // ImageGeneration prepares an image generation call for the stablediffusion
 // backend.
 //
 // It fails right away unless c.Backend equals model.StableDiffusionBackend or
 // if the backend cannot be loaded. The returned closure sends a single
 // GenerateImageRequest built from the arguments (dst is passed through as the
 // request's Dst field) while holding a mutex keyed on the backend name.
 func ImageGeneration(height, width, mode, step, seed int, positive_prompt, negative_prompt, dst string, loader *model.ModelLoader, c config.Config, o *options.Option) (func() error, error) {
 	if c.Backend != model.StableDiffusionBackend {
 		return nil, fmt.Errorf("endpoint only working with stablediffusion models")
 	}
 	loadOpts := []model.Option{
 		model.WithBackendString(c.Backend),
 		model.WithAssetDir(o.AssetsDestination),
 		model.WithThreads(uint32(c.Threads)),
 		model.WithContext(o.Context),
 		model.WithModelFile(c.ImageGenerationAssets),
 	}
 	for name, uri := range o.ExternalGRPCBackends {
 		loadOpts = append(loadOpts, model.WithExternalBackend(name, uri))
 	}
 	backend, err := loader.BackendLoader(loadOpts...)
 	if err != nil {
 		return nil, err
 	}
 	return func() error {
 		// This is still needed, see: https://github.com/ggerganov/llama.cpp/discussions/784
 		mutexMap.Lock()
 		backendLock, found := mutexes[c.Backend]
 		if !found {
 			backendLock = &sync.Mutex{}
 			mutexes[c.Backend] = backendLock
 		}
 		mutexMap.Unlock()
 		backendLock.Lock()
 		defer backendLock.Unlock()
 		// The request is built per invocation; only the error of the call is reported.
 		_, genErr := backend.GenerateImage(
 			o.Context,
 			&proto.GenerateImageRequest{
 				Height:         int32(height),
 				Width:          int32(width),
 				Mode:           int32(mode),
 				Step:           int32(step),
 				Seed:           int32(seed),
 				PositivePrompt: positive_prompt,
 				NegativePrompt: negative_prompt,
 				Dst:            dst,
 			})
 		return genErr
 	}, nil
 }
--- a/api/backend/llm.go
+++ b/api/backend/llm.go
@ -1,124 +0,0 @@
 package backend
 import (
 	"os"
 	"regexp"
 	"strings"
 	"sync"
 	config "github.com/go-skynet/LocalAI/api/config"
 	"github.com/go-skynet/LocalAI/api/options"
 	"github.com/go-skynet/LocalAI/pkg/gallery"
 	"github.com/go-skynet/LocalAI/pkg/grpc"
 	model "github.com/go-skynet/LocalAI/pkg/model"
 	"github.com/go-skynet/LocalAI/pkg/utils"
 )
 // ModelInference loads the backend for the model described by c and returns a
 // closure that runs a prediction for prompt s.
 //
 // With o.AutoloadGalleries set (experimental) and the model file missing on
 // disk, the model is first installed from the configured galleries. The backend
 // is picked by the greedy loader when c.Backend is empty and by name otherwise.
 //
 // When tokenCallback is non-nil the closure streams: every chunk is handed to
 // the callback and appended to the returned text (the callback's boolean result
 // is not inspected). Otherwise a single blocking prediction is made. Each
 // invocation runs under a mutex keyed on the model file.
 func ModelInference(s string, loader *model.ModelLoader, c config.Config, o *options.Option, tokenCallback func(string) bool) (func() (string, error), error) {
 	modelFile := c.Model
 	namedBackend := c.Backend != ""
 	loadOpts := []model.Option{
 		model.WithLoadGRPCLLMModelOpts(gRPCModelOpts(c)),
 		model.WithThreads(uint32(c.Threads)), // some models uses this to allocate threads during startup
 		model.WithAssetDir(o.AssetsDestination),
 		model.WithModelFile(modelFile),
 		model.WithContext(o.Context),
 	}
 	for name, uri := range o.ExternalGRPCBackends {
 		loadOpts = append(loadOpts, model.WithExternalBackend(name, uri))
 	}
 	if namedBackend {
 		loadOpts = append(loadOpts, model.WithBackendString(c.Backend))
 	}
 	// Check if the modelFile exists, if it doesn't try to load it from the gallery
 	if o.AutoloadGalleries { // experimental
 		if _, statErr := os.Stat(modelFile); os.IsNotExist(statErr) {
 			utils.ResetDownloadTimers()
 			// the model file is missing, so try to download it
 			if installErr := gallery.InstallModelFromGalleryByName(o.Galleries, modelFile, loader.ModelPath, gallery.GalleryModel{}, utils.DisplayDownloadFunction); installErr != nil {
 				return nil, installErr
 			}
 		}
 	}
 	var (
 		backend *grpc.Client
 		err     error
 	)
 	if namedBackend {
 		backend, err = loader.BackendLoader(loadOpts...)
 	} else {
 		backend, err = loader.GreedyLoader(loadOpts...)
 	}
 	if err != nil {
 		return nil, err
 	}
 	// in GRPC, the backend is supposed to answer to 1 single token if stream is not supported
 	predict := func() (string, error) {
 		request := gRPCPredictOpts(c, loader.ModelPath)
 		request.Prompt = s
 		if tokenCallback == nil {
 			reply, err := backend.Predict(o.Context, request)
 			if err != nil {
 				return "", err
 			}
 			return reply.Message, nil
 		}
 		collected := ""
 		streamErr := backend.PredictStream(o.Context, request, func(chunk string) {
 			tokenCallback(chunk)
 			collected += chunk
 		})
 		return collected, streamErr
 	}
 	return func() (string, error) {
 		// This is still needed, see: https://github.com/ggerganov/llama.cpp/discussions/784
 		mutexMap.Lock()
 		modelLock, found := mutexes[modelFile]
 		if !found {
 			modelLock = &sync.Mutex{}
 			mutexes[modelFile] = modelLock
 		}
 		mutexMap.Unlock()
 		modelLock.Lock()
 		defer modelLock.Unlock()
 		return predict()
 	}, nil
 }
 // cutstrings caches the compiled form of every cutstring pattern seen so far;
 // mu guards it.
 var cutstrings map[string]*regexp.Regexp = make(map[string]*regexp.Regexp)
 var mu sync.Mutex = sync.Mutex{}
 // cutstringRegexp returns the cached compiled regexp for pattern, compiling and
 // caching it on first use. Like before, an invalid pattern panics via
 // regexp.MustCompile, but the deferred unlock guarantees mu is released, so a
 // recovered panic no longer leaves every later caller blocked on the mutex.
 func cutstringRegexp(pattern string) *regexp.Regexp {
 	mu.Lock()
 	defer mu.Unlock()
 	reg, ok := cutstrings[pattern]
 	if !ok {
 		reg = regexp.MustCompile(pattern)
 		cutstrings[pattern] = reg
 	}
 	return reg
 }
 // Finetune post-processes a model prediction according to config:
 //   - with config.Echo set, input is prepended to the prediction;
 //   - every regular expression in config.Cutstrings is removed from it;
 //   - for every entry in config.TrimSpace, that prefix is trimmed and the
 //     result is stripped of surrounding whitespace.
 //
 // It returns the processed prediction.
 func Finetune(config config.Config, input, prediction string) string {
 	if config.Echo {
 		prediction = input + prediction
 	}
 	for _, c := range config.Cutstrings {
 		prediction = cutstringRegexp(c).ReplaceAllString(prediction, "")
 	}
 	for _, c := range config.TrimSpace {
 		prediction = strings.TrimSpace(strings.TrimPrefix(prediction, c))
 	}
 	return prediction
 }
--- a/api/backend/lock.go
+++ b/api/backend/lock.go
@ -1,22 +0,0 @@
 package backend
 import "sync"
 // Per-key mutex registry; still needed, see:
 // https://github.com/ggerganov/llama.cpp/discussions/784
 var (
 	// mutexMap guards access to mutexes.
 	mutexMap sync.Mutex
 	// mutexes maps a key to its dedicated mutex.
 	mutexes = make(map[string]*sync.Mutex)
 )
 // Lock acquires the mutex registered under s, creating it on first use, and
 // returns it locked. The caller is responsible for calling Unlock on the result.
 func Lock(s string) *sync.Mutex {
 	mutexMap.Lock()
 	keyLock, known := mutexes[s]
 	if !known {
 		keyLock = new(sync.Mutex)
 		mutexes[s] = keyLock
 	}
 	mutexMap.Unlock()
 	// Lock the per-key mutex only after the registry lock has been released.
 	keyLock.Lock()
 	return keyLock
 }
--- a/api/backend/options.go
+++ b/api/backend/options.go
@ -1,72 +0,0 @@
 package backend
 import (
 	"os"
 	"path/filepath"
 	pb "github.com/go-skynet/LocalAI/pkg/grpc/proto"
 	config "github.com/go-skynet/LocalAI/api/config"
 )
 // gRPCModelOpts translates the model configuration c into the options message
 // sent to a backend when a model is loaded. NBatch is 512 unless c.Batch is
 // non-zero, in which case c.Batch is used.
 func gRPCModelOpts(c config.Config) *pb.ModelOptions {
 	opts := &pb.ModelOptions{
 		ContextSize: int32(c.ContextSize),
 		Seed:        int32(c.Seed),
 		NBatch:      512,
 		F16Memory:   c.F16,
 		MLock:       c.MMlock,
 		NUMA:        c.NUMA,
 		Embeddings:  c.Embeddings,
 		LowVRAM:     c.LowVRAM,
 		NGPULayers:  int32(c.NGPULayers),
 		MMap:        c.MMap,
 		MainGPU:     c.MainGPU,
 		Threads:     int32(c.Threads),
 		TensorSplit: c.TensorSplit,
 	}
 	if c.Batch != 0 {
 		opts.NBatch = int32(c.Batch)
 	}
 	return opts
 }
 // gRPCPredictOpts translates the model configuration c into the per-request
 // prediction options sent to a backend.
 //
 // When c.PromptCachePath is set it is resolved relative to modelPath and its
 // parent directory is created; the result of that MkdirAll call is discarded.
 // Otherwise PromptCachePath stays empty.
 func gRPCPredictOpts(c config.Config, modelPath string) *pb.PredictOptions {
 	opts := &pb.PredictOptions{
 		Temperature:       float32(c.Temperature),
 		TopP:              float32(c.TopP),
 		TopK:              int32(c.TopK),
 		Tokens:            int32(c.Maxtokens),
 		Threads:           int32(c.Threads),
 		PromptCacheAll:    c.PromptCacheAll,
 		PromptCacheRO:     c.PromptCacheRO,
 		F16KV:             c.F16,
 		DebugMode:         c.Debug,
 		Grammar:           c.Grammar,
 		Mirostat:          int32(c.Mirostat),
 		MirostatETA:       float32(c.MirostatETA),
 		MirostatTAU:       float32(c.MirostatTAU),
 		Debug:             c.Debug,
 		StopPrompts:       c.StopWords,
 		Repeat:            int32(c.RepeatPenalty),
 		NKeep:             int32(c.Keep),
 		Batch:             int32(c.Batch),
 		IgnoreEOS:         c.IgnoreEOS,
 		Seed:              int32(c.Seed),
 		FrequencyPenalty:  float32(c.FrequencyPenalty),
 		MLock:             c.MMlock,
 		MMap:              c.MMap,
 		MainGPU:           c.MainGPU,
 		TensorSplit:       c.TensorSplit,
 		TailFreeSamplingZ: float32(c.TFZ),
 		TypicalP:          float32(c.TypicalP),
 	}
 	if c.PromptCachePath != "" {
 		cacheFile := filepath.Join(modelPath, c.PromptCachePath)
 		os.MkdirAll(filepath.Dir(cacheFile), 0755)
 		opts.PromptCachePath = cacheFile
 	}
 	return opts
 }
--- a/api/backend/transcript.go
+++ b/api/backend/transcript.go
@ -1,42 +0,0 @@
 package backend
 import (
 	"context"
 	"fmt"
 	config "github.com/go-skynet/LocalAI/api/config"
 	"github.com/go-skynet/LocalAI/api/options"
 	"github.com/go-skynet/LocalAI/pkg/grpc/proto"
 	"github.com/go-skynet/LocalAI/pkg/grpc/whisper/api"
 	model "github.com/go-skynet/LocalAI/pkg/model"
 )
 // ModelTranscription loads the whisper backend for c.Model and asks it to
 // transcribe the file at audio in the given language.
 //
 // The backend is obtained from o.Loader; the loader argument is not used by
 // this function. The request itself runs with context.Background(). An error
 // is returned when the backend fails to load or comes back nil.
 func ModelTranscription(audio, language string, loader *model.ModelLoader, c config.Config, o *options.Option) (*api.Result, error) {
 	threads := uint32(c.Threads)
 	loadOpts := []model.Option{
 		model.WithBackendString(model.WhisperBackend),
 		model.WithModelFile(c.Model),
 		model.WithContext(o.Context),
 		model.WithThreads(threads),
 		model.WithAssetDir(o.AssetsDestination),
 	}
 	for name, uri := range o.ExternalGRPCBackends {
 		loadOpts = append(loadOpts, model.WithExternalBackend(name, uri))
 	}
 	backend, err := o.Loader.BackendLoader(loadOpts...)
 	switch {
 	case err != nil:
 		return nil, err
 	case backend == nil:
 		return nil, fmt.Errorf("could not load whisper model")
 	}
 	request := &proto.TranscriptRequest{
 		Dst:      audio,
 		Language: language,
 		Threads:  threads,
 	}
 	return backend.AudioTranscription(context.Background(), request)
 }
--- a/api/backend/tts.go
+++ b/api/backend/tts.go
@ -1,72 +0,0 @@
 package backend
 import (
 	"context"
 	"fmt"
 	"os"
 	"path/filepath"
 	"github.com/go-skynet/LocalAI/api/options"
 	"github.com/go-skynet/LocalAI/pkg/grpc/proto"
 	model "github.com/go-skynet/LocalAI/pkg/model"
 	"github.com/go-skynet/LocalAI/pkg/utils"
 )
 func generateUniqueFileName(dir, baseName, ext string) string {
 	counter := 1
 	fileName := baseName + ext
 	for {
 		filePath := filepath.Join(dir, fileName)
 		_, err := os.Stat(filePath)
 		if os.IsNotExist(err) {
 			return fileName
 		}
 		counter++
 		fileName = fmt.Sprintf("%s_%d%s", baseName, counter, ext)
 	}
 }
 // ModelTTS synthesizes text with the piper backend using modelFile and writes
 // the audio to a new, uniquely named "piper*.wav" file inside o.AudioDir.
 //
 // It returns the path of the output file, the backend's result and any error.
 // The backend is obtained from o.Loader; the loader argument is not used by
 // this function. The TTS request runs with context.Background().
 func ModelTTS(text, modelFile string, loader *model.ModelLoader, o *options.Option) (string, *proto.Result, error) {
 	// Validate the requested model path before it is handed to anything else, so
 	// a rejected path never reaches the backend loader.
 	// NOTE(review): utils.VerifyPath presumably rejects paths that escape the
 	// models directory - confirm against its implementation.
 	modelPath := filepath.Join(o.Loader.ModelPath, modelFile)
 	if err := utils.VerifyPath(modelPath, o.Loader.ModelPath); err != nil {
 		return "", nil, err
 	}
 	opts := []model.Option{
 		model.WithBackendString(model.PiperBackend),
 		model.WithModelFile(modelFile),
 		model.WithContext(o.Context),
 		model.WithAssetDir(o.AssetsDestination),
 	}
 	for k, v := range o.ExternalGRPCBackends {
 		opts = append(opts, model.WithExternalBackend(k, v))
 	}
 	piperModel, err := o.Loader.BackendLoader(opts...)
 	if err != nil {
 		return "", nil, err
 	}
 	if piperModel == nil {
 		return "", nil, fmt.Errorf("could not load piper model")
 	}
 	if err := os.MkdirAll(o.AudioDir, 0755); err != nil {
 		return "", nil, fmt.Errorf("failed creating audio directory: %s", err)
 	}
 	fileName := generateUniqueFileName(o.AudioDir, "piper", ".wav")
 	filePath := filepath.Join(o.AudioDir, fileName)
 	res, err := piperModel.TTS(context.Background(), &proto.TTSRequest{
 		Text:  text,
 		Model: modelPath,
 		Dst:   filePath,
 	})
 	return filePath, res, err
 }
--- a/api/config.go
+++ b/api/config.go
@ -0,0 +1,333 @@
 package api
 import (
 	"encoding/json"
 	"fmt"
 	"io/ioutil"
 	"os"
 	"path/filepath"
 	"strings"
 	"sync"
 	model "github.com/go-skynet/LocalAI/pkg/model"
 	"github.com/gofiber/fiber/v2"
 	"github.com/rs/zerolog/log"
 	"gopkg.in/yaml.v3"
 )
 // Config is the per-model configuration. It is decoded from YAML and then
 // overlaid with per-request values by updateConfig.
 type Config struct {
 	// Embedded request parameters, read from the "parameters" YAML key.
 	OpenAIRequest         `yaml:"parameters"`
 	Name                  string            `yaml:"name"`
 	StopWords             []string          `yaml:"stopwords"`
 	Cutstrings            []string          `yaml:"cutstrings"`
 	TrimSpace             []string          `yaml:"trimspace"`
 	ContextSize           int               `yaml:"context_size"`
 	F16                   bool              `yaml:"f16"`
 	Threads               int               `yaml:"threads"`
 	Debug                 bool              `yaml:"debug"`
 	Roles                 map[string]string `yaml:"roles"`
 	Embeddings            bool              `yaml:"embeddings"`
 	Backend               string            `yaml:"backend"`
 	TemplateConfig        TemplateConfig    `yaml:"template"`
 	MirostatETA           float64           `yaml:"mirostat_eta"`
 	MirostatTAU           float64           `yaml:"mirostat_tau"`
 	Mirostat              int               `yaml:"mirostat"`
 	NGPULayers            int               `yaml:"gpu_layers"`
 	ImageGenerationAssets string            `yaml:"asset_dir"`
 	PromptCachePath string `yaml:"prompt_cache_path"`
 	PromptCacheAll  bool   `yaml:"prompt_cache_all"`
 	// The fields below carry no YAML tag; updateConfig appends to them from
 	// the request's prompt and input values.
 	PromptStrings, InputStrings []string
 	InputToken                  [][]int
 }
 // TemplateConfig holds the "template" section of a model configuration, with
 // one string per request kind (completion, chat, edit).
 type TemplateConfig struct {
 	Completion string `yaml:"completion"`
 	Chat       string `yaml:"chat"`
 	Edit       string `yaml:"edit"`
 }
 // ConfigMerger stores model configurations keyed by Config.Name. Its methods
 // take the embedded mutex around every access to the map.
 type ConfigMerger struct {
 	configs map[string]Config
 	sync.Mutex
 }
 // NewConfigMerger returns a ConfigMerger with an empty, ready-to-use map.
 func NewConfigMerger() *ConfigMerger {
 	merger := new(ConfigMerger)
 	merger.configs = map[string]Config{}
 	return merger
 }
 // ReadConfigFile reads a YAML file that contains a list of model
 // configurations and returns the decoded entries. Read and decode failures
 // are returned wrapped.
 func ReadConfigFile(file string) ([]*Config, error) {
 	// Start from an empty, non-nil slice so an empty document still yields a
 	// non-nil result.
 	parsed := []*Config{}
 	raw, err := os.ReadFile(file)
 	if err != nil {
 		return nil, fmt.Errorf("cannot read config file: %w", err)
 	}
 	if err = yaml.Unmarshal(raw, &parsed); err != nil {
 		return nil, fmt.Errorf("cannot unmarshal config file: %w", err)
 	}
 	return parsed, nil
 }
 // ReadConfig reads a YAML file that contains a single model configuration
 // and returns it. Read and decode failures are returned wrapped.
 func ReadConfig(file string) (*Config, error) {
 	raw, err := os.ReadFile(file)
 	if err != nil {
 		return nil, fmt.Errorf("cannot read config file: %w", err)
 	}
 	cfg := new(Config)
 	if err = yaml.Unmarshal(raw, cfg); err != nil {
 		return nil, fmt.Errorf("cannot unmarshal config file: %w", err)
 	}
 	return cfg, nil
 }
 // LoadConfigFile reads a YAML list of configurations from file and stores
 // each entry under its Name, replacing any entry with the same name.
 //
 // The receiver is a pointer so the embedded mutex is shared between callers;
 // a value receiver would lock a private copy and protect nothing.
 func (cm *ConfigMerger) LoadConfigFile(file string) error {
 	cm.Lock()
 	defer cm.Unlock()
 	c, err := ReadConfigFile(file)
 	if err != nil {
 		return fmt.Errorf("cannot load config file: %w", err)
 	}
 	for _, cc := range c {
 		cm.configs[cc.Name] = *cc
 	}
 	return nil
 }
 // LoadConfig reads a single configuration from file and stores it under its
 // Name, replacing any entry with the same name.
 //
 // The receiver is a pointer so the embedded mutex is shared between callers;
 // a value receiver would lock a private copy and protect nothing.
 func (cm *ConfigMerger) LoadConfig(file string) error {
 	cm.Lock()
 	defer cm.Unlock()
 	c, err := ReadConfig(file)
 	if err != nil {
 		return fmt.Errorf("cannot read config file: %w", err)
 	}
 	cm.configs[c.Name] = *c
 	return nil
 }
 // GetConfig returns a copy of the configuration stored under name m and
 // whether such an entry exists.
 //
 // The receiver is a pointer so the embedded mutex is shared between callers;
 // a value receiver would lock a private copy and protect nothing.
 func (cm *ConfigMerger) GetConfig(m string) (Config, bool) {
 	cm.Lock()
 	defer cm.Unlock()
 	v, exists := cm.configs[m]
 	return v, exists
 }
 // ListConfigs returns the names of all stored configurations. The order is
 // unspecified (map iteration) and the result is nil when nothing is stored.
 //
 // The receiver is a pointer so the embedded mutex is shared between callers;
 // a value receiver would lock a private copy and protect nothing.
 func (cm *ConfigMerger) ListConfigs() []string {
 	cm.Lock()
 	defer cm.Unlock()
 	var res []string
 	for k := range cm.configs {
 		res = append(res, k)
 	}
 	return res
 }
 // LoadConfigs scans the directory at path and loads every entry whose name
 // contains ".yaml" as a single-model configuration, stored under its Name.
 // Files that cannot be read or decoded are skipped silently (best effort);
 // only a failure to list the directory is returned.
 //
 // The receiver is a pointer so the embedded mutex is shared between callers;
 // a value receiver would lock a private copy and protect nothing.
 func (cm *ConfigMerger) LoadConfigs(path string) error {
 	cm.Lock()
 	defer cm.Unlock()
 	files, err := ioutil.ReadDir(path)
 	if err != nil {
 		return err
 	}
 	for _, file := range files {
 		// Only YAML files are configurations; skip everything else
 		// (templates, model binaries, .keep files, ...).
 		if !strings.Contains(file.Name(), ".yaml") {
 			continue
 		}
 		c, err := ReadConfig(filepath.Join(path, file.Name()))
 		if err == nil {
 			cm.configs[c.Name] = *c
 		}
 	}
 	return nil
 }
 // updateConfig overlays the per-request values from input onto config.
 //
 // Numeric fields are copied only when non-zero and boolean fields only when
 // true, so a zero value in the request means "not set" and never resets a
 // configured value. Stop words, prompts and inputs are appended to the
 // corresponding slices of config rather than replacing them.
 func updateConfig(config *Config, input *OpenAIRequest) {
 	if input.Echo {
 		config.Echo = input.Echo
 	}
 	if input.TopK != 0 {
 		config.TopK = input.TopK
 	}
 	if input.TopP != 0 {
 		config.TopP = input.TopP
 	}
 	if input.Temperature != 0 {
 		config.Temperature = input.Temperature
 	}
 	if input.Maxtokens != 0 {
 		config.Maxtokens = input.Maxtokens
 	}
 	// "stop" may be a single string or a list of strings.
 	switch stop := input.Stop.(type) {
 	case string:
 		if stop != "" {
 			config.StopWords = append(config.StopWords, stop)
 		}
 	case []interface{}:
 		for _, pp := range stop {
 			if s, ok := pp.(string); ok {
 				config.StopWords = append(config.StopWords, s)
 			}
 		}
 	}
 	if input.RepeatPenalty != 0 {
 		config.RepeatPenalty = input.RepeatPenalty
 	}
 	if input.Keep != 0 {
 		config.Keep = input.Keep
 	}
 	if input.Batch != 0 {
 		config.Batch = input.Batch
 	}
 	if input.F16 {
 		config.F16 = input.F16
 	}
 	if input.IgnoreEOS {
 		config.IgnoreEOS = input.IgnoreEOS
 	}
 	if input.Seed != 0 {
 		config.Seed = input.Seed
 	}
 	if input.Mirostat != 0 {
 		config.Mirostat = input.Mirostat
 	}
 	if input.MirostatETA != 0 {
 		config.MirostatETA = input.MirostatETA
 	}
 	if input.MirostatTAU != 0 {
 		config.MirostatTAU = input.MirostatTAU
 	}
 	// "input" may be a string, a list of strings, or a list of token arrays.
 	switch inputs := input.Input.(type) {
 	case string:
 		if inputs != "" {
 			config.InputStrings = append(config.InputStrings, inputs)
 		}
 	case []interface{}:
 		for _, pp := range inputs {
 			switch i := pp.(type) {
 			case string:
 				config.InputStrings = append(config.InputStrings, i)
 			case []interface{}:
 				tokens := []int{}
 				for _, ii := range i {
 					// The value comes from the request body: use a checked
 					// assertion so a non-numeric element is skipped instead
 					// of panicking the handler.
 					if f, ok := ii.(float64); ok {
 						tokens = append(tokens, int(f))
 					}
 				}
 				config.InputToken = append(config.InputToken, tokens)
 			}
 		}
 	}
 	// "prompt" may be a single string or a list of strings.
 	switch p := input.Prompt.(type) {
 	case string:
 		config.PromptStrings = append(config.PromptStrings, p)
 	case []interface{}:
 		for _, pp := range p {
 			if s, ok := pp.(string); ok {
 				config.PromptStrings = append(config.PromptStrings, s)
 			}
 		}
 	}
 }
 // readInput parses the request body into an OpenAIRequest and decides which
 // model the request targets.
 //
 // Precedence, lowest to highest: the "model" field of the body, the "model"
 // route parameter, and a bearer token that names a model present in the
 // model path. When none of these yields a model and randomModel is true, the
 // first model returned by the loader is used; if the loader lists none, an
 // error is returned.
 func readInput(c *fiber.Ctx, loader *model.ModelLoader, randomModel bool) (string, *OpenAIRequest, error) {
 	input := new(OpenAIRequest)
 	// Get input data from the request body
 	if err := c.BodyParser(input); err != nil {
 		return "", nil, err
 	}
 	modelFile := input.Model
 	if c.Params("model") != "" {
 		modelFile = c.Params("model")
 	}
 	// Marshal errors are ignored on purpose: this is only for the debug log.
 	received, _ := json.Marshal(input)
 	log.Debug().Msgf("Request received: %s", string(received))
 	// Set model from bearer token, if available.
 	// TrimPrefix removes the literal scheme only. TrimLeft treats its second
 	// argument as a character set and would also eat leading letters of the
 	// token itself (e.g. "real-model" -> "l-model").
 	bearer := strings.TrimPrefix(c.Get("authorization"), "Bearer ")
 	bearerExists := bearer != "" && loader.ExistsInModelPath(bearer)
 	// If no model was specified, take the first available
 	if modelFile == "" && !bearerExists && randomModel {
 		models, _ := loader.ListModels()
 		if len(models) > 0 {
 			modelFile = models[0]
 			log.Debug().Msgf("No model specified, using: %s", modelFile)
 		} else {
 			log.Debug().Msgf("No model specified, returning error")
 			return "", nil, fmt.Errorf("no model specified")
 		}
 	}
 	// If a model is found in bearer token takes precedence
 	if bearerExists {
 		log.Debug().Msgf("Using model from bearer token: %s", bearer)
 		modelFile = bearer
 	}
 	return modelFile, input, nil
 }
 // readConfig resolves the effective configuration for modelFile and overlays
 // the request parameters from input on top of it.
 //
 // If "<ModelPath>/<modelFile>.yaml" exists it is (re)loaded into cm first.
 // When cm has no entry for modelFile, a configuration is built from
 // defaultRequest and the ctx, threads, f16 and debug arguments. Threads is
 // never left at 0: it falls back to threads and then to 4. A true debug
 // argument always forces Debug on.
 func readConfig(modelFile string, input *OpenAIRequest, cm *ConfigMerger, loader *model.ModelLoader, debug bool, threads, ctx int, f16 bool) (*Config, *OpenAIRequest, error) {
 	// Load a config file if present after the model name
 	yamlPath := filepath.Join(loader.ModelPath, modelFile+".yaml")
 	if _, statErr := os.Stat(yamlPath); statErr == nil {
 		if loadErr := cm.LoadConfig(yamlPath); loadErr != nil {
 			return nil, nil, fmt.Errorf("failed loading model config (%s) %s", yamlPath, loadErr.Error())
 		}
 	}
 	resolved, found := cm.GetConfig(modelFile)
 	if !found {
 		resolved = Config{
 			OpenAIRequest: defaultRequest(modelFile),
 			ContextSize:   ctx,
 			Threads:       threads,
 			F16:           f16,
 			Debug:         debug,
 		}
 	}
 	config := &resolved
 	// Set the parameters for the language model prediction
 	updateConfig(config, input)
 	// Don't allow 0 as setting
 	switch {
 	case config.Threads != 0:
 		// keep the configured value
 	case threads != 0:
 		config.Threads = threads
 	default:
 		config.Threads = 4
 	}
 	// Enforce debug flag if passed from CLI
 	config.Debug = config.Debug || debug
 	return config, input, nil
 }
--- a/api/config/config.go
+++ b/api/config/config.go
@ -1,209 +0,0 @@
 package api_config
 import (
 	"fmt"
 	"io/fs"
 	"os"
 	"path/filepath"
 	"strings"
 	"sync"
 	"gopkg.in/yaml.v3"
 )
 // Config is the per-model configuration decoded from YAML.
 type Config struct {
 	// Embedded prediction parameters, read from the "parameters" YAML key.
 	PredictionOptions `yaml:"parameters"`
 	Name              string            `yaml:"name"`
 	StopWords         []string          `yaml:"stopwords"`
 	Cutstrings        []string          `yaml:"cutstrings"`
 	TrimSpace         []string          `yaml:"trimspace"`
 	ContextSize       int               `yaml:"context_size"`
 	F16               bool              `yaml:"f16"`
 	NUMA              bool              `yaml:"numa"`
 	Threads           int               `yaml:"threads"`
 	Debug             bool              `yaml:"debug"`
 	Roles             map[string]string `yaml:"roles"`
 	Embeddings        bool              `yaml:"embeddings"`
 	Backend           string            `yaml:"backend"`
 	TemplateConfig    TemplateConfig    `yaml:"template"`
 	MirostatETA       float64           `yaml:"mirostat_eta"`
 	MirostatTAU       float64           `yaml:"mirostat_tau"`
 	Mirostat          int               `yaml:"mirostat"`
 	NGPULayers        int               `yaml:"gpu_layers"`
 	MMap              bool              `yaml:"mmap"`
 	MMlock            bool              `yaml:"mmlock"`
 	LowVRAM           bool              `yaml:"low_vram"`
 	TensorSplit           string `yaml:"tensor_split"`
 	MainGPU               string `yaml:"main_gpu"`
 	ImageGenerationAssets string `yaml:"asset_dir"`
 	PromptCachePath string `yaml:"prompt_cache_path"`
 	PromptCacheAll  bool   `yaml:"prompt_cache_all"`
 	PromptCacheRO   bool   `yaml:"prompt_cache_ro"`
 	Grammar string `yaml:"grammar"`
 	// The fields below carry no YAML tag.
 	PromptStrings, InputStrings                []string
 	InputToken                                 [][]int
 	// Unexported function-call state; written through SetFunctionCallString
 	// and SetFunctionCallNameString.
 	functionCallString, functionCallNameString string
 	FunctionsConfig Functions `yaml:"function"`
 }
 // Functions holds the "function" section of a model configuration.
 // NOTE(review): the fields appear to configure a fallback "no action"
 // function; confirm against the code that consumes them.
 type Functions struct {
 	DisableNoAction         bool   `yaml:"disable_no_action"`
 	NoActionFunctionName    string `yaml:"no_action_function_name"`
 	NoActionDescriptionName string `yaml:"no_action_description_name"`
 }
 // TemplateConfig holds the "template" section of a model configuration, with
 // one string per request kind. Note that Functions maps to the "function"
 // YAML key.
 type TemplateConfig struct {
 	Completion string `yaml:"completion"`
 	Functions  string `yaml:"function"`
 	Chat       string `yaml:"chat"`
 	Edit       string `yaml:"edit"`
 }
 // ConfigLoader stores model configurations keyed by Config.Name. Its methods
 // take the embedded mutex around every access to the map.
 type ConfigLoader struct {
 	configs map[string]Config
 	sync.Mutex
 }
 // SetFunctionCallString stores s as the function-call mode; the value "none"
 // is what ShouldUseFunctions tests against.
 func (cfg *Config) SetFunctionCallString(s string) {
 	cfg.functionCallString = s
 }
 // SetFunctionCallNameString stores s as the name of the specific function to
 // call; an empty string means no specific function is requested.
 func (cfg *Config) SetFunctionCallNameString(s string) {
 	cfg.functionCallNameString = s
 }
 // ShouldUseFunctions reports whether function calling is active: true unless
 // the function-call mode is exactly "none" and no specific function name was
 // set. An empty mode therefore counts as active.
 func (cfg *Config) ShouldUseFunctions() bool {
 	if cfg.functionCallString != "none" {
 		return true
 	}
 	return cfg.ShouldCallSpecificFunction()
 }
 // ShouldCallSpecificFunction reports whether a specific function name has
 // been set.
 func (cfg *Config) ShouldCallSpecificFunction() bool {
 	return cfg.functionCallNameString != ""
 }
 // FunctionToCall returns the specific function name that was set, or the
 // empty string when none was.
 func (cfg *Config) FunctionToCall() string {
 	name := cfg.functionCallNameString
 	return name
 }
 // defaultPredictOptions returns the prediction parameters used when a model
 // has no configuration of its own: top_p 0.7, top_k 80, 512 max tokens and
 // temperature 0.9, bound to modelFile.
 func defaultPredictOptions(modelFile string) PredictionOptions {
 	var defaults PredictionOptions
 	defaults.Model = modelFile
 	defaults.TopP = 0.7
 	defaults.TopK = 80
 	defaults.Maxtokens = 512
 	defaults.Temperature = 0.9
 	return defaults
 }
 // DefaultConfig returns a new Config whose prediction parameters are the
 // defaults for modelFile; every other field is left at its zero value.
 func DefaultConfig(modelFile string) *Config {
 	cfg := new(Config)
 	cfg.PredictionOptions = defaultPredictOptions(modelFile)
 	return cfg
 }
 // NewConfigLoader returns a ConfigLoader with an empty, ready-to-use map.
 func NewConfigLoader() *ConfigLoader {
 	loader := new(ConfigLoader)
 	loader.configs = map[string]Config{}
 	return loader
 }
 // ReadConfigFile reads a YAML file that contains a list of model
 // configurations and returns the decoded entries. Read and decode failures
 // are returned wrapped.
 func ReadConfigFile(file string) ([]*Config, error) {
 	// Start from an empty, non-nil slice so an empty document still yields a
 	// non-nil result.
 	parsed := []*Config{}
 	raw, err := os.ReadFile(file)
 	if err != nil {
 		return nil, fmt.Errorf("cannot read config file: %w", err)
 	}
 	if err = yaml.Unmarshal(raw, &parsed); err != nil {
 		return nil, fmt.Errorf("cannot unmarshal config file: %w", err)
 	}
 	return parsed, nil
 }
 // ReadConfig reads a YAML file that contains a single model configuration
 // and returns it. Read and decode failures are returned wrapped.
 func ReadConfig(file string) (*Config, error) {
 	raw, err := os.ReadFile(file)
 	if err != nil {
 		return nil, fmt.Errorf("cannot read config file: %w", err)
 	}
 	cfg := new(Config)
 	if err = yaml.Unmarshal(raw, cfg); err != nil {
 		return nil, fmt.Errorf("cannot unmarshal config file: %w", err)
 	}
 	return cfg, nil
 }
 // LoadConfigFile reads a YAML list of configurations from file and stores
 // each entry under its Name, replacing any entry with the same name. The
 // loader's mutex is held for the whole call.
 func (cm *ConfigLoader) LoadConfigFile(file string) error {
 	cm.Lock()
 	defer cm.Unlock()
 	parsed, err := ReadConfigFile(file)
 	if err != nil {
 		return fmt.Errorf("cannot load config file: %w", err)
 	}
 	for i := range parsed {
 		entry := parsed[i]
 		cm.configs[entry.Name] = *entry
 	}
 	return nil
 }
 // LoadConfig reads a single configuration from file and stores it under its
 // Name, replacing any entry with the same name. The loader's mutex is held
 // for the whole call.
 func (cm *ConfigLoader) LoadConfig(file string) error {
 	cm.Lock()
 	defer cm.Unlock()
 	parsed, err := ReadConfig(file)
 	if err == nil {
 		cm.configs[parsed.Name] = *parsed
 		return nil
 	}
 	return fmt.Errorf("cannot read config file: %w", err)
 }
 // GetConfig returns a copy of the configuration stored under name m and
 // whether such an entry exists.
 func (cm *ConfigLoader) GetConfig(m string) (Config, bool) {
 	cm.Lock()
 	defer cm.Unlock()
 	if found, ok := cm.configs[m]; ok {
 		return found, true
 	}
 	return Config{}, false
 }
 // ListConfigs returns the names of all stored configurations. The order is
 // unspecified (map iteration) and the result is nil when nothing is stored.
 func (cm *ConfigLoader) ListConfigs() []string {
 	cm.Lock()
 	defer cm.Unlock()
 	// Deliberately a nil slice, not make(): callers get nil for "no configs".
 	var names []string
 	for name := range cm.configs {
 		names = append(names, name)
 	}
 	return names
 }
 // LoadConfigs scans the directory at path and loads every entry whose name
 // contains ".yaml" as a single-model configuration, stored under its Name.
 // Listing or stat failures are returned; files that cannot be read or
 // decoded are skipped silently.
 func (cm *ConfigLoader) LoadConfigs(path string) error {
 	cm.Lock()
 	defer cm.Unlock()
 	dirEntries, err := os.ReadDir(path)
 	if err != nil {
 		return err
 	}
 	// Resolve all entries to FileInfo first; a stat failure aborts the scan
 	// before any configuration is stored.
 	infos := make([]fs.FileInfo, 0, len(dirEntries))
 	for _, de := range dirEntries {
 		fi, infoErr := de.Info()
 		if infoErr != nil {
 			return infoErr
 		}
 		infos = append(infos, fi)
 	}
 	for _, fi := range infos {
 		// Only YAML files are configurations; everything else (templates,
 		// model binaries, .keep files, ...) is ignored.
 		if strings.Contains(fi.Name(), ".yaml") {
 			if parsed, readErr := ReadConfig(filepath.Join(path, fi.Name())); readErr == nil {
 				cm.configs[parsed.Name] = *parsed
 			}
 		}
 	}
 	return nil
 }
--- a/api/config/config_test.go
+++ b/api/config/config_test.go
@ -1,56 +0,0 @@
 package api_config_test
 import (
 	"os"
 	. "github.com/go-skynet/LocalAI/api/config"
 	"github.com/go-skynet/LocalAI/api/options"
 	"github.com/go-skynet/LocalAI/pkg/model"
 	. "github.com/onsi/ginkgo/v2"
 	. "github.com/onsi/gomega"
 )
 // Specs for reading configuration files and loading a models directory.
 // They rely on the CONFIG_FILE and MODELS_PATH environment variables.
 var _ = Describe("Test cases for config related functions", func() {
 	var (
 		configFile string
 	)
 	Context("Test Read configuration functions", func() {
 		// Read while the spec tree is built, before any It body runs.
 		configFile = os.Getenv("CONFIG_FILE")
 		It("Test ReadConfigFile", func() {
 			config, err := ReadConfigFile(configFile)
 			Expect(err).To(BeNil())
 			Expect(config).ToNot(BeNil())
 			// The file is expected to hold two configs, in this order.
 			Expect(config[0].Name).To(Equal("list1"))
 			Expect(config[1].Name).To(Equal("list2"))
 		})
 		It("Test LoadConfigs", func() {
 			cm := NewConfigLoader()
 			opts := options.NewOptions()
 			modelLoader := model.NewModelLoader(os.Getenv("MODELS_PATH"))
 			options.WithModelLoader(modelLoader)(opts)
 			err := cm.LoadConfigs(opts.Loader.ModelPath)
 			Expect(err).To(BeNil())
 			Expect(cm.ListConfigs()).ToNot(BeNil())
 			// The loaded configs should include the "gpt4all" model.
 			Expect(cm.ListConfigs()).To(ContainElements("gpt4all"))
 			// The loaded configs should include the "gpt4all-2" model.
 			Expect(cm.ListConfigs()).To(ContainElements("gpt4all-2"))
 			// The loaded configs should include the "text-embedding-ada-002" model.
 			Expect(cm.ListConfigs()).To(ContainElements("text-embedding-ada-002"))
 			// The loaded configs should include the "rwkv_test" model.
 			Expect(cm.ListConfigs()).To(ContainElements("rwkv_test"))
 			// The loaded configs should include the "whisper-1" model.
 			Expect(cm.ListConfigs()).To(ContainElements("whisper-1"))
 		})
 	})
 })
--- a/api/config/prediction.go
+++ b/api/config/prediction.go
@ -1,37 +0,0 @@
 package api_config
 // PredictionOptions groups the sampling and generation parameters that can
 // be set both in a request (JSON tags) and in a model configuration (YAML
 // tags). Language, N and Echo have no YAML tag.
 type PredictionOptions struct {
 	// Also part of the OpenAI official spec
 	Model string `json:"model" yaml:"model"`
 	// Also part of the OpenAI official spec
 	Language string `json:"language"`
 	// Also part of the OpenAI official spec. Used to return multiple results.
 	N int `json:"n"`
 	// Common options between all the API calls, part of the OpenAI spec
 	TopP        float64 `json:"top_p" yaml:"top_p"`
 	TopK        int     `json:"top_k" yaml:"top_k"`
 	Temperature float64 `json:"temperature" yaml:"temperature"`
 	Maxtokens   int     `json:"max_tokens" yaml:"max_tokens"`
 	Echo        bool    `json:"echo"`
 	// Custom parameters - not present in the OpenAI API
 	Batch         int     `json:"batch" yaml:"batch"`
 	F16           bool    `json:"f16" yaml:"f16"`
 	IgnoreEOS     bool    `json:"ignore_eos" yaml:"ignore_eos"`
 	RepeatPenalty float64 `json:"repeat_penalty" yaml:"repeat_penalty"`
 	Keep          int     `json:"n_keep" yaml:"n_keep"`
 	MirostatETA float64 `json:"mirostat_eta" yaml:"mirostat_eta"`
 	MirostatTAU float64 `json:"mirostat_tau" yaml:"mirostat_tau"`
 	Mirostat    int     `json:"mirostat" yaml:"mirostat"`
 	FrequencyPenalty float64 `json:"frequency_penalty" yaml:"frequency_penalty"`
 	TFZ              float64 `json:"tfz" yaml:"tfz"`
 	TypicalP float64 `json:"typical_p" yaml:"typical_p"`
 	Seed     int     `json:"seed" yaml:"seed"`
 }
--- a/api/config_test.go
+++ b/api/config_test.go
@ -0,0 +1,27 @@
 package api
 import (
 	"os"
 	. "github.com/onsi/ginkgo/v2"
 	. "github.com/onsi/gomega"
 )
 // Specs for ReadConfigFile. The file under test is taken from the
 // CONFIG_FILE environment variable.
 var _ = Describe("Test cases for config related functions", func() {
 	var (
 		configFile string
 	)
 	Context("Test Read configuration functions", func() {
 		// Read while the spec tree is built, before any It body runs.
 		configFile = os.Getenv("CONFIG_FILE")
 		It("Test ReadConfigFile", func() {
 			config, err := ReadConfigFile(configFile)
 			Expect(err).To(BeNil())
 			Expect(config).ToNot(BeNil())
 			// The file is expected to hold exactly two configs.
 			Expect(len(config)).To(Equal(2))
 		})
 	})
 })
--- a/api/gallery.go
+++ b/api/gallery.go
@ -0,0 +1,233 @@
 package api
 import (
 	"context"
 	"encoding/json"
 	"fmt"
 	"io/ioutil"
 	"net/http"
 	"net/url"
 	"os"
 	"strings"
 	"sync"
 	"github.com/go-skynet/LocalAI/pkg/gallery"
 	"github.com/gofiber/fiber/v2"
 	"github.com/google/uuid"
 	"gopkg.in/yaml.v3"
 )
 // galleryOp is one queued gallery installation: the request to apply and the
 // job id under which its status is recorded.
 type galleryOp struct {
 	req ApplyGalleryModelRequest
 	id  string
 }
 // galleryOpStatus is the state of a gallery job as returned by the status
 // endpoint. Processed is set once the job has finished, whether it succeeded
 // or failed.
 // NOTE(review): Error is an error interface value; confirm that it
 // serializes to something useful in the JSON response.
 type galleryOpStatus struct {
 	Error     error  `json:"error"`
 	Processed bool   `json:"processed"`
 	Message   string `json:"message"`
 }
 // galleryApplier processes gallery jobs received on C and keeps their latest
 // status by job id. The embedded mutex guards the statuses map.
 type galleryApplier struct {
 	modelPath string
 	sync.Mutex
 	C        chan galleryOp
 	statuses map[string]*galleryOpStatus
 }
 // newGalleryApplier returns a galleryApplier for modelPath with an
 // unbuffered job channel and an empty status map.
 func newGalleryApplier(modelPath string) *galleryApplier {
 	applier := new(galleryApplier)
 	applier.modelPath = modelPath
 	applier.C = make(chan galleryOp)
 	applier.statuses = map[string]*galleryOpStatus{}
 	return applier
 }
 // applyGallery downloads the gallery configuration referenced by req,
 // appends the request's additional files to it, applies it under modelPath
 // and finally reloads the model configurations into cm.
 //
 // An error is returned when the URL cannot be decoded, the download fails or
 // answers with a non-2xx status, the body is not valid YAML, or applying or
 // reloading fails.
 func applyGallery(modelPath string, req ApplyGalleryModelRequest, cm *ConfigMerger) error {
 	rawURL, err := req.DecodeURL()
 	if err != nil {
 		return err
 	}
 	// Send a GET request to the URL
 	response, err := http.Get(rawURL)
 	if err != nil {
 		return err
 	}
 	defer response.Body.Close()
 	// http.Get only fails on transport errors; a 404 or 500 page must not be
 	// fed to the YAML decoder as if it were a gallery config.
 	if response.StatusCode < 200 || response.StatusCode >= 300 {
 		return fmt.Errorf("failed fetching gallery config from %s: unexpected status %s", rawURL, response.Status)
 	}
 	// Read the response body
 	body, err := ioutil.ReadAll(response.Body)
 	if err != nil {
 		return err
 	}
 	// Unmarshal YAML data into a Config struct
 	var config gallery.Config
 	err = yaml.Unmarshal(body, &config)
 	if err != nil {
 		return err
 	}
 	config.Files = append(config.Files, req.AdditionalFiles...)
 	if err := gallery.Apply(modelPath, req.Name, &config, req.Overrides); err != nil {
 		return err
 	}
 	// Reload models
 	return cm.LoadConfigs(modelPath)
 }
 // updatestatus records op as the current status of job s, replacing any
 // previous status, while holding the applier's mutex.
 func (applier *galleryApplier) updatestatus(s string, op *galleryOpStatus) {
 	applier.Lock()
 	defer applier.Unlock()
 	applier.statuses[s] = op
 }
 // getstatus returns the last recorded status of job s, or nil when the job
 // id is unknown.
 func (applier *galleryApplier) getstatus(s string) *galleryOpStatus {
 	applier.Lock()
 	defer applier.Unlock()
 	status := applier.statuses[s]
 	return status
 }
 // start launches a goroutine that applies the jobs received on g.C one at a
 // time until c is cancelled. Each job's status moves from "processing" to
 // either an error status or "completed"; both final states set Processed.
 func (g *galleryApplier) start(c context.Context, cm *ConfigMerger) {
 	worker := func() {
 		for {
 			select {
 			case <-c.Done():
 				return
 			case job := <-g.C:
 				g.updatestatus(job.id, &galleryOpStatus{Message: "processing"})
 				if applyErr := applyGallery(g.modelPath, job.req, cm); applyErr != nil {
 					g.updatestatus(job.id, &galleryOpStatus{Error: applyErr, Processed: true})
 				} else {
 					g.updatestatus(job.id, &galleryOpStatus{Processed: true, Message: "completed"})
 				}
 			}
 		}
 	}
 	go worker()
 }
 // ApplyGalleryFromFile reads the file s, which must hold a JSON array of
 // gallery requests, and applies the requests in order under modelPath. It
 // stops at and returns the first error.
 func ApplyGalleryFromFile(modelPath, s string, cm *ConfigMerger) error {
 	raw, readErr := os.ReadFile(s)
 	if readErr != nil {
 		return readErr
 	}
 	var batch []ApplyGalleryModelRequest
 	if decodeErr := json.Unmarshal(raw, &batch); decodeErr != nil {
 		return decodeErr
 	}
 	for i := range batch {
 		if applyErr := applyGallery(modelPath, batch[i], cm); applyErr != nil {
 			return applyErr
 		}
 	}
 	return nil
 }
 // ApplyGalleryFromString decodes s as a JSON array of gallery requests and
 // applies them in order under modelPath. It stops at and returns the first
 // error.
 func ApplyGalleryFromString(modelPath, s string, cm *ConfigMerger) error {
 	var batch []ApplyGalleryModelRequest
 	if decodeErr := json.Unmarshal([]byte(s), &batch); decodeErr != nil {
 		return decodeErr
 	}
 	for i := range batch {
 		if applyErr := applyGallery(modelPath, batch[i], cm); applyErr != nil {
 			return applyErr
 		}
 	}
 	return nil
 }
 // endpoints
 // ApplyGalleryModelRequest is the JSON body of a gallery installation
 // request. URL points at the gallery configuration (see DecodeURL for the
 // accepted forms); AdditionalFiles are appended to the files listed in that
 // configuration before it is applied.
 type ApplyGalleryModelRequest struct {
 	URL             string                 `json:"url"`
 	Name            string                 `json:"name"`
 	Overrides       map[string]interface{} `json:"overrides"`
 	AdditionalFiles []gallery.File         `json:"files"`
 }
 const (
 	// githubURI is the scheme prefix of the GitHub shorthand accepted by
 	// DecodeURL.
 	githubURI = "github:"
 )
 // DecodeURL turns the request URL into a plain HTTP(S) URL.
 //
 // Two forms are accepted:
 //   - "github:<org>/<project>/<path>[@<branch>]", expanded to the matching
 //     raw.githubusercontent.com URL (branch defaults to "main");
 //   - "http://..." or "https://...", returned after a parse round trip.
 //
 // Any other input, or a GitHub shorthand that lacks the org, project or
 // path part, yields an error.
 func (request ApplyGalleryModelRequest) DecodeURL() (string, error) {
 	input := request.URL
 	var rawURL string
 	if strings.HasPrefix(input, githubURI) {
 		// The prefix guarantees at least two parts, so parts[1] is safe.
 		parts := strings.Split(input, ":")
 		repoParts := strings.Split(parts[1], "@")
 		branch := "main"
 		if len(repoParts) > 1 {
 			branch = repoParts[1]
 		}
 		repoPath := strings.Split(repoParts[0], "/")
 		// The URL comes from the request: validate it instead of indexing
 		// blindly, which panicked on inputs such as "github:" or "github:foo".
 		if len(repoPath) < 3 || repoPath[0] == "" || repoPath[1] == "" {
 			return "", fmt.Errorf("invalid github URL format: expected github:<org>/<project>/<path>[@<branch>]")
 		}
 		org := repoPath[0]
 		project := repoPath[1]
 		projectPath := strings.Join(repoPath[2:], "/")
 		rawURL = fmt.Sprintf("https://raw.githubusercontent.com/%s/%s/%s/%s", org, project, branch, projectPath)
 	} else if strings.HasPrefix(input, "http://") || strings.HasPrefix(input, "https://") {
 		// Handle regular URLs
 		u, err := url.Parse(input)
 		if err != nil {
 			return "", fmt.Errorf("invalid URL: %w", err)
 		}
 		rawURL = u.String()
 	} else {
 		return "", fmt.Errorf("invalid URL format")
 	}
 	return rawURL, nil
 }
 // getOpStatus returns a handler that responds with the JSON status of the
 // job named by the "uuid" route parameter, or an error for an unknown id.
 func getOpStatus(g *galleryApplier) func(c *fiber.Ctx) error {
 	handler := func(c *fiber.Ctx) error {
 		if status := g.getstatus(c.Params("uuid")); status != nil {
 			return c.JSON(status)
 		}
 		return fmt.Errorf("could not find any status for ID")
 	}
 	return handler
 }
 // applyModelGallery returns a handler that parses a gallery request from the
 // body, queues it on g under a fresh UUID and responds with that id and the
 // URL where the job status can be polled. The send on g blocks until the
 // job is received.
 func applyModelGallery(modelPath string, cm *ConfigMerger, g chan galleryOp) func(c *fiber.Ctx) error {
 	type jobResponse struct {
 		ID        string `json:"uuid"`
 		StatusURL string `json:"status"`
 	}
 	return func(c *fiber.Ctx) error {
 		var request ApplyGalleryModelRequest
 		// Get input data from the request body
 		if err := c.BodyParser(&request); err != nil {
 			return err
 		}
 		jobUUID, err := uuid.NewUUID()
 		if err != nil {
 			return err
 		}
 		jobID := jobUUID.String()
 		g <- galleryOp{req: request, id: jobID}
 		return c.JSON(jobResponse{ID: jobID, StatusURL: c.BaseURL() + "/models/jobs/" + jobID})
 	}
 }
--- a/api/gallery_test.go
+++ b/api/gallery_test.go
@ -0,0 +1,30 @@
 package api_test
 import (
 	. "github.com/go-skynet/LocalAI/api"
 	. "github.com/onsi/ginkgo/v2"
 	. "github.com/onsi/gomega"
 )
 // Specs for ApplyGalleryModelRequest.DecodeURL: the GitHub shorthand with
 // and without an explicit branch, and plain HTTPS URLs.
 var _ = Describe("Gallery API tests", func() {
 	Context("requests", func() {
 		It("parses github with a branch", func() {
 			req := ApplyGalleryModelRequest{URL: "github:go-skynet/model-gallery/gpt4all-j.yaml@main"}
 			str, err := req.DecodeURL()
 			Expect(err).ToNot(HaveOccurred())
 			Expect(str).To(Equal("https://raw.githubusercontent.com/go-skynet/model-gallery/main/gpt4all-j.yaml"))
 		})
 		It("parses github without a branch", func() {
 			// Without "@<branch>" the branch defaults to "main".
 			req := ApplyGalleryModelRequest{URL: "github:go-skynet/model-gallery/gpt4all-j.yaml"}
 			str, err := req.DecodeURL()
 			Expect(err).ToNot(HaveOccurred())
 			Expect(str).To(Equal("https://raw.githubusercontent.com/go-skynet/model-gallery/main/gpt4all-j.yaml"))
 		})
 		It("parses URLS", func() {
 			// A regular URL is expected to come back unchanged.
 			req := ApplyGalleryModelRequest{URL: "https://raw.githubusercontent.com/go-skynet/model-gallery/main/gpt4all-j.yaml"}
 			str, err := req.DecodeURL()
 			Expect(err).ToNot(HaveOccurred())
 			Expect(str).To(Equal("https://raw.githubusercontent.com/go-skynet/model-gallery/main/gpt4all-j.yaml"))
 		})
 	})
 })
--- a/api/localai/gallery.go
+++ b/api/localai/gallery.go
@ -1,224 +0,0 @@
 package localai
 import (
 	"context"
 	"fmt"
 	"os"
 	"strings"
 	"sync"
 	json "github.com/json-iterator/go"
 	config "github.com/go-skynet/LocalAI/api/config"
 	"github.com/go-skynet/LocalAI/pkg/gallery"
 	"github.com/go-skynet/LocalAI/pkg/utils"
 	"github.com/gofiber/fiber/v2"
 	"github.com/google/uuid"
 	"github.com/rs/zerolog/log"
 )
 // galleryOp is one queued gallery installation. When galleryName is set the
 // model is installed from the listed galleries; otherwise req itself is
 // used as the source (see Start).
 type galleryOp struct {
 	req         gallery.GalleryModel
 	id          string
 	galleries   []gallery.Gallery
 	galleryName string
 }
 // galleryOpStatus is the state of a gallery job as returned by the status
 // endpoint. Processed is set once the job has finished, whether it succeeded
 // or failed; Progress and the two size fields are updated from the download
 // callback while the job runs.
 type galleryOpStatus struct {
 	Error              error   `json:"error"`
 	Processed          bool    `json:"processed"`
 	Message            string  `json:"message"`
 	Progress           float64 `json:"progress"`
 	TotalFileSize      string  `json:"file_size"`
 	DownloadedFileSize string  `json:"downloaded_size"`
 }
 // galleryApplier processes gallery jobs received on C and keeps their latest
 // status by job id. The embedded mutex guards the statuses map.
 type galleryApplier struct {
 	modelPath string
 	sync.Mutex
 	C        chan galleryOp
 	statuses map[string]*galleryOpStatus
 }
 // NewGalleryService returns a galleryApplier for modelPath with an
 // unbuffered job channel and an empty status map.
 func NewGalleryService(modelPath string) *galleryApplier {
 	service := new(galleryApplier)
 	service.modelPath = modelPath
 	service.C = make(chan galleryOp)
 	service.statuses = map[string]*galleryOpStatus{}
 	return service
 }
 // prepareModel fetches the gallery configuration at req.URL, appends the
 // request's additional files to it and installs the model under modelPath,
 // reporting download progress through downloadStatus. cm is not used.
 func prepareModel(modelPath string, req gallery.GalleryModel, cm *config.ConfigLoader, downloadStatus func(string, string, string, float64)) error {
 	galleryCfg, fetchErr := gallery.GetGalleryConfigFromURL(req.URL)
 	if fetchErr != nil {
 		return fetchErr
 	}
 	galleryCfg.Files = append(galleryCfg.Files, req.AdditionalFiles...)
 	return gallery.InstallModel(modelPath, req.Name, &galleryCfg, req.Overrides, downloadStatus)
 }
 // updateStatus records op as the current status of job s, replacing any
 // previous status, while holding the applier's mutex.
 func (applier *galleryApplier) updateStatus(s string, op *galleryOpStatus) {
 	applier.Lock()
 	defer applier.Unlock()
 	applier.statuses[s] = op
 }
 // getStatus returns the last recorded status of job s, or nil when the job
 // id is unknown.
 func (applier *galleryApplier) getStatus(s string) *galleryOpStatus {
 	applier.Lock()
 	defer applier.Unlock()
 	status := applier.statuses[s]
 	return status
 }
 // Start launches a goroutine that processes the jobs received on g.C one at
 // a time until c is cancelled. For each job it installs the model, reloads
 // the configurations in cm from g.modelPath and records the outcome as the
 // job's status.
 func (g *galleryApplier) Start(c context.Context, cm *config.ConfigLoader) {
 	go func() {
 		for {
 			select {
 			case <-c.Done():
 				return
 			case op := <-g.C:
 				utils.ResetDownloadTimers()
 				g.updateStatus(op.id, &galleryOpStatus{Message: "processing", Progress: 0})
 				// updateError marks the job as finished with an error
 				updateError := func(e error) {
 					g.updateStatus(op.id, &galleryOpStatus{Error: e, Processed: true, Message: "error: " + e.Error()})
 				}
 				// progressCallback stores the download progress in the job
 				// status and forwards it to the shared display function
 				progressCallback := func(fileName string, current string, total string, percentage float64) {
 					g.updateStatus(op.id, &galleryOpStatus{Message: "processing", Progress: percentage, TotalFileSize: total, DownloadedFileSize: current})
 					utils.DisplayDownloadFunction(fileName, current, total, percentage)
 				}
 				var err error
 				// if the request contains a gallery name, we apply the gallery from the gallery list
 				if op.galleryName != "" {
 					// A name containing "@" takes the InstallModelFromGallery
 					// path; any other name is looked up by name only.
 					if strings.Contains(op.galleryName, "@") {
 						err = gallery.InstallModelFromGallery(op.galleries, op.galleryName, g.modelPath, op.req, progressCallback)
 					} else {
 						err = gallery.InstallModelFromGalleryByName(op.galleries, op.galleryName, g.modelPath, op.req, progressCallback)
 					}
 				} else {
 					err = prepareModel(g.modelPath, op.req, cm, progressCallback)
 				}
 				if err != nil {
 					updateError(err)
 					continue
 				}
 				// Reload models
 				err = cm.LoadConfigs(g.modelPath)
 				if err != nil {
 					updateError(err)
 					continue
 				}
 				g.updateStatus(op.id, &galleryOpStatus{Processed: true, Message: "completed", Progress: 100})
 			}
 		}
 	}()
 }
 // galleryModel is one entry of the JSON array accepted by
 // ApplyGalleryFromString: a gallery model plus an optional gallery ID. An
 // empty ID means the embedded model is used as the source itself.
 type galleryModel struct {
 	gallery.GalleryModel
 	ID string `json:"id"`
 }
 // ApplyGalleryFromFile reads the file s and hands its contents to
 // ApplyGalleryFromString.
 func ApplyGalleryFromFile(modelPath, s string, cm *config.ConfigLoader, galleries []gallery.Gallery) error {
 	contents, readErr := os.ReadFile(s)
 	if readErr == nil {
 		return ApplyGalleryFromString(modelPath, string(contents), cm, galleries)
 	}
 	return readErr
 }
 // ApplyGalleryFromString decodes s as a JSON array of gallery models and
 // installs each of them under modelPath. Entries without an ID are installed
 // from their own URL; entries with an ID are looked up in galleries.
 //
 // Every entry is attempted even if an earlier one fails. The first error
 // encountered is returned, so a failure is never hidden by a later success.
 func ApplyGalleryFromString(modelPath, s string, cm *config.ConfigLoader, galleries []gallery.Gallery) error {
 	var requests []galleryModel
 	err := json.Unmarshal([]byte(s), &requests)
 	if err != nil {
 		return err
 	}
 	var firstErr error
 	for _, r := range requests {
 		utils.ResetDownloadTimers()
 		if r.ID == "" {
 			err = prepareModel(modelPath, r.GalleryModel, cm, utils.DisplayDownloadFunction)
 		} else {
 			err = gallery.InstallModelFromGallery(galleries, r.ID, modelPath, r.GalleryModel, utils.DisplayDownloadFunction)
 		}
 		// Remember the first failure; overwriting err on each iteration used
 		// to report only the outcome of the last entry.
 		if err != nil && firstErr == nil {
 			firstErr = err
 		}
 	}
 	return firstErr
 }
 /// Endpoints
 // GetOpStatusEndpoint returns a handler that responds with the JSON status
 // of the job named by the "uuid" route parameter, or an error for an
 // unknown id.
 func GetOpStatusEndpoint(g *galleryApplier) func(c *fiber.Ctx) error {
 	handler := func(c *fiber.Ctx) error {
 		if status := g.getStatus(c.Params("uuid")); status != nil {
 			return c.JSON(status)
 		}
 		return fmt.Errorf("could not find any status for ID")
 	}
 	return handler
 }
 // GalleryModel is the JSON body accepted by ApplyModelGalleryEndpoint: a
 // gallery model plus an optional ID that is passed on as the gallery name
 // of the queued job.
 type GalleryModel struct {
 	ID string `json:"id"`
 	gallery.GalleryModel
 }
 // ApplyModelGalleryEndpoint returns a handler that parses a GalleryModel
 // from the body, queues it on g under a fresh UUID and responds with that
 // id and the URL where the job status can be polled. The send on g blocks
 // until the job is received.
 func ApplyModelGalleryEndpoint(modelPath string, cm *config.ConfigLoader, g chan galleryOp, galleries []gallery.Gallery) func(c *fiber.Ctx) error {
 	type jobResponse struct {
 		ID        string `json:"uuid"`
 		StatusURL string `json:"status"`
 	}
 	return func(c *fiber.Ctx) error {
 		var request GalleryModel
 		// Get input data from the request body
 		if err := c.BodyParser(&request); err != nil {
 			return err
 		}
 		jobUUID, err := uuid.NewUUID()
 		if err != nil {
 			return err
 		}
 		jobID := jobUUID.String()
 		g <- galleryOp{
 			req:         request.GalleryModel,
 			id:          jobID,
 			galleryName: request.ID,
 			galleries:   galleries,
 		}
 		return c.JSON(jobResponse{ID: jobID, StatusURL: c.BaseURL() + "/models/jobs/" + jobID})
 	}
 }
 // ListModelFromGalleryEndpoint returns a handler that lists the models
 // available in galleries (resolved against basePath) and sends them as
 // JSON. The galleries and each model found are written to the debug log.
 func ListModelFromGalleryEndpoint(galleries []gallery.Gallery, basePath string) func(c *fiber.Ctx) error {
 	handler := func(c *fiber.Ctx) error {
 		log.Debug().Msgf("Listing models from galleries: %+v", galleries)
 		available, listErr := gallery.AvailableGalleryModels(galleries, basePath)
 		if listErr != nil {
 			return listErr
 		}
 		log.Debug().Msgf("Models found from galleries: %+v", available)
 		for _, entry := range available {
 			log.Debug().Msgf("Model found from galleries: %+v", entry)
 		}
 		payload, encodeErr := json.Marshal(available)
 		if encodeErr != nil {
 			return encodeErr
 		}
 		return c.Send(payload)
 	}
 	return handler
 }
--- a/api/localai/localai.go
+++ b/api/localai/localai.go
@ -1,31 +0,0 @@
 package localai
 import (
 	"github.com/go-skynet/LocalAI/api/backend"
 	config "github.com/go-skynet/LocalAI/api/config"
 	"github.com/go-skynet/LocalAI/api/options"
 	"github.com/gofiber/fiber/v2"
 )
 // TTSRequest is the body of a text-to-speech request: Input is the text to
 // synthesize and Model the model file to use.
 type TTSRequest struct {
 	Model string `json:"model" yaml:"model"`
 	Input string `json:"input" yaml:"input"`
 }
 // TTSEndpoint returns a handler that parses a TTSRequest from the body,
 // synthesizes the text with backend.ModelTTS and sends the resulting audio
 // file as a download. cm is not used.
 func TTSEndpoint(cm *config.ConfigLoader, o *options.Option) func(c *fiber.Ctx) error {
 	handler := func(c *fiber.Ctx) error {
 		var request TTSRequest
 		// Get input data from the request body
 		if parseErr := c.BodyParser(&request); parseErr != nil {
 			return parseErr
 		}
 		wavPath, _, ttsErr := backend.ModelTTS(request.Input, request.Model, o.Loader, o)
 		if ttsErr != nil {
 			return ttsErr
 		}
 		return c.Download(wavPath)
 	}
 	return handler
 }
--- a/api/openai.go
+++ b/api/openai.go
@ -0,0 +1,678 @@
 package api
 import (
 	"bufio"
 	"bytes"
 	"encoding/base64"
 	"encoding/json"
 	"fmt"
 	"io"
 	"io/ioutil"
 	"net/http"
 	"os"
 	"path"
 	"path/filepath"
 	"strconv"
 	"strings"
 	"github.com/ggerganov/whisper.cpp/bindings/go/pkg/whisper"
 	model "github.com/go-skynet/LocalAI/pkg/model"
 	whisperutil "github.com/go-skynet/LocalAI/pkg/whisper"
 	llama "github.com/go-skynet/go-llama.cpp"
 	"github.com/gofiber/fiber/v2"
 	"github.com/rs/zerolog/log"
 	"github.com/valyala/fasthttp"
 )
 // APIError provides error information returned by the OpenAI API.
 // Code and Param are left out of the JSON output when unset (omitempty);
 // Message and Type are always serialized.
 type APIError struct {
 	Code    any     `json:"code,omitempty"`
 	Message string  `json:"message"`
 	Param   *string `json:"param,omitempty"`
 	Type    string  `json:"type"`
 }
 // ErrorResponse is the JSON envelope that carries an APIError under the
 // "error" key; the key is omitted when Error is nil.
 type ErrorResponse struct {
 	Error *APIError `json:"error,omitempty"`
 }
 // OpenAIUsage holds the token counters embedded in an OpenAIResponse under
 // the "usage" key. All three counters are always serialized.
 type OpenAIUsage struct {
 	PromptTokens     int `json:"prompt_tokens"`
 	CompletionTokens int `json:"completion_tokens"`
 	TotalTokens      int `json:"total_tokens"`
 }
 // Item is one entry of the "data" list of an OpenAIResponse.
 // embeddingsEndpoint fills Embedding, Index and Object; imageEndpoint fills
 // either URL or B64JSON. Embedding and Index carry no omitempty, so they are
 // serialized even for image items.
 type Item struct {
 	Embedding []float32 `json:"embedding"`
 	Index     int       `json:"index"`
 	Object    string    `json:"object,omitempty"`
 	// Images
 	URL     string `json:"url,omitempty"`
 	B64JSON string `json:"b64_json,omitempty"`
 }
 // OpenAIResponse is the response envelope shared by the completion, chat,
 // edit, embeddings and image endpoints in this file. Choices is used by the
 // text-producing endpoints and Data by embeddings/images; both are omitted
 // from the JSON output when empty. Usage has no omitempty tag and is always
 // serialized.
 type OpenAIResponse struct {
 	Created int      `json:"created,omitempty"`
 	Object  string   `json:"object,omitempty"`
 	ID      string   `json:"id,omitempty"`
 	Model   string   `json:"model,omitempty"`
 	Choices []Choice `json:"choices,omitempty"`
 	Data    []Item   `json:"data,omitempty"`
 	Usage OpenAIUsage `json:"usage"`
 }
 // Choice is a single generated alternative inside an OpenAIResponse.
 // In this file, Text is set by the completion/edit endpoints, Message by the
 // non-streaming chat endpoint and Delta by the streaming chat endpoint.
 // Every field is omitempty, so an Index of 0 is not serialized.
 type Choice struct {
 	Index        int      `json:"index,omitempty"`
 	FinishReason string   `json:"finish_reason,omitempty"`
 	Message      *Message `json:"message,omitempty"`
 	Delta        *Message `json:"delta,omitempty"`
 	Text         string   `json:"text,omitempty"`
 }
 // Message is a chat message: it is read from OpenAIRequest.Messages and
 // written into Choice.Message / Choice.Delta. Empty fields are omitted from
 // the JSON output.
 type Message struct {
 	Role    string `json:"role,omitempty" yaml:"role"`
 	Content string `json:"content,omitempty" yaml:"content"`
 }
 // OpenAIModel is one entry of the model listing produced by listModels,
 // which sets ID to the model/config name and Object to "model".
 type OpenAIModel struct {
 	ID     string `json:"id"`
 	Object string `json:"object"`
 }
 // OpenAIRequest is the single request type shared by the endpoints in this
 // file (completion, chat, edit, embeddings, image, transcription). Each
 // endpoint reads only the subset of fields relevant to it; see the per-field
 // comments below.
 type OpenAIRequest struct {
 	Model string `json:"model" yaml:"model"`
 	// whisper
 	File     string `json:"file" validate:"required"`
 	Language string `json:"language"`
 	//whisper/image
 	ResponseFormat string `json:"response_format"`
 	// image
 	Size string `json:"size"`
 	// Prompt is read only by completion/image API calls
 	Prompt interface{} `json:"prompt" yaml:"prompt"`
 	// Edit endpoint
 	Instruction string      `json:"instruction" yaml:"instruction"`
 	Input       interface{} `json:"input" yaml:"input"`
 	Stop interface{} `json:"stop" yaml:"stop"`
 	// Messages is read only by chat/completion API calls
 	Messages []Message `json:"messages" yaml:"messages"`
 	Stream bool `json:"stream"`
 	Echo   bool `json:"echo"`
 	// Common options between all the API calls
 	TopP        float64 `json:"top_p" yaml:"top_p"`
 	TopK        int     `json:"top_k" yaml:"top_k"`
 	Temperature float64 `json:"temperature" yaml:"temperature"`
 	Maxtokens   int     `json:"max_tokens" yaml:"max_tokens"`
 	N int `json:"n"`
 	// Custom parameters - not present in the OpenAI API
 	Batch         int     `json:"batch" yaml:"batch"`
 	F16           bool    `json:"f16" yaml:"f16"`
 	IgnoreEOS     bool    `json:"ignore_eos" yaml:"ignore_eos"`
 	RepeatPenalty float64 `json:"repeat_penalty" yaml:"repeat_penalty"`
 	Keep          int     `json:"n_keep" yaml:"n_keep"`
 	MirostatETA float64 `json:"mirostat_eta" yaml:"mirostat_eta"`
 	MirostatTAU float64 `json:"mirostat_tau" yaml:"mirostat_tau"`
 	Mirostat    int     `json:"mirostat" yaml:"mirostat"`
 	Seed int `json:"seed" yaml:"seed"`
 	// Image (not supported by OpenAI)
 	Mode int `json:"mode"`
 	Step int `json:"step"`
 }
 // defaultRequest returns an OpenAIRequest for modelFile with the default
 // sampling parameters filled in (top_p 0.7, top_k 80, temperature 0.9,
 // max_tokens 512). Every other field keeps its zero value.
 func defaultRequest(modelFile string) OpenAIRequest {
 	var req OpenAIRequest
 	req.Model = modelFile
 	req.Temperature = 0.9
 	req.TopP = 0.7
 	req.TopK = 80
 	req.Maxtokens = 512
 	return req
 }
 // completionEndpoint returns the handler for text completions.
 // For every prompt in the resolved configuration it applies the prompt
 // template (when one can be loaded), runs the prediction and collects the
 // generated texts as choices of a "text_completion" response.
 // https://platform.openai.com/docs/api-reference/completions
 func completionEndpoint(cm *ConfigMerger, o *Option) func(c *fiber.Ctx) error {
 	// collect stores one prediction as a plain-text choice
 	collect := func(text string, choices *[]Choice) {
 		*choices = append(*choices, Choice{Text: text})
 	}
 	return func(c *fiber.Ctx) error {
 		modelFile, input, err := readInput(c, o.loader, true)
 		if err != nil {
 			return fmt.Errorf("failed reading parameters from request:%w", err)
 		}
 		config, input, err := readConfig(modelFile, input, cm, o.loader, o.debug, o.threads, o.ctxSize, o.f16)
 		if err != nil {
 			return fmt.Errorf("failed reading parameters from request:%w", err)
 		}
 		log.Debug().Msgf("Parameter Config: %+v", config)
 		// Prefer the completion template from the config, fall back to the model name
 		templateFile := config.TemplateConfig.Completion
 		if templateFile == "" {
 			templateFile = config.Model
 		}
 		var choices []Choice
 		for _, prompt := range config.PromptStrings {
 			// A model can have a "file.bin.tmpl" file associated with a prompt template prefix.
 			// When the template cannot be applied the prompt is used untouched.
 			if templated, tmplErr := o.loader.TemplatePrefix(templateFile, struct {
 				Input string
 			}{Input: prompt}); tmplErr == nil {
 				prompt = templated
 				log.Debug().Msgf("Template found, input modified to: %s", prompt)
 			}
 			predicted, err := ComputeChoices(prompt, input, config, o.loader, collect, nil)
 			if err != nil {
 				return err
 			}
 			choices = append(choices, predicted...)
 		}
 		resp := &OpenAIResponse{
 			Object:  "text_completion",
 			Choices: choices,
 			Model:   input.Model, // we have to return what the user sent here, due to OpenAI spec.
 		}
 		// Marshalled only for the debug log; the response itself is written by c.JSON
 		jsonResult, _ := json.Marshal(resp)
 		log.Debug().Msgf("Response: %s", jsonResult)
 		return c.JSON(resp)
 	}
 }
 // embeddingsEndpoint returns the handler computing embeddings.
 // Tokenized inputs (config.InputToken) are processed first, then string
 // inputs (config.InputStrings); every result becomes one Item of the "list"
 // response.
 // https://platform.openai.com/docs/api-reference/embeddings
 func embeddingsEndpoint(cm *ConfigMerger, o *Option) func(c *fiber.Ctx) error {
 	return func(c *fiber.Ctx) error {
 		model, input, err := readInput(c, o.loader, true)
 		if err != nil {
 			return fmt.Errorf("failed reading parameters from request:%w", err)
 		}
 		config, input, err := readConfig(model, input, cm, o.loader, o.debug, o.threads, o.ctxSize, o.f16)
 		if err != nil {
 			return fmt.Errorf("failed reading parameters from request:%w", err)
 		}
 		log.Debug().Msgf("Parameter Config: %+v", config)
 		items := []Item{}
 		for _, s := range config.InputToken {
 			// get the model function to call for the result
 			embedFn, err := ModelEmbedding("", s, o.loader, *config)
 			if err != nil {
 				return err
 			}
 			embeddings, err := embedFn()
 			if err != nil {
 				return err
 			}
 			// Index is the position in the response list, so it stays unique
 			// even when both token and string inputs are present.
 			items = append(items, Item{Embedding: embeddings, Index: len(items), Object: "embedding"})
 		}
 		for _, s := range config.InputStrings {
 			// get the model function to call for the result
 			embedFn, err := ModelEmbedding(s, []int{}, o.loader, *config)
 			if err != nil {
 				return err
 			}
 			embeddings, err := embedFn()
 			if err != nil {
 				return err
 			}
 			// Continue numbering after the token inputs instead of restarting at 0
 			items = append(items, Item{Embedding: embeddings, Index: len(items), Object: "embedding"})
 		}
 		resp := &OpenAIResponse{
 			Model:  input.Model, // we have to return what the user sent here, due to OpenAI spec.
 			Data:   items,
 			Object: "list",
 		}
 		// Marshalled only for the debug log; the response itself is written by c.JSON
 		jsonResult, _ := json.Marshal(resp)
 		log.Debug().Msgf("Response: %s", jsonResult)
 		// Return the prediction in the response body
 		return c.JSON(resp)
 	}
 }
 // chatEndpoint returns the handler for chat completions.
 // The request messages are flattened into a single prompt (each message is
 // optionally prefixed with the role string configured in config.Roles), the
 // chat template is applied when it can be loaded, and the prediction is
 // either streamed as server-sent events (input.Stream) or returned as a
 // single "chat.completion" response.
 func chatEndpoint(cm *ConfigMerger, o *Option) func(c *fiber.Ctx) error {
 	// process runs the prediction for a streaming request and pushes one
 	// chunk per generated token on the responses channel.
 	process := func(s string, req *OpenAIRequest, config *Config, loader *model.ModelLoader, responses chan OpenAIResponse) {
 		// Always close the channel so the stream writer terminates,
 		// whatever happens during inference.
 		defer close(responses)
 		initialMessage := OpenAIResponse{
 			Model:   req.Model, // we have to return what the user sent here, due to OpenAI spec.
 			Choices: []Choice{{Delta: &Message{Role: "assistant"}}},
 			Object:  "chat.completion.chunk",
 		}
 		responses <- initialMessage
 		_, err := ComputeChoices(s, req, config, loader, func(s string, c *[]Choice) {}, func(s string) bool {
 			resp := OpenAIResponse{
 				Model:   req.Model, // we have to return what the user sent here, due to OpenAI spec.
 				Choices: []Choice{{Delta: &Message{Content: s}}},
 				Object:  "chat.completion.chunk",
 			}
 			log.Debug().Msgf("Sending goroutine: %s", s)
 			responses <- resp
 			return true
 		})
 		if err != nil {
 			// The response headers are already sent at this point, so the
 			// failure can only be reported in the logs.
 			log.Error().Msgf("stream inference error: %s", err.Error())
 		}
 	}
 	return func(c *fiber.Ctx) error {
 		model, input, err := readInput(c, o.loader, true)
 		if err != nil {
 			return fmt.Errorf("failed reading parameters from request:%w", err)
 		}
 		config, input, err := readConfig(model, input, cm, o.loader, o.debug, o.threads, o.ctxSize, o.f16)
 		if err != nil {
 			return fmt.Errorf("failed reading parameters from request:%w", err)
 		}
 		log.Debug().Msgf("Parameter Config: %+v", config)
 		var predInput string
 		mess := []string{}
 		for _, i := range input.Messages {
 			var content string
 			// Prefix the message with the configured role string, if any
 			r := config.Roles[i.Role]
 			if r != "" {
 				content = fmt.Sprint(r, " ", i.Content)
 			} else {
 				content = i.Content
 			}
 			mess = append(mess, content)
 		}
 		predInput = strings.Join(mess, "\n")
 		if input.Stream {
 			log.Debug().Msgf("Stream request received")
 			c.Context().SetContentType("text/event-stream")
 			//c.Response().Header.SetContentType(fiber.MIMETextHTMLCharsetUTF8)
 			//	c.Set("Content-Type", "text/event-stream")
 			c.Set("Cache-Control", "no-cache")
 			c.Set("Connection", "keep-alive")
 			c.Set("Transfer-Encoding", "chunked")
 		}
 		templateFile := config.Model
 		if config.TemplateConfig.Chat != "" {
 			templateFile = config.TemplateConfig.Chat
 		}
 		// A model can have a "file.bin.tmpl" file associated with a prompt template prefix
 		templatedInput, err := o.loader.TemplatePrefix(templateFile, struct {
 			Input string
 		}{Input: predInput})
 		if err == nil {
 			predInput = templatedInput
 			log.Debug().Msgf("Template found, input modified to: %s", predInput)
 		}
 		if input.Stream {
 			responses := make(chan OpenAIResponse)
 			go process(predInput, input, config, o.loader, responses)
 			c.Context().SetBodyStreamWriter(fasthttp.StreamWriter(func(w *bufio.Writer) {
 				// Drain the channel until process closes it
 				for ev := range responses {
 					var buf bytes.Buffer
 					enc := json.NewEncoder(&buf)
 					enc.Encode(ev)
 					log.Debug().Msgf("Sending chunk: %s", buf.String())
 					// Encode already appends a newline, which together with
 					// the one below terminates the SSE event.
 					fmt.Fprintf(w, "data: %v\n", buf.String())
 					w.Flush()
 				}
 				// Final chunk carrying the finish reason, then the [DONE] marker
 				resp := &OpenAIResponse{
 					Model:   input.Model, // we have to return what the user sent here, due to OpenAI spec.
 					Choices: []Choice{{FinishReason: "stop"}},
 				}
 				respData, _ := json.Marshal(resp)
 				w.WriteString(fmt.Sprintf("data: %s\n\n", respData))
 				w.WriteString("data: [DONE]\n\n")
 				w.Flush()
 			}))
 			return nil
 		}
 		result, err := ComputeChoices(predInput, input, config, o.loader, func(s string, c *[]Choice) {
 			*c = append(*c, Choice{Message: &Message{Role: "assistant", Content: s}})
 		}, nil)
 		if err != nil {
 			return err
 		}
 		resp := &OpenAIResponse{
 			Model:   input.Model, // we have to return what the user sent here, due to OpenAI spec.
 			Choices: result,
 			Object:  "chat.completion",
 		}
 		// Marshalled only for the debug log; the response itself is written by c.JSON
 		respData, _ := json.Marshal(resp)
 		log.Debug().Msgf("Response: %s", respData)
 		// Return the prediction in the response body
 		return c.JSON(resp)
 	}
 }
 // editEndpoint returns the handler for the edit API.
 // For every input string in the resolved configuration it applies the edit
 // template (when one can be loaded), exposing both the input text and the
 // request instruction to it, runs the prediction and collects the results as
 // choices of an "edit" response.
 func editEndpoint(cm *ConfigMerger, o *Option) func(c *fiber.Ctx) error {
 	return func(c *fiber.Ctx) error {
 		model, input, err := readInput(c, o.loader, true)
 		if err != nil {
 			return fmt.Errorf("failed reading parameters from request:%w", err)
 		}
 		config, input, err := readConfig(model, input, cm, o.loader, o.debug, o.threads, o.ctxSize, o.f16)
 		if err != nil {
 			return fmt.Errorf("failed reading parameters from request:%w", err)
 		}
 		log.Debug().Msgf("Parameter Config: %+v", config)
 		templateFile := config.Model
 		if config.TemplateConfig.Edit != "" {
 			templateFile = config.TemplateConfig.Edit
 		}
 		var result []Choice
 		for _, i := range config.InputStrings {
 			// A model can have a "file.bin.tmpl" file associated with a prompt template prefix
 			templatedInput, err := o.loader.TemplatePrefix(templateFile, struct {
 				Input       string
 				Instruction string
 			}{
 				Input: i,
 				// Pass the instruction along: without it a template using
 				// {{.Instruction}} would always render it as empty.
 				Instruction: input.Instruction,
 			})
 			if err == nil {
 				i = templatedInput
 				log.Debug().Msgf("Template found, input modified to: %s", i)
 			}
 			r, err := ComputeChoices(i, input, config, o.loader, func(s string, c *[]Choice) {
 				*c = append(*c, Choice{Text: s})
 			}, nil)
 			if err != nil {
 				return err
 			}
 			result = append(result, r...)
 		}
 		resp := &OpenAIResponse{
 			Model:   input.Model, // we have to return what the user sent here, due to OpenAI spec.
 			Choices: result,
 			Object:  "edit",
 		}
 		// Marshalled only for the debug log; the response itself is written by c.JSON
 		jsonResult, _ := json.Marshal(resp)
 		log.Debug().Msgf("Response: %s", jsonResult)
 		// Return the prediction in the response body
 		return c.JSON(resp)
 	}
 }
 // https://platform.openai.com/docs/api-reference/images/create
 /*
 *
 	curl http://localhost:8080/v1/images/generations \
 	  -H "Content-Type: application/json" \
 	  -d '{
 	    "prompt": "A cute baby sea otter",
 	    "n": 1,
 	    "size": "512x512"
 	  }'
 *
 */
 // imageEndpoint returns the handler for image generation.
 // The request "size" must have the form "<width>x<height>". Each prompt may
 // carry a negative prompt after a "|" separator. For every prompt, N images
 // (1 when N is 0) are generated; each is returned either as a base64 payload
 // (response_format "b64_json") or as a URL under /generated-images/ pointing
 // to a file stored in o.imageDir.
 func imageEndpoint(cm *ConfigMerger, o *Option) func(c *fiber.Ctx) error {
 	return func(c *fiber.Ctx) error {
 		m, input, err := readInput(c, o.loader, false)
 		if err != nil {
 			return fmt.Errorf("failed reading parameters from request:%w", err)
 		}
 		if m == "" {
 			m = model.StableDiffusionBackend
 		}
 		log.Debug().Msgf("Loading model: %+v", m)
 		config, input, err := readConfig(m, input, cm, o.loader, o.debug, 0, 0, false)
 		if err != nil {
 			return fmt.Errorf("failed reading parameters from request:%w", err)
 		}
 		log.Debug().Msgf("Parameter Config: %+v", config)
 		// XXX: Only stablediffusion is supported for now
 		if config.Backend == "" {
 			config.Backend = model.StableDiffusionBackend
 		}
 		sizeParts := strings.Split(input.Size, "x")
 		if len(sizeParts) != 2 {
 			return fmt.Errorf("Invalid value for 'size'")
 		}
 		width, err := strconv.Atoi(sizeParts[0])
 		if err != nil {
 			return fmt.Errorf("Invalid value for 'size'")
 		}
 		height, err := strconv.Atoi(sizeParts[1])
 		if err != nil {
 			return fmt.Errorf("Invalid value for 'size'")
 		}
 		b64JSON := input.ResponseFormat == "b64_json"
 		// The values below do not depend on the prompt, compute them once
 		n := input.N
 		if n == 0 {
 			n = 1
 		}
 		mode := input.Mode
 		step := 15
 		if input.Step != 0 {
 			step = input.Step
 		}
 		// base64 results only need a scratch file (default temp dir);
 		// URL results must live in the served image directory.
 		tempDir := ""
 		if !b64JSON {
 			tempDir = o.imageDir
 		}
 		baseURL := c.BaseURL()
 		var result []Item
 		for _, i := range config.PromptStrings {
 			// "positive|negative": everything after the first "|" separator
 			// (up to the next one) is the negative prompt
 			prompts := strings.Split(i, "|")
 			positive_prompt := prompts[0]
 			negative_prompt := ""
 			if len(prompts) > 1 {
 				negative_prompt = prompts[1]
 			}
 			for j := 0; j < n; j++ {
 				// Create a temporary file to reserve a unique name
 				outputFile, err := ioutil.TempFile(tempDir, "b64")
 				if err != nil {
 					return err
 				}
 				outputFile.Close()
 				output := outputFile.Name() + ".png"
 				// Rename the temporary file
 				if err := os.Rename(outputFile.Name(), output); err != nil {
 					os.RemoveAll(outputFile.Name())
 					return err
 				}
 				fn, err := ImageGeneration(height, width, mode, step, input.Seed, positive_prompt, negative_prompt, output, o.loader, *config)
 				if err != nil {
 					// Do not leave the placeholder file behind on failure
 					os.RemoveAll(output)
 					return err
 				}
 				if err := fn(); err != nil {
 					os.RemoveAll(output)
 					return err
 				}
 				item := Item{}
 				if b64JSON {
 					data, err := os.ReadFile(output)
 					// The file is only a scratch area in this mode: remove it
 					// right away instead of stacking defers inside the loop.
 					os.RemoveAll(output)
 					if err != nil {
 						return err
 					}
 					item.B64JSON = base64.StdEncoding.EncodeToString(data)
 				} else {
 					base := filepath.Base(output)
 					item.URL = baseURL + "/generated-images/" + base
 				}
 				result = append(result, item)
 			}
 		}
 		resp := &OpenAIResponse{
 			Data: result,
 		}
 		// Marshalled only for the debug log; the response itself is written by c.JSON
 		jsonResult, _ := json.Marshal(resp)
 		log.Debug().Msgf("Response: %s", jsonResult)
 		// Return the prediction in the response body
 		return c.JSON(resp)
 	}
 }
 // transcriptEndpoint returns the handler for audio transcription.
 // The uploaded "file" form field is copied into a temporary directory
 // (removed when the handler returns), the whisper backend is loaded for the
 // configured model and the transcription is returned as {"text": ...}.
 // https://platform.openai.com/docs/api-reference/audio/create
 func transcriptEndpoint(cm *ConfigMerger, o *Option) func(c *fiber.Ctx) error {
 	return func(c *fiber.Ctx) error {
 		m, input, err := readInput(c, o.loader, false)
 		if err != nil {
 			return fmt.Errorf("failed reading parameters from request:%w", err)
 		}
 		config, input, err := readConfig(m, input, cm, o.loader, o.debug, o.threads, o.ctxSize, o.f16)
 		if err != nil {
 			return fmt.Errorf("failed reading parameters from request:%w", err)
 		}
 		// retrieve the file data from the request
 		file, err := c.FormFile("file")
 		if err != nil {
 			return err
 		}
 		f, err := file.Open()
 		if err != nil {
 			return err
 		}
 		defer f.Close()
 		dir, err := os.MkdirTemp("", "whisper")
 		if err != nil {
 			return err
 		}
 		defer os.RemoveAll(dir)
 		// Only the base name of the client-supplied file name is used
 		dst := filepath.Join(dir, path.Base(file.Filename))
 		dstFile, err := os.Create(dst)
 		if err != nil {
 			return err
 		}
 		_, copyErr := io.Copy(dstFile, f)
 		// Close the copy before it is handed to the transcriber: this releases
 		// the descriptor (previously leaked on every request) and surfaces
 		// write errors reported at close time.
 		closeErr := dstFile.Close()
 		if copyErr != nil {
 			log.Debug().Msgf("Audio file copying error %+v - %+v - err %+v", file.Filename, dst, copyErr)
 			return copyErr
 		}
 		if closeErr != nil {
 			return closeErr
 		}
 		log.Debug().Msgf("Audio file copied to: %+v", dst)
 		whisperModel, err := o.loader.BackendLoader(model.WhisperBackend, config.Model, []llama.ModelOption{}, uint32(config.Threads))
 		if err != nil {
 			return err
 		}
 		if whisperModel == nil {
 			return fmt.Errorf("could not load whisper model")
 		}
 		w, ok := whisperModel.(whisper.Model)
 		if !ok {
 			return fmt.Errorf("loader returned non-whisper object")
 		}
 		tr, err := whisperutil.Transcript(w, dst, input.Language, uint(config.Threads))
 		if err != nil {
 			return err
 		}
 		log.Debug().Msgf("Trascribed: %+v", tr)
 		// TODO: handle different outputs here
 		return c.Status(http.StatusOK).JSON(fiber.Map{"text": tr})
 	}
 }
 // listModels returns the handler listing the available models.
 // The list contains every model reported by the loader, followed by the
 // configuration names from cm that the loader did not already report.
 func listModels(loader *model.ModelLoader, cm *ConfigMerger) func(ctx *fiber.Ctx) error {
 	// modelList is the JSON shape of the response
 	type modelList struct {
 		Object string        `json:"object"`
 		Data   []OpenAIModel `json:"data"`
 	}
 	return func(c *fiber.Ctx) error {
 		loaded, err := loader.ListModels()
 		if err != nil {
 			return err
 		}
 		// Names already emitted, used to skip duplicates coming from the configs
 		seen := make(map[string]struct{}, len(loaded))
 		entries := make([]OpenAIModel, 0, len(loaded))
 		for _, name := range loaded {
 			seen[name] = struct{}{}
 			entries = append(entries, OpenAIModel{ID: name, Object: "model"})
 		}
 		for _, name := range cm.ListConfigs() {
 			if _, known := seen[name]; known {
 				continue
 			}
 			entries = append(entries, OpenAIModel{ID: name, Object: "model"})
 		}
 		return c.JSON(modelList{Object: "list", Data: entries})
 	}
 }
--- a/api/openai/api.go
+++ b/api/openai/api.go
@ -1,105 +0,0 @@
 package openai
 import (
 	config "github.com/go-skynet/LocalAI/api/config"
 	"github.com/go-skynet/LocalAI/pkg/grammar"
 )
 // APIError provides error information returned by the OpenAI API.
 // Code and Param are left out of the JSON output when unset (omitempty);
 // Message and Type are always serialized.
 type APIError struct {
 	Code    any     `json:"code,omitempty"`
 	Message string  `json:"message"`
 	Param   *string `json:"param,omitempty"`
 	Type    string  `json:"type"`
 }
 // ErrorResponse is the JSON envelope that carries an APIError under the
 // "error" key; the key is omitted when Error is nil.
 type ErrorResponse struct {
 	Error *APIError `json:"error,omitempty"`
 }
 // OpenAIUsage holds the token counters embedded in an OpenAIResponse under
 // the "usage" key. All three counters are always serialized.
 type OpenAIUsage struct {
 	PromptTokens     int `json:"prompt_tokens"`
 	CompletionTokens int `json:"completion_tokens"`
 	TotalTokens      int `json:"total_tokens"`
 }
 // Item is one entry of OpenAIResponse.Data. It groups embedding fields
 // (Embedding, Index, Object) and image fields (URL, B64JSON). Embedding and
 // Index carry no omitempty, so they are serialized even when unset.
 type Item struct {
 	Embedding []float32 `json:"embedding"`
 	Index     int       `json:"index"`
 	Object    string    `json:"object,omitempty"`
 	// Images
 	URL     string `json:"url,omitempty"`
 	B64JSON string `json:"b64_json,omitempty"`
 }
 // OpenAIResponse is the response envelope used by the endpoints of this
 // package. Choices and Data are omitted from the JSON output when empty;
 // Usage has no omitempty tag and is always serialized.
 type OpenAIResponse struct {
 	Created int      `json:"created,omitempty"`
 	Object  string   `json:"object,omitempty"`
 	ID      string   `json:"id,omitempty"`
 	Model   string   `json:"model,omitempty"`
 	Choices []Choice `json:"choices,omitempty"`
 	Data    []Item   `json:"data,omitempty"`
 	Usage OpenAIUsage `json:"usage"`
 }
 // Choice is a single generated alternative inside an OpenAIResponse.
 // Index has no omitempty tag, so it is serialized even when it is 0; the
 // other fields are omitted when empty.
 type Choice struct {
 	Index        int      `json:"index"`
 	FinishReason string   `json:"finish_reason,omitempty"`
 	Message      *Message `json:"message,omitempty"`
 	Delta        *Message `json:"delta,omitempty"`
 	Text         string   `json:"text,omitempty"`
 }
 // Message is a chat message, used both in requests (OpenAIRequest.Messages)
 // and in responses (Choice.Message / Choice.Delta).
 type Message struct {
 	// The message role
 	Role string `json:"role,omitempty" yaml:"role"`
 	// The message content. It is a pointer without omitempty, so a nil
 	// Content is serialized as JSON null rather than dropped.
 	Content *string `json:"content" yaml:"content"`
 	// A result of a function call
 	FunctionCall interface{} `json:"function_call,omitempty" yaml:"function_call,omitempty"`
 }
 // OpenAIModel describes one model by its ID and object type; both fields
 // are always serialized.
 type OpenAIModel struct {
 	ID     string `json:"id"`
 	Object string `json:"object"`
 }
 // OpenAIRequest is the request type shared by the endpoints of this package.
 // It embeds config.PredictionOptions and adds the endpoint-specific fields
 // documented below; each endpoint reads only the subset relevant to it.
 type OpenAIRequest struct {
 	config.PredictionOptions
 	// whisper
 	File string `json:"file" validate:"required"`
 	//whisper/image
 	ResponseFormat string `json:"response_format"`
 	// image
 	Size string `json:"size"`
 	// Prompt is read only by completion/image API calls
 	Prompt interface{} `json:"prompt" yaml:"prompt"`
 	// Edit endpoint
 	Instruction string      `json:"instruction" yaml:"instruction"`
 	Input       interface{} `json:"input" yaml:"input"`
 	Stop interface{} `json:"stop" yaml:"stop"`
 	// Messages is read only by chat/completion API calls
 	Messages []Message `json:"messages" yaml:"messages"`
 	// A list of available functions to call
 	Functions    []grammar.Function `json:"functions" yaml:"functions"`
 	FunctionCall interface{}        `json:"function_call" yaml:"function_call"` // might be a string or an object
 	Stream bool `json:"stream"`
 	// Image (not supported by OpenAI)
 	Mode int `json:"mode"`
 	Step int `json:"step"`
 	// A grammar to constrain the LLM output
 	Grammar string `json:"grammar" yaml:"grammar"`
 	// Pre-built function structure; ChatEndpoint derives config.Grammar from it
 	// when the request does not trigger function processing
 	JSONFunctionGrammarObject *grammar.JSONFunctionStructure `json:"grammar_json_functions" yaml:"grammar_json_functions"`
 }
--- a/api/openai/chat.go
+++ b/api/openai/chat.go
@ -1,322 +0,0 @@
 package openai
 import (
 	"bufio"
 	"bytes"
 	"encoding/json"
 	"fmt"
 	"strings"
 	"github.com/go-skynet/LocalAI/api/backend"
 	config "github.com/go-skynet/LocalAI/api/config"
 	"github.com/go-skynet/LocalAI/api/options"
 	"github.com/go-skynet/LocalAI/pkg/grammar"
 	model "github.com/go-skynet/LocalAI/pkg/model"
 	"github.com/gofiber/fiber/v2"
 	"github.com/rs/zerolog/log"
 	"github.com/valyala/fasthttp"
 )
 // ChatEndpoint returns the handler for chat completions.
 //
 // The request messages are flattened into a single prompt (optionally
 // prefixed with the role strings from config.Roles), the chat or functions
 // template is applied when it can be loaded, and the prediction is returned
 // either as a server-sent-event stream or as one "chat.completion" response.
 //
 // When the request declares functions and the configuration allows them, a
 // grammar is generated from the function list to constrain the model output,
 // and the result is post-processed into either a function call or a plain
 // reply. Streaming is disabled in that case.
 func ChatEndpoint(cm *config.ConfigLoader, o *options.Option) func(c *fiber.Ctx) error {
 	emptyMessage := ""
 	// process runs the prediction for a streaming request and pushes one
 	// chunk per generated token on the responses channel.
 	process := func(s string, req *OpenAIRequest, config *config.Config, loader *model.ModelLoader, responses chan OpenAIResponse) {
 		// Always close the channel so the stream writer terminates,
 		// whatever happens during inference.
 		defer close(responses)
 		initialMessage := OpenAIResponse{
 			Model:   req.Model, // we have to return what the user sent here, due to OpenAI spec.
 			Choices: []Choice{{Delta: &Message{Role: "assistant", Content: &emptyMessage}}},
 			Object:  "chat.completion.chunk",
 		}
 		responses <- initialMessage
 		_, err := ComputeChoices(s, req.N, config, o, loader, func(s string, c *[]Choice) {}, func(s string) bool {
 			resp := OpenAIResponse{
 				Model:   req.Model, // we have to return what the user sent here, due to OpenAI spec.
 				Choices: []Choice{{Delta: &Message{Content: &s}, Index: 0}},
 				Object:  "chat.completion.chunk",
 			}
 			responses <- resp
 			return true
 		})
 		if err != nil {
 			// The response headers are already sent at this point, so the
 			// failure can only be reported in the logs.
 			log.Error().Msgf("stream inference error: %s", err.Error())
 		}
 	}
 	return func(c *fiber.Ctx) error {
 		processFunctions := false
 		funcs := grammar.Functions{}
 		model, input, err := readInput(c, o.Loader, true)
 		if err != nil {
 			return fmt.Errorf("failed reading parameters from request:%w", err)
 		}
 		config, input, err := readConfig(model, input, cm, o.Loader, o.Debug, o.Threads, o.ContextSize, o.F16)
 		if err != nil {
 			return fmt.Errorf("failed reading parameters from request:%w", err)
 		}
 		log.Debug().Msgf("Configuration read: %+v", config)
 		// Allow the user to set custom actions via config file
 		// to be "embedded" in each model
 		noActionName := "answer"
 		noActionDescription := "use this action to answer without performing any action"
 		if config.FunctionsConfig.NoActionFunctionName != "" {
 			noActionName = config.FunctionsConfig.NoActionFunctionName
 		}
 		if config.FunctionsConfig.NoActionDescriptionName != "" {
 			noActionDescription = config.FunctionsConfig.NoActionDescriptionName
 		}
 		// process functions if we have any defined or if we have a function call string
 		if len(input.Functions) > 0 && config.ShouldUseFunctions() {
 			log.Debug().Msgf("Response needs to process functions")
 			processFunctions = true
 			noActionGrammar := grammar.Function{
 				Name:        noActionName,
 				Description: noActionDescription,
 				Parameters: map[string]interface{}{
 					"properties": map[string]interface{}{
 						"message": map[string]interface{}{
 							"type":        "string",
 							"description": "The message to reply the user with",
 						}},
 				},
 			}
 			// Append the no action function
 			funcs = append(funcs, input.Functions...)
 			if !config.FunctionsConfig.DisableNoAction {
 				funcs = append(funcs, noActionGrammar)
 			}
 			// Force picking one of the functions by the request
 			if config.FunctionToCall() != "" {
 				funcs = funcs.Select(config.FunctionToCall())
 			}
 			// Update input grammar
 			jsStruct := funcs.ToJSONStructure()
 			config.Grammar = jsStruct.Grammar("")
 		} else if input.JSONFunctionGrammarObject != nil {
 			config.Grammar = input.JSONFunctionGrammarObject.Grammar("")
 		}
 		// functions are not supported in stream mode (yet?)
 		toStream := input.Stream && !processFunctions
 		log.Debug().Msgf("Parameters: %+v", config)
 		var predInput string
 		mess := []string{}
 		for _, i := range input.Messages {
 			var content string
 			role := i.Role
 			// if function call, we might want to customize the role so we can display better that the "assistant called a json action"
 			// if an "assistant_function_call" role is defined, we use it, otherwise we use the role that is passed by in the request
 			if i.FunctionCall != nil && i.Role == "assistant" {
 				roleFn := "assistant_function_call"
 				r := config.Roles[roleFn]
 				if r != "" {
 					role = roleFn
 				}
 			}
 			r := config.Roles[role]
 			contentExists := i.Content != nil && *i.Content != ""
 			if r != "" {
 				// A role prefix is configured: prepend it to the content and,
 				// on its own line, to the JSON-encoded function call.
 				if contentExists {
 					content = fmt.Sprint(r, " ", *i.Content)
 				}
 				if i.FunctionCall != nil {
 					j, err := json.Marshal(i.FunctionCall)
 					if err == nil {
 						if contentExists {
 							content += "\n" + fmt.Sprint(r, " ", string(j))
 						} else {
 							content = fmt.Sprint(r, " ", string(j))
 						}
 					}
 				}
 			} else {
 				// No role prefix: same layout, without the prefix
 				if contentExists {
 					content = fmt.Sprint(*i.Content)
 				}
 				if i.FunctionCall != nil {
 					j, err := json.Marshal(i.FunctionCall)
 					if err == nil {
 						if contentExists {
 							content += "\n" + string(j)
 						} else {
 							content = string(j)
 						}
 					}
 				}
 			}
 			mess = append(mess, content)
 		}
 		predInput = strings.Join(mess, "\n")
 		log.Debug().Msgf("Prompt (before templating): %s", predInput)
 		if toStream {
 			log.Debug().Msgf("Stream request received")
 			c.Context().SetContentType("text/event-stream")
 			//c.Response().Header.SetContentType(fiber.MIMETextHTMLCharsetUTF8)
 			//	c.Set("Content-Type", "text/event-stream")
 			c.Set("Cache-Control", "no-cache")
 			c.Set("Connection", "keep-alive")
 			c.Set("Transfer-Encoding", "chunked")
 		}
 		// Template selection: model name by default, the chat template for plain
 		// requests, the functions template when functions are processed.
 		templateFile := config.Model
 		if config.TemplateConfig.Chat != "" && !processFunctions {
 			templateFile = config.TemplateConfig.Chat
 		}
 		if config.TemplateConfig.Functions != "" && processFunctions {
 			templateFile = config.TemplateConfig.Functions
 		}
 		// A model can have a "file.bin.tmpl" file associated with a prompt template prefix
 		templatedInput, err := o.Loader.TemplatePrefix(templateFile, struct {
 			Input     string
 			Functions []grammar.Function
 		}{
 			Input:     predInput,
 			Functions: funcs,
 		})
 		if err == nil {
 			predInput = templatedInput
 			log.Debug().Msgf("Template found, input modified to: %s", predInput)
 		} else {
 			log.Debug().Msgf("Template failed loading: %s", err.Error())
 		}
 		log.Debug().Msgf("Prompt (after templating): %s", predInput)
 		if processFunctions {
 			log.Debug().Msgf("Grammar: %+v", config.Grammar)
 		}
 		if toStream {
 			responses := make(chan OpenAIResponse)
 			go process(predInput, input, config, o.Loader, responses)
 			c.Context().SetBodyStreamWriter(fasthttp.StreamWriter(func(w *bufio.Writer) {
 				// Drain the channel until process closes it
 				for ev := range responses {
 					var buf bytes.Buffer
 					enc := json.NewEncoder(&buf)
 					enc.Encode(ev)
 					log.Debug().Msgf("Sending chunk: %s", buf.String())
 					// Encode already appends a newline, which together with
 					// the one below terminates the SSE event.
 					fmt.Fprintf(w, "data: %v\n", buf.String())
 					w.Flush()
 				}
 				// Final chunk carrying the finish reason, then the [DONE] marker
 				resp := &OpenAIResponse{
 					Model: input.Model, // we have to return what the user sent here, due to OpenAI spec.
 					Choices: []Choice{
 						{
 							FinishReason: "stop",
 							Index:        0,
 							Delta:        &Message{Content: &emptyMessage},
 						}},
 					Object: "chat.completion.chunk",
 				}
 				respData, _ := json.Marshal(resp)
 				w.WriteString(fmt.Sprintf("data: %s\n\n", respData))
 				w.WriteString("data: [DONE]\n\n")
 				w.Flush()
 			}))
 			return nil
 		}
 		result, err := ComputeChoices(predInput, input.N, config, o, o.Loader, func(s string, c *[]Choice) {
 			if processFunctions {
 				// As we have to change the result before processing, we can't stream the answer (yet?)
 				ss := map[string]interface{}{}
 				json.Unmarshal([]byte(s), &ss)
 				log.Debug().Msgf("Function return: %s %+v", s, ss)
 				// The grammar defines the function name as "function", while OpenAI returns "name"
 				func_name := ss["function"]
 				// Similarly, while here arguments is a map[string]interface{}, OpenAI actually want a stringified object
 				args := ss["arguments"] // arguments needs to be a string, but we return an object from the grammar result (TODO: fix)
 				d, _ := json.Marshal(args)
 				ss["arguments"] = string(d)
 				ss["name"] = func_name
 				// if do nothing, reply with a message
 				if func_name == noActionName {
 					log.Debug().Msgf("nothing to do, computing a reply")
 					// If there is a message that the LLM already sends as part of the JSON reply, use it
 					arguments := map[string]interface{}{}
 					json.Unmarshal([]byte(d), &arguments)
 					m, exists := arguments["message"]
 					if exists {
 						switch message := m.(type) {
 						case string:
 							if message != "" {
 								log.Debug().Msgf("Reply received from LLM: %s", message)
 								message = backend.Finetune(*config, predInput, message)
 								log.Debug().Msgf("Reply received from LLM(finetuned): %s", message)
 								*c = append(*c, Choice{Message: &Message{Role: "assistant", Content: &message}})
 								return
 							}
 						}
 					}
 					log.Debug().Msgf("No action received from LLM, without a message, computing a reply")
 					// Otherwise ask the LLM to understand the JSON output and the context, and return a message
 					// Note: This costs (in term of CPU) another computation
 					config.Grammar = ""
 					predFunc, err := backend.ModelInference(predInput, o.Loader, *config, o, nil)
 					if err != nil {
 						log.Error().Msgf("inference error: %s", err.Error())
 						return
 					}
 					prediction, err := predFunc()
 					if err != nil {
 						log.Error().Msgf("inference error: %s", err.Error())
 						return
 					}
 					prediction = backend.Finetune(*config, predInput, prediction)
 					*c = append(*c, Choice{Message: &Message{Role: "assistant", Content: &prediction}})
 				} else {
 					// otherwise reply with the function call
 					*c = append(*c, Choice{
 						FinishReason: "function_call",
 						Message:      &Message{Role: "assistant", FunctionCall: ss},
 					})
 				}
 				return
 			}
 			*c = append(*c, Choice{FinishReason: "stop", Index: 0, Message: &Message{Role: "assistant", Content: &s}})
 		}, nil)
 		if err != nil {
 			return err
 		}
 		resp := &OpenAIResponse{
 			Model:   input.Model, // we have to return what the user sent here, due to OpenAI spec.
 			Choices: result,
 			Object:  "chat.completion",
 		}
 		// Marshalled only for the debug log; the response itself is written by c.JSON
 		respData, _ := json.Marshal(resp)
 		log.Debug().Msgf("Response: %s", respData)
 		// Return the prediction in the response body
 		return c.JSON(resp)
 	}
 }
--- a/api/openai/completion.go
+++ b/api/openai/completion.go
@ -1,159 +0,0 @@
 package openai
 import (
 	"bufio"
 	"bytes"
 	"encoding/json"
 	"errors"
 	"fmt"
 	config "github.com/go-skynet/LocalAI/api/config"
 	"github.com/go-skynet/LocalAI/api/options"
 	model "github.com/go-skynet/LocalAI/pkg/model"
 	"github.com/gofiber/fiber/v2"
 	"github.com/rs/zerolog/log"
 	"github.com/valyala/fasthttp"
 )
 // CompletionEndpoint builds the fiber handler for the OpenAI-compatible text
 // completion API.
 // https://platform.openai.com/docs/api-reference/completions
 //
 // The handler reads the request and the model configuration, applies the
 // completion prompt template when one is available, and then either streams
 // the generated tokens as server-sent events (when the request sets Stream) or
 // returns all choices in a single JSON response.
 func CompletionEndpoint(cm *config.ConfigLoader, o *options.Option) func(c *fiber.Ctx) error {
 	// process runs one inference for s, pushes every string received by the token
 	// callback onto responses as its own "text_completion" chunk, and closes the
 	// channel afterwards so that the stream writer below can terminate.
 	process := func(s string, req *OpenAIRequest, config *config.Config, loader *model.ModelLoader, responses chan OpenAIResponse) {
 		ComputeChoices(s, req.N, config, o, loader, func(s string, c *[]Choice) {}, func(s string) bool {
 			resp := OpenAIResponse{
 				Model: req.Model, // we have to return what the user sent here, due to OpenAI spec.
 				Choices: []Choice{
 					{
 						Index: 0,
 						Text:  s,
 					},
 				},
 				Object: "text_completion",
 			}
 			log.Debug().Msgf("Sending goroutine: %s", s)
 			responses <- resp
 			return true
 		})
 		close(responses)
 	}
 	return func(c *fiber.Ctx) error {
 		model, input, err := readInput(c, o.Loader, true)
 		if err != nil {
 			return fmt.Errorf("failed reading parameters from request:%w", err)
 		}
 		log.Debug().Msgf("`input`: %+v", input)
 		config, input, err := readConfig(model, input, cm, o.Loader, o.Debug, o.Threads, o.ContextSize, o.F16)
 		if err != nil {
 			return fmt.Errorf("failed reading parameters from request:%w", err)
 		}
 		log.Debug().Msgf("Parameter Config: %+v", config)
 		if input.Stream {
 			log.Debug().Msgf("Stream request received")
 			c.Context().SetContentType("text/event-stream")
 			//c.Response().Header.SetContentType(fiber.MIMETextHTMLCharsetUTF8)
 			//c.Set("Content-Type", "text/event-stream")
 			c.Set("Cache-Control", "no-cache")
 			c.Set("Connection", "keep-alive")
 			c.Set("Transfer-Encoding", "chunked")
 		}
 		// The model name doubles as template name unless a dedicated completion
 		// template is configured.
 		templateFile := config.Model
 		if config.TemplateConfig.Completion != "" {
 			templateFile = config.TemplateConfig.Completion
 		}
 		if input.Stream {
 			// Streaming handles exactly one prompt. Reject an empty prompt list up
 			// front: indexing PromptStrings[0] below would otherwise panic.
 			if len(config.PromptStrings) == 0 {
 				return errors.New("no `PromptStrings` provided when Streaming")
 			}
 			if len(config.PromptStrings) > 1 {
 				return errors.New("cannot handle more than 1 `PromptStrings` when Streaming")
 			}
 			predInput := config.PromptStrings[0]
 			// A model can have a "file.bin.tmpl" file associated with a prompt template prefix
 			templatedInput, err := o.Loader.TemplatePrefix(templateFile, struct {
 				Input string
 			}{
 				Input: predInput,
 			})
 			if err == nil {
 				predInput = templatedInput
 				log.Debug().Msgf("Template found, input modified to: %s", predInput)
 			}
 			responses := make(chan OpenAIResponse)
 			go process(predInput, input, config, o.Loader, responses)
 			c.Context().SetBodyStreamWriter(fasthttp.StreamWriter(func(w *bufio.Writer) {
 				for ev := range responses {
 					var buf bytes.Buffer
 					// Keep draining the channel even if one chunk cannot be encoded,
 					// otherwise the producing goroutine would block forever.
 					if err := json.NewEncoder(&buf).Encode(ev); err != nil {
 						log.Error().Msgf("failed encoding chunk: %s", err.Error())
 						continue
 					}
 					log.Debug().Msgf("Sending chunk: %s", buf.String())
 					// Encode already terminates the payload with a newline, so the
 					// extra "\n" here produces the blank line that ends the event.
 					fmt.Fprintf(w, "data: %v\n", buf.String())
 					w.Flush()
 				}
 				// Final chunk carrying the finish reason, followed by the [DONE] marker.
 				resp := &OpenAIResponse{
 					Model: input.Model, // we have to return what the user sent here, due to OpenAI spec.
 					Choices: []Choice{
 						{
 							Index:        0,
 							FinishReason: "stop",
 						},
 					},
 					Object: "text_completion",
 				}
 				respData, _ := json.Marshal(resp)
 				w.WriteString(fmt.Sprintf("data: %s\n\n", respData))
 				w.WriteString("data: [DONE]\n\n")
 				w.Flush()
 			}))
 			return nil
 		}
 		var result []Choice
 		for k, i := range config.PromptStrings {
 			// A model can have a "file.bin.tmpl" file associated with a prompt template prefix
 			templatedInput, err := o.Loader.TemplatePrefix(templateFile, struct {
 				Input string
 			}{
 				Input: i,
 			})
 			if err == nil {
 				i = templatedInput
 				log.Debug().Msgf("Template found, input modified to: %s", i)
 			}
 			// Every choice produced for a prompt carries that prompt's position as Index.
 			r, err := ComputeChoices(i, input.N, config, o, o.Loader, func(s string, c *[]Choice) {
 				*c = append(*c, Choice{Text: s, FinishReason: "stop", Index: k})
 			}, nil)
 			if err != nil {
 				return err
 			}
 			result = append(result, r...)
 		}
 		resp := &OpenAIResponse{
 			Model:   input.Model, // we have to return what the user sent here, due to OpenAI spec.
 			Choices: result,
 			Object:  "text_completion",
 		}
 		jsonResult, _ := json.Marshal(resp)
 		log.Debug().Msgf("Response: %s", jsonResult)
 		// Return the prediction in the response body
 		return c.JSON(resp)
 	}
 }
--- a/api/openai/edit.go
+++ b/api/openai/edit.go
@ -1,67 +0,0 @@
 package openai
 import (
 	"encoding/json"
 	"fmt"
 	config "github.com/go-skynet/LocalAI/api/config"
 	"github.com/go-skynet/LocalAI/api/options"
 	"github.com/gofiber/fiber/v2"
 	"github.com/rs/zerolog/log"
 )
 // EditEndpoint builds the fiber handler for edit requests. Each entry of the
 // configuration's InputStrings is passed through the edit template (when one can
 // be rendered), sent to ComputeChoices, and the resulting choices are returned
 // together in one "edit" response.
 func EditEndpoint(cm *config.ConfigLoader, o *options.Option) func(c *fiber.Ctx) error {
 	// editTemplateData is the value handed to the prompt template.
 	// NOTE(review): Instruction is never populated below, so templates only ever
 	// see an empty instruction - confirm whether the request carries one.
 	type editTemplateData = struct {
 		Input       string
 		Instruction string
 	}
 	return func(c *fiber.Ctx) error {
 		modelName, request, err := readInput(c, o.Loader, true)
 		if err != nil {
 			return fmt.Errorf("failed reading parameters from request:%w", err)
 		}
 		cfg, request, err := readConfig(modelName, request, cm, o.Loader, o.Debug, o.Threads, o.ContextSize, o.F16)
 		if err != nil {
 			return fmt.Errorf("failed reading parameters from request:%w", err)
 		}
 		log.Debug().Msgf("Parameter Config: %+v", cfg)
 		// Fall back to the model name when no dedicated edit template is configured.
 		tmplName := cfg.TemplateConfig.Edit
 		if tmplName == "" {
 			tmplName = cfg.Model
 		}
 		collect := func(s string, c *[]Choice) {
 			*c = append(*c, Choice{Text: s})
 		}
 		var choices []Choice
 		for _, text := range cfg.InputStrings {
 			// A model can have a "file.bin.tmpl" file associated with a prompt template prefix
 			if rendered, tmplErr := o.Loader.TemplatePrefix(tmplName, editTemplateData{Input: text}); tmplErr == nil {
 				text = rendered
 				log.Debug().Msgf("Template found, input modified to: %s", text)
 			}
 			partial, err := ComputeChoices(text, request.N, cfg, o, o.Loader, collect, nil)
 			if err != nil {
 				return err
 			}
 			choices = append(choices, partial...)
 		}
 		resp := &OpenAIResponse{
 			Model:   request.Model, // we have to return what the user sent here, due to OpenAI spec.
 			Choices: choices,
 			Object:  "edit",
 		}
 		encoded, _ := json.Marshal(resp)
 		log.Debug().Msgf("Response: %s", encoded)
 		// Return the prediction in the response body
 		return c.JSON(resp)
 	}
 }
--- a/api/openai/embeddings.go
+++ b/api/openai/embeddings.go
@ -1,70 +0,0 @@
 package openai
 import (
 	"encoding/json"
 	"fmt"
 	"github.com/go-skynet/LocalAI/api/backend"
 	config "github.com/go-skynet/LocalAI/api/config"
 	"github.com/go-skynet/LocalAI/api/options"
 	"github.com/gofiber/fiber/v2"
 	"github.com/rs/zerolog/log"
 )
 // EmbeddingsEndpoint builds the fiber handler for the OpenAI-compatible
 // embeddings API.
 // https://platform.openai.com/docs/api-reference/embeddings
 //
 // Embeddings are computed first for every tokenized input (InputToken) and then
 // for every string input (InputStrings); the items are returned as a "list".
 func EmbeddingsEndpoint(cm *config.ConfigLoader, o *options.Option) func(c *fiber.Ctx) error {
 	return func(c *fiber.Ctx) error {
 		model, input, err := readInput(c, o.Loader, true)
 		if err != nil {
 			return fmt.Errorf("failed reading parameters from request:%w", err)
 		}
 		config, input, err := readConfig(model, input, cm, o.Loader, o.Debug, o.Threads, o.ContextSize, o.F16)
 		if err != nil {
 			return fmt.Errorf("failed reading parameters from request:%w", err)
 		}
 		log.Debug().Msgf("Parameter Config: %+v", config)
 		items := []Item{}
 		for _, s := range config.InputToken {
 			// get the model function to call for the result
 			embedFn, err := backend.ModelEmbedding("", s, o.Loader, *config, o)
 			if err != nil {
 				return err
 			}
 			embeddings, err := embedFn()
 			if err != nil {
 				return err
 			}
 			// Index is the item's position in the response, so it stays unique even
 			// when token and string inputs are mixed in one request.
 			items = append(items, Item{Embedding: embeddings, Index: len(items), Object: "embedding"})
 		}
 		for _, s := range config.InputStrings {
 			// get the model function to call for the result
 			embedFn, err := backend.ModelEmbedding(s, []int{}, o.Loader, *config, o)
 			if err != nil {
 				return err
 			}
 			embeddings, err := embedFn()
 			if err != nil {
 				return err
 			}
 			// Continue numbering after the token-based items instead of restarting
 			// at zero, which would produce duplicate indexes.
 			items = append(items, Item{Embedding: embeddings, Index: len(items), Object: "embedding"})
 		}
 		resp := &OpenAIResponse{
 			Model:  input.Model, // we have to return what the user sent here, due to OpenAI spec.
 			Data:   items,
 			Object: "list",
 		}
 		jsonResult, _ := json.Marshal(resp)
 		log.Debug().Msgf("Response: %s", jsonResult)
 		// Return the prediction in the response body
 		return c.JSON(resp)
 	}
 }
--- a/api/openai/image.go
+++ b/api/openai/image.go
@ -1,158 +0,0 @@
 package openai
 import (
 	"encoding/base64"
 	"encoding/json"
 	"fmt"
 	"io/ioutil"
 	"os"
 	"path/filepath"
 	"strconv"
 	"strings"
 	"github.com/go-skynet/LocalAI/api/backend"
 	config "github.com/go-skynet/LocalAI/api/config"
 	"github.com/go-skynet/LocalAI/api/options"
 	model "github.com/go-skynet/LocalAI/pkg/model"
 	"github.com/gofiber/fiber/v2"
 	"github.com/rs/zerolog/log"
 )
 // https://platform.openai.com/docs/api-reference/images/create
 /*
 *
 	curl http://localhost:8080/v1/images/generations \
 	  -H "Content-Type: application/json" \
 	  -d '{
 	    "prompt": "A cute baby sea otter",
 	    "n": 1,
 	    "size": "512x512"
 	  }'
 *
 */
 // ImageEndpoint builds the fiber handler for image generation requests.
 //
 // For every prompt in the configuration it generates N images (one when N is
 // zero). A prompt may contain a "|" separating the positive prompt from the
 // negative one. Depending on the request's response format, each image is
 // returned either inline as base64 ("b64_json") or as a URL below
 // "/generated-images/" pointing at a file created in the configured image
 // directory.
 func ImageEndpoint(cm *config.ConfigLoader, o *options.Option) func(c *fiber.Ctx) error {
 	return func(c *fiber.Ctx) error {
 		m, input, err := readInput(c, o.Loader, false)
 		if err != nil {
 			return fmt.Errorf("failed reading parameters from request:%w", err)
 		}
 		if m == "" {
 			m = model.StableDiffusionBackend
 		}
 		log.Debug().Msgf("Loading model: %+v", m)
 		config, input, err := readConfig(m, input, cm, o.Loader, o.Debug, 0, 0, false)
 		if err != nil {
 			return fmt.Errorf("failed reading parameters from request:%w", err)
 		}
 		log.Debug().Msgf("Parameter Config: %+v", config)
 		// XXX: Only stablediffusion is supported for now
 		if config.Backend == "" {
 			config.Backend = model.StableDiffusionBackend
 		}
 		// The size is expected in the "<width>x<height>" form.
 		sizeParts := strings.Split(input.Size, "x")
 		if len(sizeParts) != 2 {
 			return fmt.Errorf("Invalid value for 'size'")
 		}
 		width, err := strconv.Atoi(sizeParts[0])
 		if err != nil {
 			return fmt.Errorf("Invalid value for 'size'")
 		}
 		height, err := strconv.Atoi(sizeParts[1])
 		if err != nil {
 			return fmt.Errorf("Invalid value for 'size'")
 		}
 		b64JSON := input.ResponseFormat == "b64_json"
 		// Request-wide settings are computed once, outside of the generation loops.
 		n := input.N
 		if n == 0 {
 			n = 1
 		}
 		mode := input.Mode
 		step := 15
 		if input.Step != 0 {
 			step = input.Step
 		}
 		// Images served by URL must live in the image directory; images returned
 		// inline only need a scratch file in the default temporary directory.
 		tempDir := ""
 		if !b64JSON {
 			tempDir = o.ImageDir
 		}
 		baseURL := c.BaseURL()
 		var result []Item
 		for _, i := range config.PromptStrings {
 			prompts := strings.Split(i, "|")
 			positive_prompt := prompts[0]
 			negative_prompt := ""
 			if len(prompts) > 1 {
 				negative_prompt = prompts[1]
 			}
 			for j := 0; j < n; j++ {
 				// Create a temporary file
 				outputFile, err := ioutil.TempFile(tempDir, "b64")
 				if err != nil {
 					return err
 				}
 				outputFile.Close()
 				output := outputFile.Name() + ".png"
 				// Rename the temporary file
 				err = os.Rename(outputFile.Name(), output)
 				if err != nil {
 					// Do not leave the placeholder behind when it cannot be renamed.
 					os.Remove(outputFile.Name())
 					return err
 				}
 				fn, err := backend.ImageGeneration(height, width, mode, step, input.Seed, positive_prompt, negative_prompt, output, o.Loader, *config, o)
 				if err != nil {
 					os.Remove(output)
 					return err
 				}
 				if err := fn(); err != nil {
 					// A failed generation must not leave an unusable file around.
 					os.Remove(output)
 					return err
 				}
 				item := Item{}
 				if b64JSON {
 					data, err := os.ReadFile(output)
 					// The image is returned inline, so the scratch file is removed right
 					// away instead of through a defer that would keep every file of the
 					// request on disk until the handler returns.
 					os.Remove(output)
 					if err != nil {
 						return err
 					}
 					item.B64JSON = base64.StdEncoding.EncodeToString(data)
 				} else {
 					base := filepath.Base(output)
 					item.URL = baseURL + "/generated-images/" + base
 				}
 				result = append(result, item)
 			}
 		}
 		resp := &OpenAIResponse{
 			Data: result,
 		}
 		jsonResult, _ := json.Marshal(resp)
 		log.Debug().Msgf("Response: %s", jsonResult)
 		// Return the prediction in the response body
 		return c.JSON(resp)
 	}
 }
--- a/api/openai/inference.go
+++ b/api/openai/inference.go
@ -1,36 +0,0 @@
 package openai
 import (
 	"github.com/go-skynet/LocalAI/api/backend"
 	config "github.com/go-skynet/LocalAI/api/config"
 	"github.com/go-skynet/LocalAI/api/options"
 	model "github.com/go-skynet/LocalAI/pkg/model"
 )
 // ComputeChoices runs the model inference for predInput n times (once when n
 // is zero) and lets cb turn every finetuned prediction into entries of the
 // returned choice list. tokenCallback is forwarded untouched to
 // backend.ModelInference. On failure the choices collected so far are returned
 // together with the error.
 func ComputeChoices(predInput string, n int, config *config.Config, o *options.Option, loader *model.ModelLoader, cb func(string, *[]Choice), tokenCallback func(string) bool) ([]Choice, error) {
 	choices := []Choice{}
 	// A zero count means the caller did not ask for a specific number of choices.
 	if n == 0 {
 		n = 1
 	}
 	// get the model function to call for the result
 	infer, err := backend.ModelInference(predInput, loader, *config, o, tokenCallback)
 	if err != nil {
 		return choices, err
 	}
 	for remaining := n; remaining > 0; remaining-- {
 		raw, inferErr := infer()
 		if inferErr != nil {
 			return choices, inferErr
 		}
 		cb(backend.Finetune(*config, predInput, raw), &choices)
 	}
 	return choices, nil
 }
--- a/api/openai/list.go
+++ b/api/openai/list.go
@ -1,37 +0,0 @@
 package openai
 import (
 	config "github.com/go-skynet/LocalAI/api/config"
 	model "github.com/go-skynet/LocalAI/pkg/model"
 	"github.com/gofiber/fiber/v2"
 )
 // ListModelsEndpoint builds the fiber handler that lists the available models:
 // first everything reported by the model loader, then every configuration name
 // that the loader did not already report.
 func ListModelsEndpoint(loader *model.ModelLoader, cm *config.ConfigLoader) func(ctx *fiber.Ctx) error {
 	return func(c *fiber.Ctx) error {
 		listed, err := loader.ListModels()
 		if err != nil {
 			return err
 		}
 		// seen remembers the names already emitted so configurations are not
 		// listed twice.
 		seen := make(map[string]struct{}, len(listed))
 		entries := make([]OpenAIModel, 0, len(listed))
 		for _, name := range listed {
 			seen[name] = struct{}{}
 			entries = append(entries, OpenAIModel{ID: name, Object: "model"})
 		}
 		for _, name := range cm.ListConfigs() {
 			if _, dup := seen[name]; dup {
 				continue
 			}
 			entries = append(entries, OpenAIModel{ID: name, Object: "model"})
 		}
 		payload := struct {
 			Object string        `json:"object"`
 			Data   []OpenAIModel `json:"data"`
 		}{
 			Object: "list",
 			Data:   entries,
 		}
 		return c.JSON(payload)
 	}
 }
--- a/api/openai/request.go
+++ b/api/openai/request.go
@ -1,234 +0,0 @@
 package openai
 import (
 	"encoding/json"
 	"fmt"
 	"os"
 	"path/filepath"
 	"strings"
 	config "github.com/go-skynet/LocalAI/api/config"
 	model "github.com/go-skynet/LocalAI/pkg/model"
 	"github.com/gofiber/fiber/v2"
 	"github.com/rs/zerolog/log"
 )
 // readInput parses the request body into an OpenAIRequest and determines which
 // model the request targets.
 //
 // The model is taken, in increasing order of precedence, from the request body,
 // from the "model" route parameter and from the bearer token of the
 // Authorization header (only when the token names something that exists in the
 // model path). When no model is given at all and randomModel is true, the first
 // model reported by the loader is used; if the loader reports none, an error is
 // returned.
 func readInput(c *fiber.Ctx, loader *model.ModelLoader, randomModel bool) (string, *OpenAIRequest, error) {
 	input := new(OpenAIRequest)
 	// Get input data from the request body
 	if err := c.BodyParser(input); err != nil {
 		return "", nil, err
 	}
 	modelFile := input.Model
 	if c.Params("model") != "" {
 		modelFile = c.Params("model")
 	}
 	received, _ := json.Marshal(input)
 	log.Debug().Msgf("Request received: %s", string(received))
 	// Set model from bearer token, if available.
 	// TrimPrefix removes the literal "Bearer " scheme only. TrimLeft would treat
 	// its argument as a set of characters and also strip leading 'B', 'e', 'a',
 	// 'r' and ' ' characters from the token itself, corrupting names such as
 	// "alpaca".
 	bearer := strings.TrimPrefix(c.Get("authorization"), "Bearer ")
 	bearerExists := bearer != "" && loader.ExistsInModelPath(bearer)
 	// If no model was specified, take the first available
 	if modelFile == "" && !bearerExists && randomModel {
 		models, _ := loader.ListModels()
 		if len(models) > 0 {
 			modelFile = models[0]
 			log.Debug().Msgf("No model specified, using: %s", modelFile)
 		} else {
 			log.Debug().Msgf("No model specified, returning error")
 			return "", nil, fmt.Errorf("no model specified")
 		}
 	}
 	// If a model is found in bearer token takes precedence
 	if bearerExists {
 		log.Debug().Msgf("Using model from bearer token: %s", bearer)
 		modelFile = bearer
 	}
 	return modelFile, input, nil
 }
 // updateConfig overlays the parameters of the request onto config.
 //
 // Scalar settings override the configuration only when the request carries a
 // non-zero value, so omitted fields keep the configured value. Stop words,
 // inputs and prompts are appended to what the configuration already contains.
 // The loosely typed request fields (Stop, Input, FunctionCall, Prompt) accept
 // the shapes handled by the type switches below; any other shape is ignored.
 func updateConfig(config *config.Config, input *OpenAIRequest) {
 	if input.Echo {
 		config.Echo = input.Echo
 	}
 	if input.TopK != 0 {
 		config.TopK = input.TopK
 	}
 	if input.TopP != 0 {
 		config.TopP = input.TopP
 	}
 	if input.Grammar != "" {
 		config.Grammar = input.Grammar
 	}
 	if input.Temperature != 0 {
 		config.Temperature = input.Temperature
 	}
 	if input.Maxtokens != 0 {
 		config.Maxtokens = input.Maxtokens
 	}
 	// Stop is either a single string or a list of strings.
 	switch stop := input.Stop.(type) {
 	case string:
 		if stop != "" {
 			config.StopWords = append(config.StopWords, stop)
 		}
 	case []interface{}:
 		for _, pp := range stop {
 			if s, ok := pp.(string); ok {
 				config.StopWords = append(config.StopWords, s)
 			}
 		}
 	}
 	if input.RepeatPenalty != 0 {
 		config.RepeatPenalty = input.RepeatPenalty
 	}
 	if input.Keep != 0 {
 		config.Keep = input.Keep
 	}
 	if input.Batch != 0 {
 		config.Batch = input.Batch
 	}
 	if input.F16 {
 		config.F16 = input.F16
 	}
 	if input.IgnoreEOS {
 		config.IgnoreEOS = input.IgnoreEOS
 	}
 	if input.Seed != 0 {
 		config.Seed = input.Seed
 	}
 	if input.Mirostat != 0 {
 		config.Mirostat = input.Mirostat
 	}
 	if input.MirostatETA != 0 {
 		config.MirostatETA = input.MirostatETA
 	}
 	if input.MirostatTAU != 0 {
 		config.MirostatTAU = input.MirostatTAU
 	}
 	if input.TypicalP != 0 {
 		config.TypicalP = input.TypicalP
 	}
 	// Input is a string, a list of strings, or a list of token lists.
 	switch inputs := input.Input.(type) {
 	case string:
 		if inputs != "" {
 			config.InputStrings = append(config.InputStrings, inputs)
 		}
 	case []interface{}:
 		for _, pp := range inputs {
 			switch i := pp.(type) {
 			case string:
 				config.InputStrings = append(config.InputStrings, i)
 			case []interface{}:
 				tokens := []int{}
 				for _, ii := range i {
 					// Numbers decoded from JSON into interface{} arrive as float64.
 					// Entries of any other type are skipped: an unchecked assertion
 					// would panic on malformed client input.
 					if f, ok := ii.(float64); ok {
 						tokens = append(tokens, int(f))
 					}
 				}
 				config.InputToken = append(config.InputToken, tokens)
 			}
 		}
 	}
 	// Can be either a string or an object
 	switch fnc := input.FunctionCall.(type) {
 	case string:
 		if fnc != "" {
 			config.SetFunctionCallString(fnc)
 		}
 	case map[string]interface{}:
 		var name string
 		if n, exists := fnc["name"]; exists {
 			// Use the name only when it really is a string. The check used to be
 			// inverted, which left name empty for every valid request.
 			if nn, ok := n.(string); ok {
 				name = nn
 			}
 		}
 		config.SetFunctionCallNameString(name)
 	}
 	// Prompt is either a single string or a list of strings.
 	switch p := input.Prompt.(type) {
 	case string:
 		config.PromptStrings = append(config.PromptStrings, p)
 	case []interface{}:
 		for _, pp := range p {
 			if s, ok := pp.(string); ok {
 				config.PromptStrings = append(config.PromptStrings, s)
 			}
 		}
 	}
 }
 // readConfig returns the configuration to use for modelFile with the request
 // parameters already applied to it.
 //
 // A configuration known to cm is used as is. Otherwise "<modelFile>.yaml" in the
 // loader's model path is loaded when it can be stat'ed, and if the model is still
 // unknown afterwards a default configuration is built from ctx, threads, f16
 // and debug. The request is returned unchanged alongside the configuration.
 func readConfig(modelFile string, input *OpenAIRequest, cm *config.ConfigLoader, loader *model.ModelLoader, debug bool, threads, ctx int, f16 bool) (*config.Config, *OpenAIRequest, error) {
 	// Load a config file if present after the model name
 	yamlPath := filepath.Join(loader.ModelPath, modelFile+".yaml")
 	known, found := cm.GetConfig(modelFile)
 	if !found {
 		if _, statErr := os.Stat(yamlPath); statErr == nil {
 			if loadErr := cm.LoadConfig(yamlPath); loadErr != nil {
 				return nil, nil, fmt.Errorf("failed loading model config (%s) %s", yamlPath, loadErr.Error())
 			}
 			// Loading may or may not have registered a config under this name.
 			known, found = cm.GetConfig(modelFile)
 		}
 	}
 	var cfg *config.Config
 	if found {
 		cfg = &known
 	} else {
 		cfg = config.DefaultConfig(modelFile)
 		cfg.ContextSize = ctx
 		cfg.Threads = threads
 		cfg.F16 = f16
 		cfg.Debug = debug
 	}
 	// Set the parameters for the language model prediction
 	updateConfig(cfg, input)
 	// Don't allow 0 as setting
 	if cfg.Threads == 0 {
 		cfg.Threads = 4
 		if threads != 0 {
 			cfg.Threads = threads
 		}
 	}
 	// Enforce debug flag if passed from CLI
 	if debug {
 		cfg.Debug = true
 	}
 	return cfg, input, nil
 }
--- a/api/openai/transcription.go
+++ b/api/openai/transcription.go
@ -1,71 +0,0 @@
 package openai
 import (
 	"fmt"
 	"io"
 	"net/http"
 	"os"
 	"path"
 	"path/filepath"
 	"github.com/go-skynet/LocalAI/api/backend"
 	config "github.com/go-skynet/LocalAI/api/config"
 	"github.com/go-skynet/LocalAI/api/options"
 	"github.com/gofiber/fiber/v2"
 	"github.com/rs/zerolog/log"
 )
 // TranscriptEndpoint builds the fiber handler for audio transcription requests.
 // https://platform.openai.com/docs/api-reference/audio/create
 //
 // The uploaded "file" form field is copied into a fresh temporary directory
 // (removed again when the handler returns), handed to
 // backend.ModelTranscription, and the result is returned as JSON.
 func TranscriptEndpoint(cm *config.ConfigLoader, o *options.Option) func(c *fiber.Ctx) error {
 	return func(c *fiber.Ctx) error {
 		m, input, err := readInput(c, o.Loader, false)
 		if err != nil {
 			return fmt.Errorf("failed reading parameters from request:%w", err)
 		}
 		config, input, err := readConfig(m, input, cm, o.Loader, o.Debug, o.Threads, o.ContextSize, o.F16)
 		if err != nil {
 			return fmt.Errorf("failed reading parameters from request:%w", err)
 		}
 		// retrieve the file data from the request
 		file, err := c.FormFile("file")
 		if err != nil {
 			return err
 		}
 		f, err := file.Open()
 		if err != nil {
 			return err
 		}
 		defer f.Close()
 		dir, err := os.MkdirTemp("", "whisper")
 		if err != nil {
 			return err
 		}
 		defer os.RemoveAll(dir)
 		// Only the base name of the client-supplied file name is used, so the copy
 		// always lands inside dir.
 		dst := filepath.Join(dir, path.Base(file.Filename))
 		dstFile, err := os.Create(dst)
 		if err != nil {
 			return err
 		}
 		// Close the copy before the backend reads it: this releases the file
 		// descriptor (it was leaked before) and surfaces write errors that are
 		// only reported on close.
 		_, copyErr := io.Copy(dstFile, f)
 		closeErr := dstFile.Close()
 		if copyErr != nil {
 			log.Debug().Msgf("Audio file copying error %+v - %+v - err %+v", file.Filename, dst, copyErr)
 			return copyErr
 		}
 		if closeErr != nil {
 			log.Debug().Msgf("Audio file copying error %+v - %+v - err %+v", file.Filename, dst, closeErr)
 			return closeErr
 		}
 		log.Debug().Msgf("Audio file copied to: %+v", dst)
 		tr, err := backend.ModelTranscription(dst, input.Language, o.Loader, *config, o)
 		if err != nil {
 			return err
 		}
 		log.Debug().Msgf("Trascribed: %+v", tr)
 		// TODO: handle different outputs here
 		return c.Status(http.StatusOK).JSON(tr)
 	}
 }
--- a/api/options.go
+++ b/api/options.go
@ -0,0 +1,121 @@
 package api
 import (
 	"context"
 	model "github.com/go-skynet/LocalAI/pkg/model"
 )
 // Option holds the settings of the api package. Values are filled in through
 // AppOption functions.
 type Option struct {
 	context               context.Context
 	configFile            string
 	loader                *model.ModelLoader
 	uploadLimitMB         int
 	threads               int
 	ctxSize               int
 	f16                   bool
 	debug                 bool
 	disableMessage        bool
 	imageDir              string
 	cors                  bool
 	preloadJSONModels     string
 	preloadModelsFromPath string
 	corsAllowOrigins      string
 }
 // AppOption is a function that mutates an Option.
 type AppOption func(*Option)
 // newOptions returns an Option initialised with the package defaults and then
 // modified by every given AppOption, applied in argument order.
 func newOptions(o ...AppOption) *Option {
 	opts := new(Option)
 	opts.context = context.Background()
 	opts.uploadLimitMB = 15
 	opts.threads = 1
 	opts.ctxSize = 512
 	opts.debug = true
 	opts.disableMessage = true
 	for idx := range o {
 		o[idx](opts)
 	}
 	return opts
 }
 // WithCors returns an AppOption storing b in the cors field.
 func WithCors(b bool) AppOption {
 	return func(opts *Option) { opts.cors = b }
 }
 // WithCorsAllowOrigins returns an AppOption storing b in the corsAllowOrigins field.
 func WithCorsAllowOrigins(b string) AppOption {
 	return func(opts *Option) { opts.corsAllowOrigins = b }
 }
 // WithContext returns an AppOption replacing the context with ctx.
 func WithContext(ctx context.Context) AppOption {
 	return func(opts *Option) { opts.context = ctx }
 }
 // WithYAMLConfigPreload returns an AppOption storing configFile in the
 // preloadModelsFromPath field.
 func WithYAMLConfigPreload(configFile string) AppOption {
 	return func(opts *Option) { opts.preloadModelsFromPath = configFile }
 }
 // WithJSONStringPreload returns an AppOption storing configFile in the
 // preloadJSONModels field.
 func WithJSONStringPreload(configFile string) AppOption {
 	return func(opts *Option) { opts.preloadJSONModels = configFile }
 }
 // WithConfigFile returns an AppOption storing configFile in the configFile field.
 func WithConfigFile(configFile string) AppOption {
 	return func(opts *Option) { opts.configFile = configFile }
 }
 // WithModelLoader returns an AppOption setting the model loader.
 func WithModelLoader(loader *model.ModelLoader) AppOption {
 	return func(opts *Option) { opts.loader = loader }
 }
 // WithUploadLimitMB returns an AppOption storing limit in the uploadLimitMB field.
 func WithUploadLimitMB(limit int) AppOption {
 	return func(opts *Option) { opts.uploadLimitMB = limit }
 }
 // WithThreads returns an AppOption storing threads in the threads field.
 func WithThreads(threads int) AppOption {
 	return func(opts *Option) { opts.threads = threads }
 }
 // WithContextSize returns an AppOption storing ctxSize in the ctxSize field.
 func WithContextSize(ctxSize int) AppOption {
 	return func(opts *Option) { opts.ctxSize = ctxSize }
 }
 // WithF16 returns an AppOption storing f16 in the f16 field.
 func WithF16(f16 bool) AppOption {
 	return func(opts *Option) { opts.f16 = f16 }
 }
 // WithDebug returns an AppOption storing debug in the debug field.
 func WithDebug(debug bool) AppOption {
 	return func(opts *Option) { opts.debug = debug }
 }
 // WithDisableMessage returns an AppOption storing disableMessage in the
 // disableMessage field.
 func WithDisableMessage(disableMessage bool) AppOption {
 	return func(opts *Option) { opts.disableMessage = disableMessage }
 }
 // WithImageDir returns an AppOption storing imageDir in the imageDir field.
 func WithImageDir(imageDir string) AppOption {
 	return func(opts *Option) { opts.imageDir = imageDir }
 }
--- a/api/options/options.go
+++ b/api/options/options.go
@ -1,186 +0,0 @@
 package options
 import (
 	"context"
 	"embed"
 	"encoding/json"
 	"github.com/go-skynet/LocalAI/pkg/gallery"
 	model "github.com/go-skynet/LocalAI/pkg/model"
 	"github.com/rs/zerolog/log"
 )
 // Option holds the exported settings of the options package. Values are filled
 // in through AppOption functions.
 type Option struct {
 	Context               context.Context
 	ConfigFile            string
 	Loader                *model.ModelLoader
 	UploadLimitMB         int
 	Threads               int
 	ContextSize           int
 	F16                   bool
 	Debug                 bool
 	DisableMessage        bool
 	ImageDir              string
 	AudioDir              string
 	CORS                  bool
 	PreloadJSONModels     string
 	PreloadModelsFromPath string
 	CORSAllowOrigins      string
 	Galleries             []gallery.Gallery
 	BackendAssets         embed.FS
 	AssetsDestination     string
 	// ExternalGRPCBackends maps a backend name to its URI (see WithExternalBackend).
 	ExternalGRPCBackends map[string]string
 	AutoloadGalleries    bool
 }
 // AppOption is a function that mutates an Option.
 type AppOption func(*Option)
 // NewOptions returns an Option initialised with the package defaults and then
 // modified by every given AppOption, applied in argument order.
 func NewOptions(o ...AppOption) *Option {
 	opts := new(Option)
 	opts.Context = context.Background()
 	opts.UploadLimitMB = 15
 	opts.Threads = 1
 	opts.ContextSize = 512
 	opts.Debug = true
 	opts.DisableMessage = true
 	for idx := range o {
 		o[idx](opts)
 	}
 	return opts
 }
 // WithCors returns an AppOption storing b in the CORS field.
 func WithCors(b bool) AppOption {
 	return func(opts *Option) { opts.CORS = b }
 }
 // EnableGalleriesAutoload is an option function that turns AutoloadGalleries on.
 var EnableGalleriesAutoload = func(opts *Option) {
 	opts.AutoloadGalleries = true
 }
 // WithExternalBackend returns an AppOption registering uri under name in
 // ExternalGRPCBackends, creating the map on first use.
 func WithExternalBackend(name string, uri string) AppOption {
 	return func(opts *Option) {
 		backends := opts.ExternalGRPCBackends
 		if backends == nil {
 			backends = make(map[string]string)
 			opts.ExternalGRPCBackends = backends
 		}
 		backends[name] = uri
 	}
 }
 // WithCorsAllowOrigins returns an AppOption storing b in the CORSAllowOrigins field.
 func WithCorsAllowOrigins(b string) AppOption {
 	return func(opts *Option) { opts.CORSAllowOrigins = b }
 }
 // WithBackendAssetsOutput returns an AppOption storing out in the
 // AssetsDestination field.
 func WithBackendAssetsOutput(out string) AppOption {
 	return func(opts *Option) { opts.AssetsDestination = out }
 }
 // WithBackendAssets returns an AppOption storing f in the BackendAssets field.
 func WithBackendAssets(f embed.FS) AppOption {
 	return func(opts *Option) { opts.BackendAssets = f }
 }
 // WithStringGalleries returns an AppOption that decodes galls, a JSON array of
 // galleries, and appends the result to Galleries. An empty string is a no-op.
 // Invalid JSON is logged and leaves Galleries untouched.
 func WithStringGalleries(galls string) AppOption {
 	return func(o *Option) {
 		if galls == "" {
 			log.Debug().Msgf("no galleries to load")
 			return
 		}
 		var galleries []gallery.Gallery
 		if err := json.Unmarshal([]byte(galls), &galleries); err != nil {
 			log.Error().Msgf("failed loading galleries: %s", err.Error())
 			// Unmarshal may have filled galleries partially before failing; do not
 			// register half-decoded entries.
 			return
 		}
 		o.Galleries = append(o.Galleries, galleries...)
 	}
 }
 // WithGalleries returns an AppOption appending galleries to the Galleries field.
 func WithGalleries(galleries []gallery.Gallery) AppOption {
 	return func(opts *Option) { opts.Galleries = append(opts.Galleries, galleries...) }
 }
 // WithContext returns an AppOption replacing the Context with ctx.
 func WithContext(ctx context.Context) AppOption {
 	return func(opts *Option) { opts.Context = ctx }
 }
 // WithYAMLConfigPreload returns an AppOption storing configFile in the
 // PreloadModelsFromPath field.
 func WithYAMLConfigPreload(configFile string) AppOption {
 	return func(opts *Option) { opts.PreloadModelsFromPath = configFile }
 }
 // WithJSONStringPreload returns an AppOption storing configFile in the
 // PreloadJSONModels field.
 func WithJSONStringPreload(configFile string) AppOption {
 	return func(opts *Option) { opts.PreloadJSONModels = configFile }
 }
 // WithConfigFile returns an AppOption storing configFile in the ConfigFile field.
 func WithConfigFile(configFile string) AppOption {
 	return func(opts *Option) { opts.ConfigFile = configFile }
 }
 // WithModelLoader returns an AppOption setting the Loader.
 func WithModelLoader(loader *model.ModelLoader) AppOption {
 	return func(opts *Option) { opts.Loader = loader }
 }
 // WithUploadLimitMB returns an AppOption storing limit in the UploadLimitMB field.
 func WithUploadLimitMB(limit int) AppOption {
 	return func(opts *Option) { opts.UploadLimitMB = limit }
 }
 // WithThreads returns an AppOption storing threads in the Threads field.
 func WithThreads(threads int) AppOption {
 	return func(opts *Option) { opts.Threads = threads }
 }
 // WithContextSize returns an AppOption storing ctxSize in the ContextSize field.
 func WithContextSize(ctxSize int) AppOption {
 	return func(opts *Option) { opts.ContextSize = ctxSize }
 }
 // WithF16 returns an AppOption storing f16 in the F16 field.
 func WithF16(f16 bool) AppOption {
 	return func(opts *Option) { opts.F16 = f16 }
 }
 // WithDebug returns an AppOption storing debug in the Debug field.
 func WithDebug(debug bool) AppOption {
 	return func(opts *Option) { opts.Debug = debug }
 }
 // WithDisableMessage returns an AppOption storing disableMessage in the
 // DisableMessage field.
 func WithDisableMessage(disableMessage bool) AppOption {
 	return func(opts *Option) { opts.DisableMessage = disableMessage }
 }
 // WithAudioDir returns an AppOption storing audioDir in the AudioDir field.
 func WithAudioDir(audioDir string) AppOption {
 	return func(opts *Option) { opts.AudioDir = audioDir }
 }
 // WithImageDir returns an AppOption storing imageDir in the ImageDir field.
 func WithImageDir(imageDir string) AppOption {
 	return func(opts *Option) { opts.ImageDir = imageDir }
 }
--- a/api/prediction.go
+++ b/api/prediction.go
@ -0,0 +1,574 @@
 package api
 import (
 	"fmt"
 	"os"
 	"path/filepath"
 	"regexp"
 	"strings"
 	"sync"
 	"github.com/donomii/go-rwkv.cpp"
 	model "github.com/go-skynet/LocalAI/pkg/model"
 	"github.com/go-skynet/LocalAI/pkg/stablediffusion"
 	"github.com/go-skynet/bloomz.cpp"
 	bert "github.com/go-skynet/go-bert.cpp"
 	transformers "github.com/go-skynet/go-ggml-transformers.cpp"
 	llama "github.com/go-skynet/go-llama.cpp"
 	gpt4all "github.com/nomic-ai/gpt4all/gpt4all-bindings/golang"
 )
 // mutex still needed, see: https://github.com/ggerganov/llama.cpp/discussions/784
 var mutexMap sync.Mutex
 var mutexes map[string]*sync.Mutex = make(map[string]*sync.Mutex)
 // defaultLLamaOpts translates the model-loading fields of c (context size,
 // F16, embeddings, GPU layers) into llama model options. Fields that are
 // zero or false contribute no option; the result is never nil.
 func defaultLLamaOpts(c Config) []llama.ModelOption {
 	opts := []llama.ModelOption{}
 	if size := c.ContextSize; size != 0 {
 		opts = append(opts, llama.SetContext(size))
 	}
 	if c.F16 {
 		opts = append(opts, llama.EnableF16Memory)
 	}
 	if c.Embeddings {
 		opts = append(opts, llama.EnableEmbeddings)
 	}
 	if layers := c.NGPULayers; layers != 0 {
 		opts = append(opts, llama.SetGPULayers(layers))
 	}
 	return opts
 }
 // ImageGeneration loads the stablediffusion backend described by c and
 // returns a function that, when called, generates an image into dst.
 //
 // An error is returned immediately when c.Backend is not the stablediffusion
 // backend or when the backend cannot be loaded. The returned function
 // serialises calls per backend through a mutex kept in the package-level
 // mutexes map, and reports an error if the loaded model is not a
 // *stablediffusion.StableDiffusion.
 func ImageGeneration(height, width, mode, step, seed int, positive_prompt, negative_prompt, dst string, loader *model.ModelLoader, c Config) (func() error, error) {
 	if c.Backend != model.StableDiffusionBackend {
 		return nil, fmt.Errorf("endpoint only working with stablediffusion models")
 	}
 	loaded, err := loader.BackendLoader(c.Backend, c.ImageGenerationAssets, []llama.ModelOption{}, uint32(c.Threads))
 	if err != nil {
 		return nil, err
 	}
 	// Start from the "unsupported" fallback and replace it when the loaded
 	// model has the expected concrete type.
 	generate := func() error {
 		return fmt.Errorf("creation of images not supported by the backend")
 	}
 	if sd, isSD := loaded.(*stablediffusion.StableDiffusion); isSD {
 		generate = func() error {
 			return sd.GenerateImage(height, width, mode, step, seed, positive_prompt, negative_prompt, dst)
 		}
 	}
 	return func() error {
 		// This is still needed, see: https://github.com/ggerganov/llama.cpp/discussions/784
 		mutexMap.Lock()
 		backendLock, found := mutexes[c.Backend]
 		if !found {
 			backendLock = &sync.Mutex{}
 			mutexes[c.Backend] = backendLock
 		}
 		mutexMap.Unlock()
 		backendLock.Lock()
 		defer backendLock.Unlock()
 		return generate()
 	}, nil
 }
 // ModelEmbedding loads the model named by c.Model and returns a function
 // that computes embeddings for s, or for tokens when tokens is non-empty.
 //
 // It fails immediately when embeddings are disabled in c or when the model
 // cannot be loaded. With an empty c.Backend the loader's GreedyLoader is
 // used, otherwise BackendLoader. The returned function serialises calls per
 // model file through the package-level mutexes map and strips trailing
 // zero values from the resulting vector. Models other than llama and bert
 // yield an error when the function is invoked.
 func ModelEmbedding(s string, tokens []int, loader *model.ModelLoader, c Config) (func() ([]float32, error), error) {
 	if !c.Embeddings {
 		return nil, fmt.Errorf("endpoint disabled for this model by API configuration")
 	}
 	modelFile := c.Model
 	loadOpts := defaultLLamaOpts(c)
 	var (
 		loaded interface{}
 		err    error
 	)
 	switch c.Backend {
 	case "":
 		loaded, err = loader.GreedyLoader(modelFile, loadOpts, uint32(c.Threads))
 	default:
 		loaded, err = loader.BackendLoader(c.Backend, modelFile, loadOpts, uint32(c.Threads))
 	}
 	if err != nil {
 		return nil, err
 	}
 	useTokens := len(tokens) > 0
 	var embed func() ([]float32, error)
 	switch m := loaded.(type) {
 	case *llama.LLama:
 		embed = func() ([]float32, error) {
 			opts := buildLLamaPredictOptions(c, loader.ModelPath)
 			if !useTokens {
 				return m.Embeddings(s, opts...)
 			}
 			return m.TokenEmbeddings(tokens, opts...)
 		}
 	// bert embeddings
 	case *bert.Bert:
 		embed = func() ([]float32, error) {
 			if !useTokens {
 				return m.Embeddings(s, bert.SetThreads(c.Threads))
 			}
 			return m.TokenEmbeddings(tokens, bert.SetThreads(c.Threads))
 		}
 	default:
 		embed = func() ([]float32, error) {
 			return nil, fmt.Errorf("embeddings not supported by the backend")
 		}
 	}
 	return func() ([]float32, error) {
 		// This is still needed, see: https://github.com/ggerganov/llama.cpp/discussions/784
 		mutexMap.Lock()
 		modelLock, found := mutexes[modelFile]
 		if !found {
 			modelLock = &sync.Mutex{}
 			mutexes[modelFile] = modelLock
 		}
 		mutexMap.Unlock()
 		modelLock.Lock()
 		defer modelLock.Unlock()
 		vector, embedErr := embed()
 		if embedErr != nil {
 			return vector, embedErr
 		}
 		// Remove trailing 0s: walk back from the end to the last non-zero value.
 		end := len(vector)
 		for end > 0 && vector[end-1] == 0.0 {
 			end--
 		}
 		return vector[:end], nil
 	}, nil
 }
 // buildLLamaPredictOptions converts the sampling and runtime fields of c
 // into llama predict options, in a fixed order.
 //
 // Temperature, TopP, TopK, Maxtokens, Threads and StopWords are always
 // passed; every other field only contributes an option when it is non-zero,
 // non-empty or true. When c.PromptCachePath is set it is resolved relative
 // to modelPath and its parent directory is created; the error returned by
 // os.MkdirAll is ignored.
 func buildLLamaPredictOptions(c Config, modelPath string) []llama.PredictOption {
 	opts := []llama.PredictOption{
 		llama.SetTemperature(c.Temperature),
 		llama.SetTopP(c.TopP),
 		llama.SetTopK(c.TopK),
 		llama.SetTokens(c.Maxtokens),
 		llama.SetThreads(c.Threads),
 	}
 	// add appends a single option, keeping the call sites below compact.
 	add := func(opt llama.PredictOption) {
 		opts = append(opts, opt)
 	}
 	if c.PromptCacheAll {
 		add(llama.EnablePromptCacheAll)
 	}
 	if c.PromptCachePath != "" {
 		// Create parent directory
 		cachePath := filepath.Join(modelPath, c.PromptCachePath)
 		os.MkdirAll(filepath.Dir(cachePath), 0755)
 		add(llama.SetPathPromptCache(cachePath))
 	}
 	if c.Mirostat != 0 {
 		add(llama.SetMirostat(c.Mirostat))
 	}
 	if c.MirostatETA != 0 {
 		add(llama.SetMirostatETA(c.MirostatETA))
 	}
 	if c.MirostatTAU != 0 {
 		add(llama.SetMirostatTAU(c.MirostatTAU))
 	}
 	if c.Debug {
 		add(llama.Debug)
 	}
 	add(llama.SetStopWords(c.StopWords...))
 	if c.RepeatPenalty != 0 {
 		add(llama.SetPenalty(c.RepeatPenalty))
 	}
 	if c.Keep != 0 {
 		add(llama.SetNKeep(c.Keep))
 	}
 	if c.Batch != 0 {
 		add(llama.SetBatch(c.Batch))
 	}
 	if c.F16 {
 		add(llama.EnableF16KV)
 	}
 	if c.IgnoreEOS {
 		add(llama.IgnoreEOS)
 	}
 	if c.Seed != 0 {
 		add(llama.SetSeed(c.Seed))
 	}
 	return opts
 }
 // ModelInference loads the model named by c.Model and returns a function
 // that runs a prediction for the prompt s.
 //
 // With an empty c.Backend the loader's GreedyLoader is used, otherwise
 // BackendLoader. An error is returned immediately when loading fails or when
 // the loaded model is of a type this function cannot drive; without that
 // check the returned function would call a nil func and panic.
 //
 // tokenCallback may be nil. For rwkv, gpt4all and llama models it is handed
 // to the backend for streaming; for every other model it is called once
 // with the complete result after the prediction returns. Calls are
 // serialised per model file through the package-level mutexes map.
 func ModelInference(s string, loader *model.ModelLoader, c Config, tokenCallback func(string) bool) (func() (string, error), error) {
 	supportStreams := false
 	modelFile := c.Model
 	llamaOpts := defaultLLamaOpts(c)
 	var inferenceModel interface{}
 	var err error
 	if c.Backend == "" {
 		inferenceModel, err = loader.GreedyLoader(modelFile, llamaOpts, uint32(c.Threads))
 	} else {
 		inferenceModel, err = loader.BackendLoader(c.Backend, modelFile, llamaOpts, uint32(c.Threads))
 	}
 	if err != nil {
 		return nil, err
 	}
 	var fn func() (string, error)
 	switch model := inferenceModel.(type) {
 	case *rwkv.RwkvState:
 		supportStreams = true
 		fn = func() (string, error) {
 			// rwkv takes a single stop word: the first configured one, or newline.
 			stopWord := "\n"
 			if len(c.StopWords) > 0 {
 				stopWord = c.StopWords[0]
 			}
 			if err := model.ProcessInput(s); err != nil {
 				return "", err
 			}
 			response := model.GenerateResponse(c.Maxtokens, stopWord, float32(c.Temperature), float32(c.TopP), tokenCallback)
 			return response, nil
 		}
 	// The go-ggml-transformers model types are distinct concrete types, so
 	// each needs its own case; they share the same option set.
 	case *transformers.GPTNeoX:
 		fn = func() (string, error) {
 			return model.Predict(s, transformersPredictOptions(c)...)
 		}
 	case *transformers.Replit:
 		fn = func() (string, error) {
 			return model.Predict(s, transformersPredictOptions(c)...)
 		}
 	case *transformers.Starcoder:
 		fn = func() (string, error) {
 			return model.Predict(s, transformersPredictOptions(c)...)
 		}
 	case *transformers.MPT:
 		fn = func() (string, error) {
 			return model.Predict(s, transformersPredictOptions(c)...)
 		}
 	case *transformers.GPTJ:
 		fn = func() (string, error) {
 			return model.Predict(s, transformersPredictOptions(c)...)
 		}
 	case *transformers.Dolly:
 		fn = func() (string, error) {
 			return model.Predict(s, transformersPredictOptions(c)...)
 		}
 	case *transformers.GPT2:
 		fn = func() (string, error) {
 			return model.Predict(s, transformersPredictOptions(c)...)
 		}
 	case *bloomz.Bloomz:
 		fn = func() (string, error) {
 			// Generate the prediction using the language model
 			predictOptions := []bloomz.PredictOption{
 				bloomz.SetTemperature(c.Temperature),
 				bloomz.SetTopP(c.TopP),
 				bloomz.SetTopK(c.TopK),
 				bloomz.SetTokens(c.Maxtokens),
 				bloomz.SetThreads(c.Threads),
 			}
 			if c.Seed != 0 {
 				predictOptions = append(predictOptions, bloomz.SetSeed(c.Seed))
 			}
 			return model.Predict(
 				s,
 				predictOptions...,
 			)
 		}
 	case *gpt4all.Model:
 		supportStreams = true
 		fn = func() (string, error) {
 			if tokenCallback != nil {
 				model.SetTokenCallback(tokenCallback)
 			}
 			// Generate the prediction using the language model
 			predictOptions := []gpt4all.PredictOption{
 				gpt4all.SetTemperature(c.Temperature),
 				gpt4all.SetTopP(c.TopP),
 				gpt4all.SetTopK(c.TopK),
 				gpt4all.SetTokens(c.Maxtokens),
 			}
 			if c.Batch != 0 {
 				predictOptions = append(predictOptions, gpt4all.SetBatch(c.Batch))
 			}
 			str, er := model.Predict(
 				s,
 				predictOptions...,
 			)
 			// Seems that if we don't free the callback explicitly we leave functions registered (that might try to send on closed channels)
 			// For instance otherwise the API returns: {"error":{"code":500,"message":"send on closed channel","type":""}}
 			// after a stream event has occurred
 			model.SetTokenCallback(nil)
 			return str, er
 		}
 	case *llama.LLama:
 		supportStreams = true
 		fn = func() (string, error) {
 			if tokenCallback != nil {
 				model.SetTokenCallback(tokenCallback)
 			}
 			predictOptions := buildLLamaPredictOptions(c, loader.ModelPath)
 			str, er := model.Predict(
 				s,
 				predictOptions...,
 			)
 			// Seems that if we don't free the callback explicitly we leave functions registered (that might try to send on closed channels)
 			// For instance otherwise the API returns: {"error":{"code":500,"message":"send on closed channel","type":""}}
 			// after a stream event has occurred
 			model.SetTokenCallback(nil)
 			return str, er
 		}
 	default:
 		// Without this branch fn stays nil and the closure below would panic
 		// on the first call; report the unsupported model type instead.
 		return nil, fmt.Errorf("inference not supported by the backend (model type %T)", inferenceModel)
 	}
 	return func() (string, error) {
 		// This is still needed, see: https://github.com/ggerganov/llama.cpp/discussions/784
 		mutexMap.Lock()
 		l, ok := mutexes[modelFile]
 		if !ok {
 			m := &sync.Mutex{}
 			mutexes[modelFile] = m
 			l = m
 		}
 		mutexMap.Unlock()
 		l.Lock()
 		defer l.Unlock()
 		res, err := fn()
 		// Backends without streaming support deliver the whole result in a
 		// single callback invocation.
 		if tokenCallback != nil && !supportStreams {
 			tokenCallback(res)
 		}
 		return res, err
 	}, nil
 }
 // transformersPredictOptions builds the predict options shared by all
 // go-ggml-transformers model types handled in ModelInference. Temperature,
 // TopP, TopK, Maxtokens and Threads are always set; Batch and Seed are only
 // passed when non-zero.
 func transformersPredictOptions(c Config) []transformers.PredictOption {
 	predictOptions := []transformers.PredictOption{
 		transformers.SetTemperature(c.Temperature),
 		transformers.SetTopP(c.TopP),
 		transformers.SetTopK(c.TopK),
 		transformers.SetTokens(c.Maxtokens),
 		transformers.SetThreads(c.Threads),
 	}
 	if c.Batch != 0 {
 		predictOptions = append(predictOptions, transformers.SetBatch(c.Batch))
 	}
 	if c.Seed != 0 {
 		predictOptions = append(predictOptions, transformers.SetSeed(c.Seed))
 	}
 	return predictOptions
 }
 // ComputeChoices runs the model on predInput input.N times (once when
 // input.N is 0) and lets cb record each post-processed prediction.
 //
 // Every raw prediction is passed through Finetune before cb is invoked with
 // it and a pointer to the accumulated choices. The first error stops the
 // loop and is returned together with the choices gathered so far.
 func ComputeChoices(predInput string, input *OpenAIRequest, config *Config, loader *model.ModelLoader, cb func(string, *[]Choice), tokenCallback func(string) bool) ([]Choice, error) {
 	choices := []Choice{}
 	count := input.N
 	if count == 0 {
 		count = 1
 	}
 	// get the model function to call for the result
 	predict, err := ModelInference(predInput, loader, *config, tokenCallback)
 	if err != nil {
 		return choices, err
 	}
 	for remaining := count; remaining > 0; remaining-- {
 		raw, predErr := predict()
 		if predErr != nil {
 			return choices, predErr
 		}
 		cb(Finetune(*config, predInput, raw), &choices)
 	}
 	return choices, nil
 }
 var cutstrings map[string]*regexp.Regexp = make(map[string]*regexp.Regexp)
 var mu sync.Mutex = sync.Mutex{}
 // Finetune post-processes a raw prediction according to config.
 //
 // When config.Echo is set the input is prepended. Every pattern in
 // config.Cutstrings is then treated as a regular expression and all of its
 // matches are removed. Finally, for each entry in config.TrimSpace the entry
 // is stripped as a prefix and surrounding whitespace is trimmed.
 //
 // An invalid pattern in config.Cutstrings panics (regexp.MustCompile).
 func Finetune(config Config, input, prediction string) string {
 	if config.Echo {
 		prediction = input + prediction
 	}
 	for _, c := range config.Cutstrings {
 		prediction = cutstringRegexp(c).ReplaceAllString(prediction, "")
 	}
 	for _, c := range config.TrimSpace {
 		prediction = strings.TrimSpace(strings.TrimPrefix(prediction, c))
 	}
 	return prediction
 }
 // cutstringRegexp returns the compiled form of pattern, compiling it on
 // first use and caching it in the package-level cutstrings map.
 func cutstringRegexp(pattern string) *regexp.Regexp {
 	mu.Lock()
 	// Deferred so the lock is released even when MustCompile panics on an
 	// invalid pattern; otherwise a recovered panic would leave mu locked and
 	// every later call would block forever.
 	defer mu.Unlock()
 	reg, ok := cutstrings[pattern]
 	if !ok {
 		reg = regexp.MustCompile(pattern)
 		cutstrings[pattern] = reg
 	}
 	return reg
 }
--- a/assets.go
+++ b/assets.go
@ -1,6 +0,0 @@
 package main
 import "embed"
 //go:embed backend-assets/*
 var backendAssets embed.FS
--- a/cmd/grpc/bert-embeddings/main.go
+++ b/cmd/grpc/bert-embeddings/main.go
@ -1,22 +0,0 @@
 package main
 // Note: this is started internally by LocalAI and a server is allocated for each model
 import (
 	"flag"
 	grpc "github.com/go-skynet/LocalAI/pkg/grpc"
 	bert "github.com/go-skynet/LocalAI/pkg/grpc/llm/bert"
 )
 var (
 	addr = flag.String("addr", "localhost:50051", "the address to connect to")
 )
 // main parses the command-line flags and hands *addr together with a
 // bert.Embeddings backend to grpc.StartServer, panicking on error.
 func main() {
 	flag.Parse()
 	err := grpc.StartServer(*addr, &bert.Embeddings{})
 	if err != nil {
 		panic(err)
 	}
 }
--- a/cmd/grpc/bloomz/main.go
+++ b/cmd/grpc/bloomz/main.go
@ -1,23 +0,0 @@
 package main
 // Note: this is started internally by LocalAI and a server is allocated for each model
 import (
 	"flag"
 	bloomz "github.com/go-skynet/LocalAI/pkg/grpc/llm/bloomz"
 	grpc "github.com/go-skynet/LocalAI/pkg/grpc"
 )
 var (
 	addr = flag.String("addr", "localhost:50051", "the address to connect to")
 )
 // main parses the command-line flags and hands *addr together with a
 // bloomz.LLM backend to grpc.StartServer, panicking on error.
 func main() {
 	flag.Parse()
 	err := grpc.StartServer(*addr, &bloomz.LLM{})
 	if err != nil {
 		panic(err)
 	}
 }
--- a/cmd/grpc/dolly/main.go
+++ b/cmd/grpc/dolly/main.go
@ -1,23 +0,0 @@
 package main
 // Note: this is started internally by LocalAI and a server is allocated for each model
 import (
 	"flag"
 	transformers "github.com/go-skynet/LocalAI/pkg/grpc/llm/transformers"
 	grpc "github.com/go-skynet/LocalAI/pkg/grpc"
 )
 var (
 	addr = flag.String("addr", "localhost:50051", "the address to connect to")
 )
 // main parses the command-line flags and hands *addr together with a
 // transformers.Dolly backend to grpc.StartServer, panicking on error.
 func main() {
 	flag.Parse()
 	err := grpc.StartServer(*addr, &transformers.Dolly{})
 	if err != nil {
 		panic(err)
 	}
 }
--- a/cmd/grpc/falcon-ggml/main.go
+++ b/cmd/grpc/falcon-ggml/main.go
@ -1,23 +0,0 @@
 package main
 // Note: this is started internally by LocalAI and a server is allocated for each model
 import (
 	"flag"
 	transformers "github.com/go-skynet/LocalAI/pkg/grpc/llm/transformers"
 	grpc "github.com/go-skynet/LocalAI/pkg/grpc"
 )
 var (
 	addr = flag.String("addr", "localhost:50051", "the address to connect to")
 )
 // main parses the command-line flags and hands *addr together with a
 // transformers.Falcon backend to grpc.StartServer, panicking on error.
 func main() {
 	flag.Parse()
 	err := grpc.StartServer(*addr, &transformers.Falcon{})
 	if err != nil {
 		panic(err)
 	}
 }
--- a/cmd/grpc/falcon/main.go
+++ b/cmd/grpc/falcon/main.go
@ -1,25 +0,0 @@
 package main
 // GRPC Falcon server
 // Note: this is started internally by LocalAI and a server is allocated for each model
 import (
 	"flag"
 	falcon "github.com/go-skynet/LocalAI/pkg/grpc/llm/falcon"
 	grpc "github.com/go-skynet/LocalAI/pkg/grpc"
 )
 var (
 	addr = flag.String("addr", "localhost:50051", "the address to connect to")
 )
 // main parses the command-line flags and hands *addr together with a
 // falcon.LLM backend to grpc.StartServer, panicking on error.
 func main() {
 	flag.Parse()
 	err := grpc.StartServer(*addr, &falcon.LLM{})
 	if err != nil {
 		panic(err)
 	}
 }
--- a/cmd/grpc/gpt2/main.go
+++ b/cmd/grpc/gpt2/main.go
@ -1,23 +0,0 @@
 package main
 // Note: this is started internally by LocalAI and a server is allocated for each model
 import (
 	"flag"
 	transformers "github.com/go-skynet/LocalAI/pkg/grpc/llm/transformers"
 	grpc "github.com/go-skynet/LocalAI/pkg/grpc"
 )
 var (
 	addr = flag.String("addr", "localhost:50051", "the address to connect to")
 )
 // main parses the command-line flags and hands *addr together with a
 // transformers.GPT2 backend to grpc.StartServer, panicking on error.
 func main() {
 	flag.Parse()
 	err := grpc.StartServer(*addr, &transformers.GPT2{})
 	if err != nil {
 		panic(err)
 	}
 }
--- a/cmd/grpc/gpt4all/main.go
+++ b/cmd/grpc/gpt4all/main.go
@ -1,23 +0,0 @@
 package main
 // Note: this is started internally by LocalAI and a server is allocated for each model
 import (
 	"flag"
 	gpt4all "github.com/go-skynet/LocalAI/pkg/grpc/llm/gpt4all"
 	grpc "github.com/go-skynet/LocalAI/pkg/grpc"
 )
 var (
 	addr = flag.String("addr", "localhost:50051", "the address to connect to")
 )
 // main parses the command-line flags and hands *addr together with a
 // gpt4all.LLM backend to grpc.StartServer, panicking on error.
 func main() {
 	flag.Parse()
 	err := grpc.StartServer(*addr, &gpt4all.LLM{})
 	if err != nil {
 		panic(err)
 	}
 }
--- a/cmd/grpc/gptj/main.go
+++ b/cmd/grpc/gptj/main.go
@ -1,23 +0,0 @@
 package main
 // Note: this is started internally by LocalAI and a server is allocated for each model
 import (
 	"flag"
 	transformers "github.com/go-skynet/LocalAI/pkg/grpc/llm/transformers"
 	grpc "github.com/go-skynet/LocalAI/pkg/grpc"
 )
 var (
 	addr = flag.String("addr", "localhost:50051", "the address to connect to")
 )
 // main parses the command-line flags and hands *addr together with a
 // transformers.GPTJ backend to grpc.StartServer, panicking on error.
 func main() {
 	flag.Parse()
 	err := grpc.StartServer(*addr, &transformers.GPTJ{})
 	if err != nil {
 		panic(err)
 	}
 }
--- a/cmd/grpc/gptneox/main.go
+++ b/cmd/grpc/gptneox/main.go
@ -1,23 +0,0 @@
 package main
 // Note: this is started internally by LocalAI and a server is allocated for each model
 import (
 	"flag"
 	transformers "github.com/go-skynet/LocalAI/pkg/grpc/llm/transformers"
 	grpc "github.com/go-skynet/LocalAI/pkg/grpc"
 )
 var (
 	addr = flag.String("addr", "localhost:50051", "the address to connect to")
 )
 // main parses the command-line flags and hands *addr together with a
 // transformers.GPTNeoX backend to grpc.StartServer, panicking on error.
 func main() {
 	flag.Parse()
 	err := grpc.StartServer(*addr, &transformers.GPTNeoX{})
 	if err != nil {
 		panic(err)
 	}
 }
--- a/cmd/grpc/langchain-huggingface/main.go
+++ b/cmd/grpc/langchain-huggingface/main.go
@ -1,23 +0,0 @@
 package main
 // Note: this is started internally by LocalAI and a server is allocated for each model
 import (
 	"flag"
 	langchain "github.com/go-skynet/LocalAI/pkg/grpc/llm/langchain"
 	grpc "github.com/go-skynet/LocalAI/pkg/grpc"
 )
 var (
 	addr = flag.String("addr", "localhost:50051", "the address to connect to")
 )
 // main parses the command-line flags and hands *addr together with a
 // langchain.LLM backend to grpc.StartServer, panicking on error.
 func main() {
 	flag.Parse()
 	err := grpc.StartServer(*addr, &langchain.LLM{})
 	if err != nil {
 		panic(err)
 	}
 }
--- a/cmd/grpc/llama-grammar/main.go
+++ b/cmd/grpc/llama-grammar/main.go
@ -1,25 +0,0 @@
 package main
 // GRPC llama-grammar server
 // Note: this is started internally by LocalAI and a server is allocated for each model
 import (
 	"flag"
 	llama "github.com/go-skynet/LocalAI/pkg/grpc/llm/llama-grammar"
 	grpc "github.com/go-skynet/LocalAI/pkg/grpc"
 )
 var (
 	addr = flag.String("addr", "localhost:50051", "the address to connect to")
 )
 // main parses the command-line flags and hands *addr together with a
 // llama.LLM backend (imported from the llama-grammar package) to
 // grpc.StartServer, panicking on error.
 func main() {
 	flag.Parse()
 	err := grpc.StartServer(*addr, &llama.LLM{})
 	if err != nil {
 		panic(err)
 	}
 }
--- a/cmd/grpc/llama/main.go
+++ b/cmd/grpc/llama/main.go
@ -1,25 +0,0 @@
 package main
 // GRPC llama server
 // Note: this is started internally by LocalAI and a server is allocated for each model
 import (
 	"flag"
 	llama "github.com/go-skynet/LocalAI/pkg/grpc/llm/llama"
 	grpc "github.com/go-skynet/LocalAI/pkg/grpc"
 )
 var (
 	addr = flag.String("addr", "localhost:50051", "the address to connect to")
 )
 // main parses the command-line flags and hands *addr together with a
 // llama.LLM backend to grpc.StartServer, panicking on error.
 func main() {
 	flag.Parse()
 	err := grpc.StartServer(*addr, &llama.LLM{})
 	if err != nil {
 		panic(err)
 	}
 }
--- a/cmd/grpc/mpt/main.go
+++ b/cmd/grpc/mpt/main.go
@ -1,23 +0,0 @@
 package main
 // Note: this is started internally by LocalAI and a server is allocated for each model
 import (
 	"flag"
 	transformers "github.com/go-skynet/LocalAI/pkg/grpc/llm/transformers"
 	grpc "github.com/go-skynet/LocalAI/pkg/grpc"
 )
 var (
 	addr = flag.String("addr", "localhost:50051", "the address to connect to")
 )
 // main parses the command-line flags and hands *addr together with a
 // transformers.MPT backend to grpc.StartServer, panicking on error.
 func main() {
 	flag.Parse()
 	err := grpc.StartServer(*addr, &transformers.MPT{})
 	if err != nil {
 		panic(err)
 	}
 }
--- a/cmd/grpc/piper/main.go
+++ b/cmd/grpc/piper/main.go
@ -1,23 +0,0 @@
 package main
 // Note: this is started internally by LocalAI and a server is allocated for each model
 import (
 	"flag"
 	tts "github.com/go-skynet/LocalAI/pkg/grpc/tts"
 	grpc "github.com/go-skynet/LocalAI/pkg/grpc"
 )
 var (
 	addr = flag.String("addr", "localhost:50051", "the address to connect to")
 )
 // main parses the command-line flags and hands *addr together with a
 // tts.Piper backend to grpc.StartServer, panicking on error.
 func main() {
 	flag.Parse()
 	err := grpc.StartServer(*addr, &tts.Piper{})
 	if err != nil {
 		panic(err)
 	}
 }
--- a/cmd/grpc/replit/main.go
+++ b/cmd/grpc/replit/main.go
@ -1,23 +0,0 @@
 package main
 // Note: this is started internally by LocalAI and a server is allocated for each model
 import (
 	"flag"
 	transformers "github.com/go-skynet/LocalAI/pkg/grpc/llm/transformers"
 	grpc "github.com/go-skynet/LocalAI/pkg/grpc"
 )
 var (
 	addr = flag.String("addr", "localhost:50051", "the address to connect to")
 )
 // main parses the command-line flags and hands *addr together with a
 // transformers.Replit backend to grpc.StartServer, panicking on error.
 func main() {
 	flag.Parse()
 	err := grpc.StartServer(*addr, &transformers.Replit{})
 	if err != nil {
 		panic(err)
 	}
 }
--- a/cmd/grpc/rwkv/main.go
+++ b/cmd/grpc/rwkv/main.go
@ -1,23 +0,0 @@
 package main
 // Note: this is started internally by LocalAI and a server is allocated for each model
 import (
 	"flag"
 	rwkv "github.com/go-skynet/LocalAI/pkg/grpc/llm/rwkv"
 	grpc "github.com/go-skynet/LocalAI/pkg/grpc"
 )
 var (
 	addr = flag.String("addr", "localhost:50051", "the address to connect to")
 )
 // main parses the command-line flags and hands *addr together with a
 // rwkv.LLM backend to grpc.StartServer, panicking on error.
 func main() {
 	flag.Parse()
 	err := grpc.StartServer(*addr, &rwkv.LLM{})
 	if err != nil {
 		panic(err)
 	}
 }
--- a/cmd/grpc/stablediffusion/main.go
+++ b/cmd/grpc/stablediffusion/main.go
@ -1,23 +0,0 @@
 package main
 // Note: this is started internally by LocalAI and a server is allocated for each model
 import (
 	"flag"
 	image "github.com/go-skynet/LocalAI/pkg/grpc/image"
 	grpc "github.com/go-skynet/LocalAI/pkg/grpc"
 )
 var (
 	addr = flag.String("addr", "localhost:50051", "the address to connect to")
 )
 // main parses the command-line flags and hands *addr together with an
 // image.StableDiffusion backend to grpc.StartServer, panicking on error.
 func main() {
 	flag.Parse()
 	err := grpc.StartServer(*addr, &image.StableDiffusion{})
 	if err != nil {
 		panic(err)
 	}
 }
--- a/cmd/grpc/starcoder/main.go
+++ b/cmd/grpc/starcoder/main.go
@ -1,23 +0,0 @@
 package main
 // Note: this is started internally by LocalAI and a server is allocated for each model
 import (
 	"flag"
 	transformers "github.com/go-skynet/LocalAI/pkg/grpc/llm/transformers"
 	grpc "github.com/go-skynet/LocalAI/pkg/grpc"
 )
 var (
 	addr = flag.String("addr", "localhost:50051", "the address to connect to")
 )
 // main parses the command-line flags and hands *addr together with a
 // transformers.Starcoder backend to grpc.StartServer, panicking on error.
 func main() {
 	flag.Parse()
 	err := grpc.StartServer(*addr, &transformers.Starcoder{})
 	if err != nil {
 		panic(err)
 	}
 }
--- a/cmd/grpc/whisper/main.go
+++ b/cmd/grpc/whisper/main.go
@ -1,23 +0,0 @@
 package main
 // Note: this is started internally by LocalAI and a server is allocated for each model
 import (
 	"flag"
 	transcribe "github.com/go-skynet/LocalAI/pkg/grpc/transcribe"
 	grpc "github.com/go-skynet/LocalAI/pkg/grpc"
 )
 var (
 	addr = flag.String("addr", "localhost:50051", "the address to connect to")
 )
 // main parses the command-line flags and hands *addr together with a
 // transcribe.Whisper backend to grpc.StartServer, panicking on error.
 func main() {
 	flag.Parse()
 	err := grpc.StartServer(*addr, &transcribe.Whisper{})
 	if err != nil {
 		panic(err)
 	}
 }
--- a/entrypoint.sh
+++ b/entrypoint.sh
@ -5,17 +5,7 @@ cd /build
 if [ "$REBUILD" != "false" ]; then
 	rm -rf ./local-ai
-	ESPEAK_DATA=/build/lib/Linux-$(uname -m)/piper_phonemize/lib/espeak-ng-data make build -j${BUILD_PARALLELISM:-1}
+	make build
 else
 	echo "@@@@@"
 	echo "Skipping rebuild"
 	echo "@@@@@"
 	echo "If you are experiencing issues with the pre-compiled builds, try setting REBUILD=true"
 	echo "If you are still experiencing issues with the build, try setting CMAKE_ARGS and disable the instructions set as needed:"
 	echo 'CMAKE_ARGS="-DLLAMA_F16C=OFF -DLLAMA_AVX512=OFF -DLLAMA_AVX2=OFF -DLLAMA_FMA=OFF"'
 	echo "see the documentation at: https://localai.io/basics/build/index.html"
 	echo "Note: See also https://github.com/go-skynet/LocalAI/issues/288"
 	echo "@@@@@"
 fi
 ./local-ai "$@"
--- a/examples/README.md
+++ b/examples/README.md
@ -22,16 +22,6 @@ This integration shows how to use LocalAI with [mckaywrigley/chatbot-ui](https:/
 [Check it out here](https://github.com/go-skynet/LocalAI/tree/master/examples/chatbot-ui/)
 There is also a separate example to show how to manually set up a model: [example](https://github.com/go-skynet/LocalAI/tree/master/examples/chatbot-ui-manual/)
 ### K8sGPT
 _by [@mudler](https://github.com/mudler)_
 This example shows how to use LocalAI inside Kubernetes with [k8sgpt](https://k8sgpt.ai).
 ![Screenshot from 2023-06-19 23-58-47](https://github.com/go-skynet/go-ggml-transformers.cpp/assets/2420543/cab87409-ee68-44ae-8d53-41627fb49509)
 ### Flowise
 _by [@mudler](https://github.com/mudler)_
@ -64,14 +54,6 @@ A ready to use example to show e2e how to integrate LocalAI with langchain
 [Check it out here](https://github.com/go-skynet/LocalAI/tree/master/examples/langchain-python/)
 ### LocalAI functions
 _by [@mudler](https://github.com/mudler)_
 A ready to use example to show how to use OpenAI functions with LocalAI
 [Check it out here](https://github.com/go-skynet/LocalAI/tree/master/examples/functions/)
 ### LocalAI WebUI
 _by [@dhruvgera](https://github.com/dhruvgera)_
@ -106,14 +88,6 @@ Run a slack bot which lets you talk directly with a model
 [Check it out here](https://github.com/go-skynet/LocalAI/tree/master/examples/slack-bot/)
 ### Slack bot (Question answering)
 _by [@mudler](https://github.com/mudler)_
 Run a slack bot, ideally for teams, which lets you ask questions on a documentation website, or a github repository.
 [Check it out here](https://github.com/go-skynet/LocalAI/tree/master/examples/slack-qa-bot/)
 ### Question answering on documents with llama-index
 _by [@mudler](https://github.com/mudler)_
@ -130,16 +104,6 @@ Shows how to integrate with `Langchain` and `Chroma` to enable question answerin
 [Check it out here](https://github.com/go-skynet/LocalAI/tree/master/examples/langchain-chroma/)
 ### Telegram bot
 _by [@mudler](https://github.com/mudler)_
 ![Screenshot from 2023-06-09 00-36-26](https://github.com/go-skynet/LocalAI/assets/2420543/e98b4305-fa2d-41cf-9d2f-1bb2d75ca902)
 Use LocalAI to power a Telegram bot assistant, with Image generation and audio support!
 [Check it out here](https://github.com/go-skynet/LocalAI/tree/master/examples/telegram-bot/)
 ### Template for Runpod.io
 _by [@fHachenberg](https://github.com/fHachenberg)_
--- a/examples/chatbot-ui-manual/README.md
+++ b/examples/chatbot-ui-manual/README.md
@ -1,48 +0,0 @@
 # chatbot-ui
 Example of integration with [mckaywrigley/chatbot-ui](https://github.com/mckaywrigley/chatbot-ui).
 ![Screenshot from 2023-04-26 23-59-55](https://user-images.githubusercontent.com/2420543/234715439-98d12e03-d3ce-4f94-ab54-2b256808e05e.png)
 ## Setup
 ```bash
 # Clone LocalAI
 git clone https://github.com/go-skynet/LocalAI
 cd LocalAI/examples/chatbot-ui-manual
 # (optional) Checkout a specific LocalAI tag
 # git checkout -b build <TAG>
 # Download gpt4all-j to models/
 wget https://gpt4all.io/models/ggml-gpt4all-j.bin -O models/ggml-gpt4all-j
 # start with docker-compose
 docker-compose up -d --pull always
 # or you can build the images with:
 # docker-compose up -d --build
 ```
 ## Pointing chatbot-ui to a separately managed LocalAI service
 If you want to use the [chatbot-ui example](https://github.com/go-skynet/LocalAI/tree/master/examples/chatbot-ui) with an externally managed LocalAI service, you can alter the `docker-compose` file so that it looks like the below. You will notice the file is smaller, because we have removed the section that would normally start the LocalAI service. Take care to update the IP address (or FQDN) that the chatbot-ui service tries to access (marked `<<LOCALAI_IP>>` below):
 ```
 version: '3.6'
 services:
  chatgpt:
    image: ghcr.io/mckaywrigley/chatbot-ui:main
    ports:
      - 3000:3000
    environment:
      - 'OPENAI_API_KEY=sk-XXXXXXXXXXXXXXXXXXXX'
      - 'OPENAI_API_HOST=http://<<LOCALAI_IP>>:8080'
 ```
 Once you've edited the `docker-compose` file, you can start it with `docker compose up`, then browse to `http://localhost:3000`.
 ## Accessing chatbot-ui
 Open http://localhost:3000 for the Web UI.
--- a/examples/chatbot-ui-manual/docker-compose.yaml
+++ b/examples/chatbot-ui-manual/docker-compose.yaml
@ -1,24 +0,0 @@
 version: '3.6'
 services:
  # LocalAI API server; model files are read from ./models via the volume below.
  api:
    image: quay.io/go-skynet/local-ai:latest
    build:
      context: ../../
      dockerfile: Dockerfile
    ports:
      # HOST:CONTAINER mappings are quoted so they are always read as strings.
      - "8080:8080"
    environment:
      - DEBUG=true
      - MODELS_PATH=/models
    volumes:
      - ./models:/models:cached
    command: ["/usr/bin/local-ai" ]
  # chatbot-ui front end, pointed at the api service above.
  chatgpt:
    image: ghcr.io/mckaywrigley/chatbot-ui:main
    ports:
      - "3000:3000"
    environment:
      - 'OPENAI_API_KEY=sk-XXXXXXXXXXXXXXXXXXXX'
      - 'OPENAI_API_HOST=http://api:8080'
--- a/examples/chatbot-ui-manual/models/completion.tmpl
+++ b/examples/chatbot-ui-manual/models/completion.tmpl
@ -1 +0,0 @@
 {{.Input}}
--- a/examples/chatbot-ui-manual/models/gpt-3.5-turbo.yaml
+++ b/examples/chatbot-ui-manual/models/gpt-3.5-turbo.yaml
@ -1,16 +0,0 @@
 # Model definition for the chatbot-ui manual example.
 # NOTE(review): `name` is presumably the model name clients request - confirm.
 name: gpt-3.5-turbo
 parameters:
  # Matches the file name the example README downloads into models/.
  model: ggml-gpt4all-j
  top_k: 80
  temperature: 0.2
  top_p: 0.7
 context_size: 1024
 stopwords:
 - "HUMAN:"
 - "GPT:"
 roles:
  user: " "
  system: " "
 # Template names correspond to completion.tmpl and gpt4all.tmpl in this directory.
 template:
  completion: completion
  chat: gpt4all
--- a/examples/chatbot-ui-manual/models/gpt4all.tmpl
+++ b/examples/chatbot-ui-manual/models/gpt4all.tmpl
@ -1,4 +0,0 @@
 The prompt below is a question to answer, a task to complete, or a conversation to respond to; decide which and write an appropriate response.
 ### Prompt:
 {{.Input}}
 ### Response:
--- a/examples/flowise/README.md
+++ b/examples/flowise/README.md
@ -24,7 +24,3 @@ docker-compose up --pull always
 Open http://localhost:3000.
 ## Using LocalAI
 Search for LocalAI in the integration, and use the `http://api:8080/` as URL.
--- a/examples/functions/.env
+++ b/examples/functions/.env
@ -1,9 +0,0 @@
 OPENAI_API_KEY=sk---anystringhere
 OPENAI_API_BASE=http://api:8080/v1
 # Models to preload at start
 # Here we configure openllama-7b-open-instruct as gpt-3.5-turbo
 PRELOAD_MODELS=[{"url": "github:go-skynet/model-gallery/openllama-7b-open-instruct.yaml", "name": "gpt-3.5-turbo"}]
 ## Change the default number of threads
 #THREADS=14
--- a/examples/functions/Dockerfile
+++ b/examples/functions/Dockerfile
@ -1,5 +0,0 @@
 # Image for the function-calling example client (runs functions-openai.py).
 FROM python:3.10-bullseye
 COPY . /app
 WORKDIR /app
 RUN pip install --no-cache-dir -r requirements.txt
 # Exec form must be a valid JSON array: a trailing ";" makes Docker fall back
 # to shell form and run the literal text instead of the Python interpreter.
 ENTRYPOINT [ "python", "./functions-openai.py" ]
--- a/examples/functions/README.md
+++ b/examples/functions/README.md
@ -1,18 +0,0 @@
 # LocalAI functions
 Example of using LocalAI functions, see the [OpenAI](https://openai.com/blog/function-calling-and-other-api-updates) blog post.
 ## Run
 ```bash
 # Clone LocalAI
 git clone https://github.com/go-skynet/LocalAI
 cd LocalAI/examples/functions
 docker-compose run --rm functions
 ```
 Note: The example automatically downloads the `openllama` model as it is under a permissive license.
 See the `.env` configuration file to set a different model with the [model-gallery](https://github.com/go-skynet/model-gallery) by editing `PRELOAD_MODELS`.
--- a/examples/functions/docker-compose.yaml
+++ b/examples/functions/docker-compose.yaml
@ -1,23 +0,0 @@
 version: "3.9"
 services:
  # LocalAI API server.
  api:
    image: quay.io/go-skynet/local-ai:master
    # Explicit readiness probe backing the `service_healthy` condition that the
    # `functions` service waits on. The model listed in PRELOAD_MODELS (.env) is
    # downloaded on first start, so the window is generous; tune it to your
    # network connection.
    healthcheck:
      test: ["CMD", "curl", "-f", "http://localhost:8080/readyz"]
      interval: 1m
      timeout: 20m
      retries: 20
    ports:
      - 8080:8080
    env_file:
      - .env
    environment:
      - DEBUG=true
      - MODELS_PATH=/models
    volumes:
      - ./models:/models:cached
    command: ["/usr/bin/local-ai" ]
  # Example client built from the Dockerfile in this directory; it starts only
  # once `api` reports healthy.
  functions:
    build:
      context: .
      dockerfile: Dockerfile
    depends_on:
      api:
        condition: service_healthy
    env_file:
      - .env
--- a/examples/functions/functions-openai.py
+++ b/examples/functions/functions-openai.py
@ -1,76 +0,0 @@
 import openai
 import json
 # Example dummy function hard coded to return the same weather
 # In production, this could be your backend API or an external API
 def get_current_weather(location, unit="fahrenheit"):
    """Return a hard-coded weather report for ``location`` as a JSON string.
    The temperature and forecast are fixed; only ``location`` and ``unit``
    are echoed back from the arguments.
    """
    return json.dumps(
        {
            "location": location,
            "temperature": "72",
            "unit": unit,
            "forecast": ["sunny", "windy"],
        }
    )
 def run_conversation():
    """Ask the model about Boston's weather, letting it call ``get_current_weather``.
    Sends one user message together with the function schema. If the reply
    contains a ``function_call``, the named function is executed locally, its
    result is appended to the conversation, and the follow-up completion is
    returned. If the model answers directly, the first completion is returned.
    Raises:
        KeyError: if the model names a function that is not registered.
        json.JSONDecodeError: if the function-call arguments are not valid JSON.
    """
    # Step 1: send the conversation and available functions to GPT
    messages = [{"role": "user", "content": "What's the weather like in Boston?"}]
    functions = [
        {
            "name": "get_current_weather",
            "description": "Get the current weather in a given location",
            "parameters": {
                "type": "object",
                "properties": {
                    "location": {
                        "type": "string",
                        "description": "The city and state, e.g. San Francisco, CA",
                    },
                    "unit": {"type": "string", "enum": ["celsius", "fahrenheit"]},
                },
                "required": ["location"],
            },
        }
    ]
    response = openai.ChatCompletion.create(
        model="gpt-3.5-turbo",
        messages=messages,
        functions=functions,
        function_call="auto",  # auto is default, but we'll be explicit
    )
    response_message = response["choices"][0]["message"]
    # Step 2: check if GPT wanted to call a function
    if not response_message.get("function_call"):
        # The model answered directly; hand back that answer instead of None.
        return response
    # Step 3: call the function
    # Note: the JSON response may not always be valid; be sure to handle errors
    available_functions = {
        "get_current_weather": get_current_weather,
    }  # only one function in this example, but you can have multiple
    function_name = response_message["function_call"]["name"]
    function_to_call = available_functions[function_name]
    function_args = json.loads(response_message["function_call"]["arguments"])
    call_kwargs = {"location": function_args.get("location")}
    # "unit" is optional in the schema: forward it only when the model supplied
    # one, otherwise None would override the function's "fahrenheit" default.
    if function_args.get("unit") is not None:
        call_kwargs["unit"] = function_args["unit"]
    function_response = function_to_call(**call_kwargs)
    # Step 4: send the info on the function call and function response to GPT
    messages.append(response_message)  # extend conversation with assistant's reply
    messages.append(
        {
            "role": "function",
            "name": function_name,
            "content": function_response,
        }
    )  # extend conversation with function response
    # get a new response from GPT where it can see the function response
    return openai.ChatCompletion.create(
        model="gpt-3.5-turbo",
        messages=messages,
    )
 print(run_conversation())
--- a/examples/functions/requirements.txt
+++ b/examples/functions/requirements.txt
@ -1,2 +0,0 @@
 langchain==0.0.234
 openai==0.27.8
--- a/examples/k8sgpt/README.md
+++ b/examples/k8sgpt/README.md
@ -1,70 +0,0 @@
 # k8sgpt example
 This example shows how to use LocalAI with k8sgpt.
 ![Screenshot from 2023-06-19 23-58-47](https://github.com/go-skynet/go-ggml-transformers.cpp/assets/2420543/cab87409-ee68-44ae-8d53-41627fb49509)
 ## Create the cluster locally with Kind (optional)
 If you want to test this locally without a remote Kubernetes cluster, you can use kind.
 Install [kind](https://kind.sigs.k8s.io/) and create a cluster:
 ```
 kind create cluster
 ```
 ## Setup LocalAI
 We will use [helm](https://helm.sh/docs/intro/install/):
 ```
 helm repo add go-skynet https://go-skynet.github.io/helm-charts/
 helm repo update
 # Clone LocalAI
 git clone https://github.com/go-skynet/LocalAI
 cd LocalAI/examples/k8sgpt
 # modify values.yaml preload_models with the models you want to install.
 # CHANGE the URL to a model in huggingface.
 helm install local-ai go-skynet/local-ai --create-namespace --namespace local-ai --values values.yaml
 ```
 ## Setup K8sGPT
 ```
 # Install k8sgpt
 helm repo add k8sgpt https://charts.k8sgpt.ai/
 helm repo update
 helm install release k8sgpt/k8sgpt-operator -n k8sgpt-operator-system --create-namespace
 ```
 Apply the k8sgpt-operator configuration:
 ```
 kubectl apply -f - << EOF
 apiVersion: core.k8sgpt.ai/v1alpha1
 kind: K8sGPT
 metadata:
  name: k8sgpt-local-ai
  namespace: default
 spec:
  backend: localai
  # In-cluster address of the LocalAI release installed above (namespace `local-ai`).
  baseUrl: http://local-ai.local-ai.svc.cluster.local:8080/v1
  model: gpt-3.5-turbo
  noCache: false
  version: v0.3.0
  enableAI: true
 EOF
 ```
 ## Test
 Apply a broken pod:
 ```
 kubectl apply -f broken-pod.yaml
 ```
--- a/examples/k8sgpt/broken-pod.yaml
+++ b/examples/k8sgpt/broken-pod.yaml
@ -1,14 +0,0 @@
 # Deliberately faulty Pod, applied in the README's "Test" step.
 apiVersion: v1
 kind: Pod
 metadata:
  name: broken-pod
 spec:
  containers:
    - name: broken-pod
      # NOTE(review): `1.a.b.c` does not look like a real nginx tag; presumably
      # this is the intended fault. Do not "fix" it without confirming.
      image: nginx:1.a.b.c
      livenessProbe:
        httpGet:
          path: /
          port: 90
        initialDelaySeconds: 3
        periodSeconds: 3
--- a/examples/k8sgpt/values.yaml
+++ b/examples/k8sgpt/values.yaml
@ -1,95 +0,0 @@
 replicaCount: 1
 deployment:
  # https://quay.io/repository/go-skynet/local-ai?tab=tags
  image: quay.io/go-skynet/local-ai:latest
  env:
    threads: 4
    debug: "true"
    context_size: 512
    preload_models: '[{ "url": "github:go-skynet/model-gallery/wizard.yaml", "name": "gpt-3.5-turbo", "overrides": { "parameters": { "model": "WizardLM-7B-uncensored.ggmlv3.q5_1" }},"files": [ { "uri": "https://huggingface.co//WizardLM-7B-uncensored-GGML/resolve/main/WizardLM-7B-uncensored.ggmlv3.q5_1.bin", "sha256": "d92a509d83a8ea5e08ba4c2dbaf08f29015932dc2accd627ce0665ac72c2bb2b", "filename": "WizardLM-7B-uncensored.ggmlv3.q5_1" }]}]'
  modelsPath: "/models"
 resources:
  {}
  # We usually recommend not to specify default resources and to leave this as a conscious
  # choice for the user. This also increases chances charts run on environments with little
  # resources, such as Minikube. If you do want to specify resources, uncomment the following
  # lines, adjust them as necessary, and remove the curly braces after 'resources:'.
  # limits:
  #   cpu: 100m
  #   memory: 128Mi
  # requests:
  #   cpu: 100m
  #   memory: 128Mi
 # Prompt templates to include
 # Note: the keys of this map will be the names of the prompt template files
 promptTemplates:
  {}
  # ggml-gpt4all-j.tmpl: |
  #   The prompt below is a question to answer, a task to complete, or a conversation to respond to; decide which and write an appropriate response.
  #   ### Prompt:
  #   {{.Input}}
  #   ### Response:
 # Models to download at runtime
 models:
  # Whether to force download models even if they already exist
  forceDownload: false
  # The list of URLs to download models from
  # Note: the name of the file will be the name of the loaded model
  list:
  #- url: "https://gpt4all.io/models/ggml-gpt4all-j.bin"
      # basicAuth: base64EncodedCredentials
  # Persistent storage for models and prompt templates.
  # PVC and HostPath are mutually exclusive. If both are enabled,
  # PVC configuration takes precedence. If neither are enabled, ephemeral
  # storage is used.
  persistence:
    pvc:
      enabled: false
      size: 6Gi
      accessModes:
        - ReadWriteOnce
      annotations: {}
      # Optional
      storageClass: ~
    hostPath:
      enabled: false
      path: "/models"
 service:
  type: ClusterIP
  port: 8080
  annotations: {}
  # If using an AWS load balancer, you'll need to override the default 60s load balancer idle timeout
  # service.beta.kubernetes.io/aws-load-balancer-connection-idle-timeout: "1200"
 ingress:
  enabled: false
  className: ""
  annotations:
    {}
    # kubernetes.io/ingress.class: nginx
    # kubernetes.io/tls-acme: "true"
  hosts:
    - host: chart-example.local
      paths:
        - path: /
          pathType: ImplementationSpecific
  tls: []
  #  - secretName: chart-example-tls
  #    hosts:
  #      - chart-example.local
 nodeSelector: {}
 tolerations: []
 affinity: {}
--- a/examples/langchain-huggingface/README.md
+++ b/examples/langchain-huggingface/README.md
@ -1,68 +0,0 @@
 # Data query example
 Example of integration with HuggingFace Inference API with help of [langchaingo](https://github.com/tmc/langchaingo).
 ## Setup
 Download LocalAI and start the API:
 ```bash
 # Clone LocalAI
 git clone https://github.com/go-skynet/LocalAI
 cd LocalAI/examples/langchain-huggingface
 docker-compose up -d
 ```
 Note: Ensure you've set the `HUGGINGFACEHUB_API_TOKEN` environment variable; you can generate a token
 on the [Settings / Access Tokens](https://huggingface.co/settings/tokens) page of the HuggingFace site.
 This is an example `.env` file for LocalAI:
 ```ini
 MODELS_PATH=/models
 CONTEXT_SIZE=512
 HUGGINGFACEHUB_API_TOKEN=hg_123456
 ```
 ## Using remote models
 Now you can use any remote model available via the HuggingFace API. For example, let's enable the
 [gpt2](https://huggingface.co/gpt2) model in the `gpt-3.5-turbo.yaml` config:
 ```yml
 name: gpt-3.5-turbo
 parameters:
  model: gpt2
  top_k: 80
  temperature: 0.2
  top_p: 0.7
 context_size: 1024
 backend: "langchain-huggingface"
 stopwords:
 - "HUMAN:"
 - "GPT:"
 roles:
  user: " "
  system: " "
 template:
  completion: completion
  chat: gpt4all
 ```
 Here you can see that the field `parameters.model` is set to `gpt2` and `backend` is set to `langchain-huggingface`.
 ## How to use
 ```shell
 # Now API is accessible at localhost:8080
 curl http://localhost:8080/v1/models
 # {"object":"list","data":[{"id":"gpt-3.5-turbo","object":"model"}]}
 curl http://localhost:8080/v1/completions -H "Content-Type: application/json" -d '{
  "model": "gpt-3.5-turbo",
  "prompt": "A long time ago in a galaxy far, far away",
  "temperature": 0.7
 }'
 ```
--- a/examples/langchain-huggingface/docker-compose.yml
+++ b/examples/langchain-huggingface/docker-compose.yml
@ -1,15 +0,0 @@
 version: '3.6'
 services:
  api:
    image: quay.io/go-skynet/local-ai:latest
    build:
      context: ../../
      dockerfile: Dockerfile
    ports:
      - 8080:8080
    env_file:
      - ../../.env
    volumes:
      - ./models:/models:cached
    command: ["/usr/bin/local-ai"]
--- a/examples/langchain-huggingface/models/completion.tmpl
+++ b/examples/langchain-huggingface/models/completion.tmpl
@ -1 +0,0 @@
 {{.Input}}
--- a/examples/langchain-huggingface/models/gpt-3.5-turbo.yaml
+++ b/examples/langchain-huggingface/models/gpt-3.5-turbo.yaml
@ -1,17 +0,0 @@
 name: gpt-3.5-turbo
 parameters:
  model: gpt2
  top_k: 80
  temperature: 0.2
  top_p: 0.7
 context_size: 1024
 backend: "langchain-huggingface"
 stopwords:
 - "HUMAN:"
 - "GPT:"
 roles:
  user: " "
  system: " "
 template:
  completion: completion
  chat: gpt4all
--- a/examples/langchain-huggingface/models/gpt4all.tmpl
+++ b/examples/langchain-huggingface/models/gpt4all.tmpl
@ -1,4 +0,0 @@
 The prompt below is a question to answer, a task to complete, or a conversation to respond to; decide which and write an appropriate response.
 ### Prompt:
 {{.Input}}
 ### Response:
--- a/examples/langchain-python/README.md
+++ b/examples/langchain-python/README.md
@ -12,8 +12,15 @@ git clone https://github.com/go-skynet/LocalAI
 cd LocalAI/examples/langchain-python
 # (optional) Checkout a specific LocalAI tag
 # git checkout -b build <TAG>
 # Download gpt4all-j to models/
 wget https://gpt4all.io/models/ggml-gpt4all-j.bin -O models/ggml-gpt4all-j
 # start with docker-compose
-docker-compose up --pull always
+docker-compose up -d --build
 pip install langchain
 pip install openai
--- a/examples/langchain-python/docker-compose.yaml
+++ b/examples/langchain-python/docker-compose.yaml
@ -3,14 +3,6 @@ version: '3.6'
 services:
  api:
    image: quay.io/go-skynet/local-ai:latest
    # As initially LocalAI will download the models defined in PRELOAD_MODELS
    # you might need to tweak the healthcheck values here according to your network connection.
    # Here we give a timespan of 20m to download all the required files.
    healthcheck:
      test: ["CMD", "curl", "-f", "http://localhost:8080/readyz"]
      interval: 1m
      timeout: 20m
      retries: 20
    build:
      context: ../../
      dockerfile: Dockerfile
@ -19,9 +11,6 @@ services:
    environment:
      - DEBUG=true
      - MODELS_PATH=/models
      # You can preload different models here as well.
      # See: https://github.com/go-skynet/model-gallery
      - 'PRELOAD_MODELS=[{"url": "github:go-skynet/model-gallery/gpt4all-j.yaml", "name": "gpt-3.5-turbo"}]'
    volumes:
      - ./models:/models:cached
    command: ["/usr/bin/local-ai" ]
--- a/examples/langchain-python/models
+++ b/examples/langchain-python/models
@ -0,0 +1 @@
 ../chatbot-ui/models
--- a/examples/slack-qa-bot/.env.example
+++ b/examples/slack-qa-bot/.env.example
@ -1,48 +0,0 @@
 # Create an app-level token with connections:write scope
 SLACK_APP_TOKEN=xapp-1-...
 # Install the app into your workspace to grab this token
 SLACK_BOT_TOKEN=xoxb-...
 # Set this to a random string, it doesn't matter, however if present the python library complains
 OPENAI_API_KEY=sk-foo-bar-baz
 # Optional: gpt-3.5-turbo and gpt-4 are currently supported (default: gpt-3.5-turbo)
 OPENAI_MODEL=gpt-3.5-turbo
 # Optional: You can adjust the timeout seconds for OpenAI calls (default: 30)
 OPENAI_TIMEOUT_SECONDS=560
 MEMORY_DIR=/tmp/memory_dir
 OPENAI_API_BASE=http://api:8080/v1
 EMBEDDINGS_MODEL_NAME=all-MiniLM-L6-v2
 ## Repository and sitemap to index in the vector database on start
 SITEMAP="https://kairos.io/sitemap.xml"
 # Optional repository names.
 # REPOSITORIES="foo,bar"
 # # Define clone URL for "foo"
 # foo_CLONE_URL="http://github.com.."
 # bar_CLONE_URL="..."
 # # Define branch for foo
 # foo_BRANCH="master"
 # Optional token if scraping issues
 # GITHUB_PERSONAL_ACCESS_TOKEN=""
 # ISSUE_REPOSITORIES="go-skynet/LocalAI,foo/bar,..."
 # Optional: When the string is "true", this app translates ChatGPT prompts into a user's preferred language (default: true)
 USE_SLACK_LANGUAGE=true
 # Optional: Adjust the app's logging level (default: DEBUG)
 SLACK_APP_LOG_LEVEL=INFO
 # Optional: When the string is "true", translate between OpenAI markdown and Slack mrkdwn format (default: false)
 TRANSLATE_MARKDOWN=true
 ### LocalAI
 DEBUG=true
 MODELS_PATH=/models
 IMAGE_PATH=/tmp
 # See: https://github.com/go-skynet/model-gallery
 PRELOAD_MODELS=[{"url": "github:go-skynet/model-gallery/gpt4all-j.yaml", "name": "gpt-3.5-turbo"}]
--- a/examples/slack-qa-bot/README.md
+++ b/examples/slack-qa-bot/README.md
@ -1,23 +0,0 @@
 ## Slack QA Bot 
 This example uses https://github.com/spectrocloud-labs/Slack-QA-bot to deploy a Slack bot that can answer questions about your documentation!
 - Create a new Slack app using the manifest-dev.yml file
 - Install the app into your Slack workspace
 - Retrieve your slack keys and edit `.env`
 - Start the app
 ```bash
 # Clone LocalAI
 git clone https://github.com/go-skynet/LocalAI
 cd LocalAI/examples/slack-qa-bot
 cp -rfv .env.example .env
 # Edit .env and add slackbot api keys, or repository settings to scan
 vim .env
 # run the bot
 docker-compose up
 ```
--- a/examples/slack-qa-bot/deployment.yaml
+++ b/examples/slack-qa-bot/deployment.yaml
@ -1,97 +0,0 @@
 apiVersion: v1
 kind: Namespace
 metadata:
  name: slack-bot
 ---
 apiVersion: v1
 kind: PersistentVolumeClaim
 metadata:
  name: knowledgebase
  namespace: slack-bot
  labels:
    app: localai-qabot
 spec:
  accessModes:
    - ReadWriteOnce
  resources:
    requests:
      storage: 5Gi
 ---
 apiVersion: apps/v1
 kind: Deployment
 metadata:
  name: localai-qabot
  namespace: slack-bot
  labels:
    app: localai-qabot
 spec:
  selector:
    matchLabels:
      app: localai-qabot
  replicas: 1
  template:
    metadata:
      labels:
        app: localai-qabot
      name: localai-qabot
    spec:
      containers:
        - name: localai-qabot-slack
          env:
          - name: OPENAI_API_KEY
            value: "x"
          - name: SLACK_APP_TOKEN
            value: "xapp-1-"
          - name: SLACK_BOT_TOKEN
            value: "xoxb-"
          - name: OPENAI_MODEL
            value: "gpt-3.5-turbo"
          - name: OPENAI_TIMEOUT_SECONDS
            value: "400"
          - name: OPENAI_SYSTEM_TEXT
            value: ""
          - name: MEMORY_DIR
            value: "/memory"
          - name: TRANSLATE_MARKDOWN
            value: "true"
          - name: OPENAI_API_BASE
            value: "http://local-ai.default.svc.cluster.local:8080"
          - name: REPOSITORIES
            value: "KAIROS,AGENT,SDK,OSBUILDER,PACKAGES,IMMUCORE"
          - name: KAIROS_CLONE_URL
            value: "https://github.com/kairos-io/kairos"
          - name: KAIROS_BRANCH
            value: "master"
          - name: AGENT_CLONE_URL
            value: "https://github.com/kairos-io/kairos-agent"
          - name: AGENT_BRANCH
            value: "main"
          - name: SDK_CLONE_URL
            value: "https://github.com/kairos-io/kairos-sdk"
          - name: SDK_BRANCH
            value: "main"
          - name: OSBUILDER_CLONE_URL
            value: "https://github.com/kairos-io/osbuilder"
          - name: OSBUILDER_BRANCH
            value: "master"
          - name: PACKAGES_CLONE_URL
            value: "https://github.com/kairos-io/packages"
          - name: PACKAGES_BRANCH
            value: "main"
          - name: IMMUCORE_CLONE_URL
            value: "https://github.com/kairos-io/immucore"
          - name: IMMUCORE_BRANCH
            value: "master"
          - name: GITHUB_PERSONAL_ACCESS_TOKEN
            value: ""
          - name: ISSUE_REPOSITORIES
            value: "kairos-io/kairos"
          image: quay.io/spectrocloud-labs/slack-qa-local-bot:qa
          imagePullPolicy: Always
          volumeMounts:
            - mountPath: "/memory"
              name: knowledgebase
      volumes:
        - name: knowledgebase
          persistentVolumeClaim:
            claimName: knowledgebase
--- a/examples/slack-qa-bot/docker-compose.yml
+++ b/examples/slack-qa-bot/docker-compose.yml
@ -1,30 +0,0 @@
 version: "3"
 services:
  api:
    image: quay.io/go-skynet/local-ai:latest
    # As initially LocalAI will download the models defined in PRELOAD_MODELS
    # you might need to tweak the healthcheck values here according to your network connection.
    # Here we give a timespan of 20m to download all the required files.
    healthcheck:
      test: ["CMD", "curl", "-f", "http://localhost:8080/readyz"]
      interval: 1m
      timeout: 20m
      retries: 20
    ports:
      - 8080:8080
    env_file:
      - .env
    volumes:
      - ./models:/models:cached
    command: ["/usr/bin/local-ai" ]
  slackbot:
    image: quay.io/spectrocloud-labs/slack-qa-local-bot:qa
    container_name: slackbot
    restart: always
    env_file:
      - .env
    depends_on:
      api:
        condition: service_healthy
--- a/examples/telegram-bot/README.md
+++ b/examples/telegram-bot/README.md
@ -1,8 +1,6 @@
 ## Telegram bot
-![Screenshot from 2023-06-09 00-36-26](https://github.com/go-skynet/LocalAI/assets/2420543/e98b4305-fa2d-41cf-9d2f-1bb2d75ca902)
+This example uses [chatgpt-telegram-bot](https://github.com/karfly/chatgpt_telegram_bot) to deploy a telegram bot with LocalAI instead of OpenAI.
 This example uses a fork of [chatgpt-telegram-bot](https://github.com/karfly/chatgpt_telegram_bot) to deploy a telegram bot with LocalAI instead of OpenAI.
 ```bash
 # Clone LocalAI
@ -10,7 +8,7 @@ git clone https://github.com/go-skynet/LocalAI
 cd LocalAI/examples/telegram-bot
-git clone https://github.com/mudler/chatgpt_telegram_bot
+git clone https://github.com/karfly/chatgpt_telegram_bot
 cp -rf docker-compose.yml chatgpt_telegram_bot
--- a/examples/telegram-bot/docker-compose.yml
+++ b/examples/telegram-bot/docker-compose.yml
@ -23,6 +23,16 @@ services:
    volumes:
      - ./models:/models:cached
    command: ["/usr/bin/local-ai" ]
  mongo:
    container_name: mongo
    image: mongo:latest
    restart: always
    ports:
      - 127.0.0.1:${MONGODB_PORT:-27017}:${MONGODB_PORT:-27017}
    volumes:
      - ${MONGODB_PATH:-./mongodb}:/data/db
    # TODO: add auth
  chatgpt_telegram_bot:
    container_name: chatgpt_telegram_bot
    command: python3 bot/bot.py
@ -36,3 +46,21 @@ services:
    depends_on:
      api:
        condition: service_healthy
      mongo:
        condition: service_started
  mongo_express:
    container_name: mongo-express
    image: mongo-express:latest
    restart: always
    ports:
      - 127.0.0.1:${MONGO_EXPRESS_PORT:-8081}:${MONGO_EXPRESS_PORT:-8081}
    environment:
      - ME_CONFIG_MONGODB_SERVER=mongo
      - ME_CONFIG_MONGODB_PORT=${MONGODB_PORT:-27017}
      - ME_CONFIG_MONGODB_ENABLE_ADMIN=false
      - ME_CONFIG_MONGODB_AUTH_DATABASE=chatgpt_telegram_bot
      - ME_CONFIG_BASICAUTH_USERNAME=${MONGO_EXPRESS_USERNAME:-username}
      - ME_CONFIG_BASICAUTH_PASSWORD=${MONGO_EXPRESS_PASSWORD:-password}
    depends_on:
      - mongo
--- a/extra/grpc/huggingface/backend_pb2.py
+++ b/extra/grpc/huggingface/backend_pb2.py
@ -1,49 +0,0 @@
 # -*- coding: utf-8 -*-
 # Generated by the protocol buffer compiler.  DO NOT EDIT!
 # source: backend.proto
 """Generated protocol buffer code."""
 from google.protobuf import descriptor as _descriptor
 from google.protobuf import descriptor_pool as _descriptor_pool
 from google.protobuf import symbol_database as _symbol_database
 from google.protobuf.internal import builder as _builder
 # @@protoc_insertion_point(imports)
 _sym_db = _symbol_database.Default()
 DESCRIPTOR = _descriptor_pool.Default().AddSerializedFile(b'\n\rbackend.proto\x12\x07\x62\x61\x63kend\"\x0f\n\rHealthMessage\"\xa4\x05\n\x0ePredictOptions\x12\x0e\n\x06Prompt\x18\x01 \x01(\t\x12\x0c\n\x04Seed\x18\x02 \x01(\x05\x12\x0f\n\x07Threads\x18\x03 \x01(\x05\x12\x0e\n\x06Tokens\x18\x04 \x01(\x05\x12\x0c\n\x04TopK\x18\x05 \x01(\x05\x12\x0e\n\x06Repeat\x18\x06 \x01(\x05\x12\r\n\x05\x42\x61tch\x18\x07 \x01(\x05\x12\r\n\x05NKeep\x18\x08 \x01(\x05\x12\x13\n\x0bTemperature\x18\t \x01(\x02\x12\x0f\n\x07Penalty\x18\n \x01(\x02\x12\r\n\x05\x46\x31\x36KV\x18\x0b \x01(\x08\x12\x11\n\tDebugMode\x18\x0c \x01(\x08\x12\x13\n\x0bStopPrompts\x18\r \x03(\t\x12\x11\n\tIgnoreEOS\x18\x0e \x01(\x08\x12\x19\n\x11TailFreeSamplingZ\x18\x0f \x01(\x02\x12\x10\n\x08TypicalP\x18\x10 \x01(\x02\x12\x18\n\x10\x46requencyPenalty\x18\x11 \x01(\x02\x12\x17\n\x0fPresencePenalty\x18\x12 \x01(\x02\x12\x10\n\x08Mirostat\x18\x13 \x01(\x05\x12\x13\n\x0bMirostatETA\x18\x14 \x01(\x02\x12\x13\n\x0bMirostatTAU\x18\x15 \x01(\x02\x12\x12\n\nPenalizeNL\x18\x16 \x01(\x08\x12\x11\n\tLogitBias\x18\x17 \x01(\t\x12\r\n\x05MLock\x18\x19 \x01(\x08\x12\x0c\n\x04MMap\x18\x1a \x01(\x08\x12\x16\n\x0ePromptCacheAll\x18\x1b \x01(\x08\x12\x15\n\rPromptCacheRO\x18\x1c \x01(\x08\x12\x0f\n\x07Grammar\x18\x1d \x01(\t\x12\x0f\n\x07MainGPU\x18\x1e \x01(\t\x12\x13\n\x0bTensorSplit\x18\x1f \x01(\t\x12\x0c\n\x04TopP\x18  \x01(\x02\x12\x17\n\x0fPromptCachePath\x18! 
\x01(\t\x12\r\n\x05\x44\x65\x62ug\x18\" \x01(\x08\x12\x17\n\x0f\x45mbeddingTokens\x18# \x03(\x05\x12\x12\n\nEmbeddings\x18$ \x01(\t\"\x18\n\x05Reply\x12\x0f\n\x07message\x18\x01 \x01(\t\"\xac\x02\n\x0cModelOptions\x12\r\n\x05Model\x18\x01 \x01(\t\x12\x13\n\x0b\x43ontextSize\x18\x02 \x01(\x05\x12\x0c\n\x04Seed\x18\x03 \x01(\x05\x12\x0e\n\x06NBatch\x18\x04 \x01(\x05\x12\x11\n\tF16Memory\x18\x05 \x01(\x08\x12\r\n\x05MLock\x18\x06 \x01(\x08\x12\x0c\n\x04MMap\x18\x07 \x01(\x08\x12\x11\n\tVocabOnly\x18\x08 \x01(\x08\x12\x0f\n\x07LowVRAM\x18\t \x01(\x08\x12\x12\n\nEmbeddings\x18\n \x01(\x08\x12\x0c\n\x04NUMA\x18\x0b \x01(\x08\x12\x12\n\nNGPULayers\x18\x0c \x01(\x05\x12\x0f\n\x07MainGPU\x18\r \x01(\t\x12\x13\n\x0bTensorSplit\x18\x0e \x01(\t\x12\x0f\n\x07Threads\x18\x0f \x01(\x05\x12\x19\n\x11LibrarySearchPath\x18\x10 \x01(\t\"*\n\x06Result\x12\x0f\n\x07message\x18\x01 \x01(\t\x12\x0f\n\x07success\x18\x02 \x01(\x08\"%\n\x0f\x45mbeddingResult\x12\x12\n\nembeddings\x18\x01 \x03(\x02\"C\n\x11TranscriptRequest\x12\x0b\n\x03\x64st\x18\x02 \x01(\t\x12\x10\n\x08language\x18\x03 \x01(\t\x12\x0f\n\x07threads\x18\x04 \x01(\r\"N\n\x10TranscriptResult\x12,\n\x08segments\x18\x01 \x03(\x0b\x32\x1a.backend.TranscriptSegment\x12\x0c\n\x04text\x18\x02 \x01(\t\"Y\n\x11TranscriptSegment\x12\n\n\x02id\x18\x01 \x01(\x05\x12\r\n\x05start\x18\x02 \x01(\x03\x12\x0b\n\x03\x65nd\x18\x03 \x01(\x03\x12\x0c\n\x04text\x18\x04 \x01(\t\x12\x0e\n\x06tokens\x18\x05 \x03(\x05\"\x9e\x01\n\x14GenerateImageRequest\x12\x0e\n\x06height\x18\x01 \x01(\x05\x12\r\n\x05width\x18\x02 \x01(\x05\x12\x0c\n\x04mode\x18\x03 \x01(\x05\x12\x0c\n\x04step\x18\x04 \x01(\x05\x12\x0c\n\x04seed\x18\x05 \x01(\x05\x12\x17\n\x0fpositive_prompt\x18\x06 \x01(\t\x12\x17\n\x0fnegative_prompt\x18\x07 \x01(\t\x12\x0b\n\x03\x64st\x18\x08 \x01(\t\"6\n\nTTSRequest\x12\x0c\n\x04text\x18\x01 \x01(\t\x12\r\n\x05model\x18\x02 \x01(\t\x12\x0b\n\x03\x64st\x18\x03 
\x01(\t2\xeb\x03\n\x07\x42\x61\x63kend\x12\x32\n\x06Health\x12\x16.backend.HealthMessage\x1a\x0e.backend.Reply\"\x00\x12\x34\n\x07Predict\x12\x17.backend.PredictOptions\x1a\x0e.backend.Reply\"\x00\x12\x35\n\tLoadModel\x12\x15.backend.ModelOptions\x1a\x0f.backend.Result\"\x00\x12<\n\rPredictStream\x12\x17.backend.PredictOptions\x1a\x0e.backend.Reply\"\x00\x30\x01\x12@\n\tEmbedding\x12\x17.backend.PredictOptions\x1a\x18.backend.EmbeddingResult\"\x00\x12\x41\n\rGenerateImage\x12\x1d.backend.GenerateImageRequest\x1a\x0f.backend.Result\"\x00\x12M\n\x12\x41udioTranscription\x12\x1a.backend.TranscriptRequest\x1a\x19.backend.TranscriptResult\"\x00\x12-\n\x03TTS\x12\x13.backend.TTSRequest\x1a\x0f.backend.Result\"\x00\x42Z\n\x19io.skynet.localai.backendB\x0eLocalAIBackendP\x01Z+github.com/go-skynet/LocalAI/pkg/grpc/protob\x06proto3')
 _globals = globals()
 _builder.BuildMessageAndEnumDescriptors(DESCRIPTOR, _globals)
 _builder.BuildTopDescriptorsAndMessages(DESCRIPTOR, 'backend_pb2', _globals)
 if _descriptor._USE_C_DESCRIPTORS == False:
  DESCRIPTOR._options = None
  DESCRIPTOR._serialized_options = b'\n\031io.skynet.localai.backendB\016LocalAIBackendP\001Z+github.com/go-skynet/LocalAI/pkg/grpc/proto'
  _globals['_HEALTHMESSAGE']._serialized_start=26
  _globals['_HEALTHMESSAGE']._serialized_end=41
  _globals['_PREDICTOPTIONS']._serialized_start=44
  _globals['_PREDICTOPTIONS']._serialized_end=720
  _globals['_REPLY']._serialized_start=722
  _globals['_REPLY']._serialized_end=746
  _globals['_MODELOPTIONS']._serialized_start=749
  _globals['_MODELOPTIONS']._serialized_end=1049
  _globals['_RESULT']._serialized_start=1051
  _globals['_RESULT']._serialized_end=1093
  _globals['_EMBEDDINGRESULT']._serialized_start=1095
  _globals['_EMBEDDINGRESULT']._serialized_end=1132
  _globals['_TRANSCRIPTREQUEST']._serialized_start=1134
  _globals['_TRANSCRIPTREQUEST']._serialized_end=1201
  _globals['_TRANSCRIPTRESULT']._serialized_start=1203
  _globals['_TRANSCRIPTRESULT']._serialized_end=1281
  _globals['_TRANSCRIPTSEGMENT']._serialized_start=1283
  _globals['_TRANSCRIPTSEGMENT']._serialized_end=1372
  _globals['_GENERATEIMAGEREQUEST']._serialized_start=1375
  _globals['_GENERATEIMAGEREQUEST']._serialized_end=1533
  _globals['_TTSREQUEST']._serialized_start=1535
  _globals['_TTSREQUEST']._serialized_end=1589
  _globals['_BACKEND']._serialized_start=1592
  _globals['_BACKEND']._serialized_end=2083
 # @@protoc_insertion_point(module_scope)
--- a/extra/grpc/huggingface/backend_pb2_grpc.py
+++ b/extra/grpc/huggingface/backend_pb2_grpc.py
@ -1,297 +0,0 @@
 # Generated by the gRPC Python protocol compiler plugin. DO NOT EDIT!
 """Client and server classes corresponding to protobuf-defined services."""
 import grpc
 import backend_pb2 as backend__pb2
 class BackendStub(object):
    """Client stub for the backend.Backend gRPC service.
    After construction every RPC of the service is available as an instance
    attribute (Health, Predict, LoadModel, ...) holding the callable that the
    channel returned for that method.
    """
    def __init__(self, channel):
        """Create one callable per RPC on the given channel.
        Args:
            channel: A grpc.Channel.
        """
        # (attribute name, channel factory, method path, request message, response message)
        rpc_table = (
            ('Health', 'unary_unary', '/backend.Backend/Health', 'HealthMessage', 'Reply'),
            ('Predict', 'unary_unary', '/backend.Backend/Predict', 'PredictOptions', 'Reply'),
            ('LoadModel', 'unary_unary', '/backend.Backend/LoadModel', 'ModelOptions', 'Result'),
            ('PredictStream', 'unary_stream', '/backend.Backend/PredictStream', 'PredictOptions', 'Reply'),
            ('Embedding', 'unary_unary', '/backend.Backend/Embedding', 'PredictOptions', 'EmbeddingResult'),
            ('GenerateImage', 'unary_unary', '/backend.Backend/GenerateImage', 'GenerateImageRequest', 'Result'),
            ('AudioTranscription', 'unary_unary', '/backend.Backend/AudioTranscription', 'TranscriptRequest', 'TranscriptResult'),
            ('TTS', 'unary_unary', '/backend.Backend/TTS', 'TTSRequest', 'Result'),
        )
        for attr_name, factory_name, method_path, request_type, response_type in rpc_table:
            # PredictStream is the only server-streaming RPC; all others are unary.
            make_callable = getattr(channel, factory_name)
            rpc_callable = make_callable(
                method_path,
                request_serializer=getattr(backend__pb2, request_type).SerializeToString,
                response_deserializer=getattr(backend__pb2, response_type).FromString,
            )
            setattr(self, attr_name, rpc_callable)
 def _reject_unimplemented(context):
    """Mark the RPC as UNIMPLEMENTED on ``context`` and raise NotImplementedError."""
    context.set_code(grpc.StatusCode.UNIMPLEMENTED)
    context.set_details('Method not implemented!')
    raise NotImplementedError('Method not implemented!')
 class BackendServicer(object):
    """Server-side base class for the backend.Backend service.
    Every RPC handler defined here sets the UNIMPLEMENTED status code on the
    call context and raises NotImplementedError; concrete backends override
    the handlers they support.
    """
    def Health(self, request, context):
        """Default Health handler: not implemented."""
        _reject_unimplemented(context)
    def Predict(self, request, context):
        """Default Predict handler: not implemented."""
        _reject_unimplemented(context)
    def LoadModel(self, request, context):
        """Default LoadModel handler: not implemented."""
        _reject_unimplemented(context)
    def PredictStream(self, request, context):
        """Default PredictStream handler: not implemented."""
        _reject_unimplemented(context)
    def Embedding(self, request, context):
        """Default Embedding handler: not implemented."""
        _reject_unimplemented(context)
    def GenerateImage(self, request, context):
        """Default GenerateImage handler: not implemented."""
        _reject_unimplemented(context)
    def AudioTranscription(self, request, context):
        """Default AudioTranscription handler: not implemented."""
        _reject_unimplemented(context)
    def TTS(self, request, context):
        """Default TTS handler: not implemented."""
        _reject_unimplemented(context)
 def add_BackendServicer_to_server(servicer, server):
    """Register the handlers of ``servicer`` on ``server`` for backend.Backend.
    Args:
        servicer: Object providing one bound method per RPC (Health, Predict,
            LoadModel, PredictStream, Embedding, GenerateImage,
            AudioTranscription, TTS).
        server: Object whose add_generic_rpc_handlers() receives the single
            generic handler built for the 'backend.Backend' service.
    """
    # (RPC name, grpc handler factory, request message, response message)
    rpc_table = (
        ('Health', 'unary_unary_rpc_method_handler', 'HealthMessage', 'Reply'),
        ('Predict', 'unary_unary_rpc_method_handler', 'PredictOptions', 'Reply'),
        ('LoadModel', 'unary_unary_rpc_method_handler', 'ModelOptions', 'Result'),
        ('PredictStream', 'unary_stream_rpc_method_handler', 'PredictOptions', 'Reply'),
        ('Embedding', 'unary_unary_rpc_method_handler', 'PredictOptions', 'EmbeddingResult'),
        ('GenerateImage', 'unary_unary_rpc_method_handler', 'GenerateImageRequest', 'Result'),
        ('AudioTranscription', 'unary_unary_rpc_method_handler', 'TranscriptRequest', 'TranscriptResult'),
        ('TTS', 'unary_unary_rpc_method_handler', 'TTSRequest', 'Result'),
    )
    rpc_method_handlers = {}
    for rpc_name, factory_name, request_type, response_type in rpc_table:
        build_handler = getattr(grpc, factory_name)
        rpc_method_handlers[rpc_name] = build_handler(
            getattr(servicer, rpc_name),
            request_deserializer=getattr(backend__pb2, request_type).FromString,
            response_serializer=getattr(backend__pb2, response_type).SerializeToString,
        )
    service_handler = grpc.method_handlers_generic_handler(
        'backend.Backend', rpc_method_handlers)
    # The server API takes a sequence of generic handlers; pass a 1-tuple.
    server.add_generic_rpc_handlers((service_handler,))
 def _call_experimental(arity, method_path, request_type, response_type,
                        request, target, *call_options):
    """Forward one call to grpc.experimental.<arity> for the given method.
    ``request_type`` / ``response_type`` name the backend_pb2 messages whose
    SerializeToString / FromString are passed along; ``call_options`` are
    forwarded positionally, unchanged and in the order given.
    """
    invoke = getattr(grpc.experimental, arity)
    return invoke(
        request, target, method_path,
        getattr(backend__pb2, request_type).SerializeToString,
        getattr(backend__pb2, response_type).FromString,
        *call_options)
 # This class is part of an EXPERIMENTAL API.
 class Backend(object):
    """Static, per-call entry points for the backend.Backend RPCs.
    Each method forwards to grpc.experimental.unary_unary (or unary_stream for
    PredictStream) with the method path and the matching message serializers.
    """
    @staticmethod
    def Health(request, target, options=(), channel_credentials=None,
               call_credentials=None, insecure=False, compression=None,
               wait_for_ready=None, timeout=None, metadata=None):
        """Call /backend.Backend/Health (HealthMessage -> Reply)."""
        return _call_experimental(
            'unary_unary', '/backend.Backend/Health', 'HealthMessage', 'Reply',
            request, target, options, channel_credentials, insecure,
            call_credentials, compression, wait_for_ready, timeout, metadata)
    @staticmethod
    def Predict(request, target, options=(), channel_credentials=None,
                call_credentials=None, insecure=False, compression=None,
                wait_for_ready=None, timeout=None, metadata=None):
        """Call /backend.Backend/Predict (PredictOptions -> Reply)."""
        return _call_experimental(
            'unary_unary', '/backend.Backend/Predict', 'PredictOptions', 'Reply',
            request, target, options, channel_credentials, insecure,
            call_credentials, compression, wait_for_ready, timeout, metadata)
    @staticmethod
    def LoadModel(request, target, options=(), channel_credentials=None,
                  call_credentials=None, insecure=False, compression=None,
                  wait_for_ready=None, timeout=None, metadata=None):
        """Call /backend.Backend/LoadModel (ModelOptions -> Result)."""
        return _call_experimental(
            'unary_unary', '/backend.Backend/LoadModel', 'ModelOptions', 'Result',
            request, target, options, channel_credentials, insecure,
            call_credentials, compression, wait_for_ready, timeout, metadata)
    @staticmethod
    def PredictStream(request, target, options=(), channel_credentials=None,
                      call_credentials=None, insecure=False, compression=None,
                      wait_for_ready=None, timeout=None, metadata=None):
        """Call /backend.Backend/PredictStream (PredictOptions -> stream of Reply)."""
        return _call_experimental(
            'unary_stream', '/backend.Backend/PredictStream', 'PredictOptions', 'Reply',
            request, target, options, channel_credentials, insecure,
            call_credentials, compression, wait_for_ready, timeout, metadata)
    @staticmethod
    def Embedding(request, target, options=(), channel_credentials=None,
                  call_credentials=None, insecure=False, compression=None,
                  wait_for_ready=None, timeout=None, metadata=None):
        """Call /backend.Backend/Embedding (PredictOptions -> EmbeddingResult)."""
        return _call_experimental(
            'unary_unary', '/backend.Backend/Embedding', 'PredictOptions', 'EmbeddingResult',
            request, target, options, channel_credentials, insecure,
            call_credentials, compression, wait_for_ready, timeout, metadata)
    @staticmethod
    def GenerateImage(request, target, options=(), channel_credentials=None,
                      call_credentials=None, insecure=False, compression=None,
                      wait_for_ready=None, timeout=None, metadata=None):
        """Call /backend.Backend/GenerateImage (GenerateImageRequest -> Result)."""
        return _call_experimental(
            'unary_unary', '/backend.Backend/GenerateImage', 'GenerateImageRequest', 'Result',
            request, target, options, channel_credentials, insecure,
            call_credentials, compression, wait_for_ready, timeout, metadata)
    @staticmethod
    def AudioTranscription(request, target, options=(), channel_credentials=None,
                           call_credentials=None, insecure=False, compression=None,
                           wait_for_ready=None, timeout=None, metadata=None):
        """Call /backend.Backend/AudioTranscription (TranscriptRequest -> TranscriptResult)."""
        return _call_experimental(
            'unary_unary', '/backend.Backend/AudioTranscription', 'TranscriptRequest', 'TranscriptResult',
            request, target, options, channel_credentials, insecure,
            call_credentials, compression, wait_for_ready, timeout, metadata)
    @staticmethod
    def TTS(request, target, options=(), channel_credentials=None,
            call_credentials=None, insecure=False, compression=None,
            wait_for_ready=None, timeout=None, metadata=None):
        """Call /backend.Backend/TTS (TTSRequest -> Result)."""
        return _call_experimental(
            'unary_unary', '/backend.Backend/TTS', 'TTSRequest', 'Result',
            request, target, options, channel_credentials, insecure,
            call_credentials, compression, wait_for_ready, timeout, metadata)
--- a/extra/grpc/huggingface/huggingface.py
+++ b/extra/grpc/huggingface/huggingface.py
@ -1,67 +0,0 @@
 #!/usr/bin/env python3
 import grpc
 from concurrent import futures
 import time
 import backend_pb2
 import backend_pb2_grpc
 import argparse
 import signal
 import sys
 import os
 from sentence_transformers import SentenceTransformer
 _ONE_DAY_IN_SECONDS = 60 * 60 * 24
 # gRPC servicer that serves sentence-transformers embeddings
 class BackendServicer(backend_pb2_grpc.BackendServicer):
    """Backend servicer backed by a ``SentenceTransformer`` model.
    Only Health, LoadModel and Embedding are overridden here; every other RPC
    keeps the behaviour inherited from ``backend_pb2_grpc.BackendServicer``.
    """
    def Health(self, request, context):
        """Liveness probe: always answers with the message "OK"."""
        return backend_pb2.Reply(message="OK")
    def LoadModel(self, request, context):
        """Load the model named by ``request.Model`` and keep it on the servicer.
        Only the last path component of ``request.Model`` is handed to
        ``SentenceTransformer``.
        Returns:
            backend_pb2.Result with ``success=True`` when the model was
            constructed, otherwise ``success=False`` and a message describing
            the exception that was raised.
        """
        model_name = os.path.basename(request.Model)
        try:
            self.model = SentenceTransformer(model_name)
        except Exception as err:
            # Report the failure in-band instead of letting the RPC abort.
            return backend_pb2.Result(success=False, message=f"Unexpected {err=}, {type(err)=}")
        return backend_pb2.Result(message="Model loaded successfully", success=True)
    def Embedding(self, request, context):
        """Encode ``request.Embeddings`` with the loaded model.
        Returns:
            backend_pb2.EmbeddingResult carrying the vector produced by
            ``model.encode``. When no model has been loaded yet, the call is
            flagged FAILED_PRECONDITION on ``context`` and an empty
            EmbeddingResult is returned.
        """
        # self.model only exists after a successful LoadModel; without this
        # guard an early Embedding call dies with an opaque AttributeError.
        model = getattr(self, "model", None)
        if model is None:
            context.set_code(grpc.StatusCode.FAILED_PRECONDITION)
            context.set_details("No model loaded; call LoadModel before Embedding")
            return backend_pb2.EmbeddingResult()
        print("Calculated embeddings for: " + request.Embeddings, file=sys.stderr)
        sentence_embeddings = model.encode(request.Embeddings)
        return backend_pb2.EmbeddingResult(embeddings=sentence_embeddings)
 def serve(address):
    """Run the gRPC backend on ``address`` and block until it is stopped.
    Args:
        address: Bind address handed to ``server.add_insecure_port``
            (for example "localhost:50051").
    SIGINT and SIGTERM stop the server immediately (no grace period) and exit
    the process with status 0.
    """
    server = grpc.server(futures.ThreadPoolExecutor(max_workers=10))
    backend_pb2_grpc.add_BackendServicer_to_server(BackendServicer(), server)
    server.add_insecure_port(address)
    server.start()
    print("Server started. Listening on: " + address, file=sys.stderr)
    def signal_handler(sig, frame):
        """Stop the server and terminate the process."""
        # Log to stderr like every other message emitted by this backend.
        print("Received termination signal. Shutting down...", file=sys.stderr)
        server.stop(0)
        sys.exit(0)
    # Route both SIGINT and SIGTERM through the same shutdown path. With a
    # custom SIGINT handler installed Ctrl-C no longer raises
    # KeyboardInterrupt, so no except clause is needed around the wait below.
    signal.signal(signal.SIGINT, signal_handler)
    signal.signal(signal.SIGTERM, signal_handler)
    # Block the main thread until the server is stopped.
    server.wait_for_termination()
 if __name__ == "__main__":
    # Command-line entry point: read the bind address and start serving.
    cli = argparse.ArgumentParser(description="Run the gRPC server.")
    cli.add_argument("--addr", default="localhost:50051",
                     help="The address to bind the server to.")
    serve(cli.parse_args().addr)
--- a/extra/requirements.txt
+++ b/extra/requirements.txt
@ -1,4 +0,0 @@
 sentence_transformers
 grpcio
 google
 protobuf
--- a/go.mod
+++ b/go.mod
@ -1,77 +1,58 @@
 module github.com/go-skynet/LocalAI
-go 1.20
+go 1.19
 require (
-	github.com/donomii/go-rwkv.cpp v0.0.0-20230619005719-f5a8c4539674
+	github.com/donomii/go-rwkv.cpp v0.0.0-20230529074347-ccb05c3e1c6e
-	github.com/ggerganov/whisper.cpp/bindings/go v0.0.0-20230628193450-85ed71aaec8e
+	github.com/ggerganov/whisper.cpp/bindings/go v0.0.0-20230528233858-d7c936b44a80
 	github.com/go-audio/wav v1.1.0
-	github.com/go-skynet/bloomz.cpp v0.0.0-20230529155654-1834e77b83fa
+	github.com/go-skynet/bloomz.cpp v0.0.0-20230510223001-e9366e82abdf
-	github.com/go-skynet/go-bert.cpp v0.0.0-20230716133540-6abe312cded1
+	github.com/go-skynet/go-bert.cpp v0.0.0-20230529074307-771b4a085972
-	github.com/go-skynet/go-ggml-transformers.cpp v0.0.0-20230714203132-ffb09d7dd71e
+	github.com/go-skynet/go-ggml-transformers.cpp v0.0.0-20230529215936-13ccc22621bb
-	github.com/go-skynet/go-llama.cpp v0.0.0-20230709163512-6c97625cca76
+	github.com/go-skynet/go-llama.cpp v0.0.0-20230529221033-4afcaf28f36f
-	github.com/gofiber/fiber/v2 v2.48.0
+	github.com/gofiber/fiber/v2 v2.46.0
 	github.com/google/uuid v1.3.0
 	github.com/hashicorp/go-multierror v1.1.1
 	github.com/hpcloud/tail v1.0.0
 	github.com/imdario/mergo v0.3.16
-	github.com/json-iterator/go v1.1.12
+	github.com/mudler/go-stable-diffusion v0.0.0-20230516152536-c0748eca3642
-	github.com/mholt/archiver/v3 v3.5.1
+	github.com/nomic-ai/gpt4all/gpt4all-bindings/golang v0.0.0-20230528235700-9eb81cb54922
-	github.com/mudler/go-ggllm.cpp v0.0.0-20230709223052-862477d16eef
+	github.com/onsi/ginkgo/v2 v2.9.5
-	github.com/mudler/go-processmanager v0.0.0-20220724164624-c45b5c61312d
+	github.com/onsi/gomega v1.27.7
-	github.com/mudler/go-stable-diffusion v0.0.0-20230605122230-d89260f598af
+	github.com/otiai10/openaigo v1.1.0
 	github.com/nomic-ai/gpt4all/gpt4all-bindings/golang v0.0.0-20230714185456-cfd70b69fcf5
 	github.com/onsi/ginkgo/v2 v2.11.0
 	github.com/onsi/gomega v1.27.8
 	github.com/otiai10/openaigo v1.5.2
 	github.com/phayes/freeport v0.0.0-20220201140144-74d24b5ae9f5
 	github.com/rs/zerolog v1.29.1
-	github.com/sashabaranov/go-openai v1.14.0
+	github.com/sashabaranov/go-openai v1.9.5
-	github.com/tmc/langchaingo v0.0.0-20230713201705-dcf7ecdc8ac8
+	github.com/swaggo/swag v1.16.1
-	github.com/urfave/cli/v2 v2.25.7
+	github.com/urfave/cli/v2 v2.25.3
-	github.com/valyala/fasthttp v1.48.0
+	github.com/valyala/fasthttp v1.47.0
 	google.golang.org/grpc v1.56.2
 	google.golang.org/protobuf v1.31.0
 	gopkg.in/yaml.v2 v2.4.0
 	gopkg.in/yaml.v3 v3.0.1
 )
 require (
-	github.com/dlclark/regexp2 v1.8.1 // indirect
+	github.com/KyleBanks/depth v1.2.1 // indirect
-	github.com/dsnet/compress v0.0.2-0.20210315054119-f66993602bf5 // indirect
+	github.com/PuerkitoBio/purell v1.1.1 // indirect
-	github.com/go-skynet/go-llama.cpp-grammar v0.0.0-20230703203849-ffa57fbc3a12 // indirect
+	github.com/PuerkitoBio/urlesc v0.0.0-20170810143723-de5bf2ad4578 // indirect
 	github.com/golang/protobuf v1.5.3 // indirect
 	github.com/golang/snappy v0.0.2 // indirect
 	github.com/klauspost/pgzip v1.2.5 // indirect
 	github.com/kr/text v0.2.0 // indirect
 	github.com/modern-go/concurrent v0.0.0-20180228061459-e0a39a4cb421 // indirect
 	github.com/modern-go/reflect2 v1.0.2 // indirect
 	github.com/nwaples/rardecode v1.1.0 // indirect
 	github.com/pierrec/lz4/v4 v4.1.2 // indirect
 	github.com/pkoukk/tiktoken-go v0.1.2 // indirect
 	github.com/ulikunitz/xz v0.5.9 // indirect
 	github.com/xi2/xz v0.0.0-20171230120015-48954b6210f8 // indirect
 	google.golang.org/genproto v0.0.0-20230410155749-daa745c078e1 // indirect
 	gopkg.in/check.v1 v1.0.0-20201130134442-10cb98267c6c // indirect
 	gopkg.in/fsnotify.v1 v1.4.7 // indirect
 	gopkg.in/tomb.v1 v1.0.0-20141024135613-dd632973f1e7 // indirect
 )
 require (
 	github.com/andybalholm/brotli v1.0.5 // indirect
 	github.com/cpuguy83/go-md2man/v2 v2.0.2 // indirect
 	github.com/go-audio/audio v1.0.0 // indirect
 	github.com/go-audio/riff v1.0.0 // indirect
 	github.com/go-logr/logr v1.2.4 // indirect
 	github.com/go-openapi/jsonpointer v0.19.5 // indirect
 	github.com/go-openapi/jsonreference v0.19.6 // indirect
 	github.com/go-openapi/spec v0.20.4 // indirect
 	github.com/go-openapi/swag v0.19.15 // indirect
 	github.com/go-skynet/go-gpt2.cpp v0.0.0-20230523153133-3eb3a32c0874 // indirect
 	github.com/go-task/slim-sprig v0.0.0-20230315185526-52ccab3ef572 // indirect
 	github.com/google/go-cmp v0.5.9 // indirect
 	github.com/google/pprof v0.0.0-20210407192527-94a9f03dee38 // indirect
 	github.com/hashicorp/errwrap v1.0.0 // indirect
 	github.com/josharian/intern v1.0.0 // indirect
 	github.com/klauspost/compress v1.16.3 // indirect
 	github.com/mailru/easyjson v0.7.6 // indirect
 	github.com/mattn/go-colorable v0.1.13 // indirect
-	github.com/mattn/go-isatty v0.0.19 // indirect
+	github.com/mattn/go-isatty v0.0.18 // indirect
 	github.com/mattn/go-runewidth v0.0.14 // indirect
-	github.com/mudler/go-piper v0.0.0-20230621222733-56b8a81b4760
+	github.com/otiai10/mint v1.5.1 // indirect
 	github.com/philhofer/fwd v1.1.2 // indirect
 	github.com/rivo/uniseg v0.2.0 // indirect
 	github.com/russross/blackfriday/v2 v2.1.0 // indirect
@ -82,7 +63,7 @@ require (
 	github.com/valyala/tcplisten v1.0.0 // indirect
 	github.com/xrash/smetrics v0.0.0-20201216005158-039620a65673 // indirect
 	golang.org/x/net v0.10.0 // indirect
-	golang.org/x/sys v0.10.0 // indirect
+	golang.org/x/sys v0.8.0 // indirect
 	golang.org/x/text v0.9.0 // indirect
-	golang.org/x/tools v0.9.3 // indirect
+	golang.org/x/tools v0.9.1 // indirect
 )
--- a/go.sum
+++ b/go.sum
@ -1,4 +1,9 @@
-github.com/andybalholm/brotli v1.0.1/go.mod h1:loMXtMfwqflxFJPmdbJO0a3KNoPuLBgiu3qAvBg8x/Y=
+github.com/KyleBanks/depth v1.2.1 h1:5h8fQADFrWtarTdtDudMmGsC7GPbOAu6RVB3ffsVFHc=
 github.com/KyleBanks/depth v1.2.1/go.mod h1:jzSb9d0L43HxTQfT+oSA1EEp2q+ne2uh6XgeJcm8brE=
 github.com/PuerkitoBio/purell v1.1.1 h1:WEQqlqaGbrPkxLJWfBwQmfEAE1Z7ONdDLqrN38tNFfI=
 github.com/PuerkitoBio/purell v1.1.1/go.mod h1:c11w/QuzBsJSee3cPx9rAFu61PvFxuPbtSwDGJws/X0=
 github.com/PuerkitoBio/urlesc v0.0.0-20170810143723-de5bf2ad4578 h1:d+Bc7a5rLufV/sSk/8dngufqelfh6jnri85riMAaF/M=
 github.com/PuerkitoBio/urlesc v0.0.0-20170810143723-de5bf2ad4578/go.mod h1:uGdkoq3SwY9Y+13GIhn11/XLaGBb4BfwItxLd5jeuXE=
 github.com/andybalholm/brotli v1.0.5 h1:8uQZIdzKmjc/iuPu7O2ioW48L81FgatrcpfFmiq/cCs=
 github.com/andybalholm/brotli v1.0.5/go.mod h1:fO7iG3H7G2nSZ7m0zPUDn85XEX2GTukHGRSepvi9Eig=
 github.com/chzyer/logex v1.1.10/go.mod h1:+Ywpsq7O8HXn0nuIou7OrIPyXbp3wmkHB+jjWRnGsAI=
@ -11,18 +16,20 @@ github.com/creack/pty v1.1.9/go.mod h1:oKZEueFk5CKHvIhNR5MUki03XCEU+Q6VDXinZuGJ3
 github.com/davecgh/go-spew v1.1.0/go.mod h1:J7Y8YcW2NihsgmVo/mv3lAwl/skON4iLHjSsI+c5H38=
 github.com/davecgh/go-spew v1.1.1 h1:vj9j/u1bqnvCEfJOwUhtlOARqs3+rkHYY13jYWTU97c=
 github.com/davecgh/go-spew v1.1.1/go.mod h1:J7Y8YcW2NihsgmVo/mv3lAwl/skON4iLHjSsI+c5H38=
-github.com/dlclark/regexp2 v1.8.1 h1:6Lcdwya6GjPUNsBct8Lg/yRPwMhABj269AAzdGSiR+0=
+github.com/donomii/go-rwkv.cpp v0.0.0-20230515123100-6fdd0c338e56 h1:s8/MZdicstKi5fn9D9mKGIQ/q6IWCYCk/BM68i8v51w=
-github.com/dlclark/regexp2 v1.8.1/go.mod h1:DHkYz0B9wPfa6wondMfaivmHpzrQ3v9q8cnmRbL6yW8=
+github.com/donomii/go-rwkv.cpp v0.0.0-20230515123100-6fdd0c338e56/go.mod h1:gWy7FIWioqYmYxkaoFyBnaKApeZVrUkHhv9EV9pz4dM=
-github.com/donomii/go-rwkv.cpp v0.0.0-20230619005719-f5a8c4539674 h1:G70Yf/QOCEL1v24idWnGd6rJsbqiGkJAJnMaWaolzEg=
+github.com/donomii/go-rwkv.cpp v0.0.0-20230529074347-ccb05c3e1c6e h1:YbcLoxAwS0r7otEqU/d8bArubmfEJaG7dZPp0Aa52Io=
-github.com/donomii/go-rwkv.cpp v0.0.0-20230619005719-f5a8c4539674/go.mod h1:gWy7FIWioqYmYxkaoFyBnaKApeZVrUkHhv9EV9pz4dM=
+github.com/donomii/go-rwkv.cpp v0.0.0-20230529074347-ccb05c3e1c6e/go.mod h1:gWy7FIWioqYmYxkaoFyBnaKApeZVrUkHhv9EV9pz4dM=
-github.com/dsnet/compress v0.0.2-0.20210315054119-f66993602bf5 h1:iFaUwBSo5Svw6L7HYpRu/0lE3e0BaElwnNO1qkNQxBY=
+github.com/ggerganov/whisper.cpp/bindings/go v0.0.0-20230520182345-041be06d5881 h1:dafqVivljYk51VLFnnpTXJnfWDe637EobWZ1l8PyEf8=
-github.com/dsnet/compress v0.0.2-0.20210315054119-f66993602bf5/go.mod h1:qssHWj60/X5sZFNxpG4HBPDHVqxNm4DfnCKgrbZOT+s=
+github.com/ggerganov/whisper.cpp/bindings/go v0.0.0-20230520182345-041be06d5881/go.mod h1:QIjZ9OktHFG7p+/m3sMvrAJKKdWrr1fZIK0rM6HZlyo=
-github.com/dsnet/golib v0.0.0-20171103203638-1ea166775780/go.mod h1:Lj+Z9rebOhdfkVLjJ8T6VcRQv3SXugXy999NBtR9aFY=
+github.com/ggerganov/whisper.cpp/bindings/go v0.0.0-20230523110439-77eab3fbfe5e h1:4PMorQuoUGAXmIzCtnNOHaasyLokXdgd8jUWwsraFTo=
-github.com/fsnotify/fsnotify v1.4.7/go.mod h1:jwhsz4b93w/PPRr/qN1Yymfu8t87LnFCMoQvtojpjFo=
+github.com/ggerganov/whisper.cpp/bindings/go v0.0.0-20230523110439-77eab3fbfe5e/go.mod h1:QIjZ9OktHFG7p+/m3sMvrAJKKdWrr1fZIK0rM6HZlyo=
-github.com/fsnotify/fsnotify v1.4.9 h1:hsms1Qyu0jgnwNXIxa+/V/PDsU6CfLf6CNO8H7IWoS4=
+github.com/ggerganov/whisper.cpp/bindings/go v0.0.0-20230524181101-5e2b3407ef46 h1:+STJWsBFikYC90LnR8I9gcBdysQn7Jv9Jb44+5WBi68=
-github.com/fsnotify/fsnotify v1.4.9/go.mod h1:znqG4EE+3YCdAaPaxE2ZRY/06pZUdp0tY4IgpuI1SZQ=
+github.com/ggerganov/whisper.cpp/bindings/go v0.0.0-20230524181101-5e2b3407ef46/go.mod h1:QIjZ9OktHFG7p+/m3sMvrAJKKdWrr1fZIK0rM6HZlyo=
-github.com/ggerganov/whisper.cpp/bindings/go v0.0.0-20230628193450-85ed71aaec8e h1:KtbU2JR3lJuXFASHG2+sVLucfMPBjWKUUKByX6C81mQ=
+github.com/ggerganov/whisper.cpp/bindings/go v0.0.0-20230527074028-9b926844e3ae h1:uzi5myq/qNX9xiKMRF/fW3HfxuEo2WcnTalwg9fe2hM=
-github.com/ggerganov/whisper.cpp/bindings/go v0.0.0-20230628193450-85ed71aaec8e/go.mod h1:QIjZ9OktHFG7p+/m3sMvrAJKKdWrr1fZIK0rM6HZlyo=
+github.com/ggerganov/whisper.cpp/bindings/go v0.0.0-20230527074028-9b926844e3ae/go.mod h1:QIjZ9OktHFG7p+/m3sMvrAJKKdWrr1fZIK0rM6HZlyo=
 github.com/ggerganov/whisper.cpp/bindings/go v0.0.0-20230528233858-d7c936b44a80 h1:IeeVcNaQHdcG+GPg+meOPFvtonvO8p/HBzTrZGjpWZk=
 github.com/ggerganov/whisper.cpp/bindings/go v0.0.0-20230528233858-d7c936b44a80/go.mod h1:QIjZ9OktHFG7p+/m3sMvrAJKKdWrr1fZIK0rM6HZlyo=
 github.com/go-audio/audio v1.0.0 h1:zS9vebldgbQqktK4H0lUqWrG8P0NxCJVqcj7ZpNnwd4=
 github.com/go-audio/audio v1.0.0/go.mod h1:6uAu0+H2lHkwdGsAY+j2wHPNPpPoeg5AaEFh9FlA+Zs=
 github.com/go-audio/riff v1.0.0 h1:d8iCGbDvox9BfLagY94fBynxSPHO80LmZCaOsmKxokA=
@ -31,48 +38,52 @@ github.com/go-audio/wav v1.1.0 h1:jQgLtbqBzY7G+BM8fXF7AHUk1uHUviWS4X39d5rsL2g=
 github.com/go-audio/wav v1.1.0/go.mod h1:mpe9qfwbScEbkd8uybLuIpTgHyrISw/OTuvjUW2iGtE=
 github.com/go-logr/logr v1.2.4 h1:g01GSCwiDw2xSZfjJ2/T9M+S6pFdcNtFYsp+Y43HYDQ=
 github.com/go-logr/logr v1.2.4/go.mod h1:jdQByPbusPIv2/zmleS9BjJVeZ6kBagPoEUsqbVz/1A=
-github.com/go-skynet/bloomz.cpp v0.0.0-20230529155654-1834e77b83fa h1:gxr68r/6EWroay4iI81jxqGCDbKotY4+CiwdUkBz2NQ=
+github.com/go-openapi/jsonpointer v0.19.3/go.mod h1:Pl9vOtqEWErmShwVjC8pYs9cog34VGT37dQOVbmoatg=
-github.com/go-skynet/bloomz.cpp v0.0.0-20230529155654-1834e77b83fa/go.mod h1:wc0fJ9V04yiYTfgKvE5RUUSRQ5Kzi0Bo4I+U3nNOUuA=
+github.com/go-openapi/jsonpointer v0.19.5 h1:gZr+CIYByUqjcgeLXnQu2gHYQC9o73G2XUeOFYEICuY=
-github.com/go-skynet/go-bert.cpp v0.0.0-20230607105116-6069103f54b9 h1:wRGbDwNwPmSzoXVw/HLzXY4blpRvPWg7QW2OA0WKezA=
+github.com/go-openapi/jsonpointer v0.19.5/go.mod h1:Pl9vOtqEWErmShwVjC8pYs9cog34VGT37dQOVbmoatg=
-github.com/go-skynet/go-bert.cpp v0.0.0-20230607105116-6069103f54b9/go.mod h1:pXKCpYYXujMeAvgJHU6WoMfvYbr84563+J8+Ebkyr5U=
+github.com/go-openapi/jsonreference v0.19.6 h1:UBIxjkht+AWIgYzCDSv2GN+E/togfwXUJFRTWhl2Jjs=
-github.com/go-skynet/go-bert.cpp v0.0.0-20230716133540-6abe312cded1 h1:yXvc7QfGtoZ51tUW/YVjoTwAfh8HG88XU7UOrbNlz5Y=
+github.com/go-openapi/jsonreference v0.19.6/go.mod h1:diGHMEHg2IqXZGKxqyvWdfWU/aim5Dprw5bqpKkTvns=
-github.com/go-skynet/go-bert.cpp v0.0.0-20230716133540-6abe312cded1/go.mod h1:fYjkCDRzC+oRLHSjQoajmYK6AmeJnmEanV27CClAcDc=
+github.com/go-openapi/spec v0.20.4 h1:O8hJrt0UMnhHcluhIdUgCLRWyM2x7QkBXRvOs7m+O1M=
-github.com/go-skynet/go-ggml-transformers.cpp v0.0.0-20230630204211-3fec197a1dc4 h1:LScGc8yWTS9wbS2RTOq6s+waeHElLIQDJg2SUCwrO3E=
+github.com/go-openapi/spec v0.20.4/go.mod h1:faYFR1CvsJZ0mNsmsphTMSoRrNV3TEDoAM7FOEWeq8I=
-github.com/go-skynet/go-ggml-transformers.cpp v0.0.0-20230630204211-3fec197a1dc4/go.mod h1:31j1odgFXP8hDSUVfH0zErKI5aYVP18ddYnPkwCso2A=
+github.com/go-openapi/swag v0.19.5/go.mod h1:POnQmlKehdgb5mhVOsnJFsivZCEZ/vjK9gh66Z9tfKk=
-github.com/go-skynet/go-ggml-transformers.cpp v0.0.0-20230714203132-ffb09d7dd71e h1:4reMY29i1eOZaRaSTMPNyXI7X8RMNxCTfDDBXYzrbr0=
+github.com/go-openapi/swag v0.19.15 h1:D2NRCBzS9/pEY3gP9Nl8aDqGUcPFrwG2p+CNFrLyrCM=
-github.com/go-skynet/go-ggml-transformers.cpp v0.0.0-20230714203132-ffb09d7dd71e/go.mod h1:31j1odgFXP8hDSUVfH0zErKI5aYVP18ddYnPkwCso2A=
+github.com/go-openapi/swag v0.19.15/go.mod h1:QYRuS/SOXUCsnplDa677K7+DxSOj6IPNl/eQntq43wQ=
-github.com/go-skynet/go-llama.cpp v0.0.0-20230703203849-ffa57fbc3a12 h1:cfGZiZana0gPD0i8nmyOGTUQGb4N8PYqaBqhhukREPc=
+github.com/go-skynet/bloomz.cpp v0.0.0-20230510223001-e9366e82abdf h1:VJfSn8hIDE+K5+h38M3iAyFXrxpRExMKRdTk33UDxsw=
-github.com/go-skynet/go-llama.cpp v0.0.0-20230703203849-ffa57fbc3a12/go.mod h1:tzi97YvT1bVQ+iTG39LvpDkKG1WbizgtljC+orSoM40=
+github.com/go-skynet/bloomz.cpp v0.0.0-20230510223001-e9366e82abdf/go.mod h1:wc0fJ9V04yiYTfgKvE5RUUSRQ5Kzi0Bo4I+U3nNOUuA=
-github.com/go-skynet/go-llama.cpp v0.0.0-20230709163512-6c97625cca76 h1:NRdxo2MKi8qhWZXxu6CIZOkdH+LBERFz1kk22U1FD3k=
+github.com/go-skynet/go-bert.cpp v0.0.0-20230516063724-cea1ed76a7f4 h1:+3KPDf4Wv1VHOkzAfZnlj9qakLSYggTpm80AswhD/FU=
-github.com/go-skynet/go-llama.cpp v0.0.0-20230709163512-6c97625cca76/go.mod h1:tzi97YvT1bVQ+iTG39LvpDkKG1WbizgtljC+orSoM40=
+github.com/go-skynet/go-bert.cpp v0.0.0-20230516063724-cea1ed76a7f4/go.mod h1:VY0s5KoAI2jRCvQXKuDeEEe8KG7VaWifSNJSk+E1KtY=
-github.com/go-task/slim-sprig v0.0.0-20210107165309-348f09dbbbc0/go.mod h1:fyg7847qk6SyHyPtNmDHnmrv/HOrqktSC+C9fM+CJOE=
+github.com/go-skynet/go-bert.cpp v0.0.0-20230529074307-771b4a085972 h1:eiE1CTqanNjpNWF2xp9GvNZXgKgRzNaUSyFZGMLu8Vo=
 github.com/go-skynet/go-bert.cpp v0.0.0-20230529074307-771b4a085972/go.mod h1:IQrVVZiAuWpneNrahrGu3m7VVaKLDIvQGp+Q6B8jw5g=
 github.com/go-skynet/go-ggml-transformers.cpp v0.0.0-20230523173010-f89d7c22df6b h1:uKICsAbdRJxMPZ4RXltwOwXPRDO1/d/pdGR3gEEUV9M=
 github.com/go-skynet/go-ggml-transformers.cpp v0.0.0-20230523173010-f89d7c22df6b/go.mod h1:hjmO5UfipWl6xkPT54acOs9DDto8GPV81IvsBcvRjsA=
 github.com/go-skynet/go-ggml-transformers.cpp v0.0.0-20230524084634-c4c581f1853c h1:jXUOCh2K4OzRItTtHzdxvkylE9r1szRSleRpXCNvraY=
 github.com/go-skynet/go-ggml-transformers.cpp v0.0.0-20230524084634-c4c581f1853c/go.mod h1:hjmO5UfipWl6xkPT54acOs9DDto8GPV81IvsBcvRjsA=
 github.com/go-skynet/go-ggml-transformers.cpp v0.0.0-20230525204055-4f18e5eb7508 h1:pb7wUQlgqbakB4vILBq44iLe5w9bcjAsP7js2iFOWX8=
 github.com/go-skynet/go-ggml-transformers.cpp v0.0.0-20230525204055-4f18e5eb7508/go.mod h1:hjmO5UfipWl6xkPT54acOs9DDto8GPV81IvsBcvRjsA=
 github.com/go-skynet/go-ggml-transformers.cpp v0.0.0-20230529072326-695f97befe14 h1:0VZ5NbrtqvLvBRs0ioXBb9Mp8cOYRqG2WgAIf3+3dlw=
 github.com/go-skynet/go-ggml-transformers.cpp v0.0.0-20230529072326-695f97befe14/go.mod h1:Rz967+t+aY6S+TBiW/WI8FM/C1WEMM+DamSMtKRxVAM=
 github.com/go-skynet/go-ggml-transformers.cpp v0.0.0-20230529215936-13ccc22621bb h1:slNlMT8xB6w0QaMroTsqkNzNovUOEkpNpCawB7IjBFY=
 github.com/go-skynet/go-ggml-transformers.cpp v0.0.0-20230529215936-13ccc22621bb/go.mod h1:SI+oF2+THMydq8Vo4+EzKJaQwtfWOy+lr7yWPP6FR2U=
 github.com/go-skynet/go-gpt2.cpp v0.0.0-20230523153133-3eb3a32c0874 h1:/6QWh2oarU7iPSpXj/3bLlkKptyxjKTRrNtGUrh8vhI=
 github.com/go-skynet/go-gpt2.cpp v0.0.0-20230523153133-3eb3a32c0874/go.mod h1:1Wj/xbkMfwQSOrhNYK178IzqQHstZbRfhx4s8p1M5VM=
 github.com/go-skynet/go-llama.cpp v0.0.0-20230520155239-ccf23adfb278 h1:st4ow9JKy3UuhkwutrbWof2vMFU/YxwBCLYZ1IxJ2Po=
 github.com/go-skynet/go-llama.cpp v0.0.0-20230520155239-ccf23adfb278/go.mod h1:oA0r4BW8ndyjTMGi1tulsNd7sdg3Ql8MaVFuT1zF6ws=
 github.com/go-skynet/go-llama.cpp v0.0.0-20230523103108-dcf8da632bce h1:Mcq9LvYG4msXJvFUeiYI6PGftqmYbOoBxNfjyAAyFB4=
 github.com/go-skynet/go-llama.cpp v0.0.0-20230523103108-dcf8da632bce/go.mod h1:oA0r4BW8ndyjTMGi1tulsNd7sdg3Ql8MaVFuT1zF6ws=
 github.com/go-skynet/go-llama.cpp v0.0.0-20230524233806-6e7e69a1607e h1:zfxPbHj7/hN2F7V12vfxCi4CFsaVO1WohW96OVFtfNw=
 github.com/go-skynet/go-llama.cpp v0.0.0-20230524233806-6e7e69a1607e/go.mod h1:oA0r4BW8ndyjTMGi1tulsNd7sdg3Ql8MaVFuT1zF6ws=
 github.com/go-skynet/go-llama.cpp v0.0.0-20230529120000-4bd3910005a5 h1:AbKnkgzkjkyoJtjOHgR3+rmNKOOjmRja6De3HEa7S7E=
 github.com/go-skynet/go-llama.cpp v0.0.0-20230529120000-4bd3910005a5/go.mod h1:oA0r4BW8ndyjTMGi1tulsNd7sdg3Ql8MaVFuT1zF6ws=
 github.com/go-skynet/go-llama.cpp v0.0.0-20230529221033-4afcaf28f36f h1:HmXiNF9Sy+34aSjaJ2/JN+goDgbT2XyLjdiG2EOMvaE=
 github.com/go-skynet/go-llama.cpp v0.0.0-20230529221033-4afcaf28f36f/go.mod h1:oA0r4BW8ndyjTMGi1tulsNd7sdg3Ql8MaVFuT1zF6ws=
 github.com/go-task/slim-sprig v0.0.0-20230315185526-52ccab3ef572 h1:tfuBGBXKqDEevZMzYi5KSi8KkcZtzBcTgAUUtapy0OI=
 github.com/go-task/slim-sprig v0.0.0-20230315185526-52ccab3ef572/go.mod h1:9Pwr4B2jHnOSGXyyzV8ROjYa2ojvAY6HCGYYfMoC3Ls=
 github.com/godbus/dbus/v5 v5.0.4/go.mod h1:xhWf0FNVPg57R7Z0UbKHbJfkEywrmjJnf7w5xrFpKfA=
-github.com/gofiber/fiber/v2 v2.47.0 h1:EN5lHVCc+Pyqh5OEsk8fzRiifgwpbrP0rulQ4iNf3fs=
+github.com/gofiber/fiber/v2 v2.46.0 h1:wkkWotblsGVlLjXj2dpgKQAYHtXumsK/HyFugQM68Ns=
-github.com/gofiber/fiber/v2 v2.47.0/go.mod h1:mbFMVN1lQuzziTkkakgtKKdjfsXSw9BKR5lmcNksUoU=
+github.com/gofiber/fiber/v2 v2.46.0/go.mod h1:DNl0/c37WLe0g92U6lx1VMQuxGUQY5V7EIaVoEsUffc=
 github.com/gofiber/fiber/v2 v2.48.0 h1:cRVMCb9aUJDsyHxGFLwz/sGzDggdailZZyptU9F9cU0=
 github.com/gofiber/fiber/v2 v2.48.0/go.mod h1:xqJgfqrc23FJuqGOW6DVgi3HyZEm2Mn9pRqUb2kHSX8=
 github.com/golang/protobuf v1.2.0/go.mod h1:6lQm79b+lXiMfvg/cZm0SGofjICqVBUtrP5yJMmIC1U=
 github.com/golang/protobuf v1.4.0-rc.1/go.mod h1:ceaxUfeHdC40wWswd/P6IGgMaK3YpKi5j83Wpe3EHw8=
 github.com/golang/protobuf v1.4.0-rc.1.0.20200221234624-67d41d38c208/go.mod h1:xKAWHe0F5eneWXFV3EuXVDTCmh+JuBKY0li0aMyXATA=
 github.com/golang/protobuf v1.4.0-rc.2/go.mod h1:LlEzMj4AhA7rCAGe4KMBDvJI+AwstrUpVNzEA03Pprs=
 github.com/golang/protobuf v1.4.0-rc.4.0.20200313231945-b860323f09d0/go.mod h1:WU3c8KckQ9AFe+yFwt9sWVRKCVIyN9cPHBJSNnbL67w=
 github.com/golang/protobuf v1.4.0/go.mod h1:jodUvKwWbYaEsadDk5Fwe5c77LiNKVO9IDvqG2KuDX0=
 github.com/golang/protobuf v1.4.2/go.mod h1:oDoupMAO8OvCJWAcko0GGGIgR6R6ocIYbsSw735rRwI=
 github.com/golang/protobuf v1.5.0/go.mod h1:FsONVRAS9T7sI+LIUmWTfcYkHO4aIWwzhcaSAoJOfIk=
 github.com/golang/protobuf v1.5.2/go.mod h1:XVQd3VNwM+JqD3oG2Ue2ip4fOMUkwXdXDdiuN0vRsmY=
 github.com/golang/protobuf v1.5.3 h1:KhyjKVUg7Usr/dYsdSqoFveMYd5ko72D+zANwlG1mmg=
 github.com/golang/protobuf v1.5.3/go.mod h1:XVQd3VNwM+JqD3oG2Ue2ip4fOMUkwXdXDdiuN0vRsmY=
 github.com/golang/snappy v0.0.2 h1:aeE13tS0IiQgFjYdoL8qN3K1N2bXXtI6Vi51/y7BpMw=
 github.com/golang/snappy v0.0.2/go.mod h1:/XxbfmMg8lxefKM7IXC3fBNl/7bRcc72aCRzEWrmP2Q=
 github.com/google/go-cmp v0.3.0/go.mod h1:8QqcDgzrUqlUb/G2PQTWiueGozuR1884gddMywk6iLU=
 github.com/google/go-cmp v0.3.1/go.mod h1:8QqcDgzrUqlUb/G2PQTWiueGozuR1884gddMywk6iLU=
 github.com/google/go-cmp v0.4.0/go.mod h1:v8dTdLbMG2kIc/vJvl+f65V22dbkXbowE6jgT/gNBxE=
 github.com/google/go-cmp v0.5.5/go.mod h1:v8dTdLbMG2kIc/vJvl+f65V22dbkXbowE6jgT/gNBxE=
 github.com/google/go-cmp v0.5.9 h1:O2Tfq5qg4qc4AmwVlvv0oLiVAGB7enBSJ2x2DqQFi38=
 github.com/google/go-cmp v0.5.9/go.mod h1:17dUlkBOakJ0+DkrSSNjCkIjxS6bF9zb3elmeNGIjoY=
 github.com/google/gofuzz v1.0.0/go.mod h1:dBl0BpW6vV/+mYPU4Po3pmUjxk6FQPldtuIdl/M65Eg=
 github.com/google/pprof v0.0.0-20210407192527-94a9f03dee38 h1:yAJXTCF9TqKcTiHJAE8dj7HMvPfh66eeA2JYW7eFpSE=
 github.com/google/pprof v0.0.0-20210407192527-94a9f03dee38/go.mod h1:kpwsk12EmLew5upagYY7GY0pfYCcupk39gWOCRROcvE=
 github.com/google/uuid v1.3.0 h1:t6JiXgmwXMjEs8VusXIJk2BXHsn+wx8BZdTaoZ5fu7I=
@ -81,84 +92,63 @@ github.com/hashicorp/errwrap v1.0.0 h1:hLrqtEDnRye3+sgx6z4qVLNuviH3MR5aQ0ykNJa/U
 github.com/hashicorp/errwrap v1.0.0/go.mod h1:YH+1FKiLXxHSkmPseP+kNlulaMuP3n2brvKWEqk/Jc4=
 github.com/hashicorp/go-multierror v1.1.1 h1:H5DkEtf6CXdFp0N0Em5UCwQpXMWke8IA0+lD48awMYo=
 github.com/hashicorp/go-multierror v1.1.1/go.mod h1:iw975J/qwKPdAO1clOe2L8331t/9/fmwbPZ6JB6eMoM=
 github.com/hpcloud/tail v1.0.0 h1:nfCOvKYfkgYP8hkirhJocXT2+zOD8yUNjXaWfTlyFKI=
 github.com/hpcloud/tail v1.0.0/go.mod h1:ab1qPbhIpdTxEkNHXyeSf5vhxWSCs/tWer42PpOxQnU=
 github.com/ianlancetaylor/demangle v0.0.0-20200824232613-28f6c0f3b639/go.mod h1:aSSvb/t6k1mPoxDqO4vJh6VOCGPwU4O0C2/Eqndh1Sc=
 github.com/imdario/mergo v0.3.15 h1:M8XP7IuFNsqUx6VPK2P9OSmsYsI/YFaGil0uD21V3dM=
 github.com/imdario/mergo v0.3.15/go.mod h1:WBLT9ZmE3lPoWsEzCh9LPo3TiwVN+ZKEjmz+hD27ysY=
 github.com/imdario/mergo v0.3.16 h1:wwQJbIsHYGMUyLSPrEq1CT16AhnhNJQ51+4fdHUnCl4=
 github.com/imdario/mergo v0.3.16/go.mod h1:WBLT9ZmE3lPoWsEzCh9LPo3TiwVN+ZKEjmz+hD27ysY=
-github.com/json-iterator/go v1.1.12 h1:PV8peI4a0ysnczrg+LtxykD8LfKY9ML6u2jnxaEnrnM=
+github.com/josharian/intern v1.0.0 h1:vlS4z54oSdjm0bgjRigI+G1HpF+tI+9rE5LLzOg8HmY=
-github.com/json-iterator/go v1.1.12/go.mod h1:e30LSqwooZae/UwlEbR2852Gd8hjQvJoHmT4TnhNGBo=
+github.com/josharian/intern v1.0.0/go.mod h1:5DoeVV0s6jJacbCEi61lwdGj/aVlrQvzHFFd8Hwg//Y=
 github.com/klauspost/compress v1.4.1/go.mod h1:RyIbtBH6LamlWaDj8nUwkbUhJ87Yi3uG0guNDohfE1A=
 github.com/klauspost/compress v1.11.4/go.mod h1:aoV0uJVorq1K+umq18yTdKaF57EivdYsUV+/s2qKfXs=
 github.com/klauspost/compress v1.16.3 h1:XuJt9zzcnaz6a16/OU53ZjWp/v7/42WcR5t2a0PcNQY=
 github.com/klauspost/compress v1.16.3/go.mod h1:ntbaceVETuRiXiv4DpjP66DpAtAGkEQskQzEyD//IeE=
-github.com/klauspost/cpuid v1.2.0/go.mod h1:Pj4uuM528wm8OyEC2QMXAi2YiTZ96dNQPGgoMS4s3ek=
+github.com/kr/pretty v0.1.0/go.mod h1:dAy3ld7l9f0ibDNOQOHHMYYIIbhfbHSm3C4ZsoJORNo=
 github.com/klauspost/pgzip v1.2.5 h1:qnWYvvKqedOF2ulHpMG72XQol4ILEJ8k2wwRl/Km8oE=
 github.com/klauspost/pgzip v1.2.5/go.mod h1:Ch1tH69qFZu15pkjo5kYi6mth2Zzwzt50oCQKQE9RUs=
 github.com/kr/pretty v0.2.1 h1:Fmg33tUaq4/8ym9TJN1x7sLJnHVwhP33CNkpYV/7rwI=
 github.com/kr/pretty v0.2.1/go.mod h1:ipq/a2n7PKx3OHsz4KJII5eveXtPO4qwEXGdVfWzfnI=
 github.com/kr/pty v1.1.1/go.mod h1:pFQYn66WHrOpPYNljwOMqo10TkYh1fy3cYio2l3bCsQ=
 github.com/kr/text v0.1.0/go.mod h1:4Jbv+DJW3UT/LiOwJeYQe1efqtUx/iVham/4vfdArNI=
 github.com/kr/text v0.2.0 h1:5Nx0Ya0ZqY2ygV366QzturHI13Jq95ApcVaJBhpS+AY=
 github.com/kr/text v0.2.0/go.mod h1:eLer722TekiGuMkidMxC/pM04lWEeraHUUmBw8l2grE=
 github.com/mailru/easyjson v0.0.0-20190614124828-94de47d64c63/go.mod h1:C1wdFJiN94OJF2b5HbByQZoLdCWB1Yqtg26g4irojpc=
 github.com/mailru/easyjson v0.0.0-20190626092158-b2ccc519800e/go.mod h1:C1wdFJiN94OJF2b5HbByQZoLdCWB1Yqtg26g4irojpc=
 github.com/mailru/easyjson v0.7.6 h1:8yTIVnZgCoiM1TgqoeTl+LfU5Jg6/xL3QhGQnimLYnA=
 github.com/mailru/easyjson v0.7.6/go.mod h1:xzfreul335JAWq5oZzymOObrkdz5UnU4kGfJJLY9Nlc=
 github.com/mattn/go-colorable v0.1.12/go.mod h1:u5H1YNBxpqRaxsYJYSkiCWKzEfiAb1Gb520KVy5xxl4=
 github.com/mattn/go-colorable v0.1.13 h1:fFA4WZxdEF4tXPZVKMLwD8oUnCTTo08duU7wxecdEvA=
 github.com/mattn/go-colorable v0.1.13/go.mod h1:7S9/ev0klgBDR4GtXTXX8a3vIGJpMovkB8vQcUbaXHg=
 github.com/mattn/go-isatty v0.0.14/go.mod h1:7GGIvUiUoEMVVmxf/4nioHXj79iQHKdU27kJ6hsGG94=
 github.com/mattn/go-isatty v0.0.16/go.mod h1:kYGgaQfpe5nmfYZH+SKPsOc2e4SrIfOl2e/yFXSvRLM=
-github.com/mattn/go-isatty v0.0.19 h1:JITubQf0MOLdlGRuRq+jtsDlekdYPia9ZFsB8h/APPA=
+github.com/mattn/go-isatty v0.0.18 h1:DOKFKCQ7FNG2L1rbrmstDN4QVRdS89Nkh85u68Uwp98=
-github.com/mattn/go-isatty v0.0.19/go.mod h1:W+V8PltTTMOvKvAeJH7IuucS94S2C6jfK/D7dTCTo3Y=
+github.com/mattn/go-isatty v0.0.18/go.mod h1:W+V8PltTTMOvKvAeJH7IuucS94S2C6jfK/D7dTCTo3Y=
 github.com/mattn/go-runewidth v0.0.14 h1:+xnbZSEeDbOIg5/mE6JF0w6n9duR1l3/WmbinWVwUuU=
 github.com/mattn/go-runewidth v0.0.14/go.mod h1:Jdepj2loyihRzMpdS35Xk/zdY8IAYHsh153qUoGf23w=
-github.com/mholt/archiver/v3 v3.5.1 h1:rDjOBX9JSF5BvoJGvjqK479aL70qh9DIpZCl+k7Clwo=
+github.com/mudler/go-stable-diffusion v0.0.0-20230516152536-c0748eca3642 h1:KTkh3lOUsGqQyP4v+oa38sPFdrZtNnM4HaxTb3epdYs=
-github.com/mholt/archiver/v3 v3.5.1/go.mod h1:e3dqJ7H78uzsRSEACH1joayhuSyhnonssnDhppzS1L4=
+github.com/mudler/go-stable-diffusion v0.0.0-20230516152536-c0748eca3642/go.mod h1:8ufRkpz/S/9ahkaxzZ5i4WMgO9w4InEhuRoT7vK5Rnw=
-github.com/modern-go/concurrent v0.0.0-20180228061459-e0a39a4cb421 h1:ZqeYNhU3OHLH3mGKHDcjJRFFRrJa6eAM5H+CtDdOsPc=
+github.com/niemeyer/pretty v0.0.0-20200227124842-a10e7caefd8e h1:fD57ERR4JtEqsWbfPhv4DMiApHyliiK5xCTNVSPiaAs=
-github.com/modern-go/concurrent v0.0.0-20180228061459-e0a39a4cb421/go.mod h1:6dJC0mAP4ikYIbvyc7fijjWJddQyLn8Ig3JB5CqoB9Q=
+github.com/niemeyer/pretty v0.0.0-20200227124842-a10e7caefd8e/go.mod h1:zD1mROLANZcx1PVRCS0qkT7pwLkGfwJo4zjcN/Tysno=
-github.com/modern-go/reflect2 v1.0.2 h1:xBagoLtFs94CBntxluKeaWgTMpvLxC4ur3nMaC9Gz0M=
+github.com/nomic-ai/gpt4all/gpt4all-bindings/golang v0.0.0-20230522220313-2ce22208a3dd h1:is/rE0YD8oEWcX3fQ+VxoS3fD0LqFEmTxh8XZegYYsA=
-github.com/modern-go/reflect2 v1.0.2/go.mod h1:yWuevngMOJpCy52FWWMvUC8ws7m/LJsjYzDa0/r8luk=
+github.com/nomic-ai/gpt4all/gpt4all-bindings/golang v0.0.0-20230522220313-2ce22208a3dd/go.mod h1:4T3CHXyrt+7FQHXaxULZfPjHbD8/99WuDDJa0YVZARI=
-github.com/mudler/go-ggllm.cpp v0.0.0-20230708215552-a6504d5bc137 h1:d+XGcCrw65q6KDUbF2wZBPVZ7i7kU6I7fKSX+UwzP7w=
+github.com/nomic-ai/gpt4all/gpt4all-bindings/golang v0.0.0-20230523222017-b36a52020702 h1:uya1G35AbUfVtG8fu/HuUGTFXpN7n9XuRAAvC1lTr+M=
-github.com/mudler/go-ggllm.cpp v0.0.0-20230708215552-a6504d5bc137/go.mod h1:00giAi/vwF8LX29JBjkPQhtASsivPnGNzB6sdmk8JGE=
+github.com/nomic-ai/gpt4all/gpt4all-bindings/golang v0.0.0-20230523222017-b36a52020702/go.mod h1:4T3CHXyrt+7FQHXaxULZfPjHbD8/99WuDDJa0YVZARI=
-github.com/mudler/go-ggllm.cpp v0.0.0-20230709223052-862477d16eef h1:OJZtJ5vYhlkTJI0RHIl62kOkhiINQEhZgsXlwmmNDhM=
+github.com/nomic-ai/gpt4all/gpt4all-bindings/golang v0.0.0-20230525153421-63f57635d83c h1:mDy1OKHlG9xv1KDMcOVNYQwoYKZSlb5Mu69W3+DNLYI=
-github.com/mudler/go-ggllm.cpp v0.0.0-20230709223052-862477d16eef/go.mod h1:00giAi/vwF8LX29JBjkPQhtASsivPnGNzB6sdmk8JGE=
+github.com/nomic-ai/gpt4all/gpt4all-bindings/golang v0.0.0-20230525153421-63f57635d83c/go.mod h1:4T3CHXyrt+7FQHXaxULZfPjHbD8/99WuDDJa0YVZARI=
-github.com/mudler/go-piper v0.0.0-20230621222733-56b8a81b4760 h1:OFVkSxR7CRSRSNm5dvpMRZwmSwWa8EMMnHbc84fW5tU=
+github.com/nomic-ai/gpt4all/gpt4all-bindings/golang v0.0.0-20230525202709-afe3870b7a29 h1:hgml/PMZX3M+WigXD4BGy+mbD1oPxYbXJXo16I555Aw=
-github.com/mudler/go-piper v0.0.0-20230621222733-56b8a81b4760/go.mod h1:O7SwdSWMilAWhBZMK9N9Y/oBDyMMzshE3ju8Xkexwig=
+github.com/nomic-ai/gpt4all/gpt4all-bindings/golang v0.0.0-20230525202709-afe3870b7a29/go.mod h1:4T3CHXyrt+7FQHXaxULZfPjHbD8/99WuDDJa0YVZARI=
-github.com/mudler/go-processmanager v0.0.0-20220724164624-c45b5c61312d h1:/lAg9vPAAU+s35cDMCx1IyeMn+4OYfCBPqi08Q8vXDg=
+github.com/nomic-ai/gpt4all/gpt4all-bindings/golang v0.0.0-20230525210850-d1ff7132c553 h1:+zQQHEoOaVUT72uLr6OJF+Lj35LR620aeeyrF7K6x5s=
-github.com/mudler/go-processmanager v0.0.0-20220724164624-c45b5c61312d/go.mod h1:HGGAOJhipApckwNV8ZTliRJqxctUv3xRY+zbQEwuytc=
+github.com/nomic-ai/gpt4all/gpt4all-bindings/golang v0.0.0-20230525210850-d1ff7132c553/go.mod h1:4T3CHXyrt+7FQHXaxULZfPjHbD8/99WuDDJa0YVZARI=
-github.com/mudler/go-stable-diffusion v0.0.0-20230605122230-d89260f598af h1:XFq6OUqsWQam0OrEr05okXsJK/TQur3zoZTHbiZD3Ks=
+github.com/nomic-ai/gpt4all/gpt4all-bindings/golang v0.0.0-20230526132403-a6f3e94458e2 h1:DE++nIPuUGk8pz71PF0BITX+CTF0lv4ZNWv12qCBUVk=
-github.com/mudler/go-stable-diffusion v0.0.0-20230605122230-d89260f598af/go.mod h1:8ufRkpz/S/9ahkaxzZ5i4WMgO9w4InEhuRoT7vK5Rnw=
+github.com/nomic-ai/gpt4all/gpt4all-bindings/golang v0.0.0-20230526132403-a6f3e94458e2/go.mod h1:4T3CHXyrt+7FQHXaxULZfPjHbD8/99WuDDJa0YVZARI=
-github.com/nomic-ai/gpt4all/gpt4all-bindings/golang v0.0.0-20230708212935-d611d107479f h1:FtXRIjsBvoBQ5xmA26QbzyG4RjV2U5lOpUgP4npITOM=
+github.com/nomic-ai/gpt4all/gpt4all-bindings/golang v0.0.0-20230528235700-9eb81cb54922 h1:teYhrXxFY28gyBm6QMcYewA0KvLXqkUsgxJcYelaxbg=
-github.com/nomic-ai/gpt4all/gpt4all-bindings/golang v0.0.0-20230708212935-d611d107479f/go.mod h1:4T3CHXyrt+7FQHXaxULZfPjHbD8/99WuDDJa0YVZARI=
+github.com/nomic-ai/gpt4all/gpt4all-bindings/golang v0.0.0-20230528235700-9eb81cb54922/go.mod h1:4T3CHXyrt+7FQHXaxULZfPjHbD8/99WuDDJa0YVZARI=
-github.com/nomic-ai/gpt4all/gpt4all-bindings/golang v0.0.0-20230714185456-cfd70b69fcf5 h1:bmQnxyKiqCu8i2y/N/Sf0coWoG2/Ed12YGQeb7lTnjo=
+github.com/onsi/ginkgo/v2 v2.9.5 h1:+6Hr4uxzP4XIUyAkg61dWBw8lb/gc4/X5luuxN/EC+Q=
-github.com/nomic-ai/gpt4all/gpt4all-bindings/golang v0.0.0-20230714185456-cfd70b69fcf5/go.mod h1:4T3CHXyrt+7FQHXaxULZfPjHbD8/99WuDDJa0YVZARI=
+github.com/onsi/ginkgo/v2 v2.9.5/go.mod h1:tvAoo1QUJwNEU2ITftXTpR7R1RbCzoZUOs3RonqW57k=
-github.com/nwaples/rardecode v1.1.0 h1:vSxaY8vQhOcVr4mm5e8XllHWTiM4JF507A0Katqw7MQ=
+github.com/onsi/gomega v1.27.7 h1:fVih9JD6ogIiHUN6ePK7HJidyEDpWGVB5mzM7cWNXoU=
-github.com/nwaples/rardecode v1.1.0/go.mod h1:5DzqNKiOdpKKBH87u8VlvAnPZMXcGRhxWkRpHbbfGS0=
+github.com/onsi/gomega v1.27.7/go.mod h1:1p8OOlwo2iUUDsHnOrjE5UKYJ+e3W8eQ3qSlRahPmr4=
-github.com/nxadm/tail v1.4.4/go.mod h1:kenIhsEOeOJmVchQTgglprH7qJGnHDVpk1VPCcaMI8A=
+github.com/otiai10/mint v1.5.1 h1:XaPLeE+9vGbuyEHem1JNk3bYc7KKqyI/na0/mLd/Kks=
-github.com/nxadm/tail v1.4.8 h1:nPr65rt6Y5JFSKQO7qToXr7pePgD6Gwiw05lkbyAQTE=
+github.com/otiai10/mint v1.5.1/go.mod h1:MJm72SBthJjz8qhefc4z1PYEieWmy8Bku7CjcAqyUSM=
-github.com/nxadm/tail v1.4.8/go.mod h1:+ncqLTQzXmGhMZNUePPaPqPvBxHAIsmXswZKocGu+AU=
+github.com/otiai10/openaigo v1.1.0 h1:zRvGBqZUW5PCMgdkJNsPVTBd8tOLCMTipXE5wD2pdTg=
-github.com/onsi/ginkgo v1.6.0/go.mod h1:lLunBs/Ym6LB5Z9jYTR76FiuTmxDTDusOGeTQH+WWjE=
+github.com/otiai10/openaigo v1.1.0/go.mod h1:792bx6AWTS61weDi2EzKpHHnTF4eDMAlJ5GvAk/mgPg=
 github.com/onsi/ginkgo v1.12.1/go.mod h1:zj2OWP4+oCPe1qIXoGWkgMRwljMUYCdkwsT2108oapk=
 github.com/onsi/ginkgo v1.16.4 h1:29JGrr5oVBm5ulCWet69zQkzWipVXIol6ygQUe/EzNc=
 github.com/onsi/ginkgo v1.16.4/go.mod h1:dX+/inL/fNMqNlz0e9LfyB9TswhZpCVdJM/Z6Vvnwo0=
 github.com/onsi/ginkgo/v2 v2.11.0 h1:WgqUCUt/lT6yXoQ8Wef0fsNn5cAuMK7+KT9UFRz2tcU=
 github.com/onsi/ginkgo/v2 v2.11.0/go.mod h1:ZhrRA5XmEE3x3rhlzamx/JJvujdZoJ2uvgI7kR0iZvM=
 github.com/onsi/gomega v1.7.1/go.mod h1:XdKZgCCFLUoM/7CFJVPcG8C1xQ1AJ0vpAezJrB7JYyY=
 github.com/onsi/gomega v1.10.1/go.mod h1:iN09h71vgCQne3DLsj+A5owkum+a2tYe+TOCB1ybHNo=
 github.com/onsi/gomega v1.16.0/go.mod h1:HnhC7FXeEQY45zxNK3PPoIUhzk/80Xly9PcubAlGdZY=
 github.com/onsi/gomega v1.27.8 h1:gegWiwZjBsf2DgiSbf5hpokZ98JVDMcWkUiigk6/KXc=
 github.com/onsi/gomega v1.27.8/go.mod h1:2J8vzI/s+2shY9XHRApDkdgPo1TKT7P2u6fXeJKFnNQ=
 github.com/otiai10/mint v1.6.1 h1:kgbTJmOpp/0ce7hk3H8jiSuR0MXmpwWRfqUdKww17qg=
 github.com/otiai10/openaigo v1.5.2 h1:YnNDisZmA4syArF3IxMCIrfgZOq30PLV219gPY7n2z8=
 github.com/otiai10/openaigo v1.5.2/go.mod h1:kIaXc3V+Xy5JLplcBxehVyGYDtufHp3PFPy04jOwOAI=
 github.com/phayes/freeport v0.0.0-20220201140144-74d24b5ae9f5 h1:Ii+DKncOVM8Cu1Hc+ETb5K+23HdAMvESYE3ZJ5b5cMI=
 github.com/phayes/freeport v0.0.0-20220201140144-74d24b5ae9f5/go.mod h1:iIss55rKnNBTvrwdmkUpLnDpZoAHvWaiq5+iMmen4AE=
 github.com/philhofer/fwd v1.1.1/go.mod h1:gk3iGcWd9+svBvR0sR+KPcfE+RNWozjowpeBVG3ZVNU=
 github.com/philhofer/fwd v1.1.2 h1:bnDivRJ1EWPjUIRXV5KfORO897HTbpFAQddBdE8t7Gw=
 github.com/philhofer/fwd v1.1.2/go.mod h1:qkPdfjR2SIEbspLqpe1tO4n5yICnr2DY7mqEx2tUTP0=
 github.com/pierrec/lz4/v4 v4.1.2 h1:qvY3YFXRQE/XB8MlLzJH7mSzBs74eA2gg52YTk6jUPM=
 github.com/pierrec/lz4/v4 v4.1.2/go.mod h1:gZWDp/Ze/IJXGXf23ltt2EXimqmTUXEy0GFuRQyBid4=
 github.com/pkg/errors v0.9.1/go.mod h1:bwawxfHBFNV+L2hUp1rHADufV3IMtnDRdf1r5NINEl0=
 github.com/pkoukk/tiktoken-go v0.1.2 h1:u7PCSBiWJ3nJYoTGShyM9iHXz4dNyYkurwwp+GHtyHY=
 github.com/pkoukk/tiktoken-go v0.1.2/go.mod h1:boMWvk9pQCOTx11pgu0DrIdrAKgQzzJKUP6vLXaz7Rw=
 github.com/pmezard/go-difflib v1.0.0 h1:4DBwDE0NGyQoBHbLQYPwSUPoCMWR5BEzIk/f1lZbAQM=
 github.com/pmezard/go-difflib v1.0.0/go.mod h1:iKH77koFhYxTK1pcRnkKkqfTogsbg7gZNVY4sRDYZ/4=
 github.com/rivo/uniseg v0.2.0 h1:S1pD9weZBuJdFmowNwbpi7BJ8TNftyUImj/0WQi72jY=
@ -168,10 +158,10 @@ github.com/rs/zerolog v1.29.1 h1:cO+d60CHkknCbvzEWxP0S9K6KqyTjrCNUy1LdQLCGPc=
 github.com/rs/zerolog v1.29.1/go.mod h1:Le6ESbR7hc+DP6Lt1THiV8CQSdkkNrd3R0XbEgp3ZBU=
 github.com/russross/blackfriday/v2 v2.1.0 h1:JIOH55/0cWyOuilr9/qlrm0BSXldqnqwMsf35Ld67mk=
 github.com/russross/blackfriday/v2 v2.1.0/go.mod h1:+Rmxgy9KzJVeS9/2gXHxylqXiyQDYRxCVz55jmeOWTM=
-github.com/sashabaranov/go-openai v1.13.0 h1:EAusFfnhaMaaUspUZ2+MbB/ZcVeD4epJmTOlZ+8AcAE=
+github.com/sashabaranov/go-openai v1.9.4 h1:KanoCEoowAI45jVXlenMCckutSRr39qOmSi9MyPBfZM=
-github.com/sashabaranov/go-openai v1.13.0/go.mod h1:lj5b/K+zjTSFxVLijLSTDZuP7adOgerWeFyZLUhAKRg=
+github.com/sashabaranov/go-openai v1.9.4/go.mod h1:lj5b/K+zjTSFxVLijLSTDZuP7adOgerWeFyZLUhAKRg=
-github.com/sashabaranov/go-openai v1.14.0 h1:D1yAB+DHElgbJFdYyjxfTWMFzhddn+PwZmkQ039L7mQ=
+github.com/sashabaranov/go-openai v1.9.5 h1:z1VCMXsfnug+U0ceTTIXr/L26AYl9jafqA9lptlSX0c=
-github.com/sashabaranov/go-openai v1.14.0/go.mod h1:lj5b/K+zjTSFxVLijLSTDZuP7adOgerWeFyZLUhAKRg=
+github.com/sashabaranov/go-openai v1.9.5/go.mod h1:lj5b/K+zjTSFxVLijLSTDZuP7adOgerWeFyZLUhAKRg=
 github.com/savsgio/dictpool v0.0.0-20221023140959-7bf2e61cea94 h1:rmMl4fXJhKMNWl+K+r/fq4FbbKI+Ia2m9hYBLm2h4G4=
 github.com/savsgio/dictpool v0.0.0-20221023140959-7bf2e61cea94/go.mod h1:90zrgN3D/WJsDd1iXHT96alCoN2KJo6/4x1DZC3wZs8=
 github.com/savsgio/gotils v0.0.0-20220530130905-52f3993e8d6d/go.mod h1:Gy+0tqhJvgGlqnTF8CVGP0AaGRjwBtXs/a5PA0Y3+A4=
@ -179,29 +169,21 @@ github.com/savsgio/gotils v0.0.0-20230208104028-c358bd845dee h1:8Iv5m6xEo1NR1Avp
 github.com/savsgio/gotils v0.0.0-20230208104028-c358bd845dee/go.mod h1:qwtSXrKuJh/zsFQ12yEE89xfCrGKK63Rr7ctU/uCo4g=
 github.com/stretchr/objx v0.1.0/go.mod h1:HFkY916IF+rwdDfMAkV7OtwuqBVzrE8GR6GFx+wExME=
 github.com/stretchr/testify v1.3.0/go.mod h1:M5WIy9Dh21IEIfnGCwXGc5bZfKNJtfHm1UVUgZn+9EI=
 github.com/stretchr/testify v1.5.1/go.mod h1:5W2xD1RspED5o8YsWQXVCued0rvSQ+mT+I5cxcmMvtA=
 github.com/stretchr/testify v1.6.1/go.mod h1:6Fq8oRcR53rry900zMqJjRRixrwX3KX962/h/Wwjteg=
-github.com/stretchr/testify v1.8.2 h1:+h33VjcLVPDHtOdpUCuF+7gSuG3yGIftsP1YvFihtJ8=
+github.com/stretchr/testify v1.8.1 h1:w7B6lhMri9wdJUVmEZPGGhZzrYTPvgJArz7wNPgYKsk=
 github.com/swaggo/swag v1.16.1 h1:fTNRhKstPKxcnoKsytm4sahr8FaYzUcT7i1/3nd/fBg=
 github.com/swaggo/swag v1.16.1/go.mod h1:9/LMvHycG3NFHfR6LwvikHv5iFvmPADQ359cKikGxto=
 github.com/tinylib/msgp v1.1.6/go.mod h1:75BAfg2hauQhs3qedfdDZmWAPcFMAvJE5b9rGOMufyw=
 github.com/tinylib/msgp v1.1.8 h1:FCXC1xanKO4I8plpHGH2P7koL/RzZs12l/+r7vakfm0=
 github.com/tinylib/msgp v1.1.8/go.mod h1:qkpG+2ldGg4xRFmx+jfTvZPxfGFhi64BcnL9vkCm/Tw=
-github.com/tmc/langchaingo v0.0.0-20230709010448-a875e6bc0c54 h1:MZSC3/pdBzkoPG49uTRvtEepOQKdbdgaT1aLtaEwxx4=
+github.com/urfave/cli/v2 v2.25.3 h1:VJkt6wvEBOoSjPFQvOkv6iWIrsJyCrKGtCtxXWwmGeY=
-github.com/tmc/langchaingo v0.0.0-20230709010448-a875e6bc0c54/go.mod h1:RsMJqgUynOtr2jWNhUF41R3j6SDkKq9c8UfE0nJYBb4=
+github.com/urfave/cli/v2 v2.25.3/go.mod h1:GHupkWPMM0M/sj1a2b4wUrWBPzazNrIjouW6fmdJLxc=
 github.com/tmc/langchaingo v0.0.0-20230713201705-dcf7ecdc8ac8 h1:wdJigYmmIRCuXhCkADDr53Oa1fp/WlxCPoVXR2r7GrU=
 github.com/tmc/langchaingo v0.0.0-20230713201705-dcf7ecdc8ac8/go.mod h1:mTzgQfAGwmBz2hhQELZfu2bwsbHwyKHA6IHOa+9LDFg=
 github.com/ulikunitz/xz v0.5.8/go.mod h1:nbz6k7qbPmH4IRqmfOplQw/tblSgqTqBwxkY0oWt/14=
 github.com/ulikunitz/xz v0.5.9 h1:RsKRIA2MO8x56wkkcd3LbtcE/uMszhb6DpRf+3uwa3I=
 github.com/ulikunitz/xz v0.5.9/go.mod h1:nbz6k7qbPmH4IRqmfOplQw/tblSgqTqBwxkY0oWt/14=
 github.com/urfave/cli/v2 v2.25.7 h1:VAzn5oq403l5pHjc4OhD54+XGO9cdKVL/7lDjF+iKUs=
 github.com/urfave/cli/v2 v2.25.7/go.mod h1:8qnjx1vcq5s2/wpsqoZFndg2CE5tNFyrTvS6SinrnYQ=
 github.com/valyala/bytebufferpool v1.0.0 h1:GqA5TC/0021Y/b9FG4Oi9Mr3q7XYx6KllzawFIhcdPw=
 github.com/valyala/bytebufferpool v1.0.0/go.mod h1:6bBcMArwyJ5K/AmCkWv1jt77kVWyCJ6HpOuEn7z0Csc=
-github.com/valyala/fasthttp v1.48.0 h1:oJWvHb9BIZToTQS3MuQ2R3bJZiNSa2KiNdeI8A+79Tc=
+github.com/valyala/fasthttp v1.47.0 h1:y7moDoxYzMooFpT5aHgNgVOQDrS3qlkfiP9mDtGGK9c=
-github.com/valyala/fasthttp v1.48.0/go.mod h1:k2zXd82h/7UZc3VOdJ2WaUqt1uZ/XpXAfE9i+HBC3lA=
+github.com/valyala/fasthttp v1.47.0/go.mod h1:k2zXd82h/7UZc3VOdJ2WaUqt1uZ/XpXAfE9i+HBC3lA=
 github.com/valyala/tcplisten v1.0.0 h1:rBHj/Xf+E1tRGZyWIWwJDiRY0zc1Js+CV5DqwacVSA8=
 github.com/valyala/tcplisten v1.0.0/go.mod h1:T0xQ8SeCZGxckz9qRXTfG43PvQ/mcWh7FwZEA7Ioqkc=
 github.com/xi2/xz v0.0.0-20171230120015-48954b6210f8 h1:nIPpBwaJSVYIxUFsDv3M8ofmx9yWTog9BfvIu0q41lo=
 github.com/xi2/xz v0.0.0-20171230120015-48954b6210f8/go.mod h1:HUYIGzjTL3rfEspMxjDjgmT5uz5wzYJKVo23qUhYTos=
 github.com/xrash/smetrics v0.0.0-20201216005158-039620a65673 h1:bAn7/zixMGCfxrRTfdpNzjtPYqr8smhKouy9mxVdGPU=
 github.com/xrash/smetrics v0.0.0-20201216005158-039620a65673/go.mod h1:N3UwUGtsrSj3ccvlPHLoLsHnpR27oXr4ZE984MbSER8=
 github.com/yuin/goldmark v1.2.1/go.mod h1:3hX8gzYuyVAZsxl0MRgGTJEmQBFcNTphYh9decYSb74=
@ -214,34 +196,25 @@ golang.org/x/mod v0.3.0/go.mod h1:s0Qsj1ACt9ePp/hMypM3fl4fZqREWJwdYDEqhRiZZUA=
 golang.org/x/mod v0.6.0-dev.0.20220419223038-86c51ed26bb4/go.mod h1:jJ57K6gSWd91VN4djpZkiMVwK6gcyfeH4XE8wZrZaV4=
 golang.org/x/mod v0.7.0/go.mod h1:iBbtSCu2XBx23ZKBPSOrRkjjQPZFPuis4dIYUhu/chs=
 golang.org/x/mod v0.10.0 h1:lFO9qtOdlre5W1jxS3r/4szv2/6iXxScdzjoBMXNhYk=
 golang.org/x/net v0.0.0-20180906233101-161cd47e91fd/go.mod h1:mL1N/T3taQHkDXs73rZJwtUhF3w3ftmwwsq0BUmARs4=
 golang.org/x/net v0.0.0-20190404232315-eb5bcb51f2a3/go.mod h1:t9HGtf8HONx5eT2rtn7q6eTqICYqUVnKs3thJo3Qplg=
 golang.org/x/net v0.0.0-20190620200207-3b0461eec859/go.mod h1:z5CRVTTTmAJ677TzLLGU+0bjPO0LkuOLi4/5GtJWs/s=
 golang.org/x/net v0.0.0-20200520004742-59133d7f0dd7/go.mod h1:qpuaurCH72eLCgpAm/N6yyVIVM9cpaDIP3A8BGJEC5A=
 golang.org/x/net v0.0.0-20201021035429-f5854403a974/go.mod h1:sp8m0HH+o8qH0wwXwYZr8TS3Oi6o0r6Gce1SSxlDquU=
 golang.org/x/net v0.0.0-20210226172049-e18ecbb05110/go.mod h1:m0MpNAwzfU5UDzcl9v0D8zg8gWTRqZa9RBIspLL5mdg=
-golang.org/x/net v0.0.0-20210428140749-89ef3d95e781/go.mod h1:OJAsFXCWl8Ukc7SiCT/9KSuxbyM7479/AVlXFRxuMCk=
+golang.org/x/net v0.0.0-20210421230115-4e50805a0758/go.mod h1:72T/g9IO56b78aLF+1Kcs5dz7/ng1VjMUvfKvpfy+jM=
 golang.org/x/net v0.0.0-20220722155237-a158d28d115b/go.mod h1:XRhObCWvk6IyKnWLug+ECip1KBveYUHfp+8e9klMJ9c=
 golang.org/x/net v0.3.0/go.mod h1:MBQ8lrhLObU/6UmLb4fmbmk5OcyYmqtbGd/9yIeKjEE=
 golang.org/x/net v0.10.0 h1:X2//UzNDwYmtCLn7To6G58Wr6f5ahEAQgKNzv9Y951M=
 golang.org/x/net v0.10.0/go.mod h1:0qNGK6F8kojg2nk9dLZ2mShWaEBan6FAoqfSigmmuDg=
 golang.org/x/sync v0.0.0-20180314180146-1d60e4601c6f/go.mod h1:RxMgew5VJxzue5/jJTE5uejpjVlOe/izrB70Jof72aM=
 golang.org/x/sync v0.0.0-20190423024810-112230192c58/go.mod h1:RxMgew5VJxzue5/jJTE5uejpjVlOe/izrB70Jof72aM=
 golang.org/x/sync v0.0.0-20201020160332-67f06af15bc9/go.mod h1:RxMgew5VJxzue5/jJTE5uejpjVlOe/izrB70Jof72aM=
 golang.org/x/sync v0.0.0-20220722155255-886fb9371eb4/go.mod h1:RxMgew5VJxzue5/jJTE5uejpjVlOe/izrB70Jof72aM=
 golang.org/x/sync v0.1.0/go.mod h1:RxMgew5VJxzue5/jJTE5uejpjVlOe/izrB70Jof72aM=
 golang.org/x/sys v0.0.0-20180909124046-d0be0721c37e/go.mod h1:STP8DvDyc/dI5b8T5hshtkjS+E42TnysNCUPdjciGhY=
 golang.org/x/sys v0.0.0-20190215142949-d0b11bdaac8a/go.mod h1:STP8DvDyc/dI5b8T5hshtkjS+E42TnysNCUPdjciGhY=
 golang.org/x/sys v0.0.0-20190412213103-97732733099d/go.mod h1:h1NjWce9XRLGQEsW7wpKNCjG9DtNlClVuFLEZdDNbEs=
 golang.org/x/sys v0.0.0-20190904154756-749cb33beabd/go.mod h1:h1NjWce9XRLGQEsW7wpKNCjG9DtNlClVuFLEZdDNbEs=
 golang.org/x/sys v0.0.0-20191005200804-aed5e4c7ecf9/go.mod h1:h1NjWce9XRLGQEsW7wpKNCjG9DtNlClVuFLEZdDNbEs=
 golang.org/x/sys v0.0.0-20191120155948-bd437916bb0e/go.mod h1:h1NjWce9XRLGQEsW7wpKNCjG9DtNlClVuFLEZdDNbEs=
 golang.org/x/sys v0.0.0-20191204072324-ce4227a45e2e/go.mod h1:h1NjWce9XRLGQEsW7wpKNCjG9DtNlClVuFLEZdDNbEs=
 golang.org/x/sys v0.0.0-20200323222414-85ca7c5b95cd/go.mod h1:h1NjWce9XRLGQEsW7wpKNCjG9DtNlClVuFLEZdDNbEs=
 golang.org/x/sys v0.0.0-20200930185726-fdedc70b468f/go.mod h1:h1NjWce9XRLGQEsW7wpKNCjG9DtNlClVuFLEZdDNbEs=
 golang.org/x/sys v0.0.0-20201119102817-f84b799fce68/go.mod h1:h1NjWce9XRLGQEsW7wpKNCjG9DtNlClVuFLEZdDNbEs=
-golang.org/x/sys v0.0.0-20210112080510-489259a85091/go.mod h1:h1NjWce9XRLGQEsW7wpKNCjG9DtNlClVuFLEZdDNbEs=
+golang.org/x/sys v0.0.0-20210420072515-93ed5bcd2bfe/go.mod h1:h1NjWce9XRLGQEsW7wpKNCjG9DtNlClVuFLEZdDNbEs=
 golang.org/x/sys v0.0.0-20210423082822-04245dca01da/go.mod h1:h1NjWce9XRLGQEsW7wpKNCjG9DtNlClVuFLEZdDNbEs=
 golang.org/x/sys v0.0.0-20210615035016-665e8c7367d1/go.mod h1:oPkhp1MJrh7nUepCBck5+mAzfO9JrbApNNgaTdGDITg=
 golang.org/x/sys v0.0.0-20210630005230-0f9fa26af87c/go.mod h1:oPkhp1MJrh7nUepCBck5+mAzfO9JrbApNNgaTdGDITg=
 golang.org/x/sys v0.0.0-20210927094055-39ccf1dd6fa6/go.mod h1:oPkhp1MJrh7nUepCBck5+mAzfO9JrbApNNgaTdGDITg=
@ -250,10 +223,8 @@ golang.org/x/sys v0.0.0-20220722155257-8c9f86f7a55f/go.mod h1:oPkhp1MJrh7nUepCBc
 golang.org/x/sys v0.0.0-20220811171246-fbc7d0a398ab/go.mod h1:oPkhp1MJrh7nUepCBck5+mAzfO9JrbApNNgaTdGDITg=
 golang.org/x/sys v0.3.0/go.mod h1:oPkhp1MJrh7nUepCBck5+mAzfO9JrbApNNgaTdGDITg=
 golang.org/x/sys v0.6.0/go.mod h1:oPkhp1MJrh7nUepCBck5+mAzfO9JrbApNNgaTdGDITg=
-golang.org/x/sys v0.9.0 h1:KS/R3tvhPqvJvwcKfnBHJwwthS11LRhmM5D59eEXa0s=
+golang.org/x/sys v0.8.0 h1:EBmGv8NaZBZTWvrbjNoL6HVt+IVy3QDQpJs7VRIw3tU=
-golang.org/x/sys v0.9.0/go.mod h1:oPkhp1MJrh7nUepCBck5+mAzfO9JrbApNNgaTdGDITg=
+golang.org/x/sys v0.8.0/go.mod h1:oPkhp1MJrh7nUepCBck5+mAzfO9JrbApNNgaTdGDITg=
 golang.org/x/sys v0.10.0 h1:SqMFp9UcQJZa+pmYuAKjd9xq1f0j5rLcDIk0mj4qAsA=
 golang.org/x/sys v0.10.0/go.mod h1:oPkhp1MJrh7nUepCBck5+mAzfO9JrbApNNgaTdGDITg=
 golang.org/x/term v0.0.0-20201126162022-7de9c90e9dd1/go.mod h1:bj7SfCRtBDWHUb9snDiAeCFNEtKQo2Wmx5Cou7ajbmo=
 golang.org/x/term v0.0.0-20210927222741-03fcf44c2211/go.mod h1:jbD1KX2456YbFQfuXm/mYQcufACuNUgVhRMnK/tPxf8=
 golang.org/x/term v0.3.0/go.mod h1:q750SLmJuPmVoN1blW3UFBPREJfb1KmY3vwxfr+nFDA=
@ -267,44 +238,22 @@ golang.org/x/text v0.9.0/go.mod h1:e1OnstbJyHTd6l/uOt8jFFHp6TRDWZR/bV3emEE/zU8=
 golang.org/x/tools v0.0.0-20180917221912-90fa682c2a6e/go.mod h1:n7NCudcB/nEzxVGmLbDWY5pfWTLqBcC2KZ6jyYvM4mQ=
 golang.org/x/tools v0.0.0-20191119224855-298f0cb1881e/go.mod h1:b+2E5dAYhXwXZwtnZ6UAqBI28+e2cm9otk0dWdXHAEo=
 golang.org/x/tools v0.0.0-20201022035929-9cf592e881e9/go.mod h1:emZCQorbCU4vsT4fOWvOPXz4eW1wZW4PmDk9uLelYpA=
 golang.org/x/tools v0.0.0-20201224043029-2b0845dc783e/go.mod h1:emZCQorbCU4vsT4fOWvOPXz4eW1wZW4PmDk9uLelYpA=
 golang.org/x/tools v0.1.12/go.mod h1:hNGJHUnrk76NpqgfD5Aqm5Crs+Hm0VOH/i9J2+nxYbc=
 golang.org/x/tools v0.4.0/go.mod h1:UE5sM2OK9E/d67R0ANs2xJizIymRP5gJU295PvKXxjQ=
-golang.org/x/tools v0.9.3 h1:Gn1I8+64MsuTb/HpH+LmQtNas23LhUVr3rYZ0eKuaMM=
+golang.org/x/tools v0.9.1 h1:8WMNJAz3zrtPmnYC7ISf5dEn3MT0gY7jBJfw27yrrLo=
-golang.org/x/tools v0.9.3/go.mod h1:owI94Op576fPu3cIGQeHs3joujW/2Oc6MtlxbF5dfNc=
+golang.org/x/tools v0.9.1/go.mod h1:owI94Op576fPu3cIGQeHs3joujW/2Oc6MtlxbF5dfNc=
 golang.org/x/xerrors v0.0.0-20190717185122-a985d3407aa7/go.mod h1:I/5z698sn9Ka8TeJc9MKroUUfqBBauWjQqLJ2OPfmY0=
 golang.org/x/xerrors v0.0.0-20191011141410-1b5146add898/go.mod h1:I/5z698sn9Ka8TeJc9MKroUUfqBBauWjQqLJ2OPfmY0=
 golang.org/x/xerrors v0.0.0-20191204190536-9bdfabe68543/go.mod h1:I/5z698sn9Ka8TeJc9MKroUUfqBBauWjQqLJ2OPfmY0=
 golang.org/x/xerrors v0.0.0-20200804184101-5ec99f83aff1/go.mod h1:I/5z698sn9Ka8TeJc9MKroUUfqBBauWjQqLJ2OPfmY0=
-google.golang.org/genproto v0.0.0-20230410155749-daa745c078e1 h1:KpwkzHKEF7B9Zxg18WzOa7djJ+Ha5DzthMyZYQfEn2A=
+google.golang.org/protobuf v1.28.0 h1:w43yiav+6bVFTBQFZX0r7ipe9JQ1QsbMgHwbBziscLw=
 google.golang.org/genproto v0.0.0-20230410155749-daa745c078e1/go.mod h1:nKE/iIaLqn2bQwXBg8f1g2Ylh6r5MN5CmZvuzZCgsCU=
 google.golang.org/grpc v1.56.2 h1:fVRFRnXvU+x6C4IlHZewvJOVHoOv1TUuQyoRsYnB4bI=
 google.golang.org/grpc v1.56.2/go.mod h1:I9bI3vqKfayGqPUAwGdOSu7kt6oIJLixfffKrpXqQ9s=
 google.golang.org/protobuf v0.0.0-20200109180630-ec00e32a8dfd/go.mod h1:DFci5gLYBciE7Vtevhsrf46CRTquxDuWsQurQQe4oz8=
 google.golang.org/protobuf v0.0.0-20200221191635-4d8936d0db64/go.mod h1:kwYJMbMJ01Woi6D6+Kah6886xMZcty6N08ah7+eCXa0=
 google.golang.org/protobuf v0.0.0-20200228230310-ab0ca4ff8a60/go.mod h1:cfTl7dwQJ+fmap5saPgwCLgHXTUD7jkjRqWcaiX5VyM=
 google.golang.org/protobuf v1.20.1-0.20200309200217-e05f789c0967/go.mod h1:A+miEFZTKqfCUM6K7xSMQL9OKL/b6hQv+e19PK+JZNE=
 google.golang.org/protobuf v1.21.0/go.mod h1:47Nbq4nVaFHyn7ilMalzfO3qCViNmqZ2kzikPIcrTAo=
 google.golang.org/protobuf v1.23.0/go.mod h1:EGpADcykh3NcUnDUJcl1+ZksZNG86OlYog2l/sGQquU=
 google.golang.org/protobuf v1.26.0-rc.1/go.mod h1:jlhhOSvTdKEhbULTjvd4ARK9grFBp09yW+WbY/TyQbw=
 google.golang.org/protobuf v1.26.0/go.mod h1:9q0QmTI4eRPtz6boOQmLYwt+qCgq0jsYwAQnmE0givc=
 google.golang.org/protobuf v1.30.0 h1:kPPoIgf3TsEvrm0PFe15JQ+570QVxYzEvvHqChK+cng=
 google.golang.org/protobuf v1.30.0/go.mod h1:HV8QOd/L58Z+nl8r43ehVNZIU/HEI6OcFqwMG9pJV4I=
 google.golang.org/protobuf v1.31.0 h1:g0LDEJHgrBl9N9r17Ru3sqWhkIx2NB67okBHPwC7hs8=
 google.golang.org/protobuf v1.31.0/go.mod h1:HV8QOd/L58Z+nl8r43ehVNZIU/HEI6OcFqwMG9pJV4I=
 gopkg.in/check.v1 v0.0.0-20161208181325-20d25e280405/go.mod h1:Co6ibVJAznAaIkqp8huTwlJQCZ016jof/cbN4VW5Yz0=
-gopkg.in/check.v1 v1.0.0-20201130134442-10cb98267c6c h1:Hei/4ADfdWqJk1ZMxUNpqntNwaWcugrBjAiHlqqRiVk=
+gopkg.in/check.v1 v1.0.0-20180628173108-788fd7840127/go.mod h1:Co6ibVJAznAaIkqp8huTwlJQCZ016jof/cbN4VW5Yz0=
-gopkg.in/check.v1 v1.0.0-20201130134442-10cb98267c6c/go.mod h1:JHkPIbrfpd72SG/EVd6muEfDQjcINNoR0C8j2r3qZ4Q=
+gopkg.in/check.v1 v1.0.0-20200227125254-8fa46927fb4f h1:BLraFXnmrev5lT+xlilqcH8XK9/i0At2xKjWk4p6zsU=
-gopkg.in/fsnotify.v1 v1.4.7 h1:xOHLXZwVvI9hhs+cLKq5+I5onOuwQLhQwiu63xxlHs4=
+gopkg.in/check.v1 v1.0.0-20200227125254-8fa46927fb4f/go.mod h1:Co6ibVJAznAaIkqp8huTwlJQCZ016jof/cbN4VW5Yz0=
 gopkg.in/fsnotify.v1 v1.4.7/go.mod h1:Tz8NjZHkW78fSQdbUxIjBTcgA1z1m8ZHf0WmKUhAMys=
 gopkg.in/op/go-logging.v1 v1.0.0-20160211212156-b2cb9fa56473/go.mod h1:N1eN2tsCx0Ydtgjl4cqmbRCsY4/+z4cYDeqwZTk6zog=
 gopkg.in/tomb.v1 v1.0.0-20141024135613-dd632973f1e7 h1:uRGJdciOHaEIrze2W8Q3AKkepLTh2hOroT7a+7czfdQ=
 gopkg.in/tomb.v1 v1.0.0-20141024135613-dd632973f1e7/go.mod h1:dt/ZhP58zS4L8KSrWDmTeBkI65Dw0HsyUHuEVlX15mw=
 gopkg.in/yaml.v2 v2.2.2/go.mod h1:hI93XBmqTisBFMUTm0b8Fm+jr3Dg1NNxqwp+5A1VGuI=
 gopkg.in/yaml.v2 v2.2.4/go.mod h1:hI93XBmqTisBFMUTm0b8Fm+jr3Dg1NNxqwp+5A1VGuI=
 gopkg.in/yaml.v2 v2.3.0/go.mod h1:hI93XBmqTisBFMUTm0b8Fm+jr3Dg1NNxqwp+5A1VGuI=
 gopkg.in/yaml.v2 v2.4.0 h1:D8xgwECY7CYvx+Y2n4sBz93Jn9JRvxdiyyo8CTfuKaY=
 gopkg.in/yaml.v2 v2.4.0/go.mod h1:RDklbk79AGWmwhnvt/jBztapEOGDOx6ZbXqjP6csGnQ=
 gopkg.in/yaml.v3 v3.0.0-20200313102051-9f266ea9e77c/go.mod h1:K4uyk7z7BCEPqu6E+C64Yfv1cQ7kz7rIZviUmN+EgEM=
 gopkg.in/yaml.v3 v3.0.0-20200615113413-eeeca48fe776/go.mod h1:K4uyk7z7BCEPqu6E+C64Yfv1cQ7kz7rIZviUmN+EgEM=
 gopkg.in/yaml.v3 v3.0.1 h1:fxVm/GzAzEWqLHuvctI91KS9hhNmmWOoWu0XTYJS7CA=
 gopkg.in/yaml.v3 v3.0.1/go.mod h1:K4uyk7z7BCEPqu6E+C64Yfv1cQ7kz7rIZviUmN+EgEM=
--- a/Show More
+++ b/Show More
Author	SHA1	Message	Date
mudler	4c3c6fcaf7	examples(telegram): add	1 year ago
mudler	6a13cf957c	examples(flowise): add	1 year ago
mudler	3e0b75b5e2	examples: use gallery in chatbot-ui, add flowise Signed-off-by: mudler <mudler@mocaccino.org>	1 year ago