diff --git a/.github/workflows/bump_deps.yaml b/.github/workflows/bump_deps.yaml
index 51fd12c..b464b0d 100644
--- a/.github/workflows/bump_deps.yaml
+++ b/.github/workflows/bump_deps.yaml
@@ -13,7 +13,7 @@ jobs:
             variable: "GOLLAMA_VERSION"
             branch: "master"
           - repository: "go-skynet/go-llama.cpp"
-            variable: "GOLLAMA_MASTER_VERSION"
+            variable: "GOLLAMA_GRAMMAR_VERSION"
             branch: "master"
           - repository: "go-skynet/go-ggml-transformers.cpp"
             variable: "GOGGMLTRANSFORMERS_VERSION"
diff --git a/Makefile b/Makefile
index 1e07437..5813ba2 100644
--- a/Makefile
+++ b/Makefile
@@ -5,16 +5,16 @@ BINARY_NAME=local-ai
 
 # llama.cpp versions
 # Temporarly pinned to https://github.com/go-skynet/go-llama.cpp/pull/124
-GOLLAMA_VERSION?=cb8d7cd4cb95725a04504a9e3a26dd72a12b69ac
+GOLLAMA_VERSION?=c90272fdb693fc8d6faf20e1e9a5481c453318e8
 
-GOLLAMA_MASTER_VERSION?=c90272fdb693fc8d6faf20e1e9a5481c453318e8
+GOLLAMA_GRAMMAR_VERSION?=cb8d7cd4cb95725a04504a9e3a26dd72a12b69ac
-# Temporary set a specific version of llama.cpp
+# Temporarily set a specific version of llama.cpp
 # containing: https://github.com/ggerganov/llama.cpp/pull/1773 and
 # rebased on top of master.
 # This pin can be dropped when the PR above is merged, and go-llama has merged changes as well
 # Set empty to use the version pinned by go-llama
-LLAMA_CPP_REPO?=https://github.com/mudler/llama.cpp
-LLAMA_CPP_VERSION?=48ce8722a05a018681634af801fd0fd45b3a87cc
+LLAMA_CPP_GRAMMAR_REPO?=https://github.com/mudler/llama.cpp
+LLAMA_CPP_GRAMMAR_VERSION?=48ce8722a05a018681634af801fd0fd45b3a87cc
 
 # gpt4all version
 GPT4ALL_REPO?=https://github.com/nomic-ai/gpt4all
@@ -201,29 +201,29 @@ whisper.cpp/libwhisper.a: whisper.cpp
 go-llama:
 	git clone --recurse-submodules https://github.com/go-skynet/go-llama.cpp go-llama
 	cd go-llama && git checkout -b build $(GOLLAMA_VERSION) && git submodule update --init --recursive --depth 1
-ifneq ($(LLAMA_CPP_REPO),)
-	cd go-llama && rm -rf llama.cpp && git clone $(LLAMA_CPP_REPO) llama.cpp && cd llama.cpp && git checkout -b build $(LLAMA_CPP_VERSION) && git submodule update --init --recursive --depth 1
-endif
 
-go-llama-master:
-	git clone --recurse-submodules https://github.com/go-skynet/go-llama.cpp go-llama-master
-	cd go-llama-master && git checkout -b build $(GOLLAMA_MASTER_VERSION) && git submodule update --init --recursive --depth 1
+go-llama-grammar:
+	git clone --recurse-submodules https://github.com/go-skynet/go-llama.cpp go-llama-grammar
+	cd go-llama-grammar && git checkout -b build $(GOLLAMA_GRAMMAR_VERSION) && git submodule update --init --recursive --depth 1
+ifneq ($(LLAMA_CPP_GRAMMAR_REPO),)
+	cd go-llama-grammar && rm -rf llama.cpp && git clone $(LLAMA_CPP_GRAMMAR_REPO) llama.cpp && cd llama.cpp && git checkout -b build $(LLAMA_CPP_GRAMMAR_VERSION) && git submodule update --init --recursive --depth 1
+endif
 
 go-llama/libbinding.a: go-llama
 	$(MAKE) -C go-llama BUILD_TYPE=$(BUILD_TYPE) libbinding.a
 
-go-llama-master/libbinding.a: go-llama-master
-	$(MAKE) -C go-llama-master BUILD_TYPE=$(BUILD_TYPE) libbinding.a
+go-llama-grammar/libbinding.a: go-llama-grammar
+	$(MAKE) -C go-llama-grammar BUILD_TYPE=$(BUILD_TYPE) libbinding.a
 
 go-piper/libpiper_binding.a:
 	$(MAKE) -C go-piper libpiper_binding.a example/main
 
-get-sources: go-llama go-ggllm go-llama-master go-ggml-transformers gpt4all go-piper go-rwkv whisper.cpp go-bert bloomz go-stable-diffusion
+get-sources: go-llama go-ggllm go-llama-grammar go-ggml-transformers gpt4all go-piper go-rwkv whisper.cpp go-bert bloomz go-stable-diffusion
 	touch $@
 
 replace:
 	$(GOCMD) mod edit -replace github.com/go-skynet/go-llama.cpp=$(shell pwd)/go-llama
-	$(GOCMD) mod edit -replace github.com/go-skynet/go-llama.cpp-master=$(shell pwd)/go-llama-master
+	$(GOCMD) mod edit -replace github.com/go-skynet/go-llama.cpp-grammar=$(shell pwd)/go-llama-grammar
 	$(GOCMD) mod edit -replace github.com/nomic-ai/gpt4all/gpt4all-bindings/golang=$(shell pwd)/gpt4all/gpt4all-bindings/golang
 	$(GOCMD) mod edit -replace github.com/go-skynet/go-ggml-transformers.cpp=$(shell pwd)/go-ggml-transformers
 	$(GOCMD) mod edit -replace github.com/donomii/go-rwkv.cpp=$(shell pwd)/go-rwkv
@@ -241,7 +241,7 @@ prepare-sources: get-sources replace
 rebuild: ## Rebuilds the project
 	$(GOCMD) clean -cache
 	$(MAKE) -C go-llama clean
-	$(MAKE) -C go-llama-master clean
+	$(MAKE) -C go-llama-grammar clean
 	$(MAKE) -C gpt4all/gpt4all-bindings/golang/ clean
 	$(MAKE) -C go-ggml-transformers clean
 	$(MAKE) -C go-rwkv clean
@@ -371,9 +371,9 @@ backend-assets/grpc/llama: backend-assets/grpc go-llama/libbinding.a
 	CGO_LDFLAGS="$(CGO_LDFLAGS)" C_INCLUDE_PATH=$(shell pwd)/go-llama LIBRARY_PATH=$(shell pwd)/go-llama \
 	$(GOCMD) build -ldflags "$(LD_FLAGS)" -tags "$(GO_TAGS)" -o backend-assets/grpc/llama ./cmd/grpc/llama/
 
-backend-assets/grpc/llama-master: backend-assets/grpc go-llama-master/libbinding.a
-	CGO_LDFLAGS="$(CGO_LDFLAGS)" C_INCLUDE_PATH=$(shell pwd)/go-llama-master LIBRARY_PATH=$(shell pwd)/go-llama-master \
-	$(GOCMD) build -ldflags "$(LD_FLAGS)" -tags "$(GO_TAGS)" -o backend-assets/grpc/llama-master ./cmd/grpc/llama-master/
+backend-assets/grpc/llama-grammar: backend-assets/grpc go-llama-grammar/libbinding.a
+	CGO_LDFLAGS="$(CGO_LDFLAGS)" C_INCLUDE_PATH=$(shell pwd)/go-llama-grammar LIBRARY_PATH=$(shell pwd)/go-llama-grammar \
+	$(GOCMD) build -ldflags "$(LD_FLAGS)" -tags "$(GO_TAGS)" -o backend-assets/grpc/llama-grammar ./cmd/grpc/llama-grammar/
 
 backend-assets/grpc/gpt4all: backend-assets/grpc backend-assets/gpt4all gpt4all/gpt4all-bindings/golang/libgpt4all.a
 	CGO_LDFLAGS="$(CGO_LDFLAGS)" C_INCLUDE_PATH=$(shell pwd)/gpt4all/gpt4all-bindings/golang/ LIBRARY_PATH=$(shell pwd)/gpt4all/gpt4all-bindings/golang/ \
@@ -438,4 +438,4 @@ backend-assets/grpc/whisper: backend-assets/grpc whisper.cpp/libwhisper.a
 	CGO_LDFLAGS="$(CGO_LDFLAGS)" C_INCLUDE_PATH=$(shell pwd)/whisper.cpp LIBRARY_PATH=$(shell pwd)/whisper.cpp \
 	$(GOCMD) build -ldflags "$(LD_FLAGS)" -tags "$(GO_TAGS)" -o backend-assets/grpc/whisper ./cmd/grpc/whisper/
 
-grpcs: prepare backend-assets/grpc/langchain-huggingface backend-assets/grpc/llama-master backend-assets/grpc/falcon-ggml backend-assets/grpc/bert-embeddings backend-assets/grpc/falcon backend-assets/grpc/bloomz backend-assets/grpc/llama backend-assets/grpc/gpt4all backend-assets/grpc/dolly backend-assets/grpc/gpt2 backend-assets/grpc/gptj backend-assets/grpc/gptneox backend-assets/grpc/mpt backend-assets/grpc/replit backend-assets/grpc/starcoder backend-assets/grpc/rwkv backend-assets/grpc/whisper $(OPTIONAL_GRPC)
\ No newline at end of file
+grpcs: prepare backend-assets/grpc/langchain-huggingface backend-assets/grpc/llama-grammar backend-assets/grpc/falcon-ggml backend-assets/grpc/bert-embeddings backend-assets/grpc/falcon backend-assets/grpc/bloomz backend-assets/grpc/llama backend-assets/grpc/gpt4all backend-assets/grpc/dolly backend-assets/grpc/gpt2 backend-assets/grpc/gptj backend-assets/grpc/gptneox backend-assets/grpc/mpt backend-assets/grpc/replit backend-assets/grpc/starcoder backend-assets/grpc/rwkv backend-assets/grpc/whisper $(OPTIONAL_GRPC)
diff --git a/api/api_test.go b/api/api_test.go
index 06e978b..6970a8f 100644
--- a/api/api_test.go
+++ b/api/api_test.go
@@ -291,7 +291,7 @@ var _ = Describe("API test", func() {
 				response := postModelApplyRequest("http://127.0.0.1:9090/models/apply", modelApplyRequest{
 					URL:       "github:go-skynet/model-gallery/openllama_3b.yaml",
 					Name:      "openllama_3b",
-					Overrides: map[string]string{},
+					Overrides: map[string]string{"backend": "llama-grammar"},
 				})
 
 				Expect(response["uuid"]).ToNot(BeEmpty(), fmt.Sprint(response))
diff --git a/cmd/grpc/llama-master/main.go b/cmd/grpc/llama-grammar/main.go
similarity index 85%
rename from cmd/grpc/llama-master/main.go
rename to cmd/grpc/llama-grammar/main.go
index 51cd00f..4653eb2 100644
--- a/cmd/grpc/llama-master/main.go
+++ b/cmd/grpc/llama-grammar/main.go
@@ -7,7 +7,7 @@ package main
 import (
 	"flag"
 
-	llama "github.com/go-skynet/LocalAI/pkg/grpc/llm/llama-master"
+	llama "github.com/go-skynet/LocalAI/pkg/grpc/llm/llama-grammar"
 
 	grpc "github.com/go-skynet/LocalAI/pkg/grpc"
 )
diff --git a/go.mod b/go.mod
index f56ce00..d0e5194 100644
--- a/go.mod
+++ b/go.mod
@@ -39,7 +39,7 @@ require (
 require (
 	github.com/dlclark/regexp2 v1.8.1 // indirect
 	github.com/dsnet/compress v0.0.2-0.20210315054119-f66993602bf5 // indirect
-	github.com/go-skynet/go-llama.cpp-master v0.0.0-20230703203849-ffa57fbc3a12 // indirect
+	github.com/go-skynet/go-llama.cpp-grammar v0.0.0-20230703203849-ffa57fbc3a12 // indirect
 	github.com/golang/protobuf v1.5.3 // indirect
 	github.com/golang/snappy v0.0.2 // indirect
 	github.com/klauspost/pgzip v1.2.5 // indirect
diff --git a/pkg/grpc/llm/llama-master/llama.go b/pkg/grpc/llm/llama-grammar/llama.go
similarity index 97%
rename from pkg/grpc/llm/llama-master/llama.go
rename to pkg/grpc/llm/llama-grammar/llama.go
index 43ae25f..515a24c 100644
--- a/pkg/grpc/llm/llama-master/llama.go
+++ b/pkg/grpc/llm/llama-grammar/llama.go
@@ -7,7 +7,7 @@ import (
 
 	"github.com/go-skynet/LocalAI/pkg/grpc/base"
 	pb "github.com/go-skynet/LocalAI/pkg/grpc/proto"
-	"github.com/go-skynet/go-llama.cpp-master"
+	"github.com/go-skynet/go-llama.cpp-grammar"
 )
 
 type LLM struct {
@@ -71,6 +71,8 @@ func buildPredictOptions(opts *pb.PredictOptions) []llama.PredictOption {
 		predictOptions = append(predictOptions, llama.EnablePromptCacheRO)
 	}
 
+	predictOptions = append(predictOptions, llama.WithGrammar(opts.Grammar))
+
 	// Expected absolute path
 	if opts.PromptCachePath != "" {
 		predictOptions = append(predictOptions, llama.SetPathPromptCache(opts.PromptCachePath))
diff --git a/pkg/grpc/llm/llama/llama.go b/pkg/grpc/llm/llama/llama.go
index 82063b7..421eb0c 100644
--- a/pkg/grpc/llm/llama/llama.go
+++ b/pkg/grpc/llm/llama/llama.go
@@ -71,8 +71,6 @@ func buildPredictOptions(opts *pb.PredictOptions) []llama.PredictOption {
 		predictOptions = append(predictOptions, llama.EnablePromptCacheRO)
 	}
 
-	predictOptions = append(predictOptions, llama.WithGrammar(opts.Grammar))
-
 	// Expected absolute path
 	if opts.PromptCachePath != "" {
 		predictOptions = append(predictOptions, llama.SetPathPromptCache(opts.PromptCachePath))
diff --git a/pkg/model/initializers.go b/pkg/model/initializers.go
index 9d33a6e..53fc684 100644
--- a/pkg/model/initializers.go
+++ b/pkg/model/initializers.go
@@ -37,7 +37,7 @@ const (
 	Gpt4All             = "gpt4all"
 	FalconBackend       = "falcon"
 	FalconGGMLBackend   = "falcon-ggml"
-	LlamaMasterBackend  = "llama-master"
+	LlamaGrammarBackend = "llama-grammar"
 
 	BertEmbeddingsBackend  = "bert-embeddings"
 	RwkvBackend            = "rwkv"
@@ -54,7 +54,7 @@ var AutoLoadBackends []string = []string{
 	FalconBackend,
 	GPTNeoXBackend,
 	BertEmbeddingsBackend,
-	LlamaMasterBackend,
+	LlamaGrammarBackend,
 	FalconGGMLBackend,
 	GPTJBackend,
 	Gpt2Backend,
@@ -185,7 +185,7 @@ func (ml *ModelLoader) BackendLoader(opts ...Option) (model *grpc.Client, err er
 
 	backend := strings.ToLower(o.backendString)
 	switch backend {
-	case LlamaBackend, LlamaMasterBackend, GPTJBackend, DollyBackend,
+	case LlamaBackend, LlamaGrammarBackend, GPTJBackend, DollyBackend,
 		MPTBackend, Gpt2Backend, FalconBackend,
 		GPTNeoXBackend, ReplitBackend, StarcoderBackend, BloomzBackend,
 		RwkvBackend, LCHuggingFaceBackend, BertEmbeddingsBackend, FalconGGMLBackend, StableDiffusionBackend, WhisperBackend: