Add a Helm chart for LocalAI (charts/local-ai) and remove the raw manifests
under kubernetes/.

What this patch does
  * .gitignore   - re-includes charts/* so the "local-ai" ignore rule does not
                   hide the chart directory.
  * README.md    - documents chart installation; drops the old "Kubernetes"
                   section that pointed at the deleted manifests.
  * charts/local-ai - Chart.yaml, values.yaml and templates for the Deployment,
                   Service and an optional CDI DataVolume (+ credentials Secret).
  * kubernetes/  - data-volume.yaml and deployment.yaml are deleted.

Review amendments (added lines only; context and deleted lines are untouched)
  * README.md: the values key is `deployment.volume.enabled` (singular), which
    is what values.yaml defines and deployment.yaml reads.
  * templates/service.yaml: annotations are rendered with `nindent` so every
    key gets the same indentation; the selector also matches the release
    instance label so two releases in one namespace do not share pods.
  * templates/data-volume.yaml: `secretRef` is only emitted when the chart
    creates the Secret; list values are rendered with `toJson` so multi-element
    lists stay valid YAML; the source URL is quoted.

NOTE(review): the "index" blob ids on README.md, data-volume.yaml and
service.yaml predate these amendments. If whitespace on context or deleted
lines does not match your tree, apply with `git apply --ignore-whitespace`.

diff --git a/.gitignore b/.gitignore
index fecd3e8..25fb365 100644
--- a/.gitignore
+++ b/.gitignore
@@ -5,6 +5,8 @@ go-gpt4all-j
 # LocalAI build binary
 LocalAI
 local-ai
+# prevent above rules from omitting the helm chart
+!charts/*
 
 # Ignore models
 models/*.bin
diff --git a/README.md b/README.md
index 330b3ec..8591fc7 100644
--- a/README.md
+++ b/README.md
@@ -63,6 +63,26 @@ curl http://localhost:8080/v1/completions -H "Content-Type: application/json" -d
    }'
 ```
 
+## Helm Chart Installation (run LocalAI in Kubernetes)
+The local-ai Helm chart supports two options for the LocalAI server's models directory:
+1. Basic deployment with no persistent volume. You must manually update the Deployment to configure your own models directory.
+
+   Install the chart with `.Values.deployment.volume.enabled == false` and `.Values.dataVolume.enabled == false`.
+
+2. Advanced, two-phase deployment to provision the models directory using a DataVolume. Requires [Containerized Data Importer CDI](https://github.com/kubevirt/containerized-data-importer) to be pre-installed in your cluster.
+
+   First, install the chart with `.Values.deployment.volume.enabled == false` and `.Values.dataVolume.enabled == true`:
+   ```bash
+   helm install local-ai charts/local-ai -n local-ai --create-namespace
+   ```
+   Wait for CDI to create an importer Pod for the DataVolume and for the importer pod to finish provisioning the model archive inside the PV.
+
+   Once the PV is provisioned and the importer Pod removed, set `.Values.deployment.volume.enabled == true` and `.Values.dataVolume.enabled == false` and upgrade the chart:
+   ```bash
+   helm upgrade local-ai -n local-ai charts/local-ai
+   ```
+   This will update the local-ai Deployment to mount the PV that was provisioned by the DataVolume.
+
 ## Prompt templates
 
 The API doesn't inject a default prompt for talking to the model. You have to use a prompt similar to what's described in the standford-alpaca docs: https://github.com/tatsu-lab/stanford_alpaca#data-release.
@@ -184,10 +204,6 @@ python 828bddec6162a023114ce19146cb2b82/gistfile1.txt models tokenizer.model
 
 It should work, however you need to make sure you give enough resources to the container. See https://github.com/go-skynet/LocalAI/issues/2
 
-### Kubernetes
-
-You can run the API in Kubernetes, see an example deployment in [kubernetes](https://github.com/go-skynet/LocalAI/tree/master/kubernetes)
-
 ### Build locally
 
 Pre-built images might fit well for most of the modern hardware, however you can and might need to build the images manually.
diff --git a/charts/local-ai/Chart.yaml b/charts/local-ai/Chart.yaml
new file mode 100644
index 0000000..622bc10
--- /dev/null
+++ b/charts/local-ai/Chart.yaml
@@ -0,0 +1,6 @@
+apiVersion: v2
+appVersion: 0.1.0
+description: A Helm chart for LocalAI
+name: local-ai
+type: application
+version: 1.0.0
diff --git a/charts/local-ai/templates/_helpers.tpl b/charts/local-ai/templates/_helpers.tpl
new file mode 100644
index 0000000..d6e7d0e
--- /dev/null
+++ b/charts/local-ai/templates/_helpers.tpl
@@ -0,0 +1,44 @@
+{{/*
+Expand the name of the chart.
+*/}}
+{{- define "local-ai.name" -}}
+{{- default .Chart.Name .Values.nameOverride | trunc 63 | trimSuffix "-" }}
+{{- end }}
+
+{{/*
+Create a default fully qualified app name.
+We truncate at 63 chars because some Kubernetes name fields are limited to this (by the DNS naming spec).
+If release name contains chart name it will be used as a full name.
+*/}}
+{{- define "local-ai.fullname" -}}
+{{- if .Values.fullnameOverride }}
+{{- .Values.fullnameOverride | trunc 63 | trimSuffix "-" }}
+{{- else }}
+{{- $name := default .Chart.Name .Values.nameOverride }}
+{{- if contains $name .Release.Name }}
+{{- .Release.Name | trunc 63 | trimSuffix "-" }}
+{{- else }}
+{{- printf "%s-%s" .Release.Name $name | trunc 63 | trimSuffix "-" }}
+{{- end }}
+{{- end }}
+{{- end }}
+
+{{/*
+Create chart name and version as used by the chart label.
+*/}}
+{{- define "local-ai.chart" -}}
+{{- printf "%s-%s" .Chart.Name .Chart.Version | replace "+" "_" | trunc 63 | trimSuffix "-" }}
+{{- end }}
+
+{{/*
+Common labels
+*/}}
+{{- define "local-ai.labels" -}}
+helm.sh/chart: {{ include "local-ai.chart" . }}
+app.kubernetes.io/name: {{ include "local-ai.name" . }}
+app.kubernetes.io/instance: "{{ .Release.Name }}"
+app.kubernetes.io/managed-by: {{ .Release.Service }}
+{{- if .Chart.AppVersion }}
+app.kubernetes.io/version: {{ .Chart.AppVersion | quote }}
+{{- end }}
+{{- end }}
\ No newline at end of file
diff --git a/charts/local-ai/templates/data-volume.yaml b/charts/local-ai/templates/data-volume.yaml
new file mode 100644
index 0000000..eb27cf1
--- /dev/null
+++ b/charts/local-ai/templates/data-volume.yaml
@@ -0,0 +1,42 @@
+{{- if .Values.dataVolume.enabled }}
+apiVersion: cdi.kubevirt.io/v1beta1
+kind: DataVolume
+metadata:
+  name: {{ template "local-ai.fullname" . }}
+  namespace: {{ .Release.Namespace | quote }}
+  labels:
+    {{- include "local-ai.labels" . | nindent 4 }}
+spec:
+  contentType: archive
+  source:
+    {{ .Values.dataVolume.source.type }}:
+      url: {{ .Values.dataVolume.source.url | quote }}
+      # Only reference the credentials Secret when this chart actually creates it (dataVolume.secret.enabled).
+      {{- if .Values.dataVolume.secret.enabled }}
+      secretRef: {{ template "local-ai.fullname" . }}
+      {{- end }}
+      {{- if and (eq .Values.dataVolume.source.type "http") .Values.dataVolume.source.secretExtraHeaders }}
+      secretExtraHeaders: {{ .Values.dataVolume.source.secretExtraHeaders | toJson }}
+      {{- end }}
+      {{- if .Values.dataVolume.source.caCertConfigMap }}
+      caCertConfigMap: {{ .Values.dataVolume.source.caCertConfigMap }}
+      {{- end }}
+  pvc:
+    accessModes: {{ .Values.dataVolume.pvc.accessModes | toJson }}
+    resources:
+      requests:
+        storage: {{ .Values.dataVolume.pvc.size }}
+---
+{{- if .Values.dataVolume.secret.enabled }}
+apiVersion: v1
+kind: Secret
+metadata:
+  name: {{ template "local-ai.fullname" . }}
+  namespace: {{ .Release.Namespace | quote }}
+  labels:
+    {{- include "local-ai.labels" . | nindent 4 }}
+data:
+  accessKeyId: {{ .Values.dataVolume.secret.username }}
+  secretKey: {{ .Values.dataVolume.secret.password }}
+{{- end }}
+{{- end }}
\ No newline at end of file
diff --git a/charts/local-ai/templates/deployment.yaml b/charts/local-ai/templates/deployment.yaml
new file mode 100644
index 0000000..6966bab
--- /dev/null
+++ b/charts/local-ai/templates/deployment.yaml
@@ -0,0 +1,39 @@
+apiVersion: apps/v1
+kind: Deployment
+metadata:
+  name: {{ template "local-ai.fullname" . }}
+  namespace: {{ .Release.Namespace | quote }}
+  labels:
+    {{- include "local-ai.labels" . | nindent 4 }}
+spec:
+  selector:
+    matchLabels:
+      app.kubernetes.io/name: {{ include "local-ai.name" . }}
+      app.kubernetes.io/instance: {{ .Release.Name }}
+  replicas: 1
+  template:
+    metadata:
+      name: {{ template "local-ai.fullname" . }}
+      labels:
+        app.kubernetes.io/name: {{ include "local-ai.name" . }}
+        app.kubernetes.io/instance: {{ .Release.Name }}
+    spec:
+      containers:
+        - name: {{ template "local-ai.fullname" . }}
+          image: {{ .Values.deployment.image }}
+          env:
+            - name: THREADS
+              value: {{ .Values.deployment.env.threads | quote }}
+            - name: CONTEXT_SIZE
+              value: {{ .Values.deployment.env.contextSize | quote }}
+            - name: MODELS_PATH
+              value: {{ .Values.deployment.env.modelsPath }}
+{{- if .Values.deployment.volume.enabled }}
+          volumeMounts:
+            - mountPath: {{ .Values.deployment.env.modelsPath }}
+              name: models
+      volumes:
+        - name: models
+          persistentVolumeClaim:
+            claimName: {{ template "local-ai.fullname" . }}
+{{- end }}
\ No newline at end of file
diff --git a/charts/local-ai/templates/service.yaml b/charts/local-ai/templates/service.yaml
new file mode 100644
index 0000000..a42bfda
--- /dev/null
+++ b/charts/local-ai/templates/service.yaml
@@ -0,0 +1,21 @@
+apiVersion: v1
+kind: Service
+metadata:
+  name: {{ template "local-ai.fullname" . }}
+  namespace: {{ .Release.Namespace | quote }}
+  labels:
+    {{- include "local-ai.labels" . | nindent 4 }}
+{{- if .Values.service.annotations }}
+  annotations:
+    {{- toYaml .Values.service.annotations | nindent 4 }}
+{{- end }}
+spec:
+  selector:
+    # Match the same name + instance labels the Deployment puts on its pods, so releases do not share pods.
+    app.kubernetes.io/name: {{ include "local-ai.name" . }}
+    app.kubernetes.io/instance: {{ .Release.Name }}
+  type: "{{ .Values.service.type }}"
+  ports:
+    - protocol: TCP
+      port: 8080
+      targetPort: 8080
diff --git a/charts/local-ai/values.yaml b/charts/local-ai/values.yaml
new file mode 100644
index 0000000..e7bdb5e
--- /dev/null
+++ b/charts/local-ai/values.yaml
@@ -0,0 +1,38 @@
+deployment:
+  image: quay.io/go-skynet/local-ai:latest
+  env:
+    threads: 14
+    contextSize: 512
+    modelsPath: "/models"
+  volume:
+    enabled: false
+
+service:
+  type: ClusterIP
+  annotations: {}
+    # If using an AWS load balancer, you'll need to override the default 60s load balancer idle timeout
+    # service.beta.kubernetes.io/aws-load-balancer-connection-idle-timeout: "1200"
+
+# Optionally create a PVC containing a model binary, sourced from an arbitrary HTTP server or S3 bucket
+# (requires https://github.com/kubevirt/containerized-data-importer)
+dataVolume:
+  enabled: false
+  source:
+    type: "http" # Source type. One of: [ http | s3 ]
+    url: "http:///" # e.g. koala-7B-4bit-128g.GGML.tar
+
+    # CertConfigMap is an optional ConfigMap reference, containing a Certificate Authority (CA) public key
+    # and a base64 encoded pem certificate
+    caCertConfigMap: ""
+
+    # SecretExtraHeaders is an optional list of Secret references, each containing an extra HTTP header
+    # that may include sensitive information. Only applicable for the http source type.
+    secretExtraHeaders: []
+  pvc:
+    accessModes:
+      - ReadWriteOnce
+    size: 5Gi
+  secret:
+    enabled: false
+    username: "" # base64 encoded
+    password: "" # base64 encoded
diff --git a/kubernetes/data-volume.yaml b/kubernetes/data-volume.yaml
deleted file mode 100644
index d79ce2f..0000000
--- a/kubernetes/data-volume.yaml
+++ /dev/null
@@ -1,28 +0,0 @@
-# Create a PVC containing a model binary, sourced from an arbitrary HTTP server
-# (requires https://github.com/kubevirt/containerized-data-importer)
-apiVersion: cdi.kubevirt.io/v1beta1
-kind: DataVolume
-metadata:
-  name: models
-  namespace: local-ai
-spec:
-  contentType: archive
-  source:
-    http:
-      url: http:///koala-7B-4bit-128g.GGML.tar
-      secretRef: model-secret
-  pvc:
-    accessModes:
-      - ReadWriteOnce
-    resources:
-      requests:
-        storage: 5Gi
----
-apiVersion: v1
-kind: Secret
-metadata:
-  name: model-secret
-  namespace: local-ai
-data:
-  accessKeyId:
-  secretKey:
\ No newline at end of file
diff --git a/kubernetes/deployment.yaml b/kubernetes/deployment.yaml
deleted file mode 100644
index 8959ed1..0000000
--- a/kubernetes/deployment.yaml
+++ /dev/null
@@ -1,57 +0,0 @@
-apiVersion: v1
-kind: Namespace
-metadata:
-  name: local-ai
----
-apiVersion: apps/v1
-kind: Deployment
-metadata:
-  name: local-ai
-  namespace: local-ai
-  labels:
-    app: local-ai
-spec:
-  selector:
-    matchLabels:
-      app: local-ai
-  replicas: 1
-  template:
-    metadata:
-      labels:
-        app: local-ai
-      name: local-ai
-    spec:
-      containers:
-        - name: local-ai
-          image: quay.io/go-skynet/local-ai:latest
-          env:
-            - name: THREADS
-              value: "14"
-            - name: CONTEXT_SIZE
-              value: "512"
-            - name: MODELS_PATH
-              value: /models
-          volumeMounts:
-            - mountPath: /models
-              name: models
-      volumes:
-        - name: models
-          persistentVolumeClaim:
-            claimName: models
----
-apiVersion: v1
-kind: Service
-metadata:
-  name: local-ai
-  namespace: local-ai
-  # If using AWS, you'll need to override the default 60s load balancer idle timeout
-  # annotations:
-  #   service.beta.kubernetes.io/aws-load-balancer-connection-idle-timeout: "1200"
-spec:
-  selector:
-    app: local-ai
-  type: LoadBalancer
-  ports:
-    - protocol: TCP
-      port: 8080
-      targetPort: 8080