From c905512bb07a0498a7d38160fe6c188da54f78de Mon Sep 17 00:00:00 2001
From: Tyler Gillson
Date: Thu, 20 Apr 2023 09:31:11 -0700
Subject: [PATCH] Update example K8s manifests (#40)

---
 kubernetes/data-volume.yaml | 28 ++++++++++++++++++++++++++
 kubernetes/deployment.yaml  | 39 ++++++++++++++++++++++++++-----------
 2 files changed, 56 insertions(+), 11 deletions(-)
 create mode 100644 kubernetes/data-volume.yaml

diff --git a/kubernetes/data-volume.yaml b/kubernetes/data-volume.yaml
new file mode 100644
index 0000000..d79ce2f
--- /dev/null
+++ b/kubernetes/data-volume.yaml
@@ -0,0 +1,28 @@
+# Create a PVC containing a model binary, sourced from an arbitrary HTTP server
+# (requires https://github.com/kubevirt/containerized-data-importer)
+apiVersion: cdi.kubevirt.io/v1beta1
+kind: DataVolume
+metadata:
+  name: models
+  namespace: local-ai
+spec:
+  contentType: archive
+  source:
+    http:
+      url: http:///koala-7B-4bit-128g.GGML.tar
+      secretRef: model-secret
+  pvc:
+    accessModes:
+      - ReadWriteOnce
+    resources:
+      requests:
+        storage: 5Gi
+---
+apiVersion: v1
+kind: Secret
+metadata:
+  name: model-secret
+  namespace: local-ai
+data:
+  accessKeyId:
+  secretKey:
\ No newline at end of file
diff --git a/kubernetes/deployment.yaml b/kubernetes/deployment.yaml
index a302b10..8959ed1 100644
--- a/kubernetes/deployment.yaml
+++ b/kubernetes/deployment.yaml
@@ -1,38 +1,55 @@
 apiVersion: v1
 kind: Namespace
 metadata:
-  name: llama
+  name: local-ai
 ---
 apiVersion: apps/v1
 kind: Deployment
 metadata:
-  name: llama
-  namespace: llama
+  name: local-ai
+  namespace: local-ai
   labels:
-    app: llama
+    app: local-ai
 spec:
   selector:
     matchLabels:
-      app: llama
+      app: local-ai
   replicas: 1
   template:
     metadata:
       labels:
-        app: llama
-        name: llama
+        app: local-ai
+        name: local-ai
     spec:
       containers:
-        - name: llama
+        - name: local-ai
           image: quay.io/go-skynet/local-ai:latest
+          env:
+            - name: THREADS
+              value: "14"
+            - name: CONTEXT_SIZE
+              value: "512"
+            - name: MODELS_PATH
+              value: /models
+          volumeMounts:
+            - mountPath: /models
+              name: models
+      volumes:
+        - name: models
+          persistentVolumeClaim:
+            claimName: models
 ---
 apiVersion: v1
 kind: Service
 metadata:
-  name: llama
-  namespace: llama
+  name: local-ai
+  namespace: local-ai
+  # If using AWS, you'll need to override the default 60s load balancer idle timeout
+  # annotations:
+  #   service.beta.kubernetes.io/aws-load-balancer-connection-idle-timeout: "1200"
 spec:
   selector:
-    app: llama
+    app: local-ai
   type: LoadBalancer
   ports:
     - protocol: TCP