Update example K8s manifests (#40)

add/first-example
Tyler Gillson 1 year ago committed by GitHub
parent 1254951fab
commit c905512bb0
No known key found for this signature in database
GPG Key ID: 4AEE18F83AFDEB23
  1. 28
      kubernetes/data-volume.yaml
  2. 39
      kubernetes/deployment.yaml

@ -0,0 +1,28 @@
# Create a PVC containing a model binary, sourced from an arbitrary HTTP server
# (requires https://github.com/kubevirt/containerized-data-importer)
apiVersion: cdi.kubevirt.io/v1beta1
kind: DataVolume
metadata:
  name: models
  namespace: local-ai
spec:
  # "archive" tells CDI to extract the tarball into the PVC filesystem
  contentType: archive
  source:
    http:
      # Replace <model_server> with the host serving the model tarball
      url: http://<model_server>/koala-7B-4bit-128g.GGML.tar
      # Basic-auth credentials for the HTTP source (Secret defined below)
      secretRef: model-secret
  pvc:
    accessModes:
      - ReadWriteOnce
    resources:
      requests:
        storage: 5Gi
---
# Credentials consumed by the DataVolume's http source via secretRef
apiVersion: v1
kind: Secret
metadata:
  name: model-secret
  namespace: local-ai
data:
  # Values under "data" must be base64-encoded
  accessKeyId: <model_server_username_base64_encoded>
  secretKey: <model_server_password_base64_encoded>

@ -1,38 +1,55 @@
# Namespace shared by all LocalAI resources
apiVersion: v1
kind: Namespace
metadata:
  name: local-ai
---
apiVersion: apps/v1
kind: Deployment
metadata:
  name: local-ai
  namespace: local-ai
  labels:
    app: local-ai
spec:
  selector:
    matchLabels:
      app: local-ai
  replicas: 1
  template:
    metadata:
      labels:
        app: local-ai
      name: local-ai
    spec:
      containers:
        - name: local-ai
          image: quay.io/go-skynet/local-ai:latest
          env:
            # Quoted so the values stay strings (env vars must be strings)
            - name: THREADS
              value: "14"
            - name: CONTEXT_SIZE
              value: "512"
            - name: MODELS_PATH
              value: /models
          volumeMounts:
            # Model binaries provided by the DataVolume-backed PVC
            - mountPath: /models
              name: models
      volumes:
        - name: models
          persistentVolumeClaim:
            claimName: models
# NOTE(review): the lines below are a side-by-side diff rendering from a scraped
# commit page — each unchanged line appears twice ("old new") and indentation was
# lost. The intended new content is a v1 Service named "local-ai" in namespace
# "local-ai", type LoadBalancer, selecting app: local-ai.
# NOTE(review): the diff is truncated here — the "ports:" entry ends at
# "- protocol: TCP" with no "port:"/"targetPort:" visible. Confirm the remaining
# lines against the full file before reconstructing; do not guess the port.
--- ---
apiVersion: v1 apiVersion: v1
kind: Service kind: Service
metadata: metadata:
name: llama name: local-ai
namespace: llama namespace: local-ai
# If using AWS, you'll need to override the default 60s load balancer idle timeout
# annotations:
# service.beta.kubernetes.io/aws-load-balancer-connection-idle-timeout: "1200"
spec: spec:
selector: selector:
app: llama app: local-ai
type: LoadBalancer type: LoadBalancer
ports: ports:
- protocol: TCP - protocol: TCP

Loading…
Cancel
Save