---
# Namespace that holds every local-ai resource defined in this file.
apiVersion: v1
kind: Namespace
metadata:
  name: local-ai
---
# Deployment running a single local-ai pod with the model directory mounted
# from a PersistentVolumeClaim.
apiVersion: apps/v1
kind: Deployment
metadata:
  name: local-ai
  namespace: local-ai
  labels:
    app: local-ai
spec:
  # Must match the pod template labels below.
  selector:
    matchLabels:
      app: local-ai
  replicas: 1
  template:
    metadata:
      labels:
        app: local-ai
      name: local-ai
    spec:
      containers:
        - name: local-ai
          # NOTE(review): `latest` is a mutable tag, so rollouts are not
          # reproducible; consider pinning a specific release tag.
          image: quay.io/go-skynet/local-ai:latest
          env:
            # Env values must be strings, hence the quoted numbers.
            # NOTE(review): presumably THREADS should not exceed the CPU
            # cores available to the pod - confirm against the node size.
            - name: THREADS
              value: "14"
            - name: CONTEXT_SIZE
              value: "512"
            # Points at the mountPath of the `models` volume below.
            - name: MODELS_PATH
              value: /models
          volumeMounts:
            - mountPath: /models
              name: models
      volumes:
        # The `models` claim is not defined in this manifest.
        # NOTE(review): presumably it is created separately in the
        # `local-ai` namespace - confirm it exists before applying.
        - name: models
          persistentVolumeClaim:
            claimName: models
---
# Service exposing the local-ai pods on TCP port 8080 through a load balancer.
apiVersion: v1
kind: Service
metadata:
  name: local-ai
  namespace: local-ai
  # If using AWS, you'll need to override the default 60s load balancer idle
  # timeout by uncommenting the annotation below:
  # annotations:
  #   service.beta.kubernetes.io/aws-load-balancer-connection-idle-timeout: "1200"
spec:
  # Routes traffic to pods carrying the Deployment's `app: local-ai` label.
  selector:
    app: local-ai
  type: LoadBalancer
  ports:
    - protocol: TCP
      port: 8080
      targetPort: 8080