Skip to content

Commit

Permalink
feat(localai): runtimeclass nvidia
Browse files: browse the repository at this point in the history
  • Loading branch information
nold committed Feb 21, 2024
1 parent d9a93a6 commit 6dba741
Show file tree
Hide file tree
Showing 2 changed files with 30 additions and 19 deletions.
9 changes: 6 additions & 3 deletions projects/ai/project.yaml
Original file line number Diff line number Diff line change
Expand Up @@ -3,9 +3,12 @@ config:

apps:
- name: localai
repoURL: https://go-skynet.github.io/helm-charts
chart: local-ai
targetRevision: 3.1.0
# repoURL: https://go-skynet.github.io/helm-charts
# chart: local-ai
# targetRevision: 3.1.0
repoURL: https://github.com/nold360/localai-charts.git
path: charts/local-ai
targetRevision: feat/runtimeclass

- name: anythingllm
repo: bjw-s
Expand Down
40 changes: 24 additions & 16 deletions projects/ai/values/localai.yaml
Original file line number Diff line number Diff line change
@@ -1,9 +1,14 @@
replicaCount: 1

deployment:
image: quay.io/go-skynet/local-ai:master-ffmpeg-core
image:
repository: quay.io/go-skynet/local-ai
tag: v2.8.0-cublas-cuda12
pullPolicy: Always
runtimeClassName: nvidia

env:
threads: 16
# threads: 16
context_size: 2048
DEBUG: "true"

Expand Down Expand Up @@ -54,9 +59,6 @@ deployment:
prompt_templates:
# To use a cloud-provided (e.g. AWS) image, specify its full registry path, like: 1234356789.dkr.ecr.us-REGION-X.amazonaws.com/busybox
image: busybox
pullPolicy: Always
imagePullSecrets: []
# - name: secret-names

resources:
requests:
Expand All @@ -80,17 +82,25 @@ models:

# The list of URLs to download models from
# Note: the name of the file will be the name of the loaded model
list:
- url: "https://gpt4all.io/models/ggml-gpt4all-j.bin"
list: []
# - url: "https://gpt4all.io/models/ggml-gpt4all-j.bin"
# basicAuth: base64EncodedCredentials

persistence:
pvc:
enabled: true
size: 100Gi
accessModes:
- ReadWriteOnce
storageClass: "ssd"
persistence:
models:
enabled: true
annotations: {}
storageClass: ssd
accessModes: ReadWriteOnce
size: 100Gi
globalMount: /models
output:
enabled: false
annotations: {}
storageClass: ssd
accessModes: ReadWriteOnce
size: 100Gi
globalMount: /tmp/generated

service:
type: ClusterIP
Expand All @@ -115,5 +125,3 @@ ingress:
hosts:
- ai.dc

image:
pullPolicy: IfNotPresent

0 comments on commit 6dba741

Please sign in to comment.