Skip to content

Commit

Permalink
Merge branch 'main' into production
Browse files Browse the repository at this point in the history
  • Loading branch information
stefanmcshane committed Jan 30, 2025
2 parents d3cedf3 + ef8c546 commit f72d7d4
Showing 1 changed file with 13 additions and 0 deletions.
13 changes: 13 additions & 0 deletions addons/hf-llm-models/templates/deployment.yaml
Original file line number Diff line number Diff line change
Expand Up @@ -69,12 +69,19 @@ spec:
- --tensor-parallel-size={{ .Values.tensorParallelSize }}
{{- if .Values.maxModelLen }}
- --max-model-len={{ .Values.maxModelLen }}
{{- end }}
{{- /* Append any user-supplied extra CLI flags from .Values.extraArgs. Each item is passed through `quote` so the rendered YAML element is always a string, even when the value looks like a number or boolean or starts with a YAML indicator character. */}}
{{- if .Values.extraArgs }}
{{- range .Values.extraArgs }}
- {{ . | quote }}
{{- end }}
{{- end }}
image: {{ .Values.vllmImage }}
imagePullPolicy: IfNotPresent
env:
# Token comes from .Values.huggingFaceToken. It is defaulted to "" and quoted
# so an unset, empty, numeric-looking or boolean-looking value still renders
# as a YAML string instead of null or a non-string scalar.
# NOTE(review): the token is injected as a plain env value; consider sourcing
# it from a Secret via valueFrom.secretKeyRef — confirm with chart owners.
- name: HF_TOKEN
value: {{ .Values.huggingFaceToken | default "" | quote }}
# NCCL log verbosity is fixed to INFO for every release of this chart.
- name: NCCL_DEBUG
value: INFO
ports:
- containerPort: 8000
protocol: TCP
Expand Down Expand Up @@ -105,13 +112,19 @@ spec:
nvidia.com/gpu: {{ .Values.resources.limits.nvidiaGpu }}
{{- end }}
volumeMounts:
- name: dshm
mountPath: /dev/shm
- name: model-volume
mountPath: {{ .Values.modelDir }}
name: vllm
securityContext:
allowPrivilegeEscalation: false
terminationGracePeriodSeconds: 10
# Pod-level volumes referenced by the container's volumeMounts.
volumes:
# Memory-backed emptyDir, capped at 4Gi, mounted by the container at /dev/shm.
# NOTE(review): presumably sized for inter-process shared memory used by
# multi-GPU inference — confirm 4Gi is sufficient for the target models.
- name: dshm
emptyDir:
medium: Memory
sizeLimit: 4Gi
# Model storage backed by the release-scoped PVC "<release name>-hf-llm",
# mounted by the container at .Values.modelDir.
- name: model-volume
persistentVolumeClaim:
claimName: "{{ .Release.Name }}-hf-llm"

0 comments on commit f72d7d4

Please sign in to comment.