Merge branch 'main' into production

porter-dev · Jan 30, 2025 · f72d7d4 · f72d7d4
2 parents d3cedf3 + ef8c546
commit f72d7d4
Showing 1 changed file with 13 additions and 0 deletions.
diff --git a/addons/hf-llm-models/templates/deployment.yaml b/addons/hf-llm-models/templates/deployment.yaml
@@ -69,12 +69,19 @@ spec:
         - --tensor-parallel-size={{ .Values.tensorParallelSize }}
       {{- if .Values.maxModelLen }}
         - --max-model-len={{ .Values.maxModelLen }}
+      {{- end }}
+      {{- if .Values.extraArgs }}{{- /* entries are quoted so each renders as a YAML string */ -}}
+        {{- range .Values.extraArgs }}
+        - {{ . | quote }}
+        {{- end }}
       {{- end }}
         image: {{ .Values.vllmImage }}
         imagePullPolicy: IfNotPresent
         env:
         - name: HF_TOKEN
           value: {{ .Values.huggingFaceToken }}
+        - name: NCCL_DEBUG  # NCCL log level; override with .Values.ncclDebug
+          value: {{ .Values.ncclDebug | default "INFO" | quote }}
         ports:
         - containerPort: 8000
           protocol: TCP
@@ -105,13 +112,19 @@ spec:
             nvidia.com/gpu: {{ .Values.resources.limits.nvidiaGpu }}
               {{- end }}
         volumeMounts:
+        - name: dshm  # Memory-medium emptyDir (see volumes); presumably for multi-GPU IPC - confirm
+          mountPath: /dev/shm
         - name: model-volume
           mountPath: {{ .Values.modelDir }}
         name: vllm
         securityContext:
           allowPrivilegeEscalation: false
       terminationGracePeriodSeconds: 10
       volumes:
+      - name: dshm  # RAM-backed volume mounted at /dev/shm by the vllm container
+        emptyDir:
+          medium: Memory
+          sizeLimit: {{ .Values.shmSizeLimit | default "4Gi" | quote }}
       - name: model-volume
         persistentVolumeClaim:
           claimName: "{{ .Release.Name }}-hf-llm"