Merge pull request #199 from Lanture1064/dev

feat: worker support VLLM
kubeagi · Nov 20, 2023 · 3004c10 · 3004c10
2 parents 0b7c8e8 + 178f8a1
commit 3004c10
Show file tree

Hide file tree

Showing 6 changed files with 183 additions and 31 deletions.
diff --git a/deploy/charts/llm-worker/Chart.yaml b/deploy/charts/llm-worker/Chart.yaml
@@ -15,7 +15,7 @@ type: application
 # This is the chart version. This version number should be incremented each time you make changes
 # to the chart and its templates, including the app version.
 # Versions are expected to follow Semantic Versioning (https://semver.org/)
-version: 0.0.1
+version: 0.0.2
 
 # This is the version number of the application being deployed. This version number should be
 # incremented each time you make changes to the application. Versions are not expected to

diff --git a/deploy/charts/llm-worker/README.md b/deploy/charts/llm-worker/README.md
@@ -3,3 +3,48 @@
 ## Requirements
 
 - Kubernetes
+
+## Installation
+
+### With Helm
+
+#### 1. Add the Helm repository
+```shell
+helm repo add arcadia https://kubeagi.github.io/arcadia
+helm repo update
+```
+
+#### 2. Install the LLM worker chart
+
+```shell
+helm install [RELEASE_NAME] arcadia/llm-worker
+```
+
+## Parameters
+
+### 1. MinIO
+
+```yaml
+    - name: MINIO_ENDPOINT
+      value: "your_minio_endpoint"
+    - name: MINIO_ACCESS_KEY
+      value: "your_minio_access_key"
+    - name: MINIO_SECRET_KEY
+      value: "your_minio_secret_key"
+    - name: MINIO_MODEL_BUCKET_PATH
+      value: "path/to/your/minio/model"
+```
+
+
+### 2. FastChat
+
+```yaml
+    - name: FASTCHAT_WORKER_NAME
+      value: "your_worker_instance_name"            # the deployment template sets this to the Helm release name
+    - name: FASTCHAT_MODEL_NAME
+      value: "your_model_name"                      # default "baichuan2-7b"
+    - name: FASTCCHAT_WORKER_ADDRESS
+      value: "defined_worker_k8s_service_address:21002"
+    - name: FASTCCHAT_CONTROLLER_ADDRESS
+      value: "your_fastchat_controller_address:21001"
+```
diff --git a/deploy/charts/llm-worker/templates/deployment.yaml b/deploy/charts/llm-worker/templates/deployment.yaml
@@ -5,7 +5,6 @@ metadata:
   labels:
     {{- include "llm-worker.labels" . | nindent 4 }}
 spec:
-  replicas: {{ .Values.replicaCount }}
   selector:
     matchLabels:
       {{- include "llm-worker.selectorLabels" . | nindent 6 }}
@@ -20,10 +19,27 @@ spec:
     spec:
       initContainers:
         - name: get-model
-          image: {{ .Values.init.Repository }}
+          image: "{{ .Values.init.image }}:{{ .Values.init.tag | default .Chart.AppVersion }}"
           env:
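+          # TODO: could be simplified. NOTE: the fallback below sets FASTCHAT_WORKER_MODEL_NAMES, but the copy command reads $FASTCHAT_MODEL_NAME.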
+          {{- if .Values.image.env }}
             {{- toYaml .Values.image.env | nindent 12 }}
-            {{- toYaml .Values.init.env | nindent 12}}
+          {{- else }}
+            - name: FASTCHAT_WORKER_MODEL_NAMES
+              value: "Baichuan2-7B-Chat"
+          {{- end }}
+          {{- if .Values.init.env }}
+            {{- toYaml .Values.init.env | nindent 12 }}
+          {{- else }}
+            - name: MINIO_MODEL_BUCKET_PATH
+              value: "oss/arcadia/model"
+            - name: MINIO_ENDPOINT
+              value: "http://10.96.241.70:9000"
+            - name: MINIO_ACCESS_KEY
+              value: "os4GDEmSZaJwweoj"
+            - name: MINIO_SECRET_KEY
+              value: "ROVfENZuyUtpRNOT4mtTQicA3CPASQ89"
+          {{- end }}
           command:
             # clone model repo to local
             - "/bin/sh"
@@ -32,7 +48,7 @@ spec:
               echo "Init object storage service..."
               mc alias set oss $MINIO_ENDPOINT $MINIO_ACCESS_KEY $MINIO_SECRET_KEY --insecure
               echo "Copy model files..."
-              mc --insecure cp -r oss/kubeagi/models/$FASTCHAT_WORKER_MODEL_NAMES /data/models
+              mc --insecure cp -r $MINIO_MODEL_BUCKET_PATH/$FASTCHAT_MODEL_NAME /data/models
               ls /data/models
           volumeMounts:
             - mountPath: /data/models
@@ -45,18 +61,38 @@ spec:
         - name: {{ .Chart.Name }}
           image: "{{ .Values.image.repository }}:{{ .Values.image.tag | default .Chart.AppVersion }}"
           imagePullPolicy: {{ .Values.image.pullPolicy }}
+          {{ with .Values.resources }}
           resources:
-            limits:
-              nvidia.com/gpu: "1" # request 1 GPU
+            {{ toYaml . | nindent 12 }}
+          {{ end }}
           env:
+          {{- if .Values.image.env }}
             {{- toYaml .Values.image.env | nindent 12 }}
+          {{- else }}
+            - name: FASTCHAT_MODEL_NAME
+              value: "baichuan2-7b"
+            - name: FASTCCHAT_WORKER_ADDRESS
+              {{- if .Values.ingress.enabled }}
+              {{- range .Values.ingress.hosts }}
+              value: "{{ .host }}:21002"
+              {{- end }}
+              {{- else }}
+              value: "{{ .Release.Service }}.{{ .Release.Namespace }}.svc.cluster.local:21002"
+              {{- end }}
+            - name: FASTCCHAT_CONTROLLER_ADDRESS
+              value: "http://arcadia-fastchat.{{ .Release.Namespace }}.svc.cluster.local:21001"
+          {{- end }}
+            - name: FASTCHAT_WORKER_NAME
+              value: {{ .Release.Name }}
+            - name: FASTCHAT_WORKER_NAMESPACE
+              value: {{ .Release.Namespace }}
           command:
             - "/bin/bash"
             - "-c"
             - |
               echo "Starting model worker..."
-              python3.9 -m fastchat.serve.model_worker --model-names $FASTCHAT_WORKER_MODEL_NAMES \
-              --model-path /data/models/$FASTCHAT_WORKER_MODEL_PATH --worker-address $FASTCCHAT_WORKER_ADDRESS \
+              python3.9 -m fastchat.serve.model_worker --model-names $FASTCHAT_MODEL_NAME-$FASTCHAT_WORKER_NAME-$FASTCHAT_WORKER_NAMESPACE \
+              --model-path /data/models/$FASTCHAT_MODEL_NAME --worker-address $FASTCCHAT_WORKER_ADDRESS \
               --controller-address $FASTCCHAT_CONTROLLER_ADDRESS \
               --host 0.0.0.0 --port 21002
           ports:

diff --git a/deploy/charts/llm-worker/values.yaml b/deploy/charts/llm-worker/values.yaml
@@ -2,31 +2,31 @@
 # This is a YAML-formatted file.
 # Declare variables to be passed into your templates.
 image:
-  repository: kubebb/arcadia-llm-worker
+  repository: kubebb/arcadia-fastchat-worker
   pullPolicy: IfNotPresent
   # Overrides the image tag whose default is the chart appVersion.
   tag: "v0.0.1"
   env:
-    - name: FASTCHAT_WORKER_MODEL_NAMES
-      value: "chatglm3-6b"
-    - name: FASTCHAT_WORKER_MODEL_PATH
-      value: "chatglm3-6b"
-    - name: FASTCCHAT_WORKER_ADDRESS
-      value: "http://fastchat-model-worker.arcadia.svc.cluster.local:21002"
-    - name: FASTCCHAT_CONTROLLER_ADDRESS
-      value: "http://arcadia-fastchat.svc.cluster.local:21001"
+  - name: FASTCHAT_MODEL_NAME
+    value: "baichuan2-7b"
+  - name: FASTCCHAT_WORKER_ADDRESS
+    value: "http://arcadia-llm-worker.arcadia.svc.cluster.local:21002"
+  - name: FASTCCHAT_CONTROLLER_ADDRESS
+    value: "http://arcadia-fastchat.arcadia.svc.cluster.local:21001"
 
 init:
   image: kubebb/minio-mc
   pullPolicy: IfNotPresent
   tag: "RELEASE.2023-01-28T20-29-38Z"
   env:
-    - name: MINIO_ENDPOINT
-      value: "https://arcadia-minio-api.172.22.96.167.nip.io/"
-    - name: MINIO_ACCESS_KEY
-      value: "ydKiRfEhz2UTvrn8"
-    - name: MINIO_SECRET_KEY
-      value: "XoJ2X5N2BOYWXgP3BczEGiG3QG5VKOKt"
+  - name: MINIO_ENDPOINT
+    value: "http://10.96.241.70:9000"
+  - name: MINIO_ACCESS_KEY
+    value: "os4GDEmSZaJwweoj"
+  - name: MINIO_SECRET_KEY
+    value: "ROVfENZuyUtpRNOT4mtTQicA3CPASQ89"
+  - name: MINIO_MODEL_BUCKET_PATH
+    value: "oss/arcadia/model"
 
 persistentVolume:
   enabled: true
@@ -60,7 +60,10 @@ ingress:
   #    hosts:
   #      - chart-example.local
 
-resources: {}
+resources:
+  limits:
+    # request 1 GPU
+    nvidia.com/gpu: "1"
 
   # We usually recommend not to specify default resources and to leave this as a conscious
   # choice for the user. This also increases chances charts run on environments with little

diff --git a/pkg/worker/runner.go b/pkg/worker/runner.go
@@ -34,18 +34,32 @@ type ModelRunner interface {
 
 var _ ModelRunner = (*RunnerFastchat)(nil)
 
+var _ ModelRunner = (*RunnerFastchatVLLM)(nil)
+
 type RunnerFastchat struct {
 	c client.Client
 	w *arcadiav1alpha1.Worker
 }
 
+type RunnerFastchatVLLM struct {
+	c client.Client
+	w *arcadiav1alpha1.Worker
+}
+
 func NewRunnerFastchat(c client.Client, w *arcadiav1alpha1.Worker) (ModelRunner, error) {
 	return &RunnerFastchat{
 		c: c,
 		w: w,
 	}, nil
 }
 
+func NewRunnerFastchatVLLM(c client.Client, w *arcadiav1alpha1.Worker) (ModelRunner, error) {
+	return &RunnerFastchatVLLM{
+		c: c,
+		w: w,
+	}, nil
+}
+
 func (runner *RunnerFastchat) Build(ctx context.Context, model *arcadiav1alpha1.TypedObjectReference) (any, error) {
 	if model == nil {
 		return nil, errors.New("nil model")
@@ -58,17 +72,60 @@ func (runner *RunnerFastchat) Build(ctx context.Context, model *arcadiav1alpha1.
 	// read worker address
 	container := &corev1.Container{
 		Name:            "runner",
-		Image:           "kubebb/arcadia-llm-worker:v0.0.1",
+		Image:           "kubebb/arcadia-fastchat-worker:v0.0.1",
 		ImagePullPolicy: "IfNotPresent",
 		Command: []string{
 			"/bin/bash",
 			"-c",
 			`echo "Run model worker..."
-python3.9 -m fastchat.serve.model_worker --model-names $FASTCHAT_MODEL_NAME \
+python3.9 -m fastchat.serve.model_worker --model-names $FASTCHAT_MODEL_NAME-$FASTCHAT_WORKER_NAME-$FASTCHAT_WORKER_NAMESPACE \
 --model-path /data/models/$FASTCHAT_MODEL_NAME --worker-address $FASTCHAT_WORKER_ADDRESS \
 --controller-address $FASTCHAT_CONTROLLER_ADDRESS \
 --host 0.0.0.0 --port 21002`},
 		Env: []corev1.EnvVar{
+			{Name: "FASTCHAT_WORKER_NAMESPACE", Value: runner.w.Namespace},
+			{Name: "FASTCHAT_WORKER_NAME", Value: runner.w.Name},
+			{Name: "FASTCHAT_MODEL_NAME", Value: model.Name},
+			{Name: "FASTCHAT_WORKER_ADDRESS", Value: fmt.Sprintf("http://%s.%s.svc.cluster.local:21002", runner.w.Name+WokerCommonSuffix, runner.w.Namespace)},
+			{Name: "FASTCHAT_CONTROLLER_ADDRESS", Value: gw.Controller},
+		},
+		Ports: []corev1.ContainerPort{
+			{Name: "http", ContainerPort: 21002},
+		},
+		VolumeMounts: []corev1.VolumeMount{
+			{Name: "models", MountPath: "/data/models"},
+		},
+		Resources: runner.w.Spec.Resources,
+	}
+
+	return container, nil
+}
+
+func (runner *RunnerFastchatVLLM) Build(ctx context.Context, model *arcadiav1alpha1.TypedObjectReference) (any, error) {
+	if model == nil {
+		return nil, errors.New("nil model")
+	}
+	gw, err := config.GetGateway(ctx, runner.c)
+	if err != nil {
+		return nil, fmt.Errorf("failed to get arcadia config with %w", err)
+	}
+
+	// read worker address
+	container := &corev1.Container{
+		Name:            "runner",
+		Image:           "kubebb/arcadia-fastchat-worker:vllm-v0.0.1",
+		ImagePullPolicy: "IfNotPresent",
+		Command: []string{
+			"/bin/bash",
+			"-c",
+			`echo "Run model worker..."
+			python3.9 -m fastchat.serve.vllm_worker --model-names $FASTCHAT_MODEL_NAME-$FASTCHAT_WORKER_NAME-$FASTCHAT_WORKER_NAMESPACE \
+			--model-path /data/models/$FASTCHAT_MODEL_NAME --worker-address $FASTCHAT_WORKER_ADDRESS \
+			--controller-address $FASTCHAT_CONTROLLER_ADDRESS \
+			--host 0.0.0.0 --port 21002 --trust-remote-code`},
+		Env: []corev1.EnvVar{
+			{Name: "FASTCHAT_WORKER_NAMESPACE", Value: runner.w.Namespace},
+			{Name: "FASTCHAT_WORKER_NAME", Value: runner.w.Name},
 			{Name: "FASTCHAT_MODEL_NAME", Value: model.Name},
 			{Name: "FASTCHAT_WORKER_ADDRESS", Value: fmt.Sprintf("http://%s.%s.svc.cluster.local:21002", runner.w.Name+WokerCommonSuffix, runner.w.Namespace)},
 			{Name: "FASTCHAT_CONTROLLER_ADDRESS", Value: gw.Controller},

diff --git a/pkg/worker/worker.go b/pkg/worker/worker.go
@@ -180,11 +180,22 @@ func NewPodWorker(ctx context.Context, c client.Client, s *runtime.Scheme, w *ar
 	}
 
 	// init runner
-	r, err := NewRunnerFastchat(c, w.DeepCopy())
-	if err != nil {
-		return nil, fmt.Errorf("failed to new a runner with %w", err)
+	switch w.Spec.Type {
+	case arcadiav1alpha1.WorkerTypeFastchatVLLM:
+		r, err := NewRunnerFastchatVLLM(c, w.DeepCopy())
+		if err != nil {
+			return nil, fmt.Errorf("failed to new a runner with %w", err)
+		}
+		worker.r = r
+	case arcadiav1alpha1.WorkerTypeFastchatNormal:
+		r, err := NewRunnerFastchat(c, w.DeepCopy())
+		if err != nil {
+			return nil, fmt.Errorf("failed to new a runner with %w", err)
+		}
+		worker.r = r
+	default:
+		return nil, fmt.Errorf("worker %s with type %s not supported in worker", w.Name, w.Spec.Type)
 	}
-	worker.r = r
 
 	return worker, nil
 }