From 178f8a12b9d29d58a38d86fd4db8dfaac789eabf Mon Sep 17 00:00:00 2001 From: Lanture1064 Date: Thu, 9 Nov 2023 17:23:47 +0800 Subject: [PATCH] feat: worker support VLLM Signed-off-by: Lanture1064 --- deploy/charts/llm-worker/Chart.yaml | 2 +- deploy/charts/llm-worker/README.md | 45 ++++++++++++++ .../llm-worker/templates/deployment.yaml | 52 +++++++++++++--- deploy/charts/llm-worker/values.yaml | 35 ++++++----- pkg/worker/runner.go | 61 ++++++++++++++++++- pkg/worker/worker.go | 19 ++++-- 6 files changed, 183 insertions(+), 31 deletions(-) diff --git a/deploy/charts/llm-worker/Chart.yaml b/deploy/charts/llm-worker/Chart.yaml index a694f8bc4..d639d22fa 100644 --- a/deploy/charts/llm-worker/Chart.yaml +++ b/deploy/charts/llm-worker/Chart.yaml @@ -15,7 +15,7 @@ type: application # This is the chart version. This version number should be incremented each time you make changes # to the chart and its templates, including the app version. # Versions are expected to follow Semantic Versioning (https://semver.org/) -version: 0.0.1 +version: 0.0.2 # This is the version number of the application being deployed. This version number should be # incremented each time you make changes to the application. Versions are not expected to diff --git a/deploy/charts/llm-worker/README.md b/deploy/charts/llm-worker/README.md index 46c48eb22..841fe28ca 100644 --- a/deploy/charts/llm-worker/README.md +++ b/deploy/charts/llm-worker/README.md @@ -3,3 +3,48 @@ ## Requirements - Kubernetes + +## Installation + +### With Helm + +#### 1. Add the Helm repository +```shell +helm repo add arcadia https://kubeagi.github.io/arcadia +helm repo update +``` + +#### 2. Install the FastChat worker + +```shell +helm install [RELEASE_NAME] arcadia/llm-worker +``` + +## Parameters + +### 1. 
MinIO + +```yaml + - name: MINIO_ENDPOINT + value: "your_minio_endpoint" + - name: MINIO_ACCESS_KEY + value: "your_minio_access_key" + - name: MINIO_SECRET_KEY + value: "your_minio_secret_key" + - name: MINIO_MODEL_BUCKET_PATH + value: "path/to/your/minio/model" +``` + + +### 2. FastChat + +```yaml + - name: FASTCHAT_WORKER_NAME + value: "your_worker_instance_name" # default "baichuan2-7b-instance-1" + - name: FASTCHAT_MODEL_NAME + value: "your_model_name" # default "baichuan2-7b" + - name: FASTCCHAT_WORKER_ADDRESS + value: "defined_worker_k8s_service_address:21002" + - name: FASTCCHAT_CONTROLLER_ADDRESS + value: "your_fastchat_controller_address:21001" +``` \ No newline at end of file diff --git a/deploy/charts/llm-worker/templates/deployment.yaml b/deploy/charts/llm-worker/templates/deployment.yaml index c7c0d6f62..3051d5d89 100644 --- a/deploy/charts/llm-worker/templates/deployment.yaml +++ b/deploy/charts/llm-worker/templates/deployment.yaml @@ -5,7 +5,6 @@ metadata: labels: {{- include "llm-worker.labels" . | nindent 4 }} spec: - replicas: {{ .Values.replicaCount }} selector: matchLabels: {{- include "llm-worker.selectorLabels" . 
| nindent 6 }} @@ -20,10 +19,27 @@ spec: spec: initContainers: - name: get-model - image: {{ .Values.init.Repository }} + image: "{{ .Values.init.image }}:{{ .Values.init.tag | default .Chart.AppVersion }}" env: + # TODO: could be simplified + {{- if .Values.image.env }} {{- toYaml .Values.image.env | nindent 12 }} - {{- toYaml .Values.init.env | nindent 12}} + {{- else }} + - name: FASTCHAT_WORKER_MODEL_NAMES + value: "Baichuan2-7B-Chat" + {{- end }} + {{- if .Values.init.env }} + {{- toYaml .Values.init.env | nindent 12 }} + {{- else }} + - name: MINIO_MODEL_BUCKET_PATH + value: "oss/arcadia/model" + - name: MINIO_ENDPOINT + value: "http://10.96.241.70:9000" + - name: MINIO_ACCESS_KEY + value: "os4GDEmSZaJwweoj" + - name: MINIO_SECRET_KEY + value: "ROVfENZuyUtpRNOT4mtTQicA3CPASQ89" + {{- end }} command: # clone model repo to local - "/bin/sh" @@ -32,7 +48,7 @@ spec: echo "Init object storage service..." mc alias set oss $MINIO_ENDPOINT $MINIO_ACCESS_KEY $MINIO_SECRET_KEY --insecure echo "Copy model files..." - mc --insecure cp -r oss/kubeagi/models/$FASTCHAT_WORKER_MODEL_NAMES /data/models + mc --insecure cp -r $MINIO_MODEL_BUCKET_PATH/$FASTCHAT_MODEL_NAME /data/models ls /data/models volumeMounts: - mountPath: /data/models @@ -45,18 +61,38 @@ spec: - name: {{ .Chart.Name }} image: "{{ .Values.image.repository }}:{{ .Values.image.tag | default .Chart.AppVersion }}" imagePullPolicy: {{ .Values.image.pullPolicy }} + {{ with .Values.resources }} resources: - limits: - nvidia.com/gpu: "1" # request 1 GPU + {{ toYaml . 
| nindent 12 }} + {{ end }} env: + {{- if .Values.image.env }} {{- toYaml .Values.image.env | nindent 12 }} + {{- else }} + - name: FASTCHAT_MODEL_NAME + value: "baichuan2-7b" + - name: FASTCCHAT_WORKER_ADDRESS + {{- if .Values.ingress.enabled }} + {{- range .Values.ingress.hosts }} + value: "{{ .host }}:21002" + {{- end }} + {{- else }} + value: "{{ .Release.Service }}.{{ .Release.Namespace }}.svc.cluster.local:21002" + {{- end }} + - name: FASTCCHAT_CONTROLLER_ADDRESS + value: "http://arcadia-fastchat.{{ .Release.Namespace }}.svc.cluster.local:21001" + {{- end }} + - name: FASTCHAT_WORKER_NAME + value: {{ .Release.Name }} + - name: FASTCHAT_WORKER_NAMESPACE + value: {{ .Release.Namespace }} command: - "/bin/bash" - "-c" - | echo "Starting model worker..." - python3.9 -m fastchat.serve.model_worker --model-names $FASTCHAT_WORKER_MODEL_NAMES \ - --model-path /data/models/$FASTCHAT_WORKER_MODEL_PATH --worker-address $FASTCCHAT_WORKER_ADDRESS \ + python3.9 -m fastchat.serve.model_worker --model-names $FASTCHAT_MODEL_NAME-$FASTCHAT_WORKER_NAME-$FASTCHAT_WORKER_NAMESPACE \ + --model-path /data/models/$FASTCHAT_MODEL_NAME --worker-address $FASTCCHAT_WORKER_ADDRESS \ --controller-address $FASTCCHAT_CONTROLLER_ADDRESS \ --host 0.0.0.0 --port 21002 ports: diff --git a/deploy/charts/llm-worker/values.yaml b/deploy/charts/llm-worker/values.yaml index 571ae45e6..3489ca3dd 100644 --- a/deploy/charts/llm-worker/values.yaml +++ b/deploy/charts/llm-worker/values.yaml @@ -2,31 +2,31 @@ # This is a YAML-formatted file. # Declare variables to be passed into your templates. image: - repository: kubebb/arcadia-llm-worker + repository: kubebb/arcadia-fastchat-worker pullPolicy: IfNotPresent # Overrides the image tag whose default is the chart appVersion. 
tag: "v0.0.1" env: - - name: FASTCHAT_WORKER_MODEL_NAMES - value: "chatglm3-6b" - - name: FASTCHAT_WORKER_MODEL_PATH - value: "chatglm3-6b" - - name: FASTCCHAT_WORKER_ADDRESS - value: "http://fastchat-model-worker.arcadia.svc.cluster.local:21002" - - name: FASTCCHAT_CONTROLLER_ADDRESS - value: "http://arcadia-fastchat.svc.cluster.local:21001" + - name: FASTCHAT_MODEL_NAME + value: "baichuan2-7b" + - name: FASTCCHAT_WORKER_ADDRESS + value: "http://arcadia-llm-worker.arcadia.svc.cluster.local:21002" + - name: FASTCCHAT_CONTROLLER_ADDRESS + value: "http://arcadia-fastchat.arcadia.svc.cluster.local:21001" init: image: kubebb/minio-mc pullPolicy: IfNotPresent tag: "RELEASE.2023-01-28T20-29-38Z" env: - - name: MINIO_ENDPOINT - value: "https://arcadia-minio-api.172.22.96.167.nip.io/" - - name: MINIO_ACCESS_KEY - value: "ydKiRfEhz2UTvrn8" - - name: MINIO_SECRET_KEY - value: "XoJ2X5N2BOYWXgP3BczEGiG3QG5VKOKt" + - name: MINIO_ENDPOINT + value: "http://10.96.241.70:9000" + - name: MINIO_ACCESS_KEY + value: "os4GDEmSZaJwweoj" + - name: MINIO_SECRET_KEY + value: "ROVfENZuyUtpRNOT4mtTQicA3CPASQ89" + - name: MINIO_MODEL_BUCKET_PATH + value: "oss/arcadia/model" persistentVolume: enabled: true @@ -60,7 +60,10 @@ ingress: # hosts: # - chart-example.local -resources: {} +resources: + limits: + # request 1 GPU + nvidia.com/gpu: "1" # We usually recommend not to specify default resources and to leave this as a conscious # choice for the user. 
This also increases chances charts run on environments with little diff --git a/pkg/worker/runner.go b/pkg/worker/runner.go index a2b36c4e2..ddc0d5993 100644 --- a/pkg/worker/runner.go +++ b/pkg/worker/runner.go @@ -34,11 +34,18 @@ type ModelRunner interface { var _ ModelRunner = (*RunnerFastchat)(nil) +var _ ModelRunner = (*RunnerFastchatVLLM)(nil) + type RunnerFastchat struct { c client.Client w *arcadiav1alpha1.Worker } +type RunnerFastchatVLLM struct { + c client.Client + w *arcadiav1alpha1.Worker +} + func NewRunnerFastchat(c client.Client, w *arcadiav1alpha1.Worker) (ModelRunner, error) { return &RunnerFastchat{ c: c, @@ -46,6 +53,13 @@ func NewRunnerFastchat(c client.Client, w *arcadiav1alpha1.Worker) (ModelRunner, }, nil } +func NewRunnerFastchatVLLM(c client.Client, w *arcadiav1alpha1.Worker) (ModelRunner, error) { + return &RunnerFastchatVLLM{ + c: c, + w: w, + }, nil +} + func (runner *RunnerFastchat) Build(ctx context.Context, model *arcadiav1alpha1.TypedObjectReference) (any, error) { if model == nil { return nil, errors.New("nil model") @@ -58,17 +72,60 @@ func (runner *RunnerFastchat) Build(ctx context.Context, model *arcadiav1alpha1. // read worker address container := &corev1.Container{ Name: "runner", - Image: "kubebb/arcadia-llm-worker:v0.0.1", + Image: "kubebb/arcadia-fastchat-worker:v0.0.1", ImagePullPolicy: "IfNotPresent", Command: []string{ "/bin/bash", "-c", `echo "Run model worker..." 
-python3.9 -m fastchat.serve.model_worker --model-names $FASTCHAT_MODEL_NAME \ +python3.9 -m fastchat.serve.model_worker --model-names $FASTCHAT_MODEL_NAME-$FASTCHAT_WORKER_NAME-$FASTCHAT_WORKER_NAMESPACE \ --model-path /data/models/$FASTCHAT_MODEL_NAME --worker-address $FASTCHAT_WORKER_ADDRESS \ --controller-address $FASTCHAT_CONTROLLER_ADDRESS \ --host 0.0.0.0 --port 21002`}, Env: []corev1.EnvVar{ + {Name: "FASTCHAT_WORKER_NAMESPACE", Value: runner.w.Namespace}, + {Name: "FASTCHAT_WORKER_NAME", Value: runner.w.Name}, + {Name: "FASTCHAT_MODEL_NAME", Value: model.Name}, + {Name: "FASTCHAT_WORKER_ADDRESS", Value: fmt.Sprintf("http://%s.%s.svc.cluster.local:21002", runner.w.Name+WokerCommonSuffix, runner.w.Namespace)}, + {Name: "FASTCHAT_CONTROLLER_ADDRESS", Value: gw.Controller}, + }, + Ports: []corev1.ContainerPort{ + {Name: "http", ContainerPort: 21002}, + }, + VolumeMounts: []corev1.VolumeMount{ + {Name: "models", MountPath: "/data/models"}, + }, + Resources: runner.w.Spec.Resources, + } + + return container, nil +} + +func (runner *RunnerFastchatVLLM) Build(ctx context.Context, model *arcadiav1alpha1.TypedObjectReference) (any, error) { + if model == nil { + return nil, errors.New("nil model") + } + gw, err := config.GetGateway(ctx, runner.c) + if err != nil { + return nil, fmt.Errorf("failed to get arcadia config with %w", err) + } + + // read worker address + container := &corev1.Container{ + Name: "runner", + Image: "kubebb/arcadia-fastchat-worker:vllm-v0.0.1", + ImagePullPolicy: "IfNotPresent", + Command: []string{ + "/bin/bash", + "-c", + `echo "Run model worker..." 
+ python3.9 -m fastchat.serve.vllm_worker --model-names $FASTCHAT_MODEL_NAME-$FASTCHAT_WORKER_NAME-$FASTCHAT_WORKER_NAMESPACE \ + --model-path /data/models/$FASTCHAT_MODEL_NAME --worker-address $FASTCHAT_WORKER_ADDRESS \ + --controller-address $FASTCHAT_CONTROLLER_ADDRESS \ + --host 0.0.0.0 --port 21002 --trust-remote-code`}, + Env: []corev1.EnvVar{ + {Name: "FASTCHAT_WORKER_NAMESPACE", Value: runner.w.Namespace}, + {Name: "FASTCHAT_WORKER_NAME", Value: runner.w.Name}, {Name: "FASTCHAT_MODEL_NAME", Value: model.Name}, {Name: "FASTCHAT_WORKER_ADDRESS", Value: fmt.Sprintf("http://%s.%s.svc.cluster.local:21002", runner.w.Name+WokerCommonSuffix, runner.w.Namespace)}, {Name: "FASTCHAT_CONTROLLER_ADDRESS", Value: gw.Controller}, diff --git a/pkg/worker/worker.go b/pkg/worker/worker.go index 0a60df7ca..c83e24457 100644 --- a/pkg/worker/worker.go +++ b/pkg/worker/worker.go @@ -180,11 +180,22 @@ func NewPodWorker(ctx context.Context, c client.Client, s *runtime.Scheme, w *ar } // init runner - r, err := NewRunnerFastchat(c, w.DeepCopy()) - if err != nil { - return nil, fmt.Errorf("failed to new a runner with %w", err) + switch w.Spec.Type { + case arcadiav1alpha1.WorkerTypeFastchatVLLM: + r, err := NewRunnerFastchatVLLM(c, w.DeepCopy()) + if err != nil { + return nil, fmt.Errorf("failed to new a runner with %w", err) + } + worker.r = r + case arcadiav1alpha1.WorkerTypeFastchatNormal: + r, err := NewRunnerFastchat(c, w.DeepCopy()) + if err != nil { + return nil, fmt.Errorf("failed to new a runner with %w", err) + } + worker.r = r + default: + return nil, fmt.Errorf("worker %s with type %s not supported in worker", w.Name, w.Spec.Type) } - worker.r = r return worker, nil }