Skip to content

Commit

Permalink
Merge pull request #199 from Lanture1064/dev
Browse files Browse the repository at this point in the history
feat: worker support VLLM
  • Loading branch information
bjwswang authored Nov 20, 2023
2 parents 0b7c8e8 + 178f8a1 commit 3004c10
Show file tree
Hide file tree
Showing 6 changed files with 183 additions and 31 deletions.
2 changes: 1 addition & 1 deletion deploy/charts/llm-worker/Chart.yaml
Original file line number Diff line number Diff line change
Expand Up @@ -15,7 +15,7 @@ type: application
# This is the chart version. This version number should be incremented each time you make changes
# to the chart and its templates, including the app version.
# Versions are expected to follow Semantic Versioning (https://semver.org/)
version: 0.0.1
version: 0.0.2

# This is the version number of the application being deployed. This version number should be
# incremented each time you make changes to the application. Versions are not expected to
Expand Down
45 changes: 45 additions & 0 deletions deploy/charts/llm-worker/README.md
Original file line number Diff line number Diff line change
Expand Up @@ -3,3 +3,48 @@
## Requirements

- Kubernetes

## Installation

### With Helm

#### 1. Add the Helm repository
```shell
helm repo add arcadia https://kubeagi.github.io/arcadia
helm repo update
```

#### 2. Install the LLM worker chart

```shell
helm install [RELEASE_NAME] arcadia/llm-worker
```

## Parameters

### 1. MinIO

```yaml
- name: MINIO_ENDPOINT
value: "your_minio_endpoint"
- name: MINIO_ACCESS_KEY
value: "your_minio_access_key"
- name: MINIO_SECRET_KEY
value: "your_minio_secret_key"
- name: MINIO_MODEL_BUCKET_PATH
value: "path/to/your/minio/model"
```
### 2. FastChat
```yaml
- name: FASTCHAT_WORKER_NAME
value: "your_worker_instance_name" # default "baichuan2-7b-instance-1"
- name: FASTCHAT_MODEL_NAME
value: "your_model_name" # default "baichuan2-7b"
- name: FASTCCHAT_WORKER_ADDRESS
value: "defined_worker_k8s_service_address:21002"
- name: FASTCCHAT_CONTROLLER_ADDRESS
value: "your_fastchat_controller_address:21001"
```
52 changes: 44 additions & 8 deletions deploy/charts/llm-worker/templates/deployment.yaml
Original file line number Diff line number Diff line change
Expand Up @@ -5,7 +5,6 @@ metadata:
labels:
{{- include "llm-worker.labels" . | nindent 4 }}
spec:
replicas: {{ .Values.replicaCount }}
selector:
matchLabels:
{{- include "llm-worker.selectorLabels" . | nindent 6 }}
Expand All @@ -20,10 +19,27 @@ spec:
spec:
initContainers:
- name: get-model
image: {{ .Values.init.Repository }}
image: "{{ .Values.init.image }}:{{ .Values.init.tag | default .Chart.AppVersion }}"
env:
# TODO: could be simplified
{{- if .Values.image.env }}
{{- toYaml .Values.image.env | nindent 12 }}
{{- toYaml .Values.init.env | nindent 12}}
{{- else }}
- name: FASTCHAT_WORKER_MODEL_NAMES
value: "Baichuan2-7B-Chat"
{{- end }}
{{- if .Values.init.env }}
{{- toYaml .Values.init.env | nindent 12 }}
{{- else }}
- name: MINIO_MODEL_BUCKET_PATH
value: "oss/arcadia/model"
- name: MINIO_ENDPOINT
value: "http://10.96.241.70:9000"
- name: MINIO_ACCESS_KEY
value: "os4GDEmSZaJwweoj"
- name: MINIO_SECRET_KEY
value: "ROVfENZuyUtpRNOT4mtTQicA3CPASQ89"
{{- end }}
command:
# clone model repo to local
- "/bin/sh"
Expand All @@ -32,7 +48,7 @@ spec:
echo "Init object storage service..."
mc alias set oss $MINIO_ENDPOINT $MINIO_ACCESS_KEY $MINIO_SECRET_KEY --insecure
echo "Copy model files..."
mc --insecure cp -r oss/kubeagi/models/$FASTCHAT_WORKER_MODEL_NAMES /data/models
mc --insecure cp -r $MINIO_MODEL_BUCKET_PATH/$FASTCHAT_MODEL_NAME /data/models
ls /data/models
volumeMounts:
- mountPath: /data/models
Expand All @@ -45,18 +61,38 @@ spec:
- name: {{ .Chart.Name }}
image: "{{ .Values.image.repository }}:{{ .Values.image.tag | default .Chart.AppVersion }}"
imagePullPolicy: {{ .Values.image.pullPolicy }}
{{ with .Values.resources }}
resources:
limits:
nvidia.com/gpu: "1" # request 1 GPU
{{ toYaml . | nindent 12 }}
{{ end }}
env:
{{- if .Values.image.env }}
{{- toYaml .Values.image.env | nindent 12 }}
{{- else }}
- name: FASTCHAT_MODEL_NAME
value: "baichuan2-7b"
- name: FASTCCHAT_WORKER_ADDRESS
{{- if .Values.ingress.enabled }}
{{- range .Values.ingress.hosts }}
value: "{{ .host }}:21002"
{{- end }}
{{- else }}
value: "{{ .Release.Service }}.{{ .Release.Namespace }}.svc.cluster.local:21002"
{{- end }}
- name: FASTCCHAT_CONTROLLER_ADDRESS
value: "http://arcadia-fastchat.{{ .Release.Namespace }}.svc.cluster.local:21001"
{{- end }}
- name: FASTCHAT_WORKER_NAME
value: {{ .Release.Name }}
- name: FASTCHAT_WORKER_NAMESPACE
value: {{ .Release.Namespace }}
command:
- "/bin/bash"
- "-c"
- |
echo "Starting model worker..."
python3.9 -m fastchat.serve.model_worker --model-names $FASTCHAT_WORKER_MODEL_NAMES \
--model-path /data/models/$FASTCHAT_WORKER_MODEL_PATH --worker-address $FASTCCHAT_WORKER_ADDRESS \
python3.9 -m fastchat.serve.model_worker --model-names $FASTCHAT_MODEL_NAME-$FASTCHAT_WORKER_NAME-$FASTCHAT_WORKER_NAMESPACE \
--model-path /data/models/$FASTCHAT_MODEL_NAME --worker-address $FASTCCHAT_WORKER_ADDRESS \
--controller-address $FASTCCHAT_CONTROLLER_ADDRESS \
--host 0.0.0.0 --port 21002
ports:
Expand Down
35 changes: 19 additions & 16 deletions deploy/charts/llm-worker/values.yaml
Original file line number Diff line number Diff line change
Expand Up @@ -2,31 +2,31 @@
# This is a YAML-formatted file.
# Declare variables to be passed into your templates.
image:
repository: kubebb/arcadia-llm-worker
repository: kubebb/arcadia-fastchat-worker
pullPolicy: IfNotPresent
# Overrides the image tag whose default is the chart appVersion.
tag: "v0.0.1"
env:
- name: FASTCHAT_WORKER_MODEL_NAMES
value: "chatglm3-6b"
- name: FASTCHAT_WORKER_MODEL_PATH
value: "chatglm3-6b"
- name: FASTCCHAT_WORKER_ADDRESS
value: "http://fastchat-model-worker.arcadia.svc.cluster.local:21002"
- name: FASTCCHAT_CONTROLLER_ADDRESS
value: "http://arcadia-fastchat.svc.cluster.local:21001"
- name: FASTCHAT_MODEL_NAME
value: "baichuan2-7b"
- name: FASTCCHAT_WORKER_ADDRESS
value: "http://arcadia-llm-worker.arcadia.svc.cluster.local:21002"
- name: FASTCCHAT_CONTROLLER_ADDRESS
value: "http://arcadia-fastchat.arcadia.svc.cluster.local:21001"

init:
image: kubebb/minio-mc
pullPolicy: IfNotPresent
tag: "RELEASE.2023-01-28T20-29-38Z"
env:
- name: MINIO_ENDPOINT
value: "https://arcadia-minio-api.172.22.96.167.nip.io/"
- name: MINIO_ACCESS_KEY
value: "ydKiRfEhz2UTvrn8"
- name: MINIO_SECRET_KEY
value: "XoJ2X5N2BOYWXgP3BczEGiG3QG5VKOKt"
- name: MINIO_ENDPOINT
value: "http://10.96.241.70:9000"
- name: MINIO_ACCESS_KEY
value: "os4GDEmSZaJwweoj"
- name: MINIO_SECRET_KEY
value: "ROVfENZuyUtpRNOT4mtTQicA3CPASQ89"
- name: MINIO_MODEL_BUCKET_PATH
value: "oss/arcadia/model"

persistentVolume:
enabled: true
Expand Down Expand Up @@ -60,7 +60,10 @@ ingress:
# hosts:
# - chart-example.local

resources: {}
resources:
limits:
# request 1 GPU
nvidia.com/gpu: "1"

# We usually recommend not to specify default resources and to leave this as a conscious
# choice for the user. This also increases chances charts run on environments with little
Expand Down
61 changes: 59 additions & 2 deletions pkg/worker/runner.go
Original file line number Diff line number Diff line change
Expand Up @@ -34,18 +34,32 @@ type ModelRunner interface {

var _ ModelRunner = (*RunnerFastchat)(nil)

var _ ModelRunner = (*RunnerFastchatVLLM)(nil)

// RunnerFastchat is the ModelRunner implementation whose Build method returns
// the container that starts fastchat.serve.model_worker.
type RunnerFastchat struct {
// c is the client supplied to NewRunnerFastchat.
c client.Client
// w is the Worker whose Name, Namespace and Spec.Resources are copied into
// the container built by Build.
w *arcadiav1alpha1.Worker
}

// RunnerFastchatVLLM is the ModelRunner implementation whose Build method
// returns the container that starts fastchat.serve.vllm_worker.
type RunnerFastchatVLLM struct {
// c is the client passed to config.GetGateway when building the container.
c client.Client
// w is the Worker whose Name and Namespace are exposed to the container as
// environment variables.
w *arcadiav1alpha1.Worker
}

// NewRunnerFastchat stores the given client and Worker in a fresh
// RunnerFastchat and returns it as a ModelRunner. The returned error is
// always nil.
func NewRunnerFastchat(c client.Client, w *arcadiav1alpha1.Worker) (ModelRunner, error) {
	runner := new(RunnerFastchat)
	runner.c = c
	runner.w = w
	return runner, nil
}

// NewRunnerFastchatVLLM stores the given client and Worker in a fresh
// RunnerFastchatVLLM and returns it as a ModelRunner. The returned error is
// always nil.
func NewRunnerFastchatVLLM(c client.Client, w *arcadiav1alpha1.Worker) (ModelRunner, error) {
	runner := new(RunnerFastchatVLLM)
	runner.c = c
	runner.w = w
	return runner, nil
}

func (runner *RunnerFastchat) Build(ctx context.Context, model *arcadiav1alpha1.TypedObjectReference) (any, error) {
if model == nil {
return nil, errors.New("nil model")
Expand All @@ -58,17 +72,60 @@ func (runner *RunnerFastchat) Build(ctx context.Context, model *arcadiav1alpha1.
// read worker address
container := &corev1.Container{
Name: "runner",
Image: "kubebb/arcadia-llm-worker:v0.0.1",
Image: "kubebb/arcadia-fastchat-worker:v0.0.1",
ImagePullPolicy: "IfNotPresent",
Command: []string{
"/bin/bash",
"-c",
`echo "Run model worker..."
python3.9 -m fastchat.serve.model_worker --model-names $FASTCHAT_MODEL_NAME \
python3.9 -m fastchat.serve.model_worker --model-names $FASTCHAT_MODEL_NAME-$FASTCHAT_WORKER_NAME-$FASTCHAT_WORKER_NAMESPACE \
--model-path /data/models/$FASTCHAT_MODEL_NAME --worker-address $FASTCHAT_WORKER_ADDRESS \
--controller-address $FASTCHAT_CONTROLLER_ADDRESS \
--host 0.0.0.0 --port 21002`},
Env: []corev1.EnvVar{
{Name: "FASTCHAT_WORKER_NAMESPACE", Value: runner.w.Namespace},
{Name: "FASTCHAT_WORKER_NAME", Value: runner.w.Name},
{Name: "FASTCHAT_MODEL_NAME", Value: model.Name},
{Name: "FASTCHAT_WORKER_ADDRESS", Value: fmt.Sprintf("http://%s.%s.svc.cluster.local:21002", runner.w.Name+WokerCommonSuffix, runner.w.Namespace)},
{Name: "FASTCHAT_CONTROLLER_ADDRESS", Value: gw.Controller},
},
Ports: []corev1.ContainerPort{
{Name: "http", ContainerPort: 21002},
},
VolumeMounts: []corev1.VolumeMount{
{Name: "models", MountPath: "/data/models"},
},
Resources: runner.w.Spec.Resources,
}

return container, nil
}

func (runner *RunnerFastchatVLLM) Build(ctx context.Context, model *arcadiav1alpha1.TypedObjectReference) (any, error) {
if model == nil {
return nil, errors.New("nil model")
}
gw, err := config.GetGateway(ctx, runner.c)
if err != nil {
return nil, fmt.Errorf("failed to get arcadia config with %w", err)
}

// read worker address
container := &corev1.Container{
Name: "runner",
Image: "kubebb/arcadia-fastchat-worker:vllm-v0.0.1",
ImagePullPolicy: "IfNotPresent",
Command: []string{
"/bin/bash",
"-c",
`echo "Run model worker..."
python3.9 -m fastchat.serve.vllm_worker --model-names $FASTCHAT_MODEL_NAME-$FASTCHAT_WORKER_NAME-$FASTCHAT_WORKER_NAMESPACE \
--model-path /data/models/$FASTCHAT_MODEL_NAME --worker-address $FASTCHAT_WORKER_ADDRESS \
--controller-address $FASTCHAT_CONTROLLER_ADDRESS \
--host 0.0.0.0 --port 21002 --trust-remote-code`},
Env: []corev1.EnvVar{
{Name: "FASTCHAT_WORKER_NAMESPACE", Value: runner.w.Namespace},
{Name: "FASTCHAT_WORKER_NAME", Value: runner.w.Name},
{Name: "FASTCHAT_MODEL_NAME", Value: model.Name},
{Name: "FASTCHAT_WORKER_ADDRESS", Value: fmt.Sprintf("http://%s.%s.svc.cluster.local:21002", runner.w.Name+WokerCommonSuffix, runner.w.Namespace)},
{Name: "FASTCHAT_CONTROLLER_ADDRESS", Value: gw.Controller},
Expand Down
19 changes: 15 additions & 4 deletions pkg/worker/worker.go
Original file line number Diff line number Diff line change
Expand Up @@ -180,11 +180,22 @@ func NewPodWorker(ctx context.Context, c client.Client, s *runtime.Scheme, w *ar
}

// init runner
r, err := NewRunnerFastchat(c, w.DeepCopy())
if err != nil {
return nil, fmt.Errorf("failed to new a runner with %w", err)
switch w.Spec.Type {
case arcadiav1alpha1.WorkerTypeFastchatVLLM:
r, err := NewRunnerFastchatVLLM(c, w.DeepCopy())
if err != nil {
return nil, fmt.Errorf("failed to new a runner with %w", err)
}
worker.r = r
case arcadiav1alpha1.WorkerTypeFastchatNormal:
r, err := NewRunnerFastchat(c, w.DeepCopy())
if err != nil {
return nil, fmt.Errorf("failed to new a runner with %w", err)
}
worker.r = r
default:
return nil, fmt.Errorf("worker %s with type %s not supported in worker", w.Name, w.Spec.Type)
}
worker.r = r

return worker, nil
}
Expand Down

0 comments on commit 3004c10

Please sign in to comment.