feat: transformers backend #382

Draft: wants to merge 2 commits into main
5 changes: 3 additions & 2 deletions .github/workflows/test-docker-gpu.yaml
@@ -18,6 +18,7 @@ jobs:
- exllama2-gptq
- exllama2-exl2
- diffusers
- transformers
# - exllama
# - mamba
steps:
@@ -48,8 +49,8 @@
- name: run test model
run: docker run --name testmodel -d --rm -p 8080:8080 --gpus all testmodel:test

- name: run test (gguf)
if: matrix.backend == 'llama-cuda'
- name: run test (${{ matrix.backend }})
if: matrix.backend == 'llama-cuda' || matrix.backend == 'transformers'
run: |
result=$(curl --fail --retry 10 --retry-all-errors http://127.0.0.1:8080/v1/chat/completions -H "Content-Type: application/json" -d '{
"model": "llama-3.1-8b-instruct",
2 changes: 2 additions & 0 deletions pkg/aikit2llb/inference/convert.go
@@ -51,6 +51,8 @@ func Aikit2LLB(c *config.InferenceConfig, platform *specs.Platform) (llb.State,
merge = installMamba(state, merge)
case utils.BackendDiffusers:
merge = installDiffusers(state, merge)
case utils.BackendTransformers:
merge = installTransformers(state, merge)
}
}

6 changes: 6 additions & 0 deletions pkg/aikit2llb/inference/image.go
@@ -68,6 +68,12 @@ func emptyImage(c *config.InferenceConfig, platform *specs.Platform) *specs.Imag
"CUDA_HOME=/usr/local/cuda",
}
img.Config.Env = append(img.Config.Env, diffusersEnv...)
case utils.BackendTransformers:
transformersEnv := []string{
"EXTERNAL_GRPC_BACKENDS=transformers:/tmp/localai/backend/python/transformers/run.sh",
"CUDA_HOME=/usr/local/cuda",
}
img.Config.Env = append(img.Config.Env, transformersEnv...)
}
}

18 changes: 18 additions & 0 deletions pkg/aikit2llb/inference/transformers.go
@@ -0,0 +1,18 @@
package inference

import (
"github.com/moby/buildkit/client/llb"
"github.com/sozercan/aikit/pkg/utils"
)

func installTransformers(s llb.State, merge llb.State) llb.State {
savedState := s
s = s.Run(utils.Sh("apt-get install --no-install-recommends -y git python3 python3-pip python3-venv python-is-python3 make && pip install uv grpcio-tools && apt-get clean"), llb.IgnoreCache).Root()

s = cloneLocalAI(s)

s = s.Run(utils.Bashf("export BUILD_TYPE=cublas && export CUDA_MAJOR_VERSION=12 && cd /tmp/localai/backend/python/%[1]s && sed -i 's/grpcio==1.66.0/grpcio==1.66.1/g' requirements.txt && make %[1]s", utils.BackendTransformers)).Root()

diff := llb.Diff(savedState, s)
return llb.Merge([]llb.State{merge, diff})
}
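Note that installTransformers follows the same diff-and-merge pattern as the other backend installers in this package: run the setup commands against a copy of the base state, then graft only the resulting filesystem changes onto the merge target. A minimal sketch of that pattern with BuildKit's LLB client is shown below (the function name and command are illustrative only; the PR itself uses the repo's utils.Sh and utils.Bashf helpers rather than llb.Shlex directly):

package inference

import (
	"github.com/moby/buildkit/client/llb"
)

// installSketch reduces the shared installer pattern to its core steps.
// It is an illustration, not code from this PR.
func installSketch(s llb.State, merge llb.State) llb.State {
	savedState := s

	// Run an install step; Root() returns the filesystem state after the command.
	s = s.Run(llb.Shlex("apt-get install --no-install-recommends -y python3")).Root()

	// Diff keeps only what the steps above changed relative to savedState,
	// so base-image layers are not duplicated when merging.
	diff := llb.Diff(savedState, s)
	return llb.Merge([]llb.State{merge, diff})
}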
4 changes: 2 additions & 2 deletions pkg/build/build.go
@@ -448,11 +448,11 @@ func validateInferenceConfig(c *config.InferenceConfig) error {
return errors.New("cannot specify both stablediffusion with exllama or exllama2 at this time")
}

if (slices.Contains(c.Backends, utils.BackendExllama) || slices.Contains(c.Backends, utils.BackendExllamaV2) || slices.Contains(c.Backends, utils.BackendMamba) || slices.Contains(c.Backends, utils.BackendDiffusers)) && c.Runtime != utils.RuntimeNVIDIA {
if (slices.Contains(c.Backends, utils.BackendExllama) || slices.Contains(c.Backends, utils.BackendExllamaV2) || slices.Contains(c.Backends, utils.BackendMamba) || slices.Contains(c.Backends, utils.BackendDiffusers) || slices.Contains(c.Backends, utils.BackendTransformers)) && c.Runtime != utils.RuntimeNVIDIA {
return errors.New("exllama, mamba, and diffusers backends only supports nvidia cuda runtime. please add 'runtime: cuda' to your aikitfile.yaml")
}

backends := []string{utils.BackendExllama, utils.BackendExllamaV2, utils.BackendStableDiffusion, utils.BackendMamba, utils.BackendDiffusers}
backends := []string{utils.BackendExllama, utils.BackendExllamaV2, utils.BackendStableDiffusion, utils.BackendMamba, utils.BackendDiffusers, utils.BackendTransformers}
for _, b := range c.Backends {
if !slices.Contains(backends, b) {
return errors.Errorf("backend %s is not supported", b)
1 change: 1 addition & 0 deletions pkg/utils/const.go
@@ -11,6 +11,7 @@ const (
BackendExllamaV2 = "exllama2"
BackendMamba = "mamba"
BackendDiffusers = "diffusers"
BackendTransformers = "transformers"

TargetUnsloth = "unsloth"

26 changes: 26 additions & 0 deletions test/aikitfile-transformers.yaml
@@ -0,0 +1,26 @@
#syntax=aikit:test
apiVersion: v1alpha1
debug: true
runtime: cuda
backends:
- transformers
config: |
- name: llama-3.1-8b-instruct
backend: transformers
type: AutoModelForCausalLM
parameters:
model: microsoft/Phi-3.5-mini-instruct
f16: true
stopwords:
- <|user|>
- <|assistant|>
- <|end|>
template:
chat_message: |
<|{{ .RoleName }}|>
{{.Content}}<|end|>
chat: |
{{.Input}}
<|assistant|>
completion: |
{{.Input}}
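For local verification outside CI, a rough equivalent of the workflow's test step might look like the following. This is a sketch only: image and tag names are illustrative, the buildx invocation assumes aikit's usual BuildKit-frontend build flow, and the prompt body is a placeholder since the workflow's full request body is truncated above.

# Build an image from the test aikitfile (assumes the aikit frontend
# referenced by the #syntax line is available).
docker buildx build . -t testmodel:test -f test/aikitfile-transformers.yaml --load

# Run it with GPU access, matching the CI step.
docker run --name testmodel -d --rm -p 8080:8080 --gpus all testmodel:test

# Query the OpenAI-compatible endpoint; the model name matches the
# aikitfile's `name` field.
curl http://127.0.0.1:8080/v1/chat/completions \
  -H "Content-Type: application/json" \
  -d '{"model": "llama-3.1-8b-instruct", "messages": [{"role": "user", "content": "Say hello"}]}'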