Skip to content

Commit

Permalink
feat: add stable diffusion support (#34)
Browse files Browse the repository at this point in the history
Signed-off-by: Sertac Ozercan <[email protected]>
  • Loading branch information
sozercan authored Dec 18, 2023
1 parent 8247603 commit 0cd6d6c
Show file tree
Hide file tree
Showing 10 changed files with 214 additions and 29 deletions.
40 changes: 32 additions & 8 deletions .github/workflows/test-docker.yaml
Original file line number Diff line number Diff line change
Expand Up @@ -18,6 +18,12 @@ jobs:
test:
runs-on: ubuntu-latest
timeout-minutes: 240
strategy:
fail-fast: false
matrix:
backend:
- llama
- stablediffusion
steps:
- name: Harden Runner
uses: step-security/harden-runner@eb238b55efaa70779f274895e782ed17c84f2895 # v2.6.1
Expand Down Expand Up @@ -56,7 +62,7 @@ jobs:
- name: build test model
run: |
docker buildx build . -t testmodel:test \
-f test/aikitfile.yaml \
-f test/aikitfile-${{ matrix.backend }}.yaml \
--load --progress plain \
--cache-from=type=gha,scope=testmodel \
--cache-to=type=gha,scope=testmodel,mode=max
Expand All @@ -70,18 +76,36 @@ jobs:
- name: install e2e dependencies
run: make test-e2e-dependencies

- name: run test
- name: run llama test
if: matrix.backend == 'llama'
run: |
curl http://127.0.0.1:8080/v1/chat/completions -H "Content-Type: application/json" -d '{"model": "llama-2-7b-chat", "messages": [{"role": "user", "content": "explain kubernetes in a sentence"}]}'
curl http://127.0.0.1:8080/v1/chat/completions -H "Content-Type: application/json" -d '{
"model": "llama-2-7b-chat",
"messages": [{"role": "user", "content": "explain kubernetes in a sentence"}]
}'
- name: run stablediffusion test
if: matrix.backend == 'stablediffusion'
run: |
curl http://127.0.0.1:8080/v1/images/generations -H "Content-Type: application/json" -d '{
"prompt": "A cute baby llama",
"size": "256x256"
}'
- name: save logs
if: always()
run: |
docker logs testmodel > docker.log
run: docker logs testmodel > /tmp/docker-${{ matrix.backend }}.log

- name: publish logs
- name: save generated image
if: matrix.backend == 'stablediffusion'
run: docker cp testmodel:/tmp/generated/images /tmp

- name: publish test artifacts
if: always()
uses: actions/upload-artifact@c7d193f32edcb7bfad88892161225aeda64e9392 # v4.0.0
with:
name: docker-logs
path: docker.log
name: test-${{ matrix.backend }}
path: |
/tmp/*.log
/tmp/images/*.png
2 changes: 1 addition & 1 deletion .github/workflows/test-kubernetes.yaml
Original file line number Diff line number Diff line change
Expand Up @@ -58,7 +58,7 @@ jobs:
- name: build test model
run: |
docker buildx build . -t testmodel:test \
-f test/aikitfile.yaml \
-f test/aikitfile-llama.yaml \
--load --progress plain \
--cache-from=type=gha,scope=testmodel \
--cache-to=type=gha,scope=testmodel,mode=max
Expand Down
3 changes: 2 additions & 1 deletion Makefile
Original file line number Diff line number Diff line change
Expand Up @@ -5,6 +5,7 @@ KIND_VERSION ?= 0.20.0
KUBERNETES_VERSION ?= 1.28.0
TAG ?= test
OUTPUT_TYPE ?= type=docker
TEST_FILE ?= test/aikitfile-llama.yaml
PULL ?=
NO_CACHE ?=

Expand All @@ -18,7 +19,7 @@ build-aikit:

.PHONY: build-test-model
build-test-model:
docker buildx build . -t ${REGISTRY}/testmodel:${TAG} -f test/aikitfile.yaml --output=${OUTPUT_TYPE}
docker buildx build . -t ${REGISTRY}/testmodel:${TAG} -f ${TEST_FILE} --output=${OUTPUT_TYPE}

.PHONY: run-test-model
run-test-model:
Expand Down
7 changes: 4 additions & 3 deletions README.md
Original file line number Diff line number Diff line change
Expand Up @@ -9,16 +9,17 @@ AIKit is a quick, easy, and local or cloud-agnostic way to get started to host a
AIKit uses [LocalAI](https://localai.io/) under-the-hood to run inference. LocalAI provides a drop-in replacement REST API that is OpenAI API compatible, so you can use any OpenAI API compatible client, such as [Kubectl AI](https://github.com/sozercan/kubectl-ai), [Chatbot-UI](https://github.com/sozercan/chatbot-ui) and many more, to send requests to open-source LLMs powered by AIKit!

> [!NOTE]
> At this time, AIKit is tested with LocalAI `llama` backend. Other backends may work but are not tested. Please open an issue if you'd like to see support for other backends.
> At this time, AIKit is tested with LocalAI `llama` and `stablediffusion` backends. Other backends may work but are not tested. Please open an issue if you'd like to see support for other backends.
## Features

- 🐳 No GPU, Internet access or additional tools needed except for [Docker](https://docs.docker.com/desktop/install/linux-install/)!
- 🤏 Minimal image size, resulting in less vulnerabilities and smaller attack surface with a custom [distroless](https://github.com/GoogleContainerTools/distroless)-based image
- 🚀 [Easy to use declarative configuration](./docs/specs.md)
- ✨ OpenAI API compatible to use with any OpenAI API compatible client
- 🖼️ [Multi-modal model support](./docs/demo.md#vision-with-llava)
- 🚢 Kubernetes deployment ready
- 📸 [Multi-modal model support](./docs/demo.md#vision-with-llava)
- 🖼️ Image generation support with Stable Diffusion
- 🚢 [Kubernetes deployment ready](#kubernetes-deployment)
- 📦 Supports multiple models with a single image
- 🖥️ [Supports GPU-accelerated inferencing with NVIDIA GPUs](#nvidia)
- 🔐 [Signed images for `aikit` and pre-made models](./docs/cosign.md)
Expand Down
5 changes: 4 additions & 1 deletion docs/specs.md
Original file line number Diff line number Diff line change
Expand Up @@ -5,7 +5,8 @@
```yaml
apiVersion: # required. only v1alpha1 is supported at the moment
debug: # optional. if set to true, will print debug logs
runtime: # optional. defaults to avx. can be avx, avx2, avx512, cuda
runtime: # optional. defaults to avx. can be "avx", "avx2", "avx512", "cuda"
backends: # optional. list of additional backends. can be "stablediffusion"
models: # required. list of models to build
- name: # required. name of the model
source: # required. source of the model. must be a url
Expand All @@ -23,6 +24,8 @@ Example:
apiVersion: v1alpha1
debug: true
runtime: cuda
backends:
- stablediffusion
models:
- name: llama-2-7b-chat
source: https://huggingface.co/TheBloke/Llama-2-7B-Chat-GGUF/resolve/main/llama-2-7b-chat.Q4_K_M.gguf
Expand Down
50 changes: 50 additions & 0 deletions examples/stablediffusion.yaml
Original file line number Diff line number Diff line change
@@ -0,0 +1,50 @@
#syntax=ghcr.io/sozercan/aikit:latest
# Example aikitfile that enables the LocalAI stablediffusion backend for
# image generation. Each model entry below is a Stable Diffusion NCNN asset
# (network params, weights, and vocab) pinned to an exact sha256 checksum.
apiVersion: v1alpha1
debug: true
backends:
- stablediffusion
# List of assets to download and embed into the image; each name is a
# destination path (directory prefix preserved), each source a fetch URL.
models:
- name: "stablediffusion_assets/AutoencoderKL-256-256-fp16-opt.param"
sha256: "18ca4b66685e21406bcf64c484b3b680b4949900415536d599cc876579c85c82"
source: "https://raw.githubusercontent.com/EdVince/Stable-Diffusion-NCNN/main/x86/linux/assets/AutoencoderKL-256-256-fp16-opt.param"
- name: "stablediffusion_assets/AutoencoderKL-512-512-fp16-opt.param"
sha256: "cf45f63aacf3dbbab0f59ed92a6f2c14d9a1801314631cd3abe91e3c85639a20"
source: "https://raw.githubusercontent.com/EdVince/Stable-Diffusion-NCNN/main/x86/linux/assets/AutoencoderKL-512-512-fp16-opt.param"
- name: "stablediffusion_assets/AutoencoderKL-base-fp16.param"
sha256: "0254a056dce61b0c27dc9ec1b78b53bcf55315c540f55f051eb841aa992701ba"
source: "https://raw.githubusercontent.com/EdVince/Stable-Diffusion-NCNN/main/x86/linux/assets/AutoencoderKL-base-fp16.param"
- name: "stablediffusion_assets/AutoencoderKL-encoder-512-512-fp16.bin"
sha256: "ddcb79a9951b9f91e05e087739ed69da2c1c4ae30ba4168cce350b49d617c9fa"
source: "https://github.com/lenaxia/stablediffusion-bins/releases/download/2023.05.24/AutoencoderKL-encoder-512-512-fp16.bin"
- name: "stablediffusion_assets/AutoencoderKL-fp16.bin"
sha256: "f02e71f80e70252734724bbfaed5c4ddd3a8ed7e61bb2175ff5f53099f0e35dd"
source: "https://github.com/lenaxia/stablediffusion-bins/releases/download/2023.05.24/AutoencoderKL-fp16.bin"
- name: "stablediffusion_assets/FrozenCLIPEmbedder-fp16.bin"
sha256: "1c9a12f4e1dd1b295a388045f7f28a2352a4d70c3dc96a542189a3dd7051fdd6"
source: "https://github.com/lenaxia/stablediffusion-bins/releases/download/2023.05.24/FrozenCLIPEmbedder-fp16.bin"
- name: "stablediffusion_assets/FrozenCLIPEmbedder-fp16.param"
sha256: "471afbe678dd1fd3fe764ef9c6eccaccb0a7d7e601f27b462aa926b20eb368c9"
source: "https://raw.githubusercontent.com/EdVince/Stable-Diffusion-NCNN/main/x86/linux/assets/FrozenCLIPEmbedder-fp16.param"
- name: "stablediffusion_assets/log_sigmas.bin"
sha256: "a2089f8aa4c61f9c200feaec541ab3f5c94233b28deb6d5e8bcd974fa79b68ac"
source: "https://github.com/EdVince/Stable-Diffusion-NCNN/raw/main/x86/linux/assets/log_sigmas.bin"
- name: "stablediffusion_assets/UNetModel-256-256-MHA-fp16-opt.param"
sha256: "a58c380229f09491776df837b7aa7adffc0a87821dc4708b34535da2e36e3da1"
source: "https://raw.githubusercontent.com/EdVince/Stable-Diffusion-NCNN/main/x86/linux/assets/UNetModel-256-256-MHA-fp16-opt.param"
- name: "stablediffusion_assets/UNetModel-512-512-MHA-fp16-opt.param"
sha256: "f12034067062827bd7f43d1d21888d1f03905401acf6c6eea22be23c259636fa"
source: "https://raw.githubusercontent.com/EdVince/Stable-Diffusion-NCNN/main/x86/linux/assets/UNetModel-512-512-MHA-fp16-opt.param"
- name: "stablediffusion_assets/UNetModel-base-MHA-fp16.param"
sha256: "696f6975de49f4325b53ce32aff81861a6d6c07cd9ce3f0aae2cc405350af38d"
source: "https://raw.githubusercontent.com/EdVince/Stable-Diffusion-NCNN/main/x86/linux/assets/UNetModel-base-MHA-fp16.param"
- name: "stablediffusion_assets/UNetModel-MHA-fp16.bin"
sha256: "d618918d011bfc1f644c0f2a33bf84931bd53b28a98492b0a8ed6f3a818852c3"
source: "https://github.com/lenaxia/stablediffusion-bins/releases/download/2023.05.24/UNetModel-MHA-fp16.bin"
- name: "stablediffusion_assets/vocab.txt"
sha256: "e30e57b6f1e47616982ef898d8922be24e535b4fa3d0110477b3a6f02ebbae7d"
source: "https://raw.githubusercontent.com/EdVince/Stable-Diffusion-NCNN/main/x86/linux/assets/vocab.txt"
# Model configuration passed through to LocalAI verbatim (block scalar:
# the lines below are part of the string value, so no comments inside it).
config: |
- name: stablediffusion
backend: stablediffusion
parameters:
model: stablediffusion_assets
11 changes: 6 additions & 5 deletions pkg/aikit/config/specs.go
Original file line number Diff line number Diff line change
Expand Up @@ -14,11 +14,12 @@ func NewFromBytes(b []byte) (*Config, error) {
}

type Config struct {
APIVersion string `yaml:"apiVersion"`
Debug bool `yaml:"debug,omitempty"`
Runtime string `yaml:"runtime,omitempty"`
Models []Model `yaml:"models"`
Config string `yaml:"config,omitempty"`
APIVersion string `yaml:"apiVersion"`
Debug bool `yaml:"debug,omitempty"`
Runtime string `yaml:"runtime,omitempty"`
Backends []string `yaml:"backends,omitempty"`
Models []Model `yaml:"models"`
Config string `yaml:"config,omitempty"`
}

type Model struct {
Expand Down
75 changes: 65 additions & 10 deletions pkg/aikit2llb/convert.go
Original file line number Diff line number Diff line change
Expand Up @@ -4,6 +4,7 @@ import (
"fmt"
"net/url"
"path"
"strings"

"github.com/moby/buildkit/client/llb"
"github.com/opencontainers/go-digest"
Expand All @@ -21,18 +22,39 @@ const (

func Aikit2LLB(c *config.Config) (llb.State, *specs.Image) {
var merge llb.State
s := llb.Image(debianSlim)
s, merge = copyModels(c, s)
s, merge = addLocalAI(c, s, merge)
state := llb.Image(debianSlim)
base := getBaseImage(c)

state, merge = copyModels(c, base, state)
state, merge = addLocalAI(c, state, merge)

// install cuda if runtime is nvidia
if c.Runtime == utils.RuntimeNVIDIA {
merge = installCuda(s, merge)
merge = installCuda(state, merge)
}

// install opencv and friends if stable diffusion backend is being used
for b := range c.Backends {
if strings.Contains(c.Backends[b], "stablediffusion") {
merge = installOpenCV(state, merge)
}
}

imageCfg := NewImageConfig(c)
return merge, imageCfg
}

func copyModels(c *config.Config, s llb.State) (llb.State, llb.State) {
db := llb.Image(distrolessBase)
// getBaseImage selects the base layer for the final image. Distroless is the
// default; when any configured backend mentions stablediffusion, the heavier
// debian slim image is returned instead because that backend needs extra
// runtime dependencies not present in distroless.
func getBaseImage(c *config.Config) llb.State {
	for _, backend := range c.Backends {
		if strings.Contains(backend, "stablediffusion") {
			// stable diffusion pulls in too many shared libraries for distroless
			return llb.Image(debianSlim)
		}
	}
	return llb.Image(distrolessBase)
}

func copyModels(c *config.Config, base llb.State, s llb.State) (llb.State, llb.State) {
savedState := s

// create config file if defined
Expand All @@ -50,13 +72,20 @@ func copyModels(c *config.Config, s llb.State) (llb.State, llb.State) {

m := llb.HTTP(model.Source, opts...)

var modelPath string
if strings.Contains(model.Name, "/") {
modelPath = "/models/" + path.Dir(model.Name) + "/" + fileNameFromURL(model.Source)
} else {
modelPath = "/models/" + fileNameFromURL(model.Source)
}

var copyOpts []llb.CopyOption
copyOpts = append(copyOpts, &llb.CopyInfo{
CreateDestPath: true,
})
s = s.File(
llb.Copy(m, fileNameFromURL(model.Source), "/models/"+fileNameFromURL(model.Source), copyOpts...),
llb.WithCustomName("Copying "+fileNameFromURL(model.Source)+" to /models"), //nolint: goconst
llb.Copy(m, fileNameFromURL(model.Source), modelPath, copyOpts...),
llb.WithCustomName("Copying "+fileNameFromURL(model.Source)+" to "+modelPath), //nolint: goconst
)

// create prompt templates if defined
Expand All @@ -67,7 +96,7 @@ func copyModels(c *config.Config, s llb.State) (llb.State, llb.State) {
}
}
diff := llb.Diff(savedState, s)
merge := llb.Merge([]llb.State{db, diff})
merge := llb.Merge([]llb.State{base, diff})
return s, merge
}

Expand All @@ -87,6 +116,7 @@ func installCuda(s llb.State, merge llb.State) llb.State {
llb.WithCustomName("Copying "+fileNameFromURL(cudaKeyringURL)), //nolint: goconst
)
s = s.Run(shf("dpkg -i cuda-keyring_1.1-1_all.deb && rm cuda-keyring_1.1-1_all.deb")).Root()
// running apt-get update twice due to nvidia repo
s = s.Run(shf("apt-get update && apt-get install -y ca-certificates && apt-get update"), llb.IgnoreCache).Root()
savedState := s
s = s.Run(shf("apt-get install -y libcublas-%[1]s cuda-cudart-%[1]s && apt-get clean", cudaVersion)).Root()
Expand All @@ -96,6 +126,31 @@ func installCuda(s llb.State, merge llb.State) llb.State {
return merge
}

// installOpenCV layers the OpenCV runtime libraries and the prebuilt
// stablediffusion gRPC backend binary onto the merged image state.
// s is the working build state used to compute the package diff; merge is
// the accumulated result state. Returns merge with the apt-package diff and
// the downloaded backend binary applied.
func installOpenCV(s llb.State, merge llb.State) llb.State {
savedState := s
// adding debian 11 (bullseye) repo due to opencv 4.5 requirement
s = s.Run(shf("echo 'deb http://deb.debian.org/debian bullseye main' | tee -a /etc/apt/sources.list")).Root()
// pinning libdap packages to bullseye version due to symbol error
// also pre-creates /tmp/generated/images, the output dir for generated images
s = s.Run(shf("apt-get update && mkdir -p /tmp/generated/images && apt-get install -y libopencv-imgcodecs4.5 libgomp1 libdap27=3.20.7-6 libdapclient6v5=3.20.7-6 && apt-get clean"), llb.IgnoreCache).Root()
// fold only the package-install delta into the result state
diff := llb.Diff(savedState, s)
merge = llb.Merge([]llb.State{merge, diff})

// download the prebuilt stablediffusion backend binary matching the pinned
// LocalAI version; NOTE(review): assumes the CDN publishes a binary for
// every localAIVersion — confirm when bumping the version constant.
sdURL := fmt.Sprintf("https://sertaccdn.azureedge.net/localai/%s/stablediffusion", localAIVersion)
var opts []llb.HTTPOption
opts = append(opts, llb.Filename("stablediffusion"))
opts = append(opts, llb.Chmod(0o755))
var copyOpts []llb.CopyOption
copyOpts = append(copyOpts, &llb.CopyInfo{
CreateDestPath: true,
})
sd := llb.HTTP(sdURL, opts...)
// place the binary where LocalAI discovers its backend gRPC assets
merge = merge.File(
llb.Copy(sd, "stablediffusion", "/tmp/localai/backend_data/backend-assets/grpc/stablediffusion", copyOpts...),
llb.WithCustomName("Copying stable diffusion backend"), //nolint: goconst
)
return merge
}

func addLocalAI(c *config.Config, s llb.State, merge llb.State) (llb.State, llb.State) {
savedState := s
var localAIURL string
Expand All @@ -115,7 +170,7 @@ func addLocalAI(c *config.Config, s llb.State, merge llb.State) (llb.State, llb.
opts = append(opts, llb.Chmod(0o755))
localAI := llb.HTTP(localAIURL, opts...)
s = s.File(
llb.Copy(localAI, "local-ai", "/usr/bin"),
llb.Copy(localAI, "local-ai", "/usr/bin/local-ai"),
llb.WithCustomName("Copying "+fileNameFromURL(localAIURL)+" to /usr/bin"), //nolint: goconst
)

Expand Down
File renamed without changes.
50 changes: 50 additions & 0 deletions test/aikitfile-stablediffusion.yaml
Original file line number Diff line number Diff line change
@@ -0,0 +1,50 @@
#syntax=aikit:test
# CI test variant of examples/stablediffusion.yaml: builds against the
# locally-built aikit frontend (aikit:test) instead of the released image.
# Same Stable Diffusion NCNN asset list, each pinned by sha256.
apiVersion: v1alpha1
debug: true
backends:
- stablediffusion
models:
- name: "stablediffusion_assets/AutoencoderKL-256-256-fp16-opt.param"
sha256: "18ca4b66685e21406bcf64c484b3b680b4949900415536d599cc876579c85c82"
source: "https://raw.githubusercontent.com/EdVince/Stable-Diffusion-NCNN/main/x86/linux/assets/AutoencoderKL-256-256-fp16-opt.param"
- name: "stablediffusion_assets/AutoencoderKL-512-512-fp16-opt.param"
sha256: "cf45f63aacf3dbbab0f59ed92a6f2c14d9a1801314631cd3abe91e3c85639a20"
source: "https://raw.githubusercontent.com/EdVince/Stable-Diffusion-NCNN/main/x86/linux/assets/AutoencoderKL-512-512-fp16-opt.param"
- name: "stablediffusion_assets/AutoencoderKL-base-fp16.param"
sha256: "0254a056dce61b0c27dc9ec1b78b53bcf55315c540f55f051eb841aa992701ba"
source: "https://raw.githubusercontent.com/EdVince/Stable-Diffusion-NCNN/main/x86/linux/assets/AutoencoderKL-base-fp16.param"
- name: "stablediffusion_assets/AutoencoderKL-encoder-512-512-fp16.bin"
sha256: "ddcb79a9951b9f91e05e087739ed69da2c1c4ae30ba4168cce350b49d617c9fa"
source: "https://github.com/lenaxia/stablediffusion-bins/releases/download/2023.05.24/AutoencoderKL-encoder-512-512-fp16.bin"
- name: "stablediffusion_assets/AutoencoderKL-fp16.bin"
sha256: "f02e71f80e70252734724bbfaed5c4ddd3a8ed7e61bb2175ff5f53099f0e35dd"
source: "https://github.com/lenaxia/stablediffusion-bins/releases/download/2023.05.24/AutoencoderKL-fp16.bin"
- name: "stablediffusion_assets/FrozenCLIPEmbedder-fp16.bin"
sha256: "1c9a12f4e1dd1b295a388045f7f28a2352a4d70c3dc96a542189a3dd7051fdd6"
source: "https://github.com/lenaxia/stablediffusion-bins/releases/download/2023.05.24/FrozenCLIPEmbedder-fp16.bin"
- name: "stablediffusion_assets/FrozenCLIPEmbedder-fp16.param"
sha256: "471afbe678dd1fd3fe764ef9c6eccaccb0a7d7e601f27b462aa926b20eb368c9"
source: "https://raw.githubusercontent.com/EdVince/Stable-Diffusion-NCNN/main/x86/linux/assets/FrozenCLIPEmbedder-fp16.param"
- name: "stablediffusion_assets/log_sigmas.bin"
sha256: "a2089f8aa4c61f9c200feaec541ab3f5c94233b28deb6d5e8bcd974fa79b68ac"
source: "https://github.com/EdVince/Stable-Diffusion-NCNN/raw/main/x86/linux/assets/log_sigmas.bin"
- name: "stablediffusion_assets/UNetModel-256-256-MHA-fp16-opt.param"
sha256: "a58c380229f09491776df837b7aa7adffc0a87821dc4708b34535da2e36e3da1"
source: "https://raw.githubusercontent.com/EdVince/Stable-Diffusion-NCNN/main/x86/linux/assets/UNetModel-256-256-MHA-fp16-opt.param"
- name: "stablediffusion_assets/UNetModel-512-512-MHA-fp16-opt.param"
sha256: "f12034067062827bd7f43d1d21888d1f03905401acf6c6eea22be23c259636fa"
source: "https://raw.githubusercontent.com/EdVince/Stable-Diffusion-NCNN/main/x86/linux/assets/UNetModel-512-512-MHA-fp16-opt.param"
- name: "stablediffusion_assets/UNetModel-base-MHA-fp16.param"
sha256: "696f6975de49f4325b53ce32aff81861a6d6c07cd9ce3f0aae2cc405350af38d"
source: "https://raw.githubusercontent.com/EdVince/Stable-Diffusion-NCNN/main/x86/linux/assets/UNetModel-base-MHA-fp16.param"
- name: "stablediffusion_assets/UNetModel-MHA-fp16.bin"
sha256: "d618918d011bfc1f644c0f2a33bf84931bd53b28a98492b0a8ed6f3a818852c3"
source: "https://github.com/lenaxia/stablediffusion-bins/releases/download/2023.05.24/UNetModel-MHA-fp16.bin"
- name: "stablediffusion_assets/vocab.txt"
sha256: "e30e57b6f1e47616982ef898d8922be24e535b4fa3d0110477b3a6f02ebbae7d"
source: "https://raw.githubusercontent.com/EdVince/Stable-Diffusion-NCNN/main/x86/linux/assets/vocab.txt"
# Model configuration passed through to LocalAI verbatim (block scalar:
# the lines below are part of the string value, so no comments inside it).
config: |
- name: stablediffusion
backend: stablediffusion
parameters:
model: stablediffusion_assets

0 comments on commit 0cd6d6c

Please sign in to comment.