diff --git a/.github/workflows/test-docker.yaml b/.github/workflows/test-docker.yaml index eb73b30c..4413db24 100644 --- a/.github/workflows/test-docker.yaml +++ b/.github/workflows/test-docker.yaml @@ -18,6 +18,12 @@ jobs: test: runs-on: ubuntu-latest timeout-minutes: 240 + strategy: + fail-fast: false + matrix: + backend: + - llama + - stablediffusion steps: - name: Harden Runner uses: step-security/harden-runner@eb238b55efaa70779f274895e782ed17c84f2895 # v2.6.1 @@ -56,7 +62,7 @@ jobs: - name: build test model run: | docker buildx build . -t testmodel:test \ - -f test/aikitfile.yaml \ + -f test/aikitfile-${{ matrix.backend }}.yaml \ --load --progress plain \ --cache-from=type=gha,scope=testmodel \ --cache-to=type=gha,scope=testmodel,mode=max @@ -70,18 +76,36 @@ jobs: - name: install e2e dependencies run: make test-e2e-dependencies - - name: run test + - name: run llama test + if: matrix.backend == 'llama' run: | - curl http://127.0.0.1:8080/v1/chat/completions -H "Content-Type: application/json" -d '{"model": "llama-2-7b-chat", "messages": [{"role": "user", "content": "explain kubernetes in a sentence"}]}' + curl http://127.0.0.1:8080/v1/chat/completions -H "Content-Type: application/json" -d '{ + "model": "llama-2-7b-chat", + "messages": [{"role": "user", "content": "explain kubernetes in a sentence"}] + }' + + - name: run stablediffusion test + if: matrix.backend == 'stablediffusion' + run: | + curl http://127.0.0.1:8080/v1/images/generations -H "Content-Type: application/json" -d '{ + "prompt": "A cute baby llama", + "size": "256x256" + }' - name: save logs if: always() - run: | - docker logs testmodel > docker.log + run: docker logs testmodel > /tmp/docker-${{ matrix.backend }}.log - - name: publish logs + - name: save generated image + if: matrix.backend == 'stablediffusion' + run: docker cp testmodel:/tmp/generated/images /tmp + + - name: publish test artifacts if: always() uses: actions/upload-artifact@c7d193f32edcb7bfad88892161225aeda64e9392 # v4.0.0 with: - name: docker-logs - path: docker.log + name: test-${{ matrix.backend }} + path: | + /tmp/*.log + /tmp/images/*.png + diff --git a/.github/workflows/test-kubernetes.yaml b/.github/workflows/test-kubernetes.yaml index ee4d9b72..c9ced343 100644 --- a/.github/workflows/test-kubernetes.yaml +++ b/.github/workflows/test-kubernetes.yaml @@ -58,7 +58,7 @@ jobs: - name: build test model run: | docker buildx build . -t testmodel:test \ - -f test/aikitfile.yaml \ + -f test/aikitfile-llama.yaml \ --load --progress plain \ --cache-from=type=gha,scope=testmodel \ --cache-to=type=gha,scope=testmodel,mode=max diff --git a/Makefile b/Makefile index bbe36043..fbd87699 100644 --- a/Makefile +++ b/Makefile @@ -5,6 +5,7 @@ KIND_VERSION ?= 0.20.0 KUBERNETES_VERSION ?= 1.28.0 TAG ?= test OUTPUT_TYPE ?= type=docker +TEST_FILE ?= test/aikitfile-llama.yaml PULL ?= NO_CACHE ?= @@ -18,7 +19,7 @@ build-aikit: .PHONY: build-test-model build-test-model: - docker buildx build . -t ${REGISTRY}/testmodel:${TAG} -f test/aikitfile.yaml --output=${OUTPUT_TYPE} + docker buildx build . -t ${REGISTRY}/testmodel:${TAG} -f ${TEST_FILE} --output=${OUTPUT_TYPE} .PHONY: run-test-model run-test-model: diff --git a/README.md b/README.md index 5b17418d..38ed3101 100644 --- a/README.md +++ b/README.md @@ -9,7 +9,7 @@ AIKit is a quick, easy, and local or cloud-agnostic way to get started to host a AIKit uses [LocalAI](https://localai.io/) under-the-hood to run inference. LocalAI provides a drop-in replacement REST API that is OpenAI API compatible, so you can use any OpenAI API compatible client, such as [Kubectl AI](https://github.com/sozercan/kubectl-ai), [Chatbot-UI](https://github.com/sozercan/chatbot-ui) and many more, to send requests to open-source LLMs powered by AIKit! > [!NOTE] -> At this time, AIKit is tested with LocalAI `llama` backend. Other backends may work but are not tested. Please open an issue if you'd like to see support for other backends. +> At this time, AIKit is tested with LocalAI `llama` and `stablediffusion` backends. Other backends may work but are not tested. Please open an issue if you'd like to see support for other backends. ## Features @@ -17,8 +17,9 @@ AIKit uses [LocalAI](https://localai.io/) under-the-hood to run inference. Local - 🤏 Minimal image size, resulting in less vulnerabilities and smaller attack surface with a custom [distroless](https://github.com/GoogleContainerTools/distroless)-based image - 🚀 [Easy to use declarative configuration](./docs/specs.md) - ✨ OpenAI API compatible to use with any OpenAI API compatible client -- 🖼️ [Multi-modal model support](./docs/demo.md#vision-with-llava) -- 🚢 Kubernetes deployment ready +- 📸 [Multi-modal model support](./docs/demo.md#vision-with-llava) +- 🖼️ Image generation support with Stable Diffusion +- 🚢 [Kubernetes deployment ready](#kubernetes-deployment) - 📦 Supports multiple models with a single image - 🖥️ [Supports GPU-accelerated inferencing with NVIDIA GPUs](#nvidia) - 🔐 [Signed images for `aikit` and pre-made models](./docs/cosign.md) diff --git a/docs/specs.md b/docs/specs.md index b1cf6df9..3533e0c4 100644 --- a/docs/specs.md +++ b/docs/specs.md @@ -5,7 +5,8 @@ ```yaml apiVersion: # required. only v1alpha1 is supported at the moment debug: # optional. if set to true, will print debug logs -runtime: # optional. defaults to avx. can be avx, avx2, avx512, cuda +runtime: # optional. defaults to avx. can be "avx", "avx2", "avx512", "cuda" +backends: # optional. list of additional backends. can be "stablediffusion" models: # required. list of models to build - name: # required. name of the model source: # required. source of the model. must be a url @@ -23,6 +24,8 @@ Example: apiVersion: v1alpha1 debug: true runtime: cuda +backends: + - stablediffusion models: - name: llama-2-7b-chat source: https://huggingface.co/TheBloke/Llama-2-7B-Chat-GGUF/resolve/main/llama-2-7b-chat.Q4_K_M.gguf diff --git a/examples/stablediffusion.yaml b/examples/stablediffusion.yaml new file mode 100644 index 00000000..4a7de950 --- /dev/null +++ b/examples/stablediffusion.yaml @@ -0,0 +1,50 @@ +#syntax=ghcr.io/sozercan/aikit:latest +apiVersion: v1alpha1 +debug: true +backends: + - stablediffusion +models: + - name: "stablediffusion_assets/AutoencoderKL-256-256-fp16-opt.param" + sha256: "18ca4b66685e21406bcf64c484b3b680b4949900415536d599cc876579c85c82" + source: "https://raw.githubusercontent.com/EdVince/Stable-Diffusion-NCNN/main/x86/linux/assets/AutoencoderKL-256-256-fp16-opt.param" + - name: "stablediffusion_assets/AutoencoderKL-512-512-fp16-opt.param" + sha256: "cf45f63aacf3dbbab0f59ed92a6f2c14d9a1801314631cd3abe91e3c85639a20" + source: "https://raw.githubusercontent.com/EdVince/Stable-Diffusion-NCNN/main/x86/linux/assets/AutoencoderKL-512-512-fp16-opt.param" + - name: "stablediffusion_assets/AutoencoderKL-base-fp16.param" + sha256: "0254a056dce61b0c27dc9ec1b78b53bcf55315c540f55f051eb841aa992701ba" + source: "https://raw.githubusercontent.com/EdVince/Stable-Diffusion-NCNN/main/x86/linux/assets/AutoencoderKL-base-fp16.param" + - name: "stablediffusion_assets/AutoencoderKL-encoder-512-512-fp16.bin" + sha256: "ddcb79a9951b9f91e05e087739ed69da2c1c4ae30ba4168cce350b49d617c9fa" + source: "https://github.com/lenaxia/stablediffusion-bins/releases/download/2023.05.24/AutoencoderKL-encoder-512-512-fp16.bin" + - name: "stablediffusion_assets/AutoencoderKL-fp16.bin" + sha256: "f02e71f80e70252734724bbfaed5c4ddd3a8ed7e61bb2175ff5f53099f0e35dd" + source: "https://github.com/lenaxia/stablediffusion-bins/releases/download/2023.05.24/AutoencoderKL-fp16.bin" + - name: "stablediffusion_assets/FrozenCLIPEmbedder-fp16.bin" + sha256: "1c9a12f4e1dd1b295a388045f7f28a2352a4d70c3dc96a542189a3dd7051fdd6" + source: "https://github.com/lenaxia/stablediffusion-bins/releases/download/2023.05.24/FrozenCLIPEmbedder-fp16.bin" + - name: "stablediffusion_assets/FrozenCLIPEmbedder-fp16.param" + sha256: "471afbe678dd1fd3fe764ef9c6eccaccb0a7d7e601f27b462aa926b20eb368c9" + source: "https://raw.githubusercontent.com/EdVince/Stable-Diffusion-NCNN/main/x86/linux/assets/FrozenCLIPEmbedder-fp16.param" + - name: "stablediffusion_assets/log_sigmas.bin" + sha256: "a2089f8aa4c61f9c200feaec541ab3f5c94233b28deb6d5e8bcd974fa79b68ac" + source: "https://github.com/EdVince/Stable-Diffusion-NCNN/raw/main/x86/linux/assets/log_sigmas.bin" + - name: "stablediffusion_assets/UNetModel-256-256-MHA-fp16-opt.param" + sha256: "a58c380229f09491776df837b7aa7adffc0a87821dc4708b34535da2e36e3da1" + source: "https://raw.githubusercontent.com/EdVince/Stable-Diffusion-NCNN/main/x86/linux/assets/UNetModel-256-256-MHA-fp16-opt.param" + - name: "stablediffusion_assets/UNetModel-512-512-MHA-fp16-opt.param" + sha256: "f12034067062827bd7f43d1d21888d1f03905401acf6c6eea22be23c259636fa" + source: "https://raw.githubusercontent.com/EdVince/Stable-Diffusion-NCNN/main/x86/linux/assets/UNetModel-512-512-MHA-fp16-opt.param" + - name: "stablediffusion_assets/UNetModel-base-MHA-fp16.param" + sha256: "696f6975de49f4325b53ce32aff81861a6d6c07cd9ce3f0aae2cc405350af38d" + source: "https://raw.githubusercontent.com/EdVince/Stable-Diffusion-NCNN/main/x86/linux/assets/UNetModel-base-MHA-fp16.param" + - name: "stablediffusion_assets/UNetModel-MHA-fp16.bin" + sha256: "d618918d011bfc1f644c0f2a33bf84931bd53b28a98492b0a8ed6f3a818852c3" + source: "https://github.com/lenaxia/stablediffusion-bins/releases/download/2023.05.24/UNetModel-MHA-fp16.bin" + - name: "stablediffusion_assets/vocab.txt" + sha256: "e30e57b6f1e47616982ef898d8922be24e535b4fa3d0110477b3a6f02ebbae7d" + source: "https://raw.githubusercontent.com/EdVince/Stable-Diffusion-NCNN/main/x86/linux/assets/vocab.txt" +config: | + - name: stablediffusion + backend: stablediffusion + parameters: + model: stablediffusion_assets diff --git a/pkg/aikit/config/specs.go b/pkg/aikit/config/specs.go index ea9b26bf..af769fd0 100644 --- a/pkg/aikit/config/specs.go +++ b/pkg/aikit/config/specs.go @@ -14,11 +14,12 @@ func NewFromBytes(b []byte) (*Config, error) { } type Config struct { - APIVersion string `yaml:"apiVersion"` - Debug bool `yaml:"debug,omitempty"` - Runtime string `yaml:"runtime,omitempty"` - Models []Model `yaml:"models"` - Config string `yaml:"config,omitempty"` + APIVersion string `yaml:"apiVersion"` + Debug bool `yaml:"debug,omitempty"` + Runtime string `yaml:"runtime,omitempty"` + Backends []string `yaml:"backends,omitempty"` + Models []Model `yaml:"models"` + Config string `yaml:"config,omitempty"` } type Model struct { diff --git a/pkg/aikit2llb/convert.go b/pkg/aikit2llb/convert.go index 2903e822..146d108d 100644 --- a/pkg/aikit2llb/convert.go +++ b/pkg/aikit2llb/convert.go @@ -4,6 +4,7 @@ import ( "fmt" "net/url" "path" + "strings" "github.com/moby/buildkit/client/llb" "github.com/opencontainers/go-digest" @@ -21,18 +22,39 @@ const ( func Aikit2LLB(c *config.Config) (llb.State, *specs.Image) { var merge llb.State - s := llb.Image(debianSlim) - s, merge = copyModels(c, s) - s, merge = addLocalAI(c, s, merge) + state := llb.Image(debianSlim) + base := getBaseImage(c) + + state, merge = copyModels(c, base, state) + state, merge = addLocalAI(c, state, merge) + + // install cuda if runtime is nvidia if c.Runtime == utils.RuntimeNVIDIA { - merge = installCuda(s, merge) + merge = installCuda(state, merge) + } + + // install opencv and friends if stable diffusion backend is being used + for b := range c.Backends { + if strings.Contains(c.Backends[b], "stablediffusion") { + merge = installOpenCV(state, merge) + } } + imageCfg := NewImageConfig(c) return merge, imageCfg } -func copyModels(c *config.Config, s llb.State) (llb.State, llb.State) { - db := llb.Image(distrolessBase) +func getBaseImage(c *config.Config) llb.State { + for b := range c.Backends { + if strings.Contains(c.Backends[b], "stablediffusion") { + // due to too many dependencies, using debian slim as base for stable diffusion + return llb.Image(debianSlim) + } + } + return llb.Image(distrolessBase) +} + +func copyModels(c *config.Config, base llb.State, s llb.State) (llb.State, llb.State) { savedState := s // create config file if defined @@ -50,13 +72,20 @@ func copyModels(c *config.Config, s llb.State) (llb.State, llb.State) { m := llb.HTTP(model.Source, opts...) + var modelPath string + if strings.Contains(model.Name, "/") { + modelPath = "/models/" + path.Dir(model.Name) + "/" + fileNameFromURL(model.Source) + } else { + modelPath = "/models/" + fileNameFromURL(model.Source) + } + var copyOpts []llb.CopyOption copyOpts = append(copyOpts, &llb.CopyInfo{ CreateDestPath: true, }) s = s.File( - llb.Copy(m, fileNameFromURL(model.Source), "/models/"+fileNameFromURL(model.Source), copyOpts...), - llb.WithCustomName("Copying "+fileNameFromURL(model.Source)+" to /models"), //nolint: goconst + llb.Copy(m, fileNameFromURL(model.Source), modelPath, copyOpts...), + llb.WithCustomName("Copying "+fileNameFromURL(model.Source)+" to "+modelPath), //nolint: goconst ) // create prompt templates if defined @@ -67,7 +96,7 @@ func copyModels(c *config.Config, s llb.State) (llb.State, llb.State) { } } diff := llb.Diff(savedState, s) - merge := llb.Merge([]llb.State{db, diff}) + merge := llb.Merge([]llb.State{base, diff}) return s, merge } @@ -87,6 +116,7 @@ func installCuda(s llb.State, merge llb.State) llb.State { llb.WithCustomName("Copying "+fileNameFromURL(cudaKeyringURL)), //nolint: goconst ) s = s.Run(shf("dpkg -i cuda-keyring_1.1-1_all.deb && rm cuda-keyring_1.1-1_all.deb")).Root() + // running apt-get update twice due to nvidia repo s = s.Run(shf("apt-get update && apt-get install -y ca-certificates && apt-get update"), llb.IgnoreCache).Root() savedState := s s = s.Run(shf("apt-get install -y libcublas-%[1]s cuda-cudart-%[1]s && apt-get clean", cudaVersion)).Root() @@ -96,6 +126,31 @@ func installCuda(s llb.State, merge llb.State) llb.State { return merge } +func installOpenCV(s llb.State, merge llb.State) llb.State { + savedState := s + // adding debian 11 (bullseye) repo due to opencv 4.5 requirement + s = s.Run(shf("echo 'deb http://deb.debian.org/debian bullseye main' | tee -a /etc/apt/sources.list")).Root() + // pinning libdap packages to bullseye version due to symbol error + s = s.Run(shf("apt-get update && mkdir -p /tmp/generated/images && apt-get install -y libopencv-imgcodecs4.5 libgomp1 libdap27=3.20.7-6 libdapclient6v5=3.20.7-6 && apt-get clean"), llb.IgnoreCache).Root() + diff := llb.Diff(savedState, s) + merge = llb.Merge([]llb.State{merge, diff}) + + sdURL := fmt.Sprintf("https://sertaccdn.azureedge.net/localai/%s/stablediffusion", localAIVersion) + var opts []llb.HTTPOption + opts = append(opts, llb.Filename("stablediffusion")) + opts = append(opts, llb.Chmod(0o755)) + var copyOpts []llb.CopyOption + copyOpts = append(copyOpts, &llb.CopyInfo{ + CreateDestPath: true, + }) + sd := llb.HTTP(sdURL, opts...) + merge = merge.File( + llb.Copy(sd, "stablediffusion", "/tmp/localai/backend_data/backend-assets/grpc/stablediffusion", copyOpts...), + llb.WithCustomName("Copying stable diffusion backend"), //nolint: goconst + ) + return merge +} + func addLocalAI(c *config.Config, s llb.State, merge llb.State) (llb.State, llb.State) { savedState := s var localAIURL string @@ -115,7 +170,7 @@ func addLocalAI(c *config.Config, s llb.State, merge llb.State) (llb.State, llb. opts = append(opts, llb.Chmod(0o755)) localAI := llb.HTTP(localAIURL, opts...) s = s.File( - llb.Copy(localAI, "local-ai", "/usr/bin"), + llb.Copy(localAI, "local-ai", "/usr/bin/local-ai"), llb.WithCustomName("Copying "+fileNameFromURL(localAIURL)+" to /usr/bin"), //nolint: goconst ) diff --git a/test/aikitfile.yaml b/test/aikitfile-llama.yaml similarity index 100% rename from test/aikitfile.yaml rename to test/aikitfile-llama.yaml diff --git a/test/aikitfile-stablediffusion.yaml b/test/aikitfile-stablediffusion.yaml new file mode 100644 index 00000000..e296818d --- /dev/null +++ b/test/aikitfile-stablediffusion.yaml @@ -0,0 +1,50 @@ +#syntax=aikit:test +apiVersion: v1alpha1 +debug: true +backends: + - stablediffusion +models: + - name: "stablediffusion_assets/AutoencoderKL-256-256-fp16-opt.param" + sha256: "18ca4b66685e21406bcf64c484b3b680b4949900415536d599cc876579c85c82" + source: "https://raw.githubusercontent.com/EdVince/Stable-Diffusion-NCNN/main/x86/linux/assets/AutoencoderKL-256-256-fp16-opt.param" + - name: "stablediffusion_assets/AutoencoderKL-512-512-fp16-opt.param" + sha256: "cf45f63aacf3dbbab0f59ed92a6f2c14d9a1801314631cd3abe91e3c85639a20" + source: "https://raw.githubusercontent.com/EdVince/Stable-Diffusion-NCNN/main/x86/linux/assets/AutoencoderKL-512-512-fp16-opt.param" + - name: "stablediffusion_assets/AutoencoderKL-base-fp16.param" + sha256: "0254a056dce61b0c27dc9ec1b78b53bcf55315c540f55f051eb841aa992701ba" + source: "https://raw.githubusercontent.com/EdVince/Stable-Diffusion-NCNN/main/x86/linux/assets/AutoencoderKL-base-fp16.param" + - name: "stablediffusion_assets/AutoencoderKL-encoder-512-512-fp16.bin" + sha256: "ddcb79a9951b9f91e05e087739ed69da2c1c4ae30ba4168cce350b49d617c9fa" + source: "https://github.com/lenaxia/stablediffusion-bins/releases/download/2023.05.24/AutoencoderKL-encoder-512-512-fp16.bin" + - name: "stablediffusion_assets/AutoencoderKL-fp16.bin" + sha256: "f02e71f80e70252734724bbfaed5c4ddd3a8ed7e61bb2175ff5f53099f0e35dd" + source: "https://github.com/lenaxia/stablediffusion-bins/releases/download/2023.05.24/AutoencoderKL-fp16.bin" + - name: "stablediffusion_assets/FrozenCLIPEmbedder-fp16.bin" + sha256: "1c9a12f4e1dd1b295a388045f7f28a2352a4d70c3dc96a542189a3dd7051fdd6" + source: "https://github.com/lenaxia/stablediffusion-bins/releases/download/2023.05.24/FrozenCLIPEmbedder-fp16.bin" + - name: "stablediffusion_assets/FrozenCLIPEmbedder-fp16.param" + sha256: "471afbe678dd1fd3fe764ef9c6eccaccb0a7d7e601f27b462aa926b20eb368c9" + source: "https://raw.githubusercontent.com/EdVince/Stable-Diffusion-NCNN/main/x86/linux/assets/FrozenCLIPEmbedder-fp16.param" + - name: "stablediffusion_assets/log_sigmas.bin" + sha256: "a2089f8aa4c61f9c200feaec541ab3f5c94233b28deb6d5e8bcd974fa79b68ac" + source: "https://github.com/EdVince/Stable-Diffusion-NCNN/raw/main/x86/linux/assets/log_sigmas.bin" + - name: "stablediffusion_assets/UNetModel-256-256-MHA-fp16-opt.param" + sha256: "a58c380229f09491776df837b7aa7adffc0a87821dc4708b34535da2e36e3da1" + source: "https://raw.githubusercontent.com/EdVince/Stable-Diffusion-NCNN/main/x86/linux/assets/UNetModel-256-256-MHA-fp16-opt.param" + - name: "stablediffusion_assets/UNetModel-512-512-MHA-fp16-opt.param" + sha256: "f12034067062827bd7f43d1d21888d1f03905401acf6c6eea22be23c259636fa" + source: "https://raw.githubusercontent.com/EdVince/Stable-Diffusion-NCNN/main/x86/linux/assets/UNetModel-512-512-MHA-fp16-opt.param" + - name: "stablediffusion_assets/UNetModel-base-MHA-fp16.param" + sha256: "696f6975de49f4325b53ce32aff81861a6d6c07cd9ce3f0aae2cc405350af38d" + source: "https://raw.githubusercontent.com/EdVince/Stable-Diffusion-NCNN/main/x86/linux/assets/UNetModel-base-MHA-fp16.param" + - name: "stablediffusion_assets/UNetModel-MHA-fp16.bin" + sha256: "d618918d011bfc1f644c0f2a33bf84931bd53b28a98492b0a8ed6f3a818852c3" + source: "https://github.com/lenaxia/stablediffusion-bins/releases/download/2023.05.24/UNetModel-MHA-fp16.bin" + - name: "stablediffusion_assets/vocab.txt" + sha256: "e30e57b6f1e47616982ef898d8922be24e535b4fa3d0110477b3a6f02ebbae7d" + source: "https://raw.githubusercontent.com/EdVince/Stable-Diffusion-NCNN/main/x86/linux/assets/vocab.txt" +config: | + - name: stablediffusion + backend: stablediffusion + parameters: + model: stablediffusion_assets