feat: add stable diffusion support #34

Merged 12 commits on Dec 18, 2023
40 changes: 32 additions & 8 deletions .github/workflows/test-docker.yaml
@@ -18,6 +18,12 @@ jobs:
test:
runs-on: ubuntu-latest
timeout-minutes: 240
strategy:
fail-fast: false
matrix:
backend:
- llama
- stablediffusion
steps:
- name: Harden Runner
uses: step-security/harden-runner@eb238b55efaa70779f274895e782ed17c84f2895 # v2.6.1
@@ -56,7 +62,7 @@ jobs:
- name: build test model
run: |
docker buildx build . -t testmodel:test \
-f test/aikitfile.yaml \
-f test/aikitfile-${{ matrix.backend }}.yaml \
--load --progress plain \
--cache-from=type=gha,scope=testmodel \
--cache-to=type=gha,scope=testmodel,mode=max
@@ -70,18 +76,36 @@ jobs:
- name: install e2e dependencies
run: make test-e2e-dependencies

- name: run test
- name: run llama test
if: matrix.backend == 'llama'
run: |
curl http://127.0.0.1:8080/v1/chat/completions -H "Content-Type: application/json" -d '{"model": "llama-2-7b-chat", "messages": [{"role": "user", "content": "explain kubernetes in a sentence"}]}'
curl http://127.0.0.1:8080/v1/chat/completions -H "Content-Type: application/json" -d '{
"model": "llama-2-7b-chat",
"messages": [{"role": "user", "content": "explain kubernetes in a sentence"}]
}'

- name: run stablediffusion test
if: matrix.backend == 'stablediffusion'
run: |
curl http://127.0.0.1:8080/v1/images/generations -H "Content-Type: application/json" -d '{
"prompt": "A cute baby llama",
"size": "256x256"
}'

- name: save logs
if: always()
run: |
docker logs testmodel > docker.log
run: docker logs testmodel > /tmp/docker-${{ matrix.backend }}.log

- name: publish logs
- name: save generated image
if: matrix.backend == 'stablediffusion'
run: docker cp testmodel:/tmp/generated/images /tmp

- name: publish test artifacts
if: always()
uses: actions/upload-artifact@c7d193f32edcb7bfad88892161225aeda64e9392 # v4.0.0
with:
name: docker-logs
path: docker.log
name: test-${{ matrix.backend }}
path: |
/tmp/*.log
/tmp/images/*.png

2 changes: 1 addition & 1 deletion .github/workflows/test-kubernetes.yaml
@@ -58,7 +58,7 @@ jobs:
- name: build test model
run: |
docker buildx build . -t testmodel:test \
-f test/aikitfile.yaml \
-f test/aikitfile-llama.yaml \
--load --progress plain \
--cache-from=type=gha,scope=testmodel \
--cache-to=type=gha,scope=testmodel,mode=max
3 changes: 2 additions & 1 deletion Makefile
@@ -5,6 +5,7 @@ KIND_VERSION ?= 0.20.0
KUBERNETES_VERSION ?= 1.28.0
TAG ?= test
OUTPUT_TYPE ?= type=docker
TEST_FILE ?= test/aikitfile-llama.yaml
PULL ?=
NO_CACHE ?=

@@ -18,7 +19,7 @@ build-aikit:

.PHONY: build-test-model
build-test-model:
docker buildx build . -t ${REGISTRY}/testmodel:${TAG} -f test/aikitfile.yaml --output=${OUTPUT_TYPE}
docker buildx build . -t ${REGISTRY}/testmodel:${TAG} -f ${TEST_FILE} --output=${OUTPUT_TYPE}

.PHONY: run-test-model
run-test-model:
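The new `TEST_FILE ?= test/aikitfile-llama.yaml` default lets the build target be pointed at either aikitfile without editing the Makefile. A minimal sketch of the override semantics — the `make` invocation in the comment reflects the target added in this PR, while the shell expansion below only simulates make's `?=` conditional assignment:

```shell
# Real usage from the repo root would be, e.g.:
#   make build-test-model TEST_FILE=test/aikitfile-stablediffusion.yaml
# Simulate `?=`: use the caller-supplied TEST_FILE if set, else the llama default.
test_file="${TEST_FILE:-test/aikitfile-llama.yaml}"
echo "default build uses ${test_file}"

# An explicit override always wins, as with `make VAR=value`.
TEST_FILE="test/aikitfile-stablediffusion.yaml"
test_file_override="${TEST_FILE:-test/aikitfile-llama.yaml}"
echo "override build uses ${test_file_override}"
```

Because `?=` only assigns when the variable is unset, CI can export `TEST_FILE` once per matrix leg and reuse the same target for both backends.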
7 changes: 4 additions & 3 deletions README.md
@@ -9,16 +9,17 @@ AIKit is a quick, easy, and local or cloud-agnostic way to get started to host a
AIKit uses [LocalAI](https://localai.io/) under-the-hood to run inference. LocalAI provides a drop-in replacement REST API that is OpenAI API compatible, so you can use any OpenAI API compatible client, such as [Kubectl AI](https://github.com/sozercan/kubectl-ai), [Chatbot-UI](https://github.com/sozercan/chatbot-ui) and many more, to send requests to open-source LLMs powered by AIKit!

> [!NOTE]
> At this time, AIKit is tested with LocalAI `llama` backend. Other backends may work but are not tested. Please open an issue if you'd like to see support for other backends.
> At this time, AIKit is tested with LocalAI `llama` and `stablediffusion` backends. Other backends may work but are not tested. Please open an issue if you'd like to see support for other backends.

## Features

- 🐳 No GPU, Internet access or additional tools needed except for [Docker](https://docs.docker.com/desktop/install/linux-install/)!
- 🤏 Minimal image size, resulting in fewer vulnerabilities and a smaller attack surface with a custom [distroless](https://github.com/GoogleContainerTools/distroless)-based image
- 🚀 [Easy to use declarative configuration](./docs/specs.md)
- ✨ OpenAI API compatible to use with any OpenAI API compatible client
- 🖼️ [Multi-modal model support](./docs/demo.md#vision-with-llava)
- 🚢 Kubernetes deployment ready
- 📸 [Multi-modal model support](./docs/demo.md#vision-with-llava)
- 🖼️ Image generation support with Stable Diffusion
- 🚢 [Kubernetes deployment ready](#kubernetes-deployment)
- 📦 Supports multiple models with a single image
- 🖥️ [Supports GPU-accelerated inferencing with NVIDIA GPUs](#nvidia)
- 🔐 [Signed images for `aikit` and pre-made models](./docs/cosign.md)
5 changes: 4 additions & 1 deletion docs/specs.md
@@ -5,7 +5,8 @@
```yaml
apiVersion: # required. only v1alpha1 is supported at the moment
debug: # optional. if set to true, will print debug logs
runtime: # optional. defaults to avx. can be avx, avx2, avx512, cuda
runtime: # optional. defaults to avx. can be "avx", "avx2", "avx512", "cuda"
backends: # optional. list of additional backends. can be "stablediffusion"
models: # required. list of models to build
- name: # required. name of the model
source: # required. source of the model. must be a url
@@ -23,6 +24,8 @@ Example:
apiVersion: v1alpha1
debug: true
runtime: cuda
backends:
- stablediffusion
models:
- name: llama-2-7b-chat
source: https://huggingface.co/TheBloke/Llama-2-7B-Chat-GGUF/resolve/main/llama-2-7b-chat.Q4_K_M.gguf
Expand Down
50 changes: 50 additions & 0 deletions examples/stablediffusion.yaml
@@ -0,0 +1,50 @@
#syntax=ghcr.io/sozercan/aikit:latest
apiVersion: v1alpha1
debug: true
backends:
- stablediffusion
models:
- name: "stablediffusion_assets/AutoencoderKL-256-256-fp16-opt.param"
sha256: "18ca4b66685e21406bcf64c484b3b680b4949900415536d599cc876579c85c82"
source: "https://raw.githubusercontent.com/EdVince/Stable-Diffusion-NCNN/main/x86/linux/assets/AutoencoderKL-256-256-fp16-opt.param"
- name: "stablediffusion_assets/AutoencoderKL-512-512-fp16-opt.param"
sha256: "cf45f63aacf3dbbab0f59ed92a6f2c14d9a1801314631cd3abe91e3c85639a20"
source: "https://raw.githubusercontent.com/EdVince/Stable-Diffusion-NCNN/main/x86/linux/assets/AutoencoderKL-512-512-fp16-opt.param"
- name: "stablediffusion_assets/AutoencoderKL-base-fp16.param"
sha256: "0254a056dce61b0c27dc9ec1b78b53bcf55315c540f55f051eb841aa992701ba"
source: "https://raw.githubusercontent.com/EdVince/Stable-Diffusion-NCNN/main/x86/linux/assets/AutoencoderKL-base-fp16.param"
- name: "stablediffusion_assets/AutoencoderKL-encoder-512-512-fp16.bin"
sha256: "ddcb79a9951b9f91e05e087739ed69da2c1c4ae30ba4168cce350b49d617c9fa"
source: "https://github.com/lenaxia/stablediffusion-bins/releases/download/2023.05.24/AutoencoderKL-encoder-512-512-fp16.bin"
- name: "stablediffusion_assets/AutoencoderKL-fp16.bin"
sha256: "f02e71f80e70252734724bbfaed5c4ddd3a8ed7e61bb2175ff5f53099f0e35dd"
source: "https://github.com/lenaxia/stablediffusion-bins/releases/download/2023.05.24/AutoencoderKL-fp16.bin"
- name: "stablediffusion_assets/FrozenCLIPEmbedder-fp16.bin"
sha256: "1c9a12f4e1dd1b295a388045f7f28a2352a4d70c3dc96a542189a3dd7051fdd6"
source: "https://github.com/lenaxia/stablediffusion-bins/releases/download/2023.05.24/FrozenCLIPEmbedder-fp16.bin"
- name: "stablediffusion_assets/FrozenCLIPEmbedder-fp16.param"
sha256: "471afbe678dd1fd3fe764ef9c6eccaccb0a7d7e601f27b462aa926b20eb368c9"
source: "https://raw.githubusercontent.com/EdVince/Stable-Diffusion-NCNN/main/x86/linux/assets/FrozenCLIPEmbedder-fp16.param"
- name: "stablediffusion_assets/log_sigmas.bin"
sha256: "a2089f8aa4c61f9c200feaec541ab3f5c94233b28deb6d5e8bcd974fa79b68ac"
source: "https://github.com/EdVince/Stable-Diffusion-NCNN/raw/main/x86/linux/assets/log_sigmas.bin"
- name: "stablediffusion_assets/UNetModel-256-256-MHA-fp16-opt.param"
sha256: "a58c380229f09491776df837b7aa7adffc0a87821dc4708b34535da2e36e3da1"
source: "https://raw.githubusercontent.com/EdVince/Stable-Diffusion-NCNN/main/x86/linux/assets/UNetModel-256-256-MHA-fp16-opt.param"
- name: "stablediffusion_assets/UNetModel-512-512-MHA-fp16-opt.param"
sha256: "f12034067062827bd7f43d1d21888d1f03905401acf6c6eea22be23c259636fa"
source: "https://raw.githubusercontent.com/EdVince/Stable-Diffusion-NCNN/main/x86/linux/assets/UNetModel-512-512-MHA-fp16-opt.param"
- name: "stablediffusion_assets/UNetModel-base-MHA-fp16.param"
sha256: "696f6975de49f4325b53ce32aff81861a6d6c07cd9ce3f0aae2cc405350af38d"
source: "https://raw.githubusercontent.com/EdVince/Stable-Diffusion-NCNN/main/x86/linux/assets/UNetModel-base-MHA-fp16.param"
- name: "stablediffusion_assets/UNetModel-MHA-fp16.bin"
sha256: "d618918d011bfc1f644c0f2a33bf84931bd53b28a98492b0a8ed6f3a818852c3"
source: "https://github.com/lenaxia/stablediffusion-bins/releases/download/2023.05.24/UNetModel-MHA-fp16.bin"
- name: "stablediffusion_assets/vocab.txt"
sha256: "e30e57b6f1e47616982ef898d8922be24e535b4fa3d0110477b3a6f02ebbae7d"
source: "https://raw.githubusercontent.com/EdVince/Stable-Diffusion-NCNN/main/x86/linux/assets/vocab.txt"
config: |
- name: stablediffusion
backend: stablediffusion
parameters:
model: stablediffusion_assets
11 changes: 6 additions & 5 deletions pkg/aikit/config/specs.go
@@ -14,11 +14,12 @@ func NewFromBytes(b []byte) (*Config, error) {
}

type Config struct {
APIVersion string `yaml:"apiVersion"`
Debug bool `yaml:"debug,omitempty"`
Runtime string `yaml:"runtime,omitempty"`
Models []Model `yaml:"models"`
Config string `yaml:"config,omitempty"`
APIVersion string `yaml:"apiVersion"`
Debug bool `yaml:"debug,omitempty"`
Runtime string `yaml:"runtime,omitempty"`
Backends []string `yaml:"backends,omitempty"`
Models []Model `yaml:"models"`
Config string `yaml:"config,omitempty"`
}

type Model struct {
75 changes: 65 additions & 10 deletions pkg/aikit2llb/convert.go
@@ -4,6 +4,7 @@ import (
"fmt"
"net/url"
"path"
"strings"

"github.com/moby/buildkit/client/llb"
"github.com/opencontainers/go-digest"
@@ -21,18 +22,39 @@

func Aikit2LLB(c *config.Config) (llb.State, *specs.Image) {
var merge llb.State
s := llb.Image(debianSlim)
s, merge = copyModels(c, s)
s, merge = addLocalAI(c, s, merge)
state := llb.Image(debianSlim)
base := getBaseImage(c)

state, merge = copyModels(c, base, state)
state, merge = addLocalAI(c, state, merge)

// install cuda if runtime is nvidia
if c.Runtime == utils.RuntimeNVIDIA {
merge = installCuda(s, merge)
merge = installCuda(state, merge)
}

// install opencv and friends if stable diffusion backend is being used
for b := range c.Backends {
if strings.Contains(c.Backends[b], "stablediffusion") {
merge = installOpenCV(state, merge)
}
}

imageCfg := NewImageConfig(c)
return merge, imageCfg
}

func copyModels(c *config.Config, s llb.State) (llb.State, llb.State) {
db := llb.Image(distrolessBase)
func getBaseImage(c *config.Config) llb.State {
for b := range c.Backends {
if strings.Contains(c.Backends[b], "stablediffusion") {
// due to too many dependencies, using debian slim as base for stable diffusion
return llb.Image(debianSlim)
}
}
return llb.Image(distrolessBase)
}

func copyModels(c *config.Config, base llb.State, s llb.State) (llb.State, llb.State) {
savedState := s

// create config file if defined
@@ -50,13 +72,20 @@ func copyModels(c *config.Config, s llb.State) (llb.State, llb.State) {

m := llb.HTTP(model.Source, opts...)

var modelPath string
if strings.Contains(model.Name, "/") {
modelPath = "/models/" + path.Dir(model.Name) + "/" + fileNameFromURL(model.Source)
} else {
modelPath = "/models/" + fileNameFromURL(model.Source)
}

var copyOpts []llb.CopyOption
copyOpts = append(copyOpts, &llb.CopyInfo{
CreateDestPath: true,
})
s = s.File(
llb.Copy(m, fileNameFromURL(model.Source), "/models/"+fileNameFromURL(model.Source), copyOpts...),
llb.WithCustomName("Copying "+fileNameFromURL(model.Source)+" to /models"), //nolint: goconst
llb.Copy(m, fileNameFromURL(model.Source), modelPath, copyOpts...),
llb.WithCustomName("Copying "+fileNameFromURL(model.Source)+" to "+modelPath), //nolint: goconst
)

// create prompt templates if defined
Expand All @@ -67,7 +96,7 @@ func copyModels(c *config.Config, s llb.State) (llb.State, llb.State) {
}
}
diff := llb.Diff(savedState, s)
merge := llb.Merge([]llb.State{db, diff})
merge := llb.Merge([]llb.State{base, diff})
return s, merge
}

@@ -87,6 +116,7 @@ func installCuda(s llb.State, merge llb.State) llb.State {
llb.WithCustomName("Copying "+fileNameFromURL(cudaKeyringURL)), //nolint: goconst
)
s = s.Run(shf("dpkg -i cuda-keyring_1.1-1_all.deb && rm cuda-keyring_1.1-1_all.deb")).Root()
// running apt-get update twice due to nvidia repo
s = s.Run(shf("apt-get update && apt-get install -y ca-certificates && apt-get update"), llb.IgnoreCache).Root()
savedState := s
s = s.Run(shf("apt-get install -y libcublas-%[1]s cuda-cudart-%[1]s && apt-get clean", cudaVersion)).Root()
@@ -96,6 +126,31 @@
return merge
}

func installOpenCV(s llb.State, merge llb.State) llb.State {
savedState := s
// adding debian 11 (bullseye) repo due to opencv 4.5 requirement
s = s.Run(shf("echo 'deb http://deb.debian.org/debian bullseye main' | tee -a /etc/apt/sources.list")).Root()
// pinning libdap packages to bullseye version due to symbol error
s = s.Run(shf("apt-get update && mkdir -p /tmp/generated/images && apt-get install -y libopencv-imgcodecs4.5 libgomp1 libdap27=3.20.7-6 libdapclient6v5=3.20.7-6 && apt-get clean"), llb.IgnoreCache).Root()
diff := llb.Diff(savedState, s)
merge = llb.Merge([]llb.State{merge, diff})

sdURL := fmt.Sprintf("https://sertaccdn.azureedge.net/localai/%s/stablediffusion", localAIVersion)
var opts []llb.HTTPOption
opts = append(opts, llb.Filename("stablediffusion"))
opts = append(opts, llb.Chmod(0o755))
var copyOpts []llb.CopyOption
copyOpts = append(copyOpts, &llb.CopyInfo{
CreateDestPath: true,
})
sd := llb.HTTP(sdURL, opts...)
merge = merge.File(
llb.Copy(sd, "stablediffusion", "/tmp/localai/backend_data/backend-assets/grpc/stablediffusion", copyOpts...),
llb.WithCustomName("Copying stable diffusion backend"), //nolint: goconst
)
return merge
}

func addLocalAI(c *config.Config, s llb.State, merge llb.State) (llb.State, llb.State) {
savedState := s
var localAIURL string
@@ -115,7 +170,7 @@ func addLocalAI(c *config.Config, s llb.State, merge llb.State) (llb.State, llb.
opts = append(opts, llb.Chmod(0o755))
localAI := llb.HTTP(localAIURL, opts...)
s = s.File(
llb.Copy(localAI, "local-ai", "/usr/bin"),
llb.Copy(localAI, "local-ai", "/usr/bin/local-ai"),
llb.WithCustomName("Copying "+fileNameFromURL(localAIURL)+" to /usr/bin"), //nolint: goconst
)

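The base-image switch added to `convert.go` is the core design decision of this PR: any configured backend containing `stablediffusion` forces a Debian slim base (so OpenCV and its dependencies can be apt-installed), while everything else keeps the distroless base. A standalone sketch of that selection logic — the image references below are illustrative placeholders, not the PR's actual `debianSlim`/`distrolessBase` constants, which are defined outside this diff:

```go
package main

import (
	"fmt"
	"strings"
)

// Illustrative image references; the real constants are defined elsewhere
// in pkg/aikit2llb and are not shown in this diff.
const (
	debianSlim     = "docker.io/library/debian:12-slim"
	distrolessBase = "gcr.io/distroless/cc-debian12"
)

// chooseBaseImage mirrors getBaseImage: scan the configured backends and
// fall back to distroless only when stable diffusion is not requested.
func chooseBaseImage(backends []string) string {
	for _, b := range backends {
		if strings.Contains(b, "stablediffusion") {
			return debianSlim
		}
	}
	return distrolessBase
}

func main() {
	fmt.Println(chooseBaseImage([]string{"llama"}))
	fmt.Println(chooseBaseImage([]string{"llama", "stablediffusion"}))
}
```

The trade-off the comment in the diff alludes to: distroless keeps the image minimal, but stable diffusion's native dependencies (libopencv, libgomp) have no distroless packaging, so that backend pays for its features with a larger base.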
File renamed without changes.
50 changes: 50 additions & 0 deletions test/aikitfile-stablediffusion.yaml
@@ -0,0 +1,50 @@
#syntax=aikit:test
apiVersion: v1alpha1
debug: true
backends:
- stablediffusion
models:
- name: "stablediffusion_assets/AutoencoderKL-256-256-fp16-opt.param"
sha256: "18ca4b66685e21406bcf64c484b3b680b4949900415536d599cc876579c85c82"
source: "https://raw.githubusercontent.com/EdVince/Stable-Diffusion-NCNN/main/x86/linux/assets/AutoencoderKL-256-256-fp16-opt.param"
- name: "stablediffusion_assets/AutoencoderKL-512-512-fp16-opt.param"
sha256: "cf45f63aacf3dbbab0f59ed92a6f2c14d9a1801314631cd3abe91e3c85639a20"
source: "https://raw.githubusercontent.com/EdVince/Stable-Diffusion-NCNN/main/x86/linux/assets/AutoencoderKL-512-512-fp16-opt.param"
- name: "stablediffusion_assets/AutoencoderKL-base-fp16.param"
sha256: "0254a056dce61b0c27dc9ec1b78b53bcf55315c540f55f051eb841aa992701ba"
source: "https://raw.githubusercontent.com/EdVince/Stable-Diffusion-NCNN/main/x86/linux/assets/AutoencoderKL-base-fp16.param"
- name: "stablediffusion_assets/AutoencoderKL-encoder-512-512-fp16.bin"
sha256: "ddcb79a9951b9f91e05e087739ed69da2c1c4ae30ba4168cce350b49d617c9fa"
source: "https://github.com/lenaxia/stablediffusion-bins/releases/download/2023.05.24/AutoencoderKL-encoder-512-512-fp16.bin"
- name: "stablediffusion_assets/AutoencoderKL-fp16.bin"
sha256: "f02e71f80e70252734724bbfaed5c4ddd3a8ed7e61bb2175ff5f53099f0e35dd"
source: "https://github.com/lenaxia/stablediffusion-bins/releases/download/2023.05.24/AutoencoderKL-fp16.bin"
- name: "stablediffusion_assets/FrozenCLIPEmbedder-fp16.bin"
sha256: "1c9a12f4e1dd1b295a388045f7f28a2352a4d70c3dc96a542189a3dd7051fdd6"
source: "https://github.com/lenaxia/stablediffusion-bins/releases/download/2023.05.24/FrozenCLIPEmbedder-fp16.bin"
- name: "stablediffusion_assets/FrozenCLIPEmbedder-fp16.param"
sha256: "471afbe678dd1fd3fe764ef9c6eccaccb0a7d7e601f27b462aa926b20eb368c9"
source: "https://raw.githubusercontent.com/EdVince/Stable-Diffusion-NCNN/main/x86/linux/assets/FrozenCLIPEmbedder-fp16.param"
- name: "stablediffusion_assets/log_sigmas.bin"
sha256: "a2089f8aa4c61f9c200feaec541ab3f5c94233b28deb6d5e8bcd974fa79b68ac"
source: "https://github.com/EdVince/Stable-Diffusion-NCNN/raw/main/x86/linux/assets/log_sigmas.bin"
- name: "stablediffusion_assets/UNetModel-256-256-MHA-fp16-opt.param"
sha256: "a58c380229f09491776df837b7aa7adffc0a87821dc4708b34535da2e36e3da1"
source: "https://raw.githubusercontent.com/EdVince/Stable-Diffusion-NCNN/main/x86/linux/assets/UNetModel-256-256-MHA-fp16-opt.param"
- name: "stablediffusion_assets/UNetModel-512-512-MHA-fp16-opt.param"
sha256: "f12034067062827bd7f43d1d21888d1f03905401acf6c6eea22be23c259636fa"
source: "https://raw.githubusercontent.com/EdVince/Stable-Diffusion-NCNN/main/x86/linux/assets/UNetModel-512-512-MHA-fp16-opt.param"
- name: "stablediffusion_assets/UNetModel-base-MHA-fp16.param"
sha256: "696f6975de49f4325b53ce32aff81861a6d6c07cd9ce3f0aae2cc405350af38d"
source: "https://raw.githubusercontent.com/EdVince/Stable-Diffusion-NCNN/main/x86/linux/assets/UNetModel-base-MHA-fp16.param"
- name: "stablediffusion_assets/UNetModel-MHA-fp16.bin"
sha256: "d618918d011bfc1f644c0f2a33bf84931bd53b28a98492b0a8ed6f3a818852c3"
source: "https://github.com/lenaxia/stablediffusion-bins/releases/download/2023.05.24/UNetModel-MHA-fp16.bin"
- name: "stablediffusion_assets/vocab.txt"
sha256: "e30e57b6f1e47616982ef898d8922be24e535b4fa3d0110477b3a6f02ebbae7d"
source: "https://raw.githubusercontent.com/EdVince/Stable-Diffusion-NCNN/main/x86/linux/assets/vocab.txt"
config: |
- name: stablediffusion
backend: stablediffusion
parameters:
model: stablediffusion_assets