From 60911284fae6147a57d12ff0eb5f5c0e44c1c03f Mon Sep 17 00:00:00 2001
From: =?UTF-8?q?Serta=C3=A7=20=C3=96zercan?= <852750+sozercan@users.noreply.github.com>
Date: Fri, 1 Dec 2023 21:34:40 -0800
Subject: [PATCH] use llb.http (#10)

Signed-off-by: Sertac Ozercan
---
 README.md                   |  6 +--
 go.mod                      |  2 +-
 models/cuda/orca-2-13b.yaml |  1 +
 pkg/aikit2llb/convert.go    | 75 +++++++++++++++++++++++--------------
 test/aikitfile.yaml         |  1 +
 5 files changed, 52 insertions(+), 33 deletions(-)

diff --git a/README.md b/README.md
index 11b5c70e..847ad127 100644
--- a/README.md
+++ b/README.md
@@ -6,14 +6,14 @@ AIKit is a quick, easy, and local or cloud-agnostic way to get started to host
 and deploy large language models (LLMs) for inference. No GPU, internet access or
 additional tools are needed to get started except for [Docker](https://docs.docker.com/desktop/install/linux-install/)!
 
-AIKit uses [LocalAI](https://localai.io/) under-the-hood to run inference. LocalAI provides a drop-in replacement REST API that is OpenAI API compatible, so you can use any OpenAI API compatible client, such as [Kubectl AI](https://github.com/sozercan/kubectl-ai), to send requests to open-source LLMs powered by AIKit!
+AIKit uses [LocalAI](https://localai.io/) under the hood to run inference. LocalAI provides a drop-in replacement REST API that is OpenAI API compatible, so you can use any OpenAI API compatible client, such as [Kubectl AI](https://github.com/sozercan/kubectl-ai), [Chatbot-UI](https://github.com/sozercan/chatbot-ui), and many more, to send requests to open-source LLMs powered by AIKit!
 
 > [!NOTE]
 > At this time, AIKit is tested with LocalAI `llama` backend. Other backends may work but are not tested. Please open an issue if you'd like to see support for other backends.
 
 ## Features
 
-- 🐳 No GPU, internet access or additional tools needed except for [Docker](https://docs.docker.com/desktop/install/linux-install/)!
+- 🐳 No GPU, Internet access or additional tools needed except for [Docker](https://docs.docker.com/desktop/install/linux-install/)!
 - 🤏 Minimal image size, resulting in less vulnerabilities and smaller attack surface with a custom [distroless](https://github.com/GoogleContainerTools/distroless)-based image
 - 🚀 Easy to use declarative configuration
 - ✨ OpenAI API compatible to use with any OpenAI API compatible client
@@ -141,7 +141,7 @@ curl http://localhost:8080/v1/chat/completions -H "Content-Type: application/jso
 ```
 
 > [!TIP]
-> For an example Kubernetes deployment and service YAML, see [kubernetes folder](./kubernetes/).
+> For an example Kubernetes deployment and service YAML, see the [kubernetes folder](./kubernetes/). Please note that these are examples; you may need to customize them (for example, with properly configured resource requests and limits) based on your needs.
 
 ## GPU Acceleration Support
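For reference, the `curl` request visible in the hunk header above works with any OpenAI API compatible client. Below is a minimal Go sketch of the same call; it assumes an AIKit container is already serving on localhost:8080 and that a model named llama-2-7b-chat (the model defined in test/aikitfile.yaml at the end of this patch) is loaded. Only the endpoint, header, and payload shape come from the README; the rest is illustrative.

```go
package main

import (
	"bytes"
	"fmt"
	"io"
	"net/http"
)

func main() {
	// Chat completion request in the OpenAI API format; the model name must
	// match a model defined in the aikitfile baked into the image.
	payload := []byte(`{"model": "llama-2-7b-chat", "messages": [{"role": "user", "content": "explain kubernetes in a sentence"}]}`)

	resp, err := http.Post("http://localhost:8080/v1/chat/completions", "application/json", bytes.NewReader(payload))
	if err != nil {
		panic(err)
	}
	defer resp.Body.Close()

	body, err := io.ReadAll(resp.Body)
	if err != nil {
		panic(err)
	}
	fmt.Println(string(body))
}
```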
diff --git a/go.mod b/go.mod
index 72149b4c..00eae887 100644
--- a/go.mod
+++ b/go.mod
@@ -5,6 +5,7 @@ go 1.21
 require (
 	github.com/containerd/containerd v1.7.9
 	github.com/moby/buildkit v0.12.3
+	github.com/opencontainers/go-digest v1.0.0
 	github.com/opencontainers/image-spec v1.1.0-rc5
 	github.com/pkg/errors v0.9.1
 	github.com/sirupsen/logrus v1.9.3
@@ -36,7 +37,6 @@ require (
 	github.com/kr/text v0.2.0 // indirect
 	github.com/moby/locker v1.0.1 // indirect
 	github.com/moby/sys/signal v0.7.0 // indirect
-	github.com/opencontainers/go-digest v1.0.0 // indirect
 	github.com/secure-systems-lab/go-securesystemslib v0.4.0 // indirect
 	github.com/shibumi/go-pathspec v1.3.0 // indirect
 	github.com/tonistiigi/fsutil v0.0.0-20230629203738-36ef4d8c0dbb // indirect
diff --git a/models/cuda/orca-2-13b.yaml b/models/cuda/orca-2-13b.yaml
index 8abec499..565517b9 100644
--- a/models/cuda/orca-2-13b.yaml
+++ b/models/cuda/orca-2-13b.yaml
@@ -1,6 +1,7 @@
 #syntax=ghcr.io/sozercan/aikit:latest
 apiVersion: v1alpha1
 debug: true
+runtime: cuda
 models:
   - name: orca-2-13b
     source: https://huggingface.co/TheBloke/Orca-2-13B-GGUF/resolve/main/orca-2-13b.Q4_K_M.gguf
diff --git a/pkg/aikit2llb/convert.go b/pkg/aikit2llb/convert.go
index 15fd38bb..865fb857 100644
--- a/pkg/aikit2llb/convert.go
+++ b/pkg/aikit2llb/convert.go
@@ -6,6 +6,7 @@ import (
 	"path"
 
 	"github.com/moby/buildkit/client/llb"
+	"github.com/opencontainers/go-digest"
 	specs "github.com/opencontainers/image-spec/specs-go/v1"
 	"github.com/sozercan/aikit/pkg/aikit/config"
 	"github.com/sozercan/aikit/pkg/utils"
@@ -15,26 +16,23 @@
 const (
 	debianSlim     = "docker.io/library/debian:12-slim"
 	distrolessBase = "gcr.io/distroless/cc-debian12:latest"
 	localAIVersion = "v1.40.0"
-	retryCount     = 5
 	cudaVersion    = "12-3"
 )
 
 func Aikit2LLB(c *config.Config) (llb.State, *specs.Image) {
 	var merge llb.State
 	s := llb.Image(debianSlim)
-	s = curl(s)
+	s, merge = copyModels(s, c)
+	s, merge = addLocalAI(c, s, merge)
 	if c.Runtime == utils.RuntimeNVIDIA {
-		s, merge = installCuda(s)
-	} else {
-		merge = llb.Image(distrolessBase)
+		s = installCuda(s, merge)
 	}
-	s, merge = copyModels(s, merge, c)
-	s = addLocalAI(c, s, merge)
 	imageCfg := NewImageConfig(c)
 	return s, imageCfg
 }
 
-func copyModels(s llb.State, merge llb.State, c *config.Config) (llb.State, llb.State) {
+func copyModels(s llb.State, c *config.Config) (llb.State, llb.State) {
+	db := llb.Image(distrolessBase)
 	initState := s
 	// create config file if defined
@@ -43,12 +41,24 @@ func copyModels(s llb.State, merge llb.State, c *config.Config) (llb.State, llb.
 	}
 
 	for _, model := range c.Models {
-		s = s.Run(llb.Shlexf("curl --retry %d --create-dirs -sSLO --output-dir /models %s", retryCount, model.Source)).Root()
-		// verify sha256 checksum if defined
+		var opts []llb.HTTPOption
+		opts = append(opts, llb.Filename(fileNameFromURL(model.Source)))
 		if model.SHA256 != "" {
-			path := fmt.Sprintf("/models/%s", fileNameFromURL(model.Source))
-			s = s.Run(shf("echo \"%s %s\" | sha256sum -c -", model.SHA256, path)).Root()
+			digest := digest.NewDigestFromEncoded(digest.SHA256, model.SHA256)
+			opts = append(opts, llb.Checksum(digest))
 		}
+
+		m := llb.HTTP(model.Source, opts...)
+
+		var copyOpts []llb.CopyOption
+		copyOpts = append(copyOpts, &llb.CopyInfo{
+			CreateDestPath: true,
+		})
+		s = s.File(
+			llb.Copy(m, fileNameFromURL(model.Source), "/models/"+fileNameFromURL(model.Source), copyOpts...),
+			llb.WithCustomName("Copying "+fileNameFromURL(model.Source)+" to /models"), //nolint: goconst
+		)
+
 		// create prompt templates if defined
 		for _, pt := range model.PromptTemplates {
 			if pt.Name != "" && pt.Template != "" {
@@ -57,7 +67,7 @@ func copyModels(s llb.State, merge llb.State, c *config.Config) (llb.State, llb.
 	}
 
 	diff := llb.Diff(initState, s)
-	merge = llb.Merge([]llb.State{merge, diff})
+	merge := llb.Merge([]llb.State{db, diff})
 	return s, merge
 }
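The copyModels rewrite above moves model downloads out of `RUN curl` steps and into llb.HTTP sources, so BuildKit itself fetches the file, verifies it against the declared digest, and caches it by content. A self-contained sketch of the same pattern follows, using the same BuildKit calls as the patch but a hypothetical URL, filename, and digest:

```go
package main

import (
	"context"
	"os"

	"github.com/moby/buildkit/client/llb"
	"github.com/opencontainers/go-digest"
)

func main() {
	// Hypothetical model source and hex-encoded sha256, for illustration only.
	src := "https://example.com/models/model.gguf"
	sha := "08a5566d61d7cb6b420c3e4387a39e0078e1f2fe5f055f3a03887385304d4bfa"

	// llb.HTTP declares the download as a build source; BuildKit fetches it
	// and verifies the content against the declared checksum.
	dgst := digest.NewDigestFromEncoded(digest.SHA256, sha)
	m := llb.HTTP(src, llb.Filename("model.gguf"), llb.Checksum(dgst))

	// Copy the fetched file into the target filesystem; CreateDestPath
	// creates /models if it does not already exist.
	st := llb.Image("docker.io/library/debian:12-slim").File(
		llb.Copy(m, "model.gguf", "/models/model.gguf", &llb.CopyInfo{CreateDestPath: true}),
	)

	// Marshal to an LLB definition, e.g. to pipe into `buildctl build`.
	def, err := st.Marshal(context.Background())
	if err != nil {
		panic(err)
	}
	if err := llb.WriteTo(def, os.Stdout); err != nil {
		panic(err)
	}
}
```

Because the checksum is declared in the LLB graph, a changed upstream file fails the fetch outright instead of surfacing later as a `sha256sum` error inside a RUN step.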
@@ -69,29 +79,29 @@ func fileNameFromURL(urlString string) string {
 	return path.Base(parsedURL.Path)
 }
 
-func curl(s llb.State) llb.State {
-	i := s.Run(llb.Shlex("apt-get update"), llb.IgnoreCache).Root()
-	return i.Run(llb.Shlex("apt-get install curl -y")).Root()
-}
-
-func installCuda(s llb.State) (llb.State, llb.State) {
+func installCuda(s llb.State, merge llb.State) llb.State {
 	initState := s
 
-	s = s.Run(shf("curl -O https://developer.download.nvidia.com/compute/cuda/repos/debian12/x86_64/cuda-keyring_1.1-1_all.deb && dpkg -i cuda-keyring_1.1-1_all.deb && rm cuda-keyring_1.1-1_all.deb")).Root()
-	s = s.Run(llb.Shlex("apt-get update"), llb.IgnoreCache).Root()
-	s = s.Run(shf("apt-get install -y libcublas-%[1]s cuda-cudart-%[1]s && apt-get clean", cudaVersion)).Root()
+	cudaKeyringURL := "https://developer.download.nvidia.com/compute/cuda/repos/debian12/x86_64/cuda-keyring_1.1-1_all.deb"
+	cudaKeyring := llb.HTTP(cudaKeyringURL)
+	s = s.File(
+		llb.Copy(cudaKeyring, fileNameFromURL(cudaKeyringURL), "/"),
+		llb.WithCustomName("Copying "+fileNameFromURL(cudaKeyringURL)), //nolint: goconst
+	)
+	s = s.Run(shf("dpkg -i cuda-keyring_1.1-1_all.deb && rm cuda-keyring_1.1-1_all.deb")).Root()
+	s = s.Run(shf("apt-get update && apt-get install -y ca-certificates && apt-get update && apt-get install -y libcublas-%[1]s cuda-cudart-%[1]s && apt-get clean", cudaVersion), llb.IgnoreCache).Root()
 	diff := llb.Diff(initState, s)
-	merge := llb.Merge([]llb.State{llb.Image(distrolessBase), diff})
-	return s, merge
+	merge = llb.Merge([]llb.State{merge, diff})
+	return merge
 }
 
-func addLocalAI(c *config.Config, s llb.State, merge llb.State) llb.State {
+func addLocalAI(c *config.Config, s llb.State, merge llb.State) (llb.State, llb.State) {
 	initState := s
 
 	var localAIURL string
 	switch c.Runtime {
 	case utils.RuntimeNVIDIA:
-		localAIURL = fmt.Sprintf("https://sertacstorage.blob.core.windows.net/localai/%s/local-ai", localAIVersion)
+		localAIURL = fmt.Sprintf("https://sertaccdn.azureedge.net/localai/%s/local-ai", localAIVersion)
 	case utils.RuntimeCPUAVX2:
 		localAIURL = fmt.Sprintf("https://github.com/mudler/LocalAI/releases/download/%s/local-ai-avx2-Linux-x86_64", localAIVersion)
 	case utils.RuntimeCPUAVX512:
@@ -100,10 +110,17 @@ func addLocalAI(c *config.Config, s llb.State, merge llb.State) llb.State {
 		localAIURL = fmt.Sprintf("https://github.com/mudler/LocalAI/releases/download/%s/local-ai-avx-Linux-x86_64", localAIVersion)
 	}
 
-	s = s.Run(llb.Shlexf("curl -Lo /usr/bin/local-ai %s", localAIURL)).Root()
-	s = s.Run(llb.Shlex("chmod +x /usr/bin/local-ai")).Root()
+	var opts []llb.HTTPOption
+	opts = append(opts, llb.Filename("local-ai"))
+	opts = append(opts, llb.Chmod(0o755))
+	localAI := llb.HTTP(localAIURL, opts...)
+	s = s.File(
+		llb.Copy(localAI, "local-ai", "/usr/bin"),
+		llb.WithCustomName("Copying "+fileNameFromURL(localAIURL)+" to /usr/bin"), //nolint: goconst
+	)
+
 	diff := llb.Diff(initState, s)
-	return llb.Merge([]llb.State{merge, diff})
+	return s, llb.Merge([]llb.State{merge, diff})
 }
 
 func shf(cmd string, v ...interface{}) llb.RunOption {
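A recurring pattern in the new convert.go is to mutate the full Debian builder state, then use llb.Diff to capture only the resulting filesystem changes and llb.Merge to graft them onto the distroless base, so apt, dpkg, and the rest of Debian never reach the final image. A minimal sketch of that layering, reusing the patch's image references but with an illustrative apt-get step:

```go
package main

import (
	"context"
	"os"

	"github.com/moby/buildkit/client/llb"
)

func main() {
	// Working state: a full Debian image where package tooling is available.
	builder := llb.Image("docker.io/library/debian:12-slim")

	// Mutate the working state; a hypothetical install step for illustration.
	mutated := builder.Run(llb.Shlex("apt-get update")).Root()

	// llb.Diff captures only the filesystem changes between the two states,
	// and llb.Merge grafts those changes onto the minimal distroless base.
	diff := llb.Diff(builder, mutated)
	final := llb.Merge([]llb.State{llb.Image("gcr.io/distroless/cc-debian12:latest"), diff})

	def, err := final.Marshal(context.Background())
	if err != nil {
		panic(err)
	}
	if err := llb.WriteTo(def, os.Stdout); err != nil {
		panic(err)
	}
}
```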
diff --git a/test/aikitfile.yaml b/test/aikitfile.yaml
index d6606e3b..856ac72c 100644
--- a/test/aikitfile.yaml
+++ b/test/aikitfile.yaml
@@ -4,6 +4,7 @@ debug: true
 models:
   - name: llama-2-7b-chat
     source: https://huggingface.co/TheBloke/Llama-2-7B-Chat-GGUF/resolve/main/llama-2-7b-chat.Q4_K_M.gguf
+    sha256: "08a5566d61d7cb6b420c3e4387a39e0078e1f2fe5f055f3a03887385304d4bfa"
    config: |
      - name: llama-2-7b-chat
        backend: llama
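The sha256 field added here is the hex-encoded digest that digest.NewDigestFromEncoded and llb.Checksum consume in convert.go, so the build fails if the downloaded model no longer matches. Running `sha256sum llama-2-7b-chat.Q4_K_M.gguf` produces the value to paste in; a hypothetical Go helper that does the same, if you prefer to stay in the toolchain's language:

```go
package main

import (
	"crypto/sha256"
	"encoding/hex"
	"fmt"
	"io"
	"os"
)

func main() {
	// Usage: go run . <path-to-model.gguf>
	if len(os.Args) < 2 {
		fmt.Fprintln(os.Stderr, "usage: sha256 <file>")
		os.Exit(1)
	}
	f, err := os.Open(os.Args[1])
	if err != nil {
		panic(err)
	}
	defer f.Close()

	// Stream the file through the hash to avoid loading large models into memory.
	h := sha256.New()
	if _, err := io.Copy(h, f); err != nil {
		panic(err)
	}
	// Hex-encoded digest, in the format the aikitfile's sha256 field expects.
	fmt.Println(hex.EncodeToString(h.Sum(nil)))
}
```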