From 60911284fae6147a57d12ff0eb5f5c0e44c1c03f Mon Sep 17 00:00:00 2001
From: =?UTF-8?q?Serta=C3=A7=20=C3=96zercan?= <852750+sozercan@users.noreply.github.com>
Date: Fri, 1 Dec 2023 21:34:40 -0800
Subject: [PATCH] use llb.http (#10)

Signed-off-by: Sertac Ozercan
---
 README.md                   |  6 +--
 go.mod                      |  2 +-
 models/cuda/orca-2-13b.yaml |  1 +
 pkg/aikit2llb/convert.go    | 75 +++++++++++++++++++++++--------------
 test/aikitfile.yaml         |  1 +
 5 files changed, 52 insertions(+), 33 deletions(-)

diff --git a/README.md b/README.md
index 11b5c70e..847ad127 100644
--- a/README.md
+++ b/README.md
@@ -6,14 +6,14 @@ AIKit is a quick, easy, and local or cloud-agnostic way to get started to host
 and deploy large language models (LLMs) for inference. No GPU, internet access or
 additional tools are needed to get started except for [Docker](https://docs.docker.com/desktop/install/linux-install/)!
 
-AIKit uses [LocalAI](https://localai.io/) under-the-hood to run inference. LocalAI provides a drop-in replacement REST API that is OpenAI API compatible, so you can use any OpenAI API compatible client, such as [Kubectl AI](https://github.com/sozercan/kubectl-ai), to send requests to open-source LLMs powered by AIKit!
+AIKit uses [LocalAI](https://localai.io/) under the hood to run inference. LocalAI provides a drop-in replacement REST API that is OpenAI API compatible, so you can use any OpenAI API compatible client, such as [Kubectl AI](https://github.com/sozercan/kubectl-ai), [Chatbot-UI](https://github.com/sozercan/chatbot-ui), and many more, to send requests to open-source LLMs powered by AIKit!
 
 > [!NOTE]
 > At this time, AIKit is tested with LocalAI `llama` backend. Other backends may work but are not tested. Please open an issue if you'd like to see support for other backends.
 
 ## Features
 
-- 🐳 No GPU, internet access or additional tools needed except for [Docker](https://docs.docker.com/desktop/install/linux-install/)!
+- 🐳 No GPU, Internet access or additional tools needed except for [Docker](https://docs.docker.com/desktop/install/linux-install/)!
 - 🤏 Minimal image size, resulting in less vulnerabilities and smaller attack surface with a custom [distroless](https://github.com/GoogleContainerTools/distroless)-based image
 - 🚀 Easy to use declarative configuration
 - ✨ OpenAI API compatible to use with any OpenAI API compatible client
@@ -141,7 +141,7 @@ curl http://localhost:8080/v1/chat/completions -H "Content-Type: application/jso
 ```
 
 > [!TIP]
-> For an example Kubernetes deployment and service YAML, see [kubernetes folder](./kubernetes/).
+> For an example Kubernetes deployment and service YAML, see the [kubernetes folder](./kubernetes/). Please note that these are examples; you may need to customize them (for example, with properly configured resource requests and limits) based on your needs.
 
 ## GPU Acceleration Support
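For reference, the `curl` request visible in the hunk header above works with any OpenAI API compatible client. Below is a minimal Go sketch of the same call; it assumes an AIKit container is already serving on localhost:8080 and that a model named llama-2-7b-chat (the model defined in test/aikitfile.yaml at the end of this patch) is loaded. Only the endpoint, header, and payload shape come from the README; the rest is illustrative.

```go
package main

import (
	"bytes"
	"fmt"
	"io"
	"net/http"
)

func main() {
	// Chat completion request in the OpenAI API format; the model name must
	// match a model defined in the aikitfile baked into the image.
	payload := []byte(`{"model": "llama-2-7b-chat", "messages": [{"role": "user", "content": "explain kubernetes in a sentence"}]}`)

	resp, err := http.Post("http://localhost:8080/v1/chat/completions", "application/json", bytes.NewReader(payload))
	if err != nil {
		panic(err)
	}
	defer resp.Body.Close()

	body, err := io.ReadAll(resp.Body)
	if err != nil {
		panic(err)
	}
	fmt.Println(string(body))
}
```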
diff --git a/go.mod b/go.mod
index 72149b4c..00eae887 100644
--- a/go.mod
+++ b/go.mod
@@ -5,6 +5,7 @@ go 1.21
 require (
 	github.com/containerd/containerd v1.7.9
 	github.com/moby/buildkit v0.12.3
+	github.com/opencontainers/go-digest v1.0.0
 	github.com/opencontainers/image-spec v1.1.0-rc5
 	github.com/pkg/errors v0.9.1
 	github.com/sirupsen/logrus v1.9.3
@@ -36,7 +37,6 @@ require (
 	github.com/kr/text v0.2.0 // indirect
 	github.com/moby/locker v1.0.1 // indirect
 	github.com/moby/sys/signal v0.7.0 // indirect
-	github.com/opencontainers/go-digest v1.0.0 // indirect
 	github.com/secure-systems-lab/go-securesystemslib v0.4.0 // indirect
 	github.com/shibumi/go-pathspec v1.3.0 // indirect
 	github.com/tonistiigi/fsutil v0.0.0-20230629203738-36ef4d8c0dbb // indirect
diff --git a/models/cuda/orca-2-13b.yaml b/models/cuda/orca-2-13b.yaml
index 8abec499..565517b9 100644
--- a/models/cuda/orca-2-13b.yaml
+++ b/models/cuda/orca-2-13b.yaml
@@ -1,6 +1,7 @@
 #syntax=ghcr.io/sozercan/aikit:latest
 apiVersion: v1alpha1
 debug: true
+runtime: cuda
 models:
   - name: orca-2-13b
     source: https://huggingface.co/TheBloke/Orca-2-13B-GGUF/resolve/main/orca-2-13b.Q4_K_M.gguf
diff --git a/pkg/aikit2llb/convert.go b/pkg/aikit2llb/convert.go
index 15fd38bb..865fb857 100644
--- a/pkg/aikit2llb/convert.go
+++ b/pkg/aikit2llb/convert.go
@@ -6,6 +6,7 @@ import (
 	"path"
 
 	"github.com/moby/buildkit/client/llb"
+	"github.com/opencontainers/go-digest"
 	specs "github.com/opencontainers/image-spec/specs-go/v1"
 	"github.com/sozercan/aikit/pkg/aikit/config"
 	"github.com/sozercan/aikit/pkg/utils"
@@ -15,26 +16,23 @@
 const (
 	debianSlim     = "docker.io/library/debian:12-slim"
 	distrolessBase = "gcr.io/distroless/cc-debian12:latest"
 	localAIVersion = "v1.40.0"
-	retryCount     = 5
 	cudaVersion    = "12-3"
 )
 
 func Aikit2LLB(c *config.Config) (llb.State, *specs.Image) {
 	var merge llb.State
 	s := llb.Image(debianSlim)
-	s = curl(s)
+	s, merge = copyModels(s, c)
+	s, merge = addLocalAI(c, s, merge)
 	if c.Runtime == utils.RuntimeNVIDIA {
-		s, merge = installCuda(s)
-	} else {
-		merge = llb.Image(distrolessBase)
+		s = installCuda(s, merge)
 	}
-	s, merge = copyModels(s, merge, c)
-	s = addLocalAI(c, s, merge)
 	imageCfg := NewImageConfig(c)
 	return s, imageCfg
 }
 
-func copyModels(s llb.State, merge llb.State, c *config.Config) (llb.State, llb.State) {
+func copyModels(s llb.State, c *config.Config) (llb.State, llb.State) {
+	db := llb.Image(distrolessBase)
 	initState := s
 	// create config file if defined
@@ -43,12 +41,24 @@ func copyModels(s llb.State, merge llb.State, c *config.Config) (llb.State, llb.
 	}
 
 	for _, model := range c.Models {
-		s = s.Run(llb.Shlexf("curl --retry %d --create-dirs -sSLO --output-dir /models %s", retryCount, model.Source)).Root()
-		// verify sha256 checksum if defined
+		var opts []llb.HTTPOption
+		opts = append(opts, llb.Filename(fileNameFromURL(model.Source)))
 		if model.SHA256 != "" {
-			path := fmt.Sprintf("/models/%s", fileNameFromURL(model.Source))
-			s = s.Run(shf("echo \"%s %s\" | sha256sum -c -", model.SHA256, path)).Root()
+			digest := digest.NewDigestFromEncoded(digest.SHA256, model.SHA256)
+			opts = append(opts, llb.Checksum(digest))
 		}
+
+		m := llb.HTTP(model.Source, opts...)
+
+		var copyOpts []llb.CopyOption
+		copyOpts = append(copyOpts, &llb.CopyInfo{
+			CreateDestPath: true,
+		})
+		s = s.File(
+			llb.Copy(m, fileNameFromURL(model.Source), "/models/"+fileNameFromURL(model.Source), copyOpts...),
+			llb.WithCustomName("Copying "+fileNameFromURL(model.Source)+" to /models"), //nolint: goconst
+		)
+
 		// create prompt templates if defined
 		for _, pt := range model.PromptTemplates {
 			if pt.Name != "" && pt.Template != "" {
@@ -57,7 +67,7 @@ func copyModels(s llb.State, merge llb.State, c *config.Config) (llb.State, llb.
 	}
 
 	diff := llb.Diff(initState, s)
-	merge = llb.Merge([]llb.State{merge, diff})
+	merge := llb.Merge([]llb.State{db, diff})
 	return s, merge
 }
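The copyModels rewrite above moves model downloads out of `RUN curl` steps and into llb.HTTP sources, so BuildKit itself fetches the file, verifies it against the declared digest, and caches it by content. A self-contained sketch of the same pattern follows, using the same BuildKit calls as the patch but a hypothetical URL, filename, and digest:

```go
package main

import (
	"context"
	"os"

	"github.com/moby/buildkit/client/llb"
	"github.com/opencontainers/go-digest"
)

func main() {
	// Hypothetical model source and hex-encoded sha256, for illustration only.
	src := "https://example.com/models/model.gguf"
	sha := "08a5566d61d7cb6b420c3e4387a39e0078e1f2fe5f055f3a03887385304d4bfa"

	// llb.HTTP declares the download as a build source; BuildKit fetches it
	// and verifies the content against the declared checksum.
	dgst := digest.NewDigestFromEncoded(digest.SHA256, sha)
	m := llb.HTTP(src, llb.Filename("model.gguf"), llb.Checksum(dgst))

	// Copy the fetched file into the target filesystem; CreateDestPath
	// creates /models if it does not already exist.
	st := llb.Image("docker.io/library/debian:12-slim").File(
		llb.Copy(m, "model.gguf", "/models/model.gguf", &llb.CopyInfo{CreateDestPath: true}),
	)

	// Marshal to an LLB definition, e.g. to pipe into `buildctl build`.
	def, err := st.Marshal(context.Background())
	if err != nil {
		panic(err)
	}
	if err := llb.WriteTo(def, os.Stdout); err != nil {
		panic(err)
	}
}
```

Because the checksum is declared in the LLB graph, a changed upstream file fails the fetch outright instead of surfacing later as a `sha256sum` error inside a RUN step.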
@@ -69,29 +79,29 @@ func fileNameFromURL(urlString string) string {
 	return path.Base(parsedURL.Path)
 }
 
-func curl(s llb.State) llb.State {
-	i := s.Run(llb.Shlex("apt-get update"), llb.IgnoreCache).Root()
-	return i.Run(llb.Shlex("apt-get install curl -y")).Root()
-}
-
-func installCuda(s llb.State) (llb.State, llb.State) {
+func installCuda(s llb.State, merge llb.State) llb.State {
 	initState := s
 
-	s = s.Run(shf("curl -O https://developer.download.nvidia.com/compute/cuda/repos/debian12/x86_64/cuda-keyring_1.1-1_all.deb && dpkg -i cuda-keyring_1.1-1_all.deb && rm cuda-keyring_1.1-1_all.deb")).Root()
-	s = s.Run(llb.Shlex("apt-get update"), llb.IgnoreCache).Root()
-	s = s.Run(shf("apt-get install -y libcublas-%[1]s cuda-cudart-%[1]s && apt-get clean", cudaVersion)).Root()
+	cudaKeyringURL := "https://developer.download.nvidia.com/compute/cuda/repos/debian12/x86_64/cuda-keyring_1.1-1_all.deb"
+	cudaKeyring := llb.HTTP(cudaKeyringURL)
+	s = s.File(
+		llb.Copy(cudaKeyring, fileNameFromURL(cudaKeyringURL), "/"),
+		llb.WithCustomName("Copying "+fileNameFromURL(cudaKeyringURL)), //nolint: goconst
+	)
+	s = s.Run(shf("dpkg -i cuda-keyring_1.1-1_all.deb && rm cuda-keyring_1.1-1_all.deb")).Root()
+	s = s.Run(shf("apt-get update && apt-get install -y ca-certificates && apt-get update && apt-get install -y libcublas-%[1]s cuda-cudart-%[1]s && apt-get clean", cudaVersion), llb.IgnoreCache).Root()
 	diff := llb.Diff(initState, s)
-	merge := llb.Merge([]llb.State{llb.Image(distrolessBase), diff})
-	return s, merge
+	merge = llb.Merge([]llb.State{merge, diff})
+	return merge
 }
 
-func addLocalAI(c *config.Config, s llb.State, merge llb.State) llb.State {
+func addLocalAI(c *config.Config, s llb.State, merge llb.State) (llb.State, llb.State) {
 	initState := s
 
 	var localAIURL string
 	switch c.Runtime {
 	case utils.RuntimeNVIDIA:
-		localAIURL = fmt.Sprintf("https://sertacstorage.blob.core.windows.net/localai/%s/local-ai", localAIVersion)
+		localAIURL = fmt.Sprintf("https://sertaccdn.azureedge.net/localai/%s/local-ai", localAIVersion)
 	case utils.RuntimeCPUAVX2:
 		localAIURL = fmt.Sprintf("https://github.com/mudler/LocalAI/releases/download/%s/local-ai-avx2-Linux-x86_64", localAIVersion)
 	case utils.RuntimeCPUAVX512:
@@ -100,10 +110,17 @@ func addLocalAI(c *config.Config, s llb.State, merge llb.State) llb.State {
 		localAIURL = fmt.Sprintf("https://github.com/mudler/LocalAI/releases/download/%s/local-ai-avx-Linux-x86_64", localAIVersion)
 	}
 
-	s = s.Run(llb.Shlexf("curl -Lo /usr/bin/local-ai %s", localAIURL)).Root()
-	s = s.Run(llb.Shlex("chmod +x /usr/bin/local-ai")).Root()
+	var opts []llb.HTTPOption
+	opts = append(opts, llb.Filename("local-ai"))
+	opts = append(opts, llb.Chmod(0o755))
+	localAI := llb.HTTP(localAIURL, opts...)
+	s = s.File(
+		llb.Copy(localAI, "local-ai", "/usr/bin"),
+		llb.WithCustomName("Copying "+fileNameFromURL(localAIURL)+" to /usr/bin"), //nolint: goconst
+	)
+
 	diff := llb.Diff(initState, s)
-	return llb.Merge([]llb.State{merge, diff})
+	return s, llb.Merge([]llb.State{merge, diff})
 }
 
 func shf(cmd string, v ...interface{}) llb.RunOption {
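A recurring pattern in the new convert.go is to mutate the full Debian builder state, then use llb.Diff to capture only the resulting filesystem changes and llb.Merge to graft them onto the distroless base, so apt, dpkg, and the rest of Debian never reach the final image. A minimal sketch of that layering, reusing the patch's image references but with an illustrative apt-get step:

```go
package main

import (
	"context"
	"os"

	"github.com/moby/buildkit/client/llb"
)

func main() {
	// Working state: a full Debian image where package tooling is available.
	builder := llb.Image("docker.io/library/debian:12-slim")

	// Mutate the working state; a hypothetical install step for illustration.
	mutated := builder.Run(llb.Shlex("apt-get update")).Root()

	// llb.Diff captures only the filesystem changes between the two states,
	// and llb.Merge grafts those changes onto the minimal distroless base.
	diff := llb.Diff(builder, mutated)
	final := llb.Merge([]llb.State{llb.Image("gcr.io/distroless/cc-debian12:latest"), diff})

	def, err := final.Marshal(context.Background())
	if err != nil {
		panic(err)
	}
	if err := llb.WriteTo(def, os.Stdout); err != nil {
		panic(err)
	}
}
```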
diff --git a/test/aikitfile.yaml b/test/aikitfile.yaml
index d6606e3b..856ac72c 100644
--- a/test/aikitfile.yaml
+++ b/test/aikitfile.yaml
@@ -4,6 +4,7 @@ debug: true
 models:
   - name: llama-2-7b-chat
     source: https://huggingface.co/TheBloke/Llama-2-7B-Chat-GGUF/resolve/main/llama-2-7b-chat.Q4_K_M.gguf
+    sha256: "08a5566d61d7cb6b420c3e4387a39e0078e1f2fe5f055f3a03887385304d4bfa"
    config: |
      - name: llama-2-7b-chat
        backend: llama
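The sha256 field added here is the hex-encoded digest that digest.NewDigestFromEncoded and llb.Checksum consume in convert.go, so the build fails if the downloaded model no longer matches. Running `sha256sum llama-2-7b-chat.Q4_K_M.gguf` produces the value to paste in; a hypothetical Go helper that does the same, if you prefer to stay in the toolchain's language:

```go
package main

import (
	"crypto/sha256"
	"encoding/hex"
	"fmt"
	"io"
	"os"
)

func main() {
	// Usage: go run . <path-to-model.gguf>
	if len(os.Args) < 2 {
		fmt.Fprintln(os.Stderr, "usage: sha256 <file>")
		os.Exit(1)
	}
	f, err := os.Open(os.Args[1])
	if err != nil {
		panic(err)
	}
	defer f.Close()

	// Stream the file through the hash to avoid loading large models into memory.
	h := sha256.New()
	if _, err := io.Copy(h, f); err != nil {
		panic(err)
	}
	// Hex-encoded digest, in the format the aikitfile's sha256 field expects.
	fmt.Println(hex.EncodeToString(h.Sum(nil)))
}
```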