use llb.http (#10)
Signed-off-by: Sertac Ozercan <[email protected]>
sozercan authored Dec 2, 2023
1 parent 73075fc commit 6091128
Showing 5 changed files with 52 additions and 33 deletions.
6 changes: 3 additions & 3 deletions README.md
@@ -6,14 +6,14 @@

 AIKit is a quick, easy, and local or cloud-agnostic way to get started to host and deploy large language models (LLMs) for inference. No GPU, internet access or additional tools are needed to get started except for [Docker](https://docs.docker.com/desktop/install/linux-install/)!

-AIKit uses [LocalAI](https://localai.io/) under-the-hood to run inference. LocalAI provides a drop-in replacement REST API that is OpenAI API compatible, so you can use any OpenAI API compatible client, such as [Kubectl AI](https://github.com/sozercan/kubectl-ai), to send requests to open-source LLMs powered by AIKit!
+AIKit uses [LocalAI](https://localai.io/) under-the-hood to run inference. LocalAI provides a drop-in replacement REST API that is OpenAI API compatible, so you can use any OpenAI API compatible client, such as [Kubectl AI](https://github.com/sozercan/kubectl-ai), [Chatbot-UI](https://github.com/sozercan/chatbot-ui) and many more, to send requests to open-source LLMs powered by AIKit!

 > [!NOTE]
 > At this time, AIKit is tested with LocalAI `llama` backend. Other backends may work but are not tested. Please open an issue if you'd like to see support for other backends.

 ## Features

-- 🐳 No GPU, internet access or additional tools needed except for [Docker](https://docs.docker.com/desktop/install/linux-install/)!
+- 🐳 No GPU, Internet access or additional tools needed except for [Docker](https://docs.docker.com/desktop/install/linux-install/)!
 - 🤏 Minimal image size, resulting in less vulnerabilities and smaller attack surface with a custom [distroless](https://github.com/GoogleContainerTools/distroless)-based image
 - 🚀 Easy to use declarative configuration
 - ✨ OpenAI API compatible to use with any OpenAI API compatible client
@@ -141,7 +141,7 @@ curl http://localhost:8080/v1/chat/completions -H "Content-Type: application/jso
 ```

 > [!TIP]
-> For an example Kubernetes deployment and service YAML, see [kubernetes folder](./kubernetes/).
+> For an example Kubernetes deployment and service YAML, see [kubernetes folder](./kubernetes/). Please note that these are examples, you may need to customize them (such as properly configured resource requests and limits) based on your needs.

 ## GPU Acceleration Support
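Because the API surface is OpenAI-compatible, any plain HTTP client can send requests to an AIKit container. A minimal sketch in Go, assuming a container built from this repo's test aikitfile is serving `llama-2-7b-chat` on `localhost:8080` (the model name and port are assumptions, not part of this diff):

```go
package main

import (
	"bytes"
	"fmt"
	"io"
	"net/http"
)

func main() {
	// Request body follows the OpenAI chat completions schema.
	body := []byte(`{"model": "llama-2-7b-chat", "messages": [{"role": "user", "content": "Hello!"}]}`)

	resp, err := http.Post("http://localhost:8080/v1/chat/completions", "application/json", bytes.NewReader(body))
	if err != nil {
		panic(err)
	}
	defer resp.Body.Close()

	out, _ := io.ReadAll(resp.Body)
	fmt.Println(string(out)) // raw JSON response from the server
}
```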
2 changes: 1 addition & 1 deletion go.mod
@@ -5,6 +5,7 @@ go 1.21
 require (
 	github.com/containerd/containerd v1.7.9
 	github.com/moby/buildkit v0.12.3
+	github.com/opencontainers/go-digest v1.0.0
 	github.com/opencontainers/image-spec v1.1.0-rc5
 	github.com/pkg/errors v0.9.1
 	github.com/sirupsen/logrus v1.9.3
@@ -36,7 +37,6 @@ require (
 	github.com/kr/text v0.2.0 // indirect
 	github.com/moby/locker v1.0.1 // indirect
 	github.com/moby/sys/signal v0.7.0 // indirect
-	github.com/opencontainers/go-digest v1.0.0 // indirect
 	github.com/secure-systems-lab/go-securesystemslib v0.4.0 // indirect
 	github.com/shibumi/go-pathspec v1.3.0 // indirect
 	github.com/tonistiigi/fsutil v0.0.0-20230629203738-36ef4d8c0dbb // indirect
1 change: 1 addition & 0 deletions models/cuda/orca-2-13b.yaml
@@ -1,6 +1,7 @@
 #syntax=ghcr.io/sozercan/aikit:latest
 apiVersion: v1alpha1
+debug: true
 runtime: cuda
 models:
   - name: orca-2-13b
     source: https://huggingface.co/TheBloke/Orca-2-13B-GGUF/resolve/main/orca-2-13b.Q4_K_M.gguf
75 changes: 46 additions & 29 deletions pkg/aikit2llb/convert.go
@@ -6,6 +6,7 @@ import (
 	"path"

 	"github.com/moby/buildkit/client/llb"
+	"github.com/opencontainers/go-digest"
 	specs "github.com/opencontainers/image-spec/specs-go/v1"
 	"github.com/sozercan/aikit/pkg/aikit/config"
 	"github.com/sozercan/aikit/pkg/utils"
@@ -15,26 +16,23 @@ const (
 	debianSlim     = "docker.io/library/debian:12-slim"
 	distrolessBase = "gcr.io/distroless/cc-debian12:latest"
 	localAIVersion = "v1.40.0"
-	retryCount     = 5
 	cudaVersion    = "12-3"
 )

 func Aikit2LLB(c *config.Config) (llb.State, *specs.Image) {
 	var merge llb.State
 	s := llb.Image(debianSlim)
-	s = curl(s)
+	s, merge = copyModels(s, c)
+	s, merge = addLocalAI(c, s, merge)
 	if c.Runtime == utils.RuntimeNVIDIA {
-		s, merge = installCuda(s)
-	} else {
-		merge = llb.Image(distrolessBase)
+		s = installCuda(s, merge)
 	}
-	s, merge = copyModels(s, merge, c)
-	s = addLocalAI(c, s, merge)
 	imageCfg := NewImageConfig(c)
 	return s, imageCfg
 }

-func copyModels(s llb.State, merge llb.State, c *config.Config) (llb.State, llb.State) {
+func copyModels(s llb.State, c *config.Config) (llb.State, llb.State) {
+	db := llb.Image(distrolessBase)
 	initState := s

 	// create config file if defined
@@ -43,12 +41,24 @@ func copyModels(s llb.State, merge llb.State, c *config.Config) (llb.State, llb.
 	}

 	for _, model := range c.Models {
-		s = s.Run(llb.Shlexf("curl --retry %d --create-dirs -sSLO --output-dir /models %s", retryCount, model.Source)).Root()
 		// verify sha256 checksum if defined
+		var opts []llb.HTTPOption
+		opts = append(opts, llb.Filename(fileNameFromURL(model.Source)))
 		if model.SHA256 != "" {
-			path := fmt.Sprintf("/models/%s", fileNameFromURL(model.Source))
-			s = s.Run(shf("echo \"%s %s\" | sha256sum -c -", model.SHA256, path)).Root()
+			digest := digest.NewDigestFromEncoded(digest.SHA256, model.SHA256)
+			opts = append(opts, llb.Checksum(digest))
 		}

+		m := llb.HTTP(model.Source, opts...)
+
+		var copyOpts []llb.CopyOption
+		copyOpts = append(copyOpts, &llb.CopyInfo{
+			CreateDestPath: true,
+		})
+		s = s.File(
+			llb.Copy(m, fileNameFromURL(model.Source), "/models/"+fileNameFromURL(model.Source), copyOpts...),
+			llb.WithCustomName("Copying "+fileNameFromURL(model.Source)+" to /models"), //nolint: goconst
+		)
+
 		// create prompt templates if defined
 		for _, pt := range model.PromptTemplates {
 			if pt.Name != "" && pt.Template != "" {
@@ -57,7 +67,7 @@ func copyModels(s llb.State, merge llb.State, c *config.Config) (llb.State, llb.
 		}
 	}
 	diff := llb.Diff(initState, s)
-	merge = llb.Merge([]llb.State{merge, diff})
+	merge := llb.Merge([]llb.State{db, diff})
 	return s, merge
 }

@@ -69,29 +79,29 @@ func fileNameFromURL(urlString string) string {
 	return path.Base(parsedURL.Path)
 }

-func curl(s llb.State) llb.State {
-	i := s.Run(llb.Shlex("apt-get update"), llb.IgnoreCache).Root()
-	return i.Run(llb.Shlex("apt-get install curl -y")).Root()
-}
-
-func installCuda(s llb.State) (llb.State, llb.State) {
+func installCuda(s llb.State, merge llb.State) llb.State {
 	initState := s

-	s = s.Run(shf("curl -O https://developer.download.nvidia.com/compute/cuda/repos/debian12/x86_64/cuda-keyring_1.1-1_all.deb && dpkg -i cuda-keyring_1.1-1_all.deb && rm cuda-keyring_1.1-1_all.deb")).Root()
-	s = s.Run(llb.Shlex("apt-get update"), llb.IgnoreCache).Root()
-	s = s.Run(shf("apt-get install -y libcublas-%[1]s cuda-cudart-%[1]s && apt-get clean", cudaVersion)).Root()
+	cudaKeyringURL := "https://developer.download.nvidia.com/compute/cuda/repos/debian12/x86_64/cuda-keyring_1.1-1_all.deb"
+	cudaKeyring := llb.HTTP(cudaKeyringURL)
+	s = s.File(
+		llb.Copy(cudaKeyring, fileNameFromURL(cudaKeyringURL), "/"),
+		llb.WithCustomName("Copying "+fileNameFromURL(cudaKeyringURL)), //nolint: goconst
+	)
+	s = s.Run(shf("dpkg -i cuda-keyring_1.1-1_all.deb && rm cuda-keyring_1.1-1_all.deb")).Root()
+	s = s.Run(shf("apt-get update && apt-get install -y ca-certificates && apt-get update && apt-get install -y libcublas-%[1]s cuda-cudart-%[1]s && apt-get clean", cudaVersion), llb.IgnoreCache).Root()

 	diff := llb.Diff(initState, s)
-	merge := llb.Merge([]llb.State{llb.Image(distrolessBase), diff})
-	return s, merge
+	merge = llb.Merge([]llb.State{merge, diff})
+	return merge
 }

-func addLocalAI(c *config.Config, s llb.State, merge llb.State) llb.State {
+func addLocalAI(c *config.Config, s llb.State, merge llb.State) (llb.State, llb.State) {
 	initState := s
 	var localAIURL string
 	switch c.Runtime {
 	case utils.RuntimeNVIDIA:
-		localAIURL = fmt.Sprintf("https://sertacstorage.blob.core.windows.net/localai/%s/local-ai", localAIVersion)
+		localAIURL = fmt.Sprintf("https://sertaccdn.azureedge.net/localai/%s/local-ai", localAIVersion)
 	case utils.RuntimeCPUAVX2:
 		localAIURL = fmt.Sprintf("https://github.com/mudler/LocalAI/releases/download/%s/local-ai-avx2-Linux-x86_64", localAIVersion)
 	case utils.RuntimeCPUAVX512:
@@ -100,10 +110,17 @@ func addLocalAI(c *config.Config, s llb.State, merge llb.State) llb.State {
 		localAIURL = fmt.Sprintf("https://github.com/mudler/LocalAI/releases/download/%s/local-ai-avx-Linux-x86_64", localAIVersion)
 	}

-	s = s.Run(llb.Shlexf("curl -Lo /usr/bin/local-ai %s", localAIURL)).Root()
-	s = s.Run(llb.Shlex("chmod +x /usr/bin/local-ai")).Root()
+	var opts []llb.HTTPOption
+	opts = append(opts, llb.Filename("local-ai"))
+	opts = append(opts, llb.Chmod(0o755))
+	localAI := llb.HTTP(localAIURL, opts...)
+	s = s.File(
+		llb.Copy(localAI, "local-ai", "/usr/bin"),
+		llb.WithCustomName("Copying "+fileNameFromURL(localAIURL)+" to /usr/bin"), //nolint: goconst
+	)

 	diff := llb.Diff(initState, s)
-	return llb.Merge([]llb.State{merge, diff})
+	return s, llb.Merge([]llb.State{merge, diff})
 }

 func shf(cmd string, v ...interface{}) llb.RunOption {
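Every change in this file follows the same pattern: a `curl` RUN step becomes an `llb.HTTP` source that BuildKit fetches (and, when a checksum is supplied, verifies) before the result is copied into the build state. A condensed sketch of that pattern, assuming the same imports as `convert.go`; the function name `fetchIntoState` and its `destDir` parameter are illustrative, not part of the diff:

```go
// fetchIntoState downloads url with BuildKit's HTTP source and copies it
// into destDir of the given state. If sha256hex is non-empty, BuildKit
// verifies the download against it and fails the build on mismatch.
func fetchIntoState(s llb.State, url, sha256hex, destDir string) llb.State {
	name := path.Base(url) // stand-in for fileNameFromURL, which also parses the URL
	opts := []llb.HTTPOption{llb.Filename(name)}
	if sha256hex != "" {
		opts = append(opts, llb.Checksum(digest.NewDigestFromEncoded(digest.SHA256, sha256hex)))
	}
	src := llb.HTTP(url, opts...)
	return s.File(
		llb.Copy(src, name, destDir+"/"+name, &llb.CopyInfo{CreateDestPath: true}),
		llb.WithCustomName("Copying "+name+" to "+destDir),
	)
}
```

Compared with the removed `curl` steps, BuildKit can cache each download as its own source node independently of the surrounding layers, and `llb.Diff`/`llb.Merge` then graft only the resulting files onto the distroless base.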
1 change: 1 addition & 0 deletions test/aikitfile.yaml
@@ -4,6 +4,7 @@ debug: true
 models:
   - name: llama-2-7b-chat
     source: https://huggingface.co/TheBloke/Llama-2-7B-Chat-GGUF/resolve/main/llama-2-7b-chat.Q4_K_M.gguf
+    sha256: "08a5566d61d7cb6b420c3e4387a39e0078e1f2fe5f055f3a03887385304d4bfa"
 config: |
   - name: llama-2-7b-chat
     backend: llama
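The `sha256` field holds the bare hex digest of the model file; `convert.go` wraps it with `digest.NewDigestFromEncoded(digest.SHA256, ...)` into the `sha256:<hex>` form that `llb.Checksum` verifies. One way to compute the value locally, as a self-contained sketch (the file path is an example):

```go
package main

import (
	"crypto/sha256"
	"fmt"
	"io"
	"os"
)

func main() {
	f, err := os.Open("llama-2-7b-chat.Q4_K_M.gguf") // example path
	if err != nil {
		panic(err)
	}
	defer f.Close()

	h := sha256.New()
	if _, err := io.Copy(h, f); err != nil {
		panic(err)
	}
	// Prints the bare hex digest expected by the aikitfile's sha256 field.
	fmt.Printf("%x\n", h.Sum(nil))
}
```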
