diff --git a/.github/dependabot.yaml b/.github/dependabot.yaml
new file mode 100644
index 00000000..c507d449
--- /dev/null
+++ b/.github/dependabot.yaml
@@ -0,0 +1,10 @@
+version: 2
+updates:
+ - package-ecosystem: "gomod"
+ directory: "/"
+ schedule:
+ interval: "weekly"
+ - package-ecosystem: "github-actions"
+ directory: "/"
+ schedule:
+ interval: "weekly"
diff --git a/.github/workflows/lint.yaml b/.github/workflows/lint.yaml
new file mode 100644
index 00000000..95d25259
--- /dev/null
+++ b/.github/workflows/lint.yaml
@@ -0,0 +1,30 @@
+name: lint
+
+on:
+ push:
+ branches:
+ - main
+ paths-ignore:
+ - '**.md'
+ pull_request:
+ branches:
+ - main
+ paths-ignore:
+ - '**.md'
+
+permissions: read-all
+
+jobs:
+ lint:
+ runs-on: ubuntu-latest
+ steps:
+ - uses: actions/checkout@v4
+
+ - uses: actions/setup-go@v4
+ with:
+ go-version: "1.21"
+
+ - name: lint
+ uses: golangci/golangci-lint-action@v3
+ with:
+ version: v1.55.2
\ No newline at end of file
diff --git a/.github/workflows/pre-release.yaml b/.github/workflows/pre-release.yaml
new file mode 100644
index 00000000..1ffe70ed
--- /dev/null
+++ b/.github/workflows/pre-release.yaml
@@ -0,0 +1,42 @@
+name: pre-release
+
+on:
+ push:
+ branches:
+ - main
+
+permissions:
+ contents: write
+ packages: write
+
+jobs:
+ release:
+ runs-on: ubuntu-latest
+ timeout-minutes: 360
+ steps:
+ - uses: actions/checkout@v4
+
+ - name: cleanup disk space
+ run: |
+ df -H
+ docker system prune -f -a --volumes
+ sudo rm -rf /usr/share/dotnet
+ sudo rm -rf /opt/ghc
+ sudo rm -rf "/usr/local/share/boost"
+ sudo rm -rf "$AGENT_TOOLSDIRECTORY"
+ df -H
+
+ - name: Login to ghcr
+ uses: docker/login-action@v3
+ with:
+ registry: ghcr.io
+ username: ${{ github.actor }}
+ password: ${{ secrets.GITHUB_TOKEN }}
+
+ - name: create buildx builder
+ run: docker buildx create --use --name builder --bootstrap
+ - uses: crazy-max/ghaction-github-runtime@v3
+
+ - name: Push aikit:dev to GHCR
+ run: |
+        docker buildx build . -t ghcr.io/sozercan/aikit:dev --push --cache-from=type=gha,scope=aikit --cache-to=type=gha,scope=aikit,mode=max
diff --git a/.github/workflows/release.yaml b/.github/workflows/release.yaml
new file mode 100644
index 00000000..5cb7f0cd
--- /dev/null
+++ b/.github/workflows/release.yaml
@@ -0,0 +1,38 @@
+name: release
+
+on:
+ push:
+ tags:
+ - v*
+
+permissions:
+ contents: write
+ packages: write
+
+jobs:
+ release:
+ runs-on: ubuntu-latest
+ timeout-minutes: 360
+ steps:
+ - uses: actions/checkout@v4
+ with:
+ fetch-depth: 0
+
+ - name: Login to ghcr
+ uses: docker/login-action@v3
+ with:
+ registry: ghcr.io
+ username: ${{ github.actor }}
+ password: ${{ secrets.GITHUB_TOKEN }}
+
+ - name: Get tag
+ run: |
+ echo "TAG=${GITHUB_REF#refs/tags/}" >> $GITHUB_ENV
+
+ - name: Push aikit:latest to GHCR
+ run: |
+ docker buildx build . -t ghcr.io/sozercan/aikit:${TAG} -t ghcr.io/sozercan/aikit:latest --push --cache-from=type=gha,scope=aikit --cache-to=type=gha,scope=aikit,mode=max
+
+ # - name: release llama 2 image
+ # run: |
+ # docker buildx build -t ghcr.io/sozercan/llama2:7b -t ghcr.io/sozercan/llama2:7b-chat -t ghcr.io/sozercan/llama2:chat -t ghcr.io/sozercan/llama2:latest -f models/llama-2-chat-7b.yaml --push --cache-from=type=gha,scope=llama-2-7b-chat --cache-to=type=gha,scope=llama-2-7b-chat,mode=max
diff --git a/.github/workflows/test-docker.yaml b/.github/workflows/test-docker.yaml
new file mode 100644
index 00000000..e4fc5710
--- /dev/null
+++ b/.github/workflows/test-docker.yaml
@@ -0,0 +1,54 @@
+name: docker-test
+
+on:
+ push:
+ branches:
+ - main
+ paths-ignore:
+ - '**.md'
+ pull_request:
+ branches:
+ - main
+ paths-ignore:
+ - '**.md'
+
+permissions: read-all
+
+jobs:
+ test:
+ runs-on: ubuntu-latest
+ timeout-minutes: 240
+ steps:
+ - uses: actions/checkout@v4
+
+ - name: cleanup disk space
+ run: |
+ df -H
+ docker system prune -f -a --volumes
+ sudo rm -rf /usr/share/dotnet
+ sudo rm -rf /opt/ghc
+ sudo rm -rf "/usr/local/share/boost"
+ sudo rm -rf "$AGENT_TOOLSDIRECTORY"
+ df -H
+
+ - name: create buildx builder
+ run: docker buildx create --use --name builder --bootstrap
+ - uses: crazy-max/ghaction-github-runtime@v3
+
+ - name: build aikit
+ run: docker buildx build . -t sozercan/aikit:test --load --cache-from=type=gha,scope=aikit --cache-to=type=gha,scope=aikit,mode=max
+
+ - name: build test model
+ run: docker buildx build . -t sozercan/testmodel:test -f test/aikitfile.yaml --load --cache-from=type=gha,scope=testmodel --cache-to=type=gha,scope=testmodel,mode=max
+
+ - name: list images
+ run: docker images
+
+ - name: run test model
+ run: docker run -d -p 8080:8080 sozercan/testmodel:test
+
+ - name: install e2e dependencies
+ run: make test-e2e-dependencies
+
+ - name: run bats test
+ run: make test-e2e
diff --git a/.github/workflows/test-kubernetes.yaml b/.github/workflows/test-kubernetes.yaml
new file mode 100644
index 00000000..840ced48
--- /dev/null
+++ b/.github/workflows/test-kubernetes.yaml
@@ -0,0 +1,60 @@
+name: kubernetes-test
+
+on:
+ push:
+ branches:
+ - main
+ paths-ignore:
+ - '**.md'
+ pull_request:
+ branches:
+ - main
+ paths-ignore:
+ - '**.md'
+
+permissions: read-all
+
+jobs:
+ test:
+ runs-on: ubuntu-latest
+ timeout-minutes: 240
+ steps:
+ - uses: actions/checkout@v4
+
+ - name: cleanup disk space
+ run: |
+ df -H
+ docker system prune -a -f
+ sudo rm -rf /usr/share/dotnet
+ sudo rm -rf /opt/ghc
+ sudo rm -rf "/usr/local/share/boost"
+ sudo rm -rf "$AGENT_TOOLSDIRECTORY"
+ df -H
+
+ - name: create buildx builder
+ run: docker buildx create --use --name builder --bootstrap
+ - uses: crazy-max/ghaction-github-runtime@v3
+
+ - name: build aikit
+ run: docker buildx build . -t sozercan/aikit:test --load --cache-from=type=gha,scope=aikit --cache-to=type=gha,scope=aikit,mode=max
+
+ - name: build test model
+ run: docker buildx build . -t sozercan/testmodel:test -f test/aikitfile.yaml --load --cache-from=type=gha,scope=testmodel --cache-to=type=gha,scope=testmodel,mode=max
+
+ - name: install e2e dependencies
+ run: make test-e2e-dependencies
+
+ - name: create kind cluster
+ run: kind create cluster --wait 5m
+
+ - name: load test model image into kind cluster
+ run: kind load docker-image sozercan/testmodel:test
+
+ - name: deploy test model
+ run: |
+ kubectl create deployment test-model-deployment --image=sozercan/testmodel:test --replicas 1
+        kubectl expose deployment test-model-deployment --port=8080 --target-port=8080 --name=test-model-service
+ kubectl port-forward service/test-model-service 8080:8080 &
+
+ - name: run bats test
+ run: make test-e2e
\ No newline at end of file
diff --git a/.github/workflows/unit-test.yaml b/.github/workflows/unit-test.yaml
new file mode 100644
index 00000000..75250f3b
--- /dev/null
+++ b/.github/workflows/unit-test.yaml
@@ -0,0 +1,33 @@
+name: unit-test
+
+on:
+ push:
+ branches:
+ - main
+ paths-ignore:
+ - '**.md'
+ pull_request:
+ branches:
+ - main
+ paths-ignore:
+ - '**.md'
+
+permissions: read-all
+
+jobs:
+ test:
+ runs-on: ubuntu-latest
+ steps:
+ - uses: actions/checkout@v4
+
+ - uses: actions/setup-go@v4
+ with:
+ go-version: "1.21"
+
+ - name: go mod tidy
+ run: |
+ go mod tidy
+ git diff --exit-code
+
+ - name: test
+ run: make test
diff --git a/.github/workflows/update-models.yaml b/.github/workflows/update-models.yaml
new file mode 100644
index 00000000..2d5d3280
--- /dev/null
+++ b/.github/workflows/update-models.yaml
@@ -0,0 +1,38 @@
+name: update-models
+
+on:
+ workflow_dispatch:
+
+permissions:
+ contents: write
+ packages: write
+
+jobs:
+ update-models:
+ runs-on: ubuntu-latest
+ timeout-minutes: 360
+ steps:
+ - uses: actions/checkout@v4
+
+ - name: Login to GHCR
+ uses: docker/login-action@v3
+ with:
+ registry: ghcr.io
+ username: ${{ github.actor }}
+ password: ${{ secrets.GITHUB_TOKEN }}
+
+ - name: Push llama 2 models to GHCR
+ run: |
+ docker buildx create --use --name builder --bootstrap
+
+ # cpu avx
+ MODELS_PATH=models
+ docker buildx build . -t ghcr.io/sozercan/llama2:7b -f ${MODELS_PATH}/llama-2-7b-chat.yaml --push
+ docker buildx build . -t ghcr.io/sozercan/llama2:13b -f ${MODELS_PATH}/llama-2-13b-chat.yaml --push
+ docker buildx build . -t ghcr.io/sozercan/orca2:13b -f ${MODELS_PATH}/orca-2-13b.yaml --push
+
+ # cuda
+ CUDA_MODELS_PATH=models/cuda
+ docker buildx build . -t ghcr.io/sozercan/llama2:7b-cuda -f ${CUDA_MODELS_PATH}/llama-2-7b-chat.yaml --push
+ docker buildx build . -t ghcr.io/sozercan/llama2:13b-cuda -f ${CUDA_MODELS_PATH}/llama-2-13b-chat.yaml --push
+ docker buildx build . -t ghcr.io/sozercan/orca2:13b-cuda -f ${CUDA_MODELS_PATH}/orca-2-13b.yaml --push
\ No newline at end of file
diff --git a/.gitignore b/.gitignore
index 3b735ec4..9108c945 100644
--- a/.gitignore
+++ b/.gitignore
@@ -19,3 +19,6 @@
# Go workspace file
go.work
+
+bin
+coverage.txt
diff --git a/.golangci.yaml b/.golangci.yaml
new file mode 100644
index 00000000..0a828406
--- /dev/null
+++ b/.golangci.yaml
@@ -0,0 +1,40 @@
+run:
+ timeout: 5m
+
+linters-settings:
+ # gocritic:
+ # enabled-tags:
+ # - performance
+ lll:
+ line-length: 200
+
+ misspell:
+ locale: US
+ staticcheck:
+ go: "1.21"
+
+linters:
+ disable-all: true
+ enable:
+ - errcheck
+ - errorlint
+ - exportloopref
+ - forcetypeassert
+ - gci
+ - gocritic
+ - goconst
+ - godot
+ - gofmt
+ - gofumpt
+ - goimports
+ - gosec
+ - gosimple
+ - govet
+ - ineffassign
+ - misspell
+ - revive
+ - staticcheck
+ - typecheck
+ - unconvert
+ - unused
+ - whitespace
diff --git a/Dockerfile b/Dockerfile
new file mode 100644
index 00000000..2a398d85
--- /dev/null
+++ b/Dockerfile
@@ -0,0 +1,8 @@
+FROM golang:1.21-bullseye as builder
+COPY . /go/src/github.com/sozercan/aikit
+WORKDIR /go/src/github.com/sozercan/aikit
+RUN CGO_ENABLED=0 go build -o /aikit --ldflags '-extldflags "-static"' ./cmd/frontend
+
+FROM scratch
+COPY --from=builder /aikit /bin/aikit
+ENTRYPOINT ["/bin/aikit"]
diff --git a/Makefile b/Makefile
new file mode 100644
index 00000000..bbe36043
--- /dev/null
+++ b/Makefile
@@ -0,0 +1,44 @@
+REGISTRY ?= ghcr.io/sozercan
+BATS_TESTS_FILE ?= test/bats/test.bats
+BATS_VERSION ?= 1.10.0
+KIND_VERSION ?= 0.20.0
+KUBERNETES_VERSION ?= 1.28.0
+TAG ?= test
+OUTPUT_TYPE ?= type=docker
+PULL ?=
+NO_CACHE ?=
+
+.PHONY: lint
+lint:
+ golangci-lint run -v ./... --timeout 5m
+
+.PHONY: build-aikit
+build-aikit:
+ docker buildx build . -t ${REGISTRY}/aikit:${TAG} --output=${OUTPUT_TYPE}
+
+.PHONY: build-test-model
+build-test-model:
+ docker buildx build . -t ${REGISTRY}/testmodel:${TAG} -f test/aikitfile.yaml --output=${OUTPUT_TYPE}
+
+.PHONY: run-test-model
+run-test-model:
+ docker run -p 8080:8080 ${REGISTRY}/testmodel:${TAG}
+
+.PHONY: test
+test:
+ go test -v ./... -race -coverprofile=coverage.txt -covermode=atomic
+
+.PHONY: test-e2e-dependencies
+test-e2e-dependencies:
+ mkdir -p ${GITHUB_WORKSPACE}/bin
+ echo "${GITHUB_WORKSPACE}/bin" >> ${GITHUB_PATH}
+
+ curl -sSLO https://github.com/bats-core/bats-core/archive/v${BATS_VERSION}.tar.gz && tar -zxvf v${BATS_VERSION}.tar.gz && bash bats-core-${BATS_VERSION}/install.sh ${GITHUB_WORKSPACE}
+
+ # used for kubernetes test
+ curl -sSL https://dl.k8s.io/release/v${KUBERNETES_VERSION}/bin/linux/amd64/kubectl -o ${GITHUB_WORKSPACE}/bin/kubectl && chmod +x ${GITHUB_WORKSPACE}/bin/kubectl
+ curl -sSL https://github.com/kubernetes-sigs/kind/releases/download/v${KIND_VERSION}/kind-linux-amd64 -o ${GITHUB_WORKSPACE}/bin/kind && chmod +x ${GITHUB_WORKSPACE}/bin/kind
+
+.PHONY: test-e2e
+test-e2e:
+	${GITHUB_WORKSPACE}/bin/bats --verbose-run --trace ${BATS_TESTS_FILE}
diff --git a/README.md b/README.md
index cf24d118..beb34892 100644
--- a/README.md
+++ b/README.md
@@ -1 +1,193 @@
-# aikit
\ No newline at end of file
+# AIKit ✨
+
+
+
+
+
+AIKit is a quick, easy, and local or cloud-agnostic way to get started hosting and deploying large language models (LLMs) for inference. No GPU, internet access or additional tools are needed to get started except for [Docker](https://docs.docker.com/desktop/install/linux-install/)!
+
+AIKit uses [LocalAI](https://localai.io/) under-the-hood to run inference. LocalAI provides a drop-in replacement REST API that is OpenAI API compatible, so you can use any OpenAI API compatible client, such as [Kubectl AI](https://github.com/sozercan/kubectl-ai), to send requests to open-source LLMs powered by AIKit!
+
+> [!NOTE]
+> At this time, AIKit is tested with LocalAI `llama` backend. Other backends may work but are not tested. Please open an issue if you'd like to see support for other backends.
+
+## Features
+
+- 🐳 No GPU, internet access or additional tools needed except for [Docker](https://docs.docker.com/desktop/install/linux-install/)!
+- 🤏 Minimal image size, resulting in less vulnerabilities and smaller attack surface with a custom [distroless](https://github.com/GoogleContainerTools/distroless)-based image
+- 🚀 Easy to use declarative configuration
+- ✨ OpenAI API compatible to use with any OpenAI API compatible client
+- 🚢 Kubernetes deployment ready
+- 📦 Supports multiple models with a single image
+- 🖥️ Supports GPU-accelerated inferencing with NVIDIA GPUs
+
+## Demos
+
+### Building an image with a Llama 2 model
+
+[](https://asciinema.org/a/J9bitkONKPvedSfU1RkrmVEhD)
+
+### Inference
+
+[](https://asciinema.org/a/DYh5bCQMNPSis1whhsfPeMOoM)
+
+## Pre-made Models
+
+AIKit comes with pre-made models that you can use out-of-the-box!
+
+### CPU
+- 🦙 Llama 2 7B Chat: `ghcr.io/sozercan/llama2:7b`
+- 🦙 Llama 2 13B Chat: `ghcr.io/sozercan/llama2:13b`
+- 🐬 Orca 2 13B: `ghcr.io/sozercan/orca2:13b`
+
+### NVIDIA CUDA
+
+- 🦙 Llama 2 7B Chat (CUDA): `ghcr.io/sozercan/llama2:7b-cuda`
+- 🦙 Llama 2 13B Chat (CUDA): `ghcr.io/sozercan/llama2:13b-cuda`
+- 🐬 Orca 2 13B (CUDA): `ghcr.io/sozercan/orca2:13b-cuda`
+
+> CUDA models include CUDA v12. They are used with [NVIDIA GPU acceleration](#gpu-acceleration-support).
+
+## Quick Start
+
+### Creating an image
+
+> [!NOTE]
+> This section shows how to create a custom image with models of your choosing. If you want to use one of the pre-made models, skip to [running models](#running-models).
+>
+> Please see [models folder](./models/) for pre-made model definitions. You can find more model examples at [go-skynet/model-gallery](https://github.com/go-skynet/model-gallery).
+
+Create an `aikitfile.yaml` with the following structure:
+
+```yaml
+#syntax=ghcr.io/sozercan/aikit:latest
+apiVersion: v1alpha1
+models:
+ - name: llama-2-7b-chat
+ source: https://huggingface.co/TheBloke/Llama-2-7B-Chat-GGUF/resolve/main/llama-2-7b-chat.Q4_K_M.gguf
+```
+
+> [!TIP]
+> This is the simplest way to get started. For full `aikitfile` specification, see [specs](docs/specs.md).
+
+First, create a buildx buildkit instance. Alternatively, if you are using Docker v24 with [containerd image store](https://docs.docker.com/storage/containerd/) enabled, you can skip this step.
+
+```bash
+docker buildx create --use --name aikit-builder
+```
+
+Then build your image with:
+
+```bash
+docker buildx build . -t my-model -f aikitfile.yaml --load
+```
+
+This will build a local container image with your model(s). You can see the image with:
+
+```bash
+docker images
+REPOSITORY TAG IMAGE ID CREATED SIZE
+my-model latest e7b7c5a4a2cb About an hour ago 5.51GB
+```
+
+### Running models
+
+You can start the inferencing server for your models with:
+
+```bash
+# for pre-made models, replace "my-model" with the image name
+docker run -d --rm -p 8080:8080 my-model
+```
+
+You can then send requests to `localhost:8080` to run inference from your models. For example:
+
+```bash
+curl http://localhost:8080/v1/chat/completions -H "Content-Type: application/json" -d '{
+ "model": "llama-2-7b-chat",
+ "messages": [{"role": "user", "content": "explain kubernetes in a sentence"}]
+ }'
+{"created":1701236489,"object":"chat.completion","id":"dd1ff40b-31a7-4418-9e32-42151ab6875a","model":"llama-2-7b-chat","choices":[{"index":0,"finish_reason":"stop","message":{"role":"assistant","content":"\nKubernetes is a container orchestration system that automates the deployment, scaling, and management of containerized applications in a microservices architecture."}}],"usage":{"prompt_tokens":0,"completion_tokens":0,"total_tokens":0}}
+```
+
+## Kubernetes Deployment
+
+It is easy to get started deploying your models to Kubernetes!
+
+Make sure you have a Kubernetes cluster running and `kubectl` is configured to talk to it, and your model images are accessible from the cluster. You can use [kind](https://kind.sigs.k8s.io/) to create a local Kubernetes cluster for testing purposes.
+
+```bash
+# create a deployment
+# for pre-made models, replace "my-model" with the image name
+kubectl create deployment my-llm-deployment --image=my-model
+
+# expose it as a service
+kubectl expose deployment my-llm-deployment --port=8080 --target-port=8080 --name=my-llm-service
+
+# easy to scale up and down
+kubectl scale deployment my-llm-deployment --replicas=3
+
+# port-forward for testing locally
+kubectl port-forward service/my-llm-service 8080:8080
+
+# send requests to your model
+curl http://localhost:8080/v1/chat/completions -H "Content-Type: application/json" -d '{
+ "model": "llama-2-7b-chat",
+ "messages": [{"role": "user", "content": "explain kubernetes in a sentence"}]
+ }'
+{"created":1701236489,"object":"chat.completion","id":"dd1ff40b-31a7-4418-9e32-42151ab6875a","model":"llama-2-7b-chat","choices":[{"index":0,"finish_reason":"stop","message":{"role":"assistant","content":"\nKubernetes is a container orchestration system that automates the deployment, scaling, and management of containerized applications in a microservices architecture."}}],"usage":{"prompt_tokens":0,"completion_tokens":0,"total_tokens":0}}
+```
+
+> [!TIP]
+> For an example Kubernetes deployment and service YAML, see [kubernetes folder](./kubernetes/).
+
+## GPU Acceleration Support
+
+> [!NOTE]
+> At this time, only NVIDIA GPU acceleration is supported. Please open an issue if you'd like to see support for other GPU vendors.
+
+### NVIDIA
+
+AIKit supports GPU accelerated inferencing with [NVIDIA Container Toolkit](https://github.com/NVIDIA/nvidia-container-toolkit). You must also have [NVIDIA Drivers](https://www.nvidia.com/en-us/drivers/unix/) installed on your host machine.
+
+For Kubernetes, [NVIDIA GPU Operator](https://github.com/NVIDIA/gpu-operator) provides a streamlined way to install the NVIDIA drivers and container toolkit to configure your cluster to use GPUs.
+
+To get started with GPU-accelerated inferencing, make sure to set the following in your `aikitfile` and build your model.
+
+```yaml
+runtime: cuda # use NVIDIA CUDA runtime
+f16: true # use float16 precision
+gpu_layers: 35 # number of layers to offload to GPU
+low_vram: true # for devices with low VRAM
+```
+
+> [!TIP]
+> Make sure to customize these values based on your model and GPU specs.
+
+After building the model, you can run it with [`--gpus all`](https://docs.nvidia.com/datacenter/cloud-native/container-toolkit/latest/docker-specialized.html#gpu-enumeration) flag to enable GPU support:
+
+```bash
+# for pre-made models, replace "my-model" with the image name
+docker run --rm --gpus all -p 8080:8080 my-model
+```
+
+If GPU acceleration is working, you'll see output that is similar to following in the debug logs:
+
+```bash
+5:32AM DBG GRPC(llama-2-7b-chat.Q4_K_M.gguf-127.0.0.1:43735): stderr ggml_init_cublas: found 1 CUDA devices:
+5:32AM DBG GRPC(llama-2-7b-chat.Q4_K_M.gguf-127.0.0.1:43735): stderr Device 0: Tesla T4, compute capability 7.5
+...
+5:32AM DBG GRPC(llama-2-7b-chat.Q4_K_M.gguf-127.0.0.1:43735): stderr llm_load_tensors: using CUDA for GPU acceleration
+5:32AM DBG GRPC(llama-2-7b-chat.Q4_K_M.gguf-127.0.0.1:43735): stderr llm_load_tensors: mem required = 70.41 MB (+ 2048.00 MB per state)
+5:32AM DBG GRPC(llama-2-7b-chat.Q4_K_M.gguf-127.0.0.1:43735): stderr llm_load_tensors: offloading 32 repeating layers to GPU
+5:32AM DBG GRPC(llama-2-7b-chat.Q4_K_M.gguf-127.0.0.1:43735): stderr llm_load_tensors: offloading non-repeating layers to GPU
+5:32AM DBG GRPC(llama-2-7b-chat.Q4_K_M.gguf-127.0.0.1:43735): stderr llm_load_tensors: offloading v cache to GPU
+5:32AM DBG GRPC(llama-2-7b-chat.Q4_K_M.gguf-127.0.0.1:43735): stderr llm_load_tensors: offloading k cache to GPU
+5:32AM DBG GRPC(llama-2-7b-chat.Q4_K_M.gguf-127.0.0.1:43735): stderr llm_load_tensors: offloaded 35/35 layers to GPU
+5:32AM DBG GRPC(llama-2-7b-chat.Q4_K_M.gguf-127.0.0.1:43735): stderr llm_load_tensors: VRAM used: 5869 MB
+```
+
+## Acknowledgements and Credits
+
+- [LocalAI](https://localai.io/) for providing the inference engine
+- [Mockerfile](https://github.com/r2d4/mockerfile) for the inspiration and sample code
+- [Huggingface](https://huggingface.co/) and [TheBloke](https://huggingface.co/TheBloke) for providing the models
diff --git a/cmd/frontend/main.go b/cmd/frontend/main.go
new file mode 100644
index 00000000..37ae5c5c
--- /dev/null
+++ b/cmd/frontend/main.go
@@ -0,0 +1,24 @@
+package main
+
+import (
+ "os"
+
+ "github.com/moby/buildkit/frontend/gateway/grpcclient"
+ "github.com/moby/buildkit/util/appcontext"
+ "github.com/moby/buildkit/util/bklog"
+ "github.com/sirupsen/logrus"
+ "github.com/sozercan/aikit/pkg/build"
+ "google.golang.org/grpc/grpclog"
+)
+
+func main() {
+ bklog.L.Logger.SetOutput(os.Stderr)
+ grpclog.SetLoggerV2(grpclog.NewLoggerV2WithVerbosity(bklog.L.WriterLevel(logrus.InfoLevel), bklog.L.WriterLevel(logrus.WarnLevel), bklog.L.WriterLevel(logrus.ErrorLevel), 1))
+
+ ctx := appcontext.Context()
+
+ if err := grpcclient.RunFromEnvironment(ctx, build.Build); err != nil {
+ bklog.L.WithError(err).Fatal("error running frontend")
+ os.Exit(1)
+ }
+}
diff --git a/docs/images/logo.png b/docs/images/logo.png
new file mode 100644
index 00000000..31caeb11
Binary files /dev/null and b/docs/images/logo.png differ
diff --git a/docs/specs.md b/docs/specs.md
new file mode 100644
index 00000000..b1cf6df9
--- /dev/null
+++ b/docs/specs.md
@@ -0,0 +1,58 @@
+# API Specifications
+
+## v1alpha1
+
+```yaml
+apiVersion: # required. only v1alpha1 is supported at the moment
+debug: # optional. if set to true, will print debug logs
+runtime: # optional. defaults to avx. can be avx, avx2, avx512, cuda
+models: # required. list of models to build
+ - name: # required. name of the model
+ source: # required. source of the model. must be a url
+ sha256: # optional. sha256 hash of the model file
+ promptTemplates: # optional. list of prompt templates for a model
+ - name: # required. name of the template
+ template: # required. template string
+config: # optional. list of config files
+```
+
+Example:
+
+```yaml
+#syntax=ghcr.io/sozercan/aikit:latest
+apiVersion: v1alpha1
+debug: true
+runtime: cuda
+models:
+ - name: llama-2-7b-chat
+ source: https://huggingface.co/TheBloke/Llama-2-7B-Chat-GGUF/resolve/main/llama-2-7b-chat.Q4_K_M.gguf
+ sha256: "08a5566d61d7cb6b420c3e4387a39e0078e1f2fe5f055f3a03887385304d4bfa"
+ promptTemplates:
+ - name: "llama-2-7b-chat"
+ template: |
+ {{if eq .RoleName \"assistant\"}}{{.Content}}{{else}}
+ [INST]
+ {{if .SystemPrompt}}{{.SystemPrompt}}{{else if eq .RoleName \"system\"}}<>{{.Content}}<>
+
+ {{else if .Content}}{{.Content}}{{end}}
+ [/INST]
+ {{end}}
+config: |
+ - name: \"llama-2-7b-chat\"
+ backend: \"llama\"
+ parameters:
+ top_k: 80
+ temperature: 0.2
+ top_p: 0.7
+ model: \"llama-2-7b-chat.Q4_K_M.gguf\"
+ context_size: 4096
+ roles:
+ function: 'Function Result:'
+ assistant_function_call: 'Function Call:'
+ assistant: 'Assistant:'
+ user: 'User:'
+ system: 'System:'
+ template:
+ chat_message: \"llama-2-7b-chat\"
+ system_prompt: \"You are a helpful assistant, below is a conversation, please respond with the next message and do not ask follow-up questions\"
+```
diff --git a/go.mod b/go.mod
new file mode 100644
index 00000000..72149b4c
--- /dev/null
+++ b/go.mod
@@ -0,0 +1,56 @@
+module github.com/sozercan/aikit
+
+go 1.21
+
+require (
+ github.com/containerd/containerd v1.7.9
+ github.com/moby/buildkit v0.12.3
+ github.com/opencontainers/image-spec v1.1.0-rc5
+ github.com/pkg/errors v0.9.1
+ github.com/sirupsen/logrus v1.9.3
+ google.golang.org/grpc v1.59.0
+ gopkg.in/yaml.v2 v2.4.0
+)
+
+require (
+ cloud.google.com/go/compute/metadata v0.2.3 // indirect
+ github.com/AdaLogics/go-fuzz-headers v0.0.0-20230811130428-ced1acdcaa24 // indirect
+ github.com/Microsoft/go-winio v0.6.1 // indirect
+ github.com/Microsoft/hcsshim v0.11.4 // indirect
+ github.com/containerd/continuity v0.4.2 // indirect
+ github.com/containerd/log v0.1.0 // indirect
+ github.com/containerd/ttrpc v1.2.2 // indirect
+ github.com/containerd/typeurl/v2 v2.1.1 // indirect
+ github.com/docker/distribution v2.8.2+incompatible // indirect
+ github.com/docker/docker v24.0.0-rc.2.0.20230718135204-8e51b8b59cb8+incompatible // indirect
+ github.com/docker/go-units v0.5.0 // indirect
+ github.com/go-logr/logr v1.2.4 // indirect
+ github.com/go-logr/stdr v1.2.2 // indirect
+ github.com/gogo/googleapis v1.4.1 // indirect
+ github.com/gogo/protobuf v1.3.2 // indirect
+ github.com/golang/protobuf v1.5.3 // indirect
+ github.com/google/shlex v0.0.0-20191202100458-e7afc7fbc510 // indirect
+ github.com/grpc-ecosystem/go-grpc-middleware v1.3.0 // indirect
+ github.com/in-toto/in-toto-golang v0.5.0 // indirect
+ github.com/klauspost/compress v1.16.3 // indirect
+ github.com/kr/text v0.2.0 // indirect
+ github.com/moby/locker v1.0.1 // indirect
+ github.com/moby/sys/signal v0.7.0 // indirect
+ github.com/opencontainers/go-digest v1.0.0 // indirect
+ github.com/secure-systems-lab/go-securesystemslib v0.4.0 // indirect
+ github.com/shibumi/go-pathspec v1.3.0 // indirect
+ github.com/tonistiigi/fsutil v0.0.0-20230629203738-36ef4d8c0dbb // indirect
+ go.opentelemetry.io/contrib/instrumentation/google.golang.org/grpc/otelgrpc v0.45.0 // indirect
+ go.opentelemetry.io/otel v1.19.0 // indirect
+ go.opentelemetry.io/otel/metric v1.19.0 // indirect
+ go.opentelemetry.io/otel/trace v1.19.0 // indirect
+ golang.org/x/crypto v0.14.0 // indirect
+ golang.org/x/mod v0.11.0 // indirect
+ golang.org/x/net v0.17.0 // indirect
+ golang.org/x/sync v0.3.0 // indirect
+ golang.org/x/sys v0.13.0 // indirect
+ golang.org/x/text v0.13.0 // indirect
+ golang.org/x/tools v0.10.0 // indirect
+ google.golang.org/genproto/googleapis/rpc v0.0.0-20230822172742-b8732ec3820d // indirect
+ google.golang.org/protobuf v1.31.0 // indirect
+)
diff --git a/go.sum b/go.sum
new file mode 100644
index 00000000..b165cb49
--- /dev/null
+++ b/go.sum
@@ -0,0 +1,245 @@
+cloud.google.com/go v0.26.0 h1:e0WKqKTd5BnrG8aKH3J3h+QvEIQtSUcf2n5UZ5ZgLtQ=
+cloud.google.com/go v0.26.0/go.mod h1:aQUYkXzVsufM+DwF1aE+0xfcU+56JwCaLick0ClmMTw=
+cloud.google.com/go/compute v1.23.0 h1:tP41Zoavr8ptEqaW6j+LQOnyBBhO7OkOMAGrgLopTwY=
+cloud.google.com/go/compute v1.23.0/go.mod h1:4tCnrn48xsqlwSAiLf1HXMQk8CONslYbdiEZc9FEIbM=
+cloud.google.com/go/compute/metadata v0.2.3 h1:mg4jlk7mCAj6xXp9UJ4fjI9VUI5rubuGBW5aJ7UnBMY=
+cloud.google.com/go/compute/metadata v0.2.3/go.mod h1:VAV5nSsACxMJvgaAuX6Pk2AawlZn8kiOGuCv6gTkwuA=
+github.com/AdaLogics/go-fuzz-headers v0.0.0-20230811130428-ced1acdcaa24 h1:bvDV9vkmnHYOMsOr4WLk+Vo07yKIzd94sVoIqshQ4bU=
+github.com/AdaLogics/go-fuzz-headers v0.0.0-20230811130428-ced1acdcaa24/go.mod h1:8o94RPi1/7XTJvwPpRSzSUedZrtlirdB3r9Z20bi2f8=
+github.com/BurntSushi/toml v0.3.1/go.mod h1:xHWCNGjB5oqiDr8zfno3MHue2Ht5sIBksp03qcyfWMU=
+github.com/Microsoft/go-winio v0.6.1 h1:9/kr64B9VUZrLm5YYwbGtUJnMgqWVOdUAXu6Migciow=
+github.com/Microsoft/go-winio v0.6.1/go.mod h1:LRdKpFKfdobln8UmuiYcKPot9D2v6svN5+sAH+4kjUM=
+github.com/Microsoft/hcsshim v0.11.4 h1:68vKo2VN8DE9AdN4tnkWnmdhqdbpUFM8OF3Airm7fz8=
+github.com/Microsoft/hcsshim v0.11.4/go.mod h1:smjE4dvqPX9Zldna+t5FG3rnoHhaB7QYxPRqGcpAD9w=
+github.com/census-instrumentation/opencensus-proto v0.2.1/go.mod h1:f6KPmirojxKA12rnyqOA5BBL4O983OfeGPqjHWSTneU=
+github.com/client9/misspell v0.3.4/go.mod h1:qj6jICC3Q7zFZvVWo7KLAzC3yx5G7kyvSDkc90ppPyw=
+github.com/cncf/udpa/go v0.0.0-20191209042840-269d4d468f6f/go.mod h1:M8M6+tZqaGXZJjfX53e64911xZQV5JYwmTeXPW+k8Sc=
+github.com/cncf/xds/go v0.0.0-20230607035331-e9ce68804cb4 h1:/inchEIKaYC1Akx+H+gqO04wryn5h75LSazbRlnya1k=
+github.com/cncf/xds/go v0.0.0-20230607035331-e9ce68804cb4/go.mod h1:eXthEFrGJvWHgFFCl3hGmgk+/aYT6PnTQLykKQRLhEs=
+github.com/codahale/rfc6979 v0.0.0-20141003034818-6a90f24967eb h1:EDmT6Q9Zs+SbUoc7Ik9EfrFqcylYqgPZ9ANSbTAntnE=
+github.com/codahale/rfc6979 v0.0.0-20141003034818-6a90f24967eb/go.mod h1:ZjrT6AXHbDs86ZSdt/osfBi5qfexBrKUdONk989Wnk4=
+github.com/containerd/cgroups v1.1.0 h1:v8rEWFl6EoqHB+swVNjVoCJE8o3jX7e8nqBGPLaDFBM=
+github.com/containerd/cgroups v1.1.0/go.mod h1:6ppBcbh/NOOUU+dMKrykgaBnK9lCIBxHqJDGwsa1mIw=
+github.com/containerd/containerd v1.7.9 h1:KOhK01szQbM80YfW1H6RZKh85PHGqY/9OcEZ35Je8sc=
+github.com/containerd/containerd v1.7.9/go.mod h1:0/W44LWEYfSHoxBtsHIiNU/duEkgpMokemafHVCpq9Y=
+github.com/containerd/continuity v0.4.2 h1:v3y/4Yz5jwnvqPKJJ+7Wf93fyWoCB3F5EclWG023MDM=
+github.com/containerd/continuity v0.4.2/go.mod h1:F6PTNCKepoxEaXLQp3wDAjygEnImnZ/7o4JzpodfroQ=
+github.com/containerd/log v0.1.0 h1:TCJt7ioM2cr/tfR8GPbGf9/VRAX8D2B4PjzCpfX540I=
+github.com/containerd/log v0.1.0/go.mod h1:VRRf09a7mHDIRezVKTRCrOq78v577GXq3bSa3EhrzVo=
+github.com/containerd/ttrpc v1.2.2 h1:9vqZr0pxwOF5koz6N0N3kJ0zDHokrcPxIR/ZR2YFtOs=
+github.com/containerd/ttrpc v1.2.2/go.mod h1:sIT6l32Ph/H9cvnJsfXM5drIVzTr5A2flTf1G5tYZak=
+github.com/containerd/typeurl/v2 v2.1.1 h1:3Q4Pt7i8nYwy2KmQWIw2+1hTvwTE/6w9FqcttATPO/4=
+github.com/containerd/typeurl/v2 v2.1.1/go.mod h1:IDp2JFvbwZ31H8dQbEIY7sDl2L3o3HZj1hsSQlywkQ0=
+github.com/creack/pty v1.1.9/go.mod h1:oKZEueFk5CKHvIhNR5MUki03XCEU+Q6VDXinZuGJ33E=
+github.com/davecgh/go-spew v1.1.0/go.mod h1:J7Y8YcW2NihsgmVo/mv3lAwl/skON4iLHjSsI+c5H38=
+github.com/davecgh/go-spew v1.1.1 h1:vj9j/u1bqnvCEfJOwUhtlOARqs3+rkHYY13jYWTU97c=
+github.com/davecgh/go-spew v1.1.1/go.mod h1:J7Y8YcW2NihsgmVo/mv3lAwl/skON4iLHjSsI+c5H38=
+github.com/docker/distribution v2.8.2+incompatible h1:T3de5rq0dB1j30rp0sA2rER+m322EBzniBPB6ZIzuh8=
+github.com/docker/distribution v2.8.2+incompatible/go.mod h1:J2gT2udsDAN96Uj4KfcMRqY0/ypR+oyYUYmja8H+y+w=
+github.com/docker/docker v24.0.0-rc.2.0.20230718135204-8e51b8b59cb8+incompatible h1:qMc+sk+l2GSLokgRA1uuKgkUVQ/vhAm4LYHC5rtSMq0=
+github.com/docker/docker v24.0.0-rc.2.0.20230718135204-8e51b8b59cb8+incompatible/go.mod h1:eEKB0N0r5NX/I1kEveEz05bcu8tLC/8azJZsviup8Sk=
+github.com/docker/go-units v0.5.0 h1:69rxXcBk27SvSaaxTtLh/8llcHD8vYHT7WSdRZ/jvr4=
+github.com/docker/go-units v0.5.0/go.mod h1:fgPhTUdO+D/Jk86RDLlptpiXQzgHJF7gydDDbaIK4Dk=
+github.com/envoyproxy/go-control-plane v0.9.0/go.mod h1:YTl/9mNaCwkRvm6d1a2C3ymFceY/DCBVvsKhRF0iEA4=
+github.com/envoyproxy/go-control-plane v0.9.1-0.20191026205805-5f8ba28d4473/go.mod h1:YTl/9mNaCwkRvm6d1a2C3ymFceY/DCBVvsKhRF0iEA4=
+github.com/envoyproxy/go-control-plane v0.9.4/go.mod h1:6rpuAdCZL397s3pYoYcLgu1mIlRU8Am5FuJP05cCM98=
+github.com/envoyproxy/protoc-gen-validate v0.1.0/go.mod h1:iSmxcyjqTsJpI2R4NaDN7+kN2VEUnK/pcBlmesArF7c=
+github.com/envoyproxy/protoc-gen-validate v1.0.2 h1:QkIBuU5k+x7/QXPvPPnWXWlCdaBFApVqftFV6k087DA=
+github.com/envoyproxy/protoc-gen-validate v1.0.2/go.mod h1:GpiZQP3dDbg4JouG/NNS7QWXpgx6x8QiMKdmN72jogE=
+github.com/go-kit/kit v0.9.0/go.mod h1:xBxKIO96dXMWWy0MnWVtmwkA9/13aqxPnvrjFYMA2as=
+github.com/go-logfmt/logfmt v0.4.0/go.mod h1:3RMwSq7FuexP4Kalkev3ejPJsZTpXXBr9+V4qmtdjCk=
+github.com/go-logr/logr v1.2.2/go.mod h1:jdQByPbusPIv2/zmleS9BjJVeZ6kBagPoEUsqbVz/1A=
+github.com/go-logr/logr v1.2.4 h1:g01GSCwiDw2xSZfjJ2/T9M+S6pFdcNtFYsp+Y43HYDQ=
+github.com/go-logr/logr v1.2.4/go.mod h1:jdQByPbusPIv2/zmleS9BjJVeZ6kBagPoEUsqbVz/1A=
+github.com/go-logr/stdr v1.2.2 h1:hSWxHoqTgW2S2qGc0LTAI563KZ5YKYRhT3MFKZMbjag=
+github.com/go-logr/stdr v1.2.2/go.mod h1:mMo/vtBO5dYbehREoey6XUKy/eSumjCCveDpRre4VKE=
+github.com/go-stack/stack v1.8.0/go.mod h1:v0f6uXyyMGvRgIKkXu+yp6POWl0qKG85gN/melR3HDY=
+github.com/gogo/googleapis v1.4.1 h1:1Yx4Myt7BxzvUr5ldGSbwYiZG6t9wGBZ+8/fX3Wvtq0=
+github.com/gogo/googleapis v1.4.1/go.mod h1:2lpHqI5OcWCtVElxXnPt+s8oJvMpySlOyM6xDCrzib4=
+github.com/gogo/protobuf v1.3.2 h1:Ov1cvc58UF3b5XjBnZv7+opcTcQFZebYjWzi34vdm4Q=
+github.com/gogo/protobuf v1.3.2/go.mod h1:P1XiOD3dCwIKUDQYPy72D8LYyHL2YPYrpS2s69NZV8Q=
+github.com/golang/glog v0.0.0-20160126235308-23def4e6c14b/go.mod h1:SBH7ygxi8pfUlaOkMMuAQtPIUF8ecWP5IEl/CR7VP2Q=
+github.com/golang/groupcache v0.0.0-20210331224755-41bb18bfe9da h1:oI5xCqsCo564l8iNU+DwB5epxmsaqB+rhGL0m5jtYqE=
+github.com/golang/groupcache v0.0.0-20210331224755-41bb18bfe9da/go.mod h1:cIg4eruTrX1D+g88fzRXU5OdNfaM+9IcxsU14FzY7Hc=
+github.com/golang/mock v1.1.1/go.mod h1:oTYuIxOrZwtPieC+H1uAHpcLFnEyAGVDL/k47Jfbm0A=
+github.com/golang/protobuf v1.2.0/go.mod h1:6lQm79b+lXiMfvg/cZm0SGofjICqVBUtrP5yJMmIC1U=
+github.com/golang/protobuf v1.3.2/go.mod h1:6lQm79b+lXiMfvg/cZm0SGofjICqVBUtrP5yJMmIC1U=
+github.com/golang/protobuf v1.3.3/go.mod h1:vzj43D7+SQXF/4pzW/hwtAqwc6iTitCiVSaWz5lYuqw=
+github.com/golang/protobuf v1.5.0/go.mod h1:FsONVRAS9T7sI+LIUmWTfcYkHO4aIWwzhcaSAoJOfIk=
+github.com/golang/protobuf v1.5.3 h1:KhyjKVUg7Usr/dYsdSqoFveMYd5ko72D+zANwlG1mmg=
+github.com/golang/protobuf v1.5.3/go.mod h1:XVQd3VNwM+JqD3oG2Ue2ip4fOMUkwXdXDdiuN0vRsmY=
+github.com/google/go-cmp v0.2.0/go.mod h1:oXzfMopK8JAjlY9xF4vHSVASa0yLyX7SntLO5aqRK0M=
+github.com/google/go-cmp v0.5.4/go.mod h1:v8dTdLbMG2kIc/vJvl+f65V22dbkXbowE6jgT/gNBxE=
+github.com/google/go-cmp v0.5.5/go.mod h1:v8dTdLbMG2kIc/vJvl+f65V22dbkXbowE6jgT/gNBxE=
+github.com/google/go-cmp v0.5.9 h1:O2Tfq5qg4qc4AmwVlvv0oLiVAGB7enBSJ2x2DqQFi38=
+github.com/google/go-cmp v0.5.9/go.mod h1:17dUlkBOakJ0+DkrSSNjCkIjxS6bF9zb3elmeNGIjoY=
+github.com/google/shlex v0.0.0-20191202100458-e7afc7fbc510 h1:El6M4kTTCOh6aBiKaUGG7oYTSPP8MxqL4YI3kZKwcP4=
+github.com/google/shlex v0.0.0-20191202100458-e7afc7fbc510/go.mod h1:pupxD2MaaD3pAXIBCelhxNneeOaAeabZDe5s4K6zSpQ=
+github.com/grpc-ecosystem/go-grpc-middleware v1.3.0 h1:+9834+KizmvFV7pXQGSXQTsaWhq2GjuNUt0aUU0YBYw=
+github.com/grpc-ecosystem/go-grpc-middleware v1.3.0/go.mod h1:z0ButlSOZa5vEBq9m2m2hlwIgKw+rp3sdCBRoJY+30Y=
+github.com/in-toto/in-toto-golang v0.5.0 h1:hb8bgwr0M2hGdDsLjkJ3ZqJ8JFLL/tgYdAxF/XEFBbY=
+github.com/in-toto/in-toto-golang v0.5.0/go.mod h1:/Rq0IZHLV7Ku5gielPT4wPHJfH1GdHMCq8+WPxw8/BE=
+github.com/kisielk/errcheck v1.5.0/go.mod h1:pFxgyoBC7bSaBwPgfKdkLd5X25qrDl4LWUI2bnpBCr8=
+github.com/kisielk/gotool v1.0.0/go.mod h1:XhKaO+MFFWcvkIS/tQcRk01m1F5IRFswLeQ+oQHNcck=
+github.com/klauspost/compress v1.16.3 h1:XuJt9zzcnaz6a16/OU53ZjWp/v7/42WcR5t2a0PcNQY=
+github.com/klauspost/compress v1.16.3/go.mod h1:ntbaceVETuRiXiv4DpjP66DpAtAGkEQskQzEyD//IeE=
+github.com/konsorten/go-windows-terminal-sequences v1.0.1/go.mod h1:T0+1ngSBFLxvqU3pZ+m/2kptfBszLMUkC4ZK/EgS/cQ=
+github.com/kr/logfmt v0.0.0-20140226030751-b84e30acd515/go.mod h1:+0opPa2QZZtGFBFZlji/RkVcI2GknAs/DXo4wKdlNEc=
+github.com/kr/pretty v0.2.0 h1:s5hAObm+yFO5uHYt5dYjxi2rXrsnmRpJx4OYvIWUaQs=
+github.com/kr/pretty v0.2.0/go.mod h1:ipq/a2n7PKx3OHsz4KJII5eveXtPO4qwEXGdVfWzfnI=
+github.com/kr/text v0.2.0 h1:5Nx0Ya0ZqY2ygV366QzturHI13Jq95ApcVaJBhpS+AY=
+github.com/kr/text v0.2.0/go.mod h1:eLer722TekiGuMkidMxC/pM04lWEeraHUUmBw8l2grE=
+github.com/moby/buildkit v0.12.3 h1:cFaPVnyC0PwAP5xHHfzdU5v9rgQrCi6HnGSg3WuFKp4=
+github.com/moby/buildkit v0.12.3/go.mod h1:adB4y0SxxX8trnrY+oEulb48ODLqPO6pKMF0ppGcCoI=
+github.com/moby/locker v1.0.1 h1:fOXqR41zeveg4fFODix+1Ch4mj/gT0NE1XJbp/epuBg=
+github.com/moby/locker v1.0.1/go.mod h1:S7SDdo5zpBK84bzzVlKr2V0hz+7x9hWbYC/kq7oQppc=
+github.com/moby/sys/mountinfo v0.6.2 h1:BzJjoreD5BMFNmD9Rus6gdd1pLuecOFPt8wC+Vygl78=
+github.com/moby/sys/mountinfo v0.6.2/go.mod h1:IJb6JQeOklcdMU9F5xQ8ZALD+CUr5VlGpwtX+VE0rpI=
+github.com/moby/sys/signal v0.7.0 h1:25RW3d5TnQEoKvRbEKUGay6DCQ46IxAVTT9CUMgmsSI=
+github.com/moby/sys/signal v0.7.0/go.mod h1:GQ6ObYZfqacOwTtlXvcmh9A26dVRul/hbOZn88Kg8Tg=
+github.com/opencontainers/go-digest v1.0.0 h1:apOUWs51W5PlhuyGyz9FCeeBIOUDA/6nW8Oi/yOhh5U=
+github.com/opencontainers/go-digest v1.0.0/go.mod h1:0JzlMkj0TRzQZfJkVvzbP0HBR3IKzErnv2BNG4W4MAM=
+github.com/opencontainers/image-spec v1.1.0-rc5 h1:Ygwkfw9bpDvs+c9E34SdgGOj41dX/cbdlwvlWt0pnFI=
+github.com/opencontainers/image-spec v1.1.0-rc5/go.mod h1:X4pATf0uXsnn3g5aiGIsVnJBR4mxhKzfwmvK/B2NTm8=
+github.com/opentracing/opentracing-go v1.1.0/go.mod h1:UkNAQd3GIcIGf0SeVgPpRdFStlNbqXla1AfSYxPUl2o=
+github.com/pkg/errors v0.8.1/go.mod h1:bwawxfHBFNV+L2hUp1rHADufV3IMtnDRdf1r5NINEl0=
+github.com/pkg/errors v0.9.1 h1:FEBLx1zS214owpjy7qsBeixbURkuhQAwrK5UwLGTwt4=
+github.com/pkg/errors v0.9.1/go.mod h1:bwawxfHBFNV+L2hUp1rHADufV3IMtnDRdf1r5NINEl0=
+github.com/pmezard/go-difflib v1.0.0 h1:4DBwDE0NGyQoBHbLQYPwSUPoCMWR5BEzIk/f1lZbAQM=
+github.com/pmezard/go-difflib v1.0.0/go.mod h1:iKH77koFhYxTK1pcRnkKkqfTogsbg7gZNVY4sRDYZ/4=
+github.com/prometheus/client_model v0.0.0-20190812154241-14fe0d1b01d4/go.mod h1:xMI15A0UPsDsEKsMN9yxemIoYk6Tm2C1GtYGdfGttqA=
+github.com/prometheus/procfs v0.6.0/go.mod h1:cz+aTbrPOrUb4q7XlbU9ygM+/jj0fzG6c1xBZuNvfVA=
+github.com/prometheus/procfs v0.9.0 h1:wzCHvIvM5SxWqYvwgVL7yJY8Lz3PKn49KQtpgMYJfhI=
+github.com/prometheus/procfs v0.9.0/go.mod h1:+pB4zwohETzFnmlpe6yd2lSc+0/46IYZRB/chUwxUZY=
+github.com/secure-systems-lab/go-securesystemslib v0.4.0 h1:b23VGrQhTA8cN2CbBw7/FulN9fTtqYUdS5+Oxzt+DUE=
+github.com/secure-systems-lab/go-securesystemslib v0.4.0/go.mod h1:FGBZgq2tXWICsxWQW1msNf49F0Pf2Op5Htayx335Qbs=
+github.com/shibumi/go-pathspec v1.3.0 h1:QUyMZhFo0Md5B8zV8x2tesohbb5kfbpTi9rBnKh5dkI=
+github.com/shibumi/go-pathspec v1.3.0/go.mod h1:Xutfslp817l2I1cZvgcfeMQJG5QnU2lh5tVaaMCl3jE=
+github.com/sirupsen/logrus v1.4.2/go.mod h1:tLMulIdttU9McNUspp0xgXVQah82FyeX6MwdIuYE2rE=
+github.com/sirupsen/logrus v1.8.1/go.mod h1:yWOB1SBYBC5VeMP7gHvWumXLIWorT60ONWic61uBYv0=
+github.com/sirupsen/logrus v1.9.3 h1:dueUQJ1C2q9oE3F7wvmSGAaVtTmUizReu6fjN8uqzbQ=
+github.com/sirupsen/logrus v1.9.3/go.mod h1:naHLuLoDiP4jHNo9R0sCBMtWGeIprob74mVsIT4qYEQ=
+github.com/stretchr/objx v0.1.0/go.mod h1:HFkY916IF+rwdDfMAkV7OtwuqBVzrE8GR6GFx+wExME=
+github.com/stretchr/objx v0.1.1/go.mod h1:HFkY916IF+rwdDfMAkV7OtwuqBVzrE8GR6GFx+wExME=
+github.com/stretchr/testify v1.2.2/go.mod h1:a8OnRcib4nhh0OaRAV+Yts87kKdq0PP7pXfy6kDkUVs=
+github.com/stretchr/testify v1.4.0/go.mod h1:j7eGeouHqKxXV5pUuKE4zz7dFj8WfuZ+81PSLYec5m4=
+github.com/stretchr/testify v1.7.0/go.mod h1:6Fq8oRcR53rry900zMqJjRRixrwX3KX962/h/Wwjteg=
+github.com/stretchr/testify v1.8.4 h1:CcVxjf3Q8PM0mHUKJCdn+eZZtm5yQwehR5yeSVQQcUk=
+github.com/stretchr/testify v1.8.4/go.mod h1:sz/lmYIOXD/1dqDmKjjqLyZ2RngseejIcXlSw2iwfAo=
+github.com/tonistiigi/fsutil v0.0.0-20230629203738-36ef4d8c0dbb h1:uUe8rNyVXM8moActoBol6Xf6xX2GMr7SosR2EywMvGg=
+github.com/tonistiigi/fsutil v0.0.0-20230629203738-36ef4d8c0dbb/go.mod h1:SxX/oNQ/ag6Vaoli547ipFK9J7BZn5JqJG0JE8lf8bA=
+github.com/yuin/goldmark v1.1.27/go.mod h1:3hX8gzYuyVAZsxl0MRgGTJEmQBFcNTphYh9decYSb74=
+github.com/yuin/goldmark v1.2.1/go.mod h1:3hX8gzYuyVAZsxl0MRgGTJEmQBFcNTphYh9decYSb74=
+go.opencensus.io v0.24.0 h1:y73uSU6J157QMP2kn2r30vwW1A2W2WFwSCGnAVxeaD0=
+go.opencensus.io v0.24.0/go.mod h1:vNK8G9p7aAivkbmorf4v+7Hgx+Zs0yY+0fOtgBfjQKo=
+go.opentelemetry.io/contrib/instrumentation/google.golang.org/grpc/otelgrpc v0.45.0 h1:RsQi0qJ2imFfCvZabqzM9cNXBG8k6gXMv1A0cXRmH6A=
+go.opentelemetry.io/contrib/instrumentation/google.golang.org/grpc/otelgrpc v0.45.0/go.mod h1:vsh3ySueQCiKPxFLvjWC4Z135gIa34TQ/NSqkDTZYUM=
+go.opentelemetry.io/otel v1.19.0 h1:MuS/TNf4/j4IXsZuJegVzI1cwut7Qc00344rgH7p8bs=
+go.opentelemetry.io/otel v1.19.0/go.mod h1:i0QyjOq3UPoTzff0PJB2N66fb4S0+rSbSB15/oyH9fY=
+go.opentelemetry.io/otel/metric v1.19.0 h1:aTzpGtV0ar9wlV4Sna9sdJyII5jTVJEvKETPiOKwvpE=
+go.opentelemetry.io/otel/metric v1.19.0/go.mod h1:L5rUsV9kM1IxCj1MmSdS+JQAcVm319EUrDVLrt7jqt8=
+go.opentelemetry.io/otel/trace v1.19.0 h1:DFVQmlVbfVeOuBRrwdtaehRrWiL1JoVs9CPIQ1Dzxpg=
+go.opentelemetry.io/otel/trace v1.19.0/go.mod h1:mfaSyvGyEJEI0nyV2I4qhNQnbBOUUmYZpYojqMnX2vo=
+go.uber.org/atomic v1.4.0/go.mod h1:gD2HeocX3+yG+ygLZcrzQJaqmWj9AIm7n08wl/qW/PE=
+go.uber.org/multierr v1.1.0/go.mod h1:wR5kodmAFQ0UK8QlbwjlSNy0Z68gJhDJUG5sjR94q/0=
+go.uber.org/zap v1.10.0/go.mod h1:vwi/ZaCAaUcBkycHslxD9B2zi4UTXhF60s6SWpuDF0Q=
+golang.org/x/crypto v0.0.0-20190308221718-c2843e01d9a2/go.mod h1:djNgcEr1/C05ACkg1iLfiJU5Ep61QUkGW8qpdssI0+w=
+golang.org/x/crypto v0.0.0-20191011191535-87dc89f01550/go.mod h1:yigFU9vqHzYiE8UmvKecakEJjdnWj3jj499lnFckfCI=
+golang.org/x/crypto v0.0.0-20200622213623-75b288015ac9/go.mod h1:LzIPMQfyMNhhGPhUkYOs5KpL4U8rLKemX1yGLhDgUto=
+golang.org/x/crypto v0.14.0 h1:wBqGXzWJW6m1XrIKlAH0Hs1JJ7+9KBwnIO8v66Q9cHc=
+golang.org/x/crypto v0.14.0/go.mod h1:MVFd36DqK4CsrnJYDkBA3VC4m2GkXAM0PvzMCn4JQf4=
+golang.org/x/exp v0.0.0-20190121172915-509febef88a4/go.mod h1:CJ0aWSM057203Lf6IL+f9T1iT9GByDxfZKAQTCR3kQA=
+golang.org/x/lint v0.0.0-20181026193005-c67002cb31c3/go.mod h1:UVdnD1Gm6xHRNCYTkRU2/jEulfH38KcIWyp/GAMgvoE=
+golang.org/x/lint v0.0.0-20190227174305-5b3e6a55c961/go.mod h1:wehouNa3lNwaWXcvxsM5YxQ5yQlVC4a0KAMCusXpPoU=
+golang.org/x/lint v0.0.0-20190313153728-d0100b6bd8b3/go.mod h1:6SW0HCj/g11FgYtHlgUYUwCkIfeOF89ocIRzGO/8vkc=
+golang.org/x/mod v0.2.0/go.mod h1:s0Qsj1ACt9ePp/hMypM3fl4fZqREWJwdYDEqhRiZZUA=
+golang.org/x/mod v0.3.0/go.mod h1:s0Qsj1ACt9ePp/hMypM3fl4fZqREWJwdYDEqhRiZZUA=
+golang.org/x/mod v0.11.0 h1:bUO06HqtnRcc/7l71XBe4WcqTZ+3AH1J59zWDDwLKgU=
+golang.org/x/mod v0.11.0/go.mod h1:iBbtSCu2XBx23ZKBPSOrRkjjQPZFPuis4dIYUhu/chs=
+golang.org/x/net v0.0.0-20180724234803-3673e40ba225/go.mod h1:mL1N/T3taQHkDXs73rZJwtUhF3w3ftmwwsq0BUmARs4=
+golang.org/x/net v0.0.0-20180826012351-8a410e7b638d/go.mod h1:mL1N/T3taQHkDXs73rZJwtUhF3w3ftmwwsq0BUmARs4=
+golang.org/x/net v0.0.0-20190213061140-3a22650c66bd/go.mod h1:mL1N/T3taQHkDXs73rZJwtUhF3w3ftmwwsq0BUmARs4=
+golang.org/x/net v0.0.0-20190311183353-d8887717615a/go.mod h1:t9HGtf8HONx5eT2rtn7q6eTqICYqUVnKs3thJo3Qplg=
+golang.org/x/net v0.0.0-20190404232315-eb5bcb51f2a3/go.mod h1:t9HGtf8HONx5eT2rtn7q6eTqICYqUVnKs3thJo3Qplg=
+golang.org/x/net v0.0.0-20190620200207-3b0461eec859/go.mod h1:z5CRVTTTmAJ677TzLLGU+0bjPO0LkuOLi4/5GtJWs/s=
+golang.org/x/net v0.0.0-20200226121028-0de0cce0169b/go.mod h1:z5CRVTTTmAJ677TzLLGU+0bjPO0LkuOLi4/5GtJWs/s=
+golang.org/x/net v0.0.0-20201021035429-f5854403a974/go.mod h1:sp8m0HH+o8qH0wwXwYZr8TS3Oi6o0r6Gce1SSxlDquU=
+golang.org/x/net v0.17.0 h1:pVaXccu2ozPjCXewfr1S7xza/zcXTity9cCdXQYSjIM=
+golang.org/x/net v0.17.0/go.mod h1:NxSsAGuq816PNPmqtQdLE42eU2Fs7NoRIZrHJAlaCOE=
+golang.org/x/oauth2 v0.0.0-20180821212333-d2e6202438be/go.mod h1:N/0e6XlmueqKjAGxoOufVs8QHGRruUQn6yWY3a++T0U=
+golang.org/x/oauth2 v0.11.0 h1:vPL4xzxBM4niKCW6g9whtaWVXTJf1U5e4aZxxFx/gbU=
+golang.org/x/oauth2 v0.11.0/go.mod h1:LdF7O/8bLR/qWK9DrpXmbHLTouvRHK0SgJl0GmDBchk=
+golang.org/x/sync v0.0.0-20180314180146-1d60e4601c6f/go.mod h1:RxMgew5VJxzue5/jJTE5uejpjVlOe/izrB70Jof72aM=
+golang.org/x/sync v0.0.0-20181108010431-42b317875d0f/go.mod h1:RxMgew5VJxzue5/jJTE5uejpjVlOe/izrB70Jof72aM=
+golang.org/x/sync v0.0.0-20190423024810-112230192c58/go.mod h1:RxMgew5VJxzue5/jJTE5uejpjVlOe/izrB70Jof72aM=
+golang.org/x/sync v0.0.0-20190911185100-cd5d95a43a6e/go.mod h1:RxMgew5VJxzue5/jJTE5uejpjVlOe/izrB70Jof72aM=
+golang.org/x/sync v0.0.0-20201020160332-67f06af15bc9/go.mod h1:RxMgew5VJxzue5/jJTE5uejpjVlOe/izrB70Jof72aM=
+golang.org/x/sync v0.0.0-20201207232520-09787c993a3a/go.mod h1:RxMgew5VJxzue5/jJTE5uejpjVlOe/izrB70Jof72aM=
+golang.org/x/sync v0.3.0 h1:ftCYgMx6zT/asHUrPw8BLLscYtGznsLAnjq5RH9P66E=
+golang.org/x/sync v0.3.0/go.mod h1:FU7BRWz2tNW+3quACPkgCx/L+uEAv1htQ0V83Z9Rj+Y=
+golang.org/x/sys v0.0.0-20180830151530-49385e6e1522/go.mod h1:STP8DvDyc/dI5b8T5hshtkjS+E42TnysNCUPdjciGhY=
+golang.org/x/sys v0.0.0-20190215142949-d0b11bdaac8a/go.mod h1:STP8DvDyc/dI5b8T5hshtkjS+E42TnysNCUPdjciGhY=
+golang.org/x/sys v0.0.0-20190412213103-97732733099d/go.mod h1:h1NjWce9XRLGQEsW7wpKNCjG9DtNlClVuFLEZdDNbEs=
+golang.org/x/sys v0.0.0-20190422165155-953cdadca894/go.mod h1:h1NjWce9XRLGQEsW7wpKNCjG9DtNlClVuFLEZdDNbEs=
+golang.org/x/sys v0.0.0-20191026070338-33540a1f6037/go.mod h1:h1NjWce9XRLGQEsW7wpKNCjG9DtNlClVuFLEZdDNbEs=
+golang.org/x/sys v0.0.0-20200930185726-fdedc70b468f/go.mod h1:h1NjWce9XRLGQEsW7wpKNCjG9DtNlClVuFLEZdDNbEs=
+golang.org/x/sys v0.0.0-20210124154548-22da62e12c0c/go.mod h1:h1NjWce9XRLGQEsW7wpKNCjG9DtNlClVuFLEZdDNbEs=
+golang.org/x/sys v0.0.0-20211025201205-69cdffdb9359/go.mod h1:oPkhp1MJrh7nUepCBck5+mAzfO9JrbApNNgaTdGDITg=
+golang.org/x/sys v0.0.0-20220715151400-c0bba94af5f8/go.mod h1:oPkhp1MJrh7nUepCBck5+mAzfO9JrbApNNgaTdGDITg=
+golang.org/x/sys v0.1.0/go.mod h1:oPkhp1MJrh7nUepCBck5+mAzfO9JrbApNNgaTdGDITg=
+golang.org/x/sys v0.13.0 h1:Af8nKPmuFypiUBjVoU9V20FiaFXOcuZI21p0ycVYYGE=
+golang.org/x/sys v0.13.0/go.mod h1:oPkhp1MJrh7nUepCBck5+mAzfO9JrbApNNgaTdGDITg=
+golang.org/x/term v0.13.0 h1:bb+I9cTfFazGW51MZqBVmZy7+JEJMouUHTUSKVQLBek=
+golang.org/x/term v0.13.0/go.mod h1:LTmsnFJwVN6bCy1rVCoS+qHT1HhALEFxKncY3WNNh4U=
+golang.org/x/text v0.3.0/go.mod h1:NqM8EUOU14njkJ3fqMW+pc6Ldnwhi/IjpwHt7yyuwOQ=
+golang.org/x/text v0.3.3/go.mod h1:5Zoc/QRtKVWzQhOtBMvqHzDpF6irO9z98xDceosuGiQ=
+golang.org/x/text v0.13.0 h1:ablQoSUd0tRdKxZewP80B+BaqeKJuVhuRxj/dkrun3k=
+golang.org/x/text v0.13.0/go.mod h1:TvPlkZtksWOMsz7fbANvkp4WM8x/WCo/om8BMLbz+aE=
+golang.org/x/tools v0.0.0-20180917221912-90fa682c2a6e/go.mod h1:n7NCudcB/nEzxVGmLbDWY5pfWTLqBcC2KZ6jyYvM4mQ=
+golang.org/x/tools v0.0.0-20190114222345-bf090417da8b/go.mod h1:n7NCudcB/nEzxVGmLbDWY5pfWTLqBcC2KZ6jyYvM4mQ=
+golang.org/x/tools v0.0.0-20190226205152-f727befe758c/go.mod h1:9Yl7xja0Znq3iFh3HoIrodX9oNMXvdceNzlUR8zjMvY=
+golang.org/x/tools v0.0.0-20190311212946-11955173bddd/go.mod h1:LCzVGOaR6xXOjkQ3onu1FJEFr0SW1gC7cKk1uF8kGRs=
+golang.org/x/tools v0.0.0-20190524140312-2c0ae7006135/go.mod h1:RgjU9mgBXZiqYHBnxXauZ1Gv1EHHAz9KjViQ78xBX0Q=
+golang.org/x/tools v0.0.0-20191119224855-298f0cb1881e/go.mod h1:b+2E5dAYhXwXZwtnZ6UAqBI28+e2cm9otk0dWdXHAEo=
+golang.org/x/tools v0.0.0-20200619180055-7c47624df98f/go.mod h1:EkVYQZoAsY45+roYkvgYkIh4xh/qjgUK9TdY2XT94GE=
+golang.org/x/tools v0.0.0-20210106214847-113979e3529a/go.mod h1:emZCQorbCU4vsT4fOWvOPXz4eW1wZW4PmDk9uLelYpA=
+golang.org/x/tools v0.10.0 h1:tvDr/iQoUqNdohiYm0LmmKcBk+q86lb9EprIUFhHHGg=
+golang.org/x/tools v0.10.0/go.mod h1:UJwyiVBsOA2uwvK/e5OY3GTpDUJriEd+/YlqAwLPmyM=
+golang.org/x/xerrors v0.0.0-20190717185122-a985d3407aa7/go.mod h1:I/5z698sn9Ka8TeJc9MKroUUfqBBauWjQqLJ2OPfmY0=
+golang.org/x/xerrors v0.0.0-20191011141410-1b5146add898/go.mod h1:I/5z698sn9Ka8TeJc9MKroUUfqBBauWjQqLJ2OPfmY0=
+golang.org/x/xerrors v0.0.0-20191204190536-9bdfabe68543/go.mod h1:I/5z698sn9Ka8TeJc9MKroUUfqBBauWjQqLJ2OPfmY0=
+golang.org/x/xerrors v0.0.0-20200804184101-5ec99f83aff1/go.mod h1:I/5z698sn9Ka8TeJc9MKroUUfqBBauWjQqLJ2OPfmY0=
+google.golang.org/appengine v1.1.0/go.mod h1:EbEs0AVv82hx2wNQdGPgUI5lhzA/G0D9YwlJXL52JkM=
+google.golang.org/appengine v1.4.0/go.mod h1:xpcJRLb0r/rnEns0DIKYYv+WjYCduHsrkT7/EB5XEv4=
+google.golang.org/appengine v1.6.7 h1:FZR1q0exgwxzPzp/aF+VccGrSfxfPpkBqjIIEq3ru6c=
+google.golang.org/appengine v1.6.7/go.mod h1:8WjMMxjGQR8xUklV/ARdw2HLXBOI7O7uCIDZVag1xfc=
+google.golang.org/genproto v0.0.0-20180817151627-c66870c02cf8/go.mod h1:JiN7NxoALGmiZfu7CAH4rXhgtRTLTxftemlI0sWmxmc=
+google.golang.org/genproto v0.0.0-20190819201941-24fa4b261c55/go.mod h1:DMBHOl98Agz4BDEuKkezgsaosCRResVns1a3J2ZsMNc=
+google.golang.org/genproto v0.0.0-20200224152610-e50cd9704f63/go.mod h1:55QSHmfGQM9UVYDPBsyGGes0y52j32PQ3BqQfXhyH3c=
+google.golang.org/genproto v0.0.0-20200423170343-7949de9c1215/go.mod h1:55QSHmfGQM9UVYDPBsyGGes0y52j32PQ3BqQfXhyH3c=
+google.golang.org/genproto/googleapis/rpc v0.0.0-20230822172742-b8732ec3820d h1:uvYuEyMHKNt+lT4K3bN6fGswmK8qSvcreM3BwjDh+y4=
+google.golang.org/genproto/googleapis/rpc v0.0.0-20230822172742-b8732ec3820d/go.mod h1:+Bk1OCOj40wS2hwAMA+aCW9ypzm63QTBBHp6lQ3p+9M=
+google.golang.org/grpc v1.19.0/go.mod h1:mqu4LbDTu4XGKhr4mRzUsmM4RtVoemTSY81AxZiDr8c=
+google.golang.org/grpc v1.23.0/go.mod h1:Y5yQAOtifL1yxbo5wqy6BxZv8vAUGQwXBOALyacEbxg=
+google.golang.org/grpc v1.25.1/go.mod h1:c3i+UQWmh7LiEpx4sFZnkU36qjEYZ0imhYfXVyQciAY=
+google.golang.org/grpc v1.27.0/go.mod h1:qbnxyOmOxrQa7FizSgH+ReBfzJrCY1pSN7KXBS8abTk=
+google.golang.org/grpc v1.27.1/go.mod h1:qbnxyOmOxrQa7FizSgH+ReBfzJrCY1pSN7KXBS8abTk=
+google.golang.org/grpc v1.29.1/go.mod h1:itym6AZVZYACWQqET3MqgPpjcuV5QH3BxFS3IjizoKk=
+google.golang.org/grpc v1.59.0 h1:Z5Iec2pjwb+LEOqzpB2MR12/eKFhDPhuqW91O+4bwUk=
+google.golang.org/grpc v1.59.0/go.mod h1:aUPDwccQo6OTjy7Hct4AfBPD1GptF4fyUjIkQ9YtF98=
+google.golang.org/protobuf v1.26.0-rc.1/go.mod h1:jlhhOSvTdKEhbULTjvd4ARK9grFBp09yW+WbY/TyQbw=
+google.golang.org/protobuf v1.26.0/go.mod h1:9q0QmTI4eRPtz6boOQmLYwt+qCgq0jsYwAQnmE0givc=
+google.golang.org/protobuf v1.27.1/go.mod h1:9q0QmTI4eRPtz6boOQmLYwt+qCgq0jsYwAQnmE0givc=
+google.golang.org/protobuf v1.31.0 h1:g0LDEJHgrBl9N9r17Ru3sqWhkIx2NB67okBHPwC7hs8=
+google.golang.org/protobuf v1.31.0/go.mod h1:HV8QOd/L58Z+nl8r43ehVNZIU/HEI6OcFqwMG9pJV4I=
+gopkg.in/check.v1 v0.0.0-20161208181325-20d25e280405/go.mod h1:Co6ibVJAznAaIkqp8huTwlJQCZ016jof/cbN4VW5Yz0=
+gopkg.in/check.v1 v1.0.0-20190902080502-41f04d3bba15 h1:YR8cESwS4TdDjEe65xsg0ogRM/Nc3DYOhEAlW+xobZo=
+gopkg.in/check.v1 v1.0.0-20190902080502-41f04d3bba15/go.mod h1:Co6ibVJAznAaIkqp8huTwlJQCZ016jof/cbN4VW5Yz0=
+gopkg.in/yaml.v2 v2.2.2/go.mod h1:hI93XBmqTisBFMUTm0b8Fm+jr3Dg1NNxqwp+5A1VGuI=
+gopkg.in/yaml.v2 v2.4.0 h1:D8xgwECY7CYvx+Y2n4sBz93Jn9JRvxdiyyo8CTfuKaY=
+gopkg.in/yaml.v2 v2.4.0/go.mod h1:RDklbk79AGWmwhnvt/jBztapEOGDOx6ZbXqjP6csGnQ=
+gopkg.in/yaml.v3 v3.0.0-20200313102051-9f266ea9e77c/go.mod h1:K4uyk7z7BCEPqu6E+C64Yfv1cQ7kz7rIZviUmN+EgEM=
+gopkg.in/yaml.v3 v3.0.1 h1:fxVm/GzAzEWqLHuvctI91KS9hhNmmWOoWu0XTYJS7CA=
+gopkg.in/yaml.v3 v3.0.1/go.mod h1:K4uyk7z7BCEPqu6E+C64Yfv1cQ7kz7rIZviUmN+EgEM=
+honnef.co/go/tools v0.0.0-20190102054323-c2f93a96b099/go.mod h1:rf3lG4BRIbNafJWhAfAdb/ePZxsR/4RtNHQocxwk9r4=
+honnef.co/go/tools v0.0.0-20190523083050-ea95bdfd59fc/go.mod h1:rf3lG4BRIbNafJWhAfAdb/ePZxsR/4RtNHQocxwk9r4=
diff --git a/kubernetes/llama-2-7b-chat.yaml b/kubernetes/llama-2-7b-chat.yaml
new file mode 100644
index 00000000..d7e1aecd
--- /dev/null
+++ b/kubernetes/llama-2-7b-chat.yaml
@@ -0,0 +1,33 @@
+apiVersion: apps/v1
+kind: Deployment
+metadata:
+ name: llama2
+ labels:
+ app: llama2
+spec:
+ replicas: 3
+ selector:
+ matchLabels:
+ app: llama2
+ template:
+ metadata:
+ labels:
+ app: llama2
+ spec:
+ containers:
+ - name: llama2
+ image: ghcr.io/sozercan/llama2:7b
+ ports:
+ - containerPort: 8080
+---
+apiVersion: v1
+kind: Service
+metadata:
+ name: llama2
+spec:
+ selector:
+ app: llama2
+ ports:
+ - protocol: TCP
+ port: 8080
+ targetPort: 8080
diff --git a/kubernetes/orca-2-13b.yaml b/kubernetes/orca-2-13b.yaml
new file mode 100644
index 00000000..1578a244
--- /dev/null
+++ b/kubernetes/orca-2-13b.yaml
@@ -0,0 +1,33 @@
+apiVersion: apps/v1
+kind: Deployment
+metadata:
+ name: orca2
+ labels:
+ app: orca2
+spec:
+ replicas: 3
+ selector:
+ matchLabels:
+ app: orca2
+ template:
+ metadata:
+ labels:
+ app: orca2
+ spec:
+ containers:
+ - name: orca2
+ image: ghcr.io/sozercan/orca2:13b
+ ports:
+ - containerPort: 8080
+---
+apiVersion: v1
+kind: Service
+metadata:
+ name: orca2
+spec:
+ selector:
+ app: orca2
+ ports:
+ - protocol: TCP
+ port: 8080
+ targetPort: 8080
diff --git a/models/cuda/llama-2-13b-chat.yaml b/models/cuda/llama-2-13b-chat.yaml
new file mode 100644
index 00000000..01f98e35
--- /dev/null
+++ b/models/cuda/llama-2-13b-chat.yaml
@@ -0,0 +1,21 @@
+#syntax=ghcr.io/sozercan/aikit:latest
+apiVersion: v1alpha1
+debug: true
+runtime: cuda
+models:
+ - name: llama-2-13b-chat
+ source: https://huggingface.co/TheBloke/Llama-2-13B-Chat-GGUF/resolve/main/llama-2-13b-chat.Q4_K_M.gguf
+ sha256: 7ddfe27f61bf994542c22aca213c46ecbd8a624cca74abff02a7b5a8c18f787f
+config: |
+ - name: llama-2-13b-chat
+ backend: llama
+ parameters:
+ top_k: 80
+ temperature: 0.2
+ top_p: 0.7
+ model: llama-2-13b-chat.Q4_K_M.gguf
+ context_size: 4096
+ gpu_layers: 43
+ f16: true
+ batch: 512
+ mmap: true
diff --git a/models/cuda/llama-2-7b-chat.yaml b/models/cuda/llama-2-7b-chat.yaml
new file mode 100644
index 00000000..e0eede81
--- /dev/null
+++ b/models/cuda/llama-2-7b-chat.yaml
@@ -0,0 +1,21 @@
+#syntax=ghcr.io/sozercan/aikit:latest
+apiVersion: v1alpha1
+debug: true
+runtime: cuda
+models:
+ - name: llama-2-7b-chat
+ source: https://huggingface.co/TheBloke/Llama-2-7B-Chat-GGUF/resolve/main/llama-2-7b-chat.Q4_K_M.gguf
+ sha256: "08a5566d61d7cb6b420c3e4387a39e0078e1f2fe5f055f3a03887385304d4bfa"
+config: |
+ - name: llama-2-7b-chat
+ backend: llama
+ parameters:
+ top_k: 80
+ temperature: 0.2
+ top_p: 0.7
+ model: llama-2-7b-chat.Q4_K_M.gguf
+ context_size: 4096
+ gpu_layers: 35
+ f16: true
+ batch: 512
+ mmap: true
diff --git a/models/cuda/orca-2-13b.yaml b/models/cuda/orca-2-13b.yaml
new file mode 100644
index 00000000..8abec499
--- /dev/null
+++ b/models/cuda/orca-2-13b.yaml
@@ -0,0 +1,21 @@
+#syntax=ghcr.io/sozercan/aikit:latest
+apiVersion: v1alpha1
+debug: true
+runtime: cuda
+models:
+ - name: orca-2-13b
+ source: https://huggingface.co/TheBloke/Orca-2-13B-GGUF/resolve/main/orca-2-13b.Q4_K_M.gguf
+ sha256: d37ea225dbe22318a4784a458a1832e34193d46f01a31e0b62e3a841fb8ec9ce
+config: |
+ - name: orca-2-13b
+ backend: llama
+ parameters:
+ top_k: 80
+ temperature: 0.2
+ top_p: 0.7
+ model: orca-2-13b.Q4_K_M.gguf
+ context_size: 4096
+ gpu_layers: 43
+ f16: true
+ batch: 512
+ mmap: true
diff --git a/models/llama-2-13b-chat.yaml b/models/llama-2-13b-chat.yaml
new file mode 100644
index 00000000..2fb46789
--- /dev/null
+++ b/models/llama-2-13b-chat.yaml
@@ -0,0 +1,16 @@
+#syntax=ghcr.io/sozercan/aikit:latest
+apiVersion: v1alpha1
+debug: true
+models:
+ - name: llama-2-13b-chat
+ source: https://huggingface.co/TheBloke/Llama-2-13B-Chat-GGUF/resolve/main/llama-2-13b-chat.Q4_K_M.gguf
+ sha256: 7ddfe27f61bf994542c22aca213c46ecbd8a624cca74abff02a7b5a8c18f787f
+config: |
+ - name: llama-2-13b-chat
+ backend: llama
+ parameters:
+ top_k: 80
+ temperature: 0.2
+ top_p: 0.7
+ model: llama-2-13b-chat.Q4_K_M.gguf
+ context_size: 4096
diff --git a/models/llama-2-7b-chat.yaml b/models/llama-2-7b-chat.yaml
new file mode 100644
index 00000000..9f55c7ea
--- /dev/null
+++ b/models/llama-2-7b-chat.yaml
@@ -0,0 +1,16 @@
+#syntax=ghcr.io/sozercan/aikit:latest
+apiVersion: v1alpha1
+debug: true
+models:
+ - name: llama-2-7b-chat
+ source: https://huggingface.co/TheBloke/Llama-2-7B-Chat-GGUF/resolve/main/llama-2-7b-chat.Q4_K_M.gguf
+ sha256: "08a5566d61d7cb6b420c3e4387a39e0078e1f2fe5f055f3a03887385304d4bfa"
+config: |
+ - name: llama-2-7b-chat
+ backend: llama
+ parameters:
+ top_k: 80
+ temperature: 0.2
+ top_p: 0.7
+ model: llama-2-7b-chat.Q4_K_M.gguf
+ context_size: 4096
diff --git a/models/orca-2-13b.yaml b/models/orca-2-13b.yaml
new file mode 100644
index 00000000..8d5c21f3
--- /dev/null
+++ b/models/orca-2-13b.yaml
@@ -0,0 +1,16 @@
+#syntax=ghcr.io/sozercan/aikit:latest
+apiVersion: v1alpha1
+debug: true
+models:
+ - name: orca-2-13b
+ source: https://huggingface.co/TheBloke/Orca-2-13B-GGUF/resolve/main/orca-2-13b.Q4_K_M.gguf
+ sha256: d37ea225dbe22318a4784a458a1832e34193d46f01a31e0b62e3a841fb8ec9ce
+config: |
+ - name: orca-2-13b
+ backend: llama
+ parameters:
+ top_k: 80
+ temperature: 0.2
+ top_p: 0.7
+ model: orca-2-13b.Q4_K_M.gguf
+ context_size: 4096
diff --git a/pkg/aikit/config/specs.go b/pkg/aikit/config/specs.go
new file mode 100644
index 00000000..ea9b26bf
--- /dev/null
+++ b/pkg/aikit/config/specs.go
@@ -0,0 +1,34 @@
+package config
+
+import (
+ "github.com/pkg/errors"
+ yaml "gopkg.in/yaml.v2"
+)
+
+func NewFromBytes(b []byte) (*Config, error) {
+ c := &Config{}
+ if err := yaml.Unmarshal(b, c); err != nil {
+ return nil, errors.Wrap(err, "unmarshal config")
+ }
+ return c, nil
+}
+
+type Config struct {
+ APIVersion string `yaml:"apiVersion"`
+ Debug bool `yaml:"debug,omitempty"`
+ Runtime string `yaml:"runtime,omitempty"`
+ Models []Model `yaml:"models"`
+ Config string `yaml:"config,omitempty"`
+}
+
+type Model struct {
+ Name string `yaml:"name"`
+ Source string `yaml:"source"`
+ SHA256 string `yaml:"sha256,omitempty"`
+ PromptTemplates []PromptTemplate `yaml:"promptTemplates,omitempty"`
+}
+
+type PromptTemplate struct {
+ Name string `yaml:"name,omitempty"`
+ Template string `yaml:"template,omitempty"`
+}
diff --git a/pkg/aikit2llb/convert.go b/pkg/aikit2llb/convert.go
new file mode 100644
index 00000000..15fd38bb
--- /dev/null
+++ b/pkg/aikit2llb/convert.go
@@ -0,0 +1,111 @@
+package aikit2llb
+
+import (
+ "fmt"
+ "net/url"
+ "path"
+
+ "github.com/moby/buildkit/client/llb"
+ specs "github.com/opencontainers/image-spec/specs-go/v1"
+ "github.com/sozercan/aikit/pkg/aikit/config"
+ "github.com/sozercan/aikit/pkg/utils"
+)
+
+const (
+ debianSlim = "docker.io/library/debian:12-slim"
+ distrolessBase = "gcr.io/distroless/cc-debian12:latest"
+ localAIVersion = "v1.40.0"
+ retryCount = 5
+ cudaVersion = "12-3"
+)
+
+func Aikit2LLB(c *config.Config) (llb.State, *specs.Image) {
+ var merge llb.State
+ s := llb.Image(debianSlim)
+ s = curl(s)
+ if c.Runtime == utils.RuntimeNVIDIA {
+ s, merge = installCuda(s)
+ } else {
+ merge = llb.Image(distrolessBase)
+ }
+ s, merge = copyModels(s, merge, c)
+ s = addLocalAI(c, s, merge)
+ imageCfg := NewImageConfig(c)
+ return s, imageCfg
+}
+
+func copyModels(s llb.State, merge llb.State, c *config.Config) (llb.State, llb.State) {
+ initState := s
+
+ // create config file if defined
+ if c.Config != "" {
+ s = s.Run(shf("echo \"%s\" >> /config.yaml", c.Config)).Root()
+ }
+
+ for _, model := range c.Models {
+ s = s.Run(llb.Shlexf("curl --retry %d --create-dirs -sSLO --output-dir /models %s", retryCount, model.Source)).Root()
+ // verify sha256 checksum if defined
+ if model.SHA256 != "" {
+ path := fmt.Sprintf("/models/%s", fileNameFromURL(model.Source))
+ s = s.Run(shf("echo \"%s %s\" | sha256sum -c -", model.SHA256, path)).Root()
+ }
+ // create prompt templates if defined
+ for _, pt := range model.PromptTemplates {
+ if pt.Name != "" && pt.Template != "" {
+ s = s.Run(shf("echo \"%s\" >> /models/%s.tmpl", pt.Template, pt.Name)).Root()
+ }
+ }
+ }
+ diff := llb.Diff(initState, s)
+ merge = llb.Merge([]llb.State{merge, diff})
+ return s, merge
+}
+
+func fileNameFromURL(urlString string) string {
+ parsedURL, err := url.Parse(urlString)
+ if err != nil {
+ panic(err)
+ }
+ return path.Base(parsedURL.Path)
+}
+
+func curl(s llb.State) llb.State {
+ i := s.Run(llb.Shlex("apt-get update"), llb.IgnoreCache).Root()
+ return i.Run(llb.Shlex("apt-get install curl -y")).Root()
+}
+
+func installCuda(s llb.State) (llb.State, llb.State) {
+ initState := s
+
+ s = s.Run(shf("curl -O https://developer.download.nvidia.com/compute/cuda/repos/debian12/x86_64/cuda-keyring_1.1-1_all.deb && dpkg -i cuda-keyring_1.1-1_all.deb && rm cuda-keyring_1.1-1_all.deb")).Root()
+ s = s.Run(llb.Shlex("apt-get update"), llb.IgnoreCache).Root()
+ s = s.Run(shf("apt-get install -y libcublas-%[1]s cuda-cudart-%[1]s && apt-get clean", cudaVersion)).Root()
+
+ diff := llb.Diff(initState, s)
+ merge := llb.Merge([]llb.State{llb.Image(distrolessBase), diff})
+ return s, merge
+}
+
+func addLocalAI(c *config.Config, s llb.State, merge llb.State) llb.State {
+ initState := s
+ var localAIURL string
+ switch c.Runtime {
+ case utils.RuntimeNVIDIA:
+ localAIURL = fmt.Sprintf("https://sertacstorage.blob.core.windows.net/localai/%s/local-ai", localAIVersion)
+ case utils.RuntimeCPUAVX2:
+ localAIURL = fmt.Sprintf("https://github.com/mudler/LocalAI/releases/download/%s/local-ai-avx2-Linux-x86_64", localAIVersion)
+ case utils.RuntimeCPUAVX512:
+ localAIURL = fmt.Sprintf("https://github.com/mudler/LocalAI/releases/download/%s/local-ai-avx512-Linux-x86_64", localAIVersion)
+ case utils.RuntimeCPUAVX, "":
+ localAIURL = fmt.Sprintf("https://github.com/mudler/LocalAI/releases/download/%s/local-ai-avx-Linux-x86_64", localAIVersion)
+ }
+
+ s = s.Run(llb.Shlexf("curl -Lo /usr/bin/local-ai %s", localAIURL)).Root()
+ s = s.Run(llb.Shlex("chmod +x /usr/bin/local-ai")).Root()
+ diff := llb.Diff(initState, s)
+ return llb.Merge([]llb.State{merge, diff})
+}
+
+func shf(cmd string, v ...interface{}) llb.RunOption {
+ return llb.Args([]string{"/bin/sh", "-c", fmt.Sprintf(cmd, v...)})
+}
diff --git a/pkg/aikit2llb/image.go b/pkg/aikit2llb/image.go
new file mode 100644
index 00000000..75e5972b
--- /dev/null
+++ b/pkg/aikit2llb/image.go
@@ -0,0 +1,50 @@
+package aikit2llb
+
+import (
+ "github.com/moby/buildkit/util/system"
+ specs "github.com/opencontainers/image-spec/specs-go/v1"
+ "github.com/sozercan/aikit/pkg/aikit/config"
+ "github.com/sozercan/aikit/pkg/utils"
+)
+
+func NewImageConfig(c *config.Config) *specs.Image {
+ img := emptyImage(c)
+ var debug, config string
+ if c.Debug {
+ debug = "--debug"
+ }
+ if c.Config != "" {
+ config = "--config-file=/config.yaml"
+ }
+ img.Config.Entrypoint = []string{"local-ai", debug, config}
+ return img
+}
+
+// emptyImage returns a minimal linux/amd64 OCI image config with a
+// layers rootfs and "/" as the working directory. For the NVIDIA
+// runtime the environment additionally advertises CUDA paths and the
+// NVIDIA_* variables consumed by the NVIDIA container runtime;
+// otherwise only PATH is set.
+func emptyImage(c *config.Config) *specs.Image {
+	img := &specs.Image{
+		Platform: specs.Platform{
+			// amd64/linux is hard-coded; no other platforms are produced.
+			Architecture: "amd64",
+			OS:           "linux",
+		},
+	}
+	img.RootFS.Type = "layers"
+	img.Config.WorkingDir = "/"
+
+	cudaEnv := []string{
+		// Extend the default PATH with the CUDA toolchain location.
+		"PATH=" + system.DefaultPathEnv("linux") + ":/usr/local/cuda/bin",
+		"NVIDIA_REQUIRE_CUDA=cuda>=12.0",
+		"NVIDIA_DRIVER_CAPABILITIES=compute,utility",
+		"NVIDIA_VISIBLE_DEVICES=all",
+		"LD_LIBRARY_PATH=/usr/local/cuda/lib64",
+	}
+
+	if c.Runtime == utils.RuntimeNVIDIA {
+		img.Config.Env = cudaEnv
+	} else {
+		img.Config.Env = []string{
+			"PATH=" + system.DefaultPathEnv("linux"),
+		}
+	}
+
+	return img
+}
diff --git a/pkg/build/build.go b/pkg/build/build.go
new file mode 100644
index 00000000..f80eb249
--- /dev/null
+++ b/pkg/build/build.go
@@ -0,0 +1,137 @@
+package build
+
+import (
+ "context"
+ "encoding/json"
+ "fmt"
+ "slices"
+
+ "github.com/containerd/containerd/platforms"
+ "github.com/moby/buildkit/client/llb"
+ "github.com/moby/buildkit/exporter/containerimage/exptypes"
+ "github.com/moby/buildkit/frontend/dockerui"
+ "github.com/moby/buildkit/frontend/gateway/client"
+ "github.com/pkg/errors"
+ "github.com/sozercan/aikit/pkg/aikit/config"
+ "github.com/sozercan/aikit/pkg/aikit2llb"
+ "github.com/sozercan/aikit/pkg/utils"
+)
+
+const (
+	// LocalNameDockerfile is the BuildKit local-source name for the
+	// context that carries the aikitfile.
+	LocalNameDockerfile = "dockerfile"
+	// keyFilename is the frontend build-opt key overriding the aikitfile name.
+	keyFilename = "filename"
+	// defaultDockerfileName is used when no filename opt is supplied.
+	defaultDockerfileName = "aikitfile.yaml"
+)
+
+// Build is the BuildKit gateway entry point: it loads and validates the
+// aikitfile, converts it to LLB, solves the definition, and attaches
+// the generated OCI image config to the result metadata.
+func Build(ctx context.Context, c client.Client) (*client.Result, error) {
+	cfg, err := getAikitfileConfig(ctx, c)
+	if err != nil {
+		return nil, errors.Wrap(err, "getting aikitfile")
+	}
+
+	err = validateConfig(cfg)
+	if err != nil {
+		return nil, errors.Wrap(err, "validating aikitfile")
+	}
+
+	st, img := aikit2llb.Aikit2LLB(cfg)
+
+	def, err := st.Marshal(ctx)
+	if err != nil {
+		return nil, errors.Wrapf(err, "failed to marshal local source")
+	}
+	res, err := c.Solve(ctx, client.SolveRequest{
+		Definition: def.ToPB(),
+	})
+	if err != nil {
+		return nil, errors.Wrapf(err, "failed to resolve dockerfile")
+	}
+	ref, err := res.SingleRef()
+	if err != nil {
+		return nil, err
+	}
+
+	// Renamed from `config`: that name shadowed the imported config
+	// package for the remainder of the function.
+	imageConfig, err := json.Marshal(img)
+	if err != nil {
+		return nil, errors.Wrapf(err, "failed to marshal image config")
+	}
+	// Key the exported image config by the current (host) platform.
+	k := platforms.Format(platforms.DefaultSpec())
+
+	res.AddMeta(fmt.Sprintf("%s/%s", exptypes.ExporterImageConfigKey, k), imageConfig)
+	res.SetRef(ref)
+
+	return res, nil
+}
+
+// getAikitfileConfig reads the aikitfile from the build context (local
+// source "dockerfile") and parses it into a config.Config. The filename
+// defaults to aikitfile.yaml and can be overridden via the `filename`
+// frontend option.
+func getAikitfileConfig(ctx context.Context, c client.Client) (*config.Config, error) {
+	opts := c.BuildOpts().Opts
+	filename := opts[keyFilename]
+	if filename == "" {
+		filename = defaultDockerfileName
+	}
+
+	// Progress display name; mention the filename only when it differs
+	// from the default. (Previously compared against "aikitfile", which
+	// never matched the default "aikitfile.yaml".)
+	name := "load aikitfile"
+	if filename != defaultDockerfileName {
+		name += " from " + filename
+	}
+
+	src := llb.Local(LocalNameDockerfile,
+		// Fixed: was an invalid `[]string(unknown)`. Only the aikitfile
+		// itself needs transferring from the context.
+		llb.IncludePatterns([]string{filename}),
+		llb.SessionID(c.BuildOpts().SessionID),
+		llb.SharedKeyHint(defaultDockerfileName),
+		dockerui.WithInternalName(name),
+	)
+
+	def, err := src.Marshal(ctx)
+	if err != nil {
+		return nil, errors.Wrapf(err, "failed to marshal local source")
+	}
+
+	var dtDockerfile []byte
+	res, err := c.Solve(ctx, client.SolveRequest{
+		Definition: def.ToPB(),
+	})
+	if err != nil {
+		return nil, errors.Wrapf(err, "failed to resolve dockerfile")
+	}
+
+	ref, err := res.SingleRef()
+	if err != nil {
+		return nil, err
+	}
+
+	dtDockerfile, err = ref.ReadFile(ctx, client.ReadRequest{
+		Filename: filename,
+	})
+	if err != nil {
+		return nil, errors.Wrapf(err, "failed to read dockerfile")
+	}
+
+	cfg, err := config.NewFromBytes(dtDockerfile)
+	if err != nil {
+		return nil, errors.Wrap(err, "getting config")
+	}
+
+	return cfg, nil
+}
+
+// validateConfig checks the parsed aikitfile for the required fields
+// and supported values; the checks run in the same order as before and
+// produce identical errors.
+func validateConfig(c *config.Config) error {
+	switch {
+	case c.APIVersion == "":
+		return errors.New("apiVersion is not defined")
+	case c.APIVersion != utils.APIv1alpha1:
+		return errors.Errorf("apiVersion %s is not supported", c.APIVersion)
+	case len(c.Models) == 0:
+		return errors.New("no models defined")
+	}
+
+	// Empty string is accepted and falls back to the AVX runtime later.
+	supported := []string{"", utils.RuntimeNVIDIA, utils.RuntimeCPUAVX, utils.RuntimeCPUAVX2, utils.RuntimeCPUAVX512}
+	if !slices.Contains(supported, c.Runtime) {
+		return errors.Errorf("runtime %s is not supported", c.Runtime)
+	}
+
+	return nil
+}
diff --git a/pkg/utils/const.go b/pkg/utils/const.go
new file mode 100644
index 00000000..cabba268
--- /dev/null
+++ b/pkg/utils/const.go
@@ -0,0 +1,10 @@
+package utils
+
+const (
+	// Supported values for the aikitfile `runtime` field.
+	RuntimeNVIDIA    = "cuda"   // NVIDIA GPU runtime (CUDA libraries installed)
+	RuntimeCPUAVX    = "avx"    // CPU build requiring AVX (also the default)
+	RuntimeCPUAVX2   = "avx2"   // CPU build requiring AVX2
+	RuntimeCPUAVX512 = "avx512" // CPU build requiring AVX-512
+
+	// APIv1alpha1 is the only accepted aikitfile apiVersion.
+	APIv1alpha1 = "v1alpha1"
+)
diff --git a/test/aikitfile-cuda.yaml b/test/aikitfile-cuda.yaml
new file mode 100644
index 00000000..98ff34dc
--- /dev/null
+++ b/test/aikitfile-cuda.yaml
@@ -0,0 +1,40 @@
+#syntax=ghcr.io/sozercan/aikit:latest
+apiVersion: v1alpha1
+debug: true
+runtime: cuda
+models:
+ - name: llama-2-7b-chat
+ source: https://huggingface.co/TheBloke/Llama-2-7B-Chat-GGUF/resolve/main/llama-2-7b-chat.Q4_K_M.gguf
+ - name: mistral-7b-instruct
+ source: https://huggingface.co/TheBloke/Mistral-7B-OpenOrca-GGUF/resolve/main/mistral-7b-openorca.Q6_K.gguf
+config: |
+ - name: llama-2-7b-chat
+ backend: llama
+ parameters:
+ top_k: 80
+ temperature: 0.2
+ top_p: 0.7
+ model: llama-2-7b-chat.Q4_K_M.gguf
+ context_size: 4096
+ gpu_layers: 35
+ f16: true
+ batch: 512
+ mmap: true
+ - name: mistral-7b-instruct
+ context_size: 4096
+ threads: 4
+ parameters:
+ model: mistral-7b-openorca.Q6_K.gguf
+ temperature: 0.2
+ top_k: 40
+ top_p: 0.95
+ template:
+ chat_message: chatml
+ chat: chatml-block
+ completion: completion
+ stopwords:
+ - <|im_end|>
+ gpu_layers: 35
+ f16: true
+ batch: 512
+ mmap: true
\ No newline at end of file
diff --git a/test/aikitfile-dev.yaml b/test/aikitfile-dev.yaml
new file mode 100644
index 00000000..913b75d0
--- /dev/null
+++ b/test/aikitfile-dev.yaml
@@ -0,0 +1,15 @@
+#syntax=ghcr.io/sozercan/aikit:dev
+apiVersion: v1alpha1
+debug: true
+models:
+ - name: llama-2-7b-chat
+ source: https://huggingface.co/TheBloke/Llama-2-7B-Chat-GGUF/resolve/main/llama-2-7b-chat.Q4_K_M.gguf
+config: |
+ - name: llama-2-7b-chat
+ backend: llama
+ parameters:
+ top_k: 80
+ temperature: 0.2
+ top_p: 0.7
+ model: llama-2-7b-chat.Q4_K_M.gguf
+ context_size: 4096
diff --git a/test/aikitfile.yaml b/test/aikitfile.yaml
new file mode 100644
index 00000000..d6606e3b
--- /dev/null
+++ b/test/aikitfile.yaml
@@ -0,0 +1,15 @@
+#syntax=sozercan/aikit:test
+apiVersion: v1alpha1
+debug: true
+models:
+ - name: llama-2-7b-chat
+ source: https://huggingface.co/TheBloke/Llama-2-7B-Chat-GGUF/resolve/main/llama-2-7b-chat.Q4_K_M.gguf
+config: |
+ - name: llama-2-7b-chat
+ backend: llama
+ parameters:
+ top_k: 80
+ temperature: 0.2
+ top_p: 0.7
+ model: llama-2-7b-chat.Q4_K_M.gguf
+ context_size: 4096
diff --git a/test/bats/helpers.bash b/test/bats/helpers.bash
new file mode 100644
index 00000000..cefef431
--- /dev/null
+++ b/test/bats/helpers.bash
@@ -0,0 +1,74 @@
+#!/bin/bash
+
+assert_success() {
+ if [[ "$status" != 0 ]]; then
+ echo "expected: 0"
+ echo "actual: $status"
+ echo "output: $output"
+ return 1
+ fi
+}
+
+assert_failure() {
+ if [[ "$status" == 0 ]]; then
+ echo "expected: non-zero exit code"
+ echo "actual: $status"
+ echo "output: $output"
+ return 1
+ fi
+}
+
+assert_equal() {
+ if [[ "$1" != "$2" ]]; then
+ echo "expected: $1"
+ echo "actual: $2"
+ return 1
+ fi
+}
+
+assert_not_equal() {
+ if [[ "$1" == "$2" ]]; then
+ echo "unexpected: $1"
+ echo "actual: $2"
+ return 1
+ fi
+}
+
+# Fail unless actual ($2) matches pattern ($1). $1 is deliberately left
+# unquoted on the right of =~ so it is treated as an extended regular
+# expression rather than a literal string.
+assert_match() {
+  if [[ ! "$2" =~ $1 ]]; then
+    echo "expected: $1"
+    echo "actual: $2"
+    return 1
+  fi
+}
+
+# Fail when actual ($2) matches pattern ($1). As in assert_match, $1 is
+# intentionally unquoted so it is interpreted as an extended regex.
+assert_not_match() {
+  if [[ "$2" =~ $1 ]]; then
+    echo "expected: $1"
+    echo "actual: $2"
+    return 1
+  fi
+}
+
+# Fail unless the character length of $2 (${#2}) equals $1.
+assert_len() {
+  if [[ "$1" != "${#2}" ]]; then
+    echo "expected len: $1"
+    echo "actual len: ${#2} ($2)"
+    return 1
+  fi
+}
+
+wait_for_process() {
+ wait_time="$1"
+ sleep_time="$2"
+ cmd="$3"
+ while [ "$wait_time" -gt 0 ]; do
+ if eval "$cmd"; then
+ return 0
+ else
+ sleep "$sleep_time"
+ wait_time=$((wait_time - sleep_time))
+ fi
+ done
+ return 1
+}
diff --git a/test/bats/test.bats b/test/bats/test.bats
new file mode 100644
index 00000000..90ab1631
--- /dev/null
+++ b/test/bats/test.bats
@@ -0,0 +1,14 @@
+#!/usr/bin/env bats
+
+load helpers
+
+WAIT_TIME=120
+SLEEP_TIME=1
+
+@test "send request to llama-2-7b-chat" {
+  # Fixed: a trailing comma after the messages array made the body
+  # invalid JSON; strict decoders (e.g. Go's encoding/json on the
+  # server) reject it.
+  run curl --retry 20 --retry-all-errors http://localhost:8080/v1/chat/completions -H "Content-Type: application/json" -d '{
+    "model": "llama-2-7b-chat",
+    "messages": [{"role": "user", "content": "explain kubernetes in a sentence"}]
+  }'
+  assert_success
+}