diff --git a/.github/dependabot.yaml b/.github/dependabot.yaml new file mode 100644 index 00000000..c507d449 --- /dev/null +++ b/.github/dependabot.yaml @@ -0,0 +1,10 @@ +version: 2 +updates: + - package-ecosystem: "gomod" + directory: "/" + schedule: + interval: "weekly" + - package-ecosystem: "github-actions" + directory: "/" + schedule: + interval: "weekly" diff --git a/.github/workflows/lint.yaml b/.github/workflows/lint.yaml new file mode 100644 index 00000000..95d25259 --- /dev/null +++ b/.github/workflows/lint.yaml @@ -0,0 +1,30 @@ +name: lint + +on: + push: + branches: + - main + paths-ignore: + - '**.md' + pull_request: + branches: + - main + paths-ignore: + - '**.md' + +permissions: read-all + +jobs: + lint: + runs-on: ubuntu-latest + steps: + - uses: actions/checkout@v4 + + - uses: actions/setup-go@v4 + with: + go-version: "1.21" + + - name: lint + uses: golangci/golangci-lint-action@v3 + with: + version: v1.55.2 \ No newline at end of file diff --git a/.github/workflows/pre-release.yaml b/.github/workflows/pre-release.yaml new file mode 100644 index 00000000..1ffe70ed --- /dev/null +++ b/.github/workflows/pre-release.yaml @@ -0,0 +1,42 @@ +name: pre-release + +on: + push: + branches: + - main + +permissions: + contents: write + packages: write + +jobs: + release: + runs-on: ubuntu-latest + timeout-minutes: 360 + steps: + - uses: actions/checkout@v4 + + - name: cleanup disk space + run: | + df -H + docker system prune -f -a --volumes + sudo rm -rf /usr/share/dotnet + sudo rm -rf /opt/ghc + sudo rm -rf "/usr/local/share/boost" + sudo rm -rf "$AGENT_TOOLSDIRECTORY" + df -H + + - name: Login to ghcr + uses: docker/login-action@v3 + with: + registry: ghcr.io + username: ${{ github.actor }} + password: ${{ secrets.GITHUB_TOKEN }} + + - name: create buildx builder + run: docker buildx create --use --name builder --bootstrap + - uses: crazy-max/ghaction-github-runtime@v3 + + - name: Push aikit:dev to GHCR + run: | + docker buildx build -t 
ghcr.io/sozercan/aikit:dev --push . diff --git a/.github/workflows/release.yaml b/.github/workflows/release.yaml new file mode 100644 index 00000000..5cb7f0cd --- /dev/null +++ b/.github/workflows/release.yaml @@ -0,0 +1,38 @@ +name: release + +on: + push: + tags: + - v* + +permissions: + contents: write + packages: write + +jobs: + release: + runs-on: ubuntu-latest + timeout-minutes: 360 + steps: + - uses: actions/checkout@v4 + with: + fetch-depth: 0 + + - name: Login to ghcr + uses: docker/login-action@v3 + with: + registry: ghcr.io + username: ${{ github.actor }} + password: ${{ secrets.GITHUB_TOKEN }} + + - name: Get tag + run: | + echo "TAG=${GITHUB_REF#refs/tags/}" >> $GITHUB_ENV + + - name: Push aikit:latest to GHCR + run: | + docker buildx build . -t ghcr.io/sozercan/aikit:${TAG} -t ghcr.io/sozercan/aikit:latest --push --cache-from=type=gha,scope=aikit --cache-to=type=gha,scope=aikit,mode=max + + # - name: release llama 2 image + # run: | + # docker buildx build -t ghcr.io/sozercan/llama2:7b -t ghcr.io/sozercan/llama2:7b-chat -t ghcr.io/sozercan/llama2:chat -t ghcr.io/sozercan/llama2:latest -f models/llama-2-chat-7b.yaml --push --cache-from=type=gha,scope=llama-2-7b-chat --cache-to=type=gha,scope=llama-2-7b-chat,mode=max diff --git a/.github/workflows/test-docker.yaml b/.github/workflows/test-docker.yaml new file mode 100644 index 00000000..e4fc5710 --- /dev/null +++ b/.github/workflows/test-docker.yaml @@ -0,0 +1,54 @@ +name: docker-test + +on: + push: + branches: + - main + paths-ignore: + - '**.md' + pull_request: + branches: + - main + paths-ignore: + - '**.md' + +permissions: read-all + +jobs: + test: + runs-on: ubuntu-latest + timeout-minutes: 240 + steps: + - uses: actions/checkout@v4 + + - name: cleanup disk space + run: | + df -H + docker system prune -f -a --volumes + sudo rm -rf /usr/share/dotnet + sudo rm -rf /opt/ghc + sudo rm -rf "/usr/local/share/boost" + sudo rm -rf "$AGENT_TOOLSDIRECTORY" + df -H + + - name: create buildx builder + run: 
docker buildx create --use --name builder --bootstrap + - uses: crazy-max/ghaction-github-runtime@v3 + + - name: build aikit + run: docker buildx build . -t sozercan/aikit:test --load --cache-from=type=gha,scope=aikit --cache-to=type=gha,scope=aikit,mode=max + + - name: build test model + run: docker buildx build . -t sozercan/testmodel:test -f test/aikitfile.yaml --load --cache-from=type=gha,scope=testmodel --cache-to=type=gha,scope=testmodel,mode=max + + - name: list images + run: docker images + + - name: run test model + run: docker run -d -p 8080:8080 sozercan/testmodel:test + + - name: install e2e dependencies + run: make test-e2e-dependencies + + - name: run bats test + run: make test-e2e diff --git a/.github/workflows/test-kubernetes.yaml b/.github/workflows/test-kubernetes.yaml new file mode 100644 index 00000000..840ced48 --- /dev/null +++ b/.github/workflows/test-kubernetes.yaml @@ -0,0 +1,60 @@ +name: kubernetes-test + +on: + push: + branches: + - main + paths-ignore: + - '**.md' + pull_request: + branches: + - main + paths-ignore: + - '**.md' + +permissions: read-all + +jobs: + test: + runs-on: ubuntu-latest + timeout-minutes: 240 + steps: + - uses: actions/checkout@v4 + + - name: cleanup disk space + run: | + df -H + docker system prune -a -f + sudo rm -rf /usr/share/dotnet + sudo rm -rf /opt/ghc + sudo rm -rf "/usr/local/share/boost" + sudo rm -rf "$AGENT_TOOLSDIRECTORY" + df -H + + - name: create buildx builder + run: docker buildx create --use --name builder --bootstrap + - uses: crazy-max/ghaction-github-runtime@v3 + + - name: build aikit + run: docker buildx build . -t sozercan/aikit:test --load --cache-from=type=gha,scope=aikit --cache-to=type=gha,scope=aikit,mode=max + + - name: build test model + run: docker buildx build . 
-t sozercan/testmodel:test -f test/aikitfile.yaml --load --cache-from=type=gha,scope=testmodel --cache-to=type=gha,scope=testmodel,mode=max + + - name: install e2e dependencies + run: make test-e2e-dependencies + + - name: create kind cluster + run: kind create cluster --wait 5m + + - name: load test model image into kind cluster + run: kind load docker-image sozercan/testmodel:test + + - name: deploy test model + run: | + kubectl create deployment test-model-deployment --image=sozercan/testmodel:test --replicas 1 + kubectl expose deployment test-model-deployment --port=8080 --target-port=8080 --name=test-model-service + kubectl port-forward service/test-model-service 8080:8080 & + + - name: run bats test + run: make test-e2e \ No newline at end of file diff --git a/.github/workflows/unit-test.yaml b/.github/workflows/unit-test.yaml new file mode 100644 index 00000000..75250f3b --- /dev/null +++ b/.github/workflows/unit-test.yaml @@ -0,0 +1,33 @@ +name: unit-test + +on: + push: + branches: + - main + paths-ignore: + - '**.md' + pull_request: + branches: + - main + paths-ignore: + - '**.md' + +permissions: read-all + +jobs: + test: + runs-on: ubuntu-latest + steps: + - uses: actions/checkout@v4 + + - uses: actions/setup-go@v4 + with: + go-version: "1.21" + + - name: go mod tidy + run: | + go mod tidy + git diff --exit-code + + - name: test + run: make test diff --git a/.github/workflows/update-models.yaml b/.github/workflows/update-models.yaml new file mode 100644 index 00000000..2d5d3280 --- /dev/null +++ b/.github/workflows/update-models.yaml @@ -0,0 +1,38 @@ +name: update-models + +on: + workflow_dispatch: + +permissions: + contents: write + packages: write + +jobs: + update-models: + runs-on: ubuntu-latest + timeout-minutes: 360 + steps: + - uses: actions/checkout@v4 + + - name: Login to GHCR + uses: docker/login-action@v3 + with: + registry: ghcr.io + username: ${{ github.actor }} + password: ${{ secrets.GITHUB_TOKEN }} + + - name: Push llama 2 models to GHCR + 
run: | + docker buildx create --use --name builder --bootstrap + + # cpu avx + MODELS_PATH=models + docker buildx build . -t ghcr.io/sozercan/llama2:7b -f ${MODELS_PATH}/llama-2-7b-chat.yaml --push + docker buildx build . -t ghcr.io/sozercan/llama2:13b -f ${MODELS_PATH}/llama-2-13b-chat.yaml --push + docker buildx build . -t ghcr.io/sozercan/orca2:13b -f ${MODELS_PATH}/orca-2-13b.yaml --push + + # cuda + CUDA_MODELS_PATH=models/cuda + docker buildx build . -t ghcr.io/sozercan/llama2:7b-cuda -f ${CUDA_MODELS_PATH}/llama-2-7b-chat.yaml --push + docker buildx build . -t ghcr.io/sozercan/llama2:13b-cuda -f ${CUDA_MODELS_PATH}/llama-2-13b-chat.yaml --push + docker buildx build . -t ghcr.io/sozercan/orca2:13b-cuda -f ${CUDA_MODELS_PATH}/orca-2-13b.yaml --push \ No newline at end of file diff --git a/.gitignore b/.gitignore index 3b735ec4..9108c945 100644 --- a/.gitignore +++ b/.gitignore @@ -19,3 +19,6 @@ # Go workspace file go.work + +bin +coverage.txt diff --git a/.golangci.yaml b/.golangci.yaml new file mode 100644 index 00000000..0a828406 --- /dev/null +++ b/.golangci.yaml @@ -0,0 +1,40 @@ +run: + timeout: 5m + +linters-settings: + # gocritic: + # enabled-tags: + # - performance + lll: + line-length: 200 + + misspell: + locale: US + staticcheck: + go: "1.21" + +linters: + disable-all: true + enable: + - errcheck + - errorlint + - exportloopref + - forcetypeassert + - gci + - gocritic + - goconst + - godot + - gofmt + - gofumpt + - goimports + - gosec + - gosimple + - govet + - ineffassign + - misspell + - revive + - staticcheck + - typecheck + - unconvert + - unused + - whitespace diff --git a/Dockerfile b/Dockerfile new file mode 100644 index 00000000..2a398d85 --- /dev/null +++ b/Dockerfile @@ -0,0 +1,8 @@ +FROM golang:1.21-bullseye as builder +COPY . 
/go/src/github.com/sozercan/aikit +WORKDIR /go/src/github.com/sozercan/aikit +RUN CGO_ENABLED=0 go build -o /aikit --ldflags '-extldflags "-static"' ./cmd/frontend + +FROM scratch +COPY --from=builder /aikit /bin/aikit +ENTRYPOINT ["/bin/aikit"] diff --git a/Makefile b/Makefile new file mode 100644 index 00000000..bbe36043 --- /dev/null +++ b/Makefile @@ -0,0 +1,44 @@ +REGISTRY ?= ghcr.io/sozercan +BATS_TESTS_FILE ?= test/bats/test.bats +BATS_VERSION ?= 1.10.0 +KIND_VERSION ?= 0.20.0 +KUBERNETES_VERSION ?= 1.28.0 +TAG ?= test +OUTPUT_TYPE ?= type=docker +PULL ?= +NO_CACHE ?= + +.PHONY: lint +lint: + golangci-lint run -v ./... --timeout 5m + +.PHONY: build-aikit +build-aikit: + docker buildx build . -t ${REGISTRY}/aikit:${TAG} --output=${OUTPUT_TYPE} + +.PHONY: build-test-model +build-test-model: + docker buildx build . -t ${REGISTRY}/testmodel:${TAG} -f test/aikitfile.yaml --output=${OUTPUT_TYPE} + +.PHONY: run-test-model +run-test-model: + docker run -p 8080:8080 ${REGISTRY}/testmodel:${TAG} + +.PHONY: test +test: + go test -v ./... 
-race -coverprofile=coverage.txt -covermode=atomic + +.PHONY: test-e2e-dependencies +test-e2e-dependencies: + mkdir -p ${GITHUB_WORKSPACE}/bin + echo "${GITHUB_WORKSPACE}/bin" >> ${GITHUB_PATH} + + curl -sSLO https://github.com/bats-core/bats-core/archive/v${BATS_VERSION}.tar.gz && tar -zxvf v${BATS_VERSION}.tar.gz && bash bats-core-${BATS_VERSION}/install.sh ${GITHUB_WORKSPACE} + + # used for kubernetes test + curl -sSL https://dl.k8s.io/release/v${KUBERNETES_VERSION}/bin/linux/amd64/kubectl -o ${GITHUB_WORKSPACE}/bin/kubectl && chmod +x ${GITHUB_WORKSPACE}/bin/kubectl + curl -sSL https://github.com/kubernetes-sigs/kind/releases/download/v${KIND_VERSION}/kind-linux-amd64 -o ${GITHUB_WORKSPACE}/bin/kind && chmod +x ${GITHUB_WORKSPACE}/bin/kind + +.PHONY: test-e2e +test-e2e: + ${GITHUB_WORKSPACE}/bin/bats --verbose-run --trace ${BATS_TESTS_FILE} diff --git a/README.md b/README.md index cf24d118..beb34892 100644 --- a/README.md +++ b/README.md @@ -1 +1,193 @@ -# aikit \ No newline at end of file +# AIKit ✨ + +

+
+

+ +AIKit is a quick, easy, and local or cloud-agnostic way to get started hosting and deploying large language models (LLMs) for inference. No GPU, internet access or additional tools are needed to get started except for [Docker](https://docs.docker.com/desktop/install/linux-install/)! + +AIKit uses [LocalAI](https://localai.io/) under the hood to run inference. LocalAI provides a drop-in replacement REST API that is OpenAI API compatible, so you can use any OpenAI API compatible client, such as [Kubectl AI](https://github.com/sozercan/kubectl-ai), to send requests to open-source LLMs powered by AIKit! + +> [!NOTE] +> At this time, AIKit is tested with the LocalAI `llama` backend. Other backends may work but are not tested. Please open an issue if you'd like to see support for other backends. + +## Features + +- 🐳 No GPU, internet access or additional tools needed except for [Docker](https://docs.docker.com/desktop/install/linux-install/)! +- 🤏 Minimal image size, resulting in fewer vulnerabilities and a smaller attack surface with a custom [distroless](https://github.com/GoogleContainerTools/distroless)-based image +- 🚀 Easy to use declarative configuration +- ✨ OpenAI API compatible to use with any OpenAI API compatible client +- 🚢 Kubernetes deployment ready +- 📦 Supports multiple models with a single image +- 🖥️ Supports GPU-accelerated inferencing with NVIDIA GPUs + +## Demos + +### Building an image with a Llama 2 model + +[Watch the image build recording on asciinema](https://asciinema.org/a/J9bitkONKPvedSfU1RkrmVEhD) + +### Inference + +[Watch the inference recording on asciinema](https://asciinema.org/a/DYh5bCQMNPSis1whhsfPeMOoM) + +## Pre-made Models + +AIKit comes with pre-made models that you can use out-of-the-box! 
+ +### CPU +- 🦙 Llama 2 7B Chat: `ghcr.io/sozercan/llama2:7b` +- 🦙 Llama 2 13B Chat: `ghcr.io/sozercan/llama2:13b` +- 🐬 Orca 2 13B: `ghcr.io/sozercan/orca2:13b` + +### NVIDIA CUDA + +- 🦙 Llama 2 7B Chat (CUDA): `ghcr.io/sozercan/llama2:7b-cuda` +- 🦙 Llama 2 13B Chat (CUDA): `ghcr.io/sozercan/llama2:13b-cuda` +- 🐬 Orca 2 13B (CUDA): `ghcr.io/sozercan/orca2:13b-cuda` + +> CUDA models include CUDA v12. They are used with [NVIDIA GPU acceleration](#gpu-acceleration-support). + +## Quick Start + +### Creating an image + +> [!NOTE] +> This section shows how to create a custom image with models of your choosing. If you want to use one of the pre-made models, skip to [running models](#running-models). +> +> Please see the [models folder](./models/) for pre-made model definitions. You can find more model examples at [go-skynet/model-gallery](https://github.com/go-skynet/model-gallery). + +Create an `aikitfile.yaml` with the following structure: + +```yaml +#syntax=ghcr.io/sozercan/aikit:latest +apiVersion: v1alpha1 +models: + - name: llama-2-7b-chat + source: https://huggingface.co/TheBloke/Llama-2-7B-Chat-GGUF/resolve/main/llama-2-7b-chat.Q4_K_M.gguf +``` + +> [!TIP] +> This is the simplest way to get started. For the full `aikitfile` specification, see [specs](docs/specs.md). + +First, create a buildx buildkit instance. Alternatively, if you are using Docker v24 with the [containerd image store](https://docs.docker.com/storage/containerd/) enabled, you can skip this step. + +```bash +docker buildx create --use --name aikit-builder +``` + +Then build your image with: + +```bash +docker buildx build . -t my-model -f aikitfile.yaml --load +``` + +This will build a local container image with your model(s). 
You can see the image with: + +```bash +docker images +REPOSITORY TAG IMAGE ID CREATED SIZE +my-model latest e7b7c5a4a2cb About an hour ago 5.51GB +``` + +### Running models + +You can start the inferencing server for your models with: + +```bash +# for pre-made models, replace "my-model" with the image name +docker run -d --rm -p 8080:8080 my-model +``` + +You can then send requests to `localhost:8080` to run inference from your models. For example: + +```bash +curl http://localhost:8080/v1/chat/completions -H "Content-Type: application/json" -d '{ + "model": "llama-2-7b-chat", + "messages": [{"role": "user", "content": "explain kubernetes in a sentence"}] + }' +{"created":1701236489,"object":"chat.completion","id":"dd1ff40b-31a7-4418-9e32-42151ab6875a","model":"llama-2-7b-chat","choices":[{"index":0,"finish_reason":"stop","message":{"role":"assistant","content":"\nKubernetes is a container orchestration system that automates the deployment, scaling, and management of containerized applications in a microservices architecture."}}],"usage":{"prompt_tokens":0,"completion_tokens":0,"total_tokens":0}} +``` + +## Kubernetes Deployment + +It is easy to get started to deploy your models to Kubernetes! + +Make sure you have a Kubernetes cluster running and `kubectl` is configured to talk to it, and your model images are accessible from the cluster. You can use [kind](https://kind.sigs.k8s.io/) to create a local Kubernetes cluster for testing purposes. 
+ +```bash +# create a deployment +# for pre-made models, replace "my-model" with the image name +kubectl create deployment my-llm-deployment --image=my-model + +# expose it as a service +kubectl expose deployment my-llm-deployment --port=8080 --target-port=8080 --name=my-llm-service + +# easy to scale up and down +kubectl scale deployment my-llm-deployment --replicas=3 + +# port-forward for testing locally +kubectl port-forward service/my-llm-service 8080:8080 + +# send requests to your model +curl http://localhost:8080/v1/chat/completions -H "Content-Type: application/json" -d '{ + "model": "llama-2-7b-chat", + "messages": [{"role": "user", "content": "explain kubernetes in a sentence"}] + }' +{"created":1701236489,"object":"chat.completion","id":"dd1ff40b-31a7-4418-9e32-42151ab6875a","model":"llama-2-7b-chat","choices":[{"index":0,"finish_reason":"stop","message":{"role":"assistant","content":"\nKubernetes is a container orchestration system that automates the deployment, scaling, and management of containerized applications in a microservices architecture."}}],"usage":{"prompt_tokens":0,"completion_tokens":0,"total_tokens":0}} +``` + +> [!TIP] +> For an example Kubernetes deployment and service YAML, see [kubernetes folder](./kubernetes/). + +## GPU Acceleration Support + +> [!NOTE] +> At this time, only NVIDIA GPU acceleration is supported. Please open an issue if you'd like to see support for other GPU vendors. + +### NVIDIA + +AIKit supports GPU accelerated inferencing with [NVIDIA Container Toolkit](https://github.com/NVIDIA/nvidia-container-toolkit). You must also have [NVIDIA Drivers](https://www.nvidia.com/en-us/drivers/unix/) installed on your host machine. + +For Kubernetes, [NVIDIA GPU Operator](https://github.com/NVIDIA/gpu-operator) provides a streamlined way to install the NVIDIA drivers and container toolkit to configure your cluster to use GPUs. 
+ +To get started with GPU-accelerated inferencing, make sure to set the following in your `aikitfile` and build your model. + +```yaml +runtime: cuda # use NVIDIA CUDA runtime +f16: true # use float16 precision +gpu_layers: 35 # number of layers to offload to GPU +low_vram: true # for devices with low VRAM +``` + +> [!TIP] +> Make sure to customize these values based on your model and GPU specs. + +After building the model, you can run it with the [`--gpus all`](https://docs.nvidia.com/datacenter/cloud-native/container-toolkit/latest/docker-specialized.html#gpu-enumeration) flag to enable GPU support: + +```bash +# for pre-made models, replace "my-model" with the image name +docker run --rm --gpus all -p 8080:8080 my-model +``` + +If GPU acceleration is working, you'll see output that is similar to the following in the debug logs: + +```bash +5:32AM DBG GRPC(llama-2-7b-chat.Q4_K_M.gguf-127.0.0.1:43735): stderr ggml_init_cublas: found 1 CUDA devices: +5:32AM DBG GRPC(llama-2-7b-chat.Q4_K_M.gguf-127.0.0.1:43735): stderr Device 0: Tesla T4, compute capability 7.5 +... 
+5:32AM DBG GRPC(llama-2-7b-chat.Q4_K_M.gguf-127.0.0.1:43735): stderr llm_load_tensors: using CUDA for GPU acceleration +5:32AM DBG GRPC(llama-2-7b-chat.Q4_K_M.gguf-127.0.0.1:43735): stderr llm_load_tensors: mem required = 70.41 MB (+ 2048.00 MB per state) +5:32AM DBG GRPC(llama-2-7b-chat.Q4_K_M.gguf-127.0.0.1:43735): stderr llm_load_tensors: offloading 32 repeating layers to GPU +5:32AM DBG GRPC(llama-2-7b-chat.Q4_K_M.gguf-127.0.0.1:43735): stderr llm_load_tensors: offloading non-repeating layers to GPU +5:32AM DBG GRPC(llama-2-7b-chat.Q4_K_M.gguf-127.0.0.1:43735): stderr llm_load_tensors: offloading v cache to GPU +5:32AM DBG GRPC(llama-2-7b-chat.Q4_K_M.gguf-127.0.0.1:43735): stderr llm_load_tensors: offloading k cache to GPU +5:32AM DBG GRPC(llama-2-7b-chat.Q4_K_M.gguf-127.0.0.1:43735): stderr llm_load_tensors: offloaded 35/35 layers to GPU +5:32AM DBG GRPC(llama-2-7b-chat.Q4_K_M.gguf-127.0.0.1:43735): stderr llm_load_tensors: VRAM used: 5869 MB +``` + +## Acknowledgements and Credits + +- [LocalAI](https://localai.io/) for providing the inference engine +- [Mockerfile](https://github.com/r2d4/mockerfile) for the inspiration and sample code +- [Huggingface](https://huggingface.co/) and [TheBloke](https://huggingface.co/TheBloke) for providing the models diff --git a/cmd/frontend/main.go b/cmd/frontend/main.go new file mode 100644 index 00000000..37ae5c5c --- /dev/null +++ b/cmd/frontend/main.go @@ -0,0 +1,24 @@ +package main + +import ( + "os" + + "github.com/moby/buildkit/frontend/gateway/grpcclient" + "github.com/moby/buildkit/util/appcontext" + "github.com/moby/buildkit/util/bklog" + "github.com/sirupsen/logrus" + "github.com/sozercan/aikit/pkg/build" + "google.golang.org/grpc/grpclog" +) + +func main() { + bklog.L.Logger.SetOutput(os.Stderr) + grpclog.SetLoggerV2(grpclog.NewLoggerV2WithVerbosity(bklog.L.WriterLevel(logrus.InfoLevel), bklog.L.WriterLevel(logrus.WarnLevel), bklog.L.WriterLevel(logrus.ErrorLevel), 1)) + + ctx := appcontext.Context() + + if err 
:= grpcclient.RunFromEnvironment(ctx, build.Build); err != nil { + bklog.L.WithError(err).Fatal("error running frontend") + os.Exit(1) + } +} diff --git a/docs/images/logo.png b/docs/images/logo.png new file mode 100644 index 00000000..31caeb11 Binary files /dev/null and b/docs/images/logo.png differ diff --git a/docs/specs.md b/docs/specs.md new file mode 100644 index 00000000..b1cf6df9 --- /dev/null +++ b/docs/specs.md @@ -0,0 +1,58 @@ +# API Specifications + +## v1alpha1 + +```yaml +apiVersion: # required. only v1alpha1 is supported at the moment +debug: # optional. if set to true, will print debug logs +runtime: # optional. defaults to avx. can be avx, avx2, avx512, cuda +models: # required. list of models to build + - name: # required. name of the model + source: # required. source of the model. must be a url + sha256: # optional. sha256 hash of the model file + promptTemplates: # optional. list of prompt templates for a model + - name: # required. name of the template + template: # required. template string +config: # optional. 
list of config files +``` + +Example: + +```yaml +#syntax=ghcr.io/sozercan/aikit:latest +apiVersion: v1alpha1 +debug: true +runtime: cuda +models: + - name: llama-2-7b-chat + source: https://huggingface.co/TheBloke/Llama-2-7B-Chat-GGUF/resolve/main/llama-2-7b-chat.Q4_K_M.gguf + sha256: "08a5566d61d7cb6b420c3e4387a39e0078e1f2fe5f055f3a03887385304d4bfa" + promptTemplates: + - name: "llama-2-7b-chat" + template: | + {{if eq .RoleName "assistant"}}{{.Content}}{{else}} + [INST] + {{if .SystemPrompt}}{{.SystemPrompt}}{{else if eq .RoleName "system"}}<<SYS>>{{.Content}}<</SYS>> + + {{else if .Content}}{{.Content}}{{end}} + [/INST] + {{end}} +config: | + - name: "llama-2-7b-chat" + backend: "llama" + parameters: + top_k: 80 + temperature: 0.2 + top_p: 0.7 + model: "llama-2-7b-chat.Q4_K_M.gguf" + context_size: 4096 + roles: + function: 'Function Result:' + assistant_function_call: 'Function Call:' + assistant: 'Assistant:' + user: 'User:' + system: 'System:' + template: + chat_message: "llama-2-7b-chat" + system_prompt: "You are a helpful assistant, below is a conversation, please respond with the next message and do not ask follow-up questions" +``` diff --git a/go.mod b/go.mod new file mode 100644 index 00000000..72149b4c --- /dev/null +++ b/go.mod @@ -0,0 +1,56 @@ +module github.com/sozercan/aikit + +go 1.21 + +require ( + github.com/containerd/containerd v1.7.9 + github.com/moby/buildkit v0.12.3 + github.com/opencontainers/image-spec v1.1.0-rc5 + github.com/pkg/errors v0.9.1 + github.com/sirupsen/logrus v1.9.3 + google.golang.org/grpc v1.59.0 + gopkg.in/yaml.v2 v2.4.0 +) + +require ( + cloud.google.com/go/compute/metadata v0.2.3 // indirect + github.com/AdaLogics/go-fuzz-headers v0.0.0-20230811130428-ced1acdcaa24 // indirect + github.com/Microsoft/go-winio v0.6.1 // indirect + github.com/Microsoft/hcsshim v0.11.4 // indirect + github.com/containerd/continuity v0.4.2 // indirect + github.com/containerd/log v0.1.0 // indirect + github.com/containerd/ttrpc v1.2.2 // 
indirect + github.com/containerd/typeurl/v2 v2.1.1 // indirect + github.com/docker/distribution v2.8.2+incompatible // indirect + github.com/docker/docker v24.0.0-rc.2.0.20230718135204-8e51b8b59cb8+incompatible // indirect + github.com/docker/go-units v0.5.0 // indirect + github.com/go-logr/logr v1.2.4 // indirect + github.com/go-logr/stdr v1.2.2 // indirect + github.com/gogo/googleapis v1.4.1 // indirect + github.com/gogo/protobuf v1.3.2 // indirect + github.com/golang/protobuf v1.5.3 // indirect + github.com/google/shlex v0.0.0-20191202100458-e7afc7fbc510 // indirect + github.com/grpc-ecosystem/go-grpc-middleware v1.3.0 // indirect + github.com/in-toto/in-toto-golang v0.5.0 // indirect + github.com/klauspost/compress v1.16.3 // indirect + github.com/kr/text v0.2.0 // indirect + github.com/moby/locker v1.0.1 // indirect + github.com/moby/sys/signal v0.7.0 // indirect + github.com/opencontainers/go-digest v1.0.0 // indirect + github.com/secure-systems-lab/go-securesystemslib v0.4.0 // indirect + github.com/shibumi/go-pathspec v1.3.0 // indirect + github.com/tonistiigi/fsutil v0.0.0-20230629203738-36ef4d8c0dbb // indirect + go.opentelemetry.io/contrib/instrumentation/google.golang.org/grpc/otelgrpc v0.45.0 // indirect + go.opentelemetry.io/otel v1.19.0 // indirect + go.opentelemetry.io/otel/metric v1.19.0 // indirect + go.opentelemetry.io/otel/trace v1.19.0 // indirect + golang.org/x/crypto v0.14.0 // indirect + golang.org/x/mod v0.11.0 // indirect + golang.org/x/net v0.17.0 // indirect + golang.org/x/sync v0.3.0 // indirect + golang.org/x/sys v0.13.0 // indirect + golang.org/x/text v0.13.0 // indirect + golang.org/x/tools v0.10.0 // indirect + google.golang.org/genproto/googleapis/rpc v0.0.0-20230822172742-b8732ec3820d // indirect + google.golang.org/protobuf v1.31.0 // indirect +) diff --git a/go.sum b/go.sum new file mode 100644 index 00000000..b165cb49 --- /dev/null +++ b/go.sum @@ -0,0 +1,245 @@ +cloud.google.com/go v0.26.0 
h1:e0WKqKTd5BnrG8aKH3J3h+QvEIQtSUcf2n5UZ5ZgLtQ= +cloud.google.com/go v0.26.0/go.mod h1:aQUYkXzVsufM+DwF1aE+0xfcU+56JwCaLick0ClmMTw= +cloud.google.com/go/compute v1.23.0 h1:tP41Zoavr8ptEqaW6j+LQOnyBBhO7OkOMAGrgLopTwY= +cloud.google.com/go/compute v1.23.0/go.mod h1:4tCnrn48xsqlwSAiLf1HXMQk8CONslYbdiEZc9FEIbM= +cloud.google.com/go/compute/metadata v0.2.3 h1:mg4jlk7mCAj6xXp9UJ4fjI9VUI5rubuGBW5aJ7UnBMY= +cloud.google.com/go/compute/metadata v0.2.3/go.mod h1:VAV5nSsACxMJvgaAuX6Pk2AawlZn8kiOGuCv6gTkwuA= +github.com/AdaLogics/go-fuzz-headers v0.0.0-20230811130428-ced1acdcaa24 h1:bvDV9vkmnHYOMsOr4WLk+Vo07yKIzd94sVoIqshQ4bU= +github.com/AdaLogics/go-fuzz-headers v0.0.0-20230811130428-ced1acdcaa24/go.mod h1:8o94RPi1/7XTJvwPpRSzSUedZrtlirdB3r9Z20bi2f8= +github.com/BurntSushi/toml v0.3.1/go.mod h1:xHWCNGjB5oqiDr8zfno3MHue2Ht5sIBksp03qcyfWMU= +github.com/Microsoft/go-winio v0.6.1 h1:9/kr64B9VUZrLm5YYwbGtUJnMgqWVOdUAXu6Migciow= +github.com/Microsoft/go-winio v0.6.1/go.mod h1:LRdKpFKfdobln8UmuiYcKPot9D2v6svN5+sAH+4kjUM= +github.com/Microsoft/hcsshim v0.11.4 h1:68vKo2VN8DE9AdN4tnkWnmdhqdbpUFM8OF3Airm7fz8= +github.com/Microsoft/hcsshim v0.11.4/go.mod h1:smjE4dvqPX9Zldna+t5FG3rnoHhaB7QYxPRqGcpAD9w= +github.com/census-instrumentation/opencensus-proto v0.2.1/go.mod h1:f6KPmirojxKA12rnyqOA5BBL4O983OfeGPqjHWSTneU= +github.com/client9/misspell v0.3.4/go.mod h1:qj6jICC3Q7zFZvVWo7KLAzC3yx5G7kyvSDkc90ppPyw= +github.com/cncf/udpa/go v0.0.0-20191209042840-269d4d468f6f/go.mod h1:M8M6+tZqaGXZJjfX53e64911xZQV5JYwmTeXPW+k8Sc= +github.com/cncf/xds/go v0.0.0-20230607035331-e9ce68804cb4 h1:/inchEIKaYC1Akx+H+gqO04wryn5h75LSazbRlnya1k= +github.com/cncf/xds/go v0.0.0-20230607035331-e9ce68804cb4/go.mod h1:eXthEFrGJvWHgFFCl3hGmgk+/aYT6PnTQLykKQRLhEs= +github.com/codahale/rfc6979 v0.0.0-20141003034818-6a90f24967eb h1:EDmT6Q9Zs+SbUoc7Ik9EfrFqcylYqgPZ9ANSbTAntnE= +github.com/codahale/rfc6979 v0.0.0-20141003034818-6a90f24967eb/go.mod h1:ZjrT6AXHbDs86ZSdt/osfBi5qfexBrKUdONk989Wnk4= 
+github.com/containerd/cgroups v1.1.0 h1:v8rEWFl6EoqHB+swVNjVoCJE8o3jX7e8nqBGPLaDFBM= +github.com/containerd/cgroups v1.1.0/go.mod h1:6ppBcbh/NOOUU+dMKrykgaBnK9lCIBxHqJDGwsa1mIw= +github.com/containerd/containerd v1.7.9 h1:KOhK01szQbM80YfW1H6RZKh85PHGqY/9OcEZ35Je8sc= +github.com/containerd/containerd v1.7.9/go.mod h1:0/W44LWEYfSHoxBtsHIiNU/duEkgpMokemafHVCpq9Y= +github.com/containerd/continuity v0.4.2 h1:v3y/4Yz5jwnvqPKJJ+7Wf93fyWoCB3F5EclWG023MDM= +github.com/containerd/continuity v0.4.2/go.mod h1:F6PTNCKepoxEaXLQp3wDAjygEnImnZ/7o4JzpodfroQ= +github.com/containerd/log v0.1.0 h1:TCJt7ioM2cr/tfR8GPbGf9/VRAX8D2B4PjzCpfX540I= +github.com/containerd/log v0.1.0/go.mod h1:VRRf09a7mHDIRezVKTRCrOq78v577GXq3bSa3EhrzVo= +github.com/containerd/ttrpc v1.2.2 h1:9vqZr0pxwOF5koz6N0N3kJ0zDHokrcPxIR/ZR2YFtOs= +github.com/containerd/ttrpc v1.2.2/go.mod h1:sIT6l32Ph/H9cvnJsfXM5drIVzTr5A2flTf1G5tYZak= +github.com/containerd/typeurl/v2 v2.1.1 h1:3Q4Pt7i8nYwy2KmQWIw2+1hTvwTE/6w9FqcttATPO/4= +github.com/containerd/typeurl/v2 v2.1.1/go.mod h1:IDp2JFvbwZ31H8dQbEIY7sDl2L3o3HZj1hsSQlywkQ0= +github.com/creack/pty v1.1.9/go.mod h1:oKZEueFk5CKHvIhNR5MUki03XCEU+Q6VDXinZuGJ33E= +github.com/davecgh/go-spew v1.1.0/go.mod h1:J7Y8YcW2NihsgmVo/mv3lAwl/skON4iLHjSsI+c5H38= +github.com/davecgh/go-spew v1.1.1 h1:vj9j/u1bqnvCEfJOwUhtlOARqs3+rkHYY13jYWTU97c= +github.com/davecgh/go-spew v1.1.1/go.mod h1:J7Y8YcW2NihsgmVo/mv3lAwl/skON4iLHjSsI+c5H38= +github.com/docker/distribution v2.8.2+incompatible h1:T3de5rq0dB1j30rp0sA2rER+m322EBzniBPB6ZIzuh8= +github.com/docker/distribution v2.8.2+incompatible/go.mod h1:J2gT2udsDAN96Uj4KfcMRqY0/ypR+oyYUYmja8H+y+w= +github.com/docker/docker v24.0.0-rc.2.0.20230718135204-8e51b8b59cb8+incompatible h1:qMc+sk+l2GSLokgRA1uuKgkUVQ/vhAm4LYHC5rtSMq0= +github.com/docker/docker v24.0.0-rc.2.0.20230718135204-8e51b8b59cb8+incompatible/go.mod h1:eEKB0N0r5NX/I1kEveEz05bcu8tLC/8azJZsviup8Sk= +github.com/docker/go-units v0.5.0 h1:69rxXcBk27SvSaaxTtLh/8llcHD8vYHT7WSdRZ/jvr4= 
+github.com/docker/go-units v0.5.0/go.mod h1:fgPhTUdO+D/Jk86RDLlptpiXQzgHJF7gydDDbaIK4Dk= +github.com/envoyproxy/go-control-plane v0.9.0/go.mod h1:YTl/9mNaCwkRvm6d1a2C3ymFceY/DCBVvsKhRF0iEA4= +github.com/envoyproxy/go-control-plane v0.9.1-0.20191026205805-5f8ba28d4473/go.mod h1:YTl/9mNaCwkRvm6d1a2C3ymFceY/DCBVvsKhRF0iEA4= +github.com/envoyproxy/go-control-plane v0.9.4/go.mod h1:6rpuAdCZL397s3pYoYcLgu1mIlRU8Am5FuJP05cCM98= +github.com/envoyproxy/protoc-gen-validate v0.1.0/go.mod h1:iSmxcyjqTsJpI2R4NaDN7+kN2VEUnK/pcBlmesArF7c= +github.com/envoyproxy/protoc-gen-validate v1.0.2 h1:QkIBuU5k+x7/QXPvPPnWXWlCdaBFApVqftFV6k087DA= +github.com/envoyproxy/protoc-gen-validate v1.0.2/go.mod h1:GpiZQP3dDbg4JouG/NNS7QWXpgx6x8QiMKdmN72jogE= +github.com/go-kit/kit v0.9.0/go.mod h1:xBxKIO96dXMWWy0MnWVtmwkA9/13aqxPnvrjFYMA2as= +github.com/go-logfmt/logfmt v0.4.0/go.mod h1:3RMwSq7FuexP4Kalkev3ejPJsZTpXXBr9+V4qmtdjCk= +github.com/go-logr/logr v1.2.2/go.mod h1:jdQByPbusPIv2/zmleS9BjJVeZ6kBagPoEUsqbVz/1A= +github.com/go-logr/logr v1.2.4 h1:g01GSCwiDw2xSZfjJ2/T9M+S6pFdcNtFYsp+Y43HYDQ= +github.com/go-logr/logr v1.2.4/go.mod h1:jdQByPbusPIv2/zmleS9BjJVeZ6kBagPoEUsqbVz/1A= +github.com/go-logr/stdr v1.2.2 h1:hSWxHoqTgW2S2qGc0LTAI563KZ5YKYRhT3MFKZMbjag= +github.com/go-logr/stdr v1.2.2/go.mod h1:mMo/vtBO5dYbehREoey6XUKy/eSumjCCveDpRre4VKE= +github.com/go-stack/stack v1.8.0/go.mod h1:v0f6uXyyMGvRgIKkXu+yp6POWl0qKG85gN/melR3HDY= +github.com/gogo/googleapis v1.4.1 h1:1Yx4Myt7BxzvUr5ldGSbwYiZG6t9wGBZ+8/fX3Wvtq0= +github.com/gogo/googleapis v1.4.1/go.mod h1:2lpHqI5OcWCtVElxXnPt+s8oJvMpySlOyM6xDCrzib4= +github.com/gogo/protobuf v1.3.2 h1:Ov1cvc58UF3b5XjBnZv7+opcTcQFZebYjWzi34vdm4Q= +github.com/gogo/protobuf v1.3.2/go.mod h1:P1XiOD3dCwIKUDQYPy72D8LYyHL2YPYrpS2s69NZV8Q= +github.com/golang/glog v0.0.0-20160126235308-23def4e6c14b/go.mod h1:SBH7ygxi8pfUlaOkMMuAQtPIUF8ecWP5IEl/CR7VP2Q= +github.com/golang/groupcache v0.0.0-20210331224755-41bb18bfe9da h1:oI5xCqsCo564l8iNU+DwB5epxmsaqB+rhGL0m5jtYqE= 
+github.com/golang/groupcache v0.0.0-20210331224755-41bb18bfe9da/go.mod h1:cIg4eruTrX1D+g88fzRXU5OdNfaM+9IcxsU14FzY7Hc= +github.com/golang/mock v1.1.1/go.mod h1:oTYuIxOrZwtPieC+H1uAHpcLFnEyAGVDL/k47Jfbm0A= +github.com/golang/protobuf v1.2.0/go.mod h1:6lQm79b+lXiMfvg/cZm0SGofjICqVBUtrP5yJMmIC1U= +github.com/golang/protobuf v1.3.2/go.mod h1:6lQm79b+lXiMfvg/cZm0SGofjICqVBUtrP5yJMmIC1U= +github.com/golang/protobuf v1.3.3/go.mod h1:vzj43D7+SQXF/4pzW/hwtAqwc6iTitCiVSaWz5lYuqw= +github.com/golang/protobuf v1.5.0/go.mod h1:FsONVRAS9T7sI+LIUmWTfcYkHO4aIWwzhcaSAoJOfIk= +github.com/golang/protobuf v1.5.3 h1:KhyjKVUg7Usr/dYsdSqoFveMYd5ko72D+zANwlG1mmg= +github.com/golang/protobuf v1.5.3/go.mod h1:XVQd3VNwM+JqD3oG2Ue2ip4fOMUkwXdXDdiuN0vRsmY= +github.com/google/go-cmp v0.2.0/go.mod h1:oXzfMopK8JAjlY9xF4vHSVASa0yLyX7SntLO5aqRK0M= +github.com/google/go-cmp v0.5.4/go.mod h1:v8dTdLbMG2kIc/vJvl+f65V22dbkXbowE6jgT/gNBxE= +github.com/google/go-cmp v0.5.5/go.mod h1:v8dTdLbMG2kIc/vJvl+f65V22dbkXbowE6jgT/gNBxE= +github.com/google/go-cmp v0.5.9 h1:O2Tfq5qg4qc4AmwVlvv0oLiVAGB7enBSJ2x2DqQFi38= +github.com/google/go-cmp v0.5.9/go.mod h1:17dUlkBOakJ0+DkrSSNjCkIjxS6bF9zb3elmeNGIjoY= +github.com/google/shlex v0.0.0-20191202100458-e7afc7fbc510 h1:El6M4kTTCOh6aBiKaUGG7oYTSPP8MxqL4YI3kZKwcP4= +github.com/google/shlex v0.0.0-20191202100458-e7afc7fbc510/go.mod h1:pupxD2MaaD3pAXIBCelhxNneeOaAeabZDe5s4K6zSpQ= +github.com/grpc-ecosystem/go-grpc-middleware v1.3.0 h1:+9834+KizmvFV7pXQGSXQTsaWhq2GjuNUt0aUU0YBYw= +github.com/grpc-ecosystem/go-grpc-middleware v1.3.0/go.mod h1:z0ButlSOZa5vEBq9m2m2hlwIgKw+rp3sdCBRoJY+30Y= +github.com/in-toto/in-toto-golang v0.5.0 h1:hb8bgwr0M2hGdDsLjkJ3ZqJ8JFLL/tgYdAxF/XEFBbY= +github.com/in-toto/in-toto-golang v0.5.0/go.mod h1:/Rq0IZHLV7Ku5gielPT4wPHJfH1GdHMCq8+WPxw8/BE= +github.com/kisielk/errcheck v1.5.0/go.mod h1:pFxgyoBC7bSaBwPgfKdkLd5X25qrDl4LWUI2bnpBCr8= +github.com/kisielk/gotool v1.0.0/go.mod h1:XhKaO+MFFWcvkIS/tQcRk01m1F5IRFswLeQ+oQHNcck= 
+github.com/klauspost/compress v1.16.3 h1:XuJt9zzcnaz6a16/OU53ZjWp/v7/42WcR5t2a0PcNQY= +github.com/klauspost/compress v1.16.3/go.mod h1:ntbaceVETuRiXiv4DpjP66DpAtAGkEQskQzEyD//IeE= +github.com/konsorten/go-windows-terminal-sequences v1.0.1/go.mod h1:T0+1ngSBFLxvqU3pZ+m/2kptfBszLMUkC4ZK/EgS/cQ= +github.com/kr/logfmt v0.0.0-20140226030751-b84e30acd515/go.mod h1:+0opPa2QZZtGFBFZlji/RkVcI2GknAs/DXo4wKdlNEc= +github.com/kr/pretty v0.2.0 h1:s5hAObm+yFO5uHYt5dYjxi2rXrsnmRpJx4OYvIWUaQs= +github.com/kr/pretty v0.2.0/go.mod h1:ipq/a2n7PKx3OHsz4KJII5eveXtPO4qwEXGdVfWzfnI= +github.com/kr/text v0.2.0 h1:5Nx0Ya0ZqY2ygV366QzturHI13Jq95ApcVaJBhpS+AY= +github.com/kr/text v0.2.0/go.mod h1:eLer722TekiGuMkidMxC/pM04lWEeraHUUmBw8l2grE= +github.com/moby/buildkit v0.12.3 h1:cFaPVnyC0PwAP5xHHfzdU5v9rgQrCi6HnGSg3WuFKp4= +github.com/moby/buildkit v0.12.3/go.mod h1:adB4y0SxxX8trnrY+oEulb48ODLqPO6pKMF0ppGcCoI= +github.com/moby/locker v1.0.1 h1:fOXqR41zeveg4fFODix+1Ch4mj/gT0NE1XJbp/epuBg= +github.com/moby/locker v1.0.1/go.mod h1:S7SDdo5zpBK84bzzVlKr2V0hz+7x9hWbYC/kq7oQppc= +github.com/moby/sys/mountinfo v0.6.2 h1:BzJjoreD5BMFNmD9Rus6gdd1pLuecOFPt8wC+Vygl78= +github.com/moby/sys/mountinfo v0.6.2/go.mod h1:IJb6JQeOklcdMU9F5xQ8ZALD+CUr5VlGpwtX+VE0rpI= +github.com/moby/sys/signal v0.7.0 h1:25RW3d5TnQEoKvRbEKUGay6DCQ46IxAVTT9CUMgmsSI= +github.com/moby/sys/signal v0.7.0/go.mod h1:GQ6ObYZfqacOwTtlXvcmh9A26dVRul/hbOZn88Kg8Tg= +github.com/opencontainers/go-digest v1.0.0 h1:apOUWs51W5PlhuyGyz9FCeeBIOUDA/6nW8Oi/yOhh5U= +github.com/opencontainers/go-digest v1.0.0/go.mod h1:0JzlMkj0TRzQZfJkVvzbP0HBR3IKzErnv2BNG4W4MAM= +github.com/opencontainers/image-spec v1.1.0-rc5 h1:Ygwkfw9bpDvs+c9E34SdgGOj41dX/cbdlwvlWt0pnFI= +github.com/opencontainers/image-spec v1.1.0-rc5/go.mod h1:X4pATf0uXsnn3g5aiGIsVnJBR4mxhKzfwmvK/B2NTm8= +github.com/opentracing/opentracing-go v1.1.0/go.mod h1:UkNAQd3GIcIGf0SeVgPpRdFStlNbqXla1AfSYxPUl2o= +github.com/pkg/errors v0.8.1/go.mod h1:bwawxfHBFNV+L2hUp1rHADufV3IMtnDRdf1r5NINEl0= 
+github.com/pkg/errors v0.9.1 h1:FEBLx1zS214owpjy7qsBeixbURkuhQAwrK5UwLGTwt4= +github.com/pkg/errors v0.9.1/go.mod h1:bwawxfHBFNV+L2hUp1rHADufV3IMtnDRdf1r5NINEl0= +github.com/pmezard/go-difflib v1.0.0 h1:4DBwDE0NGyQoBHbLQYPwSUPoCMWR5BEzIk/f1lZbAQM= +github.com/pmezard/go-difflib v1.0.0/go.mod h1:iKH77koFhYxTK1pcRnkKkqfTogsbg7gZNVY4sRDYZ/4= +github.com/prometheus/client_model v0.0.0-20190812154241-14fe0d1b01d4/go.mod h1:xMI15A0UPsDsEKsMN9yxemIoYk6Tm2C1GtYGdfGttqA= +github.com/prometheus/procfs v0.6.0/go.mod h1:cz+aTbrPOrUb4q7XlbU9ygM+/jj0fzG6c1xBZuNvfVA= +github.com/prometheus/procfs v0.9.0 h1:wzCHvIvM5SxWqYvwgVL7yJY8Lz3PKn49KQtpgMYJfhI= +github.com/prometheus/procfs v0.9.0/go.mod h1:+pB4zwohETzFnmlpe6yd2lSc+0/46IYZRB/chUwxUZY= +github.com/secure-systems-lab/go-securesystemslib v0.4.0 h1:b23VGrQhTA8cN2CbBw7/FulN9fTtqYUdS5+Oxzt+DUE= +github.com/secure-systems-lab/go-securesystemslib v0.4.0/go.mod h1:FGBZgq2tXWICsxWQW1msNf49F0Pf2Op5Htayx335Qbs= +github.com/shibumi/go-pathspec v1.3.0 h1:QUyMZhFo0Md5B8zV8x2tesohbb5kfbpTi9rBnKh5dkI= +github.com/shibumi/go-pathspec v1.3.0/go.mod h1:Xutfslp817l2I1cZvgcfeMQJG5QnU2lh5tVaaMCl3jE= +github.com/sirupsen/logrus v1.4.2/go.mod h1:tLMulIdttU9McNUspp0xgXVQah82FyeX6MwdIuYE2rE= +github.com/sirupsen/logrus v1.8.1/go.mod h1:yWOB1SBYBC5VeMP7gHvWumXLIWorT60ONWic61uBYv0= +github.com/sirupsen/logrus v1.9.3 h1:dueUQJ1C2q9oE3F7wvmSGAaVtTmUizReu6fjN8uqzbQ= +github.com/sirupsen/logrus v1.9.3/go.mod h1:naHLuLoDiP4jHNo9R0sCBMtWGeIprob74mVsIT4qYEQ= +github.com/stretchr/objx v0.1.0/go.mod h1:HFkY916IF+rwdDfMAkV7OtwuqBVzrE8GR6GFx+wExME= +github.com/stretchr/objx v0.1.1/go.mod h1:HFkY916IF+rwdDfMAkV7OtwuqBVzrE8GR6GFx+wExME= +github.com/stretchr/testify v1.2.2/go.mod h1:a8OnRcib4nhh0OaRAV+Yts87kKdq0PP7pXfy6kDkUVs= +github.com/stretchr/testify v1.4.0/go.mod h1:j7eGeouHqKxXV5pUuKE4zz7dFj8WfuZ+81PSLYec5m4= +github.com/stretchr/testify v1.7.0/go.mod h1:6Fq8oRcR53rry900zMqJjRRixrwX3KX962/h/Wwjteg= +github.com/stretchr/testify v1.8.4 
h1:CcVxjf3Q8PM0mHUKJCdn+eZZtm5yQwehR5yeSVQQcUk= +github.com/stretchr/testify v1.8.4/go.mod h1:sz/lmYIOXD/1dqDmKjjqLyZ2RngseejIcXlSw2iwfAo= +github.com/tonistiigi/fsutil v0.0.0-20230629203738-36ef4d8c0dbb h1:uUe8rNyVXM8moActoBol6Xf6xX2GMr7SosR2EywMvGg= +github.com/tonistiigi/fsutil v0.0.0-20230629203738-36ef4d8c0dbb/go.mod h1:SxX/oNQ/ag6Vaoli547ipFK9J7BZn5JqJG0JE8lf8bA= +github.com/yuin/goldmark v1.1.27/go.mod h1:3hX8gzYuyVAZsxl0MRgGTJEmQBFcNTphYh9decYSb74= +github.com/yuin/goldmark v1.2.1/go.mod h1:3hX8gzYuyVAZsxl0MRgGTJEmQBFcNTphYh9decYSb74= +go.opencensus.io v0.24.0 h1:y73uSU6J157QMP2kn2r30vwW1A2W2WFwSCGnAVxeaD0= +go.opencensus.io v0.24.0/go.mod h1:vNK8G9p7aAivkbmorf4v+7Hgx+Zs0yY+0fOtgBfjQKo= +go.opentelemetry.io/contrib/instrumentation/google.golang.org/grpc/otelgrpc v0.45.0 h1:RsQi0qJ2imFfCvZabqzM9cNXBG8k6gXMv1A0cXRmH6A= +go.opentelemetry.io/contrib/instrumentation/google.golang.org/grpc/otelgrpc v0.45.0/go.mod h1:vsh3ySueQCiKPxFLvjWC4Z135gIa34TQ/NSqkDTZYUM= +go.opentelemetry.io/otel v1.19.0 h1:MuS/TNf4/j4IXsZuJegVzI1cwut7Qc00344rgH7p8bs= +go.opentelemetry.io/otel v1.19.0/go.mod h1:i0QyjOq3UPoTzff0PJB2N66fb4S0+rSbSB15/oyH9fY= +go.opentelemetry.io/otel/metric v1.19.0 h1:aTzpGtV0ar9wlV4Sna9sdJyII5jTVJEvKETPiOKwvpE= +go.opentelemetry.io/otel/metric v1.19.0/go.mod h1:L5rUsV9kM1IxCj1MmSdS+JQAcVm319EUrDVLrt7jqt8= +go.opentelemetry.io/otel/trace v1.19.0 h1:DFVQmlVbfVeOuBRrwdtaehRrWiL1JoVs9CPIQ1Dzxpg= +go.opentelemetry.io/otel/trace v1.19.0/go.mod h1:mfaSyvGyEJEI0nyV2I4qhNQnbBOUUmYZpYojqMnX2vo= +go.uber.org/atomic v1.4.0/go.mod h1:gD2HeocX3+yG+ygLZcrzQJaqmWj9AIm7n08wl/qW/PE= +go.uber.org/multierr v1.1.0/go.mod h1:wR5kodmAFQ0UK8QlbwjlSNy0Z68gJhDJUG5sjR94q/0= +go.uber.org/zap v1.10.0/go.mod h1:vwi/ZaCAaUcBkycHslxD9B2zi4UTXhF60s6SWpuDF0Q= +golang.org/x/crypto v0.0.0-20190308221718-c2843e01d9a2/go.mod h1:djNgcEr1/C05ACkg1iLfiJU5Ep61QUkGW8qpdssI0+w= +golang.org/x/crypto v0.0.0-20191011191535-87dc89f01550/go.mod h1:yigFU9vqHzYiE8UmvKecakEJjdnWj3jj499lnFckfCI= 
+golang.org/x/crypto v0.0.0-20200622213623-75b288015ac9/go.mod h1:LzIPMQfyMNhhGPhUkYOs5KpL4U8rLKemX1yGLhDgUto= +golang.org/x/crypto v0.14.0 h1:wBqGXzWJW6m1XrIKlAH0Hs1JJ7+9KBwnIO8v66Q9cHc= +golang.org/x/crypto v0.14.0/go.mod h1:MVFd36DqK4CsrnJYDkBA3VC4m2GkXAM0PvzMCn4JQf4= +golang.org/x/exp v0.0.0-20190121172915-509febef88a4/go.mod h1:CJ0aWSM057203Lf6IL+f9T1iT9GByDxfZKAQTCR3kQA= +golang.org/x/lint v0.0.0-20181026193005-c67002cb31c3/go.mod h1:UVdnD1Gm6xHRNCYTkRU2/jEulfH38KcIWyp/GAMgvoE= +golang.org/x/lint v0.0.0-20190227174305-5b3e6a55c961/go.mod h1:wehouNa3lNwaWXcvxsM5YxQ5yQlVC4a0KAMCusXpPoU= +golang.org/x/lint v0.0.0-20190313153728-d0100b6bd8b3/go.mod h1:6SW0HCj/g11FgYtHlgUYUwCkIfeOF89ocIRzGO/8vkc= +golang.org/x/mod v0.2.0/go.mod h1:s0Qsj1ACt9ePp/hMypM3fl4fZqREWJwdYDEqhRiZZUA= +golang.org/x/mod v0.3.0/go.mod h1:s0Qsj1ACt9ePp/hMypM3fl4fZqREWJwdYDEqhRiZZUA= +golang.org/x/mod v0.11.0 h1:bUO06HqtnRcc/7l71XBe4WcqTZ+3AH1J59zWDDwLKgU= +golang.org/x/mod v0.11.0/go.mod h1:iBbtSCu2XBx23ZKBPSOrRkjjQPZFPuis4dIYUhu/chs= +golang.org/x/net v0.0.0-20180724234803-3673e40ba225/go.mod h1:mL1N/T3taQHkDXs73rZJwtUhF3w3ftmwwsq0BUmARs4= +golang.org/x/net v0.0.0-20180826012351-8a410e7b638d/go.mod h1:mL1N/T3taQHkDXs73rZJwtUhF3w3ftmwwsq0BUmARs4= +golang.org/x/net v0.0.0-20190213061140-3a22650c66bd/go.mod h1:mL1N/T3taQHkDXs73rZJwtUhF3w3ftmwwsq0BUmARs4= +golang.org/x/net v0.0.0-20190311183353-d8887717615a/go.mod h1:t9HGtf8HONx5eT2rtn7q6eTqICYqUVnKs3thJo3Qplg= +golang.org/x/net v0.0.0-20190404232315-eb5bcb51f2a3/go.mod h1:t9HGtf8HONx5eT2rtn7q6eTqICYqUVnKs3thJo3Qplg= +golang.org/x/net v0.0.0-20190620200207-3b0461eec859/go.mod h1:z5CRVTTTmAJ677TzLLGU+0bjPO0LkuOLi4/5GtJWs/s= +golang.org/x/net v0.0.0-20200226121028-0de0cce0169b/go.mod h1:z5CRVTTTmAJ677TzLLGU+0bjPO0LkuOLi4/5GtJWs/s= +golang.org/x/net v0.0.0-20201021035429-f5854403a974/go.mod h1:sp8m0HH+o8qH0wwXwYZr8TS3Oi6o0r6Gce1SSxlDquU= +golang.org/x/net v0.17.0 h1:pVaXccu2ozPjCXewfr1S7xza/zcXTity9cCdXQYSjIM= +golang.org/x/net v0.17.0/go.mod 
h1:NxSsAGuq816PNPmqtQdLE42eU2Fs7NoRIZrHJAlaCOE= +golang.org/x/oauth2 v0.0.0-20180821212333-d2e6202438be/go.mod h1:N/0e6XlmueqKjAGxoOufVs8QHGRruUQn6yWY3a++T0U= +golang.org/x/oauth2 v0.11.0 h1:vPL4xzxBM4niKCW6g9whtaWVXTJf1U5e4aZxxFx/gbU= +golang.org/x/oauth2 v0.11.0/go.mod h1:LdF7O/8bLR/qWK9DrpXmbHLTouvRHK0SgJl0GmDBchk= +golang.org/x/sync v0.0.0-20180314180146-1d60e4601c6f/go.mod h1:RxMgew5VJxzue5/jJTE5uejpjVlOe/izrB70Jof72aM= +golang.org/x/sync v0.0.0-20181108010431-42b317875d0f/go.mod h1:RxMgew5VJxzue5/jJTE5uejpjVlOe/izrB70Jof72aM= +golang.org/x/sync v0.0.0-20190423024810-112230192c58/go.mod h1:RxMgew5VJxzue5/jJTE5uejpjVlOe/izrB70Jof72aM= +golang.org/x/sync v0.0.0-20190911185100-cd5d95a43a6e/go.mod h1:RxMgew5VJxzue5/jJTE5uejpjVlOe/izrB70Jof72aM= +golang.org/x/sync v0.0.0-20201020160332-67f06af15bc9/go.mod h1:RxMgew5VJxzue5/jJTE5uejpjVlOe/izrB70Jof72aM= +golang.org/x/sync v0.0.0-20201207232520-09787c993a3a/go.mod h1:RxMgew5VJxzue5/jJTE5uejpjVlOe/izrB70Jof72aM= +golang.org/x/sync v0.3.0 h1:ftCYgMx6zT/asHUrPw8BLLscYtGznsLAnjq5RH9P66E= +golang.org/x/sync v0.3.0/go.mod h1:FU7BRWz2tNW+3quACPkgCx/L+uEAv1htQ0V83Z9Rj+Y= +golang.org/x/sys v0.0.0-20180830151530-49385e6e1522/go.mod h1:STP8DvDyc/dI5b8T5hshtkjS+E42TnysNCUPdjciGhY= +golang.org/x/sys v0.0.0-20190215142949-d0b11bdaac8a/go.mod h1:STP8DvDyc/dI5b8T5hshtkjS+E42TnysNCUPdjciGhY= +golang.org/x/sys v0.0.0-20190412213103-97732733099d/go.mod h1:h1NjWce9XRLGQEsW7wpKNCjG9DtNlClVuFLEZdDNbEs= +golang.org/x/sys v0.0.0-20190422165155-953cdadca894/go.mod h1:h1NjWce9XRLGQEsW7wpKNCjG9DtNlClVuFLEZdDNbEs= +golang.org/x/sys v0.0.0-20191026070338-33540a1f6037/go.mod h1:h1NjWce9XRLGQEsW7wpKNCjG9DtNlClVuFLEZdDNbEs= +golang.org/x/sys v0.0.0-20200930185726-fdedc70b468f/go.mod h1:h1NjWce9XRLGQEsW7wpKNCjG9DtNlClVuFLEZdDNbEs= +golang.org/x/sys v0.0.0-20210124154548-22da62e12c0c/go.mod h1:h1NjWce9XRLGQEsW7wpKNCjG9DtNlClVuFLEZdDNbEs= +golang.org/x/sys v0.0.0-20211025201205-69cdffdb9359/go.mod h1:oPkhp1MJrh7nUepCBck5+mAzfO9JrbApNNgaTdGDITg= 
+golang.org/x/sys v0.0.0-20220715151400-c0bba94af5f8/go.mod h1:oPkhp1MJrh7nUepCBck5+mAzfO9JrbApNNgaTdGDITg= +golang.org/x/sys v0.1.0/go.mod h1:oPkhp1MJrh7nUepCBck5+mAzfO9JrbApNNgaTdGDITg= +golang.org/x/sys v0.13.0 h1:Af8nKPmuFypiUBjVoU9V20FiaFXOcuZI21p0ycVYYGE= +golang.org/x/sys v0.13.0/go.mod h1:oPkhp1MJrh7nUepCBck5+mAzfO9JrbApNNgaTdGDITg= +golang.org/x/term v0.13.0 h1:bb+I9cTfFazGW51MZqBVmZy7+JEJMouUHTUSKVQLBek= +golang.org/x/term v0.13.0/go.mod h1:LTmsnFJwVN6bCy1rVCoS+qHT1HhALEFxKncY3WNNh4U= +golang.org/x/text v0.3.0/go.mod h1:NqM8EUOU14njkJ3fqMW+pc6Ldnwhi/IjpwHt7yyuwOQ= +golang.org/x/text v0.3.3/go.mod h1:5Zoc/QRtKVWzQhOtBMvqHzDpF6irO9z98xDceosuGiQ= +golang.org/x/text v0.13.0 h1:ablQoSUd0tRdKxZewP80B+BaqeKJuVhuRxj/dkrun3k= +golang.org/x/text v0.13.0/go.mod h1:TvPlkZtksWOMsz7fbANvkp4WM8x/WCo/om8BMLbz+aE= +golang.org/x/tools v0.0.0-20180917221912-90fa682c2a6e/go.mod h1:n7NCudcB/nEzxVGmLbDWY5pfWTLqBcC2KZ6jyYvM4mQ= +golang.org/x/tools v0.0.0-20190114222345-bf090417da8b/go.mod h1:n7NCudcB/nEzxVGmLbDWY5pfWTLqBcC2KZ6jyYvM4mQ= +golang.org/x/tools v0.0.0-20190226205152-f727befe758c/go.mod h1:9Yl7xja0Znq3iFh3HoIrodX9oNMXvdceNzlUR8zjMvY= +golang.org/x/tools v0.0.0-20190311212946-11955173bddd/go.mod h1:LCzVGOaR6xXOjkQ3onu1FJEFr0SW1gC7cKk1uF8kGRs= +golang.org/x/tools v0.0.0-20190524140312-2c0ae7006135/go.mod h1:RgjU9mgBXZiqYHBnxXauZ1Gv1EHHAz9KjViQ78xBX0Q= +golang.org/x/tools v0.0.0-20191119224855-298f0cb1881e/go.mod h1:b+2E5dAYhXwXZwtnZ6UAqBI28+e2cm9otk0dWdXHAEo= +golang.org/x/tools v0.0.0-20200619180055-7c47624df98f/go.mod h1:EkVYQZoAsY45+roYkvgYkIh4xh/qjgUK9TdY2XT94GE= +golang.org/x/tools v0.0.0-20210106214847-113979e3529a/go.mod h1:emZCQorbCU4vsT4fOWvOPXz4eW1wZW4PmDk9uLelYpA= +golang.org/x/tools v0.10.0 h1:tvDr/iQoUqNdohiYm0LmmKcBk+q86lb9EprIUFhHHGg= +golang.org/x/tools v0.10.0/go.mod h1:UJwyiVBsOA2uwvK/e5OY3GTpDUJriEd+/YlqAwLPmyM= +golang.org/x/xerrors v0.0.0-20190717185122-a985d3407aa7/go.mod h1:I/5z698sn9Ka8TeJc9MKroUUfqBBauWjQqLJ2OPfmY0= +golang.org/x/xerrors 
v0.0.0-20191011141410-1b5146add898/go.mod h1:I/5z698sn9Ka8TeJc9MKroUUfqBBauWjQqLJ2OPfmY0= +golang.org/x/xerrors v0.0.0-20191204190536-9bdfabe68543/go.mod h1:I/5z698sn9Ka8TeJc9MKroUUfqBBauWjQqLJ2OPfmY0= +golang.org/x/xerrors v0.0.0-20200804184101-5ec99f83aff1/go.mod h1:I/5z698sn9Ka8TeJc9MKroUUfqBBauWjQqLJ2OPfmY0= +google.golang.org/appengine v1.1.0/go.mod h1:EbEs0AVv82hx2wNQdGPgUI5lhzA/G0D9YwlJXL52JkM= +google.golang.org/appengine v1.4.0/go.mod h1:xpcJRLb0r/rnEns0DIKYYv+WjYCduHsrkT7/EB5XEv4= +google.golang.org/appengine v1.6.7 h1:FZR1q0exgwxzPzp/aF+VccGrSfxfPpkBqjIIEq3ru6c= +google.golang.org/appengine v1.6.7/go.mod h1:8WjMMxjGQR8xUklV/ARdw2HLXBOI7O7uCIDZVag1xfc= +google.golang.org/genproto v0.0.0-20180817151627-c66870c02cf8/go.mod h1:JiN7NxoALGmiZfu7CAH4rXhgtRTLTxftemlI0sWmxmc= +google.golang.org/genproto v0.0.0-20190819201941-24fa4b261c55/go.mod h1:DMBHOl98Agz4BDEuKkezgsaosCRResVns1a3J2ZsMNc= +google.golang.org/genproto v0.0.0-20200224152610-e50cd9704f63/go.mod h1:55QSHmfGQM9UVYDPBsyGGes0y52j32PQ3BqQfXhyH3c= +google.golang.org/genproto v0.0.0-20200423170343-7949de9c1215/go.mod h1:55QSHmfGQM9UVYDPBsyGGes0y52j32PQ3BqQfXhyH3c= +google.golang.org/genproto/googleapis/rpc v0.0.0-20230822172742-b8732ec3820d h1:uvYuEyMHKNt+lT4K3bN6fGswmK8qSvcreM3BwjDh+y4= +google.golang.org/genproto/googleapis/rpc v0.0.0-20230822172742-b8732ec3820d/go.mod h1:+Bk1OCOj40wS2hwAMA+aCW9ypzm63QTBBHp6lQ3p+9M= +google.golang.org/grpc v1.19.0/go.mod h1:mqu4LbDTu4XGKhr4mRzUsmM4RtVoemTSY81AxZiDr8c= +google.golang.org/grpc v1.23.0/go.mod h1:Y5yQAOtifL1yxbo5wqy6BxZv8vAUGQwXBOALyacEbxg= +google.golang.org/grpc v1.25.1/go.mod h1:c3i+UQWmh7LiEpx4sFZnkU36qjEYZ0imhYfXVyQciAY= +google.golang.org/grpc v1.27.0/go.mod h1:qbnxyOmOxrQa7FizSgH+ReBfzJrCY1pSN7KXBS8abTk= +google.golang.org/grpc v1.27.1/go.mod h1:qbnxyOmOxrQa7FizSgH+ReBfzJrCY1pSN7KXBS8abTk= +google.golang.org/grpc v1.29.1/go.mod h1:itym6AZVZYACWQqET3MqgPpjcuV5QH3BxFS3IjizoKk= +google.golang.org/grpc v1.59.0 
h1:Z5Iec2pjwb+LEOqzpB2MR12/eKFhDPhuqW91O+4bwUk= +google.golang.org/grpc v1.59.0/go.mod h1:aUPDwccQo6OTjy7Hct4AfBPD1GptF4fyUjIkQ9YtF98= +google.golang.org/protobuf v1.26.0-rc.1/go.mod h1:jlhhOSvTdKEhbULTjvd4ARK9grFBp09yW+WbY/TyQbw= +google.golang.org/protobuf v1.26.0/go.mod h1:9q0QmTI4eRPtz6boOQmLYwt+qCgq0jsYwAQnmE0givc= +google.golang.org/protobuf v1.27.1/go.mod h1:9q0QmTI4eRPtz6boOQmLYwt+qCgq0jsYwAQnmE0givc= +google.golang.org/protobuf v1.31.0 h1:g0LDEJHgrBl9N9r17Ru3sqWhkIx2NB67okBHPwC7hs8= +google.golang.org/protobuf v1.31.0/go.mod h1:HV8QOd/L58Z+nl8r43ehVNZIU/HEI6OcFqwMG9pJV4I= +gopkg.in/check.v1 v0.0.0-20161208181325-20d25e280405/go.mod h1:Co6ibVJAznAaIkqp8huTwlJQCZ016jof/cbN4VW5Yz0= +gopkg.in/check.v1 v1.0.0-20190902080502-41f04d3bba15 h1:YR8cESwS4TdDjEe65xsg0ogRM/Nc3DYOhEAlW+xobZo= +gopkg.in/check.v1 v1.0.0-20190902080502-41f04d3bba15/go.mod h1:Co6ibVJAznAaIkqp8huTwlJQCZ016jof/cbN4VW5Yz0= +gopkg.in/yaml.v2 v2.2.2/go.mod h1:hI93XBmqTisBFMUTm0b8Fm+jr3Dg1NNxqwp+5A1VGuI= +gopkg.in/yaml.v2 v2.4.0 h1:D8xgwECY7CYvx+Y2n4sBz93Jn9JRvxdiyyo8CTfuKaY= +gopkg.in/yaml.v2 v2.4.0/go.mod h1:RDklbk79AGWmwhnvt/jBztapEOGDOx6ZbXqjP6csGnQ= +gopkg.in/yaml.v3 v3.0.0-20200313102051-9f266ea9e77c/go.mod h1:K4uyk7z7BCEPqu6E+C64Yfv1cQ7kz7rIZviUmN+EgEM= +gopkg.in/yaml.v3 v3.0.1 h1:fxVm/GzAzEWqLHuvctI91KS9hhNmmWOoWu0XTYJS7CA= +gopkg.in/yaml.v3 v3.0.1/go.mod h1:K4uyk7z7BCEPqu6E+C64Yfv1cQ7kz7rIZviUmN+EgEM= +honnef.co/go/tools v0.0.0-20190102054323-c2f93a96b099/go.mod h1:rf3lG4BRIbNafJWhAfAdb/ePZxsR/4RtNHQocxwk9r4= +honnef.co/go/tools v0.0.0-20190523083050-ea95bdfd59fc/go.mod h1:rf3lG4BRIbNafJWhAfAdb/ePZxsR/4RtNHQocxwk9r4= diff --git a/kubernetes/llama-2-7b-chat.yaml b/kubernetes/llama-2-7b-chat.yaml new file mode 100644 index 00000000..d7e1aecd --- /dev/null +++ b/kubernetes/llama-2-7b-chat.yaml @@ -0,0 +1,33 @@ +apiVersion: apps/v1 +kind: Deployment +metadata: + name: llama2 + labels: + app: llama2 +spec: + replicas: 3 + selector: + matchLabels: + app: llama2 + template: + metadata: + 
labels: + app: llama2 + spec: + containers: + - name: llama2 + image: ghcr.io/sozercan/llama2:7b + ports: + - containerPort: 8080 +--- +apiVersion: v1 +kind: Service +metadata: + name: llama2 +spec: + selector: + app: llama2 + ports: + - protocol: TCP + port: 8080 + targetPort: 8080 diff --git a/kubernetes/orca-2-13b.yaml b/kubernetes/orca-2-13b.yaml new file mode 100644 index 00000000..1578a244 --- /dev/null +++ b/kubernetes/orca-2-13b.yaml @@ -0,0 +1,33 @@ +apiVersion: apps/v1 +kind: Deployment +metadata: + name: orca2 + labels: + app: orca2 +spec: + replicas: 3 + selector: + matchLabels: + app: orca2 + template: + metadata: + labels: + app: orca2 + spec: + containers: + - name: orca2 + image: ghcr.io/sozercan/orca2:13b + ports: + - containerPort: 8080 +--- +apiVersion: v1 +kind: Service +metadata: + name: orca2 +spec: + selector: + app: orca2 + ports: + - protocol: TCP + port: 8080 + targetPort: 8080 diff --git a/models/cuda/llama-2-13b-chat.yaml b/models/cuda/llama-2-13b-chat.yaml new file mode 100644 index 00000000..01f98e35 --- /dev/null +++ b/models/cuda/llama-2-13b-chat.yaml @@ -0,0 +1,21 @@ +#syntax=ghcr.io/sozercan/aikit:latest +apiVersion: v1alpha1 +debug: true +runtime: cuda +models: + - name: llama-2-13b-chat + source: https://huggingface.co/TheBloke/Llama-2-13B-Chat-GGUF/resolve/main/llama-2-13b-chat.Q4_K_M.gguf + sha256: 7ddfe27f61bf994542c22aca213c46ecbd8a624cca74abff02a7b5a8c18f787f +config: | + - name: llama-2-13b-chat + backend: llama + parameters: + top_k: 80 + temperature: 0.2 + top_p: 0.7 + model: llama-2-13b-chat.Q4_K_M.gguf + context_size: 4096 + gpu_layers: 43 + f16: true + batch: 512 + mmap: true diff --git a/models/cuda/llama-2-7b-chat.yaml b/models/cuda/llama-2-7b-chat.yaml new file mode 100644 index 00000000..e0eede81 --- /dev/null +++ b/models/cuda/llama-2-7b-chat.yaml @@ -0,0 +1,21 @@ +#syntax=ghcr.io/sozercan/aikit:latest +apiVersion: v1alpha1 +debug: true +runtime: cuda +models: + - name: llama-2-7b-chat + source: 
https://huggingface.co/TheBloke/Llama-2-7B-Chat-GGUF/resolve/main/llama-2-7b-chat.Q4_K_M.gguf + sha256: "08a5566d61d7cb6b420c3e4387a39e0078e1f2fe5f055f3a03887385304d4bfa" +config: | + - name: llama-2-7b-chat + backend: llama + parameters: + top_k: 80 + temperature: 0.2 + top_p: 0.7 + model: llama-2-7b-chat.Q4_K_M.gguf + context_size: 4096 + gpu_layers: 35 + f16: true + batch: 512 + mmap: true
diff --git a/models/cuda/orca-2-13b.yaml b/models/cuda/orca-2-13b.yaml
new file mode 100644
index 00000000..8abec499
--- /dev/null
+++ b/models/cuda/orca-2-13b.yaml
@@ -0,0 +1,21 @@
+#syntax=ghcr.io/sozercan/aikit:latest
+apiVersion: v1alpha1
+debug: true
+runtime: cuda
+models:
+  - name: orca-2-13b
+    source: https://huggingface.co/TheBloke/Orca-2-13B-GGUF/resolve/main/orca-2-13b.Q4_K_M.gguf
+    sha256: d37ea225dbe22318a4784a458a1832e34193d46f01a31e0b62e3a841fb8ec9ce
+config: |
+  - name: orca-2-13b
+    backend: llama
+    parameters:
+      top_k: 80
+      temperature: 0.2
+      top_p: 0.7
+      model: orca-2-13b.Q4_K_M.gguf
+    context_size: 4096
+    gpu_layers: 43
+    f16: true
+    batch: 512
+    mmap: true
diff --git a/models/llama-2-13b-chat.yaml b/models/llama-2-13b-chat.yaml new file mode 100644 index 00000000..2fb46789 --- /dev/null +++ b/models/llama-2-13b-chat.yaml @@ -0,0 +1,16 @@ +#syntax=ghcr.io/sozercan/aikit:latest +apiVersion: v1alpha1 +debug: true +models: + - name: llama-2-13b-chat + source: https://huggingface.co/TheBloke/Llama-2-13B-Chat-GGUF/resolve/main/llama-2-13b-chat.Q4_K_M.gguf + sha256: 7ddfe27f61bf994542c22aca213c46ecbd8a624cca74abff02a7b5a8c18f787f +config: | + - name: llama-2-13b-chat + backend: llama + parameters: + top_k: 80 + temperature: 0.2 + top_p: 0.7 + model: llama-2-13b-chat.Q4_K_M.gguf + context_size: 4096 diff --git a/models/llama-2-7b-chat.yaml b/models/llama-2-7b-chat.yaml new file mode 100644 index 00000000..9f55c7ea --- /dev/null +++ b/models/llama-2-7b-chat.yaml @@ -0,0 +1,16 @@ +#syntax=ghcr.io/sozercan/aikit:latest +apiVersion: v1alpha1 +debug: true +models: + - name:
llama-2-7b-chat + source: https://huggingface.co/TheBloke/Llama-2-7B-Chat-GGUF/resolve/main/llama-2-7b-chat.Q4_K_M.gguf + sha256: "08a5566d61d7cb6b420c3e4387a39e0078e1f2fe5f055f3a03887385304d4bfa" +config: | + - name: llama-2-7b-chat + backend: llama + parameters: + top_k: 80 + temperature: 0.2 + top_p: 0.7 + model: llama-2-7b-chat.Q4_K_M.gguf + context_size: 4096 diff --git a/models/orca-2-13b.yaml b/models/orca-2-13b.yaml new file mode 100644 index 00000000..8d5c21f3 --- /dev/null +++ b/models/orca-2-13b.yaml @@ -0,0 +1,16 @@ +#syntax=ghcr.io/sozercan/aikit:latest +apiVersion: v1alpha1 +debug: true +models: + - name: orca-2-13b + source: https://huggingface.co/TheBloke/Orca-2-13B-GGUF/resolve/main/orca-2-13b.Q4_K_M.gguf + sha256: d37ea225dbe22318a4784a458a1832e34193d46f01a31e0b62e3a841fb8ec9ce +config: | + - name: orca-2-13b + backend: llama + parameters: + top_k: 80 + temperature: 0.2 + top_p: 0.7 + model: orca-2-13b.Q4_K_M.gguf + context_size: 4096
diff --git a/pkg/aikit/config/specs.go b/pkg/aikit/config/specs.go
new file mode 100644
index 00000000..ea9b26bf
--- /dev/null
+++ b/pkg/aikit/config/specs.go
@@ -0,0 +1,43 @@
+package config
+
+import (
+	"github.com/pkg/errors"
+	yaml "gopkg.in/yaml.v2"
+)
+
+// NewFromBytes parses the YAML document in b into a Config.
+//
+// Decoding is strict: a key that does not map to a field of Config, Model
+// or PromptTemplate (for example a misspelled "runtime"), or a key that
+// appears twice in the same mapping, is reported as an error instead of
+// being silently dropped.
+func NewFromBytes(b []byte) (*Config, error) {
+	c := &Config{}
+	if err := yaml.UnmarshalStrict(b, c); err != nil {
+		return nil, errors.Wrap(err, "unmarshal config")
+	}
+	return c, nil
+}
+
+// Config is the top-level structure decoded from an aikitfile.
+type Config struct {
+	APIVersion string  `yaml:"apiVersion"`
+	Debug      bool    `yaml:"debug,omitempty"`
+	Runtime    string  `yaml:"runtime,omitempty"`
+	Models     []Model `yaml:"models"`
+	Config     string  `yaml:"config,omitempty"`
+}
+
+// Model is one entry of the "models" list.
+type Model struct {
+	Name            string           `yaml:"name"`
+	Source          string           `yaml:"source"`
+	SHA256          string           `yaml:"sha256,omitempty"`
+	PromptTemplates []PromptTemplate `yaml:"promptTemplates,omitempty"`
+}
+
+// PromptTemplate is one entry of a model's "promptTemplates" list.
+type PromptTemplate struct {
+	Name     string `yaml:"name,omitempty"`
+	Template string `yaml:"template,omitempty"`
+}
diff --git a/pkg/aikit2llb/convert.go
b/pkg/aikit2llb/convert.go
new file mode 100644
index 00000000..15fd38bb
--- /dev/null
+++ b/pkg/aikit2llb/convert.go
@@ -0,0 +1,142 @@
+package aikit2llb
+
+import (
+	"fmt"
+	"net/url"
+	"path"
+	"strings"
+
+	"github.com/moby/buildkit/client/llb"
+	specs "github.com/opencontainers/image-spec/specs-go/v1"
+	"github.com/sozercan/aikit/pkg/aikit/config"
+	"github.com/sozercan/aikit/pkg/utils"
+)
+
+const (
+	debianSlim     = "docker.io/library/debian:12-slim"
+	distrolessBase = "gcr.io/distroless/cc-debian12:latest"
+	localAIVersion = "v1.40.0"
+	retryCount     = 5
+	cudaVersion    = "12-3"
+)
+
+// Aikit2LLB builds the LLB state for the image described by c and returns
+// it together with the image config produced by NewImageConfig.
+func Aikit2LLB(c *config.Config) (llb.State, *specs.Image) {
+	var merge llb.State
+	s := llb.Image(debianSlim)
+	s = curl(s)
+	if c.Runtime == utils.RuntimeNVIDIA {
+		s, merge = installCuda(s)
+	} else {
+		merge = llb.Image(distrolessBase)
+	}
+	s, merge = copyModels(s, merge, c)
+	s = addLocalAI(c, s, merge)
+	imageCfg := NewImageConfig(c)
+	return s, imageCfg
+}
+
+// copyModels writes the optional config file, downloads every model (and
+// verifies its checksum when one is given) and writes the prompt templates
+// in s, then merges only the files added by those steps onto merge.
+func copyModels(s llb.State, merge llb.State, c *config.Config) (llb.State, llb.State) {
+	initState := s
+
+	// create config file if defined; the content is single-quoted so the
+	// shell writes it verbatim (no $, backtick, quote or backslash handling)
+	if c.Config != "" {
+		s = s.Run(shf("printf '%%s\\n' %s >> /config.yaml", shQuote(c.Config))).Root()
+	}
+
+	for _, model := range c.Models {
+		// --fail makes curl exit non-zero on an HTTP error instead of saving
+		// the error page as the model file
+		s = s.Run(llb.Shlexf("curl --fail --retry %d --create-dirs -sSLO --output-dir /models %s", retryCount, model.Source)).Root()
+		// verify sha256 checksum if defined
+		if model.SHA256 != "" {
+			modelPath := fmt.Sprintf("/models/%s", fileNameFromURL(model.Source))
+			// sha256sum -c expects "<hash>  <file>", separated by two spaces
+			s = s.Run(shf("echo \"%s  %s\" | sha256sum -c -", model.SHA256, modelPath)).Root()
+		}
+		// create prompt templates if defined
+		for _, pt := range model.PromptTemplates {
+			if pt.Name != "" && pt.Template != "" {
+				tmplPath := shQuote("/models/" + pt.Name + ".tmpl")
+				s = s.Run(shf("printf '%%s\\n' %s >> %s", shQuote(pt.Template), tmplPath)).Root()
+			}
+		}
+	}
+	diff := llb.Diff(initState, s)
+	merge = llb.Merge([]llb.State{merge, diff})
+	return s, merge
+}
+
+// fileNameFromURL returns the last path element of urlString. It panics if
+// urlString cannot be parsed.
+func fileNameFromURL(urlString string) string {
+	parsedURL, err := url.Parse(urlString)
+	if err != nil {
+		panic(err)
+	}
+	return path.Base(parsedURL.Path)
+}
+
+// curl installs the curl package in s with apt-get. The "apt-get update"
+// step is run with llb.IgnoreCache.
+func curl(s llb.State) llb.State {
+	i := s.Run(llb.Shlex("apt-get update"), llb.IgnoreCache).Root()
+	return i.Run(llb.Shlex("apt-get install curl -y")).Root()
+}
+
+// installCuda installs the NVIDIA cuda-keyring package in s, then cuBLAS and
+// the CUDA runtime for cudaVersion. It returns the updated state and the
+// distroless base with the files added by these steps merged on top.
+func installCuda(s llb.State) (llb.State, llb.State) {
+	initState := s
+
+	s = s.Run(shf("curl -O https://developer.download.nvidia.com/compute/cuda/repos/debian12/x86_64/cuda-keyring_1.1-1_all.deb && dpkg -i cuda-keyring_1.1-1_all.deb && rm cuda-keyring_1.1-1_all.deb")).Root()
+	s = s.Run(llb.Shlex("apt-get update"), llb.IgnoreCache).Root()
+	s = s.Run(shf("apt-get install -y libcublas-%[1]s cuda-cudart-%[1]s && apt-get clean", cudaVersion)).Root()
+
+	diff := llb.Diff(initState, s)
+	merge := llb.Merge([]llb.State{llb.Image(distrolessBase), diff})
+	return s, merge
+}
+
+// addLocalAI downloads the local-ai binary matching c.Runtime (an empty
+// runtime selects the AVX build) to /usr/bin/local-ai in s, makes it
+// executable and returns merge with the files added by these steps on top.
+func addLocalAI(c *config.Config, s llb.State, merge llb.State) llb.State {
+	initState := s
+	var localAIURL string
+	switch c.Runtime {
+	case utils.RuntimeNVIDIA:
+		localAIURL = fmt.Sprintf("https://sertacstorage.blob.core.windows.net/localai/%s/local-ai", localAIVersion)
+	case utils.RuntimeCPUAVX2:
+		localAIURL = fmt.Sprintf("https://github.com/mudler/LocalAI/releases/download/%s/local-ai-avx2-Linux-x86_64", localAIVersion)
+	case utils.RuntimeCPUAVX512:
+		localAIURL = fmt.Sprintf("https://github.com/mudler/LocalAI/releases/download/%s/local-ai-avx512-Linux-x86_64", localAIVersion)
+	case utils.RuntimeCPUAVX, "":
+		localAIURL = fmt.Sprintf("https://github.com/mudler/LocalAI/releases/download/%s/local-ai-avx-Linux-x86_64", localAIVersion)
+	}
+
+	// --fail: stop the build on an HTTP error rather than installing the error page
+	s = s.Run(llb.Shlexf("curl --fail -Lo /usr/bin/local-ai %s", localAIURL)).Root()
+	s = s.Run(llb.Shlex("chmod +x /usr/bin/local-ai")).Root()
+	diff := llb.Diff(initState, s)
+	return llb.Merge([]llb.State{merge, diff})
+}
+
+// shf formats cmd with v (fmt.Sprintf) and returns a run option that
+// executes the result with "/bin/sh -c".
+func shf(cmd string, v ...interface{}) llb.RunOption {
+	return llb.Args([]string{"/bin/sh", "-c", fmt.Sprintf(cmd, v...)})
+}
+
+// shQuote wraps s in single quotes for /bin/sh, rewriting every embedded
+// single quote as '\'' so that the shell passes s through without any
+// expansion.
+func shQuote(s string) string {
+	return "'" + strings.ReplaceAll(s, "'", `'\''`) + "'"
+}
diff --git a/pkg/aikit2llb/image.go b/pkg/aikit2llb/image.go new file mode 100644 index
00000000..75e5972b
--- /dev/null
+++ b/pkg/aikit2llb/image.go
@@ -0,0 +1,56 @@
+package aikit2llb
+
+import (
+	"github.com/moby/buildkit/util/system"
+	specs "github.com/opencontainers/image-spec/specs-go/v1"
+	"github.com/sozercan/aikit/pkg/aikit/config"
+	"github.com/sozercan/aikit/pkg/utils"
+)
+
+// NewImageConfig returns the image config for c: the base config from
+// emptyImage plus a local-ai entrypoint. The --debug and --config-file
+// flags are appended only when enabled, so the entrypoint never contains
+// empty arguments.
+func NewImageConfig(c *config.Config) *specs.Image {
+	img := emptyImage(c)
+	entrypoint := []string{"local-ai"}
+	if c.Debug {
+		entrypoint = append(entrypoint, "--debug")
+	}
+	if c.Config != "" {
+		entrypoint = append(entrypoint, "--config-file=/config.yaml")
+	}
+	img.Config.Entrypoint = entrypoint
+	return img
+}
+
+// emptyImage returns a linux/amd64 image config with working directory "/"
+// and the environment for the runtime selected in c.
+func emptyImage(c *config.Config) *specs.Image {
+	img := &specs.Image{
+		Platform: specs.Platform{
+			Architecture: "amd64",
+			OS:           "linux",
+		},
+	}
+	img.RootFS.Type = "layers"
+	img.Config.WorkingDir = "/"
+
+	cudaEnv := []string{
+		"PATH=" + system.DefaultPathEnv("linux") + ":/usr/local/cuda/bin",
+		"NVIDIA_REQUIRE_CUDA=cuda>=12.0",
+		"NVIDIA_DRIVER_CAPABILITIES=compute,utility",
+		"NVIDIA_VISIBLE_DEVICES=all",
+		"LD_LIBRARY_PATH=/usr/local/cuda/lib64",
+	}
+
+	if c.Runtime == utils.RuntimeNVIDIA {
+		img.Config.Env = cudaEnv
+	} else {
+		img.Config.Env = []string{
+			"PATH=" + system.DefaultPathEnv("linux"),
+		}
+	}
+
+	return img
+}
diff --git a/pkg/build/build.go b/pkg/build/build.go
new file mode 100644
index 00000000..f80eb249
--- /dev/null
+++ b/pkg/build/build.go
@@ -0,0 +1,151 @@
+package build
+
+import (
+	"context"
+	"encoding/json"
+	"fmt"
+	"slices"
+
+	"github.com/containerd/containerd/platforms"
+	"github.com/moby/buildkit/client/llb"
+	"github.com/moby/buildkit/exporter/containerimage/exptypes"
+	"github.com/moby/buildkit/frontend/dockerui"
+	"github.com/moby/buildkit/frontend/gateway/client"
+	"github.com/pkg/errors"
+	"github.com/sozercan/aikit/pkg/aikit/config"
+	"github.com/sozercan/aikit/pkg/aikit2llb"
+	"github.com/sozercan/aikit/pkg/utils"
+)
+
+const (
+	LocalNameDockerfile   = "dockerfile"
+	keyFilename           = "filename"
+	defaultDockerfileName = "aikitfile.yaml"
+)
+
+// Build loads and validates the aikitfile, converts it to LLB, solves it and
+// returns the result with the JSON image config attached as metadata.
+func Build(ctx context.Context, c client.Client) (*client.Result, error) {
+	cfg, err := getAikitfileConfig(ctx, c)
+	if err != nil {
+		return nil, errors.Wrap(err, "getting aikitfile")
+	}
+
+	err = validateConfig(cfg)
+	if err != nil {
+		return nil, errors.Wrap(err, "validating aikitfile")
+	}
+
+	st, img := aikit2llb.Aikit2LLB(cfg)
+
+	def, err := st.Marshal(ctx)
+	if err != nil {
+		return nil, errors.Wrapf(err, "failed to marshal image state")
+	}
+	res, err := c.Solve(ctx, client.SolveRequest{
+		Definition: def.ToPB(),
+	})
+	if err != nil {
+		return nil, errors.Wrapf(err, "failed to solve image state")
+	}
+	ref, err := res.SingleRef()
+	if err != nil {
+		return nil, err
+	}
+
+	imgConfig, err := json.Marshal(img)
+	if err != nil {
+		return nil, errors.Wrapf(err, "failed to marshal image config")
+	}
+	k := platforms.Format(platforms.DefaultSpec())
+
+	res.AddMeta(fmt.Sprintf("%s/%s", exptypes.ExporterImageConfigKey, k), imgConfig)
+	res.SetRef(ref)
+
+	return res, nil
+}
+
+// getAikitfileConfig reads the aikitfile from the "dockerfile" local source
+// and parses it. The file name comes from the "filename" build option and
+// falls back to aikitfile.yaml.
+func getAikitfileConfig(ctx context.Context, c client.Client) (*config.Config, error) {
+	opts := c.BuildOpts().Opts
+	filename := opts[keyFilename]
+	if filename == "" {
+		filename = defaultDockerfileName
+	}
+
+	// only name the file in the load step when it is not the default one
+	name := "load aikitfile"
+	if filename != defaultDockerfileName {
+		name += " from " + filename
+	}
+
+	src := llb.Local(LocalNameDockerfile,
+		llb.IncludePatterns([]string{filename}),
+		llb.SessionID(c.BuildOpts().SessionID),
+		llb.SharedKeyHint(defaultDockerfileName),
+		dockerui.WithInternalName(name),
+	)
+
+	def, err := src.Marshal(ctx)
+	if err != nil {
+		return nil, errors.Wrapf(err, "failed to marshal local source")
+	}
+
+	res, err := c.Solve(ctx, client.SolveRequest{
+		Definition: def.ToPB(),
+	})
+	if err != nil {
+		return nil, errors.Wrapf(err, "failed to resolve aikitfile")
+	}
+
+	ref, err := res.SingleRef()
+	if err != nil {
+		return nil, err
+	}
+
+	dtAikitfile, err := ref.ReadFile(ctx, client.ReadRequest{
+		Filename: filename,
+	})
+	if err != nil {
+		return nil, errors.Wrapf(err, "failed to read aikitfile")
+	}
+
+	cfg, err := config.NewFromBytes(dtAikitfile)
+	if err != nil {
+		return nil, errors.Wrap(err, "getting config")
+	}
+
+	return cfg, nil
+}
+
+// validateConfig returns an error unless c has the supported apiVersion, at
+// least one model, a source for every model and a supported runtime.
+func validateConfig(c *config.Config) error {
+	if c.APIVersion == "" {
+		return errors.New("apiVersion is not defined")
+	}
+
+	if c.APIVersion != utils.APIv1alpha1 {
+		return errors.Errorf("apiVersion %s is not supported", c.APIVersion)
+	}
+
+	if len(c.Models) == 0 {
+		return errors.New("no models defined")
+	}
+
+	// every model needs a source to download from
+	for i, m := range c.Models {
+		if m.Source == "" {
+			return errors.Errorf("models[%d] (%s) has no source", i, m.Name)
+		}
+	}
+
+	runtimes := []string{"", utils.RuntimeNVIDIA, utils.RuntimeCPUAVX, utils.RuntimeCPUAVX2, utils.RuntimeCPUAVX512}
+	if !slices.Contains(runtimes, c.Runtime) {
+		return errors.Errorf("runtime %s is not supported", c.Runtime)
+	}
+
+	return nil
+}
diff --git a/pkg/utils/const.go b/pkg/utils/const.go
new file mode 100644
index 00000000..cabba268
--- /dev/null
+++ b/pkg/utils/const.go
@@ -0,0 +1,12 @@
+package utils
+
+const (
+	// Values of the aikitfile "runtime" field.
+	RuntimeNVIDIA    = "cuda"
+	RuntimeCPUAVX    = "avx"
+	RuntimeCPUAVX2   = "avx2"
+	RuntimeCPUAVX512 = "avx512"
+
+	// APIv1alpha1 is the aikitfile "apiVersion" value.
+	APIv1alpha1 = "v1alpha1"
+)
diff --git a/test/aikitfile-cuda.yaml b/test/aikitfile-cuda.yaml new file mode 100644 index 00000000..98ff34dc --- /dev/null +++ b/test/aikitfile-cuda.yaml @@ -0,0 +1,40 @@ +#syntax=ghcr.io/sozercan/aikit:latest +apiVersion: v1alpha1 +debug: true +runtime: cuda +models: + - name: llama-2-7b-chat + source: https://huggingface.co/TheBloke/Llama-2-7B-Chat-GGUF/resolve/main/llama-2-7b-chat.Q4_K_M.gguf + - name: mistral-7b-instruct + source: https://huggingface.co/TheBloke/Mistral-7B-OpenOrca-GGUF/resolve/main/mistral-7b-openorca.Q6_K.gguf +config: | + - name: llama-2-7b-chat + backend: llama + parameters: + top_k: 80 + temperature: 0.2 + top_p: 0.7 + model: llama-2-7b-chat.Q4_K_M.gguf + context_size: 4096 + gpu_layers: 35 + f16: true + batch: 512 + mmap: true + - name: mistral-7b-instruct + context_size: 4096 + threads: 4 + parameters: + model: mistral-7b-openorca.Q6_K.gguf + temperature: 0.2
+      top_k: 40
+      top_p: 0.95
+    template:
+      chat_message: chatml
+      chat: chatml-block
+      completion: completion
+    stopwords:
+      - <|im_end|>
+    gpu_layers: 35
+    f16: true
+    batch: 512
+    mmap: true
\ No newline at end of file
diff --git a/test/aikitfile-dev.yaml b/test/aikitfile-dev.yaml
new file mode 100644
index 00000000..913b75d0
--- /dev/null
+++ b/test/aikitfile-dev.yaml
@@ -0,0 +1,15 @@
+#syntax=ghcr.io/sozercan/aikit:dev
+apiVersion: v1alpha1
+debug: true
+models:
+  - name: llama-2-7b-chat
+    source: https://huggingface.co/TheBloke/Llama-2-7B-Chat-GGUF/resolve/main/llama-2-7b-chat.Q4_K_M.gguf
+config: |
+  - name: llama-2-7b-chat
+    backend: llama
+    parameters:
+      top_k: 80
+      temperature: 0.2
+      top_p: 0.7
+      model: llama-2-7b-chat.Q4_K_M.gguf
+    context_size: 4096
diff --git a/test/aikitfile.yaml b/test/aikitfile.yaml
new file mode 100644
index 00000000..d6606e3b
--- /dev/null
+++ b/test/aikitfile.yaml
@@ -0,0 +1,15 @@
+#syntax=sozercan/aikit:test
+apiVersion: v1alpha1
+debug: true
+models:
+  - name: llama-2-7b-chat
+    source: https://huggingface.co/TheBloke/Llama-2-7B-Chat-GGUF/resolve/main/llama-2-7b-chat.Q4_K_M.gguf
+config: |
+  - name: llama-2-7b-chat
+    backend: llama
+    parameters:
+      top_k: 80
+      temperature: 0.2
+      top_p: 0.7
+      model: llama-2-7b-chat.Q4_K_M.gguf
+    context_size: 4096
diff --git a/test/bats/helpers.bash b/test/bats/helpers.bash
new file mode 100644
index 00000000..cefef431
--- /dev/null
+++ b/test/bats/helpers.bash
@@ -0,0 +1,77 @@
+#!/bin/bash
+
+# Assertion helpers for bats tests. The status-based helpers read $status and
+# $output, presumably as left behind by a preceding bats `run` command.
+
+# assert_success: fail, printing status and output, unless $status is 0.
+assert_success() {
+  [[ "$status" == 0 ]] && return 0
+  echo "expected: 0"
+  echo "actual: $status"
+  echo "output: $output"
+  return 1
+}
+
+# assert_failure: fail, printing status and output, when $status is 0.
+assert_failure() {
+  [[ "$status" != 0 ]] && return 0
+  echo "expected: non-zero exit code"
+  echo "actual: $status"
+  echo "output: $output"
+  return 1
+}
+
+# assert_equal EXPECTED ACTUAL: fail unless the two strings are identical.
+assert_equal() {
+  [[ "$1" == "$2" ]] && return 0
+  echo "expected: $1"
+  echo "actual: $2"
+  return 1
+}
+
+assert_not_equal() {
+  if [[ "$1" == "$2" ]]; then
+    echo "unexpected: $1"
+    echo "actual: $2"
+
return 1
+  fi
+}
+
+# assert_match PATTERN ACTUAL: fail unless ACTUAL matches the regex PATTERN.
+assert_match() {
+  if [[ ! "$2" =~ $1 ]]; then
+    echo "expected: $1"
+    echo "actual: $2"
+    return 1
+  fi
+}
+
+# assert_not_match PATTERN ACTUAL: fail when ACTUAL matches the regex PATTERN.
+assert_not_match() {
+  if [[ "$2" =~ $1 ]]; then
+    echo "unexpected: $1"
+    echo "actual: $2"
+    return 1
+  fi
+}
+
+# assert_len LENGTH STRING: fail unless STRING is exactly LENGTH characters long.
+assert_len() {
+  if [[ "$1" != "${#2}" ]]; then
+    echo "expected len: $1"
+    echo "actual len: ${#2} ($2)"
+    return 1
+  fi
+}
+
+# wait_for_process WAIT SLEEP CMD: eval CMD every SLEEP seconds until it succeeds
+# (return 0) or the WAIT budget is spent (return 1). SLEEP must be an integer > 0.
+wait_for_process() {
+  local wait_time="$1" sleep_time="$2" cmd="$3"
+  while [ "$wait_time" -gt 0 ]; do
+    eval "$cmd" && return 0
+    sleep "$sleep_time"
+    wait_time=$((wait_time - sleep_time))
+  done
+  return 1
+}
diff --git a/test/bats/test.bats b/test/bats/test.bats
new file mode 100644
index 00000000..90ab1631
--- /dev/null
+++ b/test/bats/test.bats
@@ -0,0 +1,17 @@
+#!/usr/bin/env bats
+
+load helpers
+
+WAIT_TIME=120
+SLEEP_TIME=1
+
+# Posts a chat completion request to the server on localhost:8080.
+# --fail makes curl exit non-zero on HTTP error responses, so assert_success
+# checks the HTTP outcome and not only that a connection was made.
+@test "send request to llama-2-7b-chat" {
+  run curl --fail --retry 20 --retry-all-errors http://localhost:8080/v1/chat/completions -H "Content-Type: application/json" -d '{
+    "model": "llama-2-7b-chat",
+    "messages": [{"role": "user", "content": "explain kubernetes in a sentence"}]
+  }'
+  assert_success
+}