diff --git a/.github/workflows/pre-release.yaml b/.github/workflows/pre-release.yaml
index 1ffe70ed..d26b2109 100644
--- a/.github/workflows/pre-release.yaml
+++ b/.github/workflows/pre-release.yaml
@@ -8,9 +8,10 @@ on:
 permissions:
   contents: write
   packages: write
+  id-token: write
 
 jobs:
-  release:
+  pre-release:
     runs-on: ubuntu-latest
     timeout-minutes: 360
     steps:
@@ -18,25 +19,43 @@ jobs:
 
       - name: cleanup disk space
         run: |
-          df -H
           docker system prune -f -a --volumes
           sudo rm -rf /usr/share/dotnet
           sudo rm -rf /opt/ghc
           sudo rm -rf "/usr/local/share/boost"
           sudo rm -rf "$AGENT_TOOLSDIRECTORY"
-          df -H
 
-      - name: Login to ghcr
+      - name: Install Cosign
+        uses: sigstore/cosign-installer@v3
+
+      - name: Set up Docker Buildx
+        uses: docker/setup-buildx-action@v3
+
+      - name: Login to GHCR
         uses: docker/login-action@v3
         with:
           registry: ghcr.io
           username: ${{ github.actor }}
           password: ${{ secrets.GITHUB_TOKEN }}
 
-      - name: create buildx builder
-        run: docker buildx create --use --name builder --bootstrap
-      - uses: crazy-max/ghaction-github-runtime@v3
+      - name: Build and push
+        uses: docker/build-push-action@v5
+        id: build-and-push
+        with:
+          push: true
+          tags: ghcr.io/sozercan/aikit:dev
+          cache-from: type=gha,scope=aikit
+          cache-to: type=gha,scope=aikit,mode=max
+
+      - name: Sign the images with GitHub OIDC Token
+        env:
+          DIGEST: ${{ steps.build-and-push.outputs.digest }}
+        run: cosign sign --yes "ghcr.io/sozercan/aikit@${DIGEST}"
 
-      - name: Push aikit:dev to GHCR
+      - name: Verify image signature
+        env:
+          DIGEST: ${{ steps.build-and-push.outputs.digest }}
         run: |
-          docker buildx build -t ghcr.io/sozercan/aikit:dev --push .
+          cosign verify ghcr.io/sozercan/aikit@${DIGEST} \
+            --certificate-oidc-issuer https://token.actions.githubusercontent.com \
+            --certificate-identity https://github.com/sozercan/aikit/.github/workflows/pre-release.yaml@refs/heads/main
diff --git a/.github/workflows/release.yaml b/.github/workflows/release.yaml
index 5cb7f0cd..488e3d66 100644
--- a/.github/workflows/release.yaml
+++ b/.github/workflows/release.yaml
@@ -8,17 +8,31 @@ on:
 permissions:
   contents: write
   packages: write
+  id-token: write
 
 jobs:
-  release:
+  release:
     runs-on: ubuntu-latest
     timeout-minutes: 360
     steps:
       - uses: actions/checkout@v4
-        with:
-          fetch-depth: 0
 
-      - name: Login to ghcr
+      - name: cleanup disk space
+        run: |
+          docker system prune -f -a --volumes
+          sudo rm -rf /usr/share/dotnet
+          sudo rm -rf /opt/ghc
+          sudo rm -rf "/usr/local/share/boost"
+          sudo rm -rf "$AGENT_TOOLSDIRECTORY"
+
+      - name: Install Cosign
+        uses: sigstore/cosign-installer@v3
+
+      - name: Set up Docker Buildx
+        uses: docker/setup-buildx-action@v3
+      - uses: crazy-max/ghaction-github-runtime@v3
+
+      - name: Login to GHCR
         uses: docker/login-action@v3
         with:
           registry: ghcr.io
@@ -29,10 +43,26 @@ jobs:
         run: |
           echo "TAG=${GITHUB_REF#refs/tags/}" >> $GITHUB_ENV
 
-      - name: Push aikit:latest to GHCR
-        run: |
-          docker buildx build . -t ghcr.io/sozercan/aikit:${TAG} -t ghcr.io/sozercan/aikit:latest --push --cache-from=type=gha,scope=aikit --cache-to=type=gha,scope=aikit,mode=max
+      - name: Build and push
+        uses: docker/build-push-action@v5
+        id: build-and-push
+        with:
+          push: true
+          tags: |
+            ghcr.io/sozercan/aikit:${TAG}
+            ghcr.io/sozercan/aikit:latest
+          cache-from: type=gha,scope=aikit
+          cache-to: type=gha,scope=aikit,mode=max
 
-      # - name: release llama 2 image
-      #   run: |
-      #     docker buildx build -t ghcr.io/sozercan/llama2:7b -t ghcr.io/sozercan/llama2:7b-chat -t ghcr.io/sozercan/llama2:chat -t ghcr.io/sozercan/llama2:latest -f models/llama-2-chat-7b.yaml --push --cache-from=type=gha,scope=llama-2-7b-chat --cache-to=type=gha,scope=llama-2-7b-chat,mode=max
+      - name: Sign the images with GitHub OIDC Token
+        env:
+          DIGEST: ${{ steps.build-and-push.outputs.digest }}
+        run: cosign sign --yes "ghcr.io/sozercan/aikit@${DIGEST}"
+
+      - name: Verify image signature
+        env:
+          DIGEST: ${{ steps.build-and-push.outputs.digest }}
+        run: |
+          cosign verify ghcr.io/sozercan/aikit@${DIGEST} \
+            --certificate-oidc-issuer https://token.actions.githubusercontent.com \
+            --certificate-identity https://github.com/sozercan/aikit/.github/workflows/release.yaml@refs/heads/main
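A note on the keyless signing added above: it works because the jobs request the id-token: write permission, which lets cosign obtain a GitHub OIDC token at run time, and the certificate identity pins each signature to the exact workflow file that produced it. Once a dev image has been published, the same check the workflow performs can be reproduced from any machine (a sketch; assumes cosign v2.x is installed locally):

    cosign verify ghcr.io/sozercan/aikit:dev \
      --certificate-oidc-issuer https://token.actions.githubusercontent.com \
      --certificate-identity https://github.com/sozercan/aikit/.github/workflows/pre-release.yaml@refs/heads/main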
diff --git a/.github/workflows/test-docker.yaml b/.github/workflows/test-docker.yaml
index e4fc5710..9f93a548 100644
--- a/.github/workflows/test-docker.yaml
+++ b/.github/workflows/test-docker.yaml
@@ -23,32 +23,60 @@ jobs:
 
       - name: cleanup disk space
         run: |
-          df -H
           docker system prune -f -a --volumes
           sudo rm -rf /usr/share/dotnet
           sudo rm -rf /opt/ghc
           sudo rm -rf "/usr/local/share/boost"
           sudo rm -rf "$AGENT_TOOLSDIRECTORY"
-          df -H
 
-      - name: create buildx builder
-        run: docker buildx create --use --name builder --bootstrap
+      # need containerd image store for testing local images
+      - uses: crazy-max/ghaction-setup-docker@v2
+        with:
+          daemon-config: |
+            {
+              "debug": true,
+              "features": {
+                "containerd-snapshotter": true
+              }
+            }
 
       - uses: crazy-max/ghaction-github-runtime@v3
 
       - name: build aikit
-        run: docker buildx build . -t sozercan/aikit:test --load --cache-from=type=gha,scope=aikit --cache-to=type=gha,scope=aikit,mode=max
+        run: |
+          docker buildx build . -t aikit:test \
+            --load --progress plain \
+            --cache-from=type=gha,scope=aikit \
+            --cache-to=type=gha,scope=aikit,mode=max
 
       - name: build test model
-        run: docker buildx build . -t sozercan/testmodel:test -f test/aikitfile.yaml --load --cache-from=type=gha,scope=testmodel --cache-to=type=gha,scope=testmodel,mode=max
+        run: |
+          docker buildx build . -t testmodel:test \
+            -f test/aikitfile.yaml \
+            --load --progress plain \
+            --cache-from=type=gha,scope=testmodel \
+            --cache-to=type=gha,scope=testmodel,mode=max
 
       - name: list images
         run: docker images
 
       - name: run test model
-        run: docker run -d -p 8080:8080 sozercan/testmodel:test
+        run: docker run --name testmodel -d -p 8080:8080 testmodel:test
 
       - name: install e2e dependencies
         run: make test-e2e-dependencies
 
-      - name: run bats test
-        run: make test-e2e
+      - name: run test
+        run: |
+          curl http://127.0.0.1:8080/v1/chat/completions -H "Content-Type: application/json" -d '{"model": "llama-2-7b-chat", "messages": [{"role": "user", "content": "explain kubernetes in a sentence"}]}'
+
+      - name: save logs
+        if: always()
+        run: |
+          docker logs testmodel > docker.log
+
+      - name: publish logs
+        if: always()
+        uses: actions/upload-artifact@v3
+        with:
+          name: docker-logs
+          path: docker.log
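Per the workflow's own comment, the containerd image store is what allows a locally built image to be --load-ed and then run without pushing to a registry. The same build-and-smoke-test loop can be reproduced outside CI (a sketch; assumes a Docker daemon with the containerd snapshotter enabled and drops the GHA-only cache flags):

    docker buildx build . -t aikit:test --load
    docker buildx build . -t testmodel:test -f test/aikitfile.yaml --load
    docker run --name testmodel -d -p 8080:8080 testmodel:test
    curl http://127.0.0.1:8080/v1/chat/completions \
      -H "Content-Type: application/json" \
      -d '{"model": "llama-2-7b-chat", "messages": [{"role": "user", "content": "explain kubernetes in a sentence"}]}'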
diff --git a/.github/workflows/update-models.yaml b/.github/workflows/update-models.yaml
index 2d5d3280..f76f98fd 100644
--- a/.github/workflows/update-models.yaml
+++ b/.github/workflows/update-models.yaml
@@ -6,14 +6,32 @@ on:
 permissions:
   contents: write
   packages: write
+  id-token: write
 
 jobs:
   update-models:
+    strategy:
+      fail-fast: false
+      matrix:
+        model:
+          - llama-2-7b-chat
+          - llama-2-13b-chat
+          - orca-2-13b
+          - llama-2-7b-chat-cuda
+          - llama-2-13b-chat-cuda
+          - orca-2-13b-cuda
     runs-on: ubuntu-latest
     timeout-minutes: 360
     steps:
       - uses: actions/checkout@v4
 
+      - name: Install Cosign
+        uses: sigstore/cosign-installer@v3
+
+      - name: Set up Docker Buildx
+        uses: docker/setup-buildx-action@v3
+      - uses: crazy-max/ghaction-github-runtime@v3
+
       - name: Login to GHCR
         uses: docker/login-action@v3
         with:
@@ -21,18 +39,31 @@ jobs:
           username: ${{ github.actor }}
           password: ${{ secrets.GITHUB_TOKEN }}
 
-      - name: Push llama 2 models to GHCR
+      - name: parse matrix
+        run: |
+          echo "MODEL_NAME=$(echo ${{ matrix.model }} | cut -d'-' -f1-2 | sed 's/-//g')" >> $GITHUB_ENV
+          echo "MODEL_SIZE=$(echo ${{ matrix.model }} | rev | cut -d'-' -f2 | rev)" >> $GITHUB_ENV
+          echo "MODEL_RUNTIME=-$(echo ${{ matrix.model }} | rev | cut -d'-' -f1 | rev)" >> $GITHUB_ENV
+
+      - name: Build and push
+        uses: docker/build-push-action@v5
+        id: build-and-push
+        with:
+          push: true
+          tags: ghcr.io/sozercan/${{ env.MODEL_NAME }}:${{ env.MODEL_SIZE }}${{ env.MODEL_RUNTIME }}
+          file: models/${{ matrix.model }}.yaml
+          cache-from: type=gha,scope=${{ env.MODEL_NAME }}-${{ env.MODEL_SIZE }}
+          cache-to: type=gha,scope=${{ env.MODEL_NAME }}-${{ env.MODEL_SIZE }},mode=max
+
+      - name: Sign the images with GitHub OIDC Token
+        env:
+          DIGEST: ${{ steps.build-and-push.outputs.digest }}
+        run: cosign sign --yes "ghcr.io/sozercan/${{ env.MODEL_NAME }}:${{ env.MODEL_SIZE }}${{ env.MODEL_RUNTIME }}@${DIGEST}"
+
+      - name: Verify image signature
+        env:
+          DIGEST: ${{ steps.build-and-push.outputs.digest }}
         run: |
-          docker buildx create --use --name builder --bootstrap
-
-          # cpu avx
-          MODELS_PATH=models
-          docker buildx build . -t ghcr.io/sozercan/llama2:7b -f ${MODELS_PATH}/llama-2-7b-chat.yaml --push
-          docker buildx build . -t ghcr.io/sozercan/llama2:13b -f ${MODELS_PATH}/llama-2-13b-chat.yaml --push
-          docker buildx build . -t ghcr.io/sozercan/orca2:13b -f ${MODELS_PATH}/orca-2-13b.yaml --push
-
-          # cuda
-          CUDA_MODELS_PATH=models/cuda
-          docker buildx build . -t ghcr.io/sozercan/llama2:7b-cuda -f ${CUDA_MODELS_PATH}/llama-2-7b-chat.yaml --push
-          docker buildx build . -t ghcr.io/sozercan/llama2:13b-cuda -f ${CUDA_MODELS_PATH}/llama-2-13b-chat.yaml --push
-          docker buildx build . -t ghcr.io/sozercan/orca2:13b-cuda -f ${CUDA_MODELS_PATH}/orca-2-13b.yaml --push
\ No newline at end of file
+          cosign verify ghcr.io/sozercan/${{ env.MODEL_NAME }}:${{ env.MODEL_SIZE }}${{ env.MODEL_RUNTIME }}@${DIGEST} \
+            --certificate-oidc-issuer https://token.actions.githubusercontent.com \
+            --certificate-identity https://github.com/sozercan/aikit/.github/workflows/update-models.yaml@refs/heads/main
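To trace the parse matrix step with one concrete input: for orca-2-13b-cuda, cut -d'-' -f1-2 keeps orca-2 and sed 's/-//g' collapses it to MODEL_NAME=orca2, while the rev | cut | rev pairs select fields from the right, giving MODEL_SIZE=13b and MODEL_RUNTIME=-cuda, so the pushed tag is ghcr.io/sozercan/orca2:13b-cuda. The same pipeline can be checked in any shell (an illustrative snippet, not part of the workflow):

    model=orca-2-13b-cuda
    echo $model | cut -d'-' -f1-2 | sed 's/-//g'   # orca2
    echo $model | rev | cut -d'-' -f2 | rev        # 13b
    echo $model | rev | cut -d'-' -f1 | rev        # cuda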
diff --git a/models/cuda/llama-2-13b-chat.yaml b/models/llama-2-13b-chat-cuda.yaml
similarity index 100%
rename from models/cuda/llama-2-13b-chat.yaml
rename to models/llama-2-13b-chat-cuda.yaml
diff --git a/models/cuda/llama-2-7b-chat.yaml b/models/llama-2-7b-chat-cuda.yaml
similarity index 100%
rename from models/cuda/llama-2-7b-chat.yaml
rename to models/llama-2-7b-chat-cuda.yaml
diff --git a/models/cuda/orca-2-13b.yaml b/models/orca-2-13b-cuda.yaml
similarity index 100%
rename from models/cuda/orca-2-13b.yaml
rename to models/orca-2-13b-cuda.yaml
diff --git a/pkg/aikit2llb/convert.go b/pkg/aikit2llb/convert.go
index 634f95f7..b90b1d03 100644
--- a/pkg/aikit2llb/convert.go
+++ b/pkg/aikit2llb/convert.go
@@ -25,10 +25,10 @@ func Aikit2LLB(c *config.Config) (llb.State, *specs.Image) {
 	s, merge = copyModels(c, s)
 	s, merge = addLocalAI(c, s, merge)
 	if c.Runtime == utils.RuntimeNVIDIA {
-		s = installCuda(s, merge)
+		merge = installCuda(s, merge)
 	}
 	imageCfg := NewImageConfig(c)
-	return s, imageCfg
+	return merge, imageCfg
 }
 
 func copyModels(c *config.Config, s llb.State) (llb.State, llb.State) {
@@ -87,8 +87,9 @@ func installCuda(s llb.State, merge llb.State) llb.State {
 		llb.WithCustomName("Copying "+fileNameFromURL(cudaKeyringURL)), //nolint: goconst
 	)
 	s = s.Run(shf("dpkg -i cuda-keyring_1.1-1_all.deb && rm cuda-keyring_1.1-1_all.deb")).Root()
+	s = s.Run(shf("apt-get update && apt-get install -y ca-certificates && apt-get update"), llb.IgnoreCache).Root()
 	savedState := s
-	s = s.Run(shf("apt-get update && apt-get install -y ca-certificates && apt-get update && apt-get install -y libcublas-%[1]s cuda-cudart-%[1]s && apt-get clean", cudaVersion), llb.IgnoreCache).Root()
+	s = s.Run(shf("apt-get install -y libcublas-%[1]s cuda-cudart-%[1]s && apt-get clean", cudaVersion)).Root()
 
 	diff := llb.Diff(savedState, s)
 	merge = llb.Merge([]llb.State{merge, diff})
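The convert.go change makes installCuda return the merged state instead of the raw chain, and moves the always-refreshed apt-get update out from under savedState so that llb.Diff captures only the CUDA package install as a delta. A minimal standalone sketch of that diff-then-merge pattern (assumes BuildKit's github.com/moby/buildkit/client/llb package; the image ref and package name are illustrative only):

    // cudaDelta layers just the files produced by an install step onto appState,
    // mirroring the savedState / llb.Diff / llb.Merge flow in installCuda above.
    func cudaDelta(appState llb.State) llb.State {
    	base := llb.Image("ubuntu:22.04")
    	withCuda := base.Run(llb.Shlex("apt-get update && apt-get install -y cuda-cudart-12-3")).Root()
    	delta := llb.Diff(base, withCuda) // only the files the install created
    	return llb.Merge([]llb.State{appState, delta})
    }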
diff --git a/test/aikitfile-cuda.yaml b/test/aikitfile-cuda.yaml
index 98ff34dc..c282fc98 100644
--- a/test/aikitfile-cuda.yaml
+++ b/test/aikitfile-cuda.yaml
@@ -1,12 +1,11 @@
-#syntax=ghcr.io/sozercan/aikit:latest
+#syntax=aikit:test
 apiVersion: v1alpha1
 debug: true
 runtime: cuda
 models:
   - name: llama-2-7b-chat
     source: https://huggingface.co/TheBloke/Llama-2-7B-Chat-GGUF/resolve/main/llama-2-7b-chat.Q4_K_M.gguf
-  - name: mistral-7b-instruct
-    source: https://huggingface.co/TheBloke/Mistral-7B-OpenOrca-GGUF/resolve/main/mistral-7b-openorca.Q6_K.gguf
+    sha256: "08a5566d61d7cb6b420c3e4387a39e0078e1f2fe5f055f3a03887385304d4bfa"
 config: |
   - name: llama-2-7b-chat
     backend: llama
@@ -20,21 +19,3 @@ config: |
     f16: true
     batch: 512
     mmap: true
-  - name: mistral-7b-instruct
-    context_size: 4096
-    threads: 4
-    parameters:
-      model: mistral-7b-openorca.Q6_K.gguf
-      temperature: 0.2
-      top_k: 40
-      top_p: 0.95
-    template:
-      chat_message: chatml
-      chat: chatml-block
-      completion: completion
-    stopwords:
-    - <|im_end|>
-    gpu_layers: 35
-    f16: true
-    batch: 512
-    mmap: true
\ No newline at end of file
diff --git a/test/aikitfile.yaml b/test/aikitfile.yaml
index 856ac72c..d88de38b 100644
--- a/test/aikitfile.yaml
+++ b/test/aikitfile.yaml
@@ -1,4 +1,4 @@
-#syntax=sozercan/aikit:test
+#syntax=aikit:test
 apiVersion: v1alpha1
 debug: true
 models:
diff --git a/test/bats/test.bats b/test/bats/test.bats
index 90ab1631..0884d9f7 100644
--- a/test/bats/test.bats
+++ b/test/bats/test.bats
@@ -5,9 +5,9 @@ load helpers
 WAIT_TIME=120
 SLEEP_TIME=1
 
-@test "send request to llama-2-7b-chat" {
+@test "send request to tinyllama-1.1b-chat" {
   run curl --retry 20 --retry-all-errors http://localhost:8080/v1/chat/completions -H "Content-Type: application/json" -d '{
-    "model": "llama-2-7b-chat",
+    "model": "tinyllama-1.1b-chat",
     "messages": [{"role": "user", "content": "explain kubernetes in a sentence"}],
   }'
   assert_success
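With an aikit-built image that serves the tinyllama-1.1b-chat model listening on port 8080, the updated bats case can be exercised directly (a sketch; assumes bats-core is available, e.g. via the make test-e2e-dependencies target used in the workflow above):

    bats test/bats/test.bats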