From 5dde5b0ea694e0421473c2a3805877a1f710e861 Mon Sep 17 00:00:00 2001
From: Sertac Ozercan
Date: Sun, 3 Dec 2023 23:13:07 +0000
Subject: [PATCH 1/9] ci: update release process and sign

Signed-off-by: Sertac Ozercan
---
 .github/workflows/pre-release.yaml            | 29 +++++++---
 .github/workflows/release.yaml                | 40 ++++++++++---
 .github/workflows/test-docker.yaml            | 20 ++++---
 .github/workflows/update-models.yaml          | 58 ++++++++++++++-----
 ...b-chat.yaml => llama-2-13b-chat-cuda.yaml} |  0
 ...7b-chat.yaml => llama-2-7b-chat-cuda.yaml} |  0
 .../orca-2-13b.yaml => orca-2-13b-cuda.yaml}  |  0
 test/aikitfile-cuda.yaml                      |  2 +-
 test/aikitfile.yaml                           |  2 +-
 9 files changed, 110 insertions(+), 41 deletions(-)
 rename models/{cuda/llama-2-13b-chat.yaml => llama-2-13b-chat-cuda.yaml} (100%)
 rename models/{cuda/llama-2-7b-chat.yaml => llama-2-7b-chat-cuda.yaml} (100%)
 rename models/{cuda/orca-2-13b.yaml => orca-2-13b-cuda.yaml} (100%)

diff --git a/.github/workflows/pre-release.yaml b/.github/workflows/pre-release.yaml
index 1ffe70ed..af59653b 100644
--- a/.github/workflows/pre-release.yaml
+++ b/.github/workflows/pre-release.yaml
@@ -8,6 +8,7 @@ on:
 permissions:
   contents: write
   packages: write
+  id-token: write
 
 jobs:
   release:
@@ -18,25 +19,35 @@ jobs:
 
       - name: cleanup disk space
         run: |
-          df -H
           docker system prune -f -a --volumes
           sudo rm -rf /usr/share/dotnet
           sudo rm -rf /opt/ghc
           sudo rm -rf "/usr/local/share/boost"
           sudo rm -rf "$AGENT_TOOLSDIRECTORY"
-          df -H
 
-      - name: Login to ghcr
+      - name: Install Cosign
+        uses: sigstore/cosign-installer@v3
+
+      - name: Set up Docker Buildx
+        uses: docker/setup-buildx-action@v3
+
+      - name: Login to GHCR
         uses: docker/login-action@v3
         with:
           registry: ghcr.io
           username: ${{ github.actor }}
           password: ${{ secrets.GITHUB_TOKEN }}
 
-      - name: create buildx builder
-        run: docker buildx create --use --name builder --bootstrap
-      - uses: crazy-max/ghaction-github-runtime@v3
 
-      - name: Push aikit:dev to GHCR
-        run: |
-          docker buildx build -t ghcr.io/sozercan/aikit:dev --push .
+      - name: Build and push
+        uses: docker/build-push-action@v5
+        id: build-and-push
+        with:
+          push: true
+          tags: ghcr.io/sozercan/aikit:dev
+          cache-from: type=gha,scope=aikit
+          cache-to: type=gha,scope=aikit,mode=max
+
+      - name: Sign the images with GitHub OIDC Token
+        env:
+          DIGEST: ${{ steps.build-and-push.outputs.digest }}
+        run: cosign sign --yes "ghcr.io/sozercan/aikit@${DIGEST}"
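The pre-release workflow above now builds with docker/build-push-action and signs the pushed image keylessly; `id-token: write` is what lets the job mint a GitHub OIDC token for cosign to exchange for a signing certificate. Outside CI the same operation is just a digest-pinned `cosign sign` — a minimal sketch, not the repo's tooling (the digest placeholder stands in for build-push-action's `digest` output; tags are mutable, digests are not):

    # Keyless signing sketch: cosign trades the ambient OIDC identity for a
    # short-lived certificate and records the signature in a transparency log.
    IMAGE=ghcr.io/sozercan/aikit
    DIGEST=sha256:...   # digest of the pushed manifest, taken from the build step
    cosign sign --yes "${IMAGE}@${DIGEST}"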
diff --git a/.github/workflows/release.yaml b/.github/workflows/release.yaml
index 5cb7f0cd..8a2cb95e 100644
--- a/.github/workflows/release.yaml
+++ b/.github/workflows/release.yaml
@@ -8,6 +8,7 @@ on:
 permissions:
   contents: write
   packages: write
+  id-token: write
 
 jobs:
   release:
@@ -15,10 +16,23 @@ jobs:
     runs-on: ubuntu-latest
     timeout-minutes: 360
     steps:
       - uses: actions/checkout@v4
-        with:
-          fetch-depth: 0
 
-      - name: Login to ghcr
+      - name: cleanup disk space
+        run: |
+          docker system prune -f -a --volumes
+          sudo rm -rf /usr/share/dotnet
+          sudo rm -rf /opt/ghc
+          sudo rm -rf "/usr/local/share/boost"
+          sudo rm -rf "$AGENT_TOOLSDIRECTORY"
+
+      - name: Install Cosign
+        uses: sigstore/cosign-installer@v3
+
+      - name: Set up Docker Buildx
+        uses: docker/setup-buildx-action@v3
+
+      - uses: crazy-max/ghaction-github-runtime@v3
+
+      - name: Login to GHCR
         uses: docker/login-action@v3
         with:
           registry: ghcr.io
@@ -29,10 +43,18 @@ jobs:
         run: |
           echo "TAG=${GITHUB_REF#refs/tags/}" >> $GITHUB_ENV
 
-      - name: Push aikit:latest to GHCR
-        run: |
-          docker buildx build . -t ghcr.io/sozercan/aikit:${TAG} -t ghcr.io/sozercan/aikit:latest --push --cache-from=type=gha,scope=aikit --cache-to=type=gha,scope=aikit,mode=max
+      - name: Build and push
+        uses: docker/build-push-action@v5
+        id: build-and-push
+        with:
+          push: true
+          tags: |
+            ghcr.io/sozercan/aikit:${TAG}
+            ghcr.io/sozercan/aikit:latest
+          cache-from: type=gha,scope=aikit
+          cache-to: type=gha,scope=aikit,mode=max
 
-      # - name: release llama 2 image
-      #   run: |
-      #     docker buildx build -t ghcr.io/sozercan/llama2:7b -t ghcr.io/sozercan/llama2:7b-chat -t ghcr.io/sozercan/llama2:chat -t ghcr.io/sozercan/llama2:latest -f models/llama-2-chat-7b.yaml --push --cache-from=type=gha,scope=llama-2-7b-chat --cache-to=type=gha,scope=llama-2-7b-chat,mode=max
+      - name: Sign the images with GitHub OIDC Token
+        env:
+          DIGEST: ${{ steps.build-and-push.outputs.digest }}
+        run: cosign sign --yes "ghcr.io/sozercan/aikit@${DIGEST}"
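Both release workflows now share a BuildKit layer cache on GitHub Actions: `scope=aikit` keeps the frontend's cache entries separate from the per-model caches used later, and `mode=max` exports intermediate layers as well as the final ones. One caveat worth noting: build-push-action inputs are not shell-expanded, so the `${TAG}` above only resolves if written as `${{ env.TAG }}`. The raw buildx equivalent of the cached build, as a sketch (the crazy-max/ghaction-github-runtime step is what exposes the runtime token the `gha` cache backend needs):

    docker buildx build . \
      -t ghcr.io/sozercan/aikit:latest \
      --push \
      --cache-from type=gha,scope=aikit \
      --cache-to type=gha,scope=aikit,mode=max   # mode=max also exports intermediate layers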
diff --git a/.github/workflows/test-docker.yaml b/.github/workflows/test-docker.yaml
index e4fc5710..28c96856 100644
--- a/.github/workflows/test-docker.yaml
+++ b/.github/workflows/test-docker.yaml
@@ -23,29 +23,35 @@ jobs:
 
       - name: cleanup disk space
         run: |
-          df -H
           docker system prune -f -a --volumes
           sudo rm -rf /usr/share/dotnet
           sudo rm -rf /opt/ghc
           sudo rm -rf "/usr/local/share/boost"
           sudo rm -rf "$AGENT_TOOLSDIRECTORY"
-          df -H
 
-      - name: create buildx builder
-        run: docker buildx create --use --name builder --bootstrap
+      # need containerd image store for testing local images
+      - uses: crazy-max/ghaction-setup-docker@v2
+        with:
+          daemon-config: |
+            {
+              "debug": true,
+              "features": {
+                "containerd-snapshotter": true
+              }
+            }
 
       - uses: crazy-max/ghaction-github-runtime@v3
 
       - name: build aikit
-        run: docker buildx build . -t sozercan/aikit:test --load --cache-from=type=gha,scope=aikit --cache-to=type=gha,scope=aikit,mode=max
+        run: docker buildx build . -t aikit:test --load --cache-from=type=gha,scope=aikit --cache-to=type=gha,scope=aikit,mode=max
 
       - name: build test model
-        run: docker buildx build . -t sozercan/testmodel:test -f test/aikitfile.yaml --load --cache-from=type=gha,scope=testmodel --cache-to=type=gha,scope=testmodel,mode=max
+        run: docker buildx build . -t testmodel:test -f test/aikitfile.yaml --load --cache-from=type=gha,scope=testmodel --cache-to=type=gha,scope=testmodel,mode=max
 
       - name: list images
         run: docker images
 
       - name: run test model
-        run: docker run -d -p 8080:8080 sozercan/testmodel:test
+        run: docker run -d -p 8080:8080 testmodel:test
 
       - name: install e2e dependencies
         run: make test-e2e-dependencies
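The comment in the workflow captures the reason for the daemon swap: an image built with `--load` can only be referenced by a later `#syntax=` line if the daemon uses the containerd image store. Outside the action, the equivalent is a daemon.json feature flag; a sketch, assuming a systemd host:

    # /etc/docker/daemon.json would contain:
    #   { "features": { "containerd-snapshotter": true } }
    sudo systemctl restart docker
    docker info -f '{{ .DriverStatus }}'   # should mention io.containerd.snapshotter.v1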
diff --git a/.github/workflows/update-models.yaml b/.github/workflows/update-models.yaml
index 2d5d3280..4418dbf7 100644
--- a/.github/workflows/update-models.yaml
+++ b/.github/workflows/update-models.yaml
@@ -6,14 +6,32 @@ on:
 permissions:
   contents: write
   packages: write
+  id-token: write
 
 jobs:
   update-models:
+    strategy:
+      fail-fast: false
+      matrix:
+        model:
+          - llama-2-7b-chat
+          - llama-2-13b-chat
+          - orca-2-13b
+          - llama-2-7b-chat-cuda
+          - llama-2-13b-chat-cuda
+          - orca-2-13b-cuda
     runs-on: ubuntu-latest
     timeout-minutes: 360
     steps:
       - uses: actions/checkout@v4
 
+      - name: Install Cosign
+        uses: sigstore/cosign-installer@v3
+
+      - name: Set up Docker Buildx
+        uses: docker/setup-buildx-action@v3
+
       - uses: crazy-max/ghaction-github-runtime@v3
 
       - name: Login to GHCR
         uses: docker/login-action@v3
         with:
           registry: ghcr.io
@@ -21,18 +39,30 @@ jobs:
           username: ${{ github.actor }}
           password: ${{ secrets.GITHUB_TOKEN }}
 
-      - name: Push llama 2 models to GHCR
+      - name: parse matrix
+        run: |
+          echo "MODEL_NAME=$(echo ${{ matrix.model }} | cut -d'-' -f1-2 | sed 's/-//g')" >> $GITHUB_ENV
+          echo "MODEL_SIZE=$(echo ${{ matrix.model }} | rev | cut -d'-' -f2 | rev)" >> $GITHUB_ENV
+          echo "MODEL_RUNTIME=-$(echo ${{ matrix.model }} | rev | cut -d'-' -f1 | rev)" >> $GITHUB_ENV
+
+      - name: Build and push
+        uses: docker/build-push-action@v5
+        with:
+          push: true
+          tags: ghcr.io/sozercan/${{ env.MODEL_NAME }}:${{ env.MODEL_SIZE }}${{ env.MODEL_RUNTIME }}
+          file: models/${{ matrix.model }}.yaml
+          cache-from: type=gha,scope=${{ env.MODEL_NAME }}:${{ env.MODEL_SIZE }}
+          cache-to: type=gha,scope=${{ env.MODEL_NAME }}:${{ env.MODEL_SIZE }},mode=max
+
+      - name: Sign the images with GitHub OIDC Token
+        env:
+          DIGEST: ${{ steps.build-and-push.outputs.digest }}
+        run: cosign sign --yes "ghcr.io/sozercan/${{ env.MODEL_NAME }}:${{ env.MODEL_SIZE }}${{ env.MODEL_RUNTIME }}@${DIGEST}"
+
+      - name: verify image signature
+        env:
+          DIGEST: ${{ steps.build-and-push.outputs.digest }}
         run: |
-          docker buildx create --use --name builder --bootstrap
-
-          # cpu avx
-          MODELS_PATH=models
-          docker buildx build . -t ghcr.io/sozercan/llama2:7b -f ${MODELS_PATH}/llama-2-7b-chat.yaml --push
-          docker buildx build . -t ghcr.io/sozercan/llama2:13b -f ${MODELS_PATH}/llama-2-13b-chat.yaml --push
-          docker buildx build . -t ghcr.io/sozercan/orca2:13b -f ${MODELS_PATH}/orca-2-13b.yaml --push
-
-          # cuda
-          CUDA_MODELS_PATH=models/cuda
-          docker buildx build . -t ghcr.io/sozercan/llama2:7b-cuda -f ${CUDA_MODELS_PATH}/llama-2-7b-chat.yaml --push
-          docker buildx build . -t ghcr.io/sozercan/llama2:13b-cuda -f ${CUDA_MODELS_PATH}/llama-2-13b-chat.yaml --push
-          docker buildx build . -t ghcr.io/sozercan/orca2:13b-cuda -f ${CUDA_MODELS_PATH}/orca-2-13b.yaml --push
\ No newline at end of file
+          cosign verify ghcr.io/sozercan/${{ env.MODEL_NAME }}:${{ env.MODEL_SIZE }}${{ env.MODEL_RUNTIME }}@${DIGEST} \
+            --certificate-identity https://github.com/sozercan/aikit/.github/workflows/update-models.yaml@refs/heads/main \
+            --certificate-oidc-issuer https://token.actions.githubusercontent.com | jq
diff --git a/models/cuda/llama-2-13b-chat.yaml b/models/llama-2-13b-chat-cuda.yaml
similarity index 100%
rename from models/cuda/llama-2-13b-chat.yaml
rename to models/llama-2-13b-chat-cuda.yaml
diff --git a/models/cuda/llama-2-7b-chat.yaml b/models/llama-2-7b-chat-cuda.yaml
similarity index 100%
rename from models/cuda/llama-2-7b-chat.yaml
rename to models/llama-2-7b-chat-cuda.yaml
diff --git a/models/cuda/orca-2-13b.yaml b/models/orca-2-13b-cuda.yaml
similarity index 100%
rename from models/cuda/orca-2-13b.yaml
rename to models/orca-2-13b-cuda.yaml
diff --git a/test/aikitfile-cuda.yaml b/test/aikitfile-cuda.yaml
index 98ff34dc..17888400 100644
--- a/test/aikitfile-cuda.yaml
+++ b/test/aikitfile-cuda.yaml
@@ -1,4 +1,4 @@
-#syntax=ghcr.io/sozercan/aikit:latest
+#syntax=aikit:test
 apiVersion: v1alpha1
 debug: true
 runtime: cuda
diff --git a/test/aikitfile.yaml b/test/aikitfile.yaml
index 856ac72c..d88de38b 100644
--- a/test/aikitfile.yaml
+++ b/test/aikitfile.yaml
@@ -1,4 +1,4 @@
-#syntax=sozercan/aikit:test
+#syntax=aikit:test
 apiVersion: v1alpha1
 debug: true
 models:
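Patch 1 also repoints both test aikitfiles at the locally loaded frontend: the `#syntax=` directive names the BuildKit frontend image that interprets the file, so CI exercises the `aikit:test` image it just built instead of a published one. The resulting local loop, as a sketch using the tags from the workflow:

    # Build the frontend itself, then build a model image through it
    # (the aikitfile's #syntax=aikit:test line selects the frontend).
    docker buildx build . -t aikit:test --load
    docker buildx build . -t testmodel:test -f test/aikitfile.yaml --load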
From a5223b1a1bf8f8809788edc08bc42ed292dc0340 Mon Sep 17 00:00:00 2001
From: Sertac Ozercan
Date: Mon, 4 Dec 2023 01:17:20 +0000
Subject: [PATCH 2/9] use tinyllama

Signed-off-by: Sertac Ozercan
---
 test/aikitfile-cuda.yaml | 33 +++++++--------------------------
 test/aikitfile.yaml      | 12 ++++++------
 2 files changed, 13 insertions(+), 32 deletions(-)

diff --git a/test/aikitfile-cuda.yaml b/test/aikitfile-cuda.yaml
index 17888400..24bd2d4e 100644
--- a/test/aikitfile-cuda.yaml
+++ b/test/aikitfile-cuda.yaml
@@ -3,38 +3,19 @@ apiVersion: v1alpha1
 debug: true
 runtime: cuda
 models:
-  - name: llama-2-7b-chat
-    source: https://huggingface.co/TheBloke/Llama-2-7B-Chat-GGUF/resolve/main/llama-2-7b-chat.Q4_K_M.gguf
-  - name: mistral-7b-instruct
-    source: https://huggingface.co/TheBloke/Mistral-7B-OpenOrca-GGUF/resolve/main/mistral-7b-openorca.Q6_K.gguf
+  - name: tinyllama-1.1b-chat
+    source: https://huggingface.co/TheBloke/TinyLlama-1.1B-Chat-v0.3-GGUF/resolve/main/tinyllama-1.1b-chat-v0.3.Q4_K_M.gguf
+    sha256: "2d3bd82534bb6c6e0f4be1761b29b25bdcd65855a022513cb65f24ad3b25e41f"
 config: |
-  - name: llama-2-7b-chat
+  - name: tinyllama-1.1b-chat
     backend: llama
     parameters:
       top_k: 80
       temperature: 0.2
       top_p: 0.7
-      model: llama-2-7b-chat.Q4_K_M.gguf
-    context_size: 4096
-    gpu_layers: 35
+      model: tinyllama-1.1b-chat-v0.3.Q4_K_M.gguf
+    context_size: 1000
+    gpu_layers: 10
     f16: true
     batch: 512
     mmap: true
-  - name: mistral-7b-instruct
-    context_size: 4096
-    threads: 4
-    parameters:
-      model: mistral-7b-openorca.Q6_K.gguf
-      temperature: 0.2
-      top_k: 40
-      top_p: 0.95
-    template:
-      chat_message: chatml
-      chat: chatml-block
-      completion: completion
-    stopwords:
-      - <|im_end|>
-    gpu_layers: 35
-    f16: true
-    batch: 512
-    mmap: true
\ No newline at end of file
diff --git a/test/aikitfile.yaml b/test/aikitfile.yaml
index d88de38b..7b11030d 100644
--- a/test/aikitfile.yaml
+++ b/test/aikitfile.yaml
@@ -2,15 +2,15 @@ apiVersion: v1alpha1
 debug: true
 models:
-  - name: llama-2-7b-chat
-    source: https://huggingface.co/TheBloke/Llama-2-7B-Chat-GGUF/resolve/main/llama-2-7b-chat.Q4_K_M.gguf
-    sha256: "08a5566d61d7cb6b420c3e4387a39e0078e1f2fe5f055f3a03887385304d4bfa"
+  - name: tinyllama-1.1b-chat
+    source: https://huggingface.co/TheBloke/TinyLlama-1.1B-Chat-v0.3-GGUF/resolve/main/tinyllama-1.1b-chat-v0.3.Q4_K_M.gguf
+    sha256: "2d3bd82534bb6c6e0f4be1761b29b25bdcd65855a022513cb65f24ad3b25e41f"
 config: |
-  - name: llama-2-7b-chat
+  - name: tinyllama-1.1b-chat
     backend: llama
     parameters:
       top_k: 80
       temperature: 0.2
       top_p: 0.7
-      model: llama-2-7b-chat.Q4_K_M.gguf
-    context_size: 4096
+      model: tinyllama-1.1b-chat-v0.3.Q4_K_M.gguf
+    context_size: 1000
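Patch 2 swaps the test model for the far smaller TinyLlama GGUF and pins both test files to an exact sha256, so a re-uploaded upstream file fails the build instead of silently changing test behavior. The check the builder has to perform amounts to the following sketch:

    SRC=https://huggingface.co/TheBloke/TinyLlama-1.1B-Chat-v0.3-GGUF/resolve/main/tinyllama-1.1b-chat-v0.3.Q4_K_M.gguf
    WANT=2d3bd82534bb6c6e0f4be1761b29b25bdcd65855a022513cb65f24ad3b25e41f
    curl -sSLO "$SRC"
    # sha256sum -c expects "<hash>  <filename>" and exits non-zero on mismatch.
    echo "$WANT  tinyllama-1.1b-chat-v0.3.Q4_K_M.gguf" | sha256sum -c -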
From 42ad5d731123dc165b7b9343359e25b6d7b42ad1 Mon Sep 17 00:00:00 2001
From: Sertac Ozercan
Date: Mon, 4 Dec 2023 01:47:44 +0000
Subject: [PATCH 3/9] use tinyllama

Signed-off-by: Sertac Ozercan
---
 .github/workflows/pre-release.yaml   |  8 ++++++++
 .github/workflows/release.yaml       |  8 ++++++++
 .github/workflows/test-docker.yaml   | 13 +++++++++++--
 .github/workflows/update-models.yaml |  3 ++-
 test/aikitfile.yaml                  |  1 +
 test/bats/test.bats                  |  4 ++--
 6 files changed, 32 insertions(+), 5 deletions(-)

diff --git a/.github/workflows/pre-release.yaml b/.github/workflows/pre-release.yaml
index af59653b..18f6b827 100644
--- a/.github/workflows/pre-release.yaml
+++ b/.github/workflows/pre-release.yaml
@@ -51,3 +51,11 @@ jobs:
         env:
           DIGEST: ${{ steps.build-and-push.outputs.digest }}
         run: cosign sign --yes "ghcr.io/sozercan/aikit@${DIGEST}"
+
+      - name: Verify image signature
+        env:
+          DIGEST: ${{ steps.build-and-push.outputs.digest }}
+        run: |
+          cosign verify ghcr.io/sozercan/${{ env.MODEL_NAME }}:${{ env.MODEL_SIZE }}${{ env.MODEL_RUNTIME }}@${DIGEST} \
+            --certificate-identity https://github.com/sozercan/aikit/.github/workflows/pre-release.yaml@refs/heads/main \
+            --certificate-oidc-issuer https://token.actions.githubusercontent.com | jq
diff --git a/.github/workflows/release.yaml b/.github/workflows/release.yaml
index 8a2cb95e..13f64e1a 100644
--- a/.github/workflows/release.yaml
+++ b/.github/workflows/release.yaml
@@ -58,3 +58,11 @@ jobs:
         env:
           DIGEST: ${{ steps.build-and-push.outputs.digest }}
         run: cosign sign --yes "ghcr.io/sozercan/aikit@${DIGEST}"
+
+      - name: Verify image signature
+        env:
+          DIGEST: ${{ steps.build-and-push.outputs.digest }}
+        run: |
+          cosign verify ghcr.io/sozercan/${{ env.MODEL_NAME }}:${{ env.MODEL_SIZE }}${{ env.MODEL_RUNTIME }}@${DIGEST} \
+            --certificate-identity https://github.com/sozercan/aikit/.github/workflows/release.yaml@refs/heads/main \
+            --certificate-oidc-issuer https://token.actions.githubusercontent.com | jq
diff --git a/.github/workflows/test-docker.yaml b/.github/workflows/test-docker.yaml
index 28c96856..978ea61c 100644
--- a/.github/workflows/test-docker.yaml
+++ b/.github/workflows/test-docker.yaml
@@ -42,10 +42,19 @@ jobs:
       - uses: crazy-max/ghaction-github-runtime@v3
 
       - name: build aikit
-        run: docker buildx build . -t aikit:test --load --cache-from=type=gha,scope=aikit --cache-to=type=gha,scope=aikit,mode=max
+        run: |
+          docker buildx build . -t aikit:test \
+            --load --progress plain \
+            --cache-from=type=gha,scope=aikit \
+            --cache-to=type=gha,scope=aikit,mode=max
 
       - name: build test model
-        run: docker buildx build . -t testmodel:test -f test/aikitfile.yaml --load --cache-from=type=gha,scope=testmodel --cache-to=type=gha,scope=testmodel,mode=max
+        run: |
+          docker buildx build . -t testmodel:test \
+            -f test/aikitfile.yaml \
+            --load --progress plain \
+            --cache-from=type=gha,scope=testmodel \
+            --cache-to=type=gha,scope=testmodel,mode=max
 
       - name: list images
         run: docker images
diff --git a/.github/workflows/update-models.yaml b/.github/workflows/update-models.yaml
index 4418dbf7..23b4a5d7 100644
--- a/.github/workflows/update-models.yaml
+++ b/.github/workflows/update-models.yaml
@@ -47,6 +47,7 @@ jobs:
 
       - name: Build and push
        uses: docker/build-push-action@v5
+        id: build-and-push
         with:
           push: true
           tags: ghcr.io/sozercan/${{ env.MODEL_NAME }}:${{ env.MODEL_SIZE }}${{ env.MODEL_RUNTIME }}
@@ -59,7 +60,7 @@ jobs:
         run: cosign sign --yes "ghcr.io/sozercan/${{ env.MODEL_NAME }}:${{ env.MODEL_SIZE }}${{ env.MODEL_RUNTIME }}@${DIGEST}"
 
-      - name: verify image signature
+      - name: Verify image signature
         env:
           DIGEST: ${{ steps.build-and-push.outputs.digest }}
         run: |
diff --git a/test/aikitfile.yaml b/test/aikitfile.yaml
index 7b11030d..ada0c5ef 100644
--- a/test/aikitfile.yaml
+++ b/test/aikitfile.yaml
@@ -14,3 +14,4 @@ config: |
       top_p: 0.7
       model: tinyllama-1.1b-chat-v0.3.Q4_K_M.gguf
     context_size: 1000
+    mmap: false
\ No newline at end of file
diff --git a/test/bats/test.bats b/test/bats/test.bats
index 90ab1631..0884d9f7 100644
--- a/test/bats/test.bats
+++ b/test/bats/test.bats
@@ -5,9 +5,9 @@ load helpers
 WAIT_TIME=120
 SLEEP_TIME=1
 
-@test "send request to llama-2-7b-chat" {
+@test "send request to tinyllama-1.1b-chat" {
   run curl --retry 20 --retry-all-errors http://localhost:8080/v1/chat/completions -H "Content-Type: application/json" -d '{
-    "model": "llama-2-7b-chat",
+    "model": "tinyllama-1.1b-chat",
     "messages": [{"role": "user", "content": "explain kubernetes in a sentence"}],
   }'
   assert_success
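Patch 3 verifies each signature in the same run that produced it, which catches a broken signing setup immediately. For keyless signatures both flags are mandatory: `--certificate-oidc-issuer` pins the identity provider and `--certificate-identity` pins the exact workflow ref that signed. (Note the copy-paste slip here: the pre-release and release verify steps still reference the `env.MODEL_NAME`/`MODEL_SIZE`/`MODEL_RUNTIME` variables that only exist in update-models.yaml; patches 5 and 8 below correct them to `ghcr.io/sozercan/aikit@${DIGEST}`.) Verifying a published image from a workstation looks the same, as a sketch:

    cosign verify ghcr.io/sozercan/aikit:latest \
      --certificate-oidc-issuer https://token.actions.githubusercontent.com \
      --certificate-identity https://github.com/sozercan/aikit/.github/workflows/release.yaml@refs/heads/main \
      | jq .   # the verification payload is JSON; jq only pretty-prints it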
From a78e3eb3364c9758eeef6f9be84a42ba1b098679 Mon Sep 17 00:00:00 2001
From: Sertac Ozercan
Date: Mon, 4 Dec 2023 02:06:48 +0000
Subject: [PATCH 4/9] test

Signed-off-by: Sertac Ozercan
---
 .github/workflows/pre-release.yaml | 3 ++-
 1 file changed, 2 insertions(+), 1 deletion(-)

diff --git a/.github/workflows/pre-release.yaml b/.github/workflows/pre-release.yaml
index 18f6b827..b861b8dc 100644
--- a/.github/workflows/pre-release.yaml
+++ b/.github/workflows/pre-release.yaml
@@ -1,7 +1,8 @@
 name: pre-release
 
 on:
-  push:
+  # push:
+  pull_request:
     branches:
       - main

From e367411b01f97e64615a62ff4d697b8f90096cc0 Mon Sep 17 00:00:00 2001
From: Sertac Ozercan
Date: Mon, 4 Dec 2023 02:13:20 +0000
Subject: [PATCH 5/9] test

Signed-off-by: Sertac Ozercan
---
 .github/workflows/pre-release.yaml | 7 ++++---
 .github/workflows/release.yaml     | 2 +-
 2 files changed, 5 insertions(+), 4 deletions(-)

diff --git a/.github/workflows/pre-release.yaml b/.github/workflows/pre-release.yaml
index b861b8dc..80cb56bb 100644
--- a/.github/workflows/pre-release.yaml
+++ b/.github/workflows/pre-release.yaml
@@ -12,7 +12,7 @@ permissions:
   id-token: write
 
 jobs:
-  release:
+  pre-release:
     runs-on: ubuntu-latest
     timeout-minutes: 360
     steps:
@@ -57,6 +57,7 @@ jobs:
         env:
           DIGEST: ${{ steps.build-and-push.outputs.digest }}
         run: |
-          cosign verify ghcr.io/sozercan/${{ env.MODEL_NAME }}:${{ env.MODEL_SIZE }}${{ env.MODEL_RUNTIME }}@${DIGEST} \
-            --certificate-identity https://github.com/sozercan/aikit/.github/workflows/pre-release.yaml@refs/heads/main \
+          # refs/heads/main
+          cosign verify ghcr.io/sozercan/aikit@${DIGEST} \
+            --certificate-identity https://github.com/sozercan/aikit/.github/workflows/pre-release.yaml@refs/pull/14/merge \
             --certificate-oidc-issuer https://token.actions.githubusercontent.com | jq
diff --git a/.github/workflows/release.yaml b/.github/workflows/release.yaml
index 13f64e1a..610773fe 100644
--- a/.github/workflows/release.yaml
+++ b/.github/workflows/release.yaml
@@ -11,7 +11,7 @@ permissions:
   id-token: write
 
 jobs:
-  release:
+  release:
     runs-on: ubuntu-latest
     timeout-minutes: 360
     steps:
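Patches 4 and 5 surface the subtlety these test commits are probing: the certificate identity embeds the ref the signing run executed on, so a workflow triggered by `pull_request` signs as `...@refs/pull/14/merge`, not `...@refs/heads/main`, and verification pinned to main fails. When the exact ref cannot be pinned, cosign also accepts a pattern — a sketch, not used by this series:

    # Accept signatures from this workflow regardless of the triggering ref.
    cosign verify ghcr.io/sozercan/aikit:dev \
      --certificate-oidc-issuer https://token.actions.githubusercontent.com \
      --certificate-identity-regexp 'https://github.com/sozercan/aikit/\.github/workflows/pre-release\.yaml@.*'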
From 171d9ceac84181fa5e5e1c62736f061861f11d70 Mon Sep 17 00:00:00 2001
From: Sertac Ozercan
Date: Mon, 4 Dec 2023 03:42:06 +0000
Subject: [PATCH 6/9] test

Signed-off-by: Sertac Ozercan
---
 .github/workflows/pre-release.yaml | 8 +++-----
 .github/workflows/test-docker.yaml | 8 ++++++--
 test/aikitfile.yaml                | 2 +-
 3 files changed, 10 insertions(+), 8 deletions(-)

diff --git a/.github/workflows/pre-release.yaml b/.github/workflows/pre-release.yaml
index 80cb56bb..d26b2109 100644
--- a/.github/workflows/pre-release.yaml
+++ b/.github/workflows/pre-release.yaml
@@ -1,8 +1,7 @@
 name: pre-release
 
 on:
-  # push:
-  pull_request:
+  push:
     branches:
       - main
@@ -57,7 +56,6 @@ jobs:
         env:
           DIGEST: ${{ steps.build-and-push.outputs.digest }}
         run: |
-          # refs/heads/main
           cosign verify ghcr.io/sozercan/aikit@${DIGEST} \
-            --certificate-identity https://github.com/sozercan/aikit/.github/workflows/pre-release.yaml@refs/pull/14/merge \
-            --certificate-oidc-issuer https://token.actions.githubusercontent.com | jq
+            --certificate-oidc-issuer https://token.actions.githubusercontent.com \
+            --certificate-identity https://github.com/sozercan/aikit/.github/workflows/pre-release.yaml@refs/heads/main
diff --git a/.github/workflows/test-docker.yaml b/.github/workflows/test-docker.yaml
index 978ea61c..86ed83ba 100644
--- a/.github/workflows/test-docker.yaml
+++ b/.github/workflows/test-docker.yaml
@@ -65,5 +65,9 @@ jobs:
       - name: install e2e dependencies
         run: make test-e2e-dependencies
 
-      - name: run bats test
-        run: make test-e2e
+      - name: run test
+        run: |
+          curl --retry 20 --retry-delay 30 --retry-all-errors http://localhost:8080/v1/chat/completions -H "Content-Type: application/json" -d '{
+            "model": "tinyllama-1.1b-chat",
+            "messages": [{"role": "user", "content": "explain kubernetes in a sentence"}],
+          }'
diff --git a/test/aikitfile.yaml b/test/aikitfile.yaml
index ada0c5ef..935d007c 100644
--- a/test/aikitfile.yaml
+++ b/test/aikitfile.yaml
@@ -14,4 +14,4 @@ config: |
       top_p: 0.7
       model: tinyllama-1.1b-chat-v0.3.Q4_K_M.gguf
     context_size: 1000
-    mmap: false
\ No newline at end of file
+    mmap: false

From 7efaa3dd8f6092078fcdf2d767cfa231de8e2f2f Mon Sep 17 00:00:00 2001
From: Sertac Ozercan
Date: Mon, 4 Dec 2023 04:23:35 +0000
Subject: [PATCH 7/9] test

Signed-off-by: Sertac Ozercan
---
 .github/workflows/test-docker.yaml | 19 ++++++++++++++-----
 test/aikitfile.yaml                |  6 +++---
 2 files changed, 17 insertions(+), 8 deletions(-)

diff --git a/.github/workflows/test-docker.yaml b/.github/workflows/test-docker.yaml
index 86ed83ba..b1d0098b 100644
--- a/.github/workflows/test-docker.yaml
+++ b/.github/workflows/test-docker.yaml
@@ -60,14 +60,23 @@ jobs:
         run: docker images
 
       - name: run test model
-        run: docker run -d -p 8080:8080 testmodel:test
+        run: docker run --name testmodel -d -p 8080:8080 testmodel:test
 
       - name: install e2e dependencies
         run: make test-e2e-dependencies
 
       - name: run test
         run: |
-          curl --retry 20 --retry-delay 30 --retry-all-errors http://localhost:8080/v1/chat/completions -H "Content-Type: application/json" -d '{
-            "model": "tinyllama-1.1b-chat",
-            "messages": [{"role": "user", "content": "explain kubernetes in a sentence"}],
-          }'
+          curl --retry 20 --retry-delay 30 --retry-all-errors http://localhost:8080/v1/chat/completions -H "Content-Type: application/json" -d '{"model": "tinyllama-1.1b-chat", "messages": [{"role": "user", "content": "explain kubernetes in a sentence"}]}'
+
+      - name: save logs
+        if: always()
+        run: |
+          docker logs testmodel > docker.log
+
+      - name: publish logs
+        if: always()
+        uses: actions/upload-artifact@v3
+        with:
+          name: docker-logs
+          path: docker.log
diff --git a/test/aikitfile.yaml b/test/aikitfile.yaml
index 935d007c..aeefa534 100644
--- a/test/aikitfile.yaml
+++ b/test/aikitfile.yaml
@@ -9,9 +9,9 @@ config: |
   - name: tinyllama-1.1b-chat
     backend: llama
     parameters:
-      top_k: 80
-      temperature: 0.2
-      top_p: 0.7
+      top_k: 40
+      temperature: 0
+      top_p: 0.1
       model: tinyllama-1.1b-chat-v0.3.Q4_K_M.gguf
     context_size: 1000
     mmap: false
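Naming the container in patch 7 is what makes the `if: always()` log capture possible, so a failing smoke test still leaves a docker.log artifact behind. A readiness poll before the request would avoid spending curl retries while the model loads; a sketch, assuming the server exposes the OpenAI-compatible /v1/models route:

    docker run --name testmodel -d -p 8080:8080 testmodel:test
    # Poll until the API answers (up to ~2 minutes) before the real request.
    for i in $(seq 1 60); do
      curl -fsS http://127.0.0.1:8080/v1/models >/dev/null 2>&1 && break
      sleep 2
    done
    docker logs testmodel > docker.log   # capture logs even if the request fails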
From 0ea86cee1f28291a0d7c1264ac8bb3df6148dd8c Mon Sep 17 00:00:00 2001
From: Sertac Ozercan
Date: Mon, 4 Dec 2023 04:38:42 +0000
Subject: [PATCH 8/9] test

Signed-off-by: Sertac Ozercan
---
 .github/workflows/release.yaml       | 6 +++---
 .github/workflows/test-docker.yaml   | 2 +-
 .github/workflows/update-models.yaml | 8 ++++----
 3 files changed, 8 insertions(+), 8 deletions(-)

diff --git a/.github/workflows/release.yaml b/.github/workflows/release.yaml
index 610773fe..488e3d66 100644
--- a/.github/workflows/release.yaml
+++ b/.github/workflows/release.yaml
@@ -63,6 +63,6 @@ jobs:
         env:
           DIGEST: ${{ steps.build-and-push.outputs.digest }}
         run: |
-          cosign verify ghcr.io/sozercan/${{ env.MODEL_NAME }}:${{ env.MODEL_SIZE }}${{ env.MODEL_RUNTIME }}@${DIGEST} \
-            --certificate-identity https://github.com/sozercan/aikit/.github/workflows/release.yaml@refs/heads/main \
-            --certificate-oidc-issuer https://token.actions.githubusercontent.com | jq
+          cosign verify ghcr.io/sozercan/aikit@${DIGEST} \
+            --certificate-oidc-issuer https://token.actions.githubusercontent.com \
+            --certificate-identity https://github.com/sozercan/aikit/.github/workflows/release.yaml@refs/heads/main
diff --git a/.github/workflows/test-docker.yaml b/.github/workflows/test-docker.yaml
index b1d0098b..f7900b2c 100644
--- a/.github/workflows/test-docker.yaml
+++ b/.github/workflows/test-docker.yaml
@@ -67,7 +67,7 @@ jobs:
 
       - name: run test
         run: |
-          curl --retry 20 --retry-delay 30 --retry-all-errors http://localhost:8080/v1/chat/completions -H "Content-Type: application/json" -d '{"model": "tinyllama-1.1b-chat", "messages": [{"role": "user", "content": "explain kubernetes in a sentence"}]}'
+          curl http://localhost:8080/v1/chat/completions -H "Content-Type: application/json" -d '{"model": "tinyllama-1.1b-chat", "messages": [{"role": "user", "content": "explain kubernetes in a sentence"}]}'
diff --git a/.github/workflows/update-models.yaml b/.github/workflows/update-models.yaml
index 23b4a5d7..f76f98fd 100644
--- a/.github/workflows/update-models.yaml
+++ b/.github/workflows/update-models.yaml
@@ -52,8 +52,8 @@ jobs:
           push: true
           tags: ghcr.io/sozercan/${{ env.MODEL_NAME }}:${{ env.MODEL_SIZE }}${{ env.MODEL_RUNTIME }}
           file: models/${{ matrix.model }}.yaml
-          cache-from: type=gha,scope=${{ env.MODEL_NAME }}:${{ env.MODEL_SIZE }}
-          cache-to: type=gha,scope=${{ env.MODEL_NAME }}:${{ env.MODEL_SIZE }},mode=max
+          cache-from: type=gha,scope=${{ env.MODEL_NAME }}-${{ env.MODEL_SIZE }}
+          cache-to: type=gha,scope=${{ env.MODEL_NAME }}-${{ env.MODEL_SIZE }},mode=max
@@ -65,5 +65,5 @@ jobs:
         env:
           DIGEST: ${{ steps.build-and-push.outputs.digest }}
         run: |
           cosign verify ghcr.io/sozercan/${{ env.MODEL_NAME }}:${{ env.MODEL_SIZE }}${{ env.MODEL_RUNTIME }}@${DIGEST} \
-            --certificate-identity https://github.com/sozercan/aikit/.github/workflows/update-models.yaml@refs/heads/main \
-            --certificate-oidc-issuer https://token.actions.githubusercontent.com | jq
+            --certificate-oidc-issuer https://token.actions.githubusercontent.com \
+            --certificate-identity https://github.com/sozercan/aikit/.github/workflows/update-models.yaml@refs/heads/main

From 1324ee2154c7a0cef6848b5d2dc39aed423517af Mon Sep 17 00:00:00 2001
From: Sertac Ozercan
Date: Mon, 4 Dec 2023 07:17:23 +0000
Subject: [PATCH 9/9] test

Signed-off-by: Sertac Ozercan
---
 .github/workflows/test-docker.yaml |  2 +-
 pkg/aikit2llb/convert.go           |  7 ++++---
 test/aikitfile-cuda.yaml           | 14 +++++++-------
 test/aikitfile.yaml                | 19 +++++++++----------
 4 files changed, 21 insertions(+), 21 deletions(-)

diff --git a/.github/workflows/test-docker.yaml b/.github/workflows/test-docker.yaml
index f7900b2c..a70ae078 100644
--- a/.github/workflows/test-docker.yaml
+++ b/.github/workflows/test-docker.yaml
@@ -67,7 +67,7 @@ jobs:
 
       - name: run test
         run: |
-          curl http://localhost:8080/v1/chat/completions -H "Content-Type: application/json" -d '{"model": "tinyllama-1.1b-chat", "messages": [{"role": "user", "content": "explain kubernetes in a sentence"}]}'
+          curl http://127.0.0.1:8080/v1/chat/completions -H "Content-Type: application/json" -d '{"model": "tinyllama-1.1b-chat", "messages": [{"role": "user", "content": "explain kubernetes in a sentence"}]}'
diff --git a/pkg/aikit2llb/convert.go b/pkg/aikit2llb/convert.go
index 634f95f7..b90b1d03 100644
--- a/pkg/aikit2llb/convert.go
+++ b/pkg/aikit2llb/convert.go
@@ -25,10 +25,10 @@ func Aikit2LLB(c *config.Config) (llb.State, *specs.Image) {
 	s, merge = copyModels(c, s)
 	s, merge = addLocalAI(c, s, merge)
 	if c.Runtime == utils.RuntimeNVIDIA {
-		s = installCuda(s, merge)
+		merge = installCuda(s, merge)
 	}
 	imageCfg := NewImageConfig(c)
-	return s, imageCfg
+	return merge, imageCfg
 }
 
 func copyModels(c *config.Config, s llb.State) (llb.State, llb.State) {
@@ -87,8 +87,9 @@ func installCuda(s llb.State, merge llb.State) llb.State {
 		llb.WithCustomName("Copying "+fileNameFromURL(cudaKeyringURL)), //nolint: goconst
 	)
 	s = s.Run(shf("dpkg -i cuda-keyring_1.1-1_all.deb && rm cuda-keyring_1.1-1_all.deb")).Root()
+	s = s.Run(shf("apt-get update && apt-get install -y ca-certificates && apt-get update"), llb.IgnoreCache).Root()
 	savedState := s
-	s = s.Run(shf("apt-get update && apt-get install -y ca-certificates && apt-get update && apt-get install -y libcublas-%[1]s cuda-cudart-%[1]s && apt-get clean", cudaVersion), llb.IgnoreCache).Root()
+	s = s.Run(shf("apt-get install -y libcublas-%[1]s cuda-cudart-%[1]s && apt-get clean", cudaVersion)).Root()
 
 	diff := llb.Diff(savedState, s)
 	merge = llb.Merge([]llb.State{merge, diff})
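The convert.go change does two things: the function now returns the merged state, so the CUDA layers actually land in the final image, and `apt-get update` is split into its own `llb.IgnoreCache` step so package indexes stay fresh while the much larger install step remains cacheable; `llb.Diff(savedState, s)` then folds only the install step's filesystem delta into the merge. In plain shell terms the layering is, as a sketch (the version suffix is a placeholder for the `cudaVersion` constant in the source):

    # Step 1: always re-executed (IgnoreCache) so the apt index stays current.
    apt-get update && apt-get install -y ca-certificates && apt-get update
    # Step 2: cacheable; only this step's filesystem diff is merged into the image.
    apt-get install -y "libcublas-${CUDA_VERSION}" "cuda-cudart-${CUDA_VERSION}" && apt-get clean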
diff --git a/test/aikitfile-cuda.yaml b/test/aikitfile-cuda.yaml
index 24bd2d4e..c282fc98 100644
--- a/test/aikitfile-cuda.yaml
+++ b/test/aikitfile-cuda.yaml
@@ -3,19 +3,19 @@ apiVersion: v1alpha1
 debug: true
 runtime: cuda
 models:
-  - name: tinyllama-1.1b-chat
-    source: https://huggingface.co/TheBloke/TinyLlama-1.1B-Chat-v0.3-GGUF/resolve/main/tinyllama-1.1b-chat-v0.3.Q4_K_M.gguf
-    sha256: "2d3bd82534bb6c6e0f4be1761b29b25bdcd65855a022513cb65f24ad3b25e41f"
+  - name: llama-2-7b-chat
+    source: https://huggingface.co/TheBloke/Llama-2-7B-Chat-GGUF/resolve/main/llama-2-7b-chat.Q4_K_M.gguf
+    sha256: "08a5566d61d7cb6b420c3e4387a39e0078e1f2fe5f055f3a03887385304d4bfa"
 config: |
-  - name: tinyllama-1.1b-chat
+  - name: llama-2-7b-chat
     backend: llama
     parameters:
       top_k: 80
       temperature: 0.2
       top_p: 0.7
-      model: tinyllama-1.1b-chat-v0.3.Q4_K_M.gguf
-    context_size: 1000
-    gpu_layers: 10
+      model: llama-2-7b-chat.Q4_K_M.gguf
+    context_size: 4096
+    gpu_layers: 35
     f16: true
     batch: 512
     mmap: true
diff --git a/test/aikitfile.yaml b/test/aikitfile.yaml
index aeefa534..d88de38b 100644
--- a/test/aikitfile.yaml
+++ b/test/aikitfile.yaml
@@ -2,15 +2,15 @@ apiVersion: v1alpha1
 debug: true
 models:
-  - name: tinyllama-1.1b-chat
-    source: https://huggingface.co/TheBloke/TinyLlama-1.1B-Chat-v0.3-GGUF/resolve/main/tinyllama-1.1b-chat-v0.3.Q4_K_M.gguf
-    sha256: "2d3bd82534bb6c6e0f4be1761b29b25bdcd65855a022513cb65f24ad3b25e41f"
+  - name: llama-2-7b-chat
+    source: https://huggingface.co/TheBloke/Llama-2-7B-Chat-GGUF/resolve/main/llama-2-7b-chat.Q4_K_M.gguf
+    sha256: "08a5566d61d7cb6b420c3e4387a39e0078e1f2fe5f055f3a03887385304d4bfa"
 config: |
-  - name: tinyllama-1.1b-chat
+  - name: llama-2-7b-chat
     backend: llama
     parameters:
-      top_k: 40
-      temperature: 0
-      top_p: 0.1
+      top_k: 80
+      temperature: 0.2
+      top_p: 0.7
-      model: tinyllama-1.1b-chat-v0.3.Q4_K_M.gguf
-    context_size: 1000
-    mmap: false
+      model: llama-2-7b-chat.Q4_K_M.gguf
+    context_size: 4096
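With patch 9 the test files are back on llama-2-7b-chat; the CUDA variant differs only in `runtime: cuda` plus the offload knobs (`gpu_layers: 35`, `f16: true`). Running a CUDA build locally would look roughly like the following sketch (image tag hypothetical; `--gpus all` requires the NVIDIA container toolkit on the host):

    docker buildx build . -t my-model --load -f test/aikitfile-cuda.yaml
    docker run --rm --gpus all -p 8080:8080 my-model
    curl http://127.0.0.1:8080/v1/chat/completions -H "Content-Type: application/json" \
      -d '{"model": "llama-2-7b-chat", "messages": [{"role": "user", "content": "explain kubernetes in a sentence"}]}'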