From 5dde5b0ea694e0421473c2a3805877a1f710e861 Mon Sep 17 00:00:00 2001
From: Sertac Ozercan
Date: Sun, 3 Dec 2023 23:13:07 +0000
Subject: [PATCH 1/9] ci: update release process and sign

Signed-off-by: Sertac Ozercan
---
 .github/workflows/pre-release.yaml            | 29 +++++++---
 .github/workflows/release.yaml                | 40 ++++++++++---
 .github/workflows/test-docker.yaml            | 20 ++++---
 .github/workflows/update-models.yaml          | 58 ++++++++++++++-----
 ...b-chat.yaml => llama-2-13b-chat-cuda.yaml} |  0
 ...7b-chat.yaml => llama-2-7b-chat-cuda.yaml} |  0
 .../orca-2-13b.yaml => orca-2-13b-cuda.yaml}  |  0
 test/aikitfile-cuda.yaml                      |  2 +-
 test/aikitfile.yaml                           |  2 +-
 9 files changed, 110 insertions(+), 41 deletions(-)
 rename models/{cuda/llama-2-13b-chat.yaml => llama-2-13b-chat-cuda.yaml} (100%)
 rename models/{cuda/llama-2-7b-chat.yaml => llama-2-7b-chat-cuda.yaml} (100%)
 rename models/{cuda/orca-2-13b.yaml => orca-2-13b-cuda.yaml} (100%)

diff --git a/.github/workflows/pre-release.yaml b/.github/workflows/pre-release.yaml
index 1ffe70ed..af59653b 100644
--- a/.github/workflows/pre-release.yaml
+++ b/.github/workflows/pre-release.yaml
@@ -8,6 +8,7 @@ on:
 permissions:
   contents: write
   packages: write
+  id-token: write
 
 jobs:
   release:
@@ -18,25 +19,35 @@ jobs:
 
       - name: cleanup disk space
         run: |
-          df -H
           docker system prune -f -a --volumes
           sudo rm -rf /usr/share/dotnet
           sudo rm -rf /opt/ghc
           sudo rm -rf "/usr/local/share/boost"
           sudo rm -rf "$AGENT_TOOLSDIRECTORY"
-          df -H
 
-      - name: Login to ghcr
+      - name: Install Cosign
+        uses: sigstore/cosign-installer@v3
+
+      - name: Set up Docker Buildx
+        uses: docker/setup-buildx-action@v3
+
+      - name: Login to GHCR
         uses: docker/login-action@v3
         with:
           registry: ghcr.io
           username: ${{ github.actor }}
           password: ${{ secrets.GITHUB_TOKEN }}
 
-      - name: create buildx builder
-        run: docker buildx create --use --name builder --bootstrap
-      - uses: crazy-max/ghaction-github-runtime@v3
 
-      - name: Push aikit:dev to GHCR
-        run: |
-          docker buildx build -t ghcr.io/sozercan/aikit:dev --push .
+      - name: Build and push
+        uses: docker/build-push-action@v5
+        id: build-and-push
+        with:
+          push: true
+          tags: ghcr.io/sozercan/aikit:dev
+          cache-from: type=gha,scope=aikit
+          cache-to: type=gha,scope=aikit,mode=max
+
+      - name: Sign the images with GitHub OIDC Token
+        env:
+          DIGEST: ${{ steps.build-and-push.outputs.digest }}
+        run: cosign sign --yes "ghcr.io/sozercan/aikit@${DIGEST}"
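The pre-release workflow above now builds with docker/build-push-action and signs the pushed image keylessly; `id-token: write` is what lets the job mint a GitHub OIDC token for cosign to exchange for a signing certificate. Outside CI the same operation is just a digest-pinned `cosign sign` — a minimal sketch, not the repo's tooling (the digest placeholder stands in for build-push-action's `digest` output; tags are mutable, digests are not):

    # Keyless signing sketch: cosign trades the ambient OIDC identity for a
    # short-lived certificate and records the signature in a transparency log.
    IMAGE=ghcr.io/sozercan/aikit
    DIGEST=sha256:...   # digest of the pushed manifest, taken from the build step
    cosign sign --yes "${IMAGE}@${DIGEST}"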
diff --git a/.github/workflows/release.yaml b/.github/workflows/release.yaml
index 5cb7f0cd..8a2cb95e 100644
--- a/.github/workflows/release.yaml
+++ b/.github/workflows/release.yaml
@@ -8,6 +8,7 @@ on:
 permissions:
   contents: write
   packages: write
+  id-token: write
 
 jobs:
   release:
@@ -15,10 +16,23 @@ jobs:
     runs-on: ubuntu-latest
     timeout-minutes: 360
     steps:
       - uses: actions/checkout@v4
-        with:
-          fetch-depth: 0
 
-      - name: Login to ghcr
+      - name: cleanup disk space
+        run: |
+          docker system prune -f -a --volumes
+          sudo rm -rf /usr/share/dotnet
+          sudo rm -rf /opt/ghc
+          sudo rm -rf "/usr/local/share/boost"
+          sudo rm -rf "$AGENT_TOOLSDIRECTORY"
+
+      - name: Install Cosign
+        uses: sigstore/cosign-installer@v3
+
+      - name: Set up Docker Buildx
+        uses: docker/setup-buildx-action@v3
+
+      - uses: crazy-max/ghaction-github-runtime@v3
+
+      - name: Login to GHCR
         uses: docker/login-action@v3
         with:
           registry: ghcr.io
@@ -29,10 +43,18 @@ jobs:
         run: |
           echo "TAG=${GITHUB_REF#refs/tags/}" >> $GITHUB_ENV
 
-      - name: Push aikit:latest to GHCR
-        run: |
-          docker buildx build . -t ghcr.io/sozercan/aikit:${TAG} -t ghcr.io/sozercan/aikit:latest --push --cache-from=type=gha,scope=aikit --cache-to=type=gha,scope=aikit,mode=max
+      - name: Build and push
+        uses: docker/build-push-action@v5
+        id: build-and-push
+        with:
+          push: true
+          tags: |
+            ghcr.io/sozercan/aikit:${TAG}
+            ghcr.io/sozercan/aikit:latest
+          cache-from: type=gha,scope=aikit
+          cache-to: type=gha,scope=aikit,mode=max
 
-      # - name: release llama 2 image
-      #   run: |
-      #     docker buildx build -t ghcr.io/sozercan/llama2:7b -t ghcr.io/sozercan/llama2:7b-chat -t ghcr.io/sozercan/llama2:chat -t ghcr.io/sozercan/llama2:latest -f models/llama-2-chat-7b.yaml --push --cache-from=type=gha,scope=llama-2-7b-chat --cache-to=type=gha,scope=llama-2-7b-chat,mode=max
+      - name: Sign the images with GitHub OIDC Token
+        env:
+          DIGEST: ${{ steps.build-and-push.outputs.digest }}
+        run: cosign sign --yes "ghcr.io/sozercan/aikit@${DIGEST}"
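Both release workflows now share a BuildKit layer cache on GitHub Actions: `scope=aikit` keeps the frontend's cache entries separate from the per-model caches used later, and `mode=max` exports intermediate layers as well as the final ones. One caveat worth noting: build-push-action inputs are not shell-expanded, so the `${TAG}` above only resolves if written as `${{ env.TAG }}`. The raw buildx equivalent of the cached build, as a sketch (the crazy-max/ghaction-github-runtime step is what exposes the runtime token the `gha` cache backend needs):

    docker buildx build . \
      -t ghcr.io/sozercan/aikit:latest \
      --push \
      --cache-from type=gha,scope=aikit \
      --cache-to type=gha,scope=aikit,mode=max   # mode=max also exports intermediate layers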
diff --git a/.github/workflows/test-docker.yaml b/.github/workflows/test-docker.yaml
index e4fc5710..28c96856 100644
--- a/.github/workflows/test-docker.yaml
+++ b/.github/workflows/test-docker.yaml
@@ -23,29 +23,35 @@ jobs:
 
       - name: cleanup disk space
         run: |
-          df -H
           docker system prune -f -a --volumes
           sudo rm -rf /usr/share/dotnet
           sudo rm -rf /opt/ghc
           sudo rm -rf "/usr/local/share/boost"
           sudo rm -rf "$AGENT_TOOLSDIRECTORY"
-          df -H
 
-      - name: create buildx builder
-        run: docker buildx create --use --name builder --bootstrap
+      # need containerd image store for testing local images
+      - uses: crazy-max/ghaction-setup-docker@v2
+        with:
+          daemon-config: |
+            {
+              "debug": true,
+              "features": {
+                "containerd-snapshotter": true
+              }
+            }
 
       - uses: crazy-max/ghaction-github-runtime@v3
 
       - name: build aikit
-        run: docker buildx build . -t sozercan/aikit:test --load --cache-from=type=gha,scope=aikit --cache-to=type=gha,scope=aikit,mode=max
+        run: docker buildx build . -t aikit:test --load --cache-from=type=gha,scope=aikit --cache-to=type=gha,scope=aikit,mode=max
 
       - name: build test model
-        run: docker buildx build . -t sozercan/testmodel:test -f test/aikitfile.yaml --load --cache-from=type=gha,scope=testmodel --cache-to=type=gha,scope=testmodel,mode=max
+        run: docker buildx build . -t testmodel:test -f test/aikitfile.yaml --load --cache-from=type=gha,scope=testmodel --cache-to=type=gha,scope=testmodel,mode=max
 
       - name: list images
         run: docker images
 
       - name: run test model
-        run: docker run -d -p 8080:8080 sozercan/testmodel:test
+        run: docker run -d -p 8080:8080 testmodel:test
 
       - name: install e2e dependencies
         run: make test-e2e-dependencies
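The comment in the workflow captures the reason for the daemon swap: an image built with `--load` can only be referenced by a later `#syntax=` line if the daemon uses the containerd image store. Outside the action, the equivalent is a daemon.json feature flag; a sketch, assuming a systemd host:

    # /etc/docker/daemon.json would contain:
    #   { "features": { "containerd-snapshotter": true } }
    sudo systemctl restart docker
    docker info -f '{{ .DriverStatus }}'   # should mention io.containerd.snapshotter.v1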
diff --git a/.github/workflows/update-models.yaml b/.github/workflows/update-models.yaml
index 2d5d3280..4418dbf7 100644
--- a/.github/workflows/update-models.yaml
+++ b/.github/workflows/update-models.yaml
@@ -6,14 +6,32 @@ on:
 permissions:
   contents: write
   packages: write
+  id-token: write
 
 jobs:
   update-models:
+    strategy:
+      fail-fast: false
+      matrix:
+        model:
+          - llama-2-7b-chat
+          - llama-2-13b-chat
+          - orca-2-13b
+          - llama-2-7b-chat-cuda
+          - llama-2-13b-chat-cuda
+          - orca-2-13b-cuda
     runs-on: ubuntu-latest
     timeout-minutes: 360
     steps:
       - uses: actions/checkout@v4
 
+      - name: Install Cosign
+        uses: sigstore/cosign-installer@v3
+
+      - name: Set up Docker Buildx
+        uses: docker/setup-buildx-action@v3
+
       - uses: crazy-max/ghaction-github-runtime@v3
 
       - name: Login to GHCR
         uses: docker/login-action@v3
         with:
           registry: ghcr.io
@@ -21,18 +39,30 @@ jobs:
           username: ${{ github.actor }}
           password: ${{ secrets.GITHUB_TOKEN }}
 
-      - name: Push llama 2 models to GHCR
+      - name: parse matrix
+        run: |
+          echo "MODEL_NAME=$(echo ${{ matrix.model }} | cut -d'-' -f1-2 | sed 's/-//g')" >> $GITHUB_ENV
+          echo "MODEL_SIZE=$(echo ${{ matrix.model }} | rev | cut -d'-' -f2 | rev)" >> $GITHUB_ENV
+          echo "MODEL_RUNTIME=-$(echo ${{ matrix.model }} | rev | cut -d'-' -f1 | rev)" >> $GITHUB_ENV
+
+      - name: Build and push
+        uses: docker/build-push-action@v5
+        with:
+          push: true
+          tags: ghcr.io/sozercan/${{ env.MODEL_NAME }}:${{ env.MODEL_SIZE }}${{ env.MODEL_RUNTIME }}
+          file: models/${{ matrix.model }}.yaml
+          cache-from: type=gha,scope=${{ env.MODEL_NAME }}:${{ env.MODEL_SIZE }}
+          cache-to: type=gha,scope=${{ env.MODEL_NAME }}:${{ env.MODEL_SIZE }},mode=max
+
+      - name: Sign the images with GitHub OIDC Token
+        env:
+          DIGEST: ${{ steps.build-and-push.outputs.digest }}
+        run: cosign sign --yes "ghcr.io/sozercan/${{ env.MODEL_NAME }}:${{ env.MODEL_SIZE }}${{ env.MODEL_RUNTIME }}@${DIGEST}"
+
+      - name: verify image signature
+        env:
+          DIGEST: ${{ steps.build-and-push.outputs.digest }}
         run: |
-          docker buildx create --use --name builder --bootstrap
-
-          # cpu avx
-          MODELS_PATH=models
-          docker buildx build . -t ghcr.io/sozercan/llama2:7b -f ${MODELS_PATH}/llama-2-7b-chat.yaml --push
-          docker buildx build . -t ghcr.io/sozercan/llama2:13b -f ${MODELS_PATH}/llama-2-13b-chat.yaml --push
-          docker buildx build . -t ghcr.io/sozercan/orca2:13b -f ${MODELS_PATH}/orca-2-13b.yaml --push
-
-          # cuda
-          CUDA_MODELS_PATH=models/cuda
-          docker buildx build . -t ghcr.io/sozercan/llama2:7b-cuda -f ${CUDA_MODELS_PATH}/llama-2-7b-chat.yaml --push
-          docker buildx build . -t ghcr.io/sozercan/llama2:13b-cuda -f ${CUDA_MODELS_PATH}/llama-2-13b-chat.yaml --push
-          docker buildx build . -t ghcr.io/sozercan/orca2:13b-cuda -f ${CUDA_MODELS_PATH}/orca-2-13b.yaml --push
\ No newline at end of file
+          cosign verify ghcr.io/sozercan/${{ env.MODEL_NAME }}:${{ env.MODEL_SIZE }}${{ env.MODEL_RUNTIME }}@${DIGEST} \
+            --certificate-identity https://github.com/sozercan/aikit/.github/workflows/update-models.yaml@refs/heads/main \
+            --certificate-oidc-issuer https://token.actions.githubusercontent.com | jq
diff --git a/models/cuda/llama-2-13b-chat.yaml b/models/llama-2-13b-chat-cuda.yaml
similarity index 100%
rename from models/cuda/llama-2-13b-chat.yaml
rename to models/llama-2-13b-chat-cuda.yaml
diff --git a/models/cuda/llama-2-7b-chat.yaml b/models/llama-2-7b-chat-cuda.yaml
similarity index 100%
rename from models/cuda/llama-2-7b-chat.yaml
rename to models/llama-2-7b-chat-cuda.yaml
diff --git a/models/cuda/orca-2-13b.yaml b/models/orca-2-13b-cuda.yaml
similarity index 100%
rename from models/cuda/orca-2-13b.yaml
rename to models/orca-2-13b-cuda.yaml
diff --git a/test/aikitfile-cuda.yaml b/test/aikitfile-cuda.yaml
index 98ff34dc..17888400 100644
--- a/test/aikitfile-cuda.yaml
+++ b/test/aikitfile-cuda.yaml
@@ -1,4 +1,4 @@
-#syntax=ghcr.io/sozercan/aikit:latest
+#syntax=aikit:test
 apiVersion: v1alpha1
 debug: true
 runtime: cuda
diff --git a/test/aikitfile.yaml b/test/aikitfile.yaml
index 856ac72c..d88de38b 100644
--- a/test/aikitfile.yaml
+++ b/test/aikitfile.yaml
@@ -1,4 +1,4 @@
-#syntax=sozercan/aikit:test
+#syntax=aikit:test
 apiVersion: v1alpha1
 debug: true
 models:
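Patch 1 also repoints both test aikitfiles at the locally loaded frontend: the `#syntax=` directive names the BuildKit frontend image that interprets the file, so CI exercises the `aikit:test` image it just built instead of a published one. The resulting local loop, as a sketch using the tags from the workflow:

    # Build the frontend itself, then build a model image through it
    # (the aikitfile's #syntax=aikit:test line selects the frontend).
    docker buildx build . -t aikit:test --load
    docker buildx build . -t testmodel:test -f test/aikitfile.yaml --load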
From a5223b1a1bf8f8809788edc08bc42ed292dc0340 Mon Sep 17 00:00:00 2001
From: Sertac Ozercan
Date: Mon, 4 Dec 2023 01:17:20 +0000
Subject: [PATCH 2/9] use tinyllama

Signed-off-by: Sertac Ozercan
---
 test/aikitfile-cuda.yaml | 33 +++++++--------------------------
 test/aikitfile.yaml      | 12 ++++++------
 2 files changed, 13 insertions(+), 32 deletions(-)

diff --git a/test/aikitfile-cuda.yaml b/test/aikitfile-cuda.yaml
index 17888400..24bd2d4e 100644
--- a/test/aikitfile-cuda.yaml
+++ b/test/aikitfile-cuda.yaml
@@ -3,38 +3,19 @@ apiVersion: v1alpha1
 debug: true
 runtime: cuda
 models:
-  - name: llama-2-7b-chat
-    source: https://huggingface.co/TheBloke/Llama-2-7B-Chat-GGUF/resolve/main/llama-2-7b-chat.Q4_K_M.gguf
-  - name: mistral-7b-instruct
-    source: https://huggingface.co/TheBloke/Mistral-7B-OpenOrca-GGUF/resolve/main/mistral-7b-openorca.Q6_K.gguf
+  - name: tinyllama-1.1b-chat
+    source: https://huggingface.co/TheBloke/TinyLlama-1.1B-Chat-v0.3-GGUF/resolve/main/tinyllama-1.1b-chat-v0.3.Q4_K_M.gguf
+    sha256: "2d3bd82534bb6c6e0f4be1761b29b25bdcd65855a022513cb65f24ad3b25e41f"
 config: |
-  - name: llama-2-7b-chat
+  - name: tinyllama-1.1b-chat
     backend: llama
     parameters:
       top_k: 80
       temperature: 0.2
       top_p: 0.7
-      model: llama-2-7b-chat.Q4_K_M.gguf
-    context_size: 4096
-    gpu_layers: 35
+      model: tinyllama-1.1b-chat-v0.3.Q4_K_M.gguf
+    context_size: 1000
+    gpu_layers: 10
     f16: true
     batch: 512
     mmap: true
-  - name: mistral-7b-instruct
-    context_size: 4096
-    threads: 4
-    parameters:
-      model: mistral-7b-openorca.Q6_K.gguf
-      temperature: 0.2
-      top_k: 40
-      top_p: 0.95
-    template:
-      chat_message: chatml
-      chat: chatml-block
-      completion: completion
-    stopwords:
-      - <|im_end|>
-    gpu_layers: 35
-    f16: true
-    batch: 512
-    mmap: true
\ No newline at end of file
diff --git a/test/aikitfile.yaml b/test/aikitfile.yaml
index d88de38b..7b11030d 100644
--- a/test/aikitfile.yaml
+++ b/test/aikitfile.yaml
@@ -2,15 +2,15 @@ apiVersion: v1alpha1
 debug: true
 models:
-  - name: llama-2-7b-chat
-    source: https://huggingface.co/TheBloke/Llama-2-7B-Chat-GGUF/resolve/main/llama-2-7b-chat.Q4_K_M.gguf
-    sha256: "08a5566d61d7cb6b420c3e4387a39e0078e1f2fe5f055f3a03887385304d4bfa"
+  - name: tinyllama-1.1b-chat
+    source: https://huggingface.co/TheBloke/TinyLlama-1.1B-Chat-v0.3-GGUF/resolve/main/tinyllama-1.1b-chat-v0.3.Q4_K_M.gguf
+    sha256: "2d3bd82534bb6c6e0f4be1761b29b25bdcd65855a022513cb65f24ad3b25e41f"
 config: |
-  - name: llama-2-7b-chat
+  - name: tinyllama-1.1b-chat
     backend: llama
     parameters:
       top_k: 80
       temperature: 0.2
       top_p: 0.7
-      model: llama-2-7b-chat.Q4_K_M.gguf
-    context_size: 4096
+      model: tinyllama-1.1b-chat-v0.3.Q4_K_M.gguf
+    context_size: 1000
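Patch 2 swaps the test model for the far smaller TinyLlama GGUF and pins both test files to an exact sha256, so a re-uploaded upstream file fails the build instead of silently changing test behavior. The check the builder has to perform amounts to the following sketch:

    SRC=https://huggingface.co/TheBloke/TinyLlama-1.1B-Chat-v0.3-GGUF/resolve/main/tinyllama-1.1b-chat-v0.3.Q4_K_M.gguf
    WANT=2d3bd82534bb6c6e0f4be1761b29b25bdcd65855a022513cb65f24ad3b25e41f
    curl -sSLO "$SRC"
    # sha256sum -c expects "<hash>  <filename>" and exits non-zero on mismatch.
    echo "$WANT  tinyllama-1.1b-chat-v0.3.Q4_K_M.gguf" | sha256sum -c -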
From 42ad5d731123dc165b7b9343359e25b6d7b42ad1 Mon Sep 17 00:00:00 2001
From: Sertac Ozercan
Date: Mon, 4 Dec 2023 01:47:44 +0000
Subject: [PATCH 3/9] use tinyllama

Signed-off-by: Sertac Ozercan
---
 .github/workflows/pre-release.yaml   |  8 ++++++++
 .github/workflows/release.yaml       |  8 ++++++++
 .github/workflows/test-docker.yaml   | 13 +++++++++++--
 .github/workflows/update-models.yaml |  3 ++-
 test/aikitfile.yaml                  |  1 +
 test/bats/test.bats                  |  4 ++--
 6 files changed, 32 insertions(+), 5 deletions(-)

diff --git a/.github/workflows/pre-release.yaml b/.github/workflows/pre-release.yaml
index af59653b..18f6b827 100644
--- a/.github/workflows/pre-release.yaml
+++ b/.github/workflows/pre-release.yaml
@@ -51,3 +51,11 @@ jobs:
         env:
           DIGEST: ${{ steps.build-and-push.outputs.digest }}
         run: cosign sign --yes "ghcr.io/sozercan/aikit@${DIGEST}"
+
+      - name: Verify image signature
+        env:
+          DIGEST: ${{ steps.build-and-push.outputs.digest }}
+        run: |
+          cosign verify ghcr.io/sozercan/${{ env.MODEL_NAME }}:${{ env.MODEL_SIZE }}${{ env.MODEL_RUNTIME }}@${DIGEST} \
+            --certificate-identity https://github.com/sozercan/aikit/.github/workflows/pre-release.yaml@refs/heads/main \
+            --certificate-oidc-issuer https://token.actions.githubusercontent.com | jq
diff --git a/.github/workflows/release.yaml b/.github/workflows/release.yaml
index 8a2cb95e..13f64e1a 100644
--- a/.github/workflows/release.yaml
+++ b/.github/workflows/release.yaml
@@ -58,3 +58,11 @@ jobs:
         env:
           DIGEST: ${{ steps.build-and-push.outputs.digest }}
         run: cosign sign --yes "ghcr.io/sozercan/aikit@${DIGEST}"
+
+      - name: Verify image signature
+        env:
+          DIGEST: ${{ steps.build-and-push.outputs.digest }}
+        run: |
+          cosign verify ghcr.io/sozercan/${{ env.MODEL_NAME }}:${{ env.MODEL_SIZE }}${{ env.MODEL_RUNTIME }}@${DIGEST} \
+            --certificate-identity https://github.com/sozercan/aikit/.github/workflows/release.yaml@refs/heads/main \
+            --certificate-oidc-issuer https://token.actions.githubusercontent.com | jq
diff --git a/.github/workflows/test-docker.yaml b/.github/workflows/test-docker.yaml
index 28c96856..978ea61c 100644
--- a/.github/workflows/test-docker.yaml
+++ b/.github/workflows/test-docker.yaml
@@ -42,10 +42,19 @@ jobs:
       - uses: crazy-max/ghaction-github-runtime@v3
 
       - name: build aikit
-        run: docker buildx build . -t aikit:test --load --cache-from=type=gha,scope=aikit --cache-to=type=gha,scope=aikit,mode=max
+        run: |
+          docker buildx build . -t aikit:test \
+            --load --progress plain \
+            --cache-from=type=gha,scope=aikit \
+            --cache-to=type=gha,scope=aikit,mode=max
 
       - name: build test model
-        run: docker buildx build . -t testmodel:test -f test/aikitfile.yaml --load --cache-from=type=gha,scope=testmodel --cache-to=type=gha,scope=testmodel,mode=max
+        run: |
+          docker buildx build . -t testmodel:test \
+            -f test/aikitfile.yaml \
+            --load --progress plain \
+            --cache-from=type=gha,scope=testmodel \
+            --cache-to=type=gha,scope=testmodel,mode=max
 
       - name: list images
         run: docker images
diff --git a/.github/workflows/update-models.yaml b/.github/workflows/update-models.yaml
index 4418dbf7..23b4a5d7 100644
--- a/.github/workflows/update-models.yaml
+++ b/.github/workflows/update-models.yaml
@@ -47,6 +47,7 @@ jobs:
 
       - name: Build and push
        uses: docker/build-push-action@v5
+        id: build-and-push
         with:
           push: true
           tags: ghcr.io/sozercan/${{ env.MODEL_NAME }}:${{ env.MODEL_SIZE }}${{ env.MODEL_RUNTIME }}
@@ -59,7 +60,7 @@ jobs:
         run: cosign sign --yes "ghcr.io/sozercan/${{ env.MODEL_NAME }}:${{ env.MODEL_SIZE }}${{ env.MODEL_RUNTIME }}@${DIGEST}"
 
-      - name: verify image signature
+      - name: Verify image signature
         env:
           DIGEST: ${{ steps.build-and-push.outputs.digest }}
         run: |
diff --git a/test/aikitfile.yaml b/test/aikitfile.yaml
index 7b11030d..ada0c5ef 100644
--- a/test/aikitfile.yaml
+++ b/test/aikitfile.yaml
@@ -14,3 +14,4 @@ config: |
       top_p: 0.7
       model: tinyllama-1.1b-chat-v0.3.Q4_K_M.gguf
     context_size: 1000
+    mmap: false
\ No newline at end of file
diff --git a/test/bats/test.bats b/test/bats/test.bats
index 90ab1631..0884d9f7 100644
--- a/test/bats/test.bats
+++ b/test/bats/test.bats
@@ -5,9 +5,9 @@ load helpers
 WAIT_TIME=120
 SLEEP_TIME=1
 
-@test "send request to llama-2-7b-chat" {
+@test "send request to tinyllama-1.1b-chat" {
   run curl --retry 20 --retry-all-errors http://localhost:8080/v1/chat/completions -H "Content-Type: application/json" -d '{
-    "model": "llama-2-7b-chat",
+    "model": "tinyllama-1.1b-chat",
     "messages": [{"role": "user", "content": "explain kubernetes in a sentence"}],
   }'
   assert_success
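Patch 3 verifies each signature in the same run that produced it, which catches a broken signing setup immediately. For keyless signatures both flags are mandatory: `--certificate-oidc-issuer` pins the identity provider and `--certificate-identity` pins the exact workflow ref that signed. (Note the copy-paste slip here: the pre-release and release verify steps still reference the `env.MODEL_NAME`/`MODEL_SIZE`/`MODEL_RUNTIME` variables that only exist in update-models.yaml; patches 5 and 8 below correct them to `ghcr.io/sozercan/aikit@${DIGEST}`.) Verifying a published image from a workstation looks the same, as a sketch:

    cosign verify ghcr.io/sozercan/aikit:latest \
      --certificate-oidc-issuer https://token.actions.githubusercontent.com \
      --certificate-identity https://github.com/sozercan/aikit/.github/workflows/release.yaml@refs/heads/main \
      | jq .   # the verification payload is JSON; jq only pretty-prints it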
From a78e3eb3364c9758eeef6f9be84a42ba1b098679 Mon Sep 17 00:00:00 2001
From: Sertac Ozercan
Date: Mon, 4 Dec 2023 02:06:48 +0000
Subject: [PATCH 4/9] test

Signed-off-by: Sertac Ozercan
---
 .github/workflows/pre-release.yaml | 3 ++-
 1 file changed, 2 insertions(+), 1 deletion(-)

diff --git a/.github/workflows/pre-release.yaml b/.github/workflows/pre-release.yaml
index 18f6b827..b861b8dc 100644
--- a/.github/workflows/pre-release.yaml
+++ b/.github/workflows/pre-release.yaml
@@ -1,7 +1,8 @@
 name: pre-release
 
 on:
-  push:
+  # push:
+  pull_request:
     branches:
       - main

From e367411b01f97e64615a62ff4d697b8f90096cc0 Mon Sep 17 00:00:00 2001
From: Sertac Ozercan
Date: Mon, 4 Dec 2023 02:13:20 +0000
Subject: [PATCH 5/9] test

Signed-off-by: Sertac Ozercan
---
 .github/workflows/pre-release.yaml | 7 ++++---
 .github/workflows/release.yaml     | 2 +-
 2 files changed, 5 insertions(+), 4 deletions(-)

diff --git a/.github/workflows/pre-release.yaml b/.github/workflows/pre-release.yaml
index b861b8dc..80cb56bb 100644
--- a/.github/workflows/pre-release.yaml
+++ b/.github/workflows/pre-release.yaml
@@ -12,7 +12,7 @@ permissions:
   id-token: write
 
 jobs:
-  release:
+  pre-release:
     runs-on: ubuntu-latest
     timeout-minutes: 360
     steps:
@@ -57,6 +57,7 @@ jobs:
         env:
           DIGEST: ${{ steps.build-and-push.outputs.digest }}
         run: |
-          cosign verify ghcr.io/sozercan/${{ env.MODEL_NAME }}:${{ env.MODEL_SIZE }}${{ env.MODEL_RUNTIME }}@${DIGEST} \
-            --certificate-identity https://github.com/sozercan/aikit/.github/workflows/pre-release.yaml@refs/heads/main \
+          # refs/heads/main
+          cosign verify ghcr.io/sozercan/aikit@${DIGEST} \
+            --certificate-identity https://github.com/sozercan/aikit/.github/workflows/pre-release.yaml@refs/pull/14/merge \
             --certificate-oidc-issuer https://token.actions.githubusercontent.com | jq
diff --git a/.github/workflows/release.yaml b/.github/workflows/release.yaml
index 13f64e1a..610773fe 100644
--- a/.github/workflows/release.yaml
+++ b/.github/workflows/release.yaml
@@ -11,7 +11,7 @@ permissions:
   id-token: write
 
 jobs:
-  release:
+  release:
     runs-on: ubuntu-latest
     timeout-minutes: 360
     steps:
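Patches 4 and 5 surface the subtlety these test commits are probing: the certificate identity embeds the ref the signing run executed on, so a workflow triggered by `pull_request` signs as `...@refs/pull/14/merge`, not `...@refs/heads/main`, and verification pinned to main fails. When the exact ref cannot be pinned, cosign also accepts a pattern — a sketch, not used by this series:

    # Accept signatures from this workflow regardless of the triggering ref.
    cosign verify ghcr.io/sozercan/aikit:dev \
      --certificate-oidc-issuer https://token.actions.githubusercontent.com \
      --certificate-identity-regexp 'https://github.com/sozercan/aikit/\.github/workflows/pre-release\.yaml@.*'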
From 171d9ceac84181fa5e5e1c62736f061861f11d70 Mon Sep 17 00:00:00 2001
From: Sertac Ozercan
Date: Mon, 4 Dec 2023 03:42:06 +0000
Subject: [PATCH 6/9] test

Signed-off-by: Sertac Ozercan
---
 .github/workflows/pre-release.yaml | 8 +++-----
 .github/workflows/test-docker.yaml | 8 ++++++--
 test/aikitfile.yaml                | 2 +-
 3 files changed, 10 insertions(+), 8 deletions(-)

diff --git a/.github/workflows/pre-release.yaml b/.github/workflows/pre-release.yaml
index 80cb56bb..d26b2109 100644
--- a/.github/workflows/pre-release.yaml
+++ b/.github/workflows/pre-release.yaml
@@ -1,8 +1,7 @@
 name: pre-release
 
 on:
-  # push:
-  pull_request:
+  push:
     branches:
       - main
@@ -57,7 +56,6 @@ jobs:
         env:
           DIGEST: ${{ steps.build-and-push.outputs.digest }}
         run: |
-          # refs/heads/main
           cosign verify ghcr.io/sozercan/aikit@${DIGEST} \
-            --certificate-identity https://github.com/sozercan/aikit/.github/workflows/pre-release.yaml@refs/pull/14/merge \
-            --certificate-oidc-issuer https://token.actions.githubusercontent.com | jq
+            --certificate-oidc-issuer https://token.actions.githubusercontent.com \
+            --certificate-identity https://github.com/sozercan/aikit/.github/workflows/pre-release.yaml@refs/heads/main
diff --git a/.github/workflows/test-docker.yaml b/.github/workflows/test-docker.yaml
index 978ea61c..86ed83ba 100644
--- a/.github/workflows/test-docker.yaml
+++ b/.github/workflows/test-docker.yaml
@@ -65,5 +65,9 @@ jobs:
       - name: install e2e dependencies
         run: make test-e2e-dependencies
 
-      - name: run bats test
-        run: make test-e2e
+      - name: run test
+        run: |
+          curl --retry 20 --retry-delay 30 --retry-all-errors http://localhost:8080/v1/chat/completions -H "Content-Type: application/json" -d '{
+            "model": "tinyllama-1.1b-chat",
+            "messages": [{"role": "user", "content": "explain kubernetes in a sentence"}],
+          }'
diff --git a/test/aikitfile.yaml b/test/aikitfile.yaml
index ada0c5ef..935d007c 100644
--- a/test/aikitfile.yaml
+++ b/test/aikitfile.yaml
@@ -14,4 +14,4 @@ config: |
       top_p: 0.7
       model: tinyllama-1.1b-chat-v0.3.Q4_K_M.gguf
     context_size: 1000
-    mmap: false
\ No newline at end of file
+    mmap: false

From 7efaa3dd8f6092078fcdf2d767cfa231de8e2f2f Mon Sep 17 00:00:00 2001
From: Sertac Ozercan
Date: Mon, 4 Dec 2023 04:23:35 +0000
Subject: [PATCH 7/9] test

Signed-off-by: Sertac Ozercan
---
 .github/workflows/test-docker.yaml | 19 ++++++++++++++-----
 test/aikitfile.yaml                |  6 +++---
 2 files changed, 17 insertions(+), 8 deletions(-)

diff --git a/.github/workflows/test-docker.yaml b/.github/workflows/test-docker.yaml
index 86ed83ba..b1d0098b 100644
--- a/.github/workflows/test-docker.yaml
+++ b/.github/workflows/test-docker.yaml
@@ -60,14 +60,23 @@ jobs:
         run: docker images
 
       - name: run test model
-        run: docker run -d -p 8080:8080 testmodel:test
+        run: docker run --name testmodel -d -p 8080:8080 testmodel:test
 
       - name: install e2e dependencies
         run: make test-e2e-dependencies
 
       - name: run test
         run: |
-          curl --retry 20 --retry-delay 30 --retry-all-errors http://localhost:8080/v1/chat/completions -H "Content-Type: application/json" -d '{
-            "model": "tinyllama-1.1b-chat",
-            "messages": [{"role": "user", "content": "explain kubernetes in a sentence"}],
-          }'
+          curl --retry 20 --retry-delay 30 --retry-all-errors http://localhost:8080/v1/chat/completions -H "Content-Type: application/json" -d '{"model": "tinyllama-1.1b-chat", "messages": [{"role": "user", "content": "explain kubernetes in a sentence"}]}'
+
+      - name: save logs
+        if: always()
+        run: |
+          docker logs testmodel > docker.log
+
+      - name: publish logs
+        if: always()
+        uses: actions/upload-artifact@v3
+        with:
+          name: docker-logs
+          path: docker.log
diff --git a/test/aikitfile.yaml b/test/aikitfile.yaml
index 935d007c..aeefa534 100644
--- a/test/aikitfile.yaml
+++ b/test/aikitfile.yaml
@@ -9,9 +9,9 @@ config: |
   - name: tinyllama-1.1b-chat
     backend: llama
     parameters:
-      top_k: 80
-      temperature: 0.2
-      top_p: 0.7
+      top_k: 40
+      temperature: 0
+      top_p: 0.1
       model: tinyllama-1.1b-chat-v0.3.Q4_K_M.gguf
     context_size: 1000
     mmap: false
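Naming the container in patch 7 is what makes the `if: always()` log capture possible, so a failing smoke test still leaves a docker.log artifact behind. A readiness poll before the request would avoid spending curl retries while the model loads; a sketch, assuming the server exposes the OpenAI-compatible /v1/models route:

    docker run --name testmodel -d -p 8080:8080 testmodel:test
    # Poll until the API answers (up to ~2 minutes) before the real request.
    for i in $(seq 1 60); do
      curl -fsS http://127.0.0.1:8080/v1/models >/dev/null 2>&1 && break
      sleep 2
    done
    docker logs testmodel > docker.log   # capture logs even if the request fails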
From 0ea86cee1f28291a0d7c1264ac8bb3df6148dd8c Mon Sep 17 00:00:00 2001
From: Sertac Ozercan
Date: Mon, 4 Dec 2023 04:38:42 +0000
Subject: [PATCH 8/9] test

Signed-off-by: Sertac Ozercan
---
 .github/workflows/release.yaml       | 6 +++---
 .github/workflows/test-docker.yaml   | 2 +-
 .github/workflows/update-models.yaml | 8 ++++----
 3 files changed, 8 insertions(+), 8 deletions(-)

diff --git a/.github/workflows/release.yaml b/.github/workflows/release.yaml
index 610773fe..488e3d66 100644
--- a/.github/workflows/release.yaml
+++ b/.github/workflows/release.yaml
@@ -63,6 +63,6 @@ jobs:
         env:
           DIGEST: ${{ steps.build-and-push.outputs.digest }}
         run: |
-          cosign verify ghcr.io/sozercan/${{ env.MODEL_NAME }}:${{ env.MODEL_SIZE }}${{ env.MODEL_RUNTIME }}@${DIGEST} \
-            --certificate-identity https://github.com/sozercan/aikit/.github/workflows/release.yaml@refs/heads/main \
-            --certificate-oidc-issuer https://token.actions.githubusercontent.com | jq
+          cosign verify ghcr.io/sozercan/aikit@${DIGEST} \
+            --certificate-oidc-issuer https://token.actions.githubusercontent.com \
+            --certificate-identity https://github.com/sozercan/aikit/.github/workflows/release.yaml@refs/heads/main
diff --git a/.github/workflows/test-docker.yaml b/.github/workflows/test-docker.yaml
index b1d0098b..f7900b2c 100644
--- a/.github/workflows/test-docker.yaml
+++ b/.github/workflows/test-docker.yaml
@@ -67,7 +67,7 @@ jobs:
 
       - name: run test
         run: |
-          curl --retry 20 --retry-delay 30 --retry-all-errors http://localhost:8080/v1/chat/completions -H "Content-Type: application/json" -d '{"model": "tinyllama-1.1b-chat", "messages": [{"role": "user", "content": "explain kubernetes in a sentence"}]}'
+          curl http://localhost:8080/v1/chat/completions -H "Content-Type: application/json" -d '{"model": "tinyllama-1.1b-chat", "messages": [{"role": "user", "content": "explain kubernetes in a sentence"}]}'
diff --git a/.github/workflows/update-models.yaml b/.github/workflows/update-models.yaml
index 23b4a5d7..f76f98fd 100644
--- a/.github/workflows/update-models.yaml
+++ b/.github/workflows/update-models.yaml
@@ -52,8 +52,8 @@ jobs:
           push: true
           tags: ghcr.io/sozercan/${{ env.MODEL_NAME }}:${{ env.MODEL_SIZE }}${{ env.MODEL_RUNTIME }}
           file: models/${{ matrix.model }}.yaml
-          cache-from: type=gha,scope=${{ env.MODEL_NAME }}:${{ env.MODEL_SIZE }}
-          cache-to: type=gha,scope=${{ env.MODEL_NAME }}:${{ env.MODEL_SIZE }},mode=max
+          cache-from: type=gha,scope=${{ env.MODEL_NAME }}-${{ env.MODEL_SIZE }}
+          cache-to: type=gha,scope=${{ env.MODEL_NAME }}-${{ env.MODEL_SIZE }},mode=max
@@ -65,5 +65,5 @@ jobs:
         env:
           DIGEST: ${{ steps.build-and-push.outputs.digest }}
         run: |
           cosign verify ghcr.io/sozercan/${{ env.MODEL_NAME }}:${{ env.MODEL_SIZE }}${{ env.MODEL_RUNTIME }}@${DIGEST} \
-            --certificate-identity https://github.com/sozercan/aikit/.github/workflows/update-models.yaml@refs/heads/main \
-            --certificate-oidc-issuer https://token.actions.githubusercontent.com | jq
+            --certificate-oidc-issuer https://token.actions.githubusercontent.com \
+            --certificate-identity https://github.com/sozercan/aikit/.github/workflows/update-models.yaml@refs/heads/main

From 1324ee2154c7a0cef6848b5d2dc39aed423517af Mon Sep 17 00:00:00 2001
From: Sertac Ozercan
Date: Mon, 4 Dec 2023 07:17:23 +0000
Subject: [PATCH 9/9] test

Signed-off-by: Sertac Ozercan
---
 .github/workflows/test-docker.yaml |  2 +-
 pkg/aikit2llb/convert.go           |  7 ++++---
 test/aikitfile-cuda.yaml           | 14 +++++++-------
 test/aikitfile.yaml                | 19 +++++++++----------
 4 files changed, 21 insertions(+), 21 deletions(-)

diff --git a/.github/workflows/test-docker.yaml b/.github/workflows/test-docker.yaml
index f7900b2c..a70ae078 100644
--- a/.github/workflows/test-docker.yaml
+++ b/.github/workflows/test-docker.yaml
@@ -67,7 +67,7 @@ jobs:
 
       - name: run test
         run: |
-          curl http://localhost:8080/v1/chat/completions -H "Content-Type: application/json" -d '{"model": "tinyllama-1.1b-chat", "messages": [{"role": "user", "content": "explain kubernetes in a sentence"}]}'
+          curl http://127.0.0.1:8080/v1/chat/completions -H "Content-Type: application/json" -d '{"model": "tinyllama-1.1b-chat", "messages": [{"role": "user", "content": "explain kubernetes in a sentence"}]}'
diff --git a/pkg/aikit2llb/convert.go b/pkg/aikit2llb/convert.go
index 634f95f7..b90b1d03 100644
--- a/pkg/aikit2llb/convert.go
+++ b/pkg/aikit2llb/convert.go
@@ -25,10 +25,10 @@ func Aikit2LLB(c *config.Config) (llb.State, *specs.Image) {
 	s, merge = copyModels(c, s)
 	s, merge = addLocalAI(c, s, merge)
 	if c.Runtime == utils.RuntimeNVIDIA {
-		s = installCuda(s, merge)
+		merge = installCuda(s, merge)
 	}
 	imageCfg := NewImageConfig(c)
-	return s, imageCfg
+	return merge, imageCfg
 }
 
 func copyModels(c *config.Config, s llb.State) (llb.State, llb.State) {
@@ -87,8 +87,9 @@ func installCuda(s llb.State, merge llb.State) llb.State {
 		llb.WithCustomName("Copying "+fileNameFromURL(cudaKeyringURL)), //nolint: goconst
 	)
 	s = s.Run(shf("dpkg -i cuda-keyring_1.1-1_all.deb && rm cuda-keyring_1.1-1_all.deb")).Root()
+	s = s.Run(shf("apt-get update && apt-get install -y ca-certificates && apt-get update"), llb.IgnoreCache).Root()
 	savedState := s
-	s = s.Run(shf("apt-get update && apt-get install -y ca-certificates && apt-get update && apt-get install -y libcublas-%[1]s cuda-cudart-%[1]s && apt-get clean", cudaVersion), llb.IgnoreCache).Root()
+	s = s.Run(shf("apt-get install -y libcublas-%[1]s cuda-cudart-%[1]s && apt-get clean", cudaVersion)).Root()
 
 	diff := llb.Diff(savedState, s)
 	merge = llb.Merge([]llb.State{merge, diff})
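The convert.go change does two things: the function now returns the merged state, so the CUDA layers actually land in the final image, and `apt-get update` is split into its own `llb.IgnoreCache` step so package indexes stay fresh while the much larger install step remains cacheable; `llb.Diff(savedState, s)` then folds only the install step's filesystem delta into the merge. In plain shell terms the layering is, as a sketch (the version suffix is a placeholder for the `cudaVersion` constant in the source):

    # Step 1: always re-executed (IgnoreCache) so the apt index stays current.
    apt-get update && apt-get install -y ca-certificates && apt-get update
    # Step 2: cacheable; only this step's filesystem diff is merged into the image.
    apt-get install -y "libcublas-${CUDA_VERSION}" "cuda-cudart-${CUDA_VERSION}" && apt-get clean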
diff --git a/test/aikitfile-cuda.yaml b/test/aikitfile-cuda.yaml
index 24bd2d4e..c282fc98 100644
--- a/test/aikitfile-cuda.yaml
+++ b/test/aikitfile-cuda.yaml
@@ -3,19 +3,19 @@ apiVersion: v1alpha1
 debug: true
 runtime: cuda
 models:
-  - name: tinyllama-1.1b-chat
-    source: https://huggingface.co/TheBloke/TinyLlama-1.1B-Chat-v0.3-GGUF/resolve/main/tinyllama-1.1b-chat-v0.3.Q4_K_M.gguf
-    sha256: "2d3bd82534bb6c6e0f4be1761b29b25bdcd65855a022513cb65f24ad3b25e41f"
+  - name: llama-2-7b-chat
+    source: https://huggingface.co/TheBloke/Llama-2-7B-Chat-GGUF/resolve/main/llama-2-7b-chat.Q4_K_M.gguf
+    sha256: "08a5566d61d7cb6b420c3e4387a39e0078e1f2fe5f055f3a03887385304d4bfa"
 config: |
-  - name: tinyllama-1.1b-chat
+  - name: llama-2-7b-chat
     backend: llama
     parameters:
       top_k: 80
       temperature: 0.2
       top_p: 0.7
-      model: tinyllama-1.1b-chat-v0.3.Q4_K_M.gguf
-    context_size: 1000
-    gpu_layers: 10
+      model: llama-2-7b-chat.Q4_K_M.gguf
+    context_size: 4096
+    gpu_layers: 35
     f16: true
     batch: 512
     mmap: true
diff --git a/test/aikitfile.yaml b/test/aikitfile.yaml
index aeefa534..d88de38b 100644
--- a/test/aikitfile.yaml
+++ b/test/aikitfile.yaml
@@ -2,15 +2,15 @@ apiVersion: v1alpha1
 debug: true
 models:
-  - name: tinyllama-1.1b-chat
-    source: https://huggingface.co/TheBloke/TinyLlama-1.1B-Chat-v0.3-GGUF/resolve/main/tinyllama-1.1b-chat-v0.3.Q4_K_M.gguf
-    sha256: "2d3bd82534bb6c6e0f4be1761b29b25bdcd65855a022513cb65f24ad3b25e41f"
+  - name: llama-2-7b-chat
+    source: https://huggingface.co/TheBloke/Llama-2-7B-Chat-GGUF/resolve/main/llama-2-7b-chat.Q4_K_M.gguf
+    sha256: "08a5566d61d7cb6b420c3e4387a39e0078e1f2fe5f055f3a03887385304d4bfa"
 config: |
-  - name: tinyllama-1.1b-chat
+  - name: llama-2-7b-chat
     backend: llama
     parameters:
-      top_k: 40
-      temperature: 0
-      top_p: 0.1
+      top_k: 80
+      temperature: 0.2
+      top_p: 0.7
-      model: tinyllama-1.1b-chat-v0.3.Q4_K_M.gguf
-    context_size: 1000
-    mmap: false
+      model: llama-2-7b-chat.Q4_K_M.gguf
+    context_size: 4096
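With patch 9 the test files are back on llama-2-7b-chat; the CUDA variant differs only in `runtime: cuda` plus the offload knobs (`gpu_layers: 35`, `f16: true`). Running a CUDA build locally would look roughly like the following sketch (image tag hypothetical; `--gpus all` requires the NVIDIA container toolkit on the host):

    docker buildx build . -t my-model --load -f test/aikitfile-cuda.yaml
    docker run --rm --gpus all -p 8080:8080 my-model
    curl http://127.0.0.1:8080/v1/chat/completions -H "Content-Type: application/json" \
      -d '{"model": "llama-2-7b-chat", "messages": [{"role": "user", "content": "explain kubernetes in a sentence"}]}'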