sozercan · sozercan · Dec 4, 2023 · Dec 3, 2023 · Dec 4, 2023 · Dec 4, 2023
diff --git a/.github/workflows/pre-release.yaml b/.github/workflows/pre-release.yaml
@@ -8,35 +8,54 @@ on:
 permissions:
   contents: write
   packages: write
+  id-token: write
 
 jobs:
- release:
+  pre-release:
     runs-on: ubuntu-latest
     timeout-minutes: 360
     steps:
       - uses: actions/checkout@v4
 
       - name: cleanup disk space
         run: |
-          df -H
           docker system prune -f -a --volumes
           sudo rm -rf /usr/share/dotnet
           sudo rm -rf /opt/ghc
           sudo rm -rf "/usr/local/share/boost"
           sudo rm -rf "$AGENT_TOOLSDIRECTORY"
-          df -H
 
-      - name: Login to ghcr
+      - name: Install Cosign
+        uses: sigstore/cosign-installer@v3
+
+      - name: Set up Docker Buildx
+        uses: docker/setup-buildx-action@v3
+
+      - name: Login to GHCR
         uses: docker/login-action@v3
         with:
           registry: ghcr.io
           username: ${{ github.actor }}
           password: ${{ secrets.GITHUB_TOKEN }}
 
-      - name: create buildx builder
-        run: docker buildx create --use --name builder --bootstrap
-      - uses: crazy-max/ghaction-github-runtime@v3
+      - name: Build and push
+        uses: docker/build-push-action@v5
+        id: build-and-push
+        with:
+          push: true
+          tags: ghcr.io/sozercan/aikit:dev
+          cache-from: type=gha,scope=aikit
+          cache-to: type=gha,scope=aikit,mode=max
+
+      - name: Sign the images with GitHub OIDC Token
+        env:
+          DIGEST: ${{ steps.build-and-push.outputs.digest }}
+        run: cosign sign --yes "ghcr.io/sozercan/aikit@${DIGEST}"
 
-      - name: Push aikit:dev to GHCR
+      - name: Verify image signature
+        env:
+          DIGEST: ${{ steps.build-and-push.outputs.digest }}
         run: |
-          docker buildx build -t ghcr.io/sozercan/aikit:dev --push .
+          cosign verify ghcr.io/sozercan/aikit@${DIGEST} \
+            --certificate-oidc-issuer https://token.actions.githubusercontent.com \
+            --certificate-identity https://github.com/sozercan/aikit/.github/workflows/pre-release.yaml@refs/heads/main
diff --git a/.github/workflows/release.yaml b/.github/workflows/release.yaml
@@ -8,17 +8,31 @@ on:
 permissions:
   contents: write
   packages: write
+  id-token: write
 
 jobs:
- release:
+  release:
     runs-on: ubuntu-latest
     timeout-minutes: 360
     steps:
       - uses: actions/checkout@v4
-        with:
-          fetch-depth: 0
 
-      - name: Login to ghcr
+      - name: cleanup disk space
+        run: |
+          docker system prune -f -a --volumes
+          sudo rm -rf /usr/share/dotnet
+          sudo rm -rf /opt/ghc
+          sudo rm -rf "/usr/local/share/boost"
+          sudo rm -rf "$AGENT_TOOLSDIRECTORY"
+
+      - name: Install Cosign
+        uses: sigstore/cosign-installer@v3
+
+      - name: Set up Docker Buildx
+        uses: docker/setup-buildx-action@v3
+      - uses: crazy-max/ghaction-github-runtime@v3
+
+      - name: Login to GHCR
         uses: docker/login-action@v3
         with:
           registry: ghcr.io
@@ -29,10 +43,26 @@ jobs:
         run: |
           echo "TAG=${GITHUB_REF#refs/tags/}" >> $GITHUB_ENV
 
-      - name: Push aikit:latest to GHCR
-        run: |
-          docker buildx build . -t ghcr.io/sozercan/aikit:${TAG} -t ghcr.io/sozercan/aikit:latest --push --cache-from=type=gha,scope=aikit --cache-to=type=gha,scope=aikit,mode=max
+      - name: Build and push
+        uses: docker/build-push-action@v5
+        id: build-and-push
+        with:  # inputs are not shell-expanded; read TAG via the env context
+          push: true
+          tags: |
+            ghcr.io/sozercan/aikit:${{ env.TAG }}
+            ghcr.io/sozercan/aikit:latest
+          cache-from: type=gha,scope=aikit
+          cache-to: type=gha,scope=aikit,mode=max
 
-      # - name: release llama 2 image
-      #   run: |
-      #     docker buildx build -t ghcr.io/sozercan/llama2:7b -t ghcr.io/sozercan/llama2:7b-chat -t ghcr.io/sozercan/llama2:chat -t ghcr.io/sozercan/llama2:latest -f models/llama-2-chat-7b.yaml --push --cache-from=type=gha,scope=llama-2-7b-chat --cache-to=type=gha,scope=llama-2-7b-chat,mode=max
+      - name: Sign the images with GitHub OIDC Token
+        env:
+          DIGEST: ${{ steps.build-and-push.outputs.digest }}
+        run: cosign sign --yes "ghcr.io/sozercan/aikit@${DIGEST}"
+
+      - name: Verify image signature  # identity must end with the ref that ran this workflow
+        env:
+          DIGEST: ${{ steps.build-and-push.outputs.digest }}
+        run: |
+          cosign verify "ghcr.io/sozercan/aikit@${DIGEST}" \
+            --certificate-oidc-issuer https://token.actions.githubusercontent.com \
+            --certificate-identity "https://github.com/sozercan/aikit/.github/workflows/release.yaml@${GITHUB_REF}"
diff --git a/.github/workflows/test-docker.yaml b/.github/workflows/test-docker.yaml
@@ -23,32 +23,60 @@ jobs:
 
       - name: cleanup disk space
         run: |
-          df -H
           docker system prune -f -a --volumes
           sudo rm -rf /usr/share/dotnet
           sudo rm -rf /opt/ghc
           sudo rm -rf "/usr/local/share/boost"
           sudo rm -rf "$AGENT_TOOLSDIRECTORY"
-          df -H
 
-      - name: create buildx builder
-        run: docker buildx create --use --name builder --bootstrap
+      # need containerd image store for testing local images
+      - uses: crazy-max/ghaction-setup-docker@v2
+        with:
+          daemon-config: |
+            {
+              "debug": true,
+              "features": {
+                "containerd-snapshotter": true
+              }
+            }
       - uses: crazy-max/ghaction-github-runtime@v3
 
       - name: build aikit
-        run: docker buildx build . -t sozercan/aikit:test --load --cache-from=type=gha,scope=aikit --cache-to=type=gha,scope=aikit,mode=max
+        run: |
+          docker buildx build . -t aikit:test \
+            --load --progress plain \
+            --cache-from=type=gha,scope=aikit \
+            --cache-to=type=gha,scope=aikit,mode=max
 
       - name: build test model
-        run: docker buildx build . -t sozercan/testmodel:test -f test/aikitfile.yaml --load --cache-from=type=gha,scope=testmodel --cache-to=type=gha,scope=testmodel,mode=max
+        run: |
+          docker buildx build . -t testmodel:test \
+            -f test/aikitfile.yaml \
+            --load --progress plain \
+            --cache-from=type=gha,scope=testmodel \
+            --cache-to=type=gha,scope=testmodel,mode=max
 
       - name: list images
         run: docker images
 
       - name: run test model
-        run: docker run -d -p 8080:8080 sozercan/testmodel:test
+        run: docker run --name testmodel -d -p 8080:8080 testmodel:test
 
       - name: install e2e dependencies
         run: make test-e2e-dependencies
 
-      - name: run bats test
-        run: make test-e2e
+      - name: run test  # container was started detached: retry until it answers, fail on HTTP errors
+        run: |
+          curl --fail --retry 20 --retry-all-errors http://127.0.0.1:8080/v1/chat/completions -H "Content-Type: application/json" -d '{"model": "llama-2-7b-chat", "messages": [{"role": "user", "content": "explain kubernetes in a sentence"}]}'
+
+      - name: save logs  # capture both stdout and stderr of the container
+        if: always()
+        run: |
+          docker logs testmodel > docker.log 2>&1
+
+      - name: publish logs
+        if: always()
+        uses: actions/upload-artifact@v3
+        with:
+          name: docker-logs
+          path: docker.log
diff --git a/.github/workflows/update-models.yaml b/.github/workflows/update-models.yaml
@@ -6,33 +6,64 @@ on:
 permissions:
   contents: write
   packages: write
+  id-token: write
 
 jobs:
  update-models:
+    strategy:
+      fail-fast: false
+      matrix:
+        model:
+         - llama-2-7b-chat
+         - llama-2-13b-chat
+         - orca-2-13b
+         - llama-2-7b-chat-cuda
+         - llama-2-13b-chat-cuda
+         - orca-2-13b-cuda
     runs-on: ubuntu-latest
     timeout-minutes: 360
     steps:
       - uses: actions/checkout@v4
 
+      - name: Install Cosign
+        uses: sigstore/cosign-installer@v3
+
+      - name: Set up Docker Buildx
+        uses: docker/setup-buildx-action@v3
+      - uses: crazy-max/ghaction-github-runtime@v3
+
       - name: Login to GHCR
         uses: docker/login-action@v3
         with:
           registry: ghcr.io
           username: ${{ github.actor }}
           password: ${{ secrets.GITHUB_TOKEN }}
 
-      - name: Push llama 2 models to GHCR
+      - name: parse matrix  # e.g. llama-2-7b-chat-cuda -> llama2 / 7b / -cuda; runtime is empty for CPU models
+        run: |
+          echo "MODEL_NAME=$(echo ${{ matrix.model }} | cut -d'-' -f1-2 | sed 's/-//g')" >> $GITHUB_ENV
+          echo "MODEL_SIZE=$(echo ${{ matrix.model }} | cut -d'-' -f3)" >> $GITHUB_ENV
+          echo "MODEL_RUNTIME=$(echo ${{ matrix.model }} | grep -o -- '-cuda$' || true)" >> $GITHUB_ENV
+
+      - name: Build and push
+        uses: docker/build-push-action@v5
+        id: build-and-push
+        with:
+          push: true
+          tags: ghcr.io/sozercan/${{ env.MODEL_NAME }}:${{ env.MODEL_SIZE }}${{ env.MODEL_RUNTIME }}
+          file: models/${{ matrix.model }}.yaml
+          cache-from: type=gha,scope=${{ env.MODEL_NAME }}-${{ env.MODEL_SIZE }}
+          cache-to: type=gha,scope=${{ env.MODEL_NAME }}-${{ env.MODEL_SIZE }},mode=max
+
+      - name: Sign the images with GitHub OIDC Token
+        env:
+          DIGEST: ${{ steps.build-and-push.outputs.digest }}
+        run: cosign sign --yes "ghcr.io/sozercan/${{ env.MODEL_NAME }}:${{ env.MODEL_SIZE }}${{ env.MODEL_RUNTIME }}@${DIGEST}"
+
+      - name: Verify image signature
+        env:
+          DIGEST: ${{ steps.build-and-push.outputs.digest }}
         run: |
-          docker buildx create --use --name builder --bootstrap
-
-          # cpu avx
-          MODELS_PATH=models
-          docker buildx build . -t ghcr.io/sozercan/llama2:7b -f ${MODELS_PATH}/llama-2-7b-chat.yaml --push
-          docker buildx build . -t ghcr.io/sozercan/llama2:13b -f ${MODELS_PATH}/llama-2-13b-chat.yaml --push
-          docker buildx build . -t ghcr.io/sozercan/orca2:13b -f ${MODELS_PATH}/orca-2-13b.yaml --push
-
-          # cuda
-          CUDA_MODELS_PATH=models/cuda
-          docker buildx build . -t ghcr.io/sozercan/llama2:7b-cuda -f ${CUDA_MODELS_PATH}/llama-2-7b-chat.yaml --push
-          docker buildx build . -t ghcr.io/sozercan/llama2:13b-cuda -f ${CUDA_MODELS_PATH}/llama-2-13b-chat.yaml --push
-          docker buildx build . -t ghcr.io/sozercan/orca2:13b-cuda -f ${CUDA_MODELS_PATH}/orca-2-13b.yaml --push
+          cosign verify ghcr.io/sozercan/${{ env.MODEL_NAME }}:${{ env.MODEL_SIZE }}${{ env.MODEL_RUNTIME }}@${DIGEST} \
+            --certificate-oidc-issuer https://token.actions.githubusercontent.com \
+            --certificate-identity https://github.com/sozercan/aikit/.github/workflows/update-models.yaml@refs/heads/main
diff --git a/models/cuda/llama-2-13b-chat.yaml → models/llama-2-13b-chat-cuda.yaml b/models/cuda/llama-2-13b-chat.yaml → models/llama-2-13b-chat-cuda.yaml
diff --git a/models/cuda/llama-2-7b-chat.yaml → models/llama-2-7b-chat-cuda.yaml b/models/cuda/llama-2-7b-chat.yaml → models/llama-2-7b-chat-cuda.yaml
diff --git a/models/cuda/orca-2-13b.yaml → models/orca-2-13b-cuda.yaml b/models/cuda/orca-2-13b.yaml → models/orca-2-13b-cuda.yaml
diff --git a/pkg/aikit2llb/convert.go b/pkg/aikit2llb/convert.go
@@ -25,10 +25,10 @@ func Aikit2LLB(c *config.Config) (llb.State, *specs.Image) {
 	s, merge = copyModels(c, s)
 	s, merge = addLocalAI(c, s, merge)
 	if c.Runtime == utils.RuntimeNVIDIA {
-		s = installCuda(s, merge)
+		merge = installCuda(s, merge)
 	}
 	imageCfg := NewImageConfig(c)
-	return s, imageCfg
+	return merge, imageCfg
 }
 
 func copyModels(c *config.Config, s llb.State) (llb.State, llb.State) {
@@ -87,8 +87,9 @@ func installCuda(s llb.State, merge llb.State) llb.State {
 		llb.WithCustomName("Copying "+fileNameFromURL(cudaKeyringURL)), //nolint: goconst
 	)
 	s = s.Run(shf("dpkg -i cuda-keyring_1.1-1_all.deb && rm cuda-keyring_1.1-1_all.deb")).Root()
+	s = s.Run(shf("apt-get update && apt-get install -y ca-certificates && apt-get update"), llb.IgnoreCache).Root()
 	savedState := s
-	s = s.Run(shf("apt-get update && apt-get install -y ca-certificates && apt-get update && apt-get install -y libcublas-%[1]s cuda-cudart-%[1]s && apt-get clean", cudaVersion), llb.IgnoreCache).Root()
+	s = s.Run(shf("apt-get install -y libcublas-%[1]s cuda-cudart-%[1]s && apt-get clean", cudaVersion)).Root()
 
 	diff := llb.Diff(savedState, s)
 	merge = llb.Merge([]llb.State{merge, diff})

diff --git a/test/aikitfile-cuda.yaml b/test/aikitfile-cuda.yaml
@@ -1,12 +1,11 @@
-#syntax=ghcr.io/sozercan/aikit:latest
+#syntax=aikit:test
 apiVersion: v1alpha1
 debug: true
 runtime: cuda
 models:
   - name: llama-2-7b-chat
     source: https://huggingface.co/TheBloke/Llama-2-7B-Chat-GGUF/resolve/main/llama-2-7b-chat.Q4_K_M.gguf
-  - name: mistral-7b-instruct
-    source: https://huggingface.co/TheBloke/Mistral-7B-OpenOrca-GGUF/resolve/main/mistral-7b-openorca.Q6_K.gguf
+    sha256: "08a5566d61d7cb6b420c3e4387a39e0078e1f2fe5f055f3a03887385304d4bfa"
 config: |
   - name: llama-2-7b-chat
     backend: llama
@@ -20,21 +19,3 @@ config: |
     f16: true
     batch: 512
     mmap: true
-  - name: mistral-7b-instruct
-    context_size: 4096
-    threads: 4
-    parameters:
-      model: mistral-7b-openorca.Q6_K.gguf
-      temperature: 0.2
-      top_k: 40
-      top_p: 0.95
-    template:
-      chat_message: chatml
-      chat: chatml-block
-      completion: completion
-    stopwords:
-    - <|im_end|>
-    gpu_layers: 35
-    f16: true
-    batch: 512
-    mmap: true
diff --git a/test/aikitfile.yaml b/test/aikitfile.yaml
@@ -1,4 +1,4 @@
-#syntax=sozercan/aikit:test
+#syntax=aikit:test
 apiVersion: v1alpha1
 debug: true
 models:

diff --git a/test/bats/test.bats b/test/bats/test.bats
@@ -5,9 +5,9 @@ load helpers
 WAIT_TIME=120
 SLEEP_TIME=1
 
-@test "send request to llama-2-7b-chat" {
+@test "send request to tinyllama-1.1b-chat" {
     run curl --retry 20 --retry-all-errors http://localhost:8080/v1/chat/completions -H "Content-Type: application/json" -d '{
-     "model": "llama-2-7b-chat",
+     "model": "tinyllama-1.1b-chat",
      "messages": [{"role": "user", "content": "explain kubernetes in a sentence"}],
     }'
     assert_success