Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

ci: update release process and sign #14

Merged
merged 10 commits into from
Dec 4, 2023
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension


Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
37 changes: 28 additions & 9 deletions .github/workflows/pre-release.yaml
Original file line number Diff line number Diff line change
Expand Up @@ -8,35 +8,54 @@ on:
permissions:
contents: write
packages: write
id-token: write

jobs:
release:
pre-release:
runs-on: ubuntu-latest
timeout-minutes: 360
steps:
- uses: actions/checkout@v4

- name: cleanup disk space
run: |
df -H
docker system prune -f -a --volumes
sudo rm -rf /usr/share/dotnet
sudo rm -rf /opt/ghc
sudo rm -rf "/usr/local/share/boost"
sudo rm -rf "$AGENT_TOOLSDIRECTORY"
df -H

- name: Login to ghcr
- name: Install Cosign
uses: sigstore/cosign-installer@v3

- name: Set up Docker Buildx
uses: docker/setup-buildx-action@v3

- name: Login to GHCR
uses: docker/login-action@v3
with:
registry: ghcr.io
username: ${{ github.actor }}
password: ${{ secrets.GITHUB_TOKEN }}

- name: create buildx builder
run: docker buildx create --use --name builder --bootstrap
- uses: crazy-max/ghaction-github-runtime@v3
- name: Build and push
uses: docker/build-push-action@v5
id: build-and-push
with:
push: true
tags: ghcr.io/sozercan/aikit:dev
cache-from: type=gha,scope=aikit
cache-to: type=gha,scope=aikit,mode=max

- name: Sign the images with GitHub OIDC Token
env:
DIGEST: ${{ steps.build-and-push.outputs.digest }}
run: cosign sign --yes "ghcr.io/sozercan/aikit@${DIGEST}"

- name: Push aikit:dev to GHCR
- name: Verify image signature
env:
DIGEST: ${{ steps.build-and-push.outputs.digest }}
run: |
docker buildx build -t ghcr.io/sozercan/aikit:dev --push .
cosign verify ghcr.io/sozercan/aikit@${DIGEST} \
--certificate-oidc-issuer https://token.actions.githubusercontent.com \
--certificate-identity https://github.com/sozercan/aikit/.github/workflows/pre-release.yaml@refs/heads/main
50 changes: 40 additions & 10 deletions .github/workflows/release.yaml
Original file line number Diff line number Diff line change
Expand Up @@ -8,17 +8,31 @@ on:
permissions:
contents: write
packages: write
id-token: write

jobs:
release:
release:
runs-on: ubuntu-latest
timeout-minutes: 360
steps:
- uses: actions/checkout@v4
with:
fetch-depth: 0

- name: Login to ghcr
- name: cleanup disk space
run: |
docker system prune -f -a --volumes
sudo rm -rf /usr/share/dotnet
sudo rm -rf /opt/ghc
sudo rm -rf "/usr/local/share/boost"
sudo rm -rf "$AGENT_TOOLSDIRECTORY"

- name: Install Cosign
uses: sigstore/cosign-installer@v3

- name: Set up Docker Buildx
uses: docker/setup-buildx-action@v3
- uses: crazy-max/ghaction-github-runtime@v3

- name: Login to GHCR
uses: docker/login-action@v3
with:
registry: ghcr.io
Expand All @@ -29,10 +43,26 @@ jobs:
run: |
echo "TAG=${GITHUB_REF#refs/tags/}" >> $GITHUB_ENV

- name: Push aikit:latest to GHCR
run: |
docker buildx build . -t ghcr.io/sozercan/aikit:${TAG} -t ghcr.io/sozercan/aikit:latest --push --cache-from=type=gha,scope=aikit --cache-to=type=gha,scope=aikit,mode=max
- name: Build and push
uses: docker/build-push-action@v5
id: build-and-push
with:
push: true
tags: |
ghcr.io/sozercan/aikit:${TAG}
ghcr.io/sozercan/aikit:latest
cache-from: type=gha,scope=aikit
cache-to: type=gha,scope=aikit,mode=max

# - name: release llama 2 image
# run: |
# docker buildx build -t ghcr.io/sozercan/llama2:7b -t ghcr.io/sozercan/llama2:7b-chat -t ghcr.io/sozercan/llama2:chat -t ghcr.io/sozercan/llama2:latest -f models/llama-2-chat-7b.yaml --push --cache-from=type=gha,scope=llama-2-7b-chat --cache-to=type=gha,scope=llama-2-7b-chat,mode=max
- name: Sign the images with GitHub OIDC Token
env:
DIGEST: ${{ steps.build-and-push.outputs.digest }}
run: cosign sign --yes "ghcr.io/sozercan/aikit@${DIGEST}"

- name: Verify image signature
env:
DIGEST: ${{ steps.build-and-push.outputs.digest }}
run: |
cosign verify ghcr.io/sozercan/aikit@${DIGEST} \
--certificate-oidc-issuer https://token.actions.githubusercontent.com \
--certificate-identity https://github.com/sozercan/aikit/.github/workflows/release.yaml@refs/heads/main
46 changes: 37 additions & 9 deletions .github/workflows/test-docker.yaml
Original file line number Diff line number Diff line change
Expand Up @@ -23,32 +23,60 @@ jobs:

- name: cleanup disk space
run: |
df -H
docker system prune -f -a --volumes
sudo rm -rf /usr/share/dotnet
sudo rm -rf /opt/ghc
sudo rm -rf "/usr/local/share/boost"
sudo rm -rf "$AGENT_TOOLSDIRECTORY"
df -H

- name: create buildx builder
run: docker buildx create --use --name builder --bootstrap
# need containerd image store for testing local images
- uses: crazy-max/ghaction-setup-docker@v2
with:
daemon-config: |
{
"debug": true,
"features": {
"containerd-snapshotter": true
}
}
- uses: crazy-max/ghaction-github-runtime@v3

- name: build aikit
run: docker buildx build . -t sozercan/aikit:test --load --cache-from=type=gha,scope=aikit --cache-to=type=gha,scope=aikit,mode=max
run: |
docker buildx build . -t aikit:test \
--load --progress plain \
--cache-from=type=gha,scope=aikit \
--cache-to=type=gha,scope=aikit,mode=max

- name: build test model
run: docker buildx build . -t sozercan/testmodel:test -f test/aikitfile.yaml --load --cache-from=type=gha,scope=testmodel --cache-to=type=gha,scope=testmodel,mode=max
run: |
docker buildx build . -t testmodel:test \
-f test/aikitfile.yaml \
--load --progress plain \
--cache-from=type=gha,scope=testmodel \
--cache-to=type=gha,scope=testmodel,mode=max

- name: list images
run: docker images

- name: run test model
run: docker run -d -p 8080:8080 sozercan/testmodel:test
run: docker run --name testmodel -d -p 8080:8080 testmodel:test

- name: install e2e dependencies
run: make test-e2e-dependencies

- name: run bats test
run: make test-e2e
- name: run test
run: |
curl http://127.0.0.1:8080/v1/chat/completions -H "Content-Type: application/json" -d '{"model": "llama-2-7b-chat", "messages": [{"role": "user", "content": "explain kubernetes in a sentence"}]}'

- name: save logs
if: always()
run: |
docker logs testmodel > docker.log

- name: publish logs
if: always()
uses: actions/upload-artifact@v3
with:
name: docker-logs
path: docker.log
59 changes: 45 additions & 14 deletions .github/workflows/update-models.yaml
Original file line number Diff line number Diff line change
Expand Up @@ -6,33 +6,64 @@ on:
permissions:
contents: write
packages: write
id-token: write

jobs:
update-models:
strategy:
fail-fast: false
matrix:
model:
- llama-2-7b-chat
- llama-2-13b-chat
- orca-2-13b
- llama-2-7b-chat-cuda
- llama-2-13b-chat-cuda
- orca-2-13b-cuda
runs-on: ubuntu-latest
timeout-minutes: 360
steps:
- uses: actions/checkout@v4

- name: Install Cosign
uses: sigstore/cosign-installer@v3

- name: Set up Docker Buildx
uses: docker/setup-buildx-action@v3
- uses: crazy-max/ghaction-github-runtime@v3

- name: Login to GHCR
uses: docker/login-action@v3
with:
registry: ghcr.io
username: ${{ github.actor }}
password: ${{ secrets.GITHUB_TOKEN }}

- name: Push llama 2 models to GHCR
- name: parse matrix
run: |
echo "MODEL_NAME=$(echo ${{ matrix.model }} | cut -d'-' -f1-2 | sed 's/-//g')" >> $GITHUB_ENV
echo "MODEL_SIZE=$(echo ${{ matrix.model }} | rev | cut -d'-' -f2 | rev)" >> $GITHUB_ENV
echo "MODEL_RUNTIME=-$(echo ${{ matrix.model }} | rev | cut -d'-' -f1 | rev)" >> $GITHUB_ENV

- name: Build and push
uses: docker/build-push-action@v5
id: build-and-push
with:
push: true
tags: ghcr.io/sozercan/${{ env.MODEL_NAME }}:${{ env.MODEL_SIZE }}${{ env.MODEL_RUNTIME }}
file: models/${{ matrix.model }}.yaml
cache-from: type=gha,scope=${{ env.MODEL_NAME }}-${{ env.MODEL_SIZE }}
cache-to: type=gha,scope=${{ env.MODEL_NAME }}-${{ env.MODEL_SIZE }},mode=max

- name: Sign the images with GitHub OIDC Token
env:
DIGEST: ${{ steps.build-and-push.outputs.digest }}
run: cosign sign --yes "ghcr.io/sozercan/${{ env.MODEL_NAME }}:${{ env.MODEL_SIZE }}${{ env.MODEL_RUNTIME }}@${DIGEST}"

- name: Verify image signature
env:
DIGEST: ${{ steps.build-and-push.outputs.digest }}
run: |
docker buildx create --use --name builder --bootstrap

# cpu avx
MODELS_PATH=models
docker buildx build . -t ghcr.io/sozercan/llama2:7b -f ${MODELS_PATH}/llama-2-7b-chat.yaml --push
docker buildx build . -t ghcr.io/sozercan/llama2:13b -f ${MODELS_PATH}/llama-2-13b-chat.yaml --push
docker buildx build . -t ghcr.io/sozercan/orca2:13b -f ${MODELS_PATH}/orca-2-13b.yaml --push

# cuda
CUDA_MODELS_PATH=models/cuda
docker buildx build . -t ghcr.io/sozercan/llama2:7b-cuda -f ${CUDA_MODELS_PATH}/llama-2-7b-chat.yaml --push
docker buildx build . -t ghcr.io/sozercan/llama2:13b-cuda -f ${CUDA_MODELS_PATH}/llama-2-13b-chat.yaml --push
docker buildx build . -t ghcr.io/sozercan/orca2:13b-cuda -f ${CUDA_MODELS_PATH}/orca-2-13b.yaml --push
cosign verify ghcr.io/sozercan/${{ env.MODEL_NAME }}:${{ env.MODEL_SIZE }}${{ env.MODEL_RUNTIME }}@${DIGEST} \
--certificate-oidc-issuer https://token.actions.githubusercontent.com \
--certificate-identity https://github.com/sozercan/aikit/.github/workflows/update-models.yaml@refs/heads/main
File renamed without changes.
File renamed without changes.
File renamed without changes.
7 changes: 4 additions & 3 deletions pkg/aikit2llb/convert.go
Original file line number Diff line number Diff line change
Expand Up @@ -25,10 +25,10 @@ func Aikit2LLB(c *config.Config) (llb.State, *specs.Image) {
s, merge = copyModels(c, s)
s, merge = addLocalAI(c, s, merge)
if c.Runtime == utils.RuntimeNVIDIA {
s = installCuda(s, merge)
merge = installCuda(s, merge)
}
imageCfg := NewImageConfig(c)
return s, imageCfg
return merge, imageCfg
}

func copyModels(c *config.Config, s llb.State) (llb.State, llb.State) {
Expand Down Expand Up @@ -87,8 +87,9 @@ func installCuda(s llb.State, merge llb.State) llb.State {
llb.WithCustomName("Copying "+fileNameFromURL(cudaKeyringURL)), //nolint: goconst
)
s = s.Run(shf("dpkg -i cuda-keyring_1.1-1_all.deb && rm cuda-keyring_1.1-1_all.deb")).Root()
s = s.Run(shf("apt-get update && apt-get install -y ca-certificates && apt-get update"), llb.IgnoreCache).Root()
savedState := s
s = s.Run(shf("apt-get update && apt-get install -y ca-certificates && apt-get update && apt-get install -y libcublas-%[1]s cuda-cudart-%[1]s && apt-get clean", cudaVersion), llb.IgnoreCache).Root()
s = s.Run(shf("apt-get install -y libcublas-%[1]s cuda-cudart-%[1]s && apt-get clean", cudaVersion)).Root()

diff := llb.Diff(savedState, s)
merge = llb.Merge([]llb.State{merge, diff})
Expand Down
23 changes: 2 additions & 21 deletions test/aikitfile-cuda.yaml
Original file line number Diff line number Diff line change
@@ -1,12 +1,11 @@
#syntax=ghcr.io/sozercan/aikit:latest
#syntax=aikit:test
apiVersion: v1alpha1
debug: true
runtime: cuda
models:
- name: llama-2-7b-chat
source: https://huggingface.co/TheBloke/Llama-2-7B-Chat-GGUF/resolve/main/llama-2-7b-chat.Q4_K_M.gguf
- name: mistral-7b-instruct
source: https://huggingface.co/TheBloke/Mistral-7B-OpenOrca-GGUF/resolve/main/mistral-7b-openorca.Q6_K.gguf
sha256: "08a5566d61d7cb6b420c3e4387a39e0078e1f2fe5f055f3a03887385304d4bfa"
config: |
- name: llama-2-7b-chat
backend: llama
Expand All @@ -20,21 +19,3 @@ config: |
f16: true
batch: 512
mmap: true
- name: mistral-7b-instruct
context_size: 4096
threads: 4
parameters:
model: mistral-7b-openorca.Q6_K.gguf
temperature: 0.2
top_k: 40
top_p: 0.95
template:
chat_message: chatml
chat: chatml-block
completion: completion
stopwords:
- <|im_end|>
gpu_layers: 35
f16: true
batch: 512
mmap: true
2 changes: 1 addition & 1 deletion test/aikitfile.yaml
Original file line number Diff line number Diff line change
@@ -1,4 +1,4 @@
#syntax=sozercan/aikit:test
#syntax=aikit:test
apiVersion: v1alpha1
debug: true
models:
Expand Down
4 changes: 2 additions & 2 deletions test/bats/test.bats
Original file line number Diff line number Diff line change
Expand Up @@ -5,9 +5,9 @@ load helpers
WAIT_TIME=120
SLEEP_TIME=1

@test "send request to llama-2-7b-chat" {
@test "send request to tinyllama-1.1b-chat" {
run curl --retry 20 --retry-all-errors http://localhost:8080/v1/chat/completions -H "Content-Type: application/json" -d '{
"model": "llama-2-7b-chat",
"model": "tinyllama-1.1b-chat",
"messages": [{"role": "user", "content": "explain kubernetes in a sentence"}],
}'
assert_success
Expand Down
Loading