From 1324ee2154c7a0cef6848b5d2dc39aed423517af Mon Sep 17 00:00:00 2001
From: Sertac Ozercan
Date: Mon, 4 Dec 2023 07:17:23 +0000
Subject: [PATCH] test

Signed-off-by: Sertac Ozercan
---
 .github/workflows/test-docker.yaml |  2 +-
 pkg/aikit2llb/convert.go           |  7 ++++---
 test/aikitfile-cuda.yaml           | 14 +++++++-------
 test/aikitfile.yaml                | 19 +++++++++----------
 4 files changed, 21 insertions(+), 21 deletions(-)

diff --git a/.github/workflows/test-docker.yaml b/.github/workflows/test-docker.yaml
index f7900b2c..a70ae078 100644
--- a/.github/workflows/test-docker.yaml
+++ b/.github/workflows/test-docker.yaml
@@ -67,7 +67,7 @@ jobs:
 
       - name: run test
         run: |
-          curl http://localhost:8080/v1/chat/completions -H "Content-Type: application/json" -d '{"model": "tinyllama-1.1b-chat", "messages": [{"role": "user", "content": "explain kubernetes in a sentence"}]}'
+          curl http://127.0.0.1:8080/v1/chat/completions -H "Content-Type: application/json" -d '{"model": "tinyllama-1.1b-chat", "messages": [{"role": "user", "content": "explain kubernetes in a sentence"}]}'
 
       - name: save logs
         if: always()
diff --git a/pkg/aikit2llb/convert.go b/pkg/aikit2llb/convert.go
index 634f95f7..b90b1d03 100644
--- a/pkg/aikit2llb/convert.go
+++ b/pkg/aikit2llb/convert.go
@@ -25,10 +25,10 @@ func Aikit2LLB(c *config.Config) (llb.State, *specs.Image) {
 	s, merge = copyModels(c, s)
 	s, merge = addLocalAI(c, s, merge)
 	if c.Runtime == utils.RuntimeNVIDIA {
-		s = installCuda(s, merge)
+		merge = installCuda(s, merge)
 	}
 	imageCfg := NewImageConfig(c)
-	return s, imageCfg
+	return merge, imageCfg
 }
 
 func copyModels(c *config.Config, s llb.State) (llb.State, llb.State) {
@@ -87,8 +87,9 @@ func installCuda(s llb.State, merge llb.State) llb.State {
 		llb.WithCustomName("Copying "+fileNameFromURL(cudaKeyringURL)), //nolint: goconst
 	)
 	s = s.Run(shf("dpkg -i cuda-keyring_1.1-1_all.deb && rm cuda-keyring_1.1-1_all.deb")).Root()
+	s = s.Run(shf("apt-get update && apt-get install -y ca-certificates && apt-get update"), llb.IgnoreCache).Root()
 	savedState := s
-	s = s.Run(shf("apt-get update && apt-get install -y ca-certificates && apt-get update && apt-get install -y libcublas-%[1]s cuda-cudart-%[1]s && apt-get clean", cudaVersion), llb.IgnoreCache).Root()
+	s = s.Run(shf("apt-get install -y libcublas-%[1]s cuda-cudart-%[1]s && apt-get clean", cudaVersion)).Root()
 
 	diff := llb.Diff(savedState, s)
 	merge = llb.Merge([]llb.State{merge, diff})
diff --git a/test/aikitfile-cuda.yaml b/test/aikitfile-cuda.yaml
index 24bd2d4e..c282fc98 100644
--- a/test/aikitfile-cuda.yaml
+++ b/test/aikitfile-cuda.yaml
@@ -3,19 +3,19 @@ apiVersion: v1alpha1
 debug: true
 runtime: cuda
 models:
-  - name: tinyllama-1.1b-chat
-    source: https://huggingface.co/TheBloke/TinyLlama-1.1B-Chat-v0.3-GGUF/resolve/main/tinyllama-1.1b-chat-v0.3.Q4_K_M.gguf
-    sha256: "2d3bd82534bb6c6e0f4be1761b29b25bdcd65855a022513cb65f24ad3b25e41f"
+  - name: llama-2-7b-chat
+    source: https://huggingface.co/TheBloke/Llama-2-7B-Chat-GGUF/resolve/main/llama-2-7b-chat.Q4_K_M.gguf
+    sha256: "08a5566d61d7cb6b420c3e4387a39e0078e1f2fe5f055f3a03887385304d4bfa"
 config: |
-  - name: tinyllama-1.1b-chat
+  - name: llama-2-7b-chat
     backend: llama
     parameters:
       top_k: 80
       temperature: 0.2
       top_p: 0.7
-      model: tinyllama-1.1b-chat-v0.3.Q4_K_M.gguf
-    context_size: 1000
-    gpu_layers: 10
+      model: llama-2-7b-chat.Q4_K_M.gguf
+    context_size: 4096
+    gpu_layers: 35
     f16: true
     batch: 512
     mmap: true
diff --git a/test/aikitfile.yaml b/test/aikitfile.yaml
index aeefa534..d88de38b 100644
--- a/test/aikitfile.yaml
+++ b/test/aikitfile.yaml
@@ -2,16 +2,15 @@
 apiVersion: v1alpha1
 debug: true
 models:
-  - name: tinyllama-1.1b-chat
-    source: https://huggingface.co/TheBloke/TinyLlama-1.1B-Chat-v0.3-GGUF/resolve/main/tinyllama-1.1b-chat-v0.3.Q4_K_M.gguf
-    sha256: "2d3bd82534bb6c6e0f4be1761b29b25bdcd65855a022513cb65f24ad3b25e41f"
+  - name: llama-2-7b-chat
+    source: https://huggingface.co/TheBloke/Llama-2-7B-Chat-GGUF/resolve/main/llama-2-7b-chat.Q4_K_M.gguf
+    sha256: "08a5566d61d7cb6b420c3e4387a39e0078e1f2fe5f055f3a03887385304d4bfa"
 config: |
-  - name: tinyllama-1.1b-chat
+  - name: llama-2-7b-chat
     backend: llama
     parameters:
-      top_k: 40
-      temperature: 0
-      top_p: 0.1
-      model: tinyllama-1.1b-chat-v0.3.Q4_K_M.gguf
-    context_size: 1000
-    mmap: false
+      top_k: 80
+      temperature: 0.2
+      top_p: 0.7
+      model: llama-2-7b-chat.Q4_K_M.gguf
+    context_size: 4096