
Commit

test
Signed-off-by: Sertac Ozercan <[email protected]>
sozercan committed Dec 4, 2023
1 parent 0ea86ce commit 8b414fc
Showing 4 changed files with 21 additions and 22 deletions.
.github/workflows/test-docker.yaml (2 changes: 1 addition & 1 deletion)

@@ -67,7 +67,7 @@ jobs:
 
       - name: run test
        run: |
-          curl http://localhost:8080/v1/chat/completions -H "Content-Type: application/json" -d '{"model": "tinyllama-1.1b-chat", "messages": [{"role": "user", "content": "explain kubernetes in a sentence"}]}'
+          curl http://127.0.0.1:8080/v1/chat/completions -H "Content-Type: application/json" -d '{"model": "tinyllama-1.1b-chat", "messages": [{"role": "user", "content": "explain kubernetes in a sentence"}]}'
 
      - name: save logs
        if: always()
pkg/aikit2llb/convert.go (7 changes: 4 additions & 3 deletions)

@@ -25,10 +25,10 @@ func Aikit2LLB(c *config.Config) (llb.State, *specs.Image) {
     s, merge = copyModels(c, s)
     s, merge = addLocalAI(c, s, merge)
     if c.Runtime == utils.RuntimeNVIDIA {
-        s = installCuda(s, merge)
+        merge = installCuda(s, merge)
     }
     imageCfg := NewImageConfig(c)
-    return s, imageCfg
+    return merge, imageCfg
 }
 
 func copyModels(c *config.Config, s llb.State) (llb.State, llb.State) {

@@ -87,8 +87,9 @@ func installCuda(s llb.State, merge llb.State) llb.State {
         llb.WithCustomName("Copying "+fileNameFromURL(cudaKeyringURL)), //nolint: goconst
     )
     s = s.Run(shf("dpkg -i cuda-keyring_1.1-1_all.deb && rm cuda-keyring_1.1-1_all.deb")).Root()
+    s = s.Run(shf("apt-get update && apt-get install -y ca-certificates && apt-get update"), llb.IgnoreCache).Root()
     savedState := s
-    s = s.Run(shf("apt-get update && apt-get install -y ca-certificates && apt-get update && apt-get install -y libcublas-%[1]s cuda-cudart-%[1]s && apt-get clean", cudaVersion), llb.IgnoreCache).Root()
+    s = s.Run(shf("apt-get install -y libcublas-%[1]s cuda-cudart-%[1]s && apt-get clean", cudaVersion)).Root()
 
     diff := llb.Diff(savedState, s)
     merge = llb.Merge([]llb.State{merge, diff})
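For context on the convert.go change: installCuda snapshots the state, runs the CUDA package install on top of it, and merges only the resulting filesystem diff into the final image, so Aikit2LLB must return the merged state rather than s. Below is a minimal sketch of that diff/merge pattern using BuildKit's LLB client (github.com/moby/buildkit/client/llb); the package name, the helper installPackages, and the shell commands are illustrative stand-ins, not the project's actual code (which uses its own shf helper).

package example

import "github.com/moby/buildkit/client/llb"

// installPackages mirrors the installCuda pattern above: snapshot the state,
// run the install on top of it, then merge only the resulting filesystem diff
// into the final image state.
func installPackages(s llb.State, merge llb.State) llb.State {
    // Refresh package metadata; llb.IgnoreCache keeps this step from being
    // reused from a stale build cache.
    s = s.Run(llb.Shlex(`sh -c "apt-get update && apt-get install -y ca-certificates"`), llb.IgnoreCache).Root()

    savedState := s // snapshot taken before the package install

    s = s.Run(llb.Shlex(`sh -c "apt-get install -y some-package && apt-get clean"`)).Root()

    // Diff captures only what the install wrote; merging that diff keeps the
    // final image from pulling in the whole intermediate state.
    diff := llb.Diff(savedState, s)
    return llb.Merge([]llb.State{merge, diff})
}

The caller must keep the returned merged state (merge = installPackages(s, merge)), which is what this commit fixes in Aikit2LLB.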
test/aikitfile-cuda.yaml (15 changes: 7 additions & 8 deletions)

@@ -1,21 +1,20 @@
 #syntax=aikit:test
 apiVersion: v1alpha1
 debug: true
 runtime: cuda
 models:
-  - name: tinyllama-1.1b-chat
-    source: https://huggingface.co/TheBloke/TinyLlama-1.1B-Chat-v0.3-GGUF/resolve/main/tinyllama-1.1b-chat-v0.3.Q4_K_M.gguf
-    sha256: "2d3bd82534bb6c6e0f4be1761b29b25bdcd65855a022513cb65f24ad3b25e41f"
+  - name: llama-2-7b-chat
+    source: https://huggingface.co/TheBloke/Llama-2-7B-Chat-GGUF/resolve/main/llama-2-7b-chat.Q4_K_M.gguf
+    sha256: "08a5566d61d7cb6b420c3e4387a39e0078e1f2fe5f055f3a03887385304d4bfa"
 config: |
-  - name: tinyllama-1.1b-chat
+  - name: llama-2-7b-chat
     backend: llama
     parameters:
       top_k: 80
       temperature: 0.2
       top_p: 0.7
-      model: tinyllama-1.1b-chat-v0.3.Q4_K_M.gguf
-      context_size: 1000
-      gpu_layers: 10
+      model: llama-2-7b-chat.Q4_K_M.gguf
+      context_size: 4096
+      gpu_layers: 35
       f16: true
       batch: 512
       mmap: true
test/aikitfile.yaml (19 changes: 9 additions & 10 deletions)

@@ -2,16 +2,15 @@
 apiVersion: v1alpha1
 debug: true
 models:
-  - name: tinyllama-1.1b-chat
-    source: https://huggingface.co/TheBloke/TinyLlama-1.1B-Chat-v0.3-GGUF/resolve/main/tinyllama-1.1b-chat-v0.3.Q4_K_M.gguf
-    sha256: "2d3bd82534bb6c6e0f4be1761b29b25bdcd65855a022513cb65f24ad3b25e41f"
+  - name: llama-2-7b-chat
+    source: https://huggingface.co/TheBloke/Llama-2-7B-Chat-GGUF/resolve/main/llama-2-7b-chat.Q4_K_M.gguf
+    sha256: "08a5566d61d7cb6b420c3e4387a39e0078e1f2fe5f055f3a03887385304d4bfa"
 config: |
-  - name: tinyllama-1.1b-chat
+  - name: llama-2-7b-chat
     backend: llama
     parameters:
-      top_k: 40
-      temperature: 0
-      top_p: 0.1
-      model: tinyllama-1.1b-chat-v0.3.Q4_K_M.gguf
-      context_size: 1000
-      mmap: false
+      top_k: 80
+      temperature: 0.2
+      top_p: 0.7
+      model: llama-2-7b-chat.Q4_K_M.gguf
+      context_size: 4096
