From 1324ee2154c7a0cef6848b5d2dc39aed423517af Mon Sep 17 00:00:00 2001
From: Sertac Ozercan
Date: Mon, 4 Dec 2023 07:17:23 +0000
Subject: [PATCH] test

Signed-off-by: Sertac Ozercan
---
 .github/workflows/test-docker.yaml |  2 +-
 pkg/aikit2llb/convert.go           |  7 ++++---
 test/aikitfile-cuda.yaml           | 14 +++++++-------
 test/aikitfile.yaml                | 19 +++++++++----------
 4 files changed, 21 insertions(+), 21 deletions(-)

diff --git a/.github/workflows/test-docker.yaml b/.github/workflows/test-docker.yaml
index f7900b2c..a70ae078 100644
--- a/.github/workflows/test-docker.yaml
+++ b/.github/workflows/test-docker.yaml
@@ -67,7 +67,7 @@ jobs:
 
       - name: run test
         run: |
-          curl http://localhost:8080/v1/chat/completions -H "Content-Type: application/json" -d '{"model": "tinyllama-1.1b-chat", "messages": [{"role": "user", "content": "explain kubernetes in a sentence"}]}'
+          curl http://127.0.0.1:8080/v1/chat/completions -H "Content-Type: application/json" -d '{"model": "tinyllama-1.1b-chat", "messages": [{"role": "user", "content": "explain kubernetes in a sentence"}]}'
 
       - name: save logs
         if: always()
diff --git a/pkg/aikit2llb/convert.go b/pkg/aikit2llb/convert.go
index 634f95f7..b90b1d03 100644
--- a/pkg/aikit2llb/convert.go
+++ b/pkg/aikit2llb/convert.go
@@ -25,10 +25,10 @@ func Aikit2LLB(c *config.Config) (llb.State, *specs.Image) {
 	s, merge = copyModels(c, s)
 	s, merge = addLocalAI(c, s, merge)
 	if c.Runtime == utils.RuntimeNVIDIA {
-		s = installCuda(s, merge)
+		merge = installCuda(s, merge)
 	}
 	imageCfg := NewImageConfig(c)
-	return s, imageCfg
+	return merge, imageCfg
 }
 
 func copyModels(c *config.Config, s llb.State) (llb.State, llb.State) {
@@ -87,8 +87,9 @@ func installCuda(s llb.State, merge llb.State) llb.State {
 		llb.WithCustomName("Copying "+fileNameFromURL(cudaKeyringURL)), //nolint: goconst
 	)
 	s = s.Run(shf("dpkg -i cuda-keyring_1.1-1_all.deb && rm cuda-keyring_1.1-1_all.deb")).Root()
+	s = s.Run(shf("apt-get update && apt-get install -y ca-certificates && apt-get update"), llb.IgnoreCache).Root()
 	savedState := s
-	s = s.Run(shf("apt-get update && apt-get install -y ca-certificates && apt-get update && apt-get install -y libcublas-%[1]s cuda-cudart-%[1]s && apt-get clean", cudaVersion), llb.IgnoreCache).Root()
+	s = s.Run(shf("apt-get install -y libcublas-%[1]s cuda-cudart-%[1]s && apt-get clean", cudaVersion)).Root()
 
 	diff := llb.Diff(savedState, s)
 	merge = llb.Merge([]llb.State{merge, diff})
diff --git a/test/aikitfile-cuda.yaml b/test/aikitfile-cuda.yaml
index 24bd2d4e..c282fc98 100644
--- a/test/aikitfile-cuda.yaml
+++ b/test/aikitfile-cuda.yaml
@@ -3,19 +3,19 @@ apiVersion: v1alpha1
 debug: true
 runtime: cuda
 models:
-  - name: tinyllama-1.1b-chat
-    source: https://huggingface.co/TheBloke/TinyLlama-1.1B-Chat-v0.3-GGUF/resolve/main/tinyllama-1.1b-chat-v0.3.Q4_K_M.gguf
-    sha256: "2d3bd82534bb6c6e0f4be1761b29b25bdcd65855a022513cb65f24ad3b25e41f"
+  - name: llama-2-7b-chat
+    source: https://huggingface.co/TheBloke/Llama-2-7B-Chat-GGUF/resolve/main/llama-2-7b-chat.Q4_K_M.gguf
+    sha256: "08a5566d61d7cb6b420c3e4387a39e0078e1f2fe5f055f3a03887385304d4bfa"
 config: |
-  - name: tinyllama-1.1b-chat
+  - name: llama-2-7b-chat
     backend: llama
     parameters:
       top_k: 80
       temperature: 0.2
       top_p: 0.7
-      model: tinyllama-1.1b-chat-v0.3.Q4_K_M.gguf
-    context_size: 1000
-    gpu_layers: 10
+      model: llama-2-7b-chat.Q4_K_M.gguf
+    context_size: 4096
+    gpu_layers: 35
     f16: true
     batch: 512
     mmap: true
diff --git a/test/aikitfile.yaml b/test/aikitfile.yaml
index aeefa534..d88de38b 100644
--- a/test/aikitfile.yaml
+++ b/test/aikitfile.yaml
@@ -2,16 +2,15 @@
 apiVersion: v1alpha1
 debug: true
 models:
-  - name: tinyllama-1.1b-chat
-    source: https://huggingface.co/TheBloke/TinyLlama-1.1B-Chat-v0.3-GGUF/resolve/main/tinyllama-1.1b-chat-v0.3.Q4_K_M.gguf
-    sha256: "2d3bd82534bb6c6e0f4be1761b29b25bdcd65855a022513cb65f24ad3b25e41f"
+  - name: llama-2-7b-chat
+    source: https://huggingface.co/TheBloke/Llama-2-7B-Chat-GGUF/resolve/main/llama-2-7b-chat.Q4_K_M.gguf
+    sha256: "08a5566d61d7cb6b420c3e4387a39e0078e1f2fe5f055f3a03887385304d4bfa"
 config: |
-  - name: tinyllama-1.1b-chat
+  - name: llama-2-7b-chat
     backend: llama
     parameters:
-      top_k: 40
-      temperature: 0
-      top_p: 0.1
-      model: tinyllama-1.1b-chat-v0.3.Q4_K_M.gguf
-    context_size: 1000
-    mmap: false
+      top_k: 80
+      temperature: 0.2
+      top_p: 0.7
+      model: llama-2-7b-chat.Q4_K_M.gguf
+    context_size: 4096