From a5223b1a1bf8f8809788edc08bc42ed292dc0340 Mon Sep 17 00:00:00 2001
From: Sertac Ozercan
Date: Mon, 4 Dec 2023 01:17:20 +0000
Subject: [PATCH] use tinyllama

Signed-off-by: Sertac Ozercan
---
 test/aikitfile-cuda.yaml | 33 +++++++--------------------------
 test/aikitfile.yaml      | 12 ++++++------
 2 files changed, 13 insertions(+), 32 deletions(-)

diff --git a/test/aikitfile-cuda.yaml b/test/aikitfile-cuda.yaml
index 17888400..24bd2d4e 100644
--- a/test/aikitfile-cuda.yaml
+++ b/test/aikitfile-cuda.yaml
@@ -3,38 +3,19 @@ apiVersion: v1alpha1
 debug: true
 runtime: cuda
 models:
-  - name: llama-2-7b-chat
-    source: https://huggingface.co/TheBloke/Llama-2-7B-Chat-GGUF/resolve/main/llama-2-7b-chat.Q4_K_M.gguf
-  - name: mistral-7b-instruct
-    source: https://huggingface.co/TheBloke/Mistral-7B-OpenOrca-GGUF/resolve/main/mistral-7b-openorca.Q6_K.gguf
+  - name: tinyllama-1.1b-chat
+    source: https://huggingface.co/TheBloke/TinyLlama-1.1B-Chat-v0.3-GGUF/resolve/main/tinyllama-1.1b-chat-v0.3.Q4_K_M.gguf
+    sha256: "2d3bd82534bb6c6e0f4be1761b29b25bdcd65855a022513cb65f24ad3b25e41f"
 config: |
-  - name: llama-2-7b-chat
+  - name: tinyllama-1.1b-chat
     backend: llama
     parameters:
       top_k: 80
       temperature: 0.2
       top_p: 0.7
-      model: llama-2-7b-chat.Q4_K_M.gguf
-    context_size: 4096
-    gpu_layers: 35
+      model: tinyllama-1.1b-chat-v0.3.Q4_K_M.gguf
+    context_size: 1000
+    gpu_layers: 10
     f16: true
     batch: 512
     mmap: true
-  - name: mistral-7b-instruct
-    context_size: 4096
-    threads: 4
-    parameters:
-      model: mistral-7b-openorca.Q6_K.gguf
-      temperature: 0.2
-      top_k: 40
-      top_p: 0.95
-    template:
-      chat_message: chatml
-      chat: chatml-block
-      completion: completion
-    stopwords:
-    - <|im_end|>
-    gpu_layers: 35
-    f16: true
-    batch: 512
-    mmap: true
\ No newline at end of file
diff --git a/test/aikitfile.yaml b/test/aikitfile.yaml
index d88de38b..7b11030d 100644
--- a/test/aikitfile.yaml
+++ b/test/aikitfile.yaml
@@ -2,15 +2,15 @@
 apiVersion: v1alpha1
 debug: true
 models:
-  - name: llama-2-7b-chat
-    source: https://huggingface.co/TheBloke/Llama-2-7B-Chat-GGUF/resolve/main/llama-2-7b-chat.Q4_K_M.gguf
-    sha256: "08a5566d61d7cb6b420c3e4387a39e0078e1f2fe5f055f3a03887385304d4bfa"
+  - name: tinyllama-1.1b-chat
+    source: https://huggingface.co/TheBloke/TinyLlama-1.1B-Chat-v0.3-GGUF/resolve/main/tinyllama-1.1b-chat-v0.3.Q4_K_M.gguf
+    sha256: "2d3bd82534bb6c6e0f4be1761b29b25bdcd65855a022513cb65f24ad3b25e41f"
 config: |
-  - name: llama-2-7b-chat
+  - name: tinyllama-1.1b-chat
     backend: llama
     parameters:
       top_k: 80
       temperature: 0.2
       top_p: 0.7
-      model: llama-2-7b-chat.Q4_K_M.gguf
-    context_size: 4096
+      model: tinyllama-1.1b-chat-v0.3.Q4_K_M.gguf
+    context_size: 1000
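
Note (commentary, not part of the patch): the `sha256` field pins the expected digest of the GGUF file fetched from `source`, so a corrupted or tampered download can be rejected before the model is loaded. A minimal sketch of that verification in Python is below, assuming the file has already been downloaded to the working directory; the local path is hypothetical, and the digest is the one pinned in the aikitfile above.

    import hashlib

    # Digest pinned in the aikitfile above.
    EXPECTED_SHA256 = "2d3bd82534bb6c6e0f4be1761b29b25bdcd65855a022513cb65f24ad3b25e41f"

    def sha256_of(path: str, chunk_size: int = 1 << 20) -> str:
        """Hash the file in 1 MiB chunks so a multi-GB GGUF model
        never has to fit in memory at once."""
        digest = hashlib.sha256()
        with open(path, "rb") as f:
            for chunk in iter(lambda: f.read(chunk_size), b""):
                digest.update(chunk)
        return digest.hexdigest()

    if __name__ == "__main__":
        # Hypothetical local download path for illustration.
        path = "tinyllama-1.1b-chat-v0.3.Q4_K_M.gguf"
        actual = sha256_of(path)
        if actual != EXPECTED_SHA256:
            raise SystemExit(f"checksum mismatch: expected {EXPECTED_SHA256}, got {actual}")
        print("checksum OK")

Streaming the file through the hash rather than reading it whole is the usual choice here, since quantized model files routinely exceed available RAM.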