diff --git a/models/llama-3.1-70b-instruct.yaml b/models/llama-3.1-70b-instruct.yaml
index 50db3322..b5d945da 100644
--- a/models/llama-3.1-70b-instruct.yaml
+++ b/models/llama-3.1-70b-instruct.yaml
@@ -4,8 +4,8 @@ debug: true
runtime: cuda
models:
- name: llama-3.1-70b-instruct
- source: https://huggingface.co/lmstudio-community/Meta-Llama-3.1-70B-Instruct-GGUF/resolve/main/Meta-Llama-3.1-70B-Instruct-Q4_K_M.gguf
- sha256: "34d1d88b70a67dc19088ca84e226673962766607791882a7a85959b92857bcb3"
+ source: https://huggingface.co/MaziyarPanahi/Meta-Llama-3.1-70B-Instruct-GGUF/resolve/main/Meta-Llama-3.1-70B-Instruct.Q4_K_M.gguf
+ sha256: "3f16ab17da4521fe3ed7c5d7beed960d3fe7b5b64421ee9650aa53d6b649ccab"
promptTemplates:
- name: chatMsg
template: |
@@ -63,7 +63,7 @@ config: |
response_regex:
- <function=(?P<name>\w+)>(?P<arguments>.*)</function>
parameters:
- model: Meta-Llama-3.1-70B-Instruct-Q4_K_M.gguf
+ model: Meta-Llama-3.1-70B-Instruct.Q4_K_M.gguf
context_size: 8192
f16: true
template:
@@ -74,5 +74,4 @@ config: |
stopwords:
- <|im_end|>
-
- - \"<|eot_id|>\"
+ - <|eot_id|>
- <|end_of_text|>
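
Not part of the patch: a minimal Python sketch for verifying the updated sha256 locally before serving, assuming the GGUF has already been downloaded (the local path below is a placeholder).

```python
import hashlib

# Placeholder path: adjust to wherever the GGUF was downloaded.
MODEL_PATH = "Meta-Llama-3.1-70B-Instruct.Q4_K_M.gguf"
# Digest copied from the updated sha256 field above.
EXPECTED = "3f16ab17da4521fe3ed7c5d7beed960d3fe7b5b64421ee9650aa53d6b649ccab"

h = hashlib.sha256()
with open(MODEL_PATH, "rb") as f:
    # Hash in 1 MiB chunks so the ~40 GB file never has to fit in memory.
    for chunk in iter(lambda: f.read(1 << 20), b""):
        h.update(chunk)

assert h.hexdigest() == EXPECTED, "checksum mismatch: re-download the file"
print("sha256 OK")
```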
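Likewise, a quick sanity check of the `response_regex` reconstructed in the second hunk. The sample model output is invented for illustration; the named groups `name` and `arguments` are what the runtime appears to capture when parsing tool calls.

```python
import re

# Same pattern as the response_regex entry in the config above.
pattern = re.compile(r"<function=(?P<name>\w+)>(?P<arguments>.*)</function>",
                     re.DOTALL)

# Invented example of a model emitting a tool call in this format.
sample = '<function=get_current_weather>{"location": "Berlin"}</function>'
m = pattern.search(sample)
if m:
    print(m.group("name"))       # get_current_weather
    print(m.group("arguments"))  # {"location": "Berlin"}
```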