swapping standard mistral with granite as default
Signed-off-by: greg pereira <[email protected]>
Gregory-Pereira committed Apr 27, 2024
1 parent 2444458 commit 53a8ebe
Showing 13 changed files with 50 additions and 35 deletions.
4 changes: 2 additions & 2 deletions .github/workflows/chatbot.yaml
@@ -62,8 +62,8 @@ jobs:
         run: make install
 
       - name: Download model
-        working-directory: ./recipes/natural_language_processing/${{ env.IMAGE_NAME }}
-        run: make download-model-mistral
+        working-directory: ./models
+        run: make download-model-granite
 
       - name: Run Functional Tests
         shell: bash
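The CI step now downloads the default model through the shared `models` directory instead of the recipe directory. The same target can be run locally from a repository checkout; a minimal sketch, assuming the `download-model-granite` target referenced later in this commit (model_servers/llamacpp_python/Makefile) lives in `models/Makefile`:

```bash
# Run from the repository root; fetches the default Granite GGUF into ./models.
cd models
make download-model-granite
```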
4 changes: 2 additions & 2 deletions .github/workflows/model_servers.yaml
@@ -27,13 +27,13 @@ jobs:
       matrix:
         include:
           - image_name: llamacpp_python
-            model: mistral
+            model: granite
             flavor: base
             directory: llamacpp_python
             platforms: linux/amd64,linux/arm64
             no_gpu: 1
           - image_name: llamacpp_python_cuda
-            model: mistral
+            model: granite
             flavor: cuda
             directory: llamacpp_python
             platforms: linux/amd64
4 changes: 2 additions & 2 deletions .github/workflows/rag.yaml
@@ -67,8 +67,8 @@ jobs:
         run: make install
 
       - name: Download model
-        working-directory: ./recipes/natural_language_processing/${{ env.IMAGE_NAME }}
-        run: make download-model-mistral
+        working-directory: ./models
+        run: make download-model-granite
 
       - name: Run Functional Tests
         shell: bash
4 changes: 2 additions & 2 deletions .github/workflows/summarizer.yaml
@@ -62,8 +62,8 @@ jobs:
         run: make install
 
      - name: Download model
-        working-directory: ./recipes/natural_language_processing/${{ env.IMAGE_NAME }}
-        run: make download-model-mistral
+        working-directory: ./models
+        run: make download-model-granite
 
      - name: Run Functional Tests
         shell: bash
34 changes: 26 additions & 8 deletions ailab-images.md
@@ -1,19 +1,37 @@
-## Images (x86_64, aarch64) currently built from GH Actions in this repository
+## Model Server Images (amd64, arm64) currently built from GH Actions in this repository

 - quay.io/ai-lab/llamacpp_python:latest
-- quay.io/ai-lab/llamacpp_python_cuda:latest
-- quay.io/ai-lab/llamacpp_python_vulkan:latest
+- quay.io/ai-lab/llamacpp-python-cuda:latest
+- quay.io/ai-lab/llamacpp-python-vulkan:latest
+- quay.io/redhat-et/locallm-object-detection-server:latest
+
+## Recipe Images (amd64, arm64)
 - quay.io/ai-lab/summarizer:latest
 - quay.io/ai-lab/chatbot:latest
 - quay.io/ai-lab/rag:latest
 - quay.io/ai-lab/codegen:latest
 - quay.io/ai-lab/chromadb:latest
 - quay.io/redhat-et/locallm-object-detection-client:latest
-- quay.io/redhat-et/locallm-object-detection-server:latest

-## Model Images (x86_64, aarch64)
+## Dependency images
+
+Images used in the `Bootc` aspect of this repo or tooling images
+
+- quay.io/ai-lab/nvidia-builder:latest
+- quay.io/ai-lab/instructlab-nvidia:latest
+- quay.io/ai-lab/nvidia-bootc:latest
+
+- quay.io/ai-lab/chromadb:latest
+- quay.io/ai-lab/model-converter:latest
+
+## Model Images (amd64, arm64)

+- quay.io/ai-lab/merlinite-7b-lab:latest
+  - [model download link](https://huggingface.co/instructlab/merlinite-7b-lab-GGUF/resolve/main/merlinite-7b-lab-Q4_K_M.gguf)
+- quay.io/ai-lab/granite-7b-lab:latest
+  - [model download link](https://huggingface.co/instructlab/granite-7b-lab-GGUF/resolve/main/granite-7b-lab-Q4_K_M.gguf)
 - quay.io/ai-lab/mistral-7b-instruct:latest
   - [model download link](https://huggingface.co/TheBloke/Mistral-7B-Instruct-v0.1-GGUF/resolve/main/mistral-7b-instruct-v0.1.Q4_K_M.gguf)
+- quay.io/ai-lab/codellama-7b:latest
+  - [model download link](https://huggingface.co/TheBloke/CodeLlama-7B-Instruct-GGUF/resolve/main/codellama-7b-instruct.Q4_K_M.gguf)
+- quay.io/ai-lab/mistral-7b-code-16k-qlora:latest
+  - [model download link](https://huggingface.co/TheBloke/Mistral-7B-Code-16K-qlora-GGUF/resolve/main/mistral-7b-code-16k-qlora.Q4_K_M.gguf)
 - quay.io/ai-lab/whisper-small:latest
   - [model download link](https://huggingface.co/ggerganov/whisper.cpp/resolve/main/ggml-small.bin)
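To try the new default without building anything locally, the prebuilt model image and the model server it pairs with can be pulled directly from the list above; a sketch, assuming the `latest` tags are current:

```bash
# Pull the new default Granite model image and the llama.cpp based model server image.
podman pull quay.io/ai-lab/granite-7b-lab:latest
podman pull quay.io/ai-lab/llamacpp_python:latest
```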
4 changes: 2 additions & 2 deletions model_servers/llamacpp_python/Makefile
@@ -25,5 +25,5 @@ build-vulkan:
 
 .PHONY: download-model-granite # default model
 download-model-granite:
-	cd ../../models && \
-	make MODEL_NAME=granite-7b-lab-Q4_K_M.gguf MODEL_URL=https://huggingface.co/instructlab/granite-7b-lab-GGUF/resolve/main/granite-7b-lab-Q4_K_M.gguf -f Makefile download-model
+	cd ../../models/ && \
+	make download-model-granite
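The old recipe called the generic `download-model` target in `models/Makefile` with explicit `MODEL_NAME`/`MODEL_URL` values; that path should still work for fetching a non-default model. A sketch, assuming the generic target and its variables are unchanged (the Mistral values are the ones this commit removes):

```bash
# Fetch the previous Mistral default by calling the generic download target directly.
cd models
make download-model \
  MODEL_NAME=mistral-7b-instruct-v0.1.Q4_K_M.gguf \
  MODEL_URL=https://huggingface.co/TheBloke/Mistral-7B-Instruct-v0.1-GGUF/resolve/main/mistral-7b-instruct-v0.1.Q4_K_M.gguf
```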
2 changes: 1 addition & 1 deletion model_servers/llamacpp_python/tests/conftest.py
@@ -13,7 +13,7 @@
 IMAGE_NAME = os.environ['IMAGE_NAME']
 
 if not 'MODEL_NAME' in os.environ:
-    MODEL_NAME = 'mistral-7b-instruct-v0.1.Q4_K_M.gguf'
+    MODEL_NAME = 'granite-7b-lab-Q4_K_M.gguf'
 else:
     MODEL_NAME = os.environ['MODEL_NAME']
 
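Because the test setup only falls back to the Granite file when `MODEL_NAME` is unset, either model can be exercised per run by exporting the variable; a minimal sketch (the exact pytest invocation is an assumption, not shown in this diff):

```bash
# Assumed invocation: run the llamacpp_python server tests against a non-default GGUF.
export IMAGE_NAME=llamacpp_python
export MODEL_NAME=mistral-7b-instruct-v0.1.Q4_K_M.gguf
pytest model_servers/llamacpp_python/tests
```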
8 changes: 4 additions & 4 deletions recipes/common/Makefile.common
@@ -4,7 +4,7 @@ REGISTRY_ORG ?= ai-lab
 IMAGE_NAME ?= $(REGISTRY_ORG)/${APP}:latest
 APP_IMAGE ?= $(REGISTRY)/$(IMAGE_NAME)
 CHROMADB_IMAGE ?= $(REGISTRY)/$(REGISTRY_ORG)/chromadb:latest
-MODEL_IMAGE ?= $(REGISTRY)/$(REGISTRY_ORG)/mistral-7b-instruct:latest
+MODEL_IMAGE ?= $(REGISTRY)/$(REGISTRY_ORG)/granite-7b-lab:latest
 SERVER_IMAGE ?= $(REGISTRY)/$(REGISTRY_ORG)/llamacpp_python:latest
 SSH_PUBKEY ?= $(shell cat ${HOME}/.ssh/id_rsa.pub;)
 BOOTC_IMAGE ?= quay.io/$(REGISTRY_ORG)/${APP}-bootc:latest
@@ -62,10 +62,10 @@ UNZIP_EXISTS ?= $(shell command -v unzip)
 RELATIVE_MODELS_PATH := ?=
 RELATIVE_TESTS_PATH := ?=
 
-MISTRAL_MODEL_NAME := mistral-7b-instruct-v0.1.Q4_K_M.gguf
-MISTRAL_MODEL_URL := https://huggingface.co/TheBloke/Mistral-7B-Instruct-v0.1-GGUF/resolve/main/mistral-7b-instruct-v0.1.Q4_K_M.gguf
+GRANITE_MODEL_NAME := granite-7b-lab-Q4_K_M.gguf
+GRANITE_MODEL_URL := https://huggingface.co/instructlab/granite-7b-lab-GGUF/resolve/main/granite-7b-lab-Q4_K_M.gguf
 
-MODEL_NAME ?= $(MISTRAL_MODEL_NAME)
+MODEL_NAME ?= $(GRANITE_MODEL_NAME)
 
 .PHONY: install
 install::
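These are all `?=` assignments, so a recipe can still be built against the previous default by overriding `MODEL_IMAGE` (and, if needed, `MODEL_NAME`) on the make command line; a sketch, where the `bootc` target name is an assumption rather than something shown in this diff:

```bash
# Build the chatbot bootc image against the previous Mistral model image instead of the Granite default.
cd recipes/natural_language_processing/chatbot
make bootc MODEL_IMAGE=quay.io/ai-lab/mistral-7b-instruct:latest
```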
2 changes: 1 addition & 1 deletion recipes/common/README.md
@@ -28,7 +28,7 @@ used to override defaults for a variety of make targets.
 |BOOTC_IMAGE_BUILDER | Bootc Image Builder container image | `quay.io/centos-bootc/bootc-image-builder` |
 |CHROMADB_IMAGE | ChromaDB image to be used for application | `$(REGISTRY)/$(REGISTRY_ORG)/chromadb:latest` |
 |DISK_TYPE | Disk type to be created by BOOTC_IMAGE_BUILDER | `qcow2` (Options: ami, iso, vmdk, raw) |
-|MODEL_IMAGE | AI Model to be used by application | `$(REGISTRY)/$(REGISTRY_ORG)/mistral-7b-instruct:latest`|
+|MODEL_IMAGE | AI Model to be used by application | `$(REGISTRY)/$(REGISTRY_ORG)/granite-7b-lab:latest`|
 |SERVER_IMAGE | AI Model Server Application | `$(REGISTRY)/$(REGISTRY_ORG)/llamacpp_python:latest` |
 |SSH_PUBKEY | SSH Public key preloaded in bootc image. | `$(shell cat ${HOME}/.ssh/id_rsa.pub;)` |
 |FROM | Overrides first FROM instruction within Containerfile| `FROM` line defined in the Containerfile |
@@ -14,7 +14,7 @@ RUN set -eu; mkdir -p /usr/ssh && \
     echo ${SSHPUBKEY} > /usr/ssh/root.keys && chmod 0600 /usr/ssh/root.keys
 
 ARG RECIPE=chatbot
-ARG MODEL_IMAGE=quay.io/ai-lab/mistral-7b-instruct:latest
+ARG MODEL_IMAGE=quay.io/ai-lab/granite-7b-lab:latest
 ARG APP_IMAGE=quay.io/ai-lab/${RECIPE}:latest
 ARG SERVER_IMAGE=quay.io/ai-lab/llamacpp_python:latest
 ARG TARGETARCH
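`MODEL_IMAGE` is a build `ARG`, so the model image baked into the bootc image can be swapped without editing the Containerfile; a hypothetical build, where the Containerfile path and the output tag are assumptions based on the `RECIPE` argument:

```bash
# Hypothetical build: override the model image build argument for the chatbot bootc image.
podman build \
  --build-arg MODEL_IMAGE=quay.io/ai-lab/mistral-7b-instruct:latest \
  -t quay.io/ai-lab/chatbot-bootc:latest \
  -f recipes/natural_language_processing/chatbot/bootc/Containerfile .
```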
13 changes: 5 additions & 8 deletions recipes/natural_language_processing/rag/README.md
@@ -52,18 +52,15 @@ In order to build this application we will need two models, a Vector Database, a
 
 ### Download models
 
-If you are just getting started, we recommend using [Mistral-7B-Instruct](https://huggingface.co/mistralai/Mistral-7B-Instruct-v0.1). This is a well
-performant mid-sized model with an apache-2.0 license. In order to use it with our Model Service we need it converted
-and quantized into the [GGUF format](https://github.com/ggerganov/ggml/blob/master/docs/gguf.md). There are a number of
-ways to get a GGUF version of Mistral-7B, but the simplest is to download a pre-converted one from
-[huggingface.co](https://huggingface.co) here: https://huggingface.co/TheBloke/Mistral-7B-Instruct-v0.1-GGUF.
+If you are just getting started, we recommend using [Granite-7B-Lab](https://huggingface.co/instructlab/granite-7b-lab-GGUF). This is a well-performing
+mid-sized model with an Apache-2.0 license that has already been quantized and published in the [GGUF format](https://github.com/ggerganov/ggml/blob/master/docs/gguf.md).
 
 The recommended model can be downloaded using the code snippet below:
 
 ```bash
-cd models
-wget https://huggingface.co/TheBloke/Mistral-7B-Instruct-v0.1-GGUF/resolve/main/mistral-7b-instruct-v0.1.Q4_K_M.gguf
-cd ../
+cd ../../../models
+curl -sLO https://huggingface.co/instructlab/granite-7b-lab-GGUF/resolve/main/granite-7b-lab-Q4_K_M.gguf
+cd ../recipes/natural_language_processing/rag
 ```
 
 _A full list of supported open models is forthcoming._
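Once downloaded, the GGUF can be served with the model-server image referenced elsewhere in this commit. This is a rough sketch only: the volume mount point and the `MODEL_PATH`/`HOST`/`PORT` environment variables are assumptions about the server image's interface and are not taken from this diff.

```bash
# Rough sketch: serve the downloaded Granite model with the llamacpp_python server image.
podman run --rm -it -p 8001:8001 \
  -v "$(pwd)/models:/locallm/models:ro,Z" \
  -e MODEL_PATH=models/granite-7b-lab-Q4_K_M.gguf \
  -e HOST=0.0.0.0 \
  -e PORT=8001 \
  quay.io/ai-lab/llamacpp_python:latest
```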
@@ -15,7 +15,7 @@ RUN set -eu; mkdir -p /usr/ssh && \
     echo ${SSHPUBKEY} > /usr/ssh/root.keys && chmod 0600 /usr/ssh/root.keys
 
 ARG RECIPE=rag
-ARG MODEL_IMAGE=quay.io/ai-lab/mistral-7b-instruct:latest
+ARG MODEL_IMAGE=quay.io/ai-lab/granite-7b-lab:latest
 ARG APP_IMAGE=quay.io/ai-lab/${RECIPE}:latest
 ARG SERVER_IMAGE=quay.io/ai-lab/llamacpp_python:latest
 ARG CHROMADBImage=quay.io/ai-lab/chromadb
@@ -14,7 +14,7 @@ RUN set -eu; mkdir -p /usr/ssh && \
     echo ${SSHPUBKEY} > /usr/ssh/root.keys && chmod 0600 /usr/ssh/root.keys
 
 ARG RECIPE=summarizer
-ARG MODEL_IMAGE=quay.io/ai-lab/mistral-7b-instruct:latest
+ARG MODEL_IMAGE=quay.io/ai-lab/granite-7b-lab:latest
 ARG APP_IMAGE=quay.io/ai-lab/${RECIPE}:latest
 ARG SERVER_IMAGE=quay.io/ai-lab/llamacpp_python:latest
 ARG TARGETARCH
