description: Learn how to deploy and use the Qiskit Code Assistant model locally.
+--- + +{/* cspell:ignore ollama, gguf */} + +# Use Qiskit Code Assistant in local mode + +Learn how to install, configure, and use the Qiskit Code Assistant model on your local machine. + + + - Qiskit Code Assistant is in preview release status and is subject to change. + - If you have feedback or want to contact the developer team, use the [Qiskit Slack Workspace channel](https://qiskit.enterprise.slack.com/archives/C07LYA6PL83) or the related public GitHub repositories. + + +## Download the Qiskit Code Assistant model + +The Qiskit Code Assistant model is available in GGUF file format and can be downloaded from the Hugging Face Hub in one of two ways. + +
1. Go to the Files and Versions tab and download the GGUF model
+ + +
    huggingface-cli download Qiskit/granite-8b-qiskit-GGUF --local-dir <path-to-local-dir>
1. Go to the [`granite-8b-qiskit`](https://huggingface.co/Qiskit/granite-8b-qiskit-GGUF) GGUF model page and choose **ollama** from the **Use this model** dropdown.
1. Create a `Modelfile` entering the following content and be sure to update `<path-to-model>` to the actual path of your downloaded model.

    ```
    FROM <path-to-model>
model_path = <path-to-model>
+ +The `granite-8b-qiskit` configured in Ollama should appear in the status bar and is then ready to use. + +#### Connect with the Qiskit Code Assistant JupyterLab extension + +With the Qiskit Code Assistant JupyterLab extension, you can interact with the model and perform code completion directly in your Jupyter Notebook. Users who predominantly work with Jupyter Notebooks can take advantage of this extension to further enhance their experience writing Qiskit code. + +1. Install the [Qiskit Code Assistant JupyterLab extension](/guides/qiskit-code-assistant-jupyterlab). +1. In JupyterLab, go to the **Settings Editor** and set the **Qiskit Code Assistant Service API** to the URL of your local Ollama deployment (for example, http://localhost:11434). + +The `granite-8b-qiskit` configured in Ollama should appear in the status bar and is then ready to use. diff --git a/qiskit_bot.yaml b/qiskit_bot.yaml index 60b3901be2f..ab8482a05de 100644 --- a/qiskit_bot.yaml +++ b/qiskit_bot.yaml @@ -163,6 +163,13 @@ notifications: - "cbjuan" - "@abbycross" - "@beckykd" + "docs/guides/qiskit-code-assistant-local": + - "@cbjuan" + - "@vabarbosa" + - "@lucaburatti7" + - "@adarsh-tiwari17" + - "@abbycross" + - "@beckykd" "docs/guides/pulse": - "`@nkanazawa1989`" - "@abbycross" diff --git a/scripts/js/commands/checkPatternsIndex.ts b/scripts/js/commands/checkPatternsIndex.ts index 8fac25215f2..9800ddfd78d 100644 --- a/scripts/js/commands/checkPatternsIndex.ts +++ b/scripts/js/commands/checkPatternsIndex.ts @@ -20,6 +20,7 @@ const ALLOWLIST_MISSING_FROM_INDEX: Set = new Set([ "/guides/qiskit-code-assistant", "/guides/qiskit-code-assistant-jupyterlab", "/guides/qiskit-code-assistant-vscode", + "/guides/qiskit-code-assistant-local", "/guides/addons", ]);