From 4184417405811170b9e695ec531a14aa4fe46a50 Mon Sep 17 00:00:00 2001 From: Barun Saha Date: Sun, 8 Dec 2024 11:33:57 +0530 Subject: [PATCH 1/4] Add support for offline LLMs via Ollama --- app.py | 85 ++++++++++++++++++++++++++++++------------- global_config.py | 24 +++++++++++- helpers/llm_helper.py | 39 +++++++++++++++----- requirements.txt | 7 +++- 4 files changed, 117 insertions(+), 38 deletions(-) diff --git a/app.py b/app.py index a013b8a..fd29b1a 100644 --- a/app.py +++ b/app.py @@ -3,23 +3,34 @@ """ import datetime import logging +import os import pathlib import random import tempfile from typing import List, Union +import httpx import huggingface_hub import json5 +import ollama import requests import streamlit as st +from dotenv import load_dotenv from langchain_community.chat_message_histories import StreamlitChatMessageHistory from langchain_core.messages import HumanMessage from langchain_core.prompts import ChatPromptTemplate +import global_config as gcfg from global_config import GlobalConfig from helpers import llm_helper, pptx_helper, text_helper +load_dotenv() + + +RUN_IN_OFFLINE_MODE = os.getenv('RUN_IN_OFFLINE_MODE', 'False').lower() == 'true' + + @st.cache_data def _load_strings() -> dict: """ @@ -135,25 +146,36 @@ def reset_api_key(): horizontal=True ) - # The LLMs - llm_provider_to_use = st.sidebar.selectbox( - label='2: Select an LLM to use:', - options=[f'{k} ({v["description"]})' for k, v in GlobalConfig.VALID_MODELS.items()], - index=GlobalConfig.DEFAULT_MODEL_INDEX, - help=GlobalConfig.LLM_PROVIDER_HELP, - on_change=reset_api_key - ).split(' ')[0] - - # The API key/access token - api_key_token = st.text_input( - label=( - '3: Paste your API key/access token:\n\n' - '*Mandatory* for Cohere and Gemini LLMs.' - ' *Optional* for HF Mistral LLMs but still encouraged.\n\n' - ), - type='password', - key='api_key_input' - ) + if RUN_IN_OFFLINE_MODE: + llm_provider_to_use = st.text_input( + label='2: Enter Ollama model name to use:', + help=( + 'Specify a correct, locally available LLM, found by running `ollama list`, for' + ' example `mistral:v0.2` and `mistral-nemo:latest`. Having an Ollama-compatible' + ' and supported GPU is strongly recommended.' + ) + ) + api_key_token: str = '' + else: + # The LLMs + llm_provider_to_use = st.sidebar.selectbox( + label='2: Select an LLM to use:', + options=[f'{k} ({v["description"]})' for k, v in GlobalConfig.VALID_MODELS.items()], + index=GlobalConfig.DEFAULT_MODEL_INDEX, + help=GlobalConfig.LLM_PROVIDER_HELP, + on_change=reset_api_key + ).split(' ')[0] + + # The API key/access token + api_key_token = st.text_input( + label=( + '3: Paste your API key/access token:\n\n' + '*Mandatory* for Cohere and Gemini LLMs.' 
+ ' *Optional* for HF Mistral LLMs but still encouraged.\n\n' + ), + type='password', + key='api_key_input' + ) def build_ui(): @@ -200,7 +222,11 @@ def set_up_chat_ui(): placeholder=APP_TEXT['chat_placeholder'], max_chars=GlobalConfig.LLM_MODEL_MAX_INPUT_LENGTH ): - provider, llm_name = llm_helper.get_provider_model(llm_provider_to_use) + provider, llm_name = llm_helper.get_provider_model( + llm_provider_to_use, + use_ollama=RUN_IN_OFFLINE_MODE + ) + print(f'{llm_provider_to_use=}, {provider=}, {llm_name=}, {api_key_token=}') if not are_all_inputs_valid(prompt, provider, llm_name, api_key_token): return @@ -233,7 +259,7 @@ def set_up_chat_ui(): llm = llm_helper.get_langchain_llm( provider=provider, model=llm_name, - max_new_tokens=GlobalConfig.VALID_MODELS[llm_provider_to_use]['max_new_tokens'], + max_new_tokens=gcfg.get_max_output_tokens(llm_provider_to_use), api_key=api_key_token.strip(), ) @@ -252,18 +278,17 @@ def set_up_chat_ui(): # Update the progress bar with an approx progress percentage progress_bar.progress( min( - len(response) / GlobalConfig.VALID_MODELS[ - llm_provider_to_use - ]['max_new_tokens'], + len(response) / gcfg.get_max_output_tokens(llm_provider_to_use), 0.95 ), text='Streaming content...this might take a while...' ) - except requests.exceptions.ConnectionError: + except (httpx.ConnectError, requests.exceptions.ConnectionError): handle_error( 'A connection error occurred while streaming content from the LLM endpoint.' ' Unfortunately, the slide deck cannot be generated. Please try again later.' - ' Alternatively, try selecting a different LLM from the dropdown list.', + ' Alternatively, try selecting a different LLM from the dropdown list. If you are' + ' using Ollama, make sure that Ollama is already running on your system.', True ) return @@ -274,6 +299,14 @@ def set_up_chat_ui(): True ) return + except ollama.ResponseError: + handle_error( + f'The model `{llm_name}` is unavailable with Ollama on your system.' + f' Make sure that you have provided the correct LLM name or pull it using' + f' `ollama pull {llm_name}`. View LLMs available locally by running `ollama list`.', + True + ) + return except Exception as ex: handle_error( f'An unexpected error occurred while generating the content: {ex}' diff --git a/global_config.py b/global_config.py index b4d9c71..fbf22c5 100644 --- a/global_config.py +++ b/global_config.py @@ -20,7 +20,13 @@ class GlobalConfig: PROVIDER_COHERE = 'co' PROVIDER_GOOGLE_GEMINI = 'gg' PROVIDER_HUGGING_FACE = 'hf' - VALID_PROVIDERS = {PROVIDER_COHERE, PROVIDER_GOOGLE_GEMINI, PROVIDER_HUGGING_FACE} + PROVIDER_OLLAMA = 'ol' + VALID_PROVIDERS = { + PROVIDER_COHERE, + PROVIDER_GOOGLE_GEMINI, + PROVIDER_HUGGING_FACE, + PROVIDER_OLLAMA + } VALID_MODELS = { '[co]command-r-08-2024': { 'description': 'simpler, slower', @@ -47,7 +53,7 @@ class GlobalConfig: 'LLM provider codes:\n\n' '- **[co]**: Cohere\n' '- **[gg]**: Google Gemini API\n' - '- **[hf]**: Hugging Face Inference Endpoint\n' + '- **[hf]**: Hugging Face Inference API\n' ) DEFAULT_MODEL_INDEX = 2 LLM_MODEL_TEMPERATURE = 0.2 @@ -125,3 +131,17 @@ class GlobalConfig: format='%(asctime)s - %(levelname)s - %(name)s - %(message)s', datefmt='%Y-%m-%d %H:%M:%S' ) + + +def get_max_output_tokens(llm_name: str) -> int: + """ + Get the max output tokens value configured for an LLM. Return a default value if not configured. + + :param llm_name: The name of the LLM. + :return: Max output tokens or a default count. 
+ """ + + try: + return GlobalConfig.VALID_MODELS[llm_name]['max_new_tokens'] + except KeyError: + return 2048 diff --git a/helpers/llm_helper.py b/helpers/llm_helper.py index 5d16d23..d7ef896 100644 --- a/helpers/llm_helper.py +++ b/helpers/llm_helper.py @@ -17,8 +17,9 @@ LLM_PROVIDER_MODEL_REGEX = re.compile(r'\[(.*?)\](.*)') +OLLAMA_MODEL_REGEX = re.compile(r'[a-zA-Z0-9._:-]+$') # 6-64 characters long, only containing alphanumeric characters, hyphens, and underscores -API_KEY_REGEX = re.compile(r'^[a-zA-Z0-9\-_]{6,64}$') +API_KEY_REGEX = re.compile(r'^[a-zA-Z0-9_-]{6,64}$') HF_API_HEADERS = {'Authorization': f'Bearer {GlobalConfig.HUGGINGFACEHUB_API_TOKEN}'} REQUEST_TIMEOUT = 35 @@ -39,20 +40,28 @@ http_session.mount('http://', adapter) -def get_provider_model(provider_model: str) -> Tuple[str, str]: +def get_provider_model(provider_model: str, use_ollama: bool) -> Tuple[str, str]: """ Parse and get LLM provider and model name from strings like `[provider]model/name-version`. :param provider_model: The provider, model name string from `GlobalConfig`. - :return: The provider and the model name. + :param use_ollama: Whether Ollama is used (i.e., running in offline mode). + :return: The provider and the model name; empty strings in case no matching pattern found. """ - match = LLM_PROVIDER_MODEL_REGEX.match(provider_model) + provider_model = provider_model.strip() - if match: - inside_brackets = match.group(1) - outside_brackets = match.group(2) - return inside_brackets, outside_brackets + if use_ollama: + match = OLLAMA_MODEL_REGEX.match(provider_model) + if match: + return GlobalConfig.PROVIDER_OLLAMA, match.group(0) + else: + match = LLM_PROVIDER_MODEL_REGEX.match(provider_model) + + if match: + inside_brackets = match.group(1) + outside_brackets = match.group(2) + return inside_brackets, outside_brackets return '', '' @@ -152,6 +161,18 @@ def get_langchain_llm( streaming=True, ) + if provider == GlobalConfig.PROVIDER_OLLAMA: + from langchain_ollama.llms import OllamaLLM + + logger.debug('Getting LLM via Ollama: %s', model) + return OllamaLLM( + model=model, + temperature=GlobalConfig.LLM_MODEL_TEMPERATURE, + num_predict=max_new_tokens, + format='json', + streaming=True, + ) + return None @@ -163,4 +184,4 @@ def get_langchain_llm( ] for text in inputs: - print(get_provider_model(text)) + print(get_provider_model(text, use_ollama=False)) diff --git a/requirements.txt b/requirements.txt index 2e5d55d..1f99de1 100644 --- a/requirements.txt +++ b/requirements.txt @@ -12,9 +12,10 @@ langchain-core~=0.3.0 langchain-community==0.3.0 langchain-google-genai==2.0.6 langchain-cohere==0.3.3 +langchain-ollama==0.2.1 streamlit~=1.38.0 -python-pptx +python-pptx~=0.6.21 # metaphor-python json5~=0.9.14 requests~=2.32.3 @@ -32,3 +33,7 @@ certifi==2024.8.30 urllib3==2.2.3 anyio==4.4.0 + +httpx~=0.27.2 +huggingface-hub~=0.24.5 +ollama~=0.4.3 \ No newline at end of file From 223053f7a732cd14f91af66fb95a1231291ea91b Mon Sep 17 00:00:00 2001 From: Barun Saha Date: Sun, 8 Dec 2024 12:04:45 +0530 Subject: [PATCH 2/4] Add description on how to use the offline mode of SlideDeck AI --- README.md | 24 ++++++++++++++++++++++++ 1 file changed, 24 insertions(+) diff --git a/README.md b/README.md index 1ed70cb..4cbcab5 100644 --- a/README.md +++ b/README.md @@ -47,6 +47,8 @@ Different LLMs offer different styles of content generation. Use one of the foll The Mistral models do not mandatorily require an access token. However, you are encouraged to get and use your own Hugging Face access token. 
+In addition, offline LLMs provided by Ollama can be used. See below for details.
+
 
 # Icons
 
@@ -62,6 +64,28 @@ To run this project by yourself, you need to provide the `HUGGINGFACEHUB_API_TOK
 for example, in a `.env` file. Alternatively, you can provide the access token in the app's user interface itself (UI). For other LLM providers, the API key can only be specified in the UI. For image search, the `PEXEL_API_KEY` should be made available as an environment variable. Visit the respective websites to obtain the API keys.
 
+## Offline LLMs Using Ollama
+
+SlideDeck AI allows the use of offline LLMs to generate the contents of the slide decks. This is typically suitable for individuals or organizations who want to use self-hosted LLMs, for example, because of privacy concerns.
+
+Offline LLMs are made available via Ollama. Therefore, a prerequisite is to have [Ollama installed](https://ollama.com/download) on the system and the desired [LLM](https://ollama.com/search) pulled locally.
+
+In addition, the `RUN_IN_OFFLINE_MODE` environment variable needs to be set to `True` to enable the offline mode. This can be done, for example, using a `.env` file or from the terminal. The typical steps to use SlideDeck AI in offline mode (in a `bash` shell) are as follows:
+
+```bash
+ollama list # View locally available LLMs
+export RUN_IN_OFFLINE_MODE=True # Enable the offline mode to use Ollama
+git clone https://github.com/barun-saha/slide-deck-ai.git
+cd slide-deck-ai
+streamlit run ./app.py # Run the application
+```
+
+The `.env` file should be created inside the `slide-deck-ai` directory.
+
+The UI is similar to the online mode. However, rather than selecting an LLM from a list, you have to enter the name of the Ollama model to use in a text box. No API key is required in this case.
+
+Finally, the focus is on using offline LLMs, not on going completely offline. So, Internet connectivity is still required to fetch the images from Pexels.
+
 
 # Live Demo
 

From f4faf43335908dbbfa9314dfd9340d6b17c3bded Mon Sep 17 00:00:00 2001
From: Barun Saha
Date: Sun, 8 Dec 2024 12:08:10 +0530
Subject: [PATCH 3/4] Update instructions on the offline mode

---
 README.md | 2 ++
 1 file changed, 2 insertions(+)

diff --git a/README.md b/README.md
index 4cbcab5..4df3b3c 100644
--- a/README.md
+++ b/README.md
@@ -84,6 +84,8 @@ The `.env` file should be created inside the `slide-deck-ai` directory.
 
 The UI is similar to the online mode. However, rather than selecting an LLM from a list, you have to enter the name of the Ollama model to use in a text box. No API key is required in this case.
 
+The online and offline modes are mutually exclusive. Setting `RUN_IN_OFFLINE_MODE` to `False` makes SlideDeck AI use the online LLMs (i.e., the "original" mode). By default, `RUN_IN_OFFLINE_MODE` is set to `False`.
+
 Finally, the focus is on using offline LLMs, not on going completely offline. So, Internet connectivity is still required to fetch the images from Pexels.
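As a minimal sketch of how the offline path introduced in PATCH 1/4 fits together (not part of the patches themselves; `mistral-nemo:latest` is just an example of a locally pulled model), the following shows how an Ollama model name is resolved and how the corresponding LangChain LLM is built:

```python
# A rough sketch of the offline (Ollama) code path, assuming Ollama is running locally
# and the example model has been pulled with `ollama pull mistral-nemo:latest`.
import global_config as gcfg
from helpers import llm_helper

# In offline mode, the model name is matched against OLLAMA_MODEL_REGEX, so the
# provider resolves to GlobalConfig.PROVIDER_OLLAMA ('ol').
provider, model = llm_helper.get_provider_model('mistral-nemo:latest', use_ollama=True)
print(provider, model)  # ol mistral-nemo:latest

# get_langchain_llm() returns a streaming OllamaLLM; no API key is needed here.
llm = llm_helper.get_langchain_llm(
    provider=provider,
    model=model,
    max_new_tokens=gcfg.get_max_output_tokens(model),  # falls back to 2048 for names not in VALID_MODELS
    api_key='',
)
```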
From 50f37bd59aaa48d62c51f3835c6f9860ea0731e3 Mon Sep 17 00:00:00 2001 From: Barun Saha Date: Sun, 8 Dec 2024 12:27:12 +0530 Subject: [PATCH 4/4] Update installation steps --- README.md | 3 +++ 1 file changed, 3 insertions(+) diff --git a/README.md b/README.md index 4df3b3c..1cff848 100644 --- a/README.md +++ b/README.md @@ -77,6 +77,9 @@ ollama list # View locally available LLMs export RUN_IN_OFFLINE_MODE=True # Enable the offline mode to use Ollama git clone https://github.com/barun-saha/slide-deck-ai.git cd slide-deck-ai +python -m venv venv # Create a virtual environment +source venv/bin/activate # On a Linux system +pip install -r requirements.txt streamlit run ./app.py # Run the application ```
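As a follow-up to the installation steps above, a short, illustrative snippet like the one below (again not part of the patches; the model name is an example) can help confirm that the Ollama server is reachable and the pulled model responds before `streamlit run ./app.py` is launched:

```python
# Offline-mode sanity check, assuming Ollama is installed and an example model
# has been pulled via `ollama pull mistral-nemo:latest`.
import httpx
import ollama

try:
    reply = ollama.generate(model='mistral-nemo:latest', prompt='Reply with one word: ready')
    print(reply['response'])
except ollama.ResponseError as ex:
    # Typically a missing model; pull it first with `ollama pull <model name>`
    print(f'Ollama reported an error: {ex}')
except (httpx.ConnectError, ConnectionError):
    # The Ollama server is not running or not reachable
    print('Could not connect to Ollama. Start it and try again.')
```

If this prints a reply, the offline mode described in the README changes above should be able to stream slide content from the same model.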