RAG-based app for NationGPT #1

Open · wants to merge 7 commits into main
33 changes: 33 additions & 0 deletions .devcontainer/devcontainer.json
@@ -0,0 +1,33 @@
{
    "name": "Python 3",
    // Or use a Dockerfile or Docker Compose file. More info: https://containers.dev/guide/dockerfile
    "image": "mcr.microsoft.com/devcontainers/python:1-3.11-bullseye",
    "customizations": {
        "codespaces": {
            "openFiles": [
                "README.md",
                "app/app.py"
            ]
        },
        "vscode": {
            "settings": {},
            "extensions": [
                "ms-python.python",
                "ms-python.vscode-pylance"
            ]
        }
    },
    "updateContentCommand": "[ -f packages.txt ] && sudo apt update && sudo apt upgrade -y && sudo xargs apt install -y <packages.txt; [ -f requirements.txt ] && pip3 install --user -r requirements.txt; pip3 install --user streamlit; echo '✅ Packages installed and Requirements met'",
    "postAttachCommand": {
        "server": "streamlit run app/app.py --server.enableCORS false --server.enableXsrfProtection false"
    },
    "portsAttributes": {
        "8501": {
            "label": "Application",
            "onAutoForward": "openPreview"
        }
    },
    "forwardPorts": [
        8501
    ]
}
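
Once the Codespace is built, `updateContentCommand` installs any apt packages listed in `packages.txt` plus the Python requirements, and `postAttachCommand` launches the Streamlit app on port 8501, which is forwarded and opened as a preview automatically. A minimal smoke test of that wiring, as a sketch (it assumes a recent Streamlit release, which serves a health probe at `/_stcore/health`; older releases used `/healthz`):

```python
# Minimal sketch: verify the forwarded Streamlit server is responding.
# Assumption: a recent Streamlit release serving /_stcore/health
# (older releases used /healthz); adjust the path if needed.
import urllib.request

with urllib.request.urlopen("http://localhost:8501/_stcore/health", timeout=5) as resp:
    print(resp.status, resp.read().decode())  # expected: 200 ok
```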
1 change: 1 addition & 0 deletions .gitignore
@@ -0,0 +1 @@
__pycache__
74 changes: 74 additions & 0 deletions app/app.py
@@ -0,0 +1,74 @@
import streamlit as st

from langchain_core.output_parsers import StrOutputParser
from langchain_core.prompts import ChatPromptTemplate
from langchain_core.runnables import RunnablePassthrough
from langchain_core.messages import AIMessage, HumanMessage
from langchain_openai import ChatOpenAI

from rag import chunk_processing, embeddings

pdf_path = './app/static/constitution.pdf'


@st.cache_resource
def load_vector_store(path):
    """Build the vector store once and reuse it across Streamlit reruns."""
    with open(path, 'rb') as pdf:
        processed_chunks = chunk_processing(pdf)
    return embeddings(processed_chunks)


embedded_chunks = load_vector_store(pdf_path)


def get_response(user_query):
    retriever = embedded_chunks.as_retriever()

    template = """
    You are a helpful and funny assistant. Respond happily, using emojis, based on the following context: {context}

    User question: {user_question}
    """

    prompt = ChatPromptTemplate.from_template(template)

    # Initialize ChatOpenAI model
    model = ChatOpenAI(model_name='gpt-4-0125-preview')

    # Define processing chain
    chain = (
        {"context": retriever, "user_question": RunnablePassthrough()}
        | prompt
        | model
        | StrOutputParser()
    )

    return chain.stream(user_query)


st.title("NationGPT 🌞")

# session state
if "chat_history" not in st.session_state:
    st.session_state.chat_history = [
        AIMessage(content="Hello, I am NationGPT. How can I help you today?"),
    ]

# conversation
for message in st.session_state.chat_history:
    if isinstance(message, AIMessage):
        with st.chat_message("AI"):
            st.write(message.content)
    elif isinstance(message, HumanMessage):
        with st.chat_message("Human"):
            st.write(message.content)

# user input
user_query = st.chat_input("Ask me anything...")
if user_query is not None and user_query != "":
    st.session_state.chat_history.append(HumanMessage(content=user_query))

    with st.chat_message("Human"):
        st.markdown(user_query)

    with st.chat_message("AI"):
        response = st.write_stream(get_response(user_query))

    st.session_state.chat_history.append(AIMessage(content=response))

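Since `StrOutputParser` is the final step, `chain.stream(...)` yields plain string chunks; `st.write_stream` renders them incrementally and returns the concatenated text, which is what gets appended to `chat_history`. As a minimal sketch, the same generator can be consumed outside Streamlit (this assumes `OPENAI_API_KEY` is set and the module-level setup above has run; the question is only an example):

```python
# Stream the answer chunk-by-chunk to stdout instead of the Streamlit UI.
full_response = ""
for chunk in get_response("How is a constitutional amendment ratified?"):
    print(chunk, end="", flush=True)
    full_response += chunk  # same string st.write_stream would return
```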
70 changes: 70 additions & 0 deletions app/rag.py
@@ -0,0 +1,70 @@
import os
from PyPDF2 import PdfReader

# Importing necessary modules from langchain and langchain_openai
from langchain.text_splitter import RecursiveCharacterTextSplitter
from langchain_community.vectorstores import FAISS
from langchain_core.output_parsers import StrOutputParser
from langchain_core.prompts import ChatPromptTemplate
from langchain_core.runnables import RunnablePassthrough
from langchain_openai import ChatOpenAI, OpenAIEmbeddings

OPENAI_KEY = os.getenv("OPENAI_API_KEY")


def chunk_processing(pdf):
    """
    Process a PDF file, extracting text and splitting it into chunks.
    """
    pdf_reader = PdfReader(pdf)

    text = ""
    for page in pdf_reader.pages:
        # extract_text() can return None for pages with no extractable text
        text += page.extract_text() or ""

    # Split text into chunks
    text_splitter = RecursiveCharacterTextSplitter(
        chunk_size=1000,
        chunk_overlap=200,
        length_function=len
    )
    chunks = text_splitter.split_text(text=text)
    return chunks


def embeddings(chunks):
    """
    Create embeddings for text chunks using OpenAI.
    """
    # Initialize OpenAI embeddings
    embeddings = OpenAIEmbeddings(openai_api_key=OPENAI_KEY)
    # Create vector store using FAISS
    vector_store = FAISS.from_texts(chunks, embedding=embeddings)
    return vector_store


def generation(vector_store):
    """
    Generate responses based on prompts and embeddings.
    """
    retriever = vector_store.as_retriever()

    # Define template for prompts
    template = """Respond to the prompt based on the following context: {context}
    Questions: {questions}
    """
    prompt = ChatPromptTemplate.from_template(template)

    # Initialize ChatOpenAI model
    model = ChatOpenAI(model_name='gpt-4-0125-preview', openai_api_key=OPENAI_KEY)

    # Define processing chain
    chain = (
        {"context": retriever, "questions": RunnablePassthrough()}
        | prompt
        | model
        | StrOutputParser()
    )

    return chain

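The chain returned by `generation()` is a standard LCEL runnable, so it supports `.invoke()` for a single answer and `.stream()` for incremental output. A hypothetical end-to-end use of these helpers (assumes `OPENAI_API_KEY` is exported and the PDF path matches the repo layout; the question is only an example):

```python
from rag import chunk_processing, embeddings, generation

# Build the FAISS index once, then query it.
with open('./app/static/constitution.pdf', 'rb') as pdf:
    chunks = chunk_processing(pdf)
vector_store = embeddings(chunks)

chain = generation(vector_store)
print(chain.invoke("What powers does the legislature hold?"))
```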
5 changes: 5 additions & 0 deletions app/requirements.txt
@@ -0,0 +1,5 @@
pypdf2
faiss-cpu
langchain-community
langchain-openai
langchain
streamlit
Binary file added app/static/constitution.pdf
Binary file not shown.