RAG-based app for NationGPT #1

Open · wants to merge 7 commits into main
33 changes: 33 additions & 0 deletions .devcontainer/devcontainer.json
@@ -0,0 +1,33 @@
{
    "name": "Python 3",
    // Or use a Dockerfile or Docker Compose file. More info: https://containers.dev/guide/dockerfile
    "image": "mcr.microsoft.com/devcontainers/python:1-3.11-bullseye",
    "customizations": {
        "codespaces": {
            "openFiles": [
                "README.md",
                "app/app.py"
            ]
        },
        "vscode": {
            "settings": {},
            "extensions": [
                "ms-python.python",
                "ms-python.vscode-pylance"
            ]
        }
    },
    "updateContentCommand": "[ -f packages.txt ] && sudo apt update && sudo apt upgrade -y && sudo xargs apt install -y <packages.txt; [ -f requirements.txt ] && pip3 install --user -r requirements.txt; pip3 install --user streamlit; echo '✅ Packages installed and Requirements met'",
    "postAttachCommand": {
        "server": "streamlit run app/app.py --server.enableCORS false --server.enableXsrfProtection false"
    },
    "portsAttributes": {
        "8501": {
            "label": "Application",
            "onAutoForward": "openPreview"
        }
    },
    "forwardPorts": [
        8501
    ]
}
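
Once the Codespace is built, `updateContentCommand` installs any apt packages listed in `packages.txt` plus the Python requirements, and `postAttachCommand` launches the Streamlit app on port 8501, which is forwarded and opened as a preview automatically. A minimal smoke test of that wiring, as a sketch (it assumes a recent Streamlit release, which serves a health probe at `/_stcore/health`; older releases used `/healthz`):

```python
# Minimal sketch: verify the forwarded Streamlit server is responding.
# Assumption: a recent Streamlit release serving /_stcore/health
# (older releases used /healthz); adjust the path if needed.
import urllib.request

with urllib.request.urlopen("http://localhost:8501/_stcore/health", timeout=5) as resp:
    print(resp.status, resp.read().decode())  # expected: 200 ok
```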
1 change: 1 addition & 0 deletions .gitignore
@@ -0,0 +1 @@
__pycache__
74 changes: 74 additions & 0 deletions app/app.py
@@ -0,0 +1,74 @@
import streamlit as st

from langchain_core.output_parsers import StrOutputParser
from langchain_core.prompts import ChatPromptTemplate
from langchain_core.runnables import RunnablePassthrough
from langchain_core.messages import AIMessage, HumanMessage
from langchain_openai import ChatOpenAI

from rag import chunk_processing, embeddings

pdf_path = './app/static/constitution.pdf'


@st.cache_resource
def load_vector_store(path):
    """Build the vector store once and reuse it across Streamlit reruns."""
    with open(path, 'rb') as pdf:
        processed_chunks = chunk_processing(pdf)
    return embeddings(processed_chunks)


embedded_chunks = load_vector_store(pdf_path)


def get_response(user_query):
    retriever = embedded_chunks.as_retriever()

    template = """
    You are a helpful and funny assistant. Respond happily, using emojis, based on the following context: {context}

    User question: {user_question}
    """

    prompt = ChatPromptTemplate.from_template(template)

    # Initialize ChatOpenAI model
    model = ChatOpenAI(model_name='gpt-4-0125-preview')

    # Define processing chain
    chain = (
        {"context": retriever, "user_question": RunnablePassthrough()}
        | prompt
        | model
        | StrOutputParser()
    )

    return chain.stream(user_query)


st.title("NationGPT 🌞")

# session state
if "chat_history" not in st.session_state:
    st.session_state.chat_history = [
        AIMessage(content="Hello, I am NationGPT. How can I help you today?"),
    ]

# conversation
for message in st.session_state.chat_history:
    if isinstance(message, AIMessage):
        with st.chat_message("AI"):
            st.write(message.content)
    elif isinstance(message, HumanMessage):
        with st.chat_message("Human"):
            st.write(message.content)

# user input
user_query = st.chat_input("Ask me anything...")
if user_query is not None and user_query != "":
    st.session_state.chat_history.append(HumanMessage(content=user_query))

    with st.chat_message("Human"):
        st.markdown(user_query)

    with st.chat_message("AI"):
        response = st.write_stream(get_response(user_query))

    st.session_state.chat_history.append(AIMessage(content=response))

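Since `StrOutputParser` is the final step, `chain.stream(...)` yields plain string chunks; `st.write_stream` renders them incrementally and returns the concatenated text, which is what gets appended to `chat_history`. As a minimal sketch, the same generator can be consumed outside Streamlit (this assumes `OPENAI_API_KEY` is set and the module-level setup above has run; the question is only an example):

```python
# Stream the answer chunk-by-chunk to stdout instead of the Streamlit UI.
full_response = ""
for chunk in get_response("How is a constitutional amendment ratified?"):
    print(chunk, end="", flush=True)
    full_response += chunk  # same string st.write_stream would return
```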
70 changes: 70 additions & 0 deletions app/rag.py
@@ -0,0 +1,70 @@
import os
from PyPDF2 import PdfReader

# Importing necessary modules from langchain and langchain_openai
from langchain.text_splitter import RecursiveCharacterTextSplitter
from langchain_community.vectorstores import FAISS
from langchain_core.output_parsers import StrOutputParser
from langchain_core.prompts import ChatPromptTemplate
from langchain_core.runnables import RunnablePassthrough
from langchain_openai import ChatOpenAI, OpenAIEmbeddings

OPENAI_KEY = os.getenv("OPENAI_API_KEY")


def chunk_processing(pdf):
    """
    Process a PDF file, extracting text and splitting it into chunks.
    """
    pdf_reader = PdfReader(pdf)

    text = ""
    for page in pdf_reader.pages:
        # extract_text() can return None for pages with no extractable text
        text += page.extract_text() or ""

    # Split text into chunks
    text_splitter = RecursiveCharacterTextSplitter(
        chunk_size=1000,
        chunk_overlap=200,
        length_function=len
    )
    chunks = text_splitter.split_text(text=text)
    return chunks


def embeddings(chunks):
    """
    Create embeddings for text chunks using OpenAI.
    """
    # Initialize OpenAI embeddings
    embeddings = OpenAIEmbeddings(openai_api_key=OPENAI_KEY)
    # Create vector store using FAISS
    vector_store = FAISS.from_texts(chunks, embedding=embeddings)
    return vector_store


def generation(vector_store):
    """
    Generate responses based on prompts and embeddings.
    """
    retriever = vector_store.as_retriever()

    # Define template for prompts
    template = """Respond to the prompt based on the following context: {context}
    Questions: {questions}
    """
    prompt = ChatPromptTemplate.from_template(template)

    # Initialize ChatOpenAI model
    model = ChatOpenAI(model_name='gpt-4-0125-preview', openai_api_key=OPENAI_KEY)

    # Define processing chain
    chain = (
        {"context": retriever, "questions": RunnablePassthrough()}
        | prompt
        | model
        | StrOutputParser()
    )

    return chain

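The chain returned by `generation()` is a standard LCEL runnable, so it supports `.invoke()` for a single answer and `.stream()` for incremental output. A hypothetical end-to-end use of these helpers (assumes `OPENAI_API_KEY` is exported and the PDF path matches the repo layout; the question is only an example):

```python
from rag import chunk_processing, embeddings, generation

# Build the FAISS index once, then query it.
with open('./app/static/constitution.pdf', 'rb') as pdf:
    chunks = chunk_processing(pdf)
vector_store = embeddings(chunks)

chain = generation(vector_store)
print(chain.invoke("What powers does the legislature hold?"))
```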
5 changes: 5 additions & 0 deletions app/requirements.txt
@@ -0,0 +1,5 @@
pypdf2
faiss-cpu
langchain-community
langchain-openai
langchain
streamlit
Binary file added app/static/constitution.pdf
Binary file not shown.