-
Notifications
You must be signed in to change notification settings - Fork 0
/
Copy pathtest_ollama_rag.py
81 lines (66 loc) · 3.55 KB
/
test_ollama_rag.py
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
# Importing the libraries
import pandas as pd
from langchain_community.document_loaders import TextLoader
from langchain_text_splitters import RecursiveCharacterTextSplitter
from langchain_huggingface import HuggingFaceEmbeddings
from langchain_community.vectorstores import Chroma
from langchain_core.output_parsers import StrOutputParser
from langchain.prompts import ChatPromptTemplate
from langchain_community.chat_models import ChatOllama
from langchain_core.runnables import RunnablePassthrough
# Importing the xlsx file
path = "/home/haruki/Desktop/VS_Code/ollama/AmazonProducts.xlsx"
dframe = pd.read_excel(path)
# Preparing the data: keep only rows whose "About This Item" text is longer
# than 2 characters. `.str.len()` yields NaN for missing cells and NaN > 2 is
# False, so blank cells are dropped instead of crashing `len()` on a float NaN.
dframe = dframe[dframe["About This Item"].str.len() > 2]
dframe = dframe.drop_duplicates()
# Getting the related content
data = dframe[["Title", "About This Item"]]
# Storing the relevant data into a text file, one field per line.
# The file is written to the very same path that is loaded right below, so the
# loader can never pick up a missing or stale file when the script is started
# from a different working directory.
path = "/home/haruki/Desktop/VS_Code/ollama/data/amazon.txt"
data.to_csv(path, sep="\n", index=False)
# Importing the stored data as a document
loader = TextLoader(path)
docs = loader.load()
# Dividing the loaded data into chunks to store in the vector database (ChromaDB)
text_splitter = RecursiveCharacterTextSplitter(chunk_size=1700, chunk_overlap=100)
chunks = text_splitter.split_documents(docs)
# Checking the first chunk
print(chunks[0])
# Embedding model used to index the chunks
model = "sentence-transformers/all-MiniLM-L6-v2"
embeddings = HuggingFaceEmbeddings(model_name=model)
# Build the Chroma collection from the chunks and expose it as a retriever
vectorstore = Chroma.from_documents(
    embedding=embeddings,
    collection_name="rag_chroma",
    documents=chunks,
)
retriever = vectorstore.as_retriever()
# Smoke test: run a similarity search for a sample product description and
# print how many documents were returned
question = """ Eyeshadow stick which has the features listed below: stick set 8 color,
Multi-Purpose Makeup Eyeshadow Set: bold, creamy, crease resistant color with ease, durable and
waterproof without wrinkles, smooth as silk without lumps, does not fade and has uniform color, can be used as a
contouring or Highlight, has 8 colors , Easy to Use, No brushes required, just glide, highly pigmented,
retractable eyeshadow stick for last part, multi -combination eyeshadow, Classy & Buildable Base Color, its soft and
creamy texture keep it subtle for a natural look! """
docs = vectorstore.similarity_search(question)
match_count = len(docs)
print(match_count)
# Prompt
# Instruction text sent to the chat model. It contains two placeholders,
# {question} and {context}; the chain defined further down supplies both keys
# ("question" from the user's input, "context" from the retriever).
# The text is runtime data: any edit to it changes what the model is asked.
template = """ You are an intelligent assistant helping users with their questions as a shopping assistant.
Use ONLY the following pieces of context to answer the question. Think step-by-step following the given points below and then answer.
Do not try to make up an answer:
- Check the numbers given in the question to answer the names of the product matching the criteria, and give names accordingly
- If the context is empty, response with “I do not know the answer to the question.”
- If the answer to the question cannot be determined from the context alone, response with “I cannot determine the answer to the question.”
- If the answer to the question can be determined from the context, response ONLY with "name and color" where <name> is the product
matching the criteria and the color it's available in.
Question: {question}
=====================================================================
Context: {context}
=====================================================================
"""
# Turn the raw text into a chat prompt template that can be piped into a model
prompt = ChatPromptTemplate.from_template(template)
# Local LLM served by Ollama
model_local = ChatOllama(model="gemma")


def _format_docs(documents):
    """Join the text of the retrieved documents into one plain-text context.

    Without this step the prompt's {context} slot receives the Python repr of
    the document list (metadata and escaped newlines included) rather than the
    product text itself.
    """
    return "\n\n".join(document.page_content for document in documents)


# Chain: the user's question goes to the retriever (whose hits are flattened
# to plain text) and is also passed through unchanged; both fill the prompt,
# which is sent to the local model, and the reply is parsed to a string.
chain = (
    {"context": retriever | _format_docs, "question": RunnablePassthrough()}
    | prompt
    | model_local
    | StrOutputParser()
)
# Read the question from stdin (no prompt text is shown) and print the answer
answer = chain.invoke(input(""))
print(answer)