-
Notifications
You must be signed in to change notification settings - Fork 1
/
Copy pathapp-demo.py
158 lines (135 loc) · 6.79 KB
/
app-demo.py
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
import streamlit as st
from snowflake.snowpark.context import get_active_session
from snowflake.snowpark import Session
from streamlit_extras import add_vertical_space as avs
import pandas as pd
# Lift pandas' column-width cap so long text cells are not truncated when
# DataFrames are rendered.
pd.set_option("max_colwidth", None)
# Number of document chunks supplied to the model as context; interpolated
# into the LIMIT clause of the retrieval query in create_prompt. Adjust as
# needed for your use case.
num_chunks = 3
# Streamlit re-executes this script on every widget interaction; without
# caching, each rerun would open a brand-new Snowflake session.
# show_spinner=False keeps the cache layer from rendering anything before
# st.set_page_config runs. The TTL rebuilds the session hourly so a connection
# that Snowflake has expired is not kept forever.
@st.cache_resource(ttl=3600, show_spinner=False)
def create_session():
    """Return a Snowpark session configured from the ``snowflake`` secrets.

    The connection parameters are read from ``st.secrets["snowflake"]``.
    The returned session is cached by Streamlit and reused across reruns.

    Returns:
        The object produced by ``Session.builder.configs(...).create()``.
    """
    return Session.builder.configs(st.secrets["snowflake"]).create()
# Module-level Snowpark session used by load_data, create_prompt, complete
# and the stage listing in the UI code below.
session = create_session()
# Page-wide settings: browser tab title, tab icon and wide layout.
st.set_page_config(
    page_title="Ex-stream-ly Cool App",
    page_icon="🧊",
    layout="wide",
)
def load_data(table_name):
    """Fetch a small preview of ``table_name`` through the module-level session.

    Args:
        table_name: Name of the table to read, passed to ``session.table``.

    Returns:
        The result of ``collect()`` on the table limited to 20 rows.
    """
    preview = session.table(table_name).limit(20)
    return preview.collect()
# Table shown in the "Comparison" tab (passed to load_data by the UI code).
table_name = "CC_QUICKSTART_CORTEX_DOCS.DATA.comp_data_bkp"
def create_prompt(myquestion, rag, stage, selected_doc):
    """Build the LLM prompt for a question, optionally grounded in a document.

    Args:
        myquestion: The user's question text.
        rag: Truthy to look up the ``num_chunks`` most similar chunks of
            ``selected_doc`` and embed them in the prompt; falsy to send the
            bare question only.
        stage: Knowledge-base label chosen in the UI. It selects which stage
            the presigned document link is generated from.
        selected_doc: File name as listed from the stage, optionally carrying
            a ``docs/`` or ``history_docs/`` prefix. ``None`` (nothing
            selectable) is treated as an empty name.

    Returns:
        A ``(prompt, url_link, relative_path)`` tuple.

        * Without RAG: the bare question prompt, ``"None"``, ``"None"``.
        * With RAG but no usable context: the bare question prompt,
          ``"None"``, and the matched path (``"Not Found"`` when the
          retrieval query returned no rows).
        * With RAG and context: the context prompt, a presigned URL (or
          ``"URL not available"``), and the matched relative path.
    """
    # An empty stage listing makes the selectbox hand over None; normalise it
    # so the prefix handling below cannot raise AttributeError.
    selected_doc = selected_doc or ""

    # Stage listings prefix each file with the stage name, while the chunks
    # table is filtered on the bare relative path.
    for prefix in ('docs/', 'history_docs/'):
        if selected_doc.startswith(prefix):
            selected_doc = selected_doc[len(prefix):]
            break

    # The UI offers 'History Knowledge Base'; the previously checked
    # 'Marketing Knowledge Base' label never matched it, so the history stage
    # was never selected. The old label is still accepted for compatibility.
    if stage in ('History Knowledge Base', 'Marketing Knowledge Base'):
        stage_table = '@CC_QUICKSTART_CORTEX_DOCS.DATA.history_docs'
    else:
        stage_table = '@CC_QUICKSTART_CORTEX_DOCS.DATA.docs'

    # NOTE: the wrapping single quotes (and the apostrophe stripping further
    # down) are kept as-is so the text sent to the model is unchanged.
    bare_prompt = f"'Question:\n{myquestion}\nAnswer: '"

    if not rag:
        return bare_prompt, "None", "None"

    # Rank the chunks of the selected document by cosine similarity to the
    # embedded question and keep the best `num_chunks`.
    cmd = f"""
        WITH results AS (
            SELECT RELATIVE_PATH,
                   VECTOR_COSINE_SIMILARITY(docs_chunks_table.chunk_vec,
                       snowflake.cortex.embed_text_768('e5-base-v2', ?)) AS distance,
                   chunk
            FROM CC_QUICKSTART_CORTEX_DOCS.DATA.docs_chunks_table
            WHERE RELATIVE_PATH = ?
            ORDER BY distance DESC
            LIMIT {num_chunks}
        )
        SELECT chunk, relative_path FROM results
    """
    df_context = session.sql(cmd, params=[myquestion, selected_doc]).to_pandas()

    if df_context.empty:
        return bare_prompt, "None", "Not Found"

    prompt_context = "".join(df_context['CHUNK']).replace("'", "")
    relative_path = df_context.loc[0, 'RELATIVE_PATH']

    # Chunks that are only whitespace carry no usable context.
    if not prompt_context.strip():
        return bare_prompt, "None", relative_path

    prompt = (
        "\n"
        "'You are an expert assistance extracting information from context provided.\n"
        "Answer the question based on the context. Be concise and do not hallucinate.\n"
        "If you don’t have the information just say so.\n"
        f"Context: {prompt_context}\n"
        "Question:\n"
        f"{myquestion}\n"
        "Answer: '\n"
    )

    # The path is bound as a parameter so file names containing quotes cannot
    # break (or inject into) the statement. LIMIT 1 because only the first
    # row is read and the directory scan yields one row per staged file.
    cmd2 = (
        f"SELECT GET_PRESIGNED_URL({stage_table}, ?, 360) AS URL_LINK "
        f"FROM directory({stage_table}) LIMIT 1"
    )
    df_url_link = session.sql(cmd2, params=[relative_path]).to_pandas()
    url_link = df_url_link.loc[0, 'URL_LINK'] if not df_url_link.empty else "URL not available"

    return prompt, url_link, relative_path
def complete(myquestion, model_name, rag, stage, selected_doc):
    """Ask a Cortex model to answer ``myquestion`` and return its response.

    The prompt (with or without document context, depending on ``rag``) is
    produced by ``create_prompt`` and sent to ``snowflake.cortex.complete``
    together with ``model_name`` as bind parameters.

    Returns:
        A ``(rows, url_link, relative_path)`` tuple: the collected query
        result (its single column is aliased ``response``) followed by the
        link and path reported by ``create_prompt``.
    """
    built_prompt, link, doc_path = create_prompt(myquestion, rag, stage, selected_doc)
    rows = session.sql(
        "SELECT snowflake.cortex.complete(?,?) AS response",
        params=[model_name, built_prompt],
    ).collect()
    return rows, link, doc_path
# ---------------------------------------------------------------------------
# Page body: model picker, knowledge-base/document pickers, and two inner tabs
# ("Interactive" for side-by-side vanilla vs. RAG answers, "Comparison" for a
# stored results table). Each `st.columns` pair uses only its first, narrow
# column for the widget; the second column is an unused spacer.
# ---------------------------------------------------------------------------
st.title("Build a Retrieval Augmented Generation (RAG) based LLM assistant using Snowflake Cortex and Streamlit:")
st.write("You can ask questions and decide if you want to use your documents for context or allow the model to create their own response.")

model_col, _ = st.columns([0.45, 1.5])
with model_col:
    model = st.selectbox('**Choose LLM model**:', (
        'mistral-7b', 'mistral-large', 'mixtral-8x7b', 'gemma-7b'))
avs.add_vertical_space(4)

# Distinct names for outer and inner tabs: reusing tab1/tab2 for both levels
# silently rebinds the outer tabs and hides which tab a `with` block targets.
# The "Structured" tab has no content in this file.
unstructured_tab, structured_tab = st.tabs(["**Unstructured**", "**Structured**"])
with unstructured_tab:
    stage_col, _ = st.columns([0.45, 1.5])
    with stage_col:
        stage = st.selectbox('**Business Function(Stage)**', (
            'History Knowledge Base',
            'Technical Knowledge Base'
        ))
    stage_table = '@CC_QUICKSTART_CORTEX_DOCS.DATA.history_docs' if stage == 'History Knowledge Base' else '@CC_QUICKSTART_CORTEX_DOCS.DATA.docs'
    # List the files in the chosen stage to populate the document picker.
    docs_available = session.sql(f"ls {stage_table}").collect()
    list_docs = [doc["name"] for doc in docs_available]

    doc_col, _ = st.columns([0.45, 1.5])
    with doc_col:
        # Returns None when the stage listing is empty.
        selected_doc = st.selectbox('**Context(Dataset)**', list_docs)
    avs.add_vertical_space(4)

    interactive_tab, comparison_tab = st.tabs(["**Interactive**", "**Comparison**"])

    with interactive_tab:
        prompt_options = ["", "What is calculated columns", "Is there any special lubricant to be used with the premium bike?", "What is the warranty for the premium bike?", "What is the impact of The East India Company on India in 1900"]
        preset_col, _ = st.columns([1, 1.5])
        with preset_col:
            prompt = st.selectbox('**Choose prompt**', prompt_options, index=0, format_func=lambda x: 'Select prompt...' if x == '' else x)
        custom_col, _ = st.columns([1, 1.5])
        with custom_col:
            question = st.text_input("**Or Enter Your Own Prompt**")

        if st.button(':red[**Submit**]'):
            # A chosen preset takes precedence over the free-text prompt.
            actual_question = prompt or question
            if not actual_question:
                st.warning("Choose a prompt or enter your own before submitting.")
            elif selected_doc is None:
                # Without a document, create_prompt would fail on the name.
                st.warning("No document is available in the selected stage.")
            else:
                # Persist the submission so the answers survive the reruns
                # triggered by later widget interactions.
                st.session_state['actual_question'] = actual_question
                st.session_state['stage'] = stage
                st.session_state['selected_doc'] = selected_doc
                st.session_state['submitted'] = True

        if 'submitted' in st.session_state:
            submitted_question = st.session_state['actual_question']
            submitted_stage = st.session_state['stage']
            submitted_doc = st.session_state['selected_doc']

            vanilla_col, like_col, rag_col = st.columns([3.5, 1, 3.5])

            with vanilla_col:
                st.header("Vanilla Response from LLM")
                response, _, _ = complete(submitted_question, model, 0, submitted_stage, submitted_doc)
                st.markdown(response[0].RESPONSE)

            with like_col:
                st.header("Like")
                if st.button('👍'):
                    if 'clicked' not in st.session_state:
                        st.session_state['clicked'] = True
                    st.snow()

            with rag_col:
                st.header("RAG powered Response from LLM")
                response, url_link, relative_path = complete(submitted_question, model, 1, submitted_stage, submitted_doc)
                st.markdown(response[0].RESPONSE)
                # Hard-coded exception: no source link is shown for this one
                # question/document pair (presumably because the document is
                # unrelated to the question - confirm with the author).
                is_suppressed_pair = (
                    submitted_question == "What is the warranty for the premium bike?"
                    and submitted_doc == "history_docs/A History of India 2nd ed.pdf"
                )
                if not is_suppressed_pair and url_link != "None":
                    display_url = f"Link to [{relative_path}]({url_link}) that may be useful"
                    st.markdown(display_url)

    with comparison_tab:
        df = load_data(table_name)
        st.write("This table shows the responses from different models to the same prompt.")
        st.dataframe(df)