ak0601 committed on
Commit
ececcd6
·
verified ·
1 Parent(s): a5f6193

Update app.py

Browse files
Files changed (1) hide show
  1. app.py +171 -168
app.py CHANGED
@@ -1,168 +1,171 @@
1
- import os
2
- import streamlit as st
3
- import google.generativeai as genai
4
- # from langchain_openai import OpenAI /
5
- from langchain_openai import OpenAIEmbeddings
6
- from langchain_google_genai import GoogleGenerativeAIEmbeddings
7
- from langchain_google_genai import ChatGoogleGenerativeAI
8
- # from langchain_openai import OpenAIEmbeddings
9
- from langchain_community.document_loaders import Docx2txtLoader
10
- from langchain.text_splitter import RecursiveCharacterTextSplitter
11
- from langchain_community.vectorstores import Chroma
12
- from langchain_core.prompts import ChatPromptTemplate, MessagesPlaceholder
13
- from langchain_core.messages import HumanMessage, SystemMessage
14
- from langchain.chains import create_history_aware_retriever, create_retrieval_chain
15
- from langchain.chains.combine_documents import create_stuff_documents_chain
16
- from dotenv import load_dotenv
17
- from langchain.embeddings import HuggingFaceEmbeddings
18
- import pysqlite3
19
- import sys
20
- sys.modules['sqlite3'] = pysqlite3
21
-
22
- import os
23
- os.environ["TRANSFORMERS_OFFLINE"] = "1"
24
-
25
- # Retrieve OpenAI API key from the .env file
26
- GOOGLE_API_KEY = "AIzaSyC1-QUzA45IlCosX__sKlzNAgVZGEaHc0c"
27
- # GOOGLE_API_KEY = os.getenv("GOOGLE_API_KEY")
28
- # OPENAI_API_KEY = os.getenv("OPENAI_API_KEY")
29
-
30
- if not GOOGLE_API_KEY:
31
- raise ValueError("Gemini API key not found. Please set it in the .env file.")
32
-
33
- # Set OpenAI API key
34
- os.environ["GOOGLE_API_KEY"] = GOOGLE_API_KEY
35
- # os.environ["OPENAI_API_KEY"] = OPENAI_API_KEY
36
- # Streamlit app configuration
37
- st.set_page_config(page_title="College Data Chatbot", layout="centered")
38
- st.title("PreCollege Chatbot GEMINI+ HuggingFace Embeddings")
39
-
40
- # Initialize OpenAI LLM
41
- llm = ChatGoogleGenerativeAI(
42
- model="gemini-1.5-pro-latest",
43
- temperature=0.2, # Slightly higher for varied responses
44
- max_tokens=None,
45
- timeout=None,
46
- max_retries=2,
47
- )
48
-
49
- # Initialize embeddings using OpenAI
50
- embeddings = HuggingFaceEmbeddings(model_name='sentence-transformers/all-MiniLM-L6-v2')
51
-
52
- def load_preprocessed_vectorstore():
53
- try:
54
- loader = Docx2txtLoader("./Updated_structred_aman.docx")
55
- documents = loader.load()
56
-
57
- text_splitter = RecursiveCharacterTextSplitter(
58
- separators=["\n\n", "\n", ". ", " ", ""],
59
- chunk_size=3000,
60
- chunk_overlap=1000)
61
-
62
- document_chunks = text_splitter.split_documents(documents)
63
-
64
- vector_store = Chroma.from_documents(
65
-
66
- embedding=embeddings,
67
- documents=document_chunks,
68
- persist_directory="./data32"
69
- )
70
- return vector_store
71
- except Exception as e:
72
- st.error(f"Error creating vector store: {e}")
73
- return None
74
-
75
- def get_context_retriever_chain(vector_store):
76
- """Creates a history-aware retriever chain."""
77
- retriever = vector_store.as_retriever()
78
-
79
- # Define the prompt for the retriever chain
80
- prompt = ChatPromptTemplate.from_messages([
81
- MessagesPlaceholder(variable_name="chat_history"),
82
- ("human", "{input}"),
83
- ("system", """Given the chat history and the latest user question, which might reference context in the chat history,
84
- formulate a standalone question that can be understood without the chat history.
85
- If the question is directly addressed within the provided document, provide a relevant answer.
86
- If the question is not explicitly addressed in the document, return the following message:
87
- 'This question is beyond the scope of the available information. Please contact your mentor for further assistance.'
88
- Do NOT answer the question directly, just reformulate it if needed and otherwise return it as is.""")
89
- ])
90
-
91
- retriever_chain = create_history_aware_retriever(llm, retriever, prompt)
92
- return retriever_chain
93
-
94
- def get_conversational_chain(retriever_chain):
95
- """Creates a conversational chain using the retriever chain."""
96
- prompt = ChatPromptTemplate.from_messages([
97
- ("system", """Hello! I'm your PreCollege AI assistant, here to help you with your JEE Mains journey.
98
- Please provide your JEE Mains rank and preferred engineering branches or colleges,
99
- and I'll give you tailored advice based on our verified database.
100
- Note: I will only provide information that is available within our database to ensure accuracy. Let's get started!
101
- """
102
- "\n\n"
103
- "{context}"),
104
- MessagesPlaceholder(variable_name="chat_history"),
105
- ("human", "{input}")
106
- ])
107
-
108
- stuff_documents_chain = create_stuff_documents_chain(llm, prompt)
109
- return create_retrieval_chain(retriever_chain, stuff_documents_chain)
110
-
111
- def get_response(user_query):
112
- retriever_chain = get_context_retriever_chain(st.session_state.vector_store)
113
- conversation_rag_chain = get_conversational_chain(retriever_chain)
114
-
115
- formatted_chat_history = []
116
- for message in st.session_state.chat_history:
117
- if isinstance(message, HumanMessage):
118
- formatted_chat_history.append({"author": "user", "content": message.content})
119
- elif isinstance(message, SystemMessage):
120
- formatted_chat_history.append({"author": "assistant", "content": message.content})
121
-
122
- response = conversation_rag_chain.invoke({
123
- "chat_history": formatted_chat_history,
124
- "input": user_query
125
- })
126
-
127
- return response['answer']
128
-
129
- # Load the preprocessed vector store from the local directory
130
- st.session_state.vector_store = load_preprocessed_vectorstore()
131
-
132
- # Initialize chat history if not present
133
- if "chat_history" not in st.session_state:
134
- st.session_state.chat_history = [
135
- {"author": "assistant", "content": "Hello, I am Precollege. How can I help you?"}
136
- ]
137
-
138
- # Main app logic
139
- if st.session_state.get("vector_store") is None:
140
- st.error("Failed to load preprocessed data. Please ensure the data exists in './data' directory.")
141
- else:
142
- # Display chat history
143
- with st.container():
144
- for message in st.session_state.chat_history:
145
- if message["author"] == "assistant":
146
- with st.chat_message("system"):
147
- st.write(message["content"])
148
- elif message["author"] == "user":
149
- with st.chat_message("human"):
150
- st.write(message["content"])
151
-
152
- # Add user input box below the chat
153
- with st.container():
154
- with st.form(key="chat_form", clear_on_submit=True):
155
- user_query = st.text_input("Type your message here...", key="user_input")
156
- submit_button = st.form_submit_button("Send")
157
-
158
- if submit_button and user_query:
159
- # Get bot response
160
- response = get_response(user_query)
161
- st.session_state.chat_history.append({"author": "user", "content": user_query})
162
- st.session_state.chat_history.append({"author": "assistant", "content": response})
163
-
164
- # Rerun the app to refresh the chat display
165
- st.rerun()
166
-
167
-
168
- """"""
 
 
 
 
1
+ import os
2
+ import streamlit as st
3
+ import google.generativeai as genai
4
+ # from langchain_openai import OpenAI /
5
+ from langchain_openai import OpenAIEmbeddings
6
+ from langchain_google_genai import GoogleGenerativeAIEmbeddings
7
+ from langchain_google_genai import ChatGoogleGenerativeAI
8
+ # from langchain_openai import OpenAIEmbeddings
9
+ from langchain_community.document_loaders import Docx2txtLoader
10
+ from langchain.text_splitter import RecursiveCharacterTextSplitter
11
+ from langchain_community.vectorstores import Chroma
12
+ from langchain_core.prompts import ChatPromptTemplate, MessagesPlaceholder
13
+ from langchain_core.messages import HumanMessage, SystemMessage
14
+ from langchain.chains import create_history_aware_retriever, create_retrieval_chain
15
+ from langchain.chains.combine_documents import create_stuff_documents_chain
16
+ from dotenv import load_dotenv
17
+ from langchain.embeddings import HuggingFaceEmbeddings
18
+
19
+ from sentence_transformers import SentenceTransformer
20
+
21
+ import pysqlite3
22
+ import sys
23
+ sys.modules['sqlite3'] = pysqlite3
24
+
25
+ import os
26
+ os.environ["TRANSFORMERS_OFFLINE"] = "1"
27
+
28
+ # Gemini (Google) API key -- currently hard-coded below; it should be read from the GOOGLE_API_KEY environment variable / .env file instead
29
+ GOOGLE_API_KEY = "AIzaSyC1-QUzA45IlCosX__sKlzNAgVZGEaHc0c"
30
+ # GOOGLE_API_KEY = os.getenv("GOOGLE_API_KEY")
31
+ # OPENAI_API_KEY = os.getenv("OPENAI_API_KEY")
32
+
33
+ if not GOOGLE_API_KEY:
34
+ raise ValueError("Gemini API key not found. Please set it in the .env file.")
35
+
36
+ # Set the Google (Gemini) API key in the environment
37
+ os.environ["GOOGLE_API_KEY"] = GOOGLE_API_KEY
38
+ # os.environ["OPENAI_API_KEY"] = OPENAI_API_KEY
39
+ # Streamlit app configuration
40
+ st.set_page_config(page_title="College Data Chatbot", layout="centered")
41
+ st.title("PreCollege Chatbot GEMINI+ HuggingFace Embeddings")
42
+
43
+ # Initialize the Gemini LLM
44
+ llm = ChatGoogleGenerativeAI(
45
+ model="gemini-1.5-pro-latest",
46
+ temperature=0.2, # Slightly higher for varied responses
47
+ max_tokens=None,
48
+ timeout=None,
49
+ max_retries=2,
50
+ )
51
+
52
+ # Initialize embeddings using a SentenceTransformer model (all-MiniLM-L6-v2)
53
+ embeddings = SentenceTransformer('sentence-transformers/all-MiniLM-L6-v2')
54
+
55
+ def load_preprocessed_vectorstore():
56
+ try:
57
+ loader = Docx2txtLoader("./Updated_structred_aman.docx")
58
+ documents = loader.load()
59
+
60
+ text_splitter = RecursiveCharacterTextSplitter(
61
+ separators=["\n\n", "\n", ". ", " ", ""],
62
+ chunk_size=3000,
63
+ chunk_overlap=1000)
64
+
65
+ document_chunks = text_splitter.split_documents(documents)
66
+
67
+ vector_store = Chroma.from_documents(
68
+
69
+ embedding=embeddings,
70
+ documents=document_chunks,
71
+ persist_directory="./data32"
72
+ )
73
+ return vector_store
74
+ except Exception as e:
75
+ st.error(f"Error creating vector store: {e}")
76
+ return None
77
+
78
+ def get_context_retriever_chain(vector_store):
79
+ """Creates a history-aware retriever chain."""
80
+ retriever = vector_store.as_retriever()
81
+
82
+ # Define the prompt for the retriever chain
83
+ prompt = ChatPromptTemplate.from_messages([
84
+ MessagesPlaceholder(variable_name="chat_history"),
85
+ ("human", "{input}"),
86
+ ("system", """Given the chat history and the latest user question, which might reference context in the chat history,
87
+ formulate a standalone question that can be understood without the chat history.
88
+ If the question is directly addressed within the provided document, provide a relevant answer.
89
+ If the question is not explicitly addressed in the document, return the following message:
90
+ 'This question is beyond the scope of the available information. Please contact your mentor for further assistance.'
91
+ Do NOT answer the question directly, just reformulate it if needed and otherwise return it as is.""")
92
+ ])
93
+
94
+ retriever_chain = create_history_aware_retriever(llm, retriever, prompt)
95
+ return retriever_chain
96
+
97
+ def get_conversational_chain(retriever_chain):
98
+ """Creates a conversational chain using the retriever chain."""
99
+ prompt = ChatPromptTemplate.from_messages([
100
+ ("system", """Hello! I'm your PreCollege AI assistant, here to help you with your JEE Mains journey.
101
+ Please provide your JEE Mains rank and preferred engineering branches or colleges,
102
+ and I'll give you tailored advice based on our verified database.
103
+ Note: I will only provide information that is available within our database to ensure accuracy. Let's get started!
104
+ """
105
+ "\n\n"
106
+ "{context}"),
107
+ MessagesPlaceholder(variable_name="chat_history"),
108
+ ("human", "{input}")
109
+ ])
110
+
111
+ stuff_documents_chain = create_stuff_documents_chain(llm, prompt)
112
+ return create_retrieval_chain(retriever_chain, stuff_documents_chain)
113
+
114
+ def get_response(user_query):
115
+ retriever_chain = get_context_retriever_chain(st.session_state.vector_store)
116
+ conversation_rag_chain = get_conversational_chain(retriever_chain)
117
+
118
+ formatted_chat_history = []
119
+ for message in st.session_state.chat_history:
120
+ if isinstance(message, HumanMessage):
121
+ formatted_chat_history.append({"author": "user", "content": message.content})
122
+ elif isinstance(message, SystemMessage):
123
+ formatted_chat_history.append({"author": "assistant", "content": message.content})
124
+
125
+ response = conversation_rag_chain.invoke({
126
+ "chat_history": formatted_chat_history,
127
+ "input": user_query
128
+ })
129
+
130
+ return response['answer']
131
+
132
+ # Build the vector store from the source .docx file and keep it in session state
133
+ st.session_state.vector_store = load_preprocessed_vectorstore()
134
+
135
+ # Initialize chat history if not present
136
+ if "chat_history" not in st.session_state:
137
+ st.session_state.chat_history = [
138
+ {"author": "assistant", "content": "Hello, I am Precollege. How can I help you?"}
139
+ ]
140
+
141
+ # Main app logic
142
+ if st.session_state.get("vector_store") is None:
143
+ st.error("Failed to load preprocessed data. Please ensure the data exists in './data' directory.")
144
+ else:
145
+ # Display chat history
146
+ with st.container():
147
+ for message in st.session_state.chat_history:
148
+ if message["author"] == "assistant":
149
+ with st.chat_message("system"):
150
+ st.write(message["content"])
151
+ elif message["author"] == "user":
152
+ with st.chat_message("human"):
153
+ st.write(message["content"])
154
+
155
+ # Add user input box below the chat
156
+ with st.container():
157
+ with st.form(key="chat_form", clear_on_submit=True):
158
+ user_query = st.text_input("Type your message here...", key="user_input")
159
+ submit_button = st.form_submit_button("Send")
160
+
161
+ if submit_button and user_query:
162
+ # Get bot response
163
+ response = get_response(user_query)
164
+ st.session_state.chat_history.append({"author": "user", "content": user_query})
165
+ st.session_state.chat_history.append({"author": "assistant", "content": response})
166
+
167
+ # Rerun the app to refresh the chat display
168
+ st.rerun()
169
+
170
+
171
+ """"""