masadonline commited on
Commit
ef85737
Β·
verified Β·
1 Parent(s): c300567

Update app.py

Browse files
Files changed (1) hide show
  1. app.py +119 -116
app.py CHANGED
@@ -1,117 +1,95 @@
1
- import streamlit as st
2
  import os
3
- import glob
4
  import time
 
5
  import threading
6
  from datetime import datetime
7
- from twilio.rest import Client
8
-
9
- from dotenv import load_dotenv
10
 
11
- from langchain_community.document_loaders import UnstructuredFileLoader
12
- from langchain_text_splitters import RecursiveCharacterTextSplitter
13
  from langchain.embeddings import HuggingFaceEmbeddings
14
- from langchain_community.vectorstores import FAISS
15
- from langchain_groq import ChatGroq
16
- from langchain.prompts import PromptTemplate
17
- from langchain.schema.runnable import RunnablePassthrough
18
- from langchain.schema.output_parser import StrOutputParser
19
-
20
- # Load env vars if available locally (for local dev)
21
- load_dotenv()
22
 
23
- groq_api_key = os.getenv("GROQ_API_KEY")
24
- twilio_sid = os.getenv("TWILIO_ACCOUNT_SID")
25
- twilio_token = os.getenv("TWILIO_AUTH_TOKEN")
26
-
27
- POLL_INTERVAL_SECONDS = 30
28
  DOCS_DIR = "docs"
29
  EMBEDDING_MODEL_NAME = "sentence-transformers/all-MiniLM-L6-v2"
 
30
  APP_START_TIME = datetime.utcnow()
31
 
32
- GENERAL_QA_PROMPT = """
33
- You are an AI assistant for our internal knowledge base.
34
- Your goal is to provide accurate and concise answers based ONLY on the provided context.
35
- Do not make up information. If the answer is not found in the context, state that clearly.
36
- Ensure your answers are directly supported by the text.
37
- Accuracy is paramount.
38
-
39
- Context:
40
- {context}
41
-
42
- Question: {question}
43
-
44
- Answer:
45
- """
46
-
47
- def get_loader(file_path):
48
- _, ext = os.path.splitext(file_path)
49
- ext = ext.lower()
50
- if ext in ['.pdf', '.docx', '.doc', '.xlsx', '.xls', '.json', '.txt', '.md', '.html']:
51
- return UnstructuredFileLoader(file_path, mode="elements", strategy="fast")
52
- else:
53
- st.warning(f"Unsupported file type: {ext}. Skipping {os.path.basename(file_path)}")
54
- return None
55
-
56
- @st.cache_resource(show_spinner=False)
57
- def load_and_process_documents(docs_path):
58
- documents = []
59
- doc_files = []
60
- for ext in ["*.pdf", "*.docx", "*.xlsx", "*.json", "*.txt", "*.md"]:
61
- doc_files.extend(glob.glob(os.path.join(docs_path, ext)))
62
-
63
- for file_path in doc_files:
64
- loader = get_loader(file_path)
65
- if loader:
66
- docs = loader.load()
67
- for doc in docs:
68
- doc.metadata["source"] = os.path.basename(file_path)
69
- documents.extend(docs)
70
-
71
- splitter = RecursiveCharacterTextSplitter(chunk_size=1000, chunk_overlap=200)
72
- return splitter.split_documents(documents)
73
-
74
- @st.cache_resource(show_spinner=False)
75
- def create_vector_store(documents, model_name):
76
  embeddings = HuggingFaceEmbeddings(model_name=model_name)
77
- return FAISS.from_documents(documents, embedding=embeddings)
78
-
79
- @st.cache_resource(show_spinner=False)
80
- def get_llm(api_key, model_name="llama3-8b-8192"):
81
- return ChatGroq(temperature=0, groq_api_key=api_key, model_name=model_name)
82
-
83
- def get_rag_chain(llm, retriever, prompt_template):
84
- prompt = PromptTemplate.from_template(prompt_template)
85
- return (
86
- {"context": retriever, "question": RunnablePassthrough()}
87
- | prompt
88
- | llm
89
- | StrOutputParser()
90
- )
91
-
92
- def fetch_latest_incoming_message(client, convo_sid):
93
- messages = client.conversations.v1.conversations(convo_sid).messages.list(order="desc", limit=1)
94
- for msg in messages:
95
- if msg.author != "system" and msg.direction == "inbound":
96
- return {"body": msg.body, "author": msg.author, "timestamp": msg.date_created}
97
  return None
98
 
99
- def retrieve_chunks(question, index, model, chunks, top_k=3):
100
- retriever = index.as_retriever(search_type="similarity", search_kwargs={"k": top_k})
101
- rag_chain = get_rag_chain(model, retriever, GENERAL_QA_PROMPT)
102
- answer = rag_chain.invoke({"question": question})
103
- return [answer]
104
 
105
  def generate_answer_with_groq(question, context):
106
- # This uses the LLM directly with context and question prompt
107
- llm = get_llm(groq_api_key)
108
- prompt = GENERAL_QA_PROMPT.format(context=context, question=question)
109
- return llm.invoke(prompt)
 
 
 
 
 
 
110
 
111
- def send_twilio_message(client, convo_sid, message):
112
- client.conversations.v1.conversations(convo_sid).messages.create(body=message)
 
 
 
113
 
114
- def start_conversation_monitor(client, index, model, chunks):
115
  processed_convos = set()
116
  last_processed_timestamp = {}
117
 
@@ -124,15 +102,19 @@ def start_conversation_monitor(client, index, model, chunks):
124
  if convo_sid not in last_processed_timestamp or msg_time > last_processed_timestamp[convo_sid]:
125
  last_processed_timestamp[convo_sid] = msg_time
126
  question = latest_msg["body"]
127
- context = "\n\n".join(retrieve_chunks(question, index, model, chunks))
 
 
128
  answer = generate_answer_with_groq(question, context)
129
  send_twilio_message(client, convo_sid, answer)
 
130
  time.sleep(3)
131
  except Exception as e:
132
- print(f"Error polling convo {convo_sid}: {e}")
133
  time.sleep(5)
134
 
135
  def poll_new_conversations():
 
136
  while True:
137
  try:
138
  conversations = client.conversations.v1.conversations.list(limit=20)
@@ -141,28 +123,49 @@ def start_conversation_monitor(client, index, model, chunks):
141
  if convo.sid not in processed_convos and convo_full.date_created > APP_START_TIME:
142
  participants = client.conversations.v1.conversations(convo.sid).participants.list()
143
  for p in participants:
144
- address = p.messaging_binding.get("address", "")
145
  if address.startswith("whatsapp:"):
 
146
  processed_convos.add(convo.sid)
147
  threading.Thread(target=poll_conversation, args=(convo.sid,), daemon=True).start()
148
  except Exception as e:
149
- print(f"Error polling conversations: {e}")
150
- time.sleep(POLL_INTERVAL_SECONDS)
151
 
152
  threading.Thread(target=poll_new_conversations, daemon=True).start()
153
 
154
- st.title("πŸ€– WhatsApp Chatbot with RAG")
155
-
156
- if st.button("πŸš€ Start WhatsApp Chatbot"):
157
- if not all([groq_api_key, twilio_sid, twilio_token]):
158
- st.error("Please set GROQ_API_KEY, TWILIO_ACCOUNT_SID, and TWILIO_AUTH_TOKEN environment variables!")
159
- else:
160
- st.success("🟒 Loading Knowledge Base and Initializing LLM...")
161
- client = Client(twilio_sid, twilio_token)
162
- chunks = load_and_process_documents(DOCS_DIR)
163
- index = create_vector_store(chunks, EMBEDDING_MODEL_NAME)
164
- model = get_llm(groq_api_key)
165
-
166
- st.success("βœ… Ready! Monitoring new WhatsApp conversations...")
167
- start_conversation_monitor(client, index, model, chunks)
168
- st.info(f"⏳ Will check for new messages every {POLL_INTERVAL_SECONDS} seconds.")
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
  import os
 
2
  import time
3
+ import json
4
  import threading
5
  from datetime import datetime
 
 
 
6
 
7
+ import streamlit as st
8
+ from twilio.rest import Client
9
  from langchain.embeddings import HuggingFaceEmbeddings
10
+ from langchain.vectorstores import FAISS
11
+ from langchain.document_loaders import PyMuPDFLoader, TextLoader, UnstructuredExcelLoader, UnstructuredWordDocumentLoader
12
+ from langchain.text_splitter import RecursiveCharacterTextSplitter
13
+ from langchain.llms import OpenAI
 
 
 
 
14
 
15
+ # Config
 
 
 
 
16
  DOCS_DIR = "docs"
17
  EMBEDDING_MODEL_NAME = "sentence-transformers/all-MiniLM-L6-v2"
18
+ MONITOR_INTERVAL_SECONDS = 30
19
  APP_START_TIME = datetime.utcnow()
20
 
21
+ # Helper functions
22
+
23
+ def load_and_process_documents(folder_path):
24
+ loaders = {
25
+ ".pdf": PyMuPDFLoader,
26
+ ".txt": TextLoader,
27
+ ".xlsx": UnstructuredExcelLoader,
28
+ ".docx": UnstructuredWordDocumentLoader
29
+ }
30
+ docs = []
31
+ for filename in os.listdir(folder_path):
32
+ file_path = os.path.join(folder_path, filename)
33
+ ext = os.path.splitext(filename)[-1].lower()
34
+ loader_cls = loaders.get(ext)
35
+ if loader_cls:
36
+ try:
37
+ loader = loader_cls(file_path)
38
+ docs.extend(loader.load())
39
+ except Exception as e:
40
+ print(f"❌ Failed to load {filename}: {e}")
41
+ if not docs:
42
+ st.error("No documents loaded.")
43
+ return []
44
+ text_splitter = RecursiveCharacterTextSplitter(chunk_size=500, chunk_overlap=50)
45
+ return text_splitter.split_documents(docs)
46
+
47
+ def create_vector_store(docs, model_name):
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
48
  embeddings = HuggingFaceEmbeddings(model_name=model_name)
49
+ return FAISS.from_documents(docs, embeddings)
50
+
51
+ def get_llm(api_key):
52
+ os.environ["OPENAI_API_KEY"] = api_key
53
+ return OpenAI(model_name="gpt-3.5-turbo", temperature=0)
54
+
55
+ def fetch_latest_incoming_message(client, conversation_sid):
56
+ try:
57
+ messages = client.conversations.v1.conversations(conversation_sid).messages.list(limit=5)
58
+ for msg in reversed(messages):
59
+ if msg.direction == "inbound" and msg.author and msg.body:
60
+ return {
61
+ "author": msg.author,
62
+ "body": msg.body.strip(),
63
+ "timestamp": msg.date_created
64
+ }
65
+ except Exception as e:
66
+ print(f"❌ Error fetching messages: {e}")
 
 
67
  return None
68
 
69
+ def retrieve_chunks(query, index, embed_model, text_chunks, k=4):
70
+ query_embedding = embed_model.embed_query(query)
71
+ docs_and_scores = index.similarity_search_by_vector(query_embedding, k=k)
72
+ return [doc.page_content for doc in docs_and_scores]
 
73
 
74
  def generate_answer_with_groq(question, context):
75
+ from groq import Groq
76
+ client = Groq(api_key=os.getenv("GROQ_API_KEY"))
77
+ chat_completion = client.chat.completions.create(
78
+ model="llama3-8b-8192",
79
+ messages=[
80
+ {"role": "system", "content": "You are a helpful assistant for a toy shop. Respond to customer queries based on provided order and product info."},
81
+ {"role": "user", "content": f"Context: {context}\n\nQuestion: {question}"}
82
+ ]
83
+ )
84
+ return chat_completion.choices[0].message.content.strip()
85
 
86
+ def send_twilio_message(client, conversation_sid, reply):
87
+ try:
88
+ client.conversations.v1.conversations(conversation_sid).messages.create(body=reply)
89
+ except Exception as e:
90
+ print(f"❌ Failed to send message: {e}")
91
 
92
+ def start_conversation_monitor(client, index, embed_model, text_chunks):
93
  processed_convos = set()
94
  last_processed_timestamp = {}
95
 
 
102
  if convo_sid not in last_processed_timestamp or msg_time > last_processed_timestamp[convo_sid]:
103
  last_processed_timestamp[convo_sid] = msg_time
104
  question = latest_msg["body"]
105
+ sender = latest_msg["author"]
106
+ print(f"\nπŸ“₯ New message from {sender} in {convo_sid}: {question}")
107
+ context = "\n\n".join(retrieve_chunks(question, index, embed_model, text_chunks))
108
  answer = generate_answer_with_groq(question, context)
109
  send_twilio_message(client, convo_sid, answer)
110
+ print(f"πŸ“€ Replied to {sender}: {answer}")
111
  time.sleep(3)
112
  except Exception as e:
113
+ print(f"❌ Error in convo {convo_sid} polling:", e)
114
  time.sleep(5)
115
 
116
  def poll_new_conversations():
117
+ print("➑️ Monitoring for new WhatsApp conversations...")
118
  while True:
119
  try:
120
  conversations = client.conversations.v1.conversations.list(limit=20)
 
123
  if convo.sid not in processed_convos and convo_full.date_created > APP_START_TIME:
124
  participants = client.conversations.v1.conversations(convo.sid).participants.list()
125
  for p in participants:
126
+ address = p.messaging_binding.get("address", "") if p.messaging_binding else ""
127
  if address.startswith("whatsapp:"):
128
+ print(f"πŸ†• New WhatsApp convo found: {convo.sid}")
129
  processed_convos.add(convo.sid)
130
  threading.Thread(target=poll_conversation, args=(convo.sid,), daemon=True).start()
131
  except Exception as e:
132
+ print("❌ Error polling conversations:", e)
133
+ time.sleep(MONITOR_INTERVAL_SECONDS)
134
 
135
  threading.Thread(target=poll_new_conversations, daemon=True).start()
136
 
137
+ # Main Streamlit UI
138
+ def main():
139
+ st.set_page_config(page_title="ToyShop Assistant", layout="wide")
140
+ st.title("🧸 ToyShop Assistant – WhatsApp Chatbot (RAG + Twilio)")
141
+
142
+ if st.button("πŸš€ Start"):
143
+ with st.spinner("Loading and processing documents..."):
144
+ docs = load_and_process_documents(DOCS_DIR)
145
+ if not docs:
146
+ return
147
+
148
+ with st.spinner("Creating vector store..."):
149
+ vector_store = create_vector_store(docs, EMBEDDING_MODEL_NAME)
150
+ if not vector_store:
151
+ return
152
+
153
+ with st.spinner("Initializing LLM..."):
154
+ llm = get_llm(os.getenv("OPENAI_API_KEY"))
155
+ if not llm:
156
+ return
157
+
158
+ account_sid = os.getenv("TWILIO_ACCOUNT_SID")
159
+ auth_token = os.getenv("TWILIO_AUTH_TOKEN")
160
+ if not account_sid or not auth_token:
161
+ st.error("Twilio credentials not found in environment variables.")
162
+ return
163
+
164
+ client = Client(account_sid, auth_token)
165
+
166
+ st.success("βœ… Setup complete. Monitoring WhatsApp conversations...")
167
+ start_conversation_monitor(client, vector_store, HuggingFaceEmbeddings(model_name=EMBEDDING_MODEL_NAME), docs)
168
+ st.info(f"πŸ“‘ Watching for messages every {MONITOR_INTERVAL_SECONDS} seconds...")
169
+
170
+ if __name__ == "__main__":
171
+ main()