masadonline committed on
Commit 6238ddd · verified · 1 Parent(s): e9a47d9

Update app.py

Browse files
Files changed (1)
  1. app.py +774 -277
app.py CHANGED
@@ -1,295 +1,792 @@
 
1
  import os
2
  import time
3
- import threading
4
- import streamlit as st
5
- from twilio.rest import Client
6
  from sentence_transformers import SentenceTransformer
7
- from transformers import AutoTokenizer
8
  import faiss
9
  import numpy as np
10
- import docx
11
  from groq import Groq
12
- import requests
13
- from io import StringIO
14
- from pdfminer.high_level import extract_text_to_fp
15
- from pdfminer.layout import LAParams
16
- from twilio.base.exceptions import TwilioRestException # Add this at the top
17
- import pdfplumber
18
- import datetime
19
- import csv
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
20
 
21
- APP_START_TIME = datetime.datetime.now(datetime.timezone.utc)
22
 
23
- os.environ["PYTORCH_JIT"] = "0"
24
 
25
- # --- PDF Extraction ---
26
- def _extract_tables_from_page(page):
27
- """Extracts tables from a single page of a PDF."""
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
28
 
29
- tables = page.extract_tables()
30
- if not tables:
31
  return []
32
 
33
- formatted_tables = []
34
- for table in tables:
35
- formatted_table = []
36
- for row in table:
37
- if row: # Filter out empty rows
38
- formatted_row = [cell if cell is not None else "" for cell in row] # Replace None with ""
39
- formatted_table.append(formatted_row)
40
- else:
41
- formatted_table.append([""]) # Append an empty row if the row is None
42
- formatted_tables.append(formatted_table)
43
- return formatted_tables
 
44
 
45
- def extract_text_from_pdf(pdf_path):
46
- text_output = StringIO()
47
- all_tables = []
48
  try:
49
- with pdfplumber.open(pdf_path) as pdf:
50
- for page in pdf.pages:
51
- # Extract tables
52
- page_tables = _extract_tables_from_page(page)
53
- if page_tables:
54
- all_tables.extend(page_tables)
55
- # Extract text
56
- text = page.extract_text()
57
- if text:
58
- text_output.write(text + "\n\n")
59
  except Exception as e:
60
- print(f"Error extracting with pdfplumber: {e}")
61
- # Fallback to pdfminer if pdfplumber fails
62
- with open(pdf_path, 'rb') as file:
63
- extract_text_to_fp(file, text_output, laparams=LAParams(), output_type='text', codec=None)
64
- extracted_text = text_output.getvalue()
65
- return extracted_text, all_tables # Return text and list of tables
66
-
67
- def clean_extracted_text(text):
68
- lines = text.splitlines()
69
- cleaned = []
70
- for line in lines:
71
- line = line.strip()
72
- if line:
73
- line = ' '.join(line.split())
74
- cleaned.append(line)
75
- return '\n'.join(cleaned)
76
-
77
- def _format_tables_internal(tables):
78
- """Formats extracted tables into a string representation."""
79
-
80
- formatted_tables_str = []
81
- for table in tables:
82
- # Use csv writer to handle commas and quotes correctly
83
- with StringIO() as csvfile:
84
- csvwriter = csv.writer(csvfile)
85
- csvwriter.writerows(table)
86
- formatted_tables_str.append(csvfile.getvalue())
87
- return "\n\n".join(formatted_tables_str)
88
-
89
- # --- DOCX Extraction ---
90
- def extract_text_from_docx(docx_path):
91
  try:
92
- doc = docx.Document(docx_path)
93
- return '\n'.join(para.text for para in doc.paragraphs)
94
- except Exception:
95
- return ""
96
-
97
- # --- Chunking ---
98
- def chunk_text(text, tokenizer, chunk_size=128, chunk_overlap=32, max_tokens=512):
99
- tokens = tokenizer.tokenize(text)
100
- chunks = []
101
- start = 0
102
- while start < len(tokens):
103
- end = min(start + chunk_size, len(tokens))
104
- chunk_tokens = tokens[start:end]
105
- chunk_text = tokenizer.convert_tokens_to_string(chunk_tokens)
106
- chunks.append(chunk_text)
107
- if end == len(tokens):
108
- break
109
- start += chunk_size - chunk_overlap
110
- return chunks
111
-
112
- def retrieve_chunks(question, index, embed_model, text_chunks, k=3):
113
- question_embedding = embed_model.encode(question)
114
- D, I = index.search(np.array([question_embedding]), k)
115
- return [text_chunks[i] for i in I[0]]
116
-
117
- # --- Groq Answer Generator ---
118
- def generate_answer_with_groq(question, context):
119
- url = "https://api.groq.com/openai/v1/chat/completions"
120
- api_key = os.environ.get("GROQ_API_KEY")
121
- headers = {
122
- "Authorization": f"Bearer {api_key}",
123
- "Content-Type": "application/json",
124
- }
125
- prompt = (
126
- f"Customer asked: '{question}'\n\n"
127
- f"Here is the relevant product or policy info to help:\n{context}\n\n"
128
- f"Respond in a friendly and helpful tone as a toy shop support agent."
129
- )
130
- payload = {
131
- "model": "llama3-8b-8192",
132
- "messages": [
133
- {
134
- "role": "system",
135
- "content": (
136
- "You are ToyBot, a friendly and helpful WhatsApp assistant for an online toy shop. "
137
- "Your goal is to politely answer customer questions, help them choose the right toys, "
138
- "provide order or delivery information, explain return policies, and guide them through purchases."
139
- )
140
- },
141
- {"role": "user", "content": prompt},
142
- ],
143
- "temperature": 0.5,
144
- "max_tokens": 300,
145
- }
146
- response = requests.post(url, headers=headers, json=payload)
147
- response.raise_for_status()
148
- return response.json()['choices'][0]['message']['content'].strip()
149
-
150
- # --- Twilio Functions ---
151
- def fetch_latest_incoming_message(client, conversation_sid):
152
  try:
153
- messages = client.conversations.v1.conversations(conversation_sid).messages.list()
154
- for msg in reversed(messages):
155
- if msg.author.startswith("whatsapp:"):
156
- return {
157
- "sid": msg.sid,
158
- "body": msg.body,
159
- "author": msg.author,
160
- "timestamp": msg.date_created,
161
- }
162
- except TwilioRestException as e:
163
- if e.status == 404:
164
- print(f"Conversation {conversation_sid} not found, skipping...")
165
- else:
166
- print(f"Twilio error fetching messages for {conversation_sid}:", e)
167
  except Exception as e:
168
- #print(f"Unexpected error in fetch_latest_incoming_message for {conversation_sid}:", e)
169
- pass
170
-
171
- return None
172
-
173
- def send_twilio_message(client, conversation_sid, body):
174
- return client.conversations.v1.conversations(conversation_sid).messages.create(
175
- author="system", body=body
176
- )
177
-
178
- # --- Load Knowledge Base ---
179
- def setup_knowledge_base():
180
- folder_path = "docs"
181
- all_text = ""
182
-
183
- # Process PDFs
184
- for filename in ["FAQ.pdf", "ProductReturnPolicy.pdf"]:
185
- pdf_path = os.path.join(folder_path, filename)
186
- text, tables = extract_text_from_pdf(pdf_path)
187
- all_text += clean_extracted_text(text) + "\n"
188
- all_text += _format_tables_internal(tables) + "\n"
189
-
190
- # Process CSVs
191
- for filename in ["CustomerOrders.csv"]:
192
- csv_path = os.path.join(folder_path, filename)
193
- try:
194
- with open(csv_path, newline='', encoding='utf-8') as csvfile:
195
- reader = csv.DictReader(csvfile)
196
- for row in reader:
197
- line = f"Order ID: {row.get('OrderID')} | Customer Name: {row.get('CustomerName')} | Order Date: {row.get('OrderDate')} | ProductID: {row.get('ProductID')} | Date: {row.get('OrderDate')} | Quantity: {row.get('Quantity')} | UnitPrice(USD): {row.get('UnitPrice(USD)')} | TotalPrice(USD): {row.get('TotalPrice(USD)')} | ShippingAddress: {row.get('ShippingAddress')} | OrderStatus: {row.get('OrderStatus')}"
198
- all_text += line + "\n"
199
- except Exception as e:
200
- print(f" Error reading {filename}: {e}")
201
-
202
- for filename in ["Products.csv"]:
203
- csv_path = os.path.join(folder_path, filename)
204
- try:
205
- with open(csv_path, newline='', encoding='utf-8') as csvfile:
206
- reader = csv.DictReader(csvfile)
207
- for row in reader:
208
- line = f"Product ID: {row.get('ProductID')} | Toy Name: {row.get('ToyName')} | Category: {row.get('Category')} | Price(USD): {row.get('Price(USD)')} | Stock Quantity: {row.get('StockQuantity')} | Description: {row.get('Description')}"
209
- all_text += line + "\n"
210
- except Exception as e:
211
- print(f" Error reading {filename}: {e}")
212
-
213
- # Tokenization & chunking
214
- tokenizer = AutoTokenizer.from_pretrained('bert-base-uncased')
215
- chunks = chunk_text(all_text, tokenizer)
216
- model = SentenceTransformer('all-mpnet-base-v2')
217
- embeddings = model.encode(chunks, show_progress_bar=False, truncation=True, max_length=512)
218
- dim = embeddings[0].shape[0]
219
- index = faiss.IndexFlatL2(dim)
220
- index.add(np.array(embeddings).astype('float32'))
221
- return index, model, chunks
222
-
223
-
224
-
225
- # --- Monitor Conversations ---
226
- def start_conversation_monitor(client, index, embed_model, text_chunks):
227
- processed_convos = set()
228
- last_processed_timestamp = {}
229
-
230
- def poll_conversation(convo_sid):
231
- while True:
232
- try:
233
- latest_msg = fetch_latest_incoming_message(client, convo_sid)
234
- if latest_msg:
235
- msg_time = latest_msg["timestamp"]
236
- if convo_sid not in last_processed_timestamp or msg_time > last_processed_timestamp[convo_sid]:
237
- last_processed_timestamp[convo_sid] = msg_time
238
- question = latest_msg["body"]
239
- sender = latest_msg["author"]
240
- print(f"\n📥 New message from {sender} in {convo_sid}: {question}")
241
- context = "\n\n".join(retrieve_chunks(question, index, embed_model, text_chunks))
242
- answer = generate_answer_with_groq(question, context)
243
- send_twilio_message(client, convo_sid, answer)
244
- print(f"📤 Replied to {sender}: {answer}")
245
- time.sleep(3)
246
- except Exception as e:
247
- print(f" Error in convo {convo_sid} polling:", e)
248
- time.sleep(5)
249
-
250
- def poll_new_conversations():
251
- print("➡️ Monitoring for new WhatsApp conversations...")
252
- while True:
253
- try:
254
- conversations = client.conversations.v1.conversations.list(limit=20)
255
- for convo in conversations:
256
- convo_full = client.conversations.v1.conversations(convo.sid).fetch()
257
- if convo.sid not in processed_convos and convo_full.date_created > APP_START_TIME:
258
- participants = client.conversations.v1.conversations(convo.sid).participants.list()
259
- for p in participants:
260
- address = p.messaging_binding.get("address", "") if p.messaging_binding else ""
261
- if address.startswith("whatsapp:"):
262
- print(f"🆕 New WhatsApp convo found: {convo.sid}")
263
- processed_convos.add(convo.sid)
264
- threading.Thread(target=poll_conversation, args=(convo.sid,), daemon=True).start()
265
- except Exception as e:
266
- print(" Error polling conversations:", e)
267
- time.sleep(5)
268
-
269
- # Launch conversation polling monitor
270
- threading.Thread(target=poll_new_conversations, daemon=True).start()
271
-
272
-
273
-
274
- # --- Streamlit UI ---
275
- st.set_page_config(page_title="Quasa – AI Powered WhatsApp Chatbot", layout="wide")
276
- st.title("📱 Quasa – AI Powered WhatsApp Chatbot")
277
-
278
- account_sid = st.secrets.get("TWILIO_SID")
279
- auth_token = st.secrets.get("TWILIO_TOKEN")
280
- GROQ_API_KEY = st.secrets.get("GROQ_API_KEY")
281
-
282
- if not all([account_sid, auth_token, GROQ_API_KEY]):
283
- st.warning("⚠️ Provide all credentials below:")
284
- account_sid = st.text_input("Twilio SID", value=account_sid or "")
285
- auth_token = st.text_input("Twilio Token", type="password", value=auth_token or "")
286
- GROQ_API_KEY = st.text_input("GROQ API Key", type="password", value=GROQ_API_KEY or "")
287
-
288
- if all([account_sid, auth_token, GROQ_API_KEY]):
289
- os.environ["GROQ_API_KEY"] = GROQ_API_KEY
290
- index, model, chunks = setup_knowledge_base()
291
- st.success("Knowledge base loaded.")
292
- st.success("🟢 Monitoring new WhatsApp conversations...")
293
- client = Client(account_sid, auth_token)
294
- threading.Thread(target=start_conversation_monitor, args=(client, index, model, chunks), daemon=True).start()
295
- st.info("⏳ Waiting for new messages...")
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ import streamlit as st
2
  import os
3
  import time
4
+ from datetime import datetime, timezone
5
+ import json
6
+ import PyPDF2
7
  from sentence_transformers import SentenceTransformer
 
8
  import faiss
9
  import numpy as np
10
+ from twilio.rest import Client
11
  from groq import Groq
12
+ import re
13
+
14
+ # --- Page Configuration ---
15
+ st.set_page_config(page_title="RAG Customer Support Chatbot", layout="wide")
16
+
17
+ # --- Default Configurations & File Paths ---
18
+ DEFAULT_TWILIO_ACCOUNT_SID_FALLBACK = ""
19
+ DEFAULT_TWILIO_AUTH_TOKEN_FALLBACK = ""
20
+ DEFAULT_GROQ_API_KEY_FALLBACK = ""
21
+
22
+ #DEFAULT_TWILIO_CONVERSATION_SERVICE_SID = ""
23
+ DEFAULT_TWILIO_BOT_WHATSAPP_IDENTITY = st.secrets.get("TWILIO_PHONE_NUMBER", "whatsapp:+14155238886")
24
+ DEFAULT_EMBEDDING_MODEL_NAME = "sentence-transformers/all-MiniLM-L6-v2"
25
+ DEFAULT_POLLING_INTERVAL_S = 30
26
+ DOCS_FOLDER = "docs/"
27
+ CUSTOMER_ORDERS_FILE = os.path.join(DOCS_FOLDER, "CustomerOrders.json")
28
+ PRODUCTS_FILE = os.path.join(DOCS_FOLDER, "Products.json")
29
+ POLICY_PDF_FILE = os.path.join(DOCS_FOLDER, "ProductReturnPolicy.pdf")
30
+ FAQ_PDF_FILE = os.path.join(DOCS_FOLDER, "FAQ.pdf")
31
+
32
+ # --- Application Secrets Configuration ---
33
+ APP_TWILIO_ACCOUNT_SID = st.secrets.get("TWILIO_ACCOUNT_SID")
34
+ APP_TWILIO_AUTH_TOKEN = st.secrets.get("TWILIO_AUTH_TOKEN")
35
+ APP_GROQ_API_KEY = st.secrets.get("GROQ_API_KEY")
36
+
37
+ #APP_TWILIO_CONVERSATION_SERVICE_SID_SECRET = st.secrets.get("TWILIO_CONVERSATION_SERVICE_SID")
38
+ APP_TWILIO_BOT_WHATSAPP_IDENTITY_SECRET = st.secrets.get("TWILIO_BOT_WHATSAPP_IDENTITY")
39
+
40
+ # --- RAG Processing Utilities ---
41
+ def load_json_data(file_path):
42
+ try:
43
+ with open(file_path, 'r', encoding='utf-8') as f:
44
+ data = json.load(f)
45
+ return data
46
+ except FileNotFoundError:
47
+ st.error(f"Error: JSON file not found at {file_path}")
48
+ return None
49
+ except json.JSONDecodeError:
50
+ st.error(f"Error: Could not decode JSON from {file_path}")
51
+ return None
52
+ except Exception as e:
53
+ st.error(f"An unexpected error occurred while loading {file_path}: {e}")
54
+ return None
55
 
56
+ def load_pdf_data(file_path):
57
+ try:
58
+ with open(file_path, 'rb') as f:
59
+ reader = PyPDF2.PdfReader(f)
60
+ text_pages = []
61
+ for page_num in range(len(reader.pages)):
62
+ page = reader.pages[page_num]
63
+ text_pages.append(page.extract_text() or "")
64
+ return text_pages
65
+ except FileNotFoundError:
66
+ st.error(f"Error: PDF file not found at {file_path}")
67
+ return []
68
+ except Exception as e:
69
+ st.error(f"An error occurred while processing PDF {file_path}: {e}")
70
+ return []
71
 
72
+ def chunk_text(text_pages, chunk_size=1000, chunk_overlap=200):
73
+ full_text = "\n".join(text_pages)
74
+ if not full_text.strip():
75
+ return []
76
+ chunks = []
77
+ start = 0
78
+ while start < len(full_text):
79
+ end = start + chunk_size
80
+ chunks.append(full_text[start:end])
81
+ if end >= len(full_text):
82
+ break
83
+ start += (chunk_size - chunk_overlap)
84
+ if start >= len(full_text):
85
+ break
86
+ return [chunk for chunk in chunks if chunk.strip()]
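
As an aside, here is a minimal sketch of the sliding-window behaviour implemented by chunk_text above, with toy sizes rather than the 1000/200 defaults:

# Illustrative only: same loop as chunk_text above, run with toy sizes.
def sliding_chunks(text, chunk_size=10, chunk_overlap=4):
    chunks, start = [], 0
    while start < len(text):
        end = start + chunk_size
        chunks.append(text[start:end])
        if end >= len(text):
            break
        start += chunk_size - chunk_overlap
    return [c for c in chunks if c.strip()]

print(sliding_chunks("abcdefghijklmnopqrstuvwxyz"))
# -> ['abcdefghij', 'ghijklmnop', 'mnopqrstuv', 'stuvwxyz']
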
87
 
88
+ @st.cache_resource(show_spinner="Initializing embedding model...")
89
+ def initialize_embedding_model(model_name=DEFAULT_EMBEDDING_MODEL_NAME):
90
+ try:
91
+ model = SentenceTransformer(model_name)
92
+ return model
93
+ except Exception as e:
94
+ st.error(f"Error initializing embedding model '{model_name}': {e}")
95
+ return None
96
+
97
+ @st.cache_resource(show_spinner="Building FAISS index for PDF documents...")
98
+ def create_faiss_index(_text_chunks, _embedding_model):
99
+ if not _text_chunks or _embedding_model is None:
100
+ st.warning("Cannot create FAISS index: No text chunks or embedding model available.")
101
+ return None, []
102
+ try:
103
+ valid_chunks = [str(chunk) for chunk in _text_chunks if chunk and isinstance(chunk, str) and chunk.strip()]
104
+ if not valid_chunks:
105
+ st.warning("No valid text chunks to embed for FAISS index.")
106
+ return None, []
107
+ embeddings = _embedding_model.encode(valid_chunks, convert_to_tensor=False)
108
+ if embeddings.ndim == 1:
109
+ embeddings = embeddings.reshape(1, -1)
110
+ if embeddings.shape[0] == 0:
111
+ st.warning("No embeddings were generated for FAISS index.")
112
+ return None, []
113
+ dimension = embeddings.shape[1]
114
+ index = faiss.IndexFlatL2(dimension)
115
+ index.add(np.array(embeddings, dtype=np.float32))
116
+ return index, valid_chunks
117
+ except Exception as e:
118
+ st.error(f"Error creating FAISS index: {e}")
119
+ return None, []
120
 
121
+ def search_faiss_index(index, query_text, embedding_model, indexed_chunks, k=3):
122
+ if index is None or embedding_model is None or not query_text:
123
+ return []
124
+ try:
125
+ query_embedding = embedding_model.encode([query_text], convert_to_tensor=False)
126
+ if query_embedding.ndim == 1:
127
+ query_embedding = query_embedding.reshape(1, -1)
128
+ distances, indices = index.search(np.array(query_embedding, dtype=np.float32), k)
129
+ results = []
130
+ for i in range(len(indices[0])):
131
+ idx = indices[0][i]
132
+ if 0 <= idx < len(indexed_chunks):
133
+ results.append(indexed_chunks[idx])
134
+ return results
135
+ except Exception as e:
136
+ st.error(f"Error searching FAISS index: {e}")
137
  return []
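
A self-contained sketch of the embed → IndexFlatL2 → search round trip that create_faiss_index and search_faiss_index wrap (assumes sentence-transformers and faiss-cpu are installed; the chunks are made up):

import faiss
import numpy as np
from sentence_transformers import SentenceTransformer

model = SentenceTransformer("sentence-transformers/all-MiniLM-L6-v2")
chunks = ["Returns are accepted within 30 days of delivery.",
          "Standard shipping takes 3-5 business days."]
embeddings = model.encode(chunks, convert_to_tensor=False)
index = faiss.IndexFlatL2(embeddings.shape[1])
index.add(np.array(embeddings, dtype=np.float32))
query = model.encode(["How long do I have to return an item?"], convert_to_tensor=False)
distances, indices = index.search(np.array(query, dtype=np.float32), 1)
print(chunks[indices[0][0]])  # expected: the returns-policy chunk
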
138
 
139
+ def get_order_details(order_id, customer_orders_data):
140
+ if not customer_orders_data:
141
+ return "Customer order data is not loaded."
142
+ for order in customer_orders_data:
143
+ if order.get("order_id") == order_id:
144
+ return json.dumps(order, indent=2)
145
+ return f"No order found with ID: {order_id}."
146
+
147
+ def get_product_info(query, products_data):
148
+ if not products_data:
149
+ st.warning("Product data is not loaded or is empty in get_product_info.")
150
+ return "Product data is not loaded."
151
 
152
+ query_lower = query.lower()
153
+ found_products = []
154
+
155
+ for product in products_data:
156
+ if not isinstance(product, dict):
157
+ continue
158
+
159
+ product_id_lower = str(product.get("Product_ID", "")).lower()
160
+ product_name_lower = str(product.get("Product_Name", "")).lower()
161
+ product_type_lower = str(product.get("Product_Type", "")).lower()
162
+
163
+ match = False
164
+ if product_id_lower and product_id_lower in query_lower:
165
+ match = True
166
+
167
+ if not match and product_name_lower:
168
+ if query_lower in product_name_lower or product_name_lower in query_lower:
169
+ match = True
170
+
171
+ if not match and product_type_lower:
172
+ if query_lower in product_type_lower or product_type_lower in query_lower:
173
+ match = True
174
+
175
+ if match:
176
+ found_products.append(product)
177
+
178
+ if found_products:
179
+ return json.dumps(found_products, indent=2)
180
+ return f"No product information found matching your query: '{query}'."
181
+
182
+ # --- LLM Operations ---
183
+ @st.cache_data(show_spinner="Generating response with LLaMA3...")
184
+ def generate_response_groq(_groq_client, query, context, model="llama3-8b-8192",
185
+ intent=None, customer_name=None, item_name=None,
186
+ shipping_address=None, delivery_date=None, order_id=None, order_status=None):
187
+ if not _groq_client:
188
+ return "GROQ client not initialized. Please check API key."
189
+ if not query:
190
+ return "Query is empty."
191
+
192
+ system_message = "You are a helpful customer support assistant."
193
+ user_prompt = ""
194
+
195
+ if intent == "ORDER_STATUS" and order_id and customer_name and order_status:
196
+ system_message = (
197
+ f"You are an exceptionally friendly and helpful customer support assistant. "
198
+ f"Your current task is to provide a single, complete, and human-like sentence as a response to {customer_name} "
199
+ f"about their order {order_id}. You MUST incorporate all relevant order details provided into this single sentence."
200
+ )
201
+
202
+ item_description = item_name if item_name else "the ordered item(s)"
203
+
204
+ # Construct the core information string that the LLM needs to build upon
205
+ core_info_parts = [
206
+ f"your order {order_id}",
207
+ f"for {item_description}",
208
+ f"has a status of '{order_status}'"
209
+ ]
210
+
211
+ if order_status.lower() == "delivered":
212
+ if shipping_address:
213
+ core_info_parts.append(f"and was delivered to {shipping_address}")
214
+ else:
215
+ core_info_parts.append("and was delivered (address not specified)")
216
+ if delivery_date:
217
+ core_info_parts.append(f"on {delivery_date}")
218
+ else:
219
+ core_info_parts.append("(delivery date not specified)")
220
+
221
+ core_information_to_include = ", ".join(core_info_parts[:-1]) + (f" {core_info_parts[-1]}" if len(core_info_parts) > 1 else "")
222
+ if order_status.lower() != "delivered" and len(core_info_parts) > 1:  # for non-delivered orders, avoid 'and' before the status
223
+ core_information_to_include = f"your order {order_id} for {item_description} has a status of '{order_status}'"
224
+
225
+
226
+ user_prompt = (
227
+ f"Customer: {customer_name}\n"
228
+ f"Order ID: {order_id}\n"
229
+ f"Item(s): {item_description}\n"
230
+ f"Status: {order_status}\n"
231
+ )
232
+ if order_status.lower() == "delivered":
233
+ user_prompt += f"Shipping Address: {shipping_address if shipping_address else 'Not specified'}\n"
234
+ user_prompt += f"Delivered On: {delivery_date if delivery_date else 'Not specified'}\n"
235
+
236
+ user_prompt += f"\nOriginal user query for context: '{query}'\n\n"
237
+ user_prompt += (
238
+ f"Your task: Generate a single, complete, and human-like sentence that starts with a greeting to {customer_name}. "
239
+ f"This sentence MUST convey the following essential information: {core_information_to_include}.\n"
240
+ f"For example, if all details are present for a delivered order: 'Hi {customer_name}, {core_information_to_include}.'\n"
241
+ f"For example, for a non-delivered order: 'Hi {customer_name}, {core_information_to_include}.'\n"
242
+ f"IMPORTANT: Do not ask questions. Do not add any extra conversational fluff. Just provide the single, informative sentence as requested. "
243
+ f"Ensure the sentence flows naturally and uses the details you've been given.\n"
244
+ f"Respond now with ONLY that single sentence."
245
+ )
246
+ # For LLM's deeper reference, though the primary instruction is above:
247
+ # user_prompt += f"\n\nFull database context for your reference if needed: {context}"
248
+
249
+ else: # Default prompt structure for other intents or if details are missing
250
+ system_message = "You are a helpful customer support assistant."
251
+ user_prompt = f"""Use the following context to answer the user's question.
252
+ If the context doesn't contain the answer, state that you don't have enough information or ask clarifying questions.
253
+ Do not make up information. Be concise and polite.
254
+
255
+ Context:
256
+ {context}
257
+
258
+ User Question: {query}
259
+
260
+ Assistant Answer:
261
+ """
262
  try:
263
+ chat_completion = _groq_client.chat.completions.create(
264
+ messages=[
265
+ {"role": "system", "content": system_message},
266
+ {"role": "user", "content": user_prompt}
267
+ ],
268
+ model=model,
269
+ temperature=0.5, # Slightly lower temperature might help with stricter adherence
270
+ max_tokens=1024,
271
+ top_p=1
272
+ )
273
+ response = chat_completion.choices[0].message.content.strip() # Added strip()
274
+ return response
275
  except Exception as e:
276
+ st.error(f"Error calling GROQ API: {e}")
277
+ return "Sorry, I encountered an error while trying to generate a response."
278
+
279
+
280
+ def initialize_groq_client(api_key_val):
281
+ if not api_key_val:
282
+ st.warning("GROQ API Key is missing.")
283
+ return None
284
  try:
285
+ client = Groq(api_key=api_key_val)
286
+ return client
287
+ except Exception as e:
288
+ st.error(f"Failed to initialize GROQ client: {e}")
289
+ return None
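
For reference, the bare Groq call that generate_response_groq wraps looks roughly like this (the API key is a placeholder):

from groq import Groq

groq_client = Groq(api_key="gsk_your_key_here")  # placeholder key
completion = groq_client.chat.completions.create(
    model="llama3-8b-8192",
    messages=[
        {"role": "system", "content": "You are a helpful customer support assistant."},
        {"role": "user", "content": "What is your return policy?"},
    ],
    temperature=0.5,
    max_tokens=1024,
    top_p=1,
)
print(completion.choices[0].message.content.strip())
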
290
+
291
+ # --- Twilio Operations ---
292
+ def initialize_twilio_client(acc_sid, auth_tkn):
293
+ if not acc_sid or not auth_tkn:
294
+ st.warning("Twilio Account SID or Auth Token is missing.")
295
+ return None
296
  try:
297
+ client = Client(acc_sid, auth_tkn)
298
+ return client
299
  except Exception as e:
300
+ st.error(f"Failed to initialize Twilio client: {e}")
301
+ return None
302
+
303
+ def get_new_whatsapp_messages(twilio_client, bot_start_time_utc, processed_message_sids, bot_whatsapp_identity_val):
304
+ if not twilio_client:
305
+ st.warning("Twilio client not initialized.")
306
+ return []
307
+ if not bot_whatsapp_identity_val:
308
+ st.warning("Twilio Bot WhatsApp Identity not provided.")
309
+ return []
310
+
311
+ new_messages_to_process = []
312
+ try:
313
+ # Get all conversations (not limited to a specific service)
314
+ conversations = twilio_client.conversations.v1.conversations.list(limit=50)
315
+
316
+ for conv in conversations:
317
+ if conv.date_updated and conv.date_updated > bot_start_time_utc:
318
+ messages = twilio_client.conversations.v1 \
319
+ .conversations(conv.sid) \
320
+ .messages \
321
+ .list(order='desc', limit=10)
322
+
323
+ for msg in messages:
324
+ if msg.sid in processed_message_sids:
325
+ continue
326
+
327
+ # Check if message is from WhatsApp and not from the bot
328
+ if msg.author and msg.author.lower() != bot_whatsapp_identity_val.lower() and \
329
+ msg.date_created and msg.date_created > bot_start_time_utc and \
330
+ msg.author.startswith('whatsapp:'):
331
+ new_messages_to_process.append({
332
+ "conversation_sid": conv.sid, "message_sid": msg.sid,
333
+ "author_identity": msg.author, "message_body": msg.body,
334
+ "timestamp_utc": msg.date_created
335
+ })
336
+ break
337
+ except Exception as e:
338
+ st.error(f"Error fetching Twilio messages: {e}")
339
+ return sorted(new_messages_to_process, key=lambda m: m['timestamp_utc'])
340
+
341
+ def send_whatsapp_message(twilio_client, conversation_sid, message_body, bot_identity_val):
342
+ if not twilio_client:
343
+ st.error("Twilio client not initialized for sending message.")
344
+ return False
345
+ if not bot_identity_val:
346
+ st.error("Bot identity not provided for sending message.")
347
+ return False
348
+ try:
349
+ twilio_client.conversations.v1 \
350
+ .conversations(conversation_sid) \
351
+ .messages \
352
+ .create(author=bot_identity_val, body=message_body)
353
+ st.success(f"Sent reply to conversation {conversation_sid}")
354
+ st.write(f"Twilio response to send: {message_body}")
355
+ print(f"[Twilio Send] Sending response: {message_body}")
356
+ return True
357
+ except Exception as e:
358
+ st.error(f"Error sending Twilio message to {conversation_sid}: {e}")
359
+ return False
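
A minimal sketch of the Twilio Conversations calls the two helpers above rely on, with placeholder credentials and the sidebar's default bot identity:

from twilio.rest import Client

client = Client("ACxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxx", "your_auth_token")  # placeholders
for conv in client.conversations.v1.conversations.list(limit=5):
    messages = client.conversations.v1.conversations(conv.sid).messages.list(order="desc", limit=1)
    if messages and messages[0].author and messages[0].author.startswith("whatsapp:"):
        client.conversations.v1.conversations(conv.sid).messages.create(
            author="whatsapp:+14155238886",  # bot identity, as in the sidebar default
            body="Thanks for your message! Our support bot will reply shortly.",
        )
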
360
+
361
+ # --- Main Application Logic & UI ---
362
+ st.title("🤖 RAG-Based Customer Support Chatbot")
363
+ st.markdown("Powered by Streamlit, Twilio, GROQ LLaMA3, and FAISS.")
364
+
365
+ # --- Sidebar for Configurations ---
366
+ st.sidebar.title("⚙️ Configurations")
367
+
368
+ if APP_TWILIO_ACCOUNT_SID:
369
+ st.sidebar.text_input("Twilio Account SID (from Secrets)", value="********" + APP_TWILIO_ACCOUNT_SID[-4:] if len(APP_TWILIO_ACCOUNT_SID) > 4 else "********", disabled=True)
370
+ twilio_account_sid_to_use = APP_TWILIO_ACCOUNT_SID
371
+ else:
372
+ st.sidebar.warning("Secret 'TWILIO_ACCOUNT_SID' not found.")
373
+ twilio_account_sid_to_use = st.sidebar.text_input("Twilio Account SID (Enter Manually)", value=DEFAULT_TWILIO_ACCOUNT_SID_FALLBACK, type="password")
374
+
375
+ if APP_TWILIO_AUTH_TOKEN:
376
+ st.sidebar.text_input("Twilio Auth Token (from Secrets)", value="********", disabled=True)
377
+ twilio_auth_token_to_use = APP_TWILIO_AUTH_TOKEN
378
+ else:
379
+ st.sidebar.warning("Secret 'TWILIO_AUTH_TOKEN' not found.")
380
+ twilio_auth_token_to_use = st.sidebar.text_input("Twilio Auth Token (Enter Manually)", value=DEFAULT_TWILIO_AUTH_TOKEN_FALLBACK, type="password")
381
+
382
+ if APP_GROQ_API_KEY:
383
+ st.sidebar.text_input("GROQ API Key (from Secrets)", value="gsk_********" + APP_GROQ_API_KEY[-4:] if len(APP_GROQ_API_KEY) > 8 else "********", disabled=True)
384
+ groq_api_key_to_use = APP_GROQ_API_KEY
385
+ else:
386
+ st.sidebar.warning("Secret 'GROQ_API_KEY' not found.")
387
+ groq_api_key_to_use = st.sidebar.text_input("GROQ API Key (Enter Manually)", value=DEFAULT_GROQ_API_KEY_FALLBACK, type="password")
388
+
389
+ # twilio_conversation_service_sid_to_use = st.sidebar.text_input(
390
+ # "Twilio Conversation Service SID (IS...)",
391
+ # value=APP_TWILIO_CONVERSATION_SERVICE_SID_SECRET or DEFAULT_TWILIO_CONVERSATION_SERVICE_SID,
392
+ # type="password",
393
+ # help="The SID of your Twilio Conversations Service. Can be set by 'TWILIO_CONVERSATION_SERVICE_SID' secret."
394
+ # )
395
+ twilio_bot_whatsapp_identity_to_use = st.sidebar.text_input(
396
+ "Twilio Bot WhatsApp Identity",
397
+ value=APP_TWILIO_BOT_WHATSAPP_IDENTITY_SECRET or DEFAULT_TWILIO_BOT_WHATSAPP_IDENTITY,
398
+ help="e.g., 'whatsapp:+1234567890'. Can be set by 'TWILIO_BOT_WHATSAPP_IDENTITY' secret."
399
+ )
400
+ embedding_model_name_to_use = st.sidebar.text_input(
401
+ "Embedding Model Name",
402
+ value=DEFAULT_EMBEDDING_MODEL_NAME
403
+ )
404
+ polling_interval_to_use = st.sidebar.number_input(
405
+ "Twilio Polling Interval (seconds)",
406
+ min_value=10, max_value=300,
407
+ value=DEFAULT_POLLING_INTERVAL_S,
408
+ step=5
409
+ )
410
+
411
+ # --- Initialize Session State ---
412
+ if "app_started" not in st.session_state: st.session_state.app_started = False
413
+ if "bot_started" not in st.session_state: st.session_state.bot_started = False
414
+ if "rag_pipeline_ready" not in st.session_state: st.session_state.rag_pipeline_ready = False
415
+ if "last_twilio_poll_time" not in st.session_state: st.session_state.last_twilio_poll_time = time.time()
416
+ if "bot_start_time_utc" not in st.session_state: st.session_state.bot_start_time_utc = None
417
+ if "processed_message_sids" not in st.session_state: st.session_state.processed_message_sids = set()
418
+ if "manual_chat_history" not in st.session_state: st.session_state.manual_chat_history = []
419
+
420
+ # --- Helper: Simple Intent Classifier ---
421
+ def simple_intent_classifier(query):
422
+ query_lower = query.lower()
423
+ order_keywords = ["order", "status", "track", "delivery"]
424
+ order_id_match = re.search(r'\b(ord\d{3,})\b', query_lower, re.IGNORECASE)
425
+
426
+ if any(k in query_lower for k in order_keywords):
427
+ if order_id_match:
428
+ return "ORDER_STATUS", order_id_match.group(1).upper()
429
+ return "ORDER_STATUS", None
430
+
431
+ product_keywords = ["product", "item", "buy", "price", "feature", "stock"]
432
+ product_id_match = re.search(r'\b(prd\d{3,})\b', query_lower, re.IGNORECASE)
433
+ if any(k in query_lower for k in product_keywords) or product_id_match:
434
+ return "PRODUCT_INFO", None
435
+
436
+ if any(k in query_lower for k in ["return", "policy", "refund", "exchange", "faq", "question", "how to", "support"]):
437
+ return "GENERAL_POLICY_FAQ", None
438
+
439
+ return "UNKNOWN", None
440
+
441
+ # --- Main Application Controls ---
442
+ col1, col2, col3, col4 = st.columns(4)
443
+ with col1:
444
+ if st.button("🚀 Start App", disabled=st.session_state.app_started, use_container_width=True):
445
+ if not groq_api_key_to_use:
446
+ st.error("GROQ API Key is required.")
447
+ else:
448
+ with st.spinner("Initializing RAG pipeline..."):
449
+ st.session_state.embedding_model = initialize_embedding_model(embedding_model_name_to_use)
450
+ st.session_state.customer_orders_data = load_json_data(CUSTOMER_ORDERS_FILE)
451
+ st.session_state.products_data = load_json_data(PRODUCTS_FILE)
452
+ policy_pdf_pages = load_pdf_data(POLICY_PDF_FILE)
453
+ faq_pdf_pages = load_pdf_data(FAQ_PDF_FILE)
454
+ all_pdf_text_pages = policy_pdf_pages + faq_pdf_pages
455
+ st.session_state.pdf_text_chunks_raw = chunk_text(all_pdf_text_pages)
456
+
457
+ if st.session_state.embedding_model and st.session_state.pdf_text_chunks_raw:
458
+ st.session_state.faiss_index_pdfs, st.session_state.indexed_pdf_chunks = \
459
+ create_faiss_index(st.session_state.pdf_text_chunks_raw, st.session_state.embedding_model)
460
+ else:
461
+ st.session_state.faiss_index_pdfs, st.session_state.indexed_pdf_chunks = None, []
462
+ st.warning("FAISS index for PDFs could not be created (model or chunks missing).")
463
+
464
+ st.session_state.groq_client = initialize_groq_client(groq_api_key_to_use)
465
+
466
+ if st.session_state.embedding_model and \
467
+ st.session_state.groq_client and \
468
+ st.session_state.customer_orders_data is not None and \
469
+ st.session_state.products_data is not None and \
470
+ (st.session_state.faiss_index_pdfs is not None or not all_pdf_text_pages):
471
+ st.session_state.rag_pipeline_ready = True
472
+ st.session_state.app_started = True
473
+ st.success("RAG Application Started!")
474
+ st.rerun()
475
+ else:
476
+ error_messages = []
477
+ if not st.session_state.embedding_model: error_messages.append("Embedding model failed to initialize.")
478
+ if not st.session_state.groq_client: error_messages.append("GROQ client failed to initialize.")
479
+ if st.session_state.customer_orders_data is None: error_messages.append(f"CustomerOrders.json ({CUSTOMER_ORDERS_FILE}) failed to load.")
480
+ if st.session_state.products_data is None: error_messages.append(f"Products.json ({PRODUCTS_FILE}) failed to load.")
481
+ if all_pdf_text_pages and st.session_state.faiss_index_pdfs is None: error_messages.append("PDF FAISS index failed to create.")
482
+ st.error("Failed to initialize RAG pipeline. Issues:\n- " + "\n- ".join(error_messages) + "\nCheck configurations and ensure all data files are present in 'docs/'.")
483
+ st.session_state.app_started = False
484
+ with col2:
485
+ if st.button("🛑 Stop App", disabled=not st.session_state.app_started, use_container_width=True):
486
+ keys_to_reset = ["app_started", "bot_started", "rag_pipeline_ready", "embedding_model",
487
+ "customer_orders_data", "products_data", "pdf_text_chunks_raw",
488
+ "faiss_index_pdfs", "indexed_pdf_chunks", "groq_client", "twilio_client",
489
+ "bot_start_time_utc", "processed_message_sids", "manual_chat_history"]
490
+ for key in keys_to_reset:
491
+ if key in st.session_state: del st.session_state[key]
492
+ st.session_state.app_started = False
493
+ st.session_state.bot_started = False
494
+ st.session_state.rag_pipeline_ready = False
495
+ st.session_state.processed_message_sids = set()
496
+ st.session_state.manual_chat_history = []
497
+ st.success("Application Stopped.")
498
+ st.rerun()
499
+ with col3:
500
+ if st.button("💬 Start WhatsApp Bot", disabled=not st.session_state.app_started or st.session_state.bot_started, use_container_width=True):
501
+ if not all([twilio_account_sid_to_use, twilio_auth_token_to_use, twilio_bot_whatsapp_identity_to_use]):
502
+ st.error("Twilio Account SID, Auth Token, Conversation Service SID, and Bot WhatsApp Identity are all required.")
503
+ else:
504
+ st.session_state.twilio_client = initialize_twilio_client(twilio_account_sid_to_use, twilio_auth_token_to_use)
505
+ if st.session_state.twilio_client:
506
+ st.session_state.bot_started = True
507
+ st.session_state.bot_start_time_utc = datetime.now(timezone.utc)
508
+ st.session_state.processed_message_sids = set()
509
+ st.session_state.last_twilio_poll_time = time.time() - polling_interval_to_use - 1
510
+ st.success("WhatsApp Bot Started!")
511
+ st.rerun()
512
+ else:
513
+ st.error("Failed to initialize Twilio client. WhatsApp Bot not started.")
514
+ with col4:
515
+ if st.button("🔕 Stop WhatsApp Bot", disabled=not st.session_state.bot_started, use_container_width=True):
516
+ st.session_state.bot_started = False
517
+ st.info("WhatsApp Bot Stopped.")
518
+ st.rerun()
519
+ st.divider()
520
+
521
+ # --- Manual Query Interface ---
522
+ if st.session_state.get("app_started") and st.session_state.get("rag_pipeline_ready"):
523
+ st.subheader("💬 Manual Query")
524
+ for chat_entry in st.session_state.manual_chat_history:
525
+ with st.chat_message(chat_entry["role"]):
526
+ st.markdown(chat_entry["content"])
527
+ if "context" in chat_entry and chat_entry["context"]:
528
+ with st.expander("Retrieved Context"):
529
+ try:
530
+ if isinstance(chat_entry["context"], str) and \
531
+ (chat_entry["context"].strip().startswith('{') or chat_entry["context"].strip().startswith('[')):
532
+ st.json(json.loads(chat_entry["context"]))
533
+ elif isinstance(chat_entry["context"], list):
534
+ st.json(chat_entry["context"])
535
+ else:
536
+ st.text(str(chat_entry["context"]))
537
+ except (json.JSONDecodeError, TypeError):
538
+ st.text(str(chat_entry["context"]))
539
+
540
+ user_query_manual = st.chat_input("Ask a question:")
541
+ if user_query_manual:
542
+ st.session_state.manual_chat_history.append({"role": "user", "content": user_query_manual})
543
+ with st.chat_message("user"): st.markdown(user_query_manual)
544
+
545
+ with st.spinner("Thinking..."):
546
+ intent_result = simple_intent_classifier(user_query_manual)
547
+ intent = intent_result[0]
548
+ potential_oid_from_intent = intent_result[1]
549
+
550
+ context_for_llm, raw_context_data = "No specific context could be retrieved.", None
551
+
552
+ extracted_customer_name, extracted_item_name, extracted_shipping_address, \
553
+ extracted_delivery_date, extracted_order_id, extracted_order_status = [None] * 6
554
+
555
+
556
+ if intent == "ORDER_STATUS":
557
+ order_id_to_check = None
558
+ if potential_oid_from_intent:
559
+ order_id_to_check = potential_oid_from_intent
560
+ else:
561
+ match_manual = re.search(r'\b(ord\d{3,})\b', user_query_manual.lower(), re.IGNORECASE)
562
+ if match_manual:
563
+ order_id_to_check = match_manual.group(1).upper()
564
+
565
+ if order_id_to_check:
566
+ raw_context_data = get_order_details(order_id_to_check, st.session_state.customer_orders_data)
567
+ # context_for_llm will be used as the 'context' parameter in generate_response_groq
568
+ # For ORDER_STATUS, this raw_context_data (JSON string) is still useful for LLM's reference,
569
+ # even though specific fields are extracted for the specialized prompt.
570
+ context_for_llm = raw_context_data
571
+
572
+ if isinstance(raw_context_data, str) and not raw_context_data.startswith("No order found") and not raw_context_data.startswith("Customer order data is not loaded"):
573
+ try:
574
+ order_data_dict = json.loads(raw_context_data)
575
+ extracted_customer_name = order_data_dict.get("customer_name")
576
+ items = order_data_dict.get("items")
577
+ if items and len(items) > 0 and isinstance(items[0], dict):
578
+ extracted_item_name = items[0].get("name", "your item(s)")
579
+ else:
580
+ extracted_item_name = "your item(s)" # Fallback
581
+ extracted_shipping_address = order_data_dict.get("shipping_address")
582
+ extracted_delivery_date = order_data_dict.get("delivered_on")
583
+ extracted_order_status = order_data_dict.get("status")
584
+ extracted_order_id = order_data_dict.get("order_id") # Should be same as order_id_to_check
585
+ except json.JSONDecodeError:
586
+ st.warning(f"Could not parse order details JSON for {order_id_to_check} for personalization.")
587
+ context_for_llm = f"Error parsing order details for {order_id_to_check}. Raw data: {raw_context_data}"
588
+ elif isinstance(raw_context_data, str): # Handle "No order found" or "data not loaded"
589
+ context_for_llm = raw_context_data # LLM will state this
590
+ else:
591
+ context_for_llm = "To check an order status, please provide a valid Order ID (e.g., ORD123)."
592
+ raw_context_data = {"message": "Order ID needed or not found in query."}
593
+
594
+ elif intent == "PRODUCT_INFO":
595
+ raw_context_data = get_product_info(user_query_manual, st.session_state.products_data)
596
+ context_for_llm = raw_context_data # Product info is directly used as context
597
+
598
+ elif intent == "GENERAL_POLICY_FAQ" or intent == "UNKNOWN":
599
+ if st.session_state.faiss_index_pdfs and st.session_state.embedding_model and st.session_state.indexed_pdf_chunks:
600
+ k_val = 3 if intent == "GENERAL_POLICY_FAQ" else 2
601
+ retrieved_chunks = search_faiss_index(st.session_state.faiss_index_pdfs, user_query_manual,
602
+ st.session_state.embedding_model, st.session_state.indexed_pdf_chunks, k=k_val)
603
+ if retrieved_chunks:
604
+ context_for_llm = "Relevant information from documents:\n\n" + "\n\n---\n\n".join(retrieved_chunks)
605
+ raw_context_data = retrieved_chunks
606
+ else:
607
+ context_for_llm = "I couldn't find specific information in our policy or FAQ documents regarding your query."
608
+ raw_context_data = {"message": "No relevant PDF chunks found."}
609
+ else:
610
+ context_for_llm = "Our policy and FAQ documents are currently unavailable for search."
611
+ raw_context_data = {"message": "PDF index or embedding model not ready."}
612
+
613
+ llm_response = generate_response_groq(
614
+ _groq_client=st.session_state.groq_client,
615
+ query=user_query_manual,
616
+ context=context_for_llm,
617
+ intent=intent,
618
+ customer_name=extracted_customer_name,
619
+ item_name=extracted_item_name,
620
+ shipping_address=extracted_shipping_address,
621
+ delivery_date=extracted_delivery_date,
622
+ order_id=extracted_order_id, # This will be the specific order ID from user query
623
+ order_status=extracted_order_status
624
+ )
625
+
626
+ with st.chat_message("assistant"):
627
+ st.markdown(llm_response)
628
+ if raw_context_data:
629
+ with st.expander("Retrieved Context For Assistant"):
630
+ try:
631
+ if isinstance(raw_context_data, str) and \
632
+ (raw_context_data.strip().startswith('{') or raw_context_data.strip().startswith('[')):
633
+ st.json(json.loads(raw_context_data))
634
+ elif isinstance(raw_context_data, list):
635
+ st.json(raw_context_data)
636
+ else:
637
+ st.text(str(raw_context_data))
638
+ except (json.JSONDecodeError, TypeError):
639
+ st.text(str(raw_context_data))
640
+ st.session_state.manual_chat_history.append({"role": "assistant", "content": llm_response, "context": raw_context_data})
641
+ st.rerun()
642
+
643
+ # --- Twilio Bot Polling Logic ---
644
+ if st.session_state.get("bot_started") and st.session_state.get("rag_pipeline_ready"):
645
+ current_time = time.time()
646
+ if "last_twilio_poll_time" not in st.session_state:
647
+ st.session_state.last_twilio_poll_time = current_time - polling_interval_to_use - 1
648
+
649
+ if (current_time - st.session_state.last_twilio_poll_time) > polling_interval_to_use:
650
+ st.session_state.last_twilio_poll_time = current_time
651
+
652
+ if not st.session_state.get("twilio_client") or not twilio_bot_whatsapp_identity_to_use or not st.session_state.get("bot_start_time_utc"):
653
+ st.warning("Twilio client/config missing for polling. Ensure bot is started and WhatsApp identity is set.")
654
+ else:
655
+ with st.spinner(f"Checking WhatsApp messages (last poll: {datetime.fromtimestamp(st.session_state.last_twilio_poll_time).strftime('%H:%M:%S')})..."):
656
+ new_messages = get_new_whatsapp_messages(
657
+ st.session_state.twilio_client,
658
+ st.session_state.bot_start_time_utc,
659
+ st.session_state.processed_message_sids,
660
+ twilio_bot_whatsapp_identity_to_use
661
+ )
662
+
663
+ if new_messages:
664
+ st.info(f"Found {len(new_messages)} new WhatsApp message(s) to process.")
665
+ for msg_data in new_messages:
666
+ user_query_whatsapp, conv_sid, msg_sid, author_id = \
667
+ msg_data["message_body"], msg_data["conversation_sid"], \
668
+ msg_data["message_sid"], msg_data["author_identity"]
669
+
670
+ st.write(f"Processing WhatsApp message from {author_id} in conversation {conv_sid}: '{user_query_whatsapp}' (SID: {msg_sid})")
671
+
672
+ intent_result_whatsapp = simple_intent_classifier(user_query_whatsapp)
673
+ intent_whatsapp = intent_result_whatsapp[0]
674
+ potential_oid_whatsapp = intent_result_whatsapp[1]
675
+
676
+ context_for_llm_whatsapp = "No specific context could be retrieved."
677
+ raw_context_data_whatsapp = None
678
+
679
+ wa_customer_name, wa_item_name, wa_shipping_address, \
680
+ wa_delivery_date, wa_order_id, wa_order_status = [None] * 6
681
+
682
+
683
+ if intent_whatsapp == "ORDER_STATUS":
684
+ order_id_to_check_whatsapp = None
685
+ if potential_oid_whatsapp:
686
+ order_id_to_check_whatsapp = potential_oid_whatsapp
687
+ else:
688
+ match_whatsapp = re.search(r'\b(ord\d{3,})\b', user_query_whatsapp.lower(), re.IGNORECASE)
689
+ if match_whatsapp:
690
+ order_id_to_check_whatsapp = match_whatsapp.group(1).upper()
691
+
692
+ if order_id_to_check_whatsapp:
693
+ raw_context_data_whatsapp = get_order_details(order_id_to_check_whatsapp, st.session_state.customer_orders_data)
694
+ context_for_llm_whatsapp = raw_context_data_whatsapp # Full JSON string as context
695
+
696
+ if isinstance(raw_context_data_whatsapp, str) and not raw_context_data_whatsapp.startswith("No order found") and not raw_context_data_whatsapp.startswith("Customer order data is not loaded"):
697
+ try:
698
+ order_data_dict_wa = json.loads(raw_context_data_whatsapp)
699
+ wa_customer_name = order_data_dict_wa.get("customer_name")
700
+ items_wa = order_data_dict_wa.get("items")
701
+ if items_wa and len(items_wa) > 0 and isinstance(items_wa[0], dict):
702
+ wa_item_name = items_wa[0].get("name", "your item(s)")
703
+ else:
704
+ wa_item_name = "your item(s)"
705
+ wa_shipping_address = order_data_dict_wa.get("shipping_address")
706
+ wa_delivery_date = order_data_dict_wa.get("delivered_on")
707
+ wa_order_status = order_data_dict_wa.get("status")
708
+ wa_order_id = order_data_dict_wa.get("order_id")
709
+ except json.JSONDecodeError:
710
+ st.warning(f"Could not parse order details JSON for {order_id_to_check_whatsapp} (WhatsApp) for personalization.")
711
+ context_for_llm_whatsapp = f"Error parsing order details for {order_id_to_check_whatsapp}. Raw data: {raw_context_data_whatsapp}"
712
+ elif isinstance(raw_context_data_whatsapp, str):
713
+ context_for_llm_whatsapp = raw_context_data_whatsapp
714
+ else:
715
+ context_for_llm_whatsapp = "To check an order status, please provide a valid Order ID (e.g., ORD123)."
716
+ raw_context_data_whatsapp = {"message": "Order ID needed or not found in query."}
717
+
718
+
719
+ elif intent_whatsapp == "PRODUCT_INFO":
720
+ raw_context_data_whatsapp = get_product_info(user_query_whatsapp, st.session_state.products_data)
721
+ context_for_llm_whatsapp = raw_context_data_whatsapp
722
+
723
+ elif intent_whatsapp == "GENERAL_POLICY_FAQ" or intent_whatsapp == "UNKNOWN":
724
+ if st.session_state.faiss_index_pdfs and st.session_state.embedding_model and st.session_state.indexed_pdf_chunks:
725
+ k_val_whatsapp = 3 if intent_whatsapp == "GENERAL_POLICY_FAQ" else 2
726
+ chunks_whatsapp = search_faiss_index(st.session_state.faiss_index_pdfs, user_query_whatsapp,
727
+ st.session_state.embedding_model, st.session_state.indexed_pdf_chunks, k=k_val_whatsapp)
728
+ if chunks_whatsapp:
729
+ context_for_llm_whatsapp = "Relevant information from documents:\n\n" + "\n\n---\n\n".join(chunks_whatsapp)
730
+ raw_context_data_whatsapp = chunks_whatsapp
731
+ else:
732
+ context_for_llm_whatsapp = "I couldn't find specific information in our policy or FAQ documents regarding your query."
733
+ raw_context_data_whatsapp = {"message": "No relevant PDF chunks found."}
734
+ else:
735
+ context_for_llm_whatsapp = "Our policy and FAQ documents are currently unavailable for search."
736
+ raw_context_data_whatsapp = {"message": "PDF index or embedding model not ready."}
737
+
738
+ response_whatsapp = generate_response_groq(
739
+ _groq_client=st.session_state.groq_client,
740
+ query=user_query_whatsapp,
741
+ context=context_for_llm_whatsapp,
742
+ intent=intent_whatsapp,
743
+ customer_name=wa_customer_name,
744
+ item_name=wa_item_name,
745
+ shipping_address=wa_shipping_address,
746
+ delivery_date=wa_delivery_date,
747
+ order_id=wa_order_id,
748
+ order_status=wa_order_status
749
+ ).strip().replace('\n', ' ')
750
+
751
+ if send_whatsapp_message(
752
+ st.session_state.twilio_client,
753
+ conv_sid,
754
+ response_whatsapp,
755
+ twilio_bot_whatsapp_identity_to_use
756
+ ):
757
+ st.session_state.processed_message_sids.add(msg_sid)
758
+ #print(f"[Twilio Send] Sending response: {message_body}")
759
+ st.success(f"Successfully responded to WhatsApp message SID {msg_sid} from {author_id}.")
760
+ else:
761
+ st.error(f"Failed to send WhatsApp response for message SID {msg_sid} from {author_id}.")
762
+ st.rerun()
763
+
764
+
765
+ # --- Footer & Status ---
766
+ st.sidebar.markdown("---")
767
+ st.sidebar.info("Ensure all keys and SIDs are correctly configured. Primary API keys (Twilio SID/Token, GROQ Key) are loaded from secrets if available.")
768
+ if st.session_state.get("app_started"):
769
+ status_color = "green" if st.session_state.get("rag_pipeline_ready") else "orange"
770
+ app_status_text = "App RUNNING" if st.session_state.get("rag_pipeline_ready") else "App Initializing/Error"
771
+ bot_status_text = "WhatsApp Bot RUNNING" if st.session_state.get("bot_started") else "WhatsApp Bot STOPPED"
772
+ st.sidebar.markdown(f"<span style='color:{status_color};'>{app_status_text}</span>. {bot_status_text}.", unsafe_allow_html=True)
773
+
774
+ else:
775
+ st.sidebar.warning("App is STOPPED.")
776
+
777
+ # NOTE: WhatsApp replies should mirror the manual-query responses; avoid sending duplicate Twilio messages.
778
+
779
+ # --- Simulated background loop using rerun ---
780
+ if st.session_state.get("bot_started") and st.session_state.get("rag_pipeline_ready"):
781
+ current_time = time.time()
782
+ last_poll = st.session_state.get("last_twilio_poll_time", 0)
783
+ interval = polling_interval_to_use # 30 by default from sidebar
784
+
785
+ if current_time - last_poll >= interval:
786
+ st.session_state.last_twilio_poll_time = current_time
787
+ st.rerun()
788
+ else:
789
+ # Wait the remaining time before rerunning
790
+ time_remaining = interval - (current_time - last_poll)
791
+ time.sleep(min(5, time_remaining)) # Avoid sleeping too long
792
+ st.rerun()