masadonline committed on
Commit
93ae75d
·
verified ·
1 Parent(s): 3ec8fe0

Update app.py

Browse files
Files changed (1) hide show
  1. app.py +430 -430
app.py CHANGED
@@ -9,8 +9,8 @@ import faiss
9
  import numpy as np
10
  from twilio.rest import Client
11
  from groq import Groq
12
- import re # Import re module
13
-
14
  # --- Page Configuration ---
15
  st.set_page_config(page_title="RAG Customer Support Chatbot", layout="wide")
16
 
@@ -42,142 +42,142 @@ APP_TWILIO_BOT_WHATSAPP_IDENTITY_SECRET = st.secrets.get("TWILIO_BOT_WHATSAPP_ID
42
 
43
  # --- RAG Processing Utilities ---
44
  def load_json_data(file_path):
45
-     """Loads data from a JSON file."""
46
-     try:
47
-         with open(file_path, 'r', encoding='utf-8') as f:
48
-             data = json.load(f)
49
-         return data
50
-     except FileNotFoundError:
51
-         st.error(f"Error: JSON file not found at {file_path}")
52
-         return None
53
-     except json.JSONDecodeError:
54
-         st.error(f"Error: Could not decode JSON from {file_path}")
55
-         return None
56
-     except Exception as e:
57
-         st.error(f"An unexpected error occurred while loading {file_path}: {e}")
58
-         return None
59
 
60
  def load_pdf_data(file_path):
61
-     """Extracts text from a PDF file, page by page."""
62
-     try:
63
-         with open(file_path, 'rb') as f:
64
-             reader = PyPDF2.PdfReader(f)
65
-             text_pages = []
66
-             for page_num in range(len(reader.pages)):
67
-                 page = reader.pages[page_num]
68
-                 text_pages.append(page.extract_text() or "")
69
-         return text_pages
70
-     except FileNotFoundError:
71
-         st.error(f"Error: PDF file not found at {file_path}")
72
-         return []
73
-     except Exception as e:
74
-         st.error(f"An error occurred while processing PDF {file_path}: {e}")
75
-         return []
76
 
77
  def chunk_text(text_pages, chunk_size=1000, chunk_overlap=200):
78
-     """Chunks text from PDF pages into smaller, overlapping pieces."""
79
-     full_text = "\n".join(text_pages)
80
-     if not full_text.strip():
81
-         return []
82
-     chunks = []
83
-     start = 0
84
-     while start < len(full_text):
85
-         end = start + chunk_size
86
-         chunks.append(full_text[start:end])
87
-         if end >= len(full_text):
88
-             break
89
-         start += (chunk_size - chunk_overlap)
90
-         if start >= len(full_text):
91
-             break
92
-     return [chunk for chunk in chunks if chunk.strip()]
93
 
94
  @st.cache_resource(show_spinner="Initializing embedding model...")
95
  def initialize_embedding_model(model_name=DEFAULT_EMBEDDING_MODEL_NAME):
96
-     """Initializes and returns a SentenceTransformer model."""
97
-     try:
98
-         model = SentenceTransformer(model_name)
99
-         return model
100
-     except Exception as e:
101
-         st.error(f"Error initializing embedding model '{model_name}': {e}")
102
-         return None
103
 
104
  @st.cache_resource(show_spinner="Building FAISS index for PDF documents...")
105
  def create_faiss_index(_text_chunks, _embedding_model):
106
-     """Creates a FAISS index from text chunks and an embedding model."""
107
-     if not _text_chunks or _embedding_model is None:
108
-         st.warning("Cannot create FAISS index: No text chunks or embedding model available.")
109
-         return None, []
110
-     try:
111
-         valid_chunks = [str(chunk) for chunk in _text_chunks if chunk and isinstance(chunk, str) and chunk.strip()]
112
-         if not valid_chunks:
113
-             st.warning("No valid text chunks to embed for FAISS index.")
114
-             return None, []
115
-         embeddings = _embedding_model.encode(valid_chunks, convert_to_tensor=False)
116
-         if embeddings.ndim == 1:
117
-             embeddings = embeddings.reshape(1, -1)
118
-         if embeddings.shape[0] == 0:
119
-              st.warning("No embeddings were generated for FAISS index.")
120
-              return None, []
121
-         dimension = embeddings.shape[1]
122
-         index = faiss.IndexFlatL2(dimension)
123
-         index.add(np.array(embeddings, dtype=np.float32))
124
-         return index, valid_chunks
125
-     except Exception as e:
126
-         st.error(f"Error creating FAISS index: {e}")
127
-         return None, []
128
 
129
  def search_faiss_index(index, query_text, embedding_model, indexed_chunks, k=3):
130
-     """Searches the FAISS index and returns top_k relevant chunk texts."""
131
-     if index is None or embedding_model is None or not query_text:
132
-         return []
133
-     try:
134
-         query_embedding = embedding_model.encode([query_text], convert_to_tensor=False)
135
-         if query_embedding.ndim == 1:
136
-             query_embedding = query_embedding.reshape(1, -1)
137
-         distances, indices = index.search(np.array(query_embedding, dtype=np.float32), k)
138
-         results = []
139
-         for i in range(len(indices[0])):
140
-             idx = indices[0][i]
141
-             if 0 <= idx < len(indexed_chunks):
142
-                 results.append(indexed_chunks[idx])
143
-         return results
144
-     except Exception as e:
145
-         st.error(f"Error searching FAISS index: {e}")
146
-         return []
147
 
148
  def get_order_details(order_id, customer_orders_data):
149
-     """Retrieves order details for a given order_id."""
150
-     if not customer_orders_data:
151
-         return "Customer order data is not loaded."
152
-     for order in customer_orders_data:
153
-         if order.get("order_id") == order_id:
154
-             return json.dumps(order, indent=2)
155
-     return f"No order found with ID: {order_id}."
156
 
157
  def get_product_info(query, products_data):
158
-     """Retrieves product information based on a query."""
159
-     if not products_data:
160
-         return "Product data is not loaded."
161
-     query_lower = query.lower()
162
-     found_products = []
163
-     for product in products_data:
164
-         if query_lower in (product.get("name", "").lower()) or \
165
-            query_lower in (product.get("description", "").lower()) or \
166
-            query_lower == (product.get("product_id", "").lower()):
167
-             found_products.append(product)
168
-     if found_products:
169
-         return json.dumps(found_products, indent=2)
170
-     return f"No product information found matching your query: '{query}'."
171
 
172
  # --- LLM Operations ---
173
  @st.cache_data(show_spinner="Generating response with LLaMA3...")
174
  def generate_response_groq(_groq_client, query, context, model="llama3-8b-8192"):
175
-     """Generates a response using GROQ LLaMA3 API."""
176
-     if not _groq_client:
177
-         return "GROQ client not initialized. Please check API key."
178
-     if not query:
179
-         return "Query is empty."
180
-     prompt = f"""You are a helpful customer support assistant.
181
  Use the following context to answer the user's question.
182
  If the context doesn't contain the answer, state that you don't have enough information.
183
  Do not make up information. Be concise and polite.
@@ -189,107 +189,107 @@ User Question: {query}
189
 
190
  Assistant Answer:
191
  """
192
-     try:
193
-         chat_completion = _groq_client.chat.completions.create(
194
-             messages=[
195
-                 {"role": "system", "content": "You are a helpful customer support assistant."},
196
-                 {"role": "user", "content": prompt}
197
-             ],
198
-             model=model, temperature=0.7, max_tokens=1024, top_p=1
199
-         )
200
-         response = chat_completion.choices[0].message.content
201
-         return response
202
-     except Exception as e:
203
-         st.error(f"Error calling GROQ API: {e}")
204
-         return "Sorry, I encountered an error while trying to generate a response."
205
 
206
  def initialize_groq_client(api_key_val):
207
-     """Initializes the GROQ client."""
208
-     if not api_key_val: # Changed parameter name to avoid conflict
209
-         st.warning("GROQ API Key is missing.")
210
-         return None
211
-     try:
212
-         client = Groq(api_key=api_key_val)
213
-         return client
214
-     except Exception as e:
215
-         st.error(f"Failed to initialize GROQ client: {e}")
216
-         return None
217
 
218
  # --- Twilio Operations ---
219
  def initialize_twilio_client(acc_sid, auth_tkn): # Changed parameter names
220
-     """Initializes the Twilio client."""
221
-     if not acc_sid or not auth_tkn:
222
-         st.warning("Twilio Account SID or Auth Token is missing.")
223
-         return None
224
-     try:
225
-         client = Client(acc_sid, auth_tkn)
226
-         return client
227
-     except Exception as e:
228
-         st.error(f"Failed to initialize Twilio client: {e}")
229
-         return None
230
 
231
  def get_new_whatsapp_messages(twilio_client, conversation_service_sid_val, bot_start_time_utc, # Renamed
232
-                               processed_message_sids, bot_whatsapp_identity_val): # Renamed
233
-     """Fetches new, unanswered WhatsApp messages from Twilio Conversations."""
234
-     if not twilio_client:
235
-         st.warning("Twilio client not initialized.")
236
-         return []
237
-     if not conversation_service_sid_val:
238
-         st.warning("Twilio Conversation Service SID not provided.")
239
-         return []
240
-
241
-     new_messages_to_process = []
242
-     try:
243
-         conversations = twilio_client.conversations.v1 \
244
-             .services(conversation_service_sid_val) \
245
-             .conversations \
246
-             .list(limit=50)
247
-
248
-         for conv in conversations:
249
-             if conv.date_updated and conv.date_updated > bot_start_time_utc:
250
-                 messages = twilio_client.conversations.v1 \
251
-                     .services(conversation_service_sid_val) \
252
-                     .conversations(conv.sid) \
253
-                     .messages \
254
-                     .list(order='desc', limit=10)
255
-
256
-                 for msg in messages:
257
-                     if msg.sid in processed_message_sids:
258
-                         continue
259
-                     if msg.author and msg.author.lower() != bot_whatsapp_identity_val.lower() and \
260
-                        msg.date_created and msg.date_created > bot_start_time_utc:
261
-                         new_messages_to_process.append({
262
-                             "conversation_sid": conv.sid, "message_sid": msg.sid,
263
-                             "author_identity": msg.author, "message_body": msg.body,
264
-                             "timestamp_utc": msg.date_created 
265
-                         })
266
-                         break 
267
-     except Exception as e:
268
-         st.error(f"Error fetching Twilio messages: {e}")
269
-     return sorted(new_messages_to_process, key=lambda m: m['timestamp_utc'])
270
 
271
  def send_whatsapp_message(twilio_client, conversation_service_sid_val, conversation_sid, message_body, bot_identity_val): # Renamed
272
-     """Sends a message to a Twilio Conversation from the bot's identity."""
273
-     if not twilio_client:
274
-         st.error("Twilio client not initialized for sending message.")
275
-         return False
276
-     if not conversation_service_sid_val:
277
-         st.error("Twilio Conversation Service SID not provided for sending message.")
278
-         return False
279
-     if not bot_identity_val:
280
-         st.error("Bot identity not provided for sending message.")
281
-         return False
282
-     try:
283
-         twilio_client.conversations.v1 \
284
-             .services(conversation_service_sid_val) \
285
-             .conversations(conversation_sid) \
286
-             .messages \
287
-             .create(author=bot_identity_val, body=message_body)
288
-         st.success(f"Sent reply to conversation {conversation_sid}")
289
-         return True
290
-     except Exception as e:
291
-         st.error(f"Error sending Twilio message to {conversation_sid}: {e}")
292
-         return False
293
 
294
  # --- Main Application Logic & UI ---
295
  st.title("🤖 RAG-Based Customer Support Chatbot")
@@ -300,47 +300,47 @@ st.sidebar.title("⚙️ Configurations")
300
 
301
  # Use APP_ prefixed variables for values from secrets, then allow manual input if not found
302
  if APP_TWILIO_ACCOUNT_SID:
303
-     st.sidebar.text_input("Twilio Account SID (from Secrets)", value="********" + APP_TWILIO_ACCOUNT_SID[-4:] if len(APP_TWILIO_ACCOUNT_SID) > 4 else "********", disabled=True)
304
-     twilio_account_sid_to_use = APP_TWILIO_ACCOUNT_SID
305
  else:
306
-     st.sidebar.warning("Secret 'TWILIO_SID' not found.")
307
-     twilio_account_sid_to_use = st.sidebar.text_input("Twilio Account SID (Enter Manually)", value=DEFAULT_TWILIO_ACCOUNT_SID_FALLBACK, type="password")
308
 
309
  if APP_TWILIO_AUTH_TOKEN:
310
-     st.sidebar.text_input("Twilio Auth Token (from Secrets)", value="********", disabled=True)
311
-     twilio_auth_token_to_use = APP_TWILIO_AUTH_TOKEN
312
  else:
313
-     st.sidebar.warning("Secret 'TWILIO_TOKEN' not found.")
314
-     twilio_auth_token_to_use = st.sidebar.text_input("Twilio Auth Token (Enter Manually)", value=DEFAULT_TWILIO_AUTH_TOKEN_FALLBACK, type="password")
315
 
316
  if APP_GROQ_API_KEY:
317
-     st.sidebar.text_input("GROQ API Key (from Secrets)", value="gsk_********" + APP_GROQ_API_KEY[-4:] if len(APP_GROQ_API_KEY) > 8 else "********", disabled=True)
318
-     groq_api_key_to_use = APP_GROQ_API_KEY
319
  else:
320
-     st.sidebar.warning("Secret 'GROQ_API_KEY' not found.")
321
-     groq_api_key_to_use = st.sidebar.text_input("GROQ API Key (Enter Manually)", value=DEFAULT_GROQ_API_KEY_FALLBACK, type="password")
322
 
323
  # For other configurations that can be overridden if secrets not found or for user preference
324
  twilio_conversation_service_sid_to_use = st.sidebar.text_input(
325
-     "Twilio Conversation Service SID (IS...)", 
326
-     value=APP_TWILIO_CONVERSATION_SERVICE_SID_SECRET or DEFAULT_TWILIO_CONVERSATION_SERVICE_SID, 
327
-     type="password", 
328
-     help="The SID of your Twilio Conversations Service. Can be set by 'TWILIO_CONVERSATION_SERVICE_SID' secret."
329
  )
330
  twilio_bot_whatsapp_identity_to_use = st.sidebar.text_input(
331
-     "Twilio Bot WhatsApp Identity", 
332
-     value=APP_TWILIO_BOT_WHATSAPP_IDENTITY_SECRET or DEFAULT_TWILIO_BOT_WHATSAPP_IDENTITY,
333
-     help="e.g., 'whatsapp:+1234567890'. Can be set by 'TWILIO_BOT_WHATSAPP_IDENTITY' secret."
334
  )
335
  embedding_model_name_to_use = st.sidebar.text_input( # Renamed
336
-     "Embedding Model Name", 
337
-     value=DEFAULT_EMBEDDING_MODEL_NAME
338
  )
339
  polling_interval_to_use = st.sidebar.number_input( # Renamed
340
-     "Twilio Polling Interval (seconds)", 
341
-     min_value=10, max_value=300, 
342
-     value=DEFAULT_POLLING_INTERVAL_S, 
343
-     step=5
344
  )
345
 
346
  # --- Initialize Session State ---
@@ -354,197 +354,197 @@ if "manual_chat_history" not in st.session_state: st.session_state.manual_chat_h
354
 
355
  # --- Helper: Simple Intent Classifier ---
356
  def simple_intent_classifier(query):
357
-     query_lower = query.lower()
358
-     if any(k in query_lower for k in ["order", "status", "track", "delivery"]):
359
-         # More specific regex to find 'ORD' followed by digits (assuming order IDs are like ORD1001)
360
-         match = re.search(r'\b(ord\d{3,})\b', query_lower) # Matches 'ord' followed by at least 3 digits, as a whole word
361
-         if match:
362
-             return "ORDER_STATUS", match.group(1).upper() # Return intent and extracted ID
363
-         # Fallback if specific order ID not found but still an order-related query
364
-         return "ORDER_STATUS", None # Indicate order status intent but no specific ID found yet
365
-
366
-     if any(k in query_lower for k in ["product", "item", "buy", "price", "feature", "stock"]): return "PRODUCT_INFO", None
367
-     if any(k in query_lower for k in ["return", "policy", "refund", "exchange", "faq", "question", "how to", "support"]): return "GENERAL_POLICY_FAQ", None
368
-     return "UNKNOWN", None # Return intent and None for ID if unknown
369
 
370
  # --- Main Application Controls ---
371
  col1, col2, col3, col4 = st.columns(4)
372
  with col1:
373
-     if st.button("🚀 Start App", disabled=st.session_state.app_started, use_container_width=True):
374
-         if not groq_api_key_to_use: # Use the correct variable
375
-             st.error("GROQ API Key is required.")
376
-         else:
377
-             with st.spinner("Initializing RAG pipeline..."):
378
-                 st.session_state.embedding_model = initialize_embedding_model(embedding_model_name_to_use) # Use correct var
379
-                 st.session_state.customer_orders_data = load_json_data(CUSTOMER_ORDERS_FILE)
380
-                 st.session_state.products_data = load_json_data(PRODUCTS_FILE)
381
-                 policy_pdf_pages = load_pdf_data(POLICY_PDF_FILE)
382
-                 faq_pdf_pages = load_pdf_data(FAQ_PDF_FILE)
383
-                 all_pdf_text_pages = policy_pdf_pages + faq_pdf_pages
384
-                 st.session_state.pdf_text_chunks_raw = chunk_text(all_pdf_text_pages)
385
-
386
-                 if st.session_state.embedding_model and st.session_state.pdf_text_chunks_raw:
387
-                     st.session_state.faiss_index_pdfs, st.session_state.indexed_pdf_chunks = \
388
-                         create_faiss_index(st.session_state.pdf_text_chunks_raw, st.session_state.embedding_model)
389
-                 else:
390
-                     st.session_state.faiss_index_pdfs, st.session_state.indexed_pdf_chunks = None, []
391
-                     st.warning("FAISS index for PDFs could not be created.")
392
-                 
393
-                 st.session_state.groq_client = initialize_groq_client(groq_api_key_to_use) # Use correct var
394
-
395
-                 if st.session_state.embedding_model and st.session_state.groq_client and \
396
-                    st.session_state.customer_orders_data and st.session_state.products_data:
397
-                     st.session_state.rag_pipeline_ready = True
398
-                     st.session_state.app_started = True
399
-                     st.success("RAG Application Started!")
400
-                     st.rerun()
401
-                 else:
402
-                     st.error("Failed to initialize RAG pipeline. Check configurations and ensure all data files are present in 'docs/'.")
403
-                     st.session_state.app_started = False
404
  with col2:
405
-     if st.button("🛑 Stop App", disabled=not st.session_state.app_started, use_container_width=True):
406
-         keys_to_reset = ["app_started", "bot_started", "rag_pipeline_ready", "embedding_model", 
407
-                          "customer_orders_data", "products_data", "pdf_text_chunks_raw", 
408
-                          "faiss_index_pdfs", "indexed_pdf_chunks", "groq_client", "twilio_client", 
409
-                          "bot_start_time_utc", "processed_message_sids", "manual_chat_history"]
410
-         for key in keys_to_reset:
411
-             if key in st.session_state: del st.session_state[key]
412
-         st.session_state.app_started = False
413
-         st.session_state.bot_started = False
414
-         st.session_state.rag_pipeline_ready = False
415
-         st.session_state.processed_message_sids = set()
416
-         st.session_state.manual_chat_history = []
417
-         st.success("Application Stopped.")
418
-         st.rerun()
419
  with col3:
420
-     if st.button("💬 Start WhatsApp Bot", disabled=not st.session_state.app_started or st.session_state.bot_started, use_container_width=True):
421
-         if not all([twilio_account_sid_to_use, twilio_auth_token_to_use, twilio_conversation_service_sid_to_use, twilio_bot_whatsapp_identity_to_use]): # Use correct vars
422
-             st.error("Twilio credentials, Service SID, and Bot Identity are required.")
423
-         else:
424
-             st.session_state.twilio_client = initialize_twilio_client(twilio_account_sid_to_use, twilio_auth_token_to_use) # Use correct vars
425
-             if st.session_state.twilio_client:
426
-                 st.session_state.bot_started = True
427
-                 st.session_state.bot_start_time_utc = datetime.now(timezone.utc)
428
-                 st.session_state.processed_message_sids = set()
429
-                 st.session_state.last_twilio_poll_time = time.time() - polling_interval_to_use -1 # Use correct var
430
-                 st.success("WhatsApp Bot Started!")
431
-                 st.rerun()
432
-             else:
433
-                 st.error("Failed to initialize Twilio client.")
434
  with col4:
435
-     if st.button("🔕 Stop WhatsApp Bot", disabled=not st.session_state.bot_started, use_container_width=True):
436
-         st.session_state.bot_started = False
437
-         st.info("WhatsApp Bot Stopped.")
438
-         st.rerun()
439
  st.divider()
440
 
441
  # --- Manual Query Interface ---
442
  if st.session_state.get("app_started") and st.session_state.get("rag_pipeline_ready"):
443
-     st.subheader("💬 Manual Query")
444
-     for chat_entry in st.session_state.manual_chat_history:
445
-         with st.chat_message(chat_entry["role"]):
446
-             st.markdown(chat_entry["content"])
447
-             if "context" in chat_entry and chat_entry["context"]:
448
-                 with st.expander("Retrieved Context"):
449
-                     try:
450
-                         # Attempt to parse as JSON only if it looks like a JSON string
451
-                         if isinstance(chat_entry["context"], str) and (chat_entry["context"].strip().startswith('{') or chat_entry["context"].strip().startswith('[')):
452
-                             st.json(json.loads(chat_entry["context"]))
453
-                         else:
454
-                             # Otherwise, display as plain text
455
-                             st.text(str(chat_entry["context"]))
456
-                     except (json.JSONDecodeError, TypeError):
457
-                         # Fallback for any other parsing errors
458
-                         st.text(str(chat_entry["context"]))
459
-
460
-     user_query_manual = st.chat_input("Ask a question:")
461
-     if user_query_manual:
462
-         st.session_state.manual_chat_history.append({"role": "user", "content": user_query_manual})
463
-         with st.chat_message("user"): st.markdown(user_query_manual)
464
-
465
-         with st.spinner("Thinking..."):
466
-             intent_result = simple_intent_classifier(user_query_manual) # Get both intent and potential_id
467
-             intent = intent_result[0]
468
-             potential_oid_from_intent = intent_result[1] # This is the extracted ID if any
469
-
470
-             context_for_llm, raw_context_data = "No specific context.", None
471
-
472
-             if intent == "ORDER_STATUS":
473
-                 order_id_to_check = None
474
-                 if potential_oid_from_intent:
475
-                     order_id_to_check = potential_oid_from_intent
476
-                 else:
477
-                     # Fallback for edge cases, though the regex should catch most
478
-                     words = user_query_manual.upper().split()
479
-                     # This regex specifically looks for 'ORD' followed by digits
480
-                     possible_match = next((w for w in words if re.match(r'ORD\d+', w)), None)
481
-                     if possible_match:
482
-                         order_id_to_check = possible_match
483
-
484
-
485
-                 if order_id_to_check:
486
-                     raw_context_data = get_order_details(order_id_to_check.upper(), st.session_state.customer_orders_data)
487
-                     context_for_llm = f"Order Details: {raw_context_data}"
488
-                 else:
489
-                     context_for_llm = "Please provide a valid Order ID (e.g., ORD1234)."
490
-                     raw_context_data = {"message": "Order ID needed."}
491
-             elif intent == "PRODUCT_INFO":
492
-                 raw_context_data = get_product_info(user_query_manual, st.session_state.products_data)
493
-                 context_for_llm = f"Product Information: {raw_context_data}"
494
-             elif intent == "GENERAL_POLICY_FAQ" or intent == "UNKNOWN":
495
-                 # ... (rest of your existing logic for these intents) ...
496
-                 if st.session_state.faiss_index_pdfs and st.session_state.embedding_model:
497
-                     k_val = 2 if intent == "GENERAL_POLICY_FAQ" else 1
498
-                     retrieved_chunks = search_faiss_index(st.session_state.faiss_index_pdfs, user_query_manual,
499
-                                                           st.session_state.embedding_model, st.session_state.indexed_pdf_chunks, k=k_val)
500
-                     if retrieved_chunks:
501
-                         context_for_llm = "\n\n".join(retrieved_chunks)
502
-                         raw_context_data = retrieved_chunks
503
-                     else:
504
-                         context_for_llm = "No specific policy/FAQ info found." if intent == "GENERAL_POLICY_FAQ" else "Could not find relevant info."
505
-                         raw_context_data = {"message": "No relevant PDF chunks found."}
506
-                 else:
507
-                     context_for_llm = "Policy/FAQ documents unavailable."
508
-                     raw_context_data = {"message": "PDF index not ready."}
509
-
510
-             llm_response = generate_response_groq(st.session_state.groq_client, user_query_manual, context_for_llm)
511
-             with st.chat_message("assistant"):
512
-                 st.markdown(llm_response)
513
-                 if raw_context_data:
514
-                     with st.expander("Retrieved Context"):
515
-                         try:
516
-                             if isinstance(raw_context_data, str) and (raw_context_data.strip().startswith('{') or raw_context_data.strip().startswith('[')):
517
-                                 st.json(json.loads(raw_context_data))
518
-                             else:
519
-                                 st.text(str(raw_context_data))
520
-                         except (json.JSONDecodeError, TypeError):
521
-                             st.text(str(raw_context_data))
522
-             st.session_state.manual_chat_history.append({"role": "assistant", "content": llm_response, "context": raw_context_data})
523
 
524
  # --- Twilio Bot Polling Logic ---
525
  if st.session_state.get("bot_started") and st.session_state.get("rag_pipeline_ready"):
526
-     current_time = time.time()
527
-     if (current_time - st.session_state.get("last_twilio_poll_time", 0)) > polling_interval_to_use: # Use correct var
528
-         st.session_state.last_twilio_poll_time = current_time
529
-         with st.spinner("Checking WhatsApp messages..."):
530
-             if not st.session_state.get("twilio_client") or not twilio_conversation_service_sid_to_use or not twilio_bot_whatsapp_identity_to_use: # Use correct vars
531
-                 st.warning("Twilio client/config missing for polling.")
532
-             else:
533
-                 new_messages = get_new_whatsapp_messages(st.session_state.twilio_client, twilio_conversation_service_sid_to_use, 
534
-                                                          st.session_state.bot_start_time_utc, st.session_state.processed_message_sids,
535
-                                                          twilio_bot_whatsapp_identity_to_use) # Use correct vars
536
-                 if new_messages:
537
-                     st.info(f"Found {len(new_messages)} new WhatsApp message(s).")
538
-                     for msg_data in new_messages:
539
-                         user_query_whatsapp, conv_sid, msg_sid, author_id = msg_data["message_body"], msg_data["conversation_sid"], msg_data["message_sid"], msg_data["author_identity"]
540
-                         st.write(f"Processing from {author_id} in {conv_sid}: '{user_query_whatsapp}'")
541
-
542
-                         intent_result_whatsapp = simple_intent_classifier(user_query_whatsapp) # Use the updated classifier
543
-                         intent_whatsapp = intent_result_whatsapp[0]
544
-                         potential_oid_whatsapp = intent_result_whatsapp[1] # Extracted ID from intent classifier
545
-
546
-                         context_whatsapp = "No specific context."
547
-                         if intent_whatsapp == "ORDER_STATUS":
548
  order_id_to_check_whatsapp = None
549
  if potential_oid_whatsapp:
550
  order_id_to_check_whatsapp = potential_oid_whatsapp
@@ -555,29 +555,29 @@ if st.session_state.get("bot_started") and st.session_state.get("rag_pipeline_re
555
  order_id_to_check_whatsapp = possible_match_whatsapp
556
 
557
  if order_id_to_check_whatsapp:
558
-                             context_whatsapp = f"Order Details: {get_order_details(order_id_to_check_whatsapp.upper(), st.session_state.customer_orders_data)}"
559
  else:
560
  context_whatsapp = "Please provide a valid Order ID."
561
-                         elif intent_whatsapp == "PRODUCT_INFO":
562
-                             context_whatsapp = f"Product Info: {get_product_info(user_query_whatsapp, st.session_state.products_data)}"
563
-                         elif intent_whatsapp == "GENERAL_POLICY_FAQ" or intent_whatsapp == "UNKNOWN":
564
-                             if st.session_state.faiss_index_pdfs and st.session_state.embedding_model:
565
-                                 k_val = 2 if intent_whatsapp == "GENERAL_POLICY_FAQ" else 1
566
-                                 chunks = search_faiss_index(st.session_state.faiss_index_pdfs, user_query_whatsapp, st.session_state.embedding_model, st.session_state.indexed_pdf_chunks, k=k_val)
567
-                                 context_whatsapp = "\n\n".join(chunks) if chunks else ("No policy/FAQ info." if intent_whatsapp == "GENERAL_POLICY_FAQ" else "No relevant info.")
568
-                             else: context_whatsapp = "Policy/FAQ docs unavailable."
569
-                         
570
-                         response_whatsapp = generate_response_groq(st.session_state.groq_client, user_query_whatsapp, context_whatsapp)
571
-                         if send_whatsapp_message(st.session_state.twilio_client, twilio_conversation_service_sid_to_use, conv_sid, response_whatsapp, twilio_bot_whatsapp_identity_to_use): # Use correct vars
572
-                             st.session_state.processed_message_sids.add(msg_sid)
573
-                             st.success(f"Responded to {msg_sid} from {author_id}")
574
-                         else: st.error(f"Failed to send response for {msg_sid}")
575
-                     st.experimental_rerun()
576
 
577
  # --- Footer & Status ---
578
  st.sidebar.markdown("---")
579
  st.sidebar.info("Ensure all keys and SIDs are correctly configured. Primary API keys (Twilio SID/Token, GROQ Key) are loaded from secrets if available.")
580
  if st.session_state.get("app_started"):
581
-     st.sidebar.success(f"App RUNNING. WhatsApp Bot {'RUNNING' if st.session_state.get('bot_started') else 'STOPPED'}.")
582
  else:
583
-     st.sidebar.warning("App is STOPPED.")
 
9
  import numpy as np
10
  from twilio.rest import Client
11
  from groq import Groq
12
+ import re
13
+
14
  # --- Page Configuration ---
15
  st.set_page_config(page_title="RAG Customer Support Chatbot", layout="wide")
16
 
 
42
 
43
  # --- RAG Processing Utilities ---
44
  def load_json_data(file_path):
45
+ """Loads data from a JSON file."""
46
+ try:
47
+ with open(file_path, 'r', encoding='utf-8') as f:
48
+ data = json.load(f)
49
+ return data
50
+ except FileNotFoundError:
51
+ st.error(f"Error: JSON file not found at {file_path}")
52
+ return None
53
+ except json.JSONDecodeError:
54
+ st.error(f"Error: Could not decode JSON from {file_path}")
55
+ return None
56
+ except Exception as e:
57
+ st.error(f"An unexpected error occurred while loading {file_path}: {e}")
58
+ return None
59
 
60
  def load_pdf_data(file_path):
61
+ """Extracts text from a PDF file, page by page."""
62
+ try:
63
+ with open(file_path, 'rb') as f:
64
+ reader = PyPDF2.PdfReader(f)
65
+ text_pages = []
66
+ for page_num in range(len(reader.pages)):
67
+ page = reader.pages[page_num]
68
+ text_pages.append(page.extract_text() or "")
69
+ return text_pages
70
+ except FileNotFoundError:
71
+ st.error(f"Error: PDF file not found at {file_path}")
72
+ return []
73
+ except Exception as e:
74
+ st.error(f"An error occurred while processing PDF {file_path}: {e}")
75
+ return []
76
 
77
  def chunk_text(text_pages, chunk_size=1000, chunk_overlap=200):
78
+ """Chunks text from PDF pages into smaller, overlapping pieces."""
79
+ full_text = "\n".join(text_pages)
80
+ if not full_text.strip():
81
+ return []
82
+ chunks = []
83
+ start = 0
84
+ while start < len(full_text):
85
+ end = start + chunk_size
86
+ chunks.append(full_text[start:end])
87
+ if end >= len(full_text):
88
+ break
89
+ start += (chunk_size - chunk_overlap)
90
+ if start >= len(full_text):
91
+ break
92
+ return [chunk for chunk in chunks if chunk.strip()]
93
 
94
  @st.cache_resource(show_spinner="Initializing embedding model...")
95
  def initialize_embedding_model(model_name=DEFAULT_EMBEDDING_MODEL_NAME):
96
+ """Initializes and returns a SentenceTransformer model."""
97
+ try:
98
+ model = SentenceTransformer(model_name)
99
+ return model
100
+ except Exception as e:
101
+ st.error(f"Error initializing embedding model '{model_name}': {e}")
102
+ return None
103
 
104
  @st.cache_resource(show_spinner="Building FAISS index for PDF documents...")
105
  def create_faiss_index(_text_chunks, _embedding_model):
106
+ """Creates a FAISS index from text chunks and an embedding model."""
107
+ if not _text_chunks or _embedding_model is None:
108
+ st.warning("Cannot create FAISS index: No text chunks or embedding model available.")
109
+ return None, []
110
+ try:
111
+ valid_chunks = [str(chunk) for chunk in _text_chunks if chunk and isinstance(chunk, str) and chunk.strip()]
112
+ if not valid_chunks:
113
+ st.warning("No valid text chunks to embed for FAISS index.")
114
+ return None, []
115
+ embeddings = _embedding_model.encode(valid_chunks, convert_to_tensor=False)
116
+ if embeddings.ndim == 1:
117
+ embeddings = embeddings.reshape(1, -1)
118
+ if embeddings.shape[0] == 0:
119
+ st.warning("No embeddings were generated for FAISS index.")
120
+ return None, []
121
+ dimension = embeddings.shape[1]
122
+ index = faiss.IndexFlatL2(dimension)
123
+ index.add(np.array(embeddings, dtype=np.float32))
124
+ return index, valid_chunks
125
+ except Exception as e:
126
+ st.error(f"Error creating FAISS index: {e}")
127
+ return None, []
128
 
129
  def search_faiss_index(index, query_text, embedding_model, indexed_chunks, k=3):
130
+ """Searches the FAISS index and returns top_k relevant chunk texts."""
131
+ if index is None or embedding_model is None or not query_text:
132
+ return []
133
+ try:
134
+ query_embedding = embedding_model.encode([query_text], convert_to_tensor=False)
135
+ if query_embedding.ndim == 1:
136
+ query_embedding = query_embedding.reshape(1, -1)
137
+ distances, indices = index.search(np.array(query_embedding, dtype=np.float32), k)
138
+ results = []
139
+ for i in range(len(indices[0])):
140
+ idx = indices[0][i]
141
+ if 0 <= idx < len(indexed_chunks):
142
+ results.append(indexed_chunks[idx])
143
+ return results
144
+ except Exception as e:
145
+ st.error(f"Error searching FAISS index: {e}")
146
+ return []
147
 
148
  def get_order_details(order_id, customer_orders_data):
149
+ """Retrieves order details for a given order_id."""
150
+ if not customer_orders_data:
151
+ return "Customer order data is not loaded."
152
+ for order in customer_orders_data:
153
+ if order.get("order_id") == order_id:
154
+ return json.dumps(order, indent=2)
155
+ return f"No order found with ID: {order_id}."
156
 
157
  def get_product_info(query, products_data):
158
+ """Retrieves product information based on a query."""
159
+ if not products_data:
160
+ return "Product data is not loaded."
161
+ query_lower = query.lower()
162
+ found_products = []
163
+ for product in products_data:
164
+ if query_lower in (product.get("name", "").lower()) or \
165
+ query_lower in (product.get("description", "").lower()) or \
166
+ query_lower == (product.get("product_id", "").lower()):
167
+ found_products.append(product)
168
+ if found_products:
169
+ return json.dumps(found_products, indent=2)
170
+ return f"No product information found matching your query: '{query}'."
171
 
172
  # --- LLM Operations ---
173
  @st.cache_data(show_spinner="Generating response with LLaMA3...")
174
  def generate_response_groq(_groq_client, query, context, model="llama3-8b-8192"):
175
+ """Generates a response using GROQ LLaMA3 API."""
176
+ if not _groq_client:
177
+ return "GROQ client not initialized. Please check API key."
178
+ if not query:
179
+ return "Query is empty."
180
+ prompt = f"""You are a helpful customer support assistant.
181
  Use the following context to answer the user's question.
182
  If the context doesn't contain the answer, state that you don't have enough information.
183
  Do not make up information. Be concise and polite.
 
189
 
190
  Assistant Answer:
191
  """
192
+ try:
193
+ chat_completion = _groq_client.chat.completions.create(
194
+ messages=[
195
+ {"role": "system", "content": "You are a helpful customer support assistant."},
196
+ {"role": "user", "content": prompt}
197
+ ],
198
+ model=model, temperature=0.7, max_tokens=1024, top_p=1
199
+ )
200
+ response = chat_completion.choices[0].message.content
201
+ return response
202
+ except Exception as e:
203
+ st.error(f"Error calling GROQ API: {e}")
204
+ return "Sorry, I encountered an error while trying to generate a response."
205
 
206
  def initialize_groq_client(api_key_val):
207
+ """Initializes the GROQ client."""
208
+ if not api_key_val: # Changed parameter name to avoid conflict
209
+ st.warning("GROQ API Key is missing.")
210
+ return None
211
+ try:
212
+ client = Groq(api_key=api_key_val)
213
+ return client
214
+ except Exception as e:
215
+ st.error(f"Failed to initialize GROQ client: {e}")
216
+ return None
217
 
218
  # --- Twilio Operations ---
219
  def initialize_twilio_client(acc_sid, auth_tkn): # Changed parameter names
220
+ """Initializes the Twilio client."""
221
+ if not acc_sid or not auth_tkn:
222
+ st.warning("Twilio Account SID or Auth Token is missing.")
223
+ return None
224
+ try:
225
+ client = Client(acc_sid, auth_tkn)
226
+ return client
227
+ except Exception as e:
228
+ st.error(f"Failed to initialize Twilio client: {e}")
229
+ return None
230
 
231
  def get_new_whatsapp_messages(twilio_client, conversation_service_sid_val, bot_start_time_utc, # Renamed
232
+ processed_message_sids, bot_whatsapp_identity_val): # Renamed
233
+ """Fetches new, unanswered WhatsApp messages from Twilio Conversations."""
234
+ if not twilio_client:
235
+ st.warning("Twilio client not initialized.")
236
+ return []
237
+ if not conversation_service_sid_val:
238
+ st.warning("Twilio Conversation Service SID not provided.")
239
+ return []
240
+
241
+ new_messages_to_process = []
242
+ try:
243
+ conversations = twilio_client.conversations.v1 \
244
+ .services(conversation_service_sid_val) \
245
+ .conversations \
246
+ .list(limit=50)
247
+
248
+ for conv in conversations:
249
+ if conv.date_updated and conv.date_updated > bot_start_time_utc:
250
+ messages = twilio_client.conversations.v1 \
251
+ .services(conversation_service_sid_val) \
252
+ .conversations(conv.sid) \
253
+ .messages \
254
+ .list(order='desc', limit=10)
255
+
256
+ for msg in messages:
257
+ if msg.sid in processed_message_sids:
258
+ continue
259
+ if msg.author and msg.author.lower() != bot_whatsapp_identity_val.lower() and \
260
+ msg.date_created and msg.date_created > bot_start_time_utc:
261
+ new_messages_to_process.append({
262
+ "conversation_sid": conv.sid, "message_sid": msg.sid,
263
+ "author_identity": msg.author, "message_body": msg.body,
264
+ "timestamp_utc": msg.date_created
265
+ })
266
+ break
267
+ except Exception as e:
268
+ st.error(f"Error fetching Twilio messages: {e}")
269
+ return sorted(new_messages_to_process, key=lambda m: m['timestamp_utc'])
270
 
271
  def send_whatsapp_message(twilio_client, conversation_service_sid_val, conversation_sid, message_body, bot_identity_val): # Renamed
272
+ """Sends a message to a Twilio Conversation from the bot's identity."""
273
+ if not twilio_client:
274
+ st.error("Twilio client not initialized for sending message.")
275
+ return False
276
+ if not conversation_service_sid_val:
277
+ st.error("Twilio Conversation Service SID not provided for sending message.")
278
+ return False
279
+ if not bot_identity_val:
280
+ st.error("Bot identity not provided for sending message.")
281
+ return False
282
+ try:
283
+ twilio_client.conversations.v1 \
284
+ .services(conversation_service_sid_val) \
285
+ .conversations(conversation_sid) \
286
+ .messages \
287
+ .create(author=bot_identity_val, body=message_body)
288
+ st.success(f"Sent reply to conversation {conversation_sid}")
289
+ return True
290
+ except Exception as e:
291
+ st.error(f"Error sending Twilio message to {conversation_sid}: {e}")
292
+ return False
293
 
294
  # --- Main Application Logic & UI ---
295
  st.title("🤖 RAG-Based Customer Support Chatbot")
 
300
 
301
  # Use APP_ prefixed variables for values from secrets, then allow manual input if not found
302
  if APP_TWILIO_ACCOUNT_SID:
303
+ st.sidebar.text_input("Twilio Account SID (from Secrets)", value="********" + APP_TWILIO_ACCOUNT_SID[-4:] if len(APP_TWILIO_ACCOUNT_SID) > 4 else "********", disabled=True)
304
+ twilio_account_sid_to_use = APP_TWILIO_ACCOUNT_SID
305
  else:
306
+ st.sidebar.warning("Secret 'TWILIO_SID' not found.")
307
+ twilio_account_sid_to_use = st.sidebar.text_input("Twilio Account SID (Enter Manually)", value=DEFAULT_TWILIO_ACCOUNT_SID_FALLBACK, type="password")
308
 
309
  if APP_TWILIO_AUTH_TOKEN:
310
+ st.sidebar.text_input("Twilio Auth Token (from Secrets)", value="********", disabled=True)
311
+ twilio_auth_token_to_use = APP_TWILIO_AUTH_TOKEN
312
  else:
313
+ st.sidebar.warning("Secret 'TWILIO_TOKEN' not found.")
314
+ twilio_auth_token_to_use = st.sidebar.text_input("Twilio Auth Token (Enter Manually)", value=DEFAULT_TWILIO_AUTH_TOKEN_FALLBACK, type="password")
315
 
316
  if APP_GROQ_API_KEY:
317
+ st.sidebar.text_input("GROQ API Key (from Secrets)", value="gsk_********" + APP_GROQ_API_KEY[-4:] if len(APP_GROQ_API_KEY) > 8 else "********", disabled=True)
318
+ groq_api_key_to_use = APP_GROQ_API_KEY
319
  else:
320
+ st.sidebar.warning("Secret 'GROQ_API_KEY' not found.")
321
+ groq_api_key_to_use = st.sidebar.text_input("GROQ API Key (Enter Manually)", value=DEFAULT_GROQ_API_KEY_FALLBACK, type="password")
322
 
323
  # For other configurations that can be overridden if secrets not found or for user preference
324
  twilio_conversation_service_sid_to_use = st.sidebar.text_input(
325
+ "Twilio Conversation Service SID (IS...)",
326
+ value=APP_TWILIO_CONVERSATION_SERVICE_SID_SECRET or DEFAULT_TWILIO_CONVERSATION_SERVICE_SID,
327
+ type="password",
328
+ help="The SID of your Twilio Conversations Service. Can be set by 'TWILIO_CONVERSATION_SERVICE_SID' secret."
329
  )
330
  twilio_bot_whatsapp_identity_to_use = st.sidebar.text_input(
331
+ "Twilio Bot WhatsApp Identity",
332
+ value=APP_TWILIO_BOT_WHATSAPP_IDENTITY_SECRET or DEFAULT_TWILIO_BOT_WHATSAPP_IDENTITY,
333
+ help="e.g., 'whatsapp:+1234567890'. Can be set by 'TWILIO_BOT_WHATSAPP_IDENTITY' secret."
334
  )
335
  embedding_model_name_to_use = st.sidebar.text_input( # Renamed
336
+ "Embedding Model Name",
337
+ value=DEFAULT_EMBEDDING_MODEL_NAME
338
  )
339
  polling_interval_to_use = st.sidebar.number_input( # Renamed
340
+ "Twilio Polling Interval (seconds)",
341
+ min_value=10, max_value=300,
342
+ value=DEFAULT_POLLING_INTERVAL_S,
343
+ step=5
344
  )
345
 
346
  # --- Initialize Session State ---
 
354
 
355
  # --- Helper: Simple Intent Classifier ---
356
  def simple_intent_classifier(query):
357
+ query_lower = query.lower()
358
+ if any(k in query_lower for k in ["order", "status", "track", "delivery"]):
359
+ # More specific regex to find 'ORD' followed by digits (assuming order IDs are like ORD1001)
360
+ match = re.search(r'\b(ord\d{3,})\b', query_lower) # Matches 'ord' followed by at least 3 digits, as a whole word
361
+ if match:
362
+ return "ORDER_STATUS", match.group(1).upper() # Return intent and extracted ID
363
+ # Fallback if specific order ID not found but still an order-related query
364
+ return "ORDER_STATUS", None # Indicate order status intent but no specific ID found yet
365
+
366
+ if any(k in query_lower for k in ["product", "item", "buy", "price", "feature", "stock"]): return "PRODUCT_INFO", None
367
+ if any(k in query_lower for k in ["return", "policy", "refund", "exchange", "faq", "question", "how to", "support"]): return "GENERAL_POLICY_FAQ", None
368
+ return "UNKNOWN", None # Return intent and None for ID if unknown
369
 
370
  # --- Main Application Controls ---
371
  col1, col2, col3, col4 = st.columns(4)
372
  with col1:
373
+ if st.button("🚀 Start App", disabled=st.session_state.app_started, use_container_width=True):
374
+ if not groq_api_key_to_use: # Use the correct variable
375
+ st.error("GROQ API Key is required.")
376
+ else:
377
+ with st.spinner("Initializing RAG pipeline..."):
378
+ st.session_state.embedding_model = initialize_embedding_model(embedding_model_name_to_use) # Use correct var
379
+ st.session_state.customer_orders_data = load_json_data(CUSTOMER_ORDERS_FILE)
380
+ st.session_state.products_data = load_json_data(PRODUCTS_FILE)
381
+ policy_pdf_pages = load_pdf_data(POLICY_PDF_FILE)
382
+ faq_pdf_pages = load_pdf_data(FAQ_PDF_FILE)
383
+ all_pdf_text_pages = policy_pdf_pages + faq_pdf_pages
384
+ st.session_state.pdf_text_chunks_raw = chunk_text(all_pdf_text_pages)
385
+
386
+ if st.session_state.embedding_model and st.session_state.pdf_text_chunks_raw:
387
+ st.session_state.faiss_index_pdfs, st.session_state.indexed_pdf_chunks = \
388
+ create_faiss_index(st.session_state.pdf_text_chunks_raw, st.session_state.embedding_model)
389
+ else:
390
+ st.session_state.faiss_index_pdfs, st.session_state.indexed_pdf_chunks = None, []
391
+ st.warning("FAISS index for PDFs could not be created.")
392
+
393
+ st.session_state.groq_client = initialize_groq_client(groq_api_key_to_use) # Use correct var
394
+
395
+ if st.session_state.embedding_model and st.session_state.groq_client and \
396
+ st.session_state.customer_orders_data and st.session_state.products_data:
397
+ st.session_state.rag_pipeline_ready = True
398
+ st.session_state.app_started = True
399
+ st.success("RAG Application Started!")
400
+ st.rerun()
401
+ else:
402
+ st.error("Failed to initialize RAG pipeline. Check configurations and ensure all data files are present in 'docs/'.")
403
+ st.session_state.app_started = False
404
  with col2:
405
+ if st.button("🛑 Stop App", disabled=not st.session_state.app_started, use_container_width=True):
406
+ keys_to_reset = ["app_started", "bot_started", "rag_pipeline_ready", "embedding_model",
407
+ "customer_orders_data", "products_data", "pdf_text_chunks_raw",
408
+ "faiss_index_pdfs", "indexed_pdf_chunks", "groq_client", "twilio_client",
409
+ "bot_start_time_utc", "processed_message_sids", "manual_chat_history"]
410
+ for key in keys_to_reset:
411
+ if key in st.session_state: del st.session_state[key]
412
+ st.session_state.app_started = False
413
+ st.session_state.bot_started = False
414
+ st.session_state.rag_pipeline_ready = False
415
+ st.session_state.processed_message_sids = set()
416
+ st.session_state.manual_chat_history = []
417
+ st.success("Application Stopped.")
418
+ st.rerun()
419
  with col3:
420
+ if st.button("💬 Start WhatsApp Bot", disabled=not st.session_state.app_started or st.session_state.bot_started, use_container_width=True):
421
+ if not all([twilio_account_sid_to_use, twilio_auth_token_to_use, twilio_conversation_service_sid_to_use, twilio_bot_whatsapp_identity_to_use]): # Use correct vars
422
+ st.error("Twilio credentials, Service SID, and Bot Identity are required.")
423
+ else:
424
+ st.session_state.twilio_client = initialize_twilio_client(twilio_account_sid_to_use, twilio_auth_token_to_use) # Use correct vars
425
+ if st.session_state.twilio_client:
426
+ st.session_state.bot_started = True
427
+ st.session_state.bot_start_time_utc = datetime.now(timezone.utc)
428
+ st.session_state.processed_message_sids = set()
429
+ st.session_state.last_twilio_poll_time = time.time() - polling_interval_to_use -1 # Use correct var
430
+ st.success("WhatsApp Bot Started!")
431
+ st.rerun()
432
+ else:
433
+ st.error("Failed to initialize Twilio client.")
434
  with col4:
435
+ if st.button("🔕 Stop WhatsApp Bot", disabled=not st.session_state.bot_started, use_container_width=True):
436
+ st.session_state.bot_started = False
437
+ st.info("WhatsApp Bot Stopped.")
438
+ st.rerun()
439
  st.divider()
440
 
441
  # --- Manual Query Interface ---
442
  if st.session_state.get("app_started") and st.session_state.get("rag_pipeline_ready"):
443
+ st.subheader("💬 Manual Query")
444
+ for chat_entry in st.session_state.manual_chat_history:
445
+ with st.chat_message(chat_entry["role"]):
446
+ st.markdown(chat_entry["content"])
447
+ if "context" in chat_entry and chat_entry["context"]:
448
+ with st.expander("Retrieved Context"):
449
+ try:
450
+ # Attempt to parse as JSON only if it looks like a JSON string
451
+ if isinstance(chat_entry["context"], str) and (chat_entry["context"].strip().startswith('{') or chat_entry["context"].strip().startswith('[')):
452
+ st.json(json.loads(chat_entry["context"]))
453
+ else:
454
+ # Otherwise, display as plain text
455
+ st.text(str(chat_entry["context"]))
456
+ except (json.JSONDecodeError, TypeError):
457
+ # Fallback for any other parsing errors
458
+ st.text(str(chat_entry["context"]))
459
+
460
+ user_query_manual = st.chat_input("Ask a question:")
461
+ if user_query_manual:
462
+ st.session_state.manual_chat_history.append({"role": "user", "content": user_query_manual})
463
+ with st.chat_message("user"): st.markdown(user_query_manual)
464
+
465
+ with st.spinner("Thinking..."):
466
+ intent_result = simple_intent_classifier(user_query_manual) # Get both intent and potential_id
467
+ intent = intent_result[0]
468
+ potential_oid_from_intent = intent_result[1] # This is the extracted ID if any
469
+
470
+ context_for_llm, raw_context_data = "No specific context.", None
471
+
472
+ if intent == "ORDER_STATUS":
473
+ order_id_to_check = None
474
+ if potential_oid_from_intent:
475
+ order_id_to_check = potential_oid_from_intent
476
+ else:
477
+ # Fallback for edge cases, though the regex should catch most
478
+ words = user_query_manual.upper().split()
479
+ # This regex specifically looks for 'ORD' followed by digits
480
+ possible_match = next((w for w in words if re.match(r'ORD\d+', w)), None)
481
+ if possible_match:
482
+ order_id_to_check = possible_match
483
+
484
+
485
+ if order_id_to_check:
486
+ raw_context_data = get_order_details(order_id_to_check.upper(), st.session_state.customer_orders_data)
487
+ context_for_llm = f"Order Details: {raw_context_data}"
488
+ else:
489
+ context_for_llm = "Please provide a valid Order ID (e.g., ORD1234)."
490
+ raw_context_data = {"message": "Order ID needed."}
491
+ elif intent == "PRODUCT_INFO":
492
+ raw_context_data = get_product_info(user_query_manual, st.session_state.products_data)
493
+ context_for_llm = f"Product Information: {raw_context_data}"
494
+ elif intent == "GENERAL_POLICY_FAQ" or intent == "UNKNOWN":
495
+ # ... (rest of your existing logic for these intents) ...
496
+ if st.session_state.faiss_index_pdfs and st.session_state.embedding_model:
497
+ k_val = 2 if intent == "GENERAL_POLICY_FAQ" else 1
498
+ retrieved_chunks = search_faiss_index(st.session_state.faiss_index_pdfs, user_query_manual,
499
+ st.session_state.embedding_model, st.session_state.indexed_pdf_chunks, k=k_val)
500
+ if retrieved_chunks:
501
+ context_for_llm = "\n\n".join(retrieved_chunks)
502
+ raw_context_data = retrieved_chunks
503
+ else:
504
+ context_for_llm = "No specific policy/FAQ info found." if intent == "GENERAL_POLICY_FAQ" else "Could not find relevant info."
505
+ raw_context_data = {"message": "No relevant PDF chunks found."}
506
+ else:
507
+ context_for_llm = "Policy/FAQ documents unavailable."
508
+ raw_context_data = {"message": "PDF index not ready."}
509
+
510
+ llm_response = generate_response_groq(st.session_state.groq_client, user_query_manual, context_for_llm)
511
+ with st.chat_message("assistant"):
512
+ st.markdown(llm_response)
513
+ if raw_context_data:
514
+ with st.expander("Retrieved Context"):
515
+ try:
516
+ if isinstance(raw_context_data, str) and (raw_context_data.strip().startswith('{') or raw_context_data.strip().startswith('[')):
517
+ st.json(json.loads(raw_context_data))
518
+ else:
519
+ st.text(str(raw_context_data))
520
+ except (json.JSONDecodeError, TypeError):
521
+ st.text(str(raw_context_data))
522
+ st.session_state.manual_chat_history.append({"role": "assistant", "content": llm_response, "context": raw_context_data})
523
 
524
  # --- Twilio Bot Polling Logic ---
525
  if st.session_state.get("bot_started") and st.session_state.get("rag_pipeline_ready"):
526
+ current_time = time.time()
527
+ if (current_time - st.session_state.get("last_twilio_poll_time", 0)) > polling_interval_to_use: # Use correct var
528
+ st.session_state.last_twilio_poll_time = current_time
529
+ with st.spinner("Checking WhatsApp messages..."):
530
+ if not st.session_state.get("twilio_client") or not twilio_conversation_service_sid_to_use or not twilio_bot_whatsapp_identity_to_use: # Use correct vars
531
+ st.warning("Twilio client/config missing for polling.")
532
+ else:
533
+ new_messages = get_new_whatsapp_messages(st.session_state.twilio_client, twilio_conversation_service_sid_to_use,
534
+ st.session_state.bot_start_time_utc, st.session_state.processed_message_sids,
535
+ twilio_bot_whatsapp_identity_to_use) # Use correct vars
536
+ if new_messages:
537
+ st.info(f"Found {len(new_messages)} new WhatsApp message(s).")
538
+ for msg_data in new_messages:
539
+ user_query_whatsapp, conv_sid, msg_sid, author_id = msg_data["message_body"], msg_data["conversation_sid"], msg_data["message_sid"], msg_data["author_identity"]
540
+ st.write(f"Processing from {author_id} in {conv_sid}: '{user_query_whatsapp}'")
541
+
542
+ intent_result_whatsapp = simple_intent_classifier(user_query_whatsapp) # Use the updated classifier
543
+ intent_whatsapp = intent_result_whatsapp[0]
544
+ potential_oid_whatsapp = intent_result_whatsapp[1] # Extracted ID from intent classifier
545
+
546
+ context_whatsapp = "No specific context."
547
+ if intent_whatsapp == "ORDER_STATUS":
548
  order_id_to_check_whatsapp = None
549
  if potential_oid_whatsapp:
550
  order_id_to_check_whatsapp = potential_oid_whatsapp
 
555
  order_id_to_check_whatsapp = possible_match_whatsapp
556
 
557
  if order_id_to_check_whatsapp:
558
+ context_whatsapp = f"Order Details: {get_order_details(order_id_to_check_whatsapp.upper(), st.session_state.customer_orders_data)}"
559
  else:
560
  context_whatsapp = "Please provide a valid Order ID."
561
+ elif intent_whatsapp == "PRODUCT_INFO":
562
+ context_whatsapp = f"Product Info: {get_product_info(user_query_whatsapp, st.session_state.products_data)}"
563
+ elif intent_whatsapp == "GENERAL_POLICY_FAQ" or intent_whatsapp == "UNKNOWN":
564
+ if st.session_state.faiss_index_pdfs and st.session_state.embedding_model:
565
+ k_val = 2 if intent_whatsapp == "GENERAL_POLICY_FAQ" else 1
566
+ chunks = search_faiss_index(st.session_state.faiss_index_pdfs, user_query_whatsapp, st.session_state.embedding_model, st.session_state.indexed_pdf_chunks, k=k_val)
567
+ context_whatsapp = "\n\n".join(chunks) if chunks else ("No policy/FAQ info." if intent_whatsapp == "GENERAL_POLICY_FAQ" else "No relevant info.")
568
+ else: context_whatsapp = "Policy/FAQ docs unavailable."
569
+
570
+ response_whatsapp = generate_response_groq(st.session_state.groq_client, user_query_whatsapp, context_whatsapp)
571
+ if send_whatsapp_message(st.session_state.twilio_client, twilio_conversation_service_sid_to_use, conv_sid, response_whatsapp, twilio_bot_whatsapp_identity_to_use): # Use correct vars
572
+ st.session_state.processed_message_sids.add(msg_sid)
573
+ st.success(f"Responded to {msg_sid} from {author_id}")
574
+ else: st.error(f"Failed to send response for {msg_sid}")
575
+ st.experimental_rerun()
576
 
577
  # --- Footer & Status ---
578
  st.sidebar.markdown("---")
579
  st.sidebar.info("Ensure all keys and SIDs are correctly configured. Primary API keys (Twilio SID/Token, GROQ Key) are loaded from secrets if available.")
580
  if st.session_state.get("app_started"):
581
+ st.sidebar.success(f"App RUNNING. WhatsApp Bot {'RUNNING' if st.session_state.get('bot_started') else 'STOPPED'}.")
582
  else:
583
+ st.sidebar.warning("App is STOPPED.")