AIToyBot

Sleeping

App Files Community

masadonline committed on May 26

Commit

6d5efc5

verified ·

1 Parent(s): 30c292a

Update app.py

Browse files

Files changed (1) hide show

app.py +234 -123

app.py CHANGED Viewed

@@ -20,7 +20,7 @@ DEFAULT_TWILIO_AUTH_TOKEN_FALLBACK = "" # Fallback if secret "TWILIO_TOKEN" is n
 DEFAULT_GROQ_API_KEY_FALLBACK = "" # Fallback if secret "GROQ_API_KEY" is not found
 DEFAULT_TWILIO_CONVERSATION_SERVICE_SID = ""
-DEFAULT_TWILIO_BOT_WHATSAPP_IDENTITY = st.secrets.get("TWILIO_PHONE_NUMBER")#"whatsapp:+14155238886" # Twilio Sandbox default
 DEFAULT_EMBEDDING_MODEL_NAME = "sentence-transformers/all-MiniLM-L6-v2"
 DEFAULT_POLLING_INTERVAL_S = 30
 DOCS_FOLDER = "docs/"
@@ -66,7 +66,7 @@ def load_pdf_data(file_path):
             for page_num in range(len(reader.pages)):
                 page = reader.pages[page_num]
                 text_pages.append(page.extract_text() or "")
-        return text_pages
     except FileNotFoundError:
         st.error(f"Error: PDF file not found at {file_path}")
         return []
@@ -87,7 +87,7 @@ def chunk_text(text_pages, chunk_size=1000, chunk_overlap=200):
         if end >= len(full_text):
             break
         start += (chunk_size - chunk_overlap)
-        if start >= len(full_text):
             break
     return [chunk for chunk in chunks if chunk.strip()]
@@ -113,7 +113,7 @@ def create_faiss_index(_text_chunks, _embedding_model):
             st.warning("No valid text chunks to embed for FAISS index.")
             return None, []
         embeddings = _embedding_model.encode(valid_chunks, convert_to_tensor=False)
-        if embeddings.ndim == 1:
             embeddings = embeddings.reshape(1, -1)
         if embeddings.shape[0] == 0:
             st.warning("No embeddings were generated for FAISS index.")
@@ -132,8 +132,8 @@ def search_faiss_index(index, query_text, embedding_model, indexed_chunks, k=3):
         return []
     try:
         query_embedding = embedding_model.encode([query_text], convert_to_tensor=False)
-        if query_embedding.ndim == 1:
-            query_embedding = query_embedding.reshape(1, -1)
         distances, indices = index.search(np.array(query_embedding, dtype=np.float32), k)
         results = []
         for i in range(len(indices[0])):
@@ -150,37 +150,66 @@ def get_order_details(order_id, customer_orders_data):
     if not customer_orders_data:
         return "Customer order data is not loaded."
     for order in customer_orders_data:
-        if order.get("order_id") == order_id:
             return json.dumps(order, indent=2)
     return f"No order found with ID: {order_id}."
 def get_product_info(query, products_data):
-    """Retrieves product information based on a query."""
     if not products_data:
         return "Product data is not loaded."
     query_lower = query.lower()
     found_products = []
     for product in products_data:
-        # Changed keys to match Products.json
-        if query_lower in (product.get("Product_Name", "").lower()) or \
-           query_lower in (product.get("Product_Type", "").lower()) or \
-           query_lower == (product.get("Product_ID", "").lower()):
             found_products.append(product)
     if found_products:
         return json.dumps(found_products, indent=2)
     return f"No product information found matching your query: '{query}'."
 # --- LLM Operations ---
-@st.cache_data(show_spinner="Generating response with LLaMA3...")
 def generate_response_groq(_groq_client, query, context, model="llama3-8b-8192"):
     """Generates a response using GROQ LLaMA3 API."""
     if not _groq_client:
         return "GROQ client not initialized. Please check API key."
     if not query:
         return "Query is empty."
     prompt = f"""You are a helpful customer support assistant.
 Use the following context to answer the user's question.
-If the context doesn't contain the answer, state that you don't have enough information.
 Do not make up information. Be concise and polite.
 Context:
@@ -206,7 +235,7 @@ Assistant Answer:
 def initialize_groq_client(api_key_val):
     """Initializes the GROQ client."""
-    if not api_key_val: # Changed parameter name to avoid conflict
         st.warning("GROQ API Key is missing.")
         return None
     try:
@@ -217,7 +246,7 @@ def initialize_groq_client(api_key_val):
         return None
 # --- Twilio Operations ---
-def initialize_twilio_client(acc_sid, auth_tkn): # Changed parameter names
     """Initializes the Twilio client."""
     if not acc_sid or not auth_tkn:
         st.warning("Twilio Account SID or Auth Token is missing.")
@@ -229,8 +258,8 @@ def initialize_twilio_client(acc_sid, auth_tkn): # Changed parameter names
         st.error(f"Failed to initialize Twilio client: {e}")
         return None
-def get_new_whatsapp_messages(twilio_client, conversation_service_sid_val, bot_start_time_utc, # Renamed
-                               processed_message_sids, bot_whatsapp_identity_val): # Renamed
     """Fetches new, unanswered WhatsApp messages from Twilio Conversations."""
     if not twilio_client:
         st.warning("Twilio client not initialized.")
@@ -238,25 +267,34 @@ def get_new_whatsapp_messages(twilio_client, conversation_service_sid_val, bot_s
     if not conversation_service_sid_val:
         st.warning("Twilio Conversation Service SID not provided.")
         return []
     new_messages_to_process = []
     try:
         conversations = twilio_client.conversations.v1 \
             .services(conversation_service_sid_val) \
             .conversations \
-            .list(limit=50)
         for conv in conversations:
             if conv.date_updated and conv.date_updated > bot_start_time_utc:
                 messages = twilio_client.conversations.v1 \
                     .services(conversation_service_sid_val) \
                     .conversations(conv.sid) \
                     .messages \
-                    .list(order='desc', limit=10)
                 for msg in messages:
                     if msg.sid in processed_message_sids:
-                        continue
                     if msg.author and msg.author.lower() != bot_whatsapp_identity_val.lower() and \
                        msg.date_created and msg.date_created > bot_start_time_utc:
                         new_messages_to_process.append({
@@ -264,12 +302,15 @@ def get_new_whatsapp_messages(twilio_client, conversation_service_sid_val, bot_s
                             "author_identity": msg.author, "message_body": msg.body,
                             "timestamp_utc": msg.date_created
                         })
-                        break
     except Exception as e:
         st.error(f"Error fetching Twilio messages: {e}")
-    return sorted(new_messages_to_process, key=lambda m: m['timestamp_utc'])
-def send_whatsapp_message(twilio_client, conversation_service_sid_val, conversation_sid, message_body, bot_identity_val): # Renamed
     """Sends a message to a Twilio Conversation from the bot's identity."""
     if not twilio_client:
         st.error("Twilio client not initialized for sending message.")
@@ -304,14 +345,14 @@ if APP_TWILIO_ACCOUNT_SID:
     st.sidebar.text_input("Twilio Account SID (from Secrets)", value="********" + APP_TWILIO_ACCOUNT_SID[-4:] if len(APP_TWILIO_ACCOUNT_SID) > 4 else "********", disabled=True)
     twilio_account_sid_to_use = APP_TWILIO_ACCOUNT_SID
 else:
-    st.sidebar.warning("Secret 'TWILIO_SID' not found.")
     twilio_account_sid_to_use = st.sidebar.text_input("Twilio Account SID (Enter Manually)", value=DEFAULT_TWILIO_ACCOUNT_SID_FALLBACK, type="password")
 if APP_TWILIO_AUTH_TOKEN:
     st.sidebar.text_input("Twilio Auth Token (from Secrets)", value="********", disabled=True)
     twilio_auth_token_to_use = APP_TWILIO_AUTH_TOKEN
 else:
-    st.sidebar.warning("Secret 'TWILIO_TOKEN' not found.")
     twilio_auth_token_to_use = st.sidebar.text_input("Twilio Auth Token (Enter Manually)", value=DEFAULT_TWILIO_AUTH_TOKEN_FALLBACK, type="password")
 if APP_GROQ_API_KEY:
@@ -325,7 +366,7 @@ else:
 twilio_conversation_service_sid_to_use = st.sidebar.text_input(
     "Twilio Conversation Service SID (IS...)",
     value=APP_TWILIO_CONVERSATION_SERVICE_SID_SECRET or DEFAULT_TWILIO_CONVERSATION_SERVICE_SID,
-    type="password",
     help="The SID of your Twilio Conversations Service. Can be set by 'TWILIO_CONVERSATION_SERVICE_SID' secret."
 )
 twilio_bot_whatsapp_identity_to_use = st.sidebar.text_input(
@@ -333,11 +374,11 @@ twilio_bot_whatsapp_identity_to_use = st.sidebar.text_input(
     value=APP_TWILIO_BOT_WHATSAPP_IDENTITY_SECRET or DEFAULT_TWILIO_BOT_WHATSAPP_IDENTITY,
     help="e.g., 'whatsapp:+1234567890'. Can be set by 'TWILIO_BOT_WHATSAPP_IDENTITY' secret."
 )
-embedding_model_name_to_use = st.sidebar.text_input( # Renamed
     "Embedding Model Name",
     value=DEFAULT_EMBEDDING_MODEL_NAME
 )
-polling_interval_to_use = st.sidebar.number_input( # Renamed
     "Twilio Polling Interval (seconds)",
     min_value=10, max_value=300,
     value=DEFAULT_POLLING_INTERVAL_S,
@@ -356,27 +397,40 @@ if "manual_chat_history" not in st.session_state: st.session_state.manual_chat_h
 # --- Helper: Simple Intent Classifier ---
 def simple_intent_classifier(query):
     query_lower = query.lower()
-    if any(k in query_lower for k in ["order", "status", "track", "delivery"]):
-        # More specific regex to find 'ORD' followed by digits (assuming order IDs are like ORD1001)
-        match = re.search(r'\b(ord\d{3,})\b', query_lower) # Matches 'ord' followed by at least 3 digits, as a whole word
-        if match:
-            return "ORDER_STATUS", match.group(1).upper() # Return intent and extracted ID
-        # Fallback if specific order ID not found but still an order-related query
-        return "ORDER_STATUS", None # Indicate order status intent but no specific ID found yet
-    if any(k in query_lower for k in ["product", "item", "buy", "price", "feature", "stock"]): return "PRODUCT_INFO", None
-    if any(k in query_lower for k in ["return", "policy", "refund", "exchange", "faq", "question", "how to", "support"]): return "GENERAL_POLICY_FAQ", None
-    return "UNKNOWN", None # Return intent and None for ID if unknown
 # --- Main Application Controls ---
 col1, col2, col3, col4 = st.columns(4)
 with col1:
     if st.button("🚀 Start App", disabled=st.session_state.app_started, use_container_width=True):
-        if not groq_api_key_to_use: # Use the correct variable
             st.error("GROQ API Key is required.")
         else:
             with st.spinner("Initializing RAG pipeline..."):
-                st.session_state.embedding_model = initialize_embedding_model(embedding_model_name_to_use) # Use correct var
                 st.session_state.customer_orders_data = load_json_data(CUSTOMER_ORDERS_FILE)
                 st.session_state.products_data = load_json_data(PRODUCTS_FILE)
                 policy_pdf_pages = load_pdf_data(POLICY_PDF_FILE)
@@ -389,18 +443,28 @@ with col1:
                         create_faiss_index(st.session_state.pdf_text_chunks_raw, st.session_state.embedding_model)
                 else:
                     st.session_state.faiss_index_pdfs, st.session_state.indexed_pdf_chunks = None, []
-                    st.warning("FAISS index for PDFs could not be created.")
-                st.session_state.groq_client = initialize_groq_client(groq_api_key_to_use) # Use correct var
-                if st.session_state.embedding_model and st.session_state.groq_client and \
-                   st.session_state.customer_orders_data and st.session_state.products_data:
                     st.session_state.rag_pipeline_ready = True
                     st.session_state.app_started = True
                     st.success("RAG Application Started!")
                     st.rerun()
                 else:
-                    st.error("Failed to initialize RAG pipeline. Check configurations and ensure all data files are present in 'docs/'.")
                     st.session_state.app_started = False
 with col2:
     if st.button("🛑 Stop App", disabled=not st.session_state.app_started, use_container_width=True):
@@ -410,52 +474,61 @@ with col2:
                          "bot_start_time_utc", "processed_message_sids", "manual_chat_history"]
         for key in keys_to_reset:
             if key in st.session_state: del st.session_state[key]
         st.session_state.app_started = False
         st.session_state.bot_started = False
         st.session_state.rag_pipeline_ready = False
         st.session_state.processed_message_sids = set()
         st.session_state.manual_chat_history = []
         st.success("Application Stopped.")
         st.rerun()
 with col3:
     if st.button("💬 Start WhatsApp Bot", disabled=not st.session_state.app_started or st.session_state.bot_started, use_container_width=True):
-        if not all([twilio_account_sid_to_use, twilio_auth_token_to_use, twilio_conversation_service_sid_to_use, twilio_bot_whatsapp_identity_to_use]): # Use correct vars
-            st.error("Twilio credentials, Service SID, and Bot Identity are required.")
         else:
-            st.session_state.twilio_client = initialize_twilio_client(twilio_account_sid_to_use, twilio_auth_token_to_use) # Use correct vars
             if st.session_state.twilio_client:
                 st.session_state.bot_started = True
                 st.session_state.bot_start_time_utc = datetime.now(timezone.utc)
-                st.session_state.processed_message_sids = set()
-                st.session_state.last_twilio_poll_time = time.time() - polling_interval_to_use -1 # Use correct var
                 st.success("WhatsApp Bot Started!")
                 st.rerun()
             else:
-                st.error("Failed to initialize Twilio client.")
 with col4:
     if st.button("🔕 Stop WhatsApp Bot", disabled=not st.session_state.bot_started, use_container_width=True):
         st.session_state.bot_started = False
         st.info("WhatsApp Bot Stopped.")
         st.rerun()
 st.divider()
 # --- Manual Query Interface ---
 if st.session_state.get("app_started") and st.session_state.get("rag_pipeline_ready"):
     st.subheader("💬 Manual Query")
     for chat_entry in st.session_state.manual_chat_history:
         with st.chat_message(chat_entry["role"]):
             st.markdown(chat_entry["content"])
-            if "context" in chat_entry and chat_entry["context"]:
                 with st.expander("Retrieved Context"):
                     try:
                         # Attempt to parse as JSON only if it looks like a JSON string
-                        if isinstance(chat_entry["context"], str) and (chat_entry["context"].strip().startswith('{') or chat_entry["context"].strip().startswith('[')):
                             st.json(json.loads(chat_entry["context"]))
-                        else:
-                            # Otherwise, display as plain text
                             st.text(str(chat_entry["context"]))
-                    except (json.JSONDecodeError, TypeError):
-                        # Fallback for any other parsing errors
                         st.text(str(chat_entry["context"]))
     user_query_manual = st.chat_input("Ask a question:")
@@ -464,121 +537,159 @@ if st.session_state.get("app_started") and st.session_state.get("rag_pipeline_re
         with st.chat_message("user"): st.markdown(user_query_manual)
         with st.spinner("Thinking..."):
-            intent_result = simple_intent_classifier(user_query_manual) # Get both intent and potential_id
             intent = intent_result[0]
-            potential_oid_from_intent = intent_result[1] # This is the extracted ID if any
-            context_for_llm, raw_context_data = "No specific context.", None
             if intent == "ORDER_STATUS":
                 order_id_to_check = None
-                if potential_oid_from_intent:
                     order_id_to_check = potential_oid_from_intent
-                else:
-                    # Fallback for edge cases, though the regex should catch most
-                    words = user_query_manual.upper().split()
-                    # This regex specifically looks for 'ORD' followed by digits
-                    possible_match = next((w for w in words if re.match(r'ORD\d+', w)), None)
-                    if possible_match:
-                        order_id_to_check = possible_match
                 if order_id_to_check:
-                    raw_context_data = get_order_details(order_id_to_check.upper(), st.session_state.customer_orders_data)
-                    context_for_llm = f"Order Details: {raw_context_data}"
                 else:
-                    context_for_llm = "Please provide a valid Order ID (e.g., ORD1234)."
-                    raw_context_data = {"message": "Order ID needed."}
             elif intent == "PRODUCT_INFO":
                 raw_context_data = get_product_info(user_query_manual, st.session_state.products_data)
-                context_for_llm = f"Product Information: {raw_context_data}"
-            elif intent == "GENERAL_POLICY_FAQ" or intent == "UNKNOWN":
-                # ... (rest of your existing logic for these intents) ...
-                if st.session_state.faiss_index_pdfs and st.session_state.embedding_model:
-                    k_val = 2 if intent == "GENERAL_POLICY_FAQ" else 1
                     retrieved_chunks = search_faiss_index(st.session_state.faiss_index_pdfs, user_query_manual,
                                                           st.session_state.embedding_model, st.session_state.indexed_pdf_chunks, k=k_val)
                     if retrieved_chunks:
-                        context_for_llm = "\n\n".join(retrieved_chunks)
-                        raw_context_data = retrieved_chunks
                     else:
-                        context_for_llm = "No specific policy/FAQ info found." if intent == "GENERAL_POLICY_FAQ" else "Could not find relevant info."
                         raw_context_data = {"message": "No relevant PDF chunks found."}
                 else:
-                    context_for_llm = "Policy/FAQ documents unavailable."
-                    raw_context_data = {"message": "PDF index not ready."}
             llm_response = generate_response_groq(st.session_state.groq_client, user_query_manual, context_for_llm)
             with st.chat_message("assistant"):
                 st.markdown(llm_response)
-                if raw_context_data:
-                    with st.expander("Retrieved Context"):
                         try:
-                            if isinstance(raw_context_data, str) and (raw_context_data.strip().startswith('{') or raw_context_data.strip().startswith('[')):
                                 st.json(json.loads(raw_context_data))
                             else:
                                 st.text(str(raw_context_data))
                         except (json.JSONDecodeError, TypeError):
                             st.text(str(raw_context_data))
             st.session_state.manual_chat_history.append({"role": "assistant", "content": llm_response, "context": raw_context_data})
 # --- Twilio Bot Polling Logic ---
 if st.session_state.get("bot_started") and st.session_state.get("rag_pipeline_ready"):
     current_time = time.time()
-    if (current_time - st.session_state.get("last_twilio_poll_time", 0)) > polling_interval_to_use: # Use correct var
         st.session_state.last_twilio_poll_time = current_time
-        with st.spinner("Checking WhatsApp messages..."):
-            if not st.session_state.get("twilio_client") or not twilio_conversation_service_sid_to_use or not twilio_bot_whatsapp_identity_to_use: # Use correct vars
-                st.warning("Twilio client/config missing for polling.")
-            else:
-                new_messages = get_new_whatsapp_messages(st.session_state.twilio_client, twilio_conversation_service_sid_to_use,
-                                                         st.session_state.bot_start_time_utc, st.session_state.processed_message_sids,
-                                                         twilio_bot_whatsapp_identity_to_use) # Use correct vars
                 if new_messages:
-                    st.info(f"Found {len(new_messages)} new WhatsApp message(s).")
                     for msg_data in new_messages:
-                        user_query_whatsapp, conv_sid, msg_sid, author_id = msg_data["message_body"], msg_data["conversation_sid"], msg_data["message_sid"], msg_data["author_identity"]
-                        st.write(f"Processing from {author_id} in {conv_sid}: '{user_query_whatsapp}'")
-                        intent_result_whatsapp = simple_intent_classifier(user_query_whatsapp) # Use the updated classifier
                         intent_whatsapp = intent_result_whatsapp[0]
-                        potential_oid_whatsapp = intent_result_whatsapp[1] # Extracted ID from intent classifier
-                        context_whatsapp = "No specific context."
                         if intent_whatsapp == "ORDER_STATUS":
                             order_id_to_check_whatsapp = None
                             if potential_oid_whatsapp:
                                 order_id_to_check_whatsapp = potential_oid_whatsapp
                             else:
-                                words_whatsapp = user_query_whatsapp.upper().split()
-                                possible_match_whatsapp = next((w for w in words_whatsapp if re.match(r'ORD\d+', w)), None)
-                                if possible_match_whatsapp:
-                                    order_id_to_check_whatsapp = possible_match_whatsapp
                             if order_id_to_check_whatsapp:
-                                context_whatsapp = f"Order Details: {get_order_details(order_id_to_check_whatsapp.upper(), st.session_state.customer_orders_data)}"
                             else:
-                                context_whatsapp = "Please provide a valid Order ID."
                         elif intent_whatsapp == "PRODUCT_INFO":
-                            context_whatsapp = f"Product Info: {get_product_info(user_query_whatsapp, st.session_state.products_data)}"
                         elif intent_whatsapp == "GENERAL_POLICY_FAQ" or intent_whatsapp == "UNKNOWN":
-                            if st.session_state.faiss_index_pdfs and st.session_state.embedding_model:
-                                k_val = 2 if intent_whatsapp == "GENERAL_POLICY_FAQ" else 1
-                                chunks = search_faiss_index(st.session_state.faiss_index_pdfs, user_query_whatsapp, st.session_state.embedding_model, st.session_state.indexed_pdf_chunks, k=k_val)
-                                context_whatsapp = "\n\n".join(chunks) if chunks else ("No policy/FAQ info." if intent_whatsapp == "GENERAL_POLICY_FAQ" else "No relevant info.")
-                            else: context_whatsapp = "Policy/FAQ docs unavailable."
-                        response_whatsapp = generate_response_groq(st.session_state.groq_client, user_query_whatsapp, context_whatsapp)
-                        if send_whatsapp_message(st.session_state.twilio_client, twilio_conversation_service_sid_to_use, conv_sid, response_whatsapp, twilio_bot_whatsapp_identity_to_use): # Use correct vars
                             st.session_state.processed_message_sids.add(msg_sid)
-                            st.success(f"Responded to {msg_sid} from {author_id}")
-                        else: st.error(f"Failed to send response for {msg_sid}")
-                    st.experimental_rerun()
 # --- Footer & Status ---
 st.sidebar.markdown("---")
 st.sidebar.info("Ensure all keys and SIDs are correctly configured. Primary API keys (Twilio SID/Token, GROQ Key) are loaded from secrets if available.")
 if st.session_state.get("app_started"):
-    st.sidebar.success(f"App RUNNING. WhatsApp Bot {'RUNNING' if st.session_state.get('bot_started') else 'STOPPED'}.")
 else:
-    st.sidebar.warning("App is STOPPED.")

 DEFAULT_GROQ_API_KEY_FALLBACK = "" # Fallback if secret "GROQ_API_KEY" is not found
 DEFAULT_TWILIO_CONVERSATION_SERVICE_SID = ""
+DEFAULT_TWILIO_BOT_WHATSAPP_IDENTITY = st.secrets.get("TWILIO_PHONE_NUMBER", "whatsapp:+14155238886") # Twilio Sandbox default
 DEFAULT_EMBEDDING_MODEL_NAME = "sentence-transformers/all-MiniLM-L6-v2"
 DEFAULT_POLLING_INTERVAL_S = 30
 DOCS_FOLDER = "docs/"
             for page_num in range(len(reader.pages)):
                 page = reader.pages[page_num]
                 text_pages.append(page.extract_text() or "")
+            return text_pages
     except FileNotFoundError:
         st.error(f"Error: PDF file not found at {file_path}")
         return []
         if end >= len(full_text):
             break
         start += (chunk_size - chunk_overlap)
+        if start >= len(full_text): # Should be `start > len(full_text) - chunk_overlap` or similar to avoid empty last chunk
             break
     return [chunk for chunk in chunks if chunk.strip()]
             st.warning("No valid text chunks to embed for FAISS index.")
             return None, []
         embeddings = _embedding_model.encode(valid_chunks, convert_to_tensor=False)
+        if embeddings.ndim == 1: # Handle single chunk case
             embeddings = embeddings.reshape(1, -1)
         if embeddings.shape[0] == 0:
             st.warning("No embeddings were generated for FAISS index.")
         return []
     try:
         query_embedding = embedding_model.encode([query_text], convert_to_tensor=False)
+        if query_embedding.ndim == 1: # Handle single query embedding
+             query_embedding = query_embedding.reshape(1, -1)
         distances, indices = index.search(np.array(query_embedding, dtype=np.float32), k)
         results = []
         for i in range(len(indices[0])):
     if not customer_orders_data:
         return "Customer order data is not loaded."
     for order in customer_orders_data:
+        if order.get("order_id") == order_id: # Assuming order_id is stored as uppercase in JSON or matches case
             return json.dumps(order, indent=2)
     return f"No order found with ID: {order_id}."
 def get_product_info(query, products_data):
+    """Retrieves product information based on a query.
+    This function is updated to correctly find products by ID, name, or type within the query.
+    """
     if not products_data:
+        st.warning("Product data is not loaded or is empty in get_product_info.")
         return "Product data is not loaded."
     query_lower = query.lower()
     found_products = []
     for product in products_data:
+        if not isinstance(product, dict): # Skip if product entry is not a dictionary
+            continue
+        product_id_lower = str(product.get("Product_ID", "")).lower()
+        product_name_lower = str(product.get("Product_Name", "")).lower()
+        product_type_lower = str(product.get("Product_Type", "")).lower()
+        match = False
+        # 1. Check if the Product ID is mentioned in the query
+        if product_id_lower and product_id_lower in query_lower:
+            match = True
+        # 2. If no match by ID, check for Product Name
+        #    - If the query (e.g., "rattle") is a substring of the product name (e.g., "soft rattle set")
+        #    - OR if the product name (e.g., "soft rattle set") is a substring of the query (e.g., "info on soft rattle set")
+        if not match and product_name_lower:
+            if query_lower in product_name_lower or product_name_lower in query_lower:
+                match = True
+        # 3. If no match yet, check for Product Type similarly
+        if not match and product_type_lower:
+            if query_lower in product_type_lower or product_type_lower in query_lower:
+                match = True
+        if match:
             found_products.append(product)
     if found_products:
         return json.dumps(found_products, indent=2)
     return f"No product information found matching your query: '{query}'."
 # --- LLM Operations ---
+@st.cache_data(show_spinner="Generating response with LLaMA3...") # Consider disabling caching if context changes frequently or add more granular cache invalidation
 def generate_response_groq(_groq_client, query, context, model="llama3-8b-8192"):
     """Generates a response using GROQ LLaMA3 API."""
     if not _groq_client:
         return "GROQ client not initialized. Please check API key."
     if not query:
         return "Query is empty."
+    # Basic prompt, can be enhanced
     prompt = f"""You are a helpful customer support assistant.
 Use the following context to answer the user's question.
+If the context doesn't contain the answer, state that you don't have enough information or ask clarifying questions.
 Do not make up information. Be concise and polite.
 Context:
 def initialize_groq_client(api_key_val):
     """Initializes the GROQ client."""
+    if not api_key_val:
         st.warning("GROQ API Key is missing.")
         return None
     try:
         return None
 # --- Twilio Operations ---
+def initialize_twilio_client(acc_sid, auth_tkn):
     """Initializes the Twilio client."""
     if not acc_sid or not auth_tkn:
         st.warning("Twilio Account SID or Auth Token is missing.")
         st.error(f"Failed to initialize Twilio client: {e}")
         return None
+def get_new_whatsapp_messages(twilio_client, conversation_service_sid_val, bot_start_time_utc,
+                              processed_message_sids, bot_whatsapp_identity_val):
     """Fetches new, unanswered WhatsApp messages from Twilio Conversations."""
     if not twilio_client:
         st.warning("Twilio client not initialized.")
     if not conversation_service_sid_val:
         st.warning("Twilio Conversation Service SID not provided.")
         return []
+    if not bot_whatsapp_identity_val: # Added check
+        st.warning("Twilio Bot WhatsApp Identity not provided.")
+        return []
     new_messages_to_process = []
     try:
+        # Fetch conversations updated since the bot started or a reasonable window
         conversations = twilio_client.conversations.v1 \
             .services(conversation_service_sid_val) \
             .conversations \
+            .list(limit=50) # Consider filtering by date_updated if API supports
         for conv in conversations:
+            # Check if conversation was updated after bot start time
             if conv.date_updated and conv.date_updated > bot_start_time_utc:
+                # Fetch recent messages from this conversation
                 messages = twilio_client.conversations.v1 \
                     .services(conversation_service_sid_val) \
                     .conversations(conv.sid) \
                     .messages \
+                    .list(order='desc', limit=10) # Get latest messages first
                 for msg in messages:
                     if msg.sid in processed_message_sids:
+                        continue # Skip already processed messages
+                    # Check if message is from a user (not the bot) and is new
                     if msg.author and msg.author.lower() != bot_whatsapp_identity_val.lower() and \
                        msg.date_created and msg.date_created > bot_start_time_utc:
                         new_messages_to_process.append({
                             "author_identity": msg.author, "message_body": msg.body,
                             "timestamp_utc": msg.date_created
                         })
+                        # Assuming we only process the latest unread message per conversation polling cycle
+                        # If multiple new messages from the same user in one convo need processing, this break might be too early.
+                        # For simplicity, processing one latest unread message per conversation poll.
+                        break
     except Exception as e:
         st.error(f"Error fetching Twilio messages: {e}")
+    return sorted(new_messages_to_process, key=lambda m: m['timestamp_utc']) # Process in chronological order
+def send_whatsapp_message(twilio_client, conversation_service_sid_val, conversation_sid, message_body, bot_identity_val):
     """Sends a message to a Twilio Conversation from the bot's identity."""
     if not twilio_client:
         st.error("Twilio client not initialized for sending message.")
     st.sidebar.text_input("Twilio Account SID (from Secrets)", value="********" + APP_TWILIO_ACCOUNT_SID[-4:] if len(APP_TWILIO_ACCOUNT_SID) > 4 else "********", disabled=True)
     twilio_account_sid_to_use = APP_TWILIO_ACCOUNT_SID
 else:
+    st.sidebar.warning("Secret 'TWILIO_ACCOUNT_SID' not found.") # Corrected secret name from TWILIO_SID
     twilio_account_sid_to_use = st.sidebar.text_input("Twilio Account SID (Enter Manually)", value=DEFAULT_TWILIO_ACCOUNT_SID_FALLBACK, type="password")
 if APP_TWILIO_AUTH_TOKEN:
     st.sidebar.text_input("Twilio Auth Token (from Secrets)", value="********", disabled=True)
     twilio_auth_token_to_use = APP_TWILIO_AUTH_TOKEN
 else:
+    st.sidebar.warning("Secret 'TWILIO_AUTH_TOKEN' not found.") # Corrected secret name from TWILIO_TOKEN
     twilio_auth_token_to_use = st.sidebar.text_input("Twilio Auth Token (Enter Manually)", value=DEFAULT_TWILIO_AUTH_TOKEN_FALLBACK, type="password")
 if APP_GROQ_API_KEY:
 twilio_conversation_service_sid_to_use = st.sidebar.text_input(
     "Twilio Conversation Service SID (IS...)",
     value=APP_TWILIO_CONVERSATION_SERVICE_SID_SECRET or DEFAULT_TWILIO_CONVERSATION_SERVICE_SID,
+    type="password", # Keep as password if sensitive
     help="The SID of your Twilio Conversations Service. Can be set by 'TWILIO_CONVERSATION_SERVICE_SID' secret."
 )
 twilio_bot_whatsapp_identity_to_use = st.sidebar.text_input(
     value=APP_TWILIO_BOT_WHATSAPP_IDENTITY_SECRET or DEFAULT_TWILIO_BOT_WHATSAPP_IDENTITY,
     help="e.g., 'whatsapp:+1234567890'. Can be set by 'TWILIO_BOT_WHATSAPP_IDENTITY' secret."
 )
+embedding_model_name_to_use = st.sidebar.text_input(
     "Embedding Model Name",
     value=DEFAULT_EMBEDDING_MODEL_NAME
 )
+polling_interval_to_use = st.sidebar.number_input(
     "Twilio Polling Interval (seconds)",
     min_value=10, max_value=300,
     value=DEFAULT_POLLING_INTERVAL_S,
 # --- Helper: Simple Intent Classifier ---
 def simple_intent_classifier(query):
     """Classify a customer query into a coarse intent using keyword rules.

     Rules are evaluated in priority order: order status, product info,
     general policy/FAQ, and finally unknown. Keyword checks are plain
     substring tests on the lower-cased query.

     Args:
         query: Raw user text. ``None`` or an empty string is treated as a
             query with no recognisable intent.

     Returns:
         A ``(intent, order_id)`` tuple. ``intent`` is one of
         ``"ORDER_STATUS"``, ``"PRODUCT_INFO"``, ``"GENERAL_POLICY_FAQ"`` or
         ``"UNKNOWN"``. ``order_id`` is the upper-cased ``ORD<digits>`` token
         found in the query for ``"ORDER_STATUS"``, otherwise ``None``.
     """
     # Guard against a missing body so .lower() cannot raise.
     query_lower = (query or "").lower()

     # Order status: an order keyword OR an explicit order ID ('ord' + 3 or more digits).
     # A bare ID counts on its own because the bot asks users to reply with just
     # their order ID when none was supplied.
     order_keywords = ("order", "status", "track", "delivery")
     order_id_match = re.search(r'\b(ord\d{3,})\b', query_lower)
     if order_id_match or any(k in query_lower for k in order_keywords):
         order_id = order_id_match.group(1).upper() if order_id_match else None
         return "ORDER_STATUS", order_id

     # Product info: a product keyword OR a product ID ('prd' + 3 or more digits).
     # The ID itself is not returned; only the intent is reported here.
     product_keywords = ("product", "item", "buy", "price", "feature", "stock")
     product_id_match = re.search(r'\b(prd\d{3,})\b', query_lower)
     if product_id_match or any(k in query_lower for k in product_keywords):
         return "PRODUCT_INFO", None

     # General policy / FAQ questions.
     policy_keywords = ("return", "policy", "refund", "exchange", "faq", "question", "how to", "support")
     if any(k in query_lower for k in policy_keywords):
         return "GENERAL_POLICY_FAQ", None

     return "UNKNOWN", None
 # --- Main Application Controls ---
 col1, col2, col3, col4 = st.columns(4)
 with col1:
     if st.button("🚀 Start App", disabled=st.session_state.app_started, use_container_width=True):
+        if not groq_api_key_to_use:
             st.error("GROQ API Key is required.")
         else:
             with st.spinner("Initializing RAG pipeline..."):
+                st.session_state.embedding_model = initialize_embedding_model(embedding_model_name_to_use)
                 st.session_state.customer_orders_data = load_json_data(CUSTOMER_ORDERS_FILE)
                 st.session_state.products_data = load_json_data(PRODUCTS_FILE)
                 policy_pdf_pages = load_pdf_data(POLICY_PDF_FILE)
                         create_faiss_index(st.session_state.pdf_text_chunks_raw, st.session_state.embedding_model)
                 else:
                     st.session_state.faiss_index_pdfs, st.session_state.indexed_pdf_chunks = None, []
+                    st.warning("FAISS index for PDFs could not be created (model or chunks missing).")
+                st.session_state.groq_client = initialize_groq_client(groq_api_key_to_use)
+                # Check all critical components for RAG readiness
+                if st.session_state.embedding_model and \
+                   st.session_state.groq_client and \
+                   st.session_state.customer_orders_data is not None and \
+                   st.session_state.products_data is not None and \
+                   (st.session_state.faiss_index_pdfs is not None or not all_pdf_text_pages): # Index needed if PDFs exist
                     st.session_state.rag_pipeline_ready = True
                     st.session_state.app_started = True
                     st.success("RAG Application Started!")
                     st.rerun()
                 else:
+                    error_messages = []
+                    if not st.session_state.embedding_model: error_messages.append("Embedding model failed to initialize.")
+                    if not st.session_state.groq_client: error_messages.append("GROQ client failed to initialize.")
+                    if st.session_state.customer_orders_data is None: error_messages.append(f"CustomerOrders.json ({CUSTOMER_ORDERS_FILE}) failed to load.")
+                    if st.session_state.products_data is None: error_messages.append(f"Products.json ({PRODUCTS_FILE}) failed to load.")
+                    if all_pdf_text_pages and st.session_state.faiss_index_pdfs is None: error_messages.append("PDF FAISS index failed to create.")
+                    st.error("Failed to initialize RAG pipeline. Issues:\n- " + "\n- ".join(error_messages) + "\nCheck configurations and ensure all data files are present in 'docs/'.")
                     st.session_state.app_started = False
 with col2:
     if st.button("🛑 Stop App", disabled=not st.session_state.app_started, use_container_width=True):
                          "bot_start_time_utc", "processed_message_sids", "manual_chat_history"]
         for key in keys_to_reset:
             if key in st.session_state: del st.session_state[key]
+        # Explicitly reset to default states
         st.session_state.app_started = False
         st.session_state.bot_started = False
         st.session_state.rag_pipeline_ready = False
         st.session_state.processed_message_sids = set()
         st.session_state.manual_chat_history = []
         st.success("Application Stopped.")
+        # Clear cached resources if desired (or let Streamlit manage them)
+        # initialize_embedding_model.clear()
+        # create_faiss_index.clear()
+        # generate_response_groq.clear()
         st.rerun()
 with col3:
     # The start button is only clickable while the app runs and the bot is idle.
     bot_start_disabled = not st.session_state.app_started or st.session_state.bot_started
     if st.button("💬 Start WhatsApp Bot", disabled=bot_start_disabled, use_container_width=True):
         required_twilio_settings = [
             twilio_account_sid_to_use,
             twilio_auth_token_to_use,
             twilio_conversation_service_sid_to_use,
             twilio_bot_whatsapp_identity_to_use,
         ]
         if all(required_twilio_settings):
             st.session_state.twilio_client = initialize_twilio_client(twilio_account_sid_to_use, twilio_auth_token_to_use)
             if not st.session_state.twilio_client:
                 st.error("Failed to initialize Twilio client. WhatsApp Bot not started.")
             else:
                 st.session_state.bot_started = True
                 # Only messages created after this instant are picked up by polling.
                 st.session_state.bot_start_time_utc = datetime.now(timezone.utc)
                 # Start each bot session with a clean processed-message record.
                 st.session_state.processed_message_sids = set()
                 # Back-date the last poll so the first poll is due right away.
                 st.session_state.last_twilio_poll_time = time.time() - polling_interval_to_use - 1
                 st.success("WhatsApp Bot Started!")
                 st.rerun()
         else:
             st.error("Twilio Account SID, Auth Token, Conversation Service SID, and Bot WhatsApp Identity are all required.")
 with col4:
     # The stop button is only clickable while the bot is running.
     stop_bot_clicked = st.button("🔕 Stop WhatsApp Bot", disabled=not st.session_state.bot_started, use_container_width=True)
     if stop_bot_clicked:
         # Only the running flag is cleared; the Twilio client stays in session state.
         st.session_state.bot_started = False
         st.info("WhatsApp Bot Stopped.")
         st.rerun()
 st.divider()
 # --- Manual Query Interface ---
 if st.session_state.get("app_started") and st.session_state.get("rag_pipeline_ready"):
     st.subheader("💬 Manual Query")
+    # Display chat history
     for chat_entry in st.session_state.manual_chat_history:
         with st.chat_message(chat_entry["role"]):
             st.markdown(chat_entry["content"])
+            if "context" in chat_entry and chat_entry["context"]: # Check if context exists and is not None/empty
                 with st.expander("Retrieved Context"):
                     try:
                         # Attempt to parse as JSON only if it looks like a JSON string
+                        if isinstance(chat_entry["context"], str) and \
+                           (chat_entry["context"].strip().startswith('{') or chat_entry["context"].strip().startswith('[')):
                             st.json(json.loads(chat_entry["context"]))
+                        elif isinstance(chat_entry["context"], list): # Handle if context is already a list (e.g. PDF chunks)
+                             st.json(chat_entry["context"]) # Or st.text for list of strings
+                        else: # Otherwise, display as plain text
                             st.text(str(chat_entry["context"]))
+                    except (json.JSONDecodeError, TypeError): # Fallback for any other parsing errors
                         st.text(str(chat_entry["context"]))
     user_query_manual = st.chat_input("Ask a question:")
         with st.chat_message("user"): st.markdown(user_query_manual)
         with st.spinner("Thinking..."):
+            intent_result = simple_intent_classifier(user_query_manual)
             intent = intent_result[0]
+            potential_oid_from_intent = intent_result[1] # This is the extracted Order ID if any
+            context_for_llm, raw_context_data = "No specific context could be retrieved.", None # Default
             if intent == "ORDER_STATUS":
                 order_id_to_check = None
+                if potential_oid_from_intent: # ID from intent classifier (preferred)
                     order_id_to_check = potential_oid_from_intent
+                else: # Fallback: try to find any 'ORDxxx' in the query if intent classifier missed it (less likely with current regex)
+                    match_manual = re.search(r'\b(ord\d{3,})\b', user_query_manual.lower(), re.IGNORECASE)
+                    if match_manual:
+                        order_id_to_check = match_manual.group(1).upper()
                 if order_id_to_check:
+                    raw_context_data = get_order_details(order_id_to_check, st.session_state.customer_orders_data)
+                    context_for_llm = f"Order Details for {order_id_to_check}: {raw_context_data}"
                 else:
+                    context_for_llm = "To check an order status, please provide a valid Order ID (e.g., ORD123)."
+                    raw_context_data = {"message": "Order ID needed or not found in query."}
             elif intent == "PRODUCT_INFO":
                 raw_context_data = get_product_info(user_query_manual, st.session_state.products_data)
+                context_for_llm = f"Product Information related to '{user_query_manual}': {raw_context_data}"
+            elif intent == "GENERAL_POLICY_FAQ" or intent == "UNKNOWN": # Consolidate for PDF search
+                if st.session_state.faiss_index_pdfs and st.session_state.embedding_model and st.session_state.indexed_pdf_chunks:
+                    k_val = 3 if intent == "GENERAL_POLICY_FAQ" else 2 # Retrieve more for specific FAQ, less for UNKNOWN
                     retrieved_chunks = search_faiss_index(st.session_state.faiss_index_pdfs, user_query_manual,
                                                           st.session_state.embedding_model, st.session_state.indexed_pdf_chunks, k=k_val)
                     if retrieved_chunks:
+                        context_for_llm = "Relevant information from documents:\n\n" + "\n\n---\n\n".join(retrieved_chunks)
+                        raw_context_data = retrieved_chunks # Store the list of chunks
                     else:
+                        context_for_llm = "I couldn't find specific information in our policy or FAQ documents regarding your query."
                         raw_context_data = {"message": "No relevant PDF chunks found."}
                 else:
+                    context_for_llm = "Our policy and FAQ documents are currently unavailable for search."
+                    raw_context_data = {"message": "PDF index or embedding model not ready."}
             llm_response = generate_response_groq(st.session_state.groq_client, user_query_manual, context_for_llm)
             with st.chat_message("assistant"):
                 st.markdown(llm_response)
+                if raw_context_data: # Display context if it was retrieved
+                    with st.expander("Retrieved Context For Assistant"): # Changed label for clarity
                         try:
+                            if isinstance(raw_context_data, str) and \
+                               (raw_context_data.strip().startswith('{') or raw_context_data.strip().startswith('[')):
                                 st.json(json.loads(raw_context_data))
+                            elif isinstance(raw_context_data, list):
+                                st.json(raw_context_data) # Display list of strings (chunks) as JSON array
                             else:
                                 st.text(str(raw_context_data))
                         except (json.JSONDecodeError, TypeError):
                             st.text(str(raw_context_data))
             st.session_state.manual_chat_history.append({"role": "assistant", "content": llm_response, "context": raw_context_data})
+            st.rerun() # Rerun to update chat display immediately
 # --- Twilio Bot Polling Logic ---
 if st.session_state.get("bot_started") and st.session_state.get("rag_pipeline_ready"):
     # Poll Twilio at most once per polling interval. Each due poll fetches new
     # inbound WhatsApp messages, builds a context string for each one, asks the
     # LLM for a reply and sends it back into the originating conversation.
     current_time = time.time()
     # Back-date a missing timestamp so the very first poll is due immediately.
     if "last_twilio_poll_time" not in st.session_state:
         st.session_state.last_twilio_poll_time = current_time - polling_interval_to_use - 1
     if (current_time - st.session_state.last_twilio_poll_time) > polling_interval_to_use:
         st.session_state.last_twilio_poll_time = current_time
         # Polling needs a client, both Twilio identifiers and the bot start time.
         if not st.session_state.get("twilio_client") or \
            not twilio_conversation_service_sid_to_use or \
            not twilio_bot_whatsapp_identity_to_use or \
            not st.session_state.get("bot_start_time_utc"):
             st.warning("Twilio client/config missing for polling. Ensure bot is started and SIDs are set.")
         else:
             with st.spinner(f"Checking WhatsApp messages (last poll: {datetime.fromtimestamp(st.session_state.last_twilio_poll_time).strftime('%H:%M:%S')})..."):
                 new_messages = get_new_whatsapp_messages(st.session_state.twilio_client,
                                                          twilio_conversation_service_sid_to_use,
                                                          st.session_state.bot_start_time_utc,
                                                          st.session_state.processed_message_sids,
                                                          twilio_bot_whatsapp_identity_to_use)
                 if new_messages:
                     st.info(f"Found {len(new_messages)} new WhatsApp message(s) to process.")
                     for msg_data in new_messages:
                         # A message may arrive without text (presumably media-only
                         # messages; verify against Twilio). Normalise to "" so the
                         # classifier's .lower() call cannot fail.
                         user_query_whatsapp = msg_data["message_body"] or ""
                         conv_sid = msg_data["conversation_sid"]
                         msg_sid = msg_data["message_sid"]
                         author_id = msg_data["author_identity"]
                         st.write(f"Processing WhatsApp message from {author_id} in conversation {conv_sid}: '{user_query_whatsapp}' (SID: {msg_sid})")

                         # --- RAG context selection (mirrors the manual query flow) ---
                         # The classifier already extracts any ORD<digits> ID from the
                         # query, so no second regex pass over the same text is needed.
                         intent_whatsapp, order_id_to_check_whatsapp = simple_intent_classifier(user_query_whatsapp)
                         context_for_llm_whatsapp = "No specific context could be retrieved."
                         if intent_whatsapp == "ORDER_STATUS":
                             if order_id_to_check_whatsapp:
                                 order_details_whatsapp = get_order_details(order_id_to_check_whatsapp, st.session_state.customer_orders_data)
                                 context_for_llm_whatsapp = f"Order Details for {order_id_to_check_whatsapp}: {order_details_whatsapp}"
                             else:
                                 context_for_llm_whatsapp = "To check an order status, please provide a valid Order ID (e.g., ORD123)."
                         elif intent_whatsapp == "PRODUCT_INFO":
                             product_info_whatsapp = get_product_info(user_query_whatsapp, st.session_state.products_data)
                             context_for_llm_whatsapp = f"Product Information related to '{user_query_whatsapp}': {product_info_whatsapp}"
                         elif intent_whatsapp in ("GENERAL_POLICY_FAQ", "UNKNOWN"):
                             if st.session_state.faiss_index_pdfs and st.session_state.embedding_model and st.session_state.indexed_pdf_chunks:
                                 # Retrieve one more chunk for explicit policy/FAQ questions.
                                 k_val_whatsapp = 3 if intent_whatsapp == "GENERAL_POLICY_FAQ" else 2
                                 chunks_whatsapp = search_faiss_index(st.session_state.faiss_index_pdfs, user_query_whatsapp,
                                                                      st.session_state.embedding_model, st.session_state.indexed_pdf_chunks, k=k_val_whatsapp)
                                 if chunks_whatsapp:
                                     context_for_llm_whatsapp = "Relevant information from documents:\n\n" + "\n\n---\n\n".join(chunks_whatsapp)
                                 else:
                                     context_for_llm_whatsapp = "I couldn't find specific information in our policy or FAQ documents regarding your query."
                             else:
                                 context_for_llm_whatsapp = "Our policy and FAQ documents are currently unavailable for search."
                         # --- End of RAG context selection ---

                         response_whatsapp = generate_response_groq(st.session_state.groq_client, user_query_whatsapp, context_for_llm_whatsapp)
                         # Mark the message as processed only after a successful send.
                         if send_whatsapp_message(st.session_state.twilio_client, twilio_conversation_service_sid_to_use,
                                                  conv_sid, response_whatsapp, twilio_bot_whatsapp_identity_to_use):
                             st.session_state.processed_message_sids.add(msg_sid)
                             st.success(f"Successfully responded to WhatsApp message SID {msg_sid} from {author_id}.")
                         else:
                             st.error(f"Failed to send WhatsApp response for message SID {msg_sid} from {author_id}.")
                     # Rerun so the spinner clears and the UI reflects the processed messages.
                     st.rerun()
 # --- Footer & Status ---
 st.sidebar.markdown("---")
 st.sidebar.info("Ensure all keys and SIDs are correctly configured. Primary API keys (Twilio SID/Token, GROQ Key) are loaded from secrets if available.")
 # Sidebar status line: a warning while stopped, otherwise a colored app status
 # followed by the WhatsApp bot status.
 if not st.session_state.get("app_started"):
     st.sidebar.warning("App is STOPPED.")
 else:
     if st.session_state.get("rag_pipeline_ready"):
         status_color, app_status_text = "green", "App RUNNING"
     else:
         status_color, app_status_text = "orange", "App Initializing/Error"
     if st.session_state.get("bot_started"):
         bot_status_text = "WhatsApp Bot RUNNING"
     else:
         bot_status_text = "WhatsApp Bot STOPPED"
     st.sidebar.markdown(f"<span style='color:{status_color};'>{app_status_text}</span>. {bot_status_text}.", unsafe_allow_html=True)