AIToyBot

Sleeping

App Files Community

masadonline committed on May 19

Commit

ef85737

verified ·

1 Parent(s): c300567

Update app.py

Browse files

Files changed (1) hide show

app.py +119 -116

app.py CHANGED Viewed

@@ -1,117 +1,95 @@
-import streamlit as st
 import os
-import glob
 import time
 import threading
 from datetime import datetime
-from twilio.rest import Client
-from dotenv import load_dotenv
-from langchain_community.document_loaders import UnstructuredFileLoader
-from langchain_text_splitters import RecursiveCharacterTextSplitter
 from langchain.embeddings import HuggingFaceEmbeddings
-from langchain_community.vectorstores import FAISS
-from langchain_groq import ChatGroq
-from langchain.prompts import PromptTemplate
-from langchain.schema.runnable import RunnablePassthrough
-from langchain.schema.output_parser import StrOutputParser
-# Load env vars if available locally (for local dev)
-load_dotenv()
-groq_api_key = os.getenv("GROQ_API_KEY")
-twilio_sid = os.getenv("TWILIO_ACCOUNT_SID")
-twilio_token = os.getenv("TWILIO_AUTH_TOKEN")
-POLL_INTERVAL_SECONDS = 30
 DOCS_DIR = "docs"
 EMBEDDING_MODEL_NAME = "sentence-transformers/all-MiniLM-L6-v2"
 APP_START_TIME = datetime.utcnow()
-GENERAL_QA_PROMPT = """
-You are an AI assistant for our internal knowledge base.
-Your goal is to provide accurate and concise answers based ONLY on the provided context.
-Do not make up information. If the answer is not found in the context, state that clearly.
-Ensure your answers are directly supported by the text.
-Accuracy is paramount.
-Context:
-{context}
-Question: {question}
-Answer:
-"""
-def get_loader(file_path):
-    _, ext = os.path.splitext(file_path)
-    ext = ext.lower()
-    if ext in ['.pdf', '.docx', '.doc', '.xlsx', '.xls', '.json', '.txt', '.md', '.html']:
-        return UnstructuredFileLoader(file_path, mode="elements", strategy="fast")
-    else:
-        st.warning(f"Unsupported file type: {ext}. Skipping {os.path.basename(file_path)}")
-        return None
-@st.cache_resource(show_spinner=False)
-def load_and_process_documents(docs_path):
-    documents = []
-    doc_files = []
-    for ext in ["*.pdf", "*.docx", "*.xlsx", "*.json", "*.txt", "*.md"]:
-        doc_files.extend(glob.glob(os.path.join(docs_path, ext)))
-    for file_path in doc_files:
-        loader = get_loader(file_path)
-        if loader:
-            docs = loader.load()
-            for doc in docs:
-                doc.metadata["source"] = os.path.basename(file_path)
-            documents.extend(docs)
-    splitter = RecursiveCharacterTextSplitter(chunk_size=1000, chunk_overlap=200)
-    return splitter.split_documents(documents)
-@st.cache_resource(show_spinner=False)
-def create_vector_store(documents, model_name):
     embeddings = HuggingFaceEmbeddings(model_name=model_name)
-    return FAISS.from_documents(documents, embedding=embeddings)
-@st.cache_resource(show_spinner=False)
-def get_llm(api_key, model_name="llama3-8b-8192"):
-    return ChatGroq(temperature=0, groq_api_key=api_key, model_name=model_name)
-def get_rag_chain(llm, retriever, prompt_template):
-    prompt = PromptTemplate.from_template(prompt_template)
-    return (
-        {"context": retriever, "question": RunnablePassthrough()}
-        | prompt
-        | llm
-        | StrOutputParser()
-    )
-def fetch_latest_incoming_message(client, convo_sid):
-    messages = client.conversations.v1.conversations(convo_sid).messages.list(order="desc", limit=1)
-    for msg in messages:
-        if msg.author != "system" and msg.direction == "inbound":
-            return {"body": msg.body, "author": msg.author, "timestamp": msg.date_created}
     return None
-def retrieve_chunks(question, index, model, chunks, top_k=3):
-    retriever = index.as_retriever(search_type="similarity", search_kwargs={"k": top_k})
-    rag_chain = get_rag_chain(model, retriever, GENERAL_QA_PROMPT)
-    answer = rag_chain.invoke({"question": question})
-    return [answer]
 def generate_answer_with_groq(question, context):
-    # This uses the LLM directly with context and question prompt
-    llm = get_llm(groq_api_key)
-    prompt = GENERAL_QA_PROMPT.format(context=context, question=question)
-    return llm.invoke(prompt)
-def send_twilio_message(client, convo_sid, message):
-    client.conversations.v1.conversations(convo_sid).messages.create(body=message)
-def start_conversation_monitor(client, index, model, chunks):
     processed_convos = set()
     last_processed_timestamp = {}
@@ -124,15 +102,19 @@ def start_conversation_monitor(client, index, model, chunks):
                     if convo_sid not in last_processed_timestamp or msg_time > last_processed_timestamp[convo_sid]:
                         last_processed_timestamp[convo_sid] = msg_time
                         question = latest_msg["body"]
-                        context = "\n\n".join(retrieve_chunks(question, index, model, chunks))
                         answer = generate_answer_with_groq(question, context)
                         send_twilio_message(client, convo_sid, answer)
                 time.sleep(3)
             except Exception as e:
-                print(f"Error polling convo {convo_sid}: {e}")
                 time.sleep(5)
     def poll_new_conversations():
         while True:
             try:
                 conversations = client.conversations.v1.conversations.list(limit=20)
@@ -141,28 +123,49 @@ def start_conversation_monitor(client, index, model, chunks):
                     if convo.sid not in processed_convos and convo_full.date_created > APP_START_TIME:
                         participants = client.conversations.v1.conversations(convo.sid).participants.list()
                         for p in participants:
-                            address = p.messaging_binding.get("address", "")
                             if address.startswith("whatsapp:"):
                                 processed_convos.add(convo.sid)
                                 threading.Thread(target=poll_conversation, args=(convo.sid,), daemon=True).start()
             except Exception as e:
-                print(f"Error polling conversations: {e}")
-            time.sleep(POLL_INTERVAL_SECONDS)
     threading.Thread(target=poll_new_conversations, daemon=True).start()
-st.title("🤖 WhatsApp Chatbot with RAG")
-if st.button("🚀 Start WhatsApp Chatbot"):
-    if not all([groq_api_key, twilio_sid, twilio_token]):
-        st.error("Please set GROQ_API_KEY, TWILIO_ACCOUNT_SID, and TWILIO_AUTH_TOKEN environment variables!")
-    else:
-        st.success("🟢 Loading Knowledge Base and Initializing LLM...")
-        client = Client(twilio_sid, twilio_token)
-        chunks = load_and_process_documents(DOCS_DIR)
-        index = create_vector_store(chunks, EMBEDDING_MODEL_NAME)
-        model = get_llm(groq_api_key)
-        st.success("✅ Ready! Monitoring new WhatsApp conversations...")
-        start_conversation_monitor(client, index, model, chunks)
-        st.info(f"⏳ Will check for new messages every {POLL_INTERVAL_SECONDS} seconds.")

 import os
 import time
+import json
 import threading
 from datetime import datetime
+import streamlit as st
+from twilio.rest import Client
 from langchain.embeddings import HuggingFaceEmbeddings
+from langchain.vectorstores import FAISS
+from langchain.document_loaders import PyMuPDFLoader, TextLoader, UnstructuredExcelLoader, UnstructuredWordDocumentLoader
+from langchain.text_splitter import RecursiveCharacterTextSplitter
+from langchain.llms import OpenAI
+# Config
 DOCS_DIR = "docs"
 EMBEDDING_MODEL_NAME = "sentence-transformers/all-MiniLM-L6-v2"
+MONITOR_INTERVAL_SECONDS = 30
 APP_START_TIME = datetime.utcnow()
+# Helper functions
+def load_and_process_documents(folder_path):
+    loaders = {
+        ".pdf": PyMuPDFLoader,
+        ".txt": TextLoader,
+        ".xlsx": UnstructuredExcelLoader,
+        ".docx": UnstructuredWordDocumentLoader
+    }
+    docs = []
+    for filename in os.listdir(folder_path):
+        file_path = os.path.join(folder_path, filename)
+        ext = os.path.splitext(filename)[-1].lower()
+        loader_cls = loaders.get(ext)
+        if loader_cls:
+            try:
+                loader = loader_cls(file_path)
+                docs.extend(loader.load())
+            except Exception as e:
+                print(f"❌ Failed to load {filename}: {e}")
+    if not docs:
+        st.error("No documents loaded.")
+        return []
+    text_splitter = RecursiveCharacterTextSplitter(chunk_size=500, chunk_overlap=50)
+    return text_splitter.split_documents(docs)
+def create_vector_store(docs, model_name):
     embeddings = HuggingFaceEmbeddings(model_name=model_name)
+    return FAISS.from_documents(docs, embeddings)
+def get_llm(api_key):
+    os.environ["OPENAI_API_KEY"] = api_key
+    return OpenAI(model_name="gpt-3.5-turbo", temperature=0)
+def fetch_latest_incoming_message(client, conversation_sid):
+    try:
+        messages = client.conversations.v1.conversations(conversation_sid).messages.list(limit=5)
+        for msg in reversed(messages):
+            if msg.direction == "inbound" and msg.author and msg.body:
+                return {
+                    "author": msg.author,
+                    "body": msg.body.strip(),
+                    "timestamp": msg.date_created
+                }
+    except Exception as e:
+        print(f"❌ Error fetching messages: {e}")
     return None
+def retrieve_chunks(query, index, embed_model, text_chunks, k=4):
+    query_embedding = embed_model.embed_query(query)
+    docs_and_scores = index.similarity_search_by_vector(query_embedding, k=k)
+    return [doc.page_content for doc in docs_and_scores]
 def generate_answer_with_groq(question, context):
+    from groq import Groq
+    client = Groq(api_key=os.getenv("GROQ_API_KEY"))
+    chat_completion = client.chat.completions.create(
+        model="llama3-8b-8192",
+        messages=[
+            {"role": "system", "content": "You are a helpful assistant for a toy shop. Respond to customer queries based on provided order and product info."},
+            {"role": "user", "content": f"Context: {context}\n\nQuestion: {question}"}
+        ]
+    )
+    return chat_completion.choices[0].message.content.strip()
+def send_twilio_message(client, conversation_sid, reply):
+    try:
+        client.conversations.v1.conversations(conversation_sid).messages.create(body=reply)
+    except Exception as e:
+        print(f"❌ Failed to send message: {e}")
+def start_conversation_monitor(client, index, embed_model, text_chunks):
     processed_convos = set()
     last_processed_timestamp = {}
                     if convo_sid not in last_processed_timestamp or msg_time > last_processed_timestamp[convo_sid]:
                         last_processed_timestamp[convo_sid] = msg_time
                         question = latest_msg["body"]
+                        sender = latest_msg["author"]
+                        print(f"\n📥 New message from {sender} in {convo_sid}: {question}")
+                        context = "\n\n".join(retrieve_chunks(question, index, embed_model, text_chunks))
                         answer = generate_answer_with_groq(question, context)
                         send_twilio_message(client, convo_sid, answer)
+                        print(f"📤 Replied to {sender}: {answer}")
                 time.sleep(3)
             except Exception as e:
+                print(f"❌ Error in convo {convo_sid} polling:", e)
                 time.sleep(5)
     def poll_new_conversations():
+        print("➡️ Monitoring for new WhatsApp conversations...")
         while True:
             try:
                 conversations = client.conversations.v1.conversations.list(limit=20)
                     if convo.sid not in processed_convos and convo_full.date_created > APP_START_TIME:
                         participants = client.conversations.v1.conversations(convo.sid).participants.list()
                         for p in participants:
+                            address = p.messaging_binding.get("address", "") if p.messaging_binding else ""
                             if address.startswith("whatsapp:"):
+                                print(f"🆕 New WhatsApp convo found: {convo.sid}")
                                 processed_convos.add(convo.sid)
                                 threading.Thread(target=poll_conversation, args=(convo.sid,), daemon=True).start()
             except Exception as e:
+                print("❌ Error polling conversations:", e)
+            time.sleep(MONITOR_INTERVAL_SECONDS)
     threading.Thread(target=poll_new_conversations, daemon=True).start()
+# Main Streamlit UI
+def main():
+    st.set_page_config(page_title="ToyShop Assistant", layout="wide")
+    st.title("🧸 ToyShop Assistant – WhatsApp Chatbot (RAG + Twilio)")
+    if st.button("🚀 Start"):
+        with st.spinner("Loading and processing documents..."):
+            docs = load_and_process_documents(DOCS_DIR)
+            if not docs:
+                return
+        with st.spinner("Creating vector store..."):
+            vector_store = create_vector_store(docs, EMBEDDING_MODEL_NAME)
+            if not vector_store:
+                return
+        with st.spinner("Initializing LLM..."):
+            llm = get_llm(os.getenv("OPENAI_API_KEY"))
+            if not llm:
+                return
+        account_sid = os.getenv("TWILIO_ACCOUNT_SID")
+        auth_token = os.getenv("TWILIO_AUTH_TOKEN")
+        if not account_sid or not auth_token:
+            st.error("Twilio credentials not found in environment variables.")
+            return
+        client = Client(account_sid, auth_token)
+        st.success("✅ Setup complete. Monitoring WhatsApp conversations...")
+        start_conversation_monitor(client, vector_store, HuggingFaceEmbeddings(model_name=EMBEDDING_MODEL_NAME), docs)
+        st.info(f"📡 Watching for messages every {MONITOR_INTERVAL_SECONDS} seconds...")
+if __name__ == "__main__":
+    main()