Spaces:

masadonline
/

Quasa

Sleeping

App Files Files Community

masadonline committed on May 17

Commit

60c8a15

verified ·

1 Parent(s): 7088627

Update app.py

Browse files

Files changed (1) hide show

app.py +153 -115

app.py CHANGED Viewed

@@ -1,126 +1,164 @@
 import streamlit as st
 from twilio.rest import Client
-import requests
-from PyPDF2 import PdfReader
 from groq import Groq
-from langchain.embeddings import HuggingFaceEmbeddings
-from langchain.vectorstores import FAISS
-from langchain.text_splitter import RecursiveCharacterTextSplitter
-from langchain.docstore.document import Document
-from langchain.prompts import PromptTemplate
-from langchain.chains import RetrievalQA
-from langchain.llms.base import LLM
-from langchain_core.outputs import Generation
-import tempfile
-import os
-# ---- CONFIG ---- #
-st.set_page_config(page_title="Quasa – Smart WhatsApp Chatbot", layout="wide")
-# ---- SESSION STATE ---- #
-if "conversation_sid" not in st.session_state:
-    st.session_state.conversation_sid = ""
-if "user_message" not in st.session_state:
-    st.session_state.user_message = ""
-if "response" not in st.session_state:
-    st.session_state.response = ""
-# ---- SIDEBAR ---- #
-with st.sidebar:
-    st.title("📱 Quasa Setup")
-    groq_api_key = st.text_input("🔑 GROQ API Key", type="password")
-    twilio_sid = st.text_input("🧩 Twilio Account SID", type="password")
-    twilio_token = st.text_input("🔐 Twilio Auth Token", type="password")
-    twilio_conv_sid = st.text_input("💬 Twilio Conversation SID")
-    uploaded_file = st.file_uploader("📄 Upload Knowledge PDF", type=["pdf"])
-    if uploaded_file:
-        st.success("PDF uploaded. Ready to chat!")
-# ---- LLM Setup ---- #
-class SimpleGroqLLM(LLM):
-    def __init__(self, api_key: str, model_name="llama3-8b-8192"):
-        self.client = Groq(api_key=api_key)
-        self.model_name = model_name
-    def _call(self, prompt: str, stop=None) -> str:
-        response = self.client.chat.completions.create(
-            messages=[{"role": "user", "content": prompt}],
-            model=self.model_name
-        )
-        return response.choices[0].message.content
-    @property
-    def _llm_type(self) -> str:
-        return "simple_groq"
-# ---- HELPER FUNCTIONS ---- #
-def extract_text_from_pdf(file) -> str:
-    reader = PdfReader(file)
-    text = ""
-    for page in reader.pages:
-        text += page.extract_text() + "\n"
-    return text
-def create_vector_store(text: str):
-    splitter = RecursiveCharacterTextSplitter(chunk_size=500, chunk_overlap=100)
-    chunks = splitter.split_text(text)
-    docs = [Document(page_content=chunk) for chunk in chunks]
-    embeddings = HuggingFaceEmbeddings()
-    return FAISS.from_documents(docs, embeddings)
-def get_response_from_rag(query, vectorstore, groq_api_key):
-    retriever = vectorstore.as_retriever()
-    llm = SimpleGroqLLM(api_key=groq_api_key)
-    qa_chain = RetrievalQA.from_chain_type(
-        llm=llm,
-        retriever=retriever,
-        return_source_documents=False
-    )
-    return qa_chain.run(query)
 def send_twilio_message(account_sid, auth_token, conversation_sid, body):
     try:
         client = Client(account_sid, auth_token)
-        message = client.conversations.v1.conversations(conversation_sid).messages.create(
-            author='ChatBot',  # Fixed: use static name instead of WhatsApp number
-            body=body
-        )
         return message.sid
     except Exception as e:
-        return f"⚠️ Failed to send message: {e}"
-# ---- MAIN ---- #
-st.title("🤖 Quasa – Smart WhatsApp Chatbot")
-if uploaded_file and groq_api_key and twilio_sid and twilio_token and twilio_conv_sid:
-    st.session_state.conversation_sid = twilio_conv_sid
-    # Extract and vectorize
-    with st.spinner("🔍 Reading and indexing document..."):
-        text = extract_text_from_pdf(uploaded_file)
-        vectorstore = create_vector_store(text)
-    # Input + response area
-    user_input = st.text_input("💬 Ask a question (from WhatsApp user):", key="input")
-    if st.button("📩 Respond & Send"):
-        if user_input:
-            with st.spinner("🤖 Generating response..."):
-                answer = get_response_from_rag(user_input, vectorstore, groq_api_key)
-                st.success("✅ Response Generated:")
-                st.write(answer)
-                # Send to Twilio
-                with st.spinner("📤 Sending to WhatsApp..."):
-                    msg_sid = send_twilio_message(twilio_sid, twilio_token, twilio_conv_sid, answer)
-                    st.info(f"📨 Message SID: `{msg_sid}`")
-        else:
-            st.warning("❗Please enter a question to proceed.")
 else:
-    st.warning("🚧 Please upload a PDF and fill in all credentials in the sidebar to proceed.")

+import os
+import time
 import streamlit as st
 from twilio.rest import Client
+from twilio.base.exceptions import TwilioRestException
+from pdfminer.high_level import extract_text
+from sentence_transformers import SentenceTransformer
+from transformers import AutoTokenizer
+import faiss
+import numpy as np
+import docx
 from groq import Groq
+import PyPDF2
+import requests
+# --- Document Loaders ---
+def extract_text_from_pdf(pdf_path):
+    """Return the text content of the PDF stored at ``pdf_path``.
+
+    PyPDF2 is tried first. If anything goes wrong while opening or parsing
+    the file with PyPDF2, the file is handed to pdfminer's ``extract_text``
+    instead.
+
+    Args:
+        pdf_path: Filesystem path of the PDF file.
+
+    Returns:
+        The page texts concatenated in page order, with no separator
+        inserted between pages. Pages that yield no text are skipped.
+
+    Raises:
+        Exception: Whatever the pdfminer fallback raises if it fails too.
+    """
+    try:
+        with open(pdf_path, 'rb') as file:
+            pdf_reader = PyPDF2.PdfReader(file)
+            # Extraction has to happen while the file handle is still open.
+            page_texts = [page.extract_text() for page in pdf_reader.pages]
+        # Skip pages for which PyPDF2 produced nothing (None or "").
+        return "".join(page_text for page_text in page_texts if page_text)
+    except Exception:
+        # `except Exception` instead of a bare `except` so KeyboardInterrupt
+        # and SystemExit still propagate; every ordinary failure falls back
+        # to pdfminer exactly as before.
+        return extract_text(pdf_path)
+def extract_text_from_docx(docx_path):
+    """Return the paragraph text of the Word document at ``docx_path``.
+
+    Args:
+        docx_path: Filesystem path of the document.
+
+    Returns:
+        The document's paragraphs joined with newlines, or an empty string
+        when the file cannot be opened or parsed (best-effort loader).
+    """
+    try:
+        doc = docx.Document(docx_path)
+        return '\n'.join(para.text for para in doc.paragraphs)
+    except Exception:
+        # Deliberately best-effort: an unreadable file contributes no text.
+        # `except Exception` (not a bare `except`) keeps KeyboardInterrupt
+        # and SystemExit from being swallowed.
+        return ""
+def chunk_text(text, tokenizer, chunk_size=150, chunk_overlap=30):
+    """Split ``text`` into overlapping chunks of at most ``chunk_size`` tokens.
+
+    Args:
+        text: The text to split.
+        tokenizer: Object providing ``tokenize(text)`` and
+            ``convert_tokens_to_string(tokens)``.
+        chunk_size: Maximum number of tokens per chunk; must be positive.
+        chunk_overlap: Number of tokens shared by consecutive chunks; must
+            be smaller than ``chunk_size``.
+
+    Returns:
+        A list of chunk strings in document order; empty when ``text``
+        produces no tokens.
+
+    Raises:
+        ValueError: If ``chunk_size`` is not positive or ``chunk_overlap``
+            is not smaller than ``chunk_size`` (the window would never
+            advance and the loop would not terminate).
+    """
+    if chunk_size <= 0:
+        raise ValueError("chunk_size must be positive")
+    if chunk_overlap >= chunk_size:
+        raise ValueError("chunk_overlap must be smaller than chunk_size")
+    tokens = tokenizer.tokenize(text)
+    step = chunk_size - chunk_overlap
+    chunks = []
+    for start in range(0, len(tokens), step):
+        end = min(start + chunk_size, len(tokens))
+        chunks.append(tokenizer.convert_tokens_to_string(tokens[start:end]))
+        if end == len(tokens):
+            # The window already reached the last token; any further window
+            # would only repeat a suffix of this chunk.
+            break
+    return chunks
+def retrieve_chunks(question, index, embed_model, text_chunks, k=3):
+    """Return the text chunks whose embeddings are closest to ``question``.
+
+    Args:
+        question: The query string.
+        index: Search index exposing ``search(vectors, k)`` and returning
+            ``(distances, indices)``.
+        embed_model: Model exposing ``encode(list_of_str)``.
+        text_chunks: Chunks in the same order as the vectors in ``index``.
+        k: Maximum number of chunks to return.
+
+    Returns:
+        Up to ``k`` chunks, in the order the index returned them. Empty when
+        there are no chunks or ``k`` is less than 1.
+    """
+    # Never ask for more neighbours than there are chunks.
+    k = min(k, len(text_chunks))
+    if k < 1:
+        return []
+    question_embedding = embed_model.encode([question])[0]
+    # FAISS expects a 2-D float32 array of query vectors.
+    query = np.array([question_embedding], dtype="float32")
+    _, neighbours = index.search(query, k)
+    # FAISS pads missing results with -1, which as a Python index would
+    # silently select the last chunk; drop anything out of range.
+    return [text_chunks[i] for i in neighbours[0] if 0 <= i < len(text_chunks)]
+# --- GROQ Answer Generation ---
+def generate_answer_with_groq(question, context, retries=3, delay=2, timeout=30):
+    """Ask the Groq chat-completions endpoint to answer ``question``.
+
+    The API key is read from the ``GROQ_API_KEY`` environment variable.
+
+    Args:
+        question: The user's question.
+        context: Supporting text placed in the prompt.
+        retries: Maximum number of attempts.
+        delay: Seconds to sleep before retrying after an HTTP 503.
+        timeout: Seconds to wait for the HTTP request before giving up.
+
+    Returns:
+        The stripped answer text, or a string starting with
+        ``"⚠️ Groq API Error: "`` when no answer could be obtained.
+
+    Raises:
+        KeyError: If ``GROQ_API_KEY`` is not set in the environment.
+    """
+    url = "https://api.groq.com/openai/v1/chat/completions"
+    api_key = os.environ["GROQ_API_KEY"]
+    headers = {
+        "Authorization": f"Bearer {api_key}",
+        "Content-Type": "application/json",
+    }
+    prompt = f"Based on the following context, answer the question: '{question}'\n\nContext:\n{context}"
+    payload = {
+        "model": "llama3-8b-8192",
+        "messages": [
+            {"role": "system", "content": "Hey there! I'm designed to respond just like a real person would. Ask me anything, and I'll do my best to give you a thoughtful and courteous answer."},
+            {"role": "user", "content": prompt},
+        ],
+        "temperature": 0.5,
+        "max_tokens": 300,
+    }
+    # Reported if the loop body never runs (retries <= 0).
+    last_error = "no request attempts were made"
+    for attempt in range(retries):
+        try:
+            # Without a timeout a stalled connection would block forever.
+            response = requests.post(url, headers=headers, json=payload, timeout=timeout)
+            # requests does not raise on HTTP error codes by itself; without
+            # this call a 503 would never reach the retry branch below.
+            response.raise_for_status()
+            result = response.json()
+            return result['choices'][0]['message']['content'].strip()
+        except requests.exceptions.HTTPError as e:
+            last_error = str(e)
+            status = e.response.status_code if e.response is not None else None
+            if status == 503 and attempt < retries - 1:
+                time.sleep(delay)
+                continue
+            break
+        except Exception as e:
+            # Malformed payloads, connection failures, timeouts, ...
+            last_error = str(e)
+            break
+    return f"⚠️ Groq API Error: {last_error}"
+# --- Twilio Chat Handlers ---
+def fetch_latest_incoming_message(account_sid, auth_token, conversation_sid):
+    """Return the most recent WhatsApp-authored message of a conversation.
+
+    Only the 10 newest messages of the conversation are inspected.
+
+    Args:
+        account_sid: Twilio account SID.
+        auth_token: Twilio auth token.
+        conversation_sid: SID of the Twilio conversation to read.
+
+    Returns:
+        A ``(body, author, index)`` tuple for the newest message whose
+        author starts with ``"whatsapp:"``, or ``(None, None, None)`` when
+        none of the inspected messages qualifies.
+
+    Raises:
+        TwilioRestException: If the Twilio API request fails.
+    """
+    client = Client(account_sid, auth_token)
+    # order="desc" returns the newest messages first. With the default
+    # (ascending) order, limit=10 yields the OLDEST ten messages, so new
+    # messages are never seen once the conversation grows past ten.
+    messages = client.conversations.v1.conversations(conversation_sid).messages.list(order="desc", limit=10)
+    for msg in messages:
+        # Guard against a missing author before the prefix test.
+        if (msg.author or "").startswith("whatsapp:"):
+            return msg.body, msg.author, msg.index
+    return None, None, None
 def send_twilio_message(account_sid, auth_token, conversation_sid, body):
+    # Post `body` into the given Twilio conversation.
+    # Returns the created message's SID on success. On ANY exception the
+    # exception text is returned instead of being raised.
+    # NOTE(review): callers cannot tell a SID from an error string by type
+    # alone - both are plain strings.
     try:
         client = Client(account_sid, auth_token)
+        # The reply is authored as "system".
+        message = client.conversations.v1.conversations(conversation_sid).messages.create(author="system", body=body)
         return message.sid
     except Exception as e:
+        return str(e)
+# --- Streamlit UI ---
+# Page chrome: wide layout plus the app title.
+st.set_page_config(page_title="Quasa – A Smart WhatsApp Chatbot", layout="wide")
+st.title("📱 Quasa – A Smart WhatsApp Chatbot")
+# Load from Hugging Face secrets
+# (.get() yields None for a missing key instead of raising.)
+account_sid = st.secrets.get("TWILIO_SID")
+auth_token = st.secrets.get("TWILIO_TOKEN")
+GROQ_API_KEY = st.secrets.get("GROQ_API_KEY")
+# Fallback for testing
+# If any of the three secrets is missing, show input fields for all three,
+# pre-filled with whatever values were found.
+if not all([account_sid, auth_token, GROQ_API_KEY]):
+    st.warning("⚠️ Some secrets not found. Please enter missing credentials below:")
+    account_sid = st.text_input("Twilio SID", value=account_sid or "")
+    auth_token = st.text_input("Twilio Auth Token", type="password", value=auth_token or "")
+    GROQ_API_KEY = st.text_input("GROQ API Key", type="password", value=GROQ_API_KEY or "")
+# Always show conversation SID input
+conversation_sid = st.text_input("Enter Conversation SID", value="")
+# Initialize session state to track last message
+# -1 is the initial value, used before any message has been recorded.
+if "last_index" not in st.session_state:
+    st.session_state.last_index = -1
+# Main flow: runs only once every credential and the conversation SID are set.
+if all([account_sid, auth_token, GROQ_API_KEY, conversation_sid]):
+    # generate_answer_with_groq() reads the key from the environment.
+    os.environ["GROQ_API_KEY"] = GROQ_API_KEY
+
+    @st.cache_resource
+    def setup_knowledge_base():
+        """Build the FAISS index over every PDF/Word file in ``docs``.
+
+        Returns:
+            ``(index, model, chunks)``: the FAISS index, the sentence
+            embedding model, and the chunk strings in index order.
+
+        Raises:
+            OSError: If the ``docs`` folder cannot be listed.
+            ValueError: If no text could be extracted from the documents.
+        """
+        folder_path = "docs"
+        texts = []
+        # Sorted so chunk order does not depend on directory listing order.
+        for file in sorted(os.listdir(folder_path)):
+            path = os.path.join(folder_path, file)
+            if file.endswith(".pdf"):
+                texts.append(extract_text_from_pdf(path))
+            elif file.endswith((".docx", ".doc")):
+                texts.append(extract_text_from_docx(path))
+        all_text = "".join(doc_text + "\n" for doc_text in texts)
+        if not all_text.strip():
+            # Fail early instead of hitting IndexError on embeddings[0].
+            raise ValueError(f"no readable text found in '{folder_path}'")
+        tokenizer = AutoTokenizer.from_pretrained('bert-base-uncased')
+        chunks = chunk_text(all_text, tokenizer)
+        if not chunks:
+            raise ValueError(f"documents in '{folder_path}' produced no text chunks")
+        model = SentenceTransformer('all-mpnet-base-v2')
+        embeddings = model.encode(chunks)
+        dim = embeddings[0].shape[0]
+        index = faiss.IndexFlatL2(dim)
+        # FAISS only accepts float32 vectors.
+        index.add(np.array(embeddings, dtype="float32"))
+        return index, model, chunks
+
+    try:
+        # A raised exception is not cached, so the build is retried on the
+        # next rerun once the documents are in place.
+        index, embedding_model, text_chunks = setup_knowledge_base()
+    except (OSError, ValueError) as exc:
+        st.error(f"❌ Could not build the knowledge base: {exc}")
+        st.stop()
+    st.success("✅ Knowledge base ready. Monitoring WhatsApp...")
+    if st.button("🔁 Check for New WhatsApp Query"):
+        with st.spinner("Checking messages..."):
+            try:
+                question, sender, msg_index = fetch_latest_incoming_message(account_sid, auth_token, conversation_sid)
+            except TwilioRestException as exc:
+                # e.g. wrong credentials or an unknown conversation SID.
+                st.error(f"❌ Could not read the conversation: {exc}")
+                question, sender, msg_index = None, None, None
+            if question and msg_index > st.session_state.last_index:
+                st.info(f"📥 New Question from {sender}:\n\n> {question}")
+                relevant_chunks = retrieve_chunks(question, index, embedding_model, text_chunks)
+                context = "\n\n".join(relevant_chunks)
+                answer = generate_answer_with_groq(question, context)
+                send_result = send_twilio_message(account_sid, auth_token, conversation_sid, answer)
+                # send_twilio_message() returns either the message SID or the
+                # error text; Conversations message SIDs start with "IM".
+                if str(send_result).startswith("IM"):
+                    # Mark the message handled only after delivery, so a
+                    # failed send can be retried with the same button.
+                    st.session_state.last_index = msg_index
+                    st.success("📤 Answer sent via WhatsApp!")
+                else:
+                    st.error(f"❌ Failed to send the answer via WhatsApp: {send_result}")
+                st.markdown(f"### ✨ Answer:\n\n{answer}")
+            else:
+                st.warning("No new messages from users found.")
 else:
+    st.warning("❗ Please provide all required credentials and conversation SID.")