Spaces:

masadonline
/

Quasa

Sleeping

App Files Community

masadonline committed on May 16

Commit

6785822

verified ·

1 Parent(s): 1b3ae27

Update app.py

Browse files

Files changed (1) hide show

app.py +53 -39

app.py CHANGED Viewed

@@ -1,5 +1,6 @@
 import os
-import streamlit as st
 import PyPDF2
 from pdfminer.high_level import extract_text
 from transformers import AutoTokenizer
@@ -7,9 +8,9 @@ from sentence_transformers import SentenceTransformer
 import faiss
 import numpy as np
 from groq import Groq
-import docx  # to read .docx files
-# --- Helper Functions ---
 def extract_text_from_pdf(pdf_path):
     try:
@@ -23,7 +24,7 @@ def extract_text_from_pdf(pdf_path):
                     text += page_text
         return text
     except Exception as e:
-        st.warning(f"PyPDF2 failed with error: {e}. Trying pdfminer.six...")
         return extract_text(pdf_path)
 def extract_text_from_docx(docx_path):
@@ -34,7 +35,7 @@ def extract_text_from_docx(docx_path):
             full_text.append(para.text)
         return '\n'.join(full_text)
     except Exception as e:
-        st.warning(f"Failed to read DOCX {docx_path}: {e}")
         return ""
 def chunk_text_with_tokenizer(text, tokenizer, chunk_size=150, chunk_overlap=30):
@@ -57,7 +58,7 @@ def retrieve_relevant_chunks(question, index, embeddings_model, text_chunks, k=3
 def generate_answer_with_groq(question, context):
     prompt = f"Based on the following context, answer the question: '{question}'\n\nContext:\n{context}"
-    model_name = "llama-3.3-70b-versatile"  # Adjust model if needed
     try:
         groq_client = Groq(api_key=os.environ.get("GROQ_API_KEY"))
         response = groq_client.chat.completions.create(
@@ -69,49 +70,34 @@ def generate_answer_with_groq(question, context):
         )
         return response.choices[0].message.content
     except Exception as e:
-        st.error(f"Error generating answer with Groq API: {e}")
         return "I'm sorry, I couldn't generate an answer at this time."
-# --- Streamlit UI & Logic ---
-st.set_page_config(page_title="SMEHelpBot 🤖", layout="wide")
-st.title("🤖 SMEHelpBot – Your AI Assistant for Small Businesses")
-# GROQ API key check
-GROQ_API_KEY = st.secrets.get("GROQ_API_KEY") or os.getenv("GROQ_API_KEY")
-if not GROQ_API_KEY:
-    st.error("❌ Please set your GROQ_API_KEY in environment or .streamlit/secrets.toml")
-    st.stop()
-os.environ["GROQ_API_KEY"] = GROQ_API_KEY
-# Load and process all docs at startup
-@st.cache_data(show_spinner=True)
 def load_and_prepare_docs(folder_path="docs"):
     all_text = ""
     if not os.path.exists(folder_path):
-        st.error(f"Folder '{folder_path}' does not exist!")
         return None, None, None
-    # Collect all pdf and docx files
     files = [f for f in os.listdir(folder_path) if f.lower().endswith(('.pdf', '.docx', '.doc'))]
     if not files:
-        st.error(f"No PDF or DOCX files found in folder '{folder_path}'.")
         return None, None, None
     for file in files:
         path = os.path.join(folder_path, file)
         if file.lower().endswith('.pdf'):
             text = extract_text_from_pdf(path)
-        elif file.lower().endswith(('.docx', '.doc')):
-            text = extract_text_from_docx(path)
         else:
-            text = ""
         if text:
             all_text += text + "\n\n"
     if not all_text.strip():
-        st.error("No text extracted from documents.")
         return None, None, None
     tokenizer = AutoTokenizer.from_pretrained('bert-base-uncased')
@@ -121,26 +107,54 @@ def load_and_prepare_docs(folder_path="docs"):
     all_embeddings = embedding_model.encode(text_chunks) if text_chunks else None
     if all_embeddings is None or len(all_embeddings) == 0:
-        st.error("No text chunks found to create embeddings.")
         return None, None, None
     embedding_dim = all_embeddings[0].shape[0]
     index = faiss.IndexFlatL2(embedding_dim)
     index.add(np.array(all_embeddings))
     return index, embedding_model, text_chunks
 index, embedding_model, text_chunks = load_and_prepare_docs()
-user_question = st.text_input("💬 Ask your question about SME documents:")
-if st.button("Get Answer") and user_question:
     if index is None or embedding_model is None or text_chunks is None:
-        st.error("The document knowledge base is not ready. Please check the errors above.")
-    else:
-        with st.spinner("Searching for relevant information and generating answer..."):
-            relevant_chunks = retrieve_relevant_chunks(user_question, index, embedding_model, text_chunks)
-            context = "\n\n".join(relevant_chunks)
-            answer = generate_answer_with_groq(user_question, context)
-            st.markdown("### Answer:")
-            st.success(answer)

 import os
+from flask import Flask, request
+from twilio.twiml.messaging_response import MessagingResponse
 import PyPDF2
 from pdfminer.high_level import extract_text
 from transformers import AutoTokenizer
 import faiss
 import numpy as np
 from groq import Groq
+import docx
+# --- Helper functions: text extraction, chunking, and answer generation ---
 def extract_text_from_pdf(pdf_path):
     try:
                     text += page_text
         return text
     except Exception as e:
+        print(f"PyPDF2 failed with error: {e}. Trying pdfminer.six...")
         return extract_text(pdf_path)
 def extract_text_from_docx(docx_path):
             full_text.append(para.text)
         return '\n'.join(full_text)
     except Exception as e:
+        print(f"Failed to read DOCX {docx_path}: {e}")
         return ""
 def chunk_text_with_tokenizer(text, tokenizer, chunk_size=150, chunk_overlap=30):
 def generate_answer_with_groq(question, context):
     prompt = f"Based on the following context, answer the question: '{question}'\n\nContext:\n{context}"
+    model_name = "llama-3.3-70b-versatile"  # Adjust if needed
     try:
         groq_client = Groq(api_key=os.environ.get("GROQ_API_KEY"))
         response = groq_client.chat.completions.create(
         )
         return response.choices[0].message.content
     except Exception as e:
+        print(f"Error generating answer with Groq API: {e}")
         return "I'm sorry, I couldn't generate an answer at this time."
+# --- Load and prepare docs on startup ---
 def load_and_prepare_docs(folder_path="docs"):
+    print("Loading documents from", folder_path)
     all_text = ""
     if not os.path.exists(folder_path):
+        print(f"Folder '{folder_path}' does not exist!")
         return None, None, None
     files = [f for f in os.listdir(folder_path) if f.lower().endswith(('.pdf', '.docx', '.doc'))]
     if not files:
+        print(f"No PDF or DOCX files found in folder '{folder_path}'.")
         return None, None, None
     for file in files:
         path = os.path.join(folder_path, file)
         if file.lower().endswith('.pdf'):
             text = extract_text_from_pdf(path)
         else:
+            text = extract_text_from_docx(path)
         if text:
             all_text += text + "\n\n"
     if not all_text.strip():
+        print("No text extracted from documents.")
         return None, None, None
     tokenizer = AutoTokenizer.from_pretrained('bert-base-uncased')
     all_embeddings = embedding_model.encode(text_chunks) if text_chunks else None
     if all_embeddings is None or len(all_embeddings) == 0:
+        print("No text chunks found to create embeddings.")
         return None, None, None
     embedding_dim = all_embeddings[0].shape[0]
     index = faiss.IndexFlatL2(embedding_dim)
     index.add(np.array(all_embeddings))
+    print("Documents loaded and FAISS index created.")
     return index, embedding_model, text_chunks
+# --- Flask app and WhatsApp webhook ---
+from flask_cors import CORS
+app = Flask(__name__)
+CORS(app)  # Optional, if you call API from other domains
+# Load documents once at start
 index, embedding_model, text_chunks = load_and_prepare_docs()
+@app.route("/whatsapp", methods=["POST"])
+def whatsapp_reply():
+    incoming_msg = request.values.get('Body', '').strip()
+    from_number = request.values.get('From', '')
+    print(f"Incoming message from {from_number}: {incoming_msg}")
+    resp = MessagingResponse()
+    msg = resp.message()
+    if not incoming_msg:
+        msg.body("Please send a question.")
+        return str(resp)
     if index is None or embedding_model is None or text_chunks is None:
+        msg.body("Sorry, the knowledge base is not ready. Please try again later.")
+        return str(resp)
+    # Retrieve context and generate answer
+    relevant_chunks = retrieve_relevant_chunks(incoming_msg, index, embedding_model, text_chunks)
+    context = "\n\n".join(relevant_chunks)
+    answer = generate_answer_with_groq(incoming_msg, context)
+    msg.body(answer)
+    return str(resp)
+if __name__ == "__main__":
+    GROQ_API_KEY = os.getenv("GROQ_API_KEY")
+    if not GROQ_API_KEY:
+        print("Please set the GROQ_API_KEY environment variable before running.")
+        exit(1)
+    print("Starting WhatsApp SMEHelpBot server...")
+    app.run(host="0.0.0.0", port=int(os.environ.get("PORT", 5000)))