masadonline committed on
Commit
717234d
·
verified ·
1 Parent(s): 6bda95c

Update app.py

Browse files
Files changed (1) hide show
  1. app.py +93 -108
app.py CHANGED
@@ -1,145 +1,130 @@
1
  import os
 
2
  import streamlit as st
3
- import PyPDF2
 
4
  from pdfminer.high_level import extract_text
5
- from transformers import AutoTokenizer
6
  from sentence_transformers import SentenceTransformer
 
7
  import faiss
8
  import numpy as np
9
- from groq import Groq
10
  import docx
11
- from fastapi import FastAPI, Request
12
- import uvicorn
13
- import threading
14
- from pydantic import BaseModel
15
- from twilio.rest import Client
16
- from fastapi.responses import JSONResponse
17
-
18
- # --- Global Config ---
19
-
20
- WHATSAPP_FROM = "whatsapp:+14155238886" # Twilio sandbox number
21
- WHATSAPP_TO = os.getenv("WHATSAPP_TO") or "whatsapp:+YOUR_NUMBER"
22
- GROQ_API_KEY = os.getenv("GROQ_API_KEY")
23
- TWILIO_SID = os.getenv("TWILIO_SID")
24
- TWILIO_TOKEN = os.getenv("TWILIO_TOKEN")
25
-
26
- # --- Helper Functions ---
27
 
 
def extract_text_from_pdf(pdf_path):
    """Return the text of the PDF at ``pdf_path``.

    PyPDF2 is tried first: the text of every page that yields any text is
    concatenated in page order.  If that attempt raises, the whole file is
    handed to pdfminer's ``extract_text`` instead.
    """
    try:
        with open(pdf_path, 'rb') as handle:
            page_texts = [page.extract_text() for page in PyPDF2.PdfReader(handle).pages]
        # Pages without extractable text (empty/None) contribute nothing.
        return "".join(piece for piece in page_texts if piece)
    except Exception:
        return extract_text(pdf_path)
40
 
def extract_text_from_docx(docx_path):
    """Return the paragraphs of a Word document joined by newlines.

    Any exception raised while opening or reading the document results in
    an empty string.
    """
    try:
        paragraph_texts = [paragraph.text for paragraph in docx.Document(docx_path).paragraphs]
        return '\n'.join(paragraph_texts)
    except Exception:
        return ""
47
 
48
def chunk_text(text, tokenizer, chunk_size=150, overlap=30):
    """Split ``text`` into overlapping chunks of at most ``chunk_size`` tokens.

    Args:
        text: The text to split.
        tokenizer: Object providing ``tokenize(text)`` and
            ``convert_tokens_to_string(tokens)``.
        chunk_size: Maximum number of tokens per chunk; must be positive.
        overlap: Number of tokens shared by consecutive chunks; must be
            smaller than ``chunk_size``.

    Returns:
        A list of chunk strings, empty when ``text`` yields no tokens.

    Raises:
        ValueError: If the window cannot advance (``chunk_size <= 0`` or
            ``overlap >= chunk_size``).  Previously this looped forever.
    """
    if chunk_size <= 0 or overlap >= chunk_size:
        raise ValueError("chunk_size must be positive and larger than overlap")

    tokens = tokenizer.tokenize(text)
    step = chunk_size - overlap
    chunks = []
    for start in range(0, len(tokens), step):
        end = min(start + chunk_size, len(tokens))
        chunks.append(tokenizer.convert_tokens_to_string(tokens[start:end]))
        # Once a chunk reaches the last token, any further window would only
        # repeat the tail of this chunk, so stop here.
        if end == len(tokens):
            break
    return chunks
57
 
58
def get_embeddings_and_index(chunks, model):
    """Embed ``chunks`` and build a flat L2 FAISS index over the vectors.

    Args:
        chunks: Non-empty sequence of text chunks.
        model: Object whose ``encode(chunks)`` returns one vector per chunk.

    Returns:
        ``(index, embeddings)`` where ``embeddings`` is a 2-D float32 array
        and ``index`` is a ``faiss.IndexFlatL2`` containing those vectors.

    Raises:
        ValueError: If ``chunks`` is empty (there is nothing to index and
            the vector dimension cannot be determined).
    """
    if len(chunks) == 0:
        raise ValueError("cannot build an index from an empty list of chunks")

    # FAISS works on contiguous float32 matrices; normalise once up front.
    embeddings = np.ascontiguousarray(model.encode(chunks), dtype="float32")
    index = faiss.IndexFlatL2(embeddings.shape[1])
    index.add(embeddings)
    return index, embeddings
64
-
65
def get_relevant_chunks(question, index, model, chunks, k=3):
    """Return up to ``k`` chunks whose embeddings are nearest to ``question``.

    Args:
        question: The query text.
        index: Object whose ``search(queries, k)`` returns
            ``(distances, indices)`` with one row per query.
        model: Object whose ``encode([text])`` returns one vector per text.
        chunks: The chunk list the index was built from.
        k: Number of neighbours to request.

    Returns:
        The matching chunks in the order the index returned them; an empty
        list when ``chunks`` is empty.
    """
    if len(chunks) == 0:
        return []

    query = np.asarray(model.encode([question]), dtype="float32")
    _, neighbours = index.search(query, k)
    # FAISS pads missing neighbours with -1 when it holds fewer than k
    # vectors; indexing a list with -1 would silently return the last chunk.
    return [chunks[i] for i in neighbours[0] if 0 <= i < len(chunks)]
69
 
70
def generate_answer(question, context):
    """Ask the Groq chat model to answer ``question`` using ``context``.

    Returns the model's reply text.  If creating the client or the request
    raises, the string ``"Error: <exception>"`` is returned instead.
    """
    prompt = f"Based on the following context, answer the question: '{question}'\n\nContext:\n{context}"
    conversation = [
        {"role": "system", "content": "You are an AI assistant for small businesses."},
        {"role": "user", "content": prompt},
    ]
    try:
        completion = Groq(api_key=GROQ_API_KEY).chat.completions.create(
            model="llama-3.3-70b-versatile",
            messages=conversation,
        )
        return completion.choices[0].message.content
    except Exception as err:
        return f"Error: {err}"
84
-
85
def send_whatsapp_reply(to, message):
    """Send ``message`` to the WhatsApp address ``to`` through Twilio.

    The sender is the module-level ``WHATSAPP_FROM`` number and the client
    is authenticated with ``TWILIO_SID`` / ``TWILIO_TOKEN``.
    """
    twilio_client = Client(TWILIO_SID, TWILIO_TOKEN)
    twilio_client.messages.create(
        body=message,
        from_=WHATSAPP_FROM,
        to=to,
    )
88
-
89
- # --- Load Documents ---
90
-
91
@st.cache_data
def load_documents(folder="docs"):
    """Concatenate the text of every PDF and Word file found in ``folder``.

    Files ending in ``.pdf`` go through ``extract_text_from_pdf``; files
    ending in ``.docx`` or ``.doc`` go through ``extract_text_from_docx``.
    Each document's text is followed by a newline.  Other files are ignored.
    """
    extracted = []
    for name in os.listdir(folder):
        full_path = os.path.join(folder, name)
        if name.endswith(".pdf"):
            extracted.append(extract_text_from_pdf(full_path))
        elif name.endswith((".docx", ".doc")):
            extracted.append(extract_text_from_docx(full_path))
    return "".join(document + "\n" for document in extracted)
101
-
102
# Page configuration is issued before any other Streamlit call in this
# section.  NOTE(review): Streamlit documents set_page_config as having to be
# the first Streamlit command; the cached loader below may render a spinner,
# so it must not run first - confirm against the deployed Streamlit version.
st.set_page_config(page_title="SMEHelpBot 🤖", layout="wide")


@st.cache_resource
def _build_knowledge_base():
    """Load the models and build the index once instead of on every rerun.

    Streamlit re-executes the script on each interaction; without caching,
    the tokenizer, the embedding model and the FAISS index would be rebuilt
    for every button click.
    """
    tokenizer = AutoTokenizer.from_pretrained("bert-base-uncased")
    embedder = SentenceTransformer("all-mpnet-base-v2")
    docs = load_documents()
    chunks = chunk_text(docs, tokenizer)
    index, embeddings = get_embeddings_and_index(chunks, embedder)
    return tokenizer, embedder, docs, chunks, index, embeddings


# The same module-level names as before stay available to the rest of the file.
tokenizer, embedder, docs, chunks, index, embeddings = _build_knowledge_base()

# --- Streamlit UI ---

st.title("🤖 SMEHelpBot – Ask your business questions!")

question = st.text_input("💬 Ask something:")

if st.button("Get Answer") and question:
    with st.spinner("Searching..."):
        top_chunks = get_relevant_chunks(question, index, embedder, chunks)
        context = "\n".join(top_chunks)
        answer = generate_answer(question, context)
    st.success(answer)
121
-
122
- # --- FastAPI WhatsApp Webhook Server ---
123
-
124
app = FastAPI()


class WhatsAppMessage(BaseModel):
    """The two fields of an incoming WhatsApp message that the bot uses."""

    # Text of the incoming message.
    Body: str
    # Sender address; the reply is sent back to this value.
    From: str


@app.post("/whatsapp-webhook")
async def whatsapp_webhook(request: Request):
    """Answer an incoming WhatsApp message and send the reply via Twilio.

    Twilio delivers webhooks as ``application/x-www-form-urlencoded`` POSTs,
    which a JSON body model rejects with 422 before the handler runs.  The
    raw body is therefore parsed here: form-encoded by default, JSON when
    the request declares ``application/json`` (so existing JSON callers keep
    working).

    Returns:
        ``{"status": "sent"}`` after the reply was handed to Twilio, or a
        422 response describing why the payload could not be used.
    """
    import json
    from urllib.parse import parse_qs

    raw_body = await request.body()
    content_type = request.headers.get("content-type", "")
    try:
        if "application/json" in content_type:
            payload = json.loads(raw_body)
        else:
            form = parse_qs(raw_body.decode("utf-8"), keep_blank_values=True)
            # parse_qs yields a list per key; keep the last occurrence.
            payload = {key: values[-1] for key, values in form.items()}
        msg = WhatsAppMessage(**payload)
    except (ValueError, TypeError) as exc:
        # Covers malformed JSON, undecodable bytes, non-mapping payloads and
        # model validation failures.
        return JSONResponse(status_code=422, content={"status": "invalid", "detail": str(exc)})

    question = msg.Body.strip()
    from_number = msg.From
    # NOTE(review): the calls below are synchronous and run on the event
    # loop; consider moving them to a worker thread if latency matters.
    relevant_chunks = get_relevant_chunks(question, index, embedder, chunks)
    context = "\n".join(relevant_chunks)
    answer = generate_answer(question, context)
    send_whatsapp_reply(from_number, answer)
    return JSONResponse(content={"status": "sent"})
139
-
140
- # --- Run FastAPI in background ---
141
-
142
def run_fastapi():
    """Serve the FastAPI ``app`` with uvicorn on all interfaces, port 7860."""
    uvicorn.run(app, host="0.0.0.0", port=7860)


@st.cache_resource
def _start_fastapi_thread():
    """Start the webhook server thread once per process.

    Streamlit re-executes this script on every interaction; an unguarded
    ``Thread(...).start()`` would launch another uvicorn server each time,
    all trying to bind the same port.
    """
    server_thread = threading.Thread(target=run_fastapi, daemon=True)
    server_thread.start()
    return server_thread


_start_fastapi_thread()
 
 
 
 
 
 
 
 
 
 
 
 
 
1
  import os
2
+ import time
3
  import streamlit as st
4
+ from twilio.rest import Client
5
+ from twilio.base.exceptions import TwilioRestException
6
  from pdfminer.high_level import extract_text
 
7
  from sentence_transformers import SentenceTransformer
8
+ from transformers import AutoTokenizer
9
  import faiss
10
  import numpy as np
 
11
  import docx
12
+ from groq import Groq
13
+ import PyPDF2
 
 
 
 
 
 
 
 
 
 
 
 
 
 
14
 
15
# --- Document Loaders ---
def extract_text_from_pdf(pdf_path):
    """Return the text of the PDF at ``pdf_path``.

    PyPDF2 is tried first, concatenating the text of every page in order
    (pages that yield no text contribute nothing).  If that attempt raises,
    the file is handed to pdfminer's ``extract_text`` instead.

    Args:
        pdf_path: Path of the PDF file to read.

    Returns:
        The extracted text as a single string.
    """
    try:
        with open(pdf_path, 'rb') as file:
            pdf_reader = PyPDF2.PdfReader(file)
            return "".join(page.extract_text() or "" for page in pdf_reader.pages)
    except Exception:
        # Deliberately broad: any PyPDF2 failure triggers the pdfminer
        # fallback.  Unlike a bare ``except:``, this no longer swallows
        # KeyboardInterrupt / SystemExit.
        return extract_text(pdf_path)
29
 
def extract_text_from_docx(docx_path):
    """Return the paragraphs of a Word document joined by newlines.

    Args:
        docx_path: Path of the document to read.

    Returns:
        The document text, or an empty string when the file cannot be
        opened or parsed.
    """
    try:
        doc = docx.Document(docx_path)
        return '\n'.join(para.text for para in doc.paragraphs)
    except Exception:
        # Best-effort loader: unreadable documents contribute no text.
        # ``except Exception`` (not bare ``except:``) lets KeyboardInterrupt
        # and SystemExit propagate.
        return ""
36
 
37
def chunk_text(text, tokenizer, chunk_size=150, chunk_overlap=30):
    """Split ``text`` into overlapping chunks of at most ``chunk_size`` tokens.

    Args:
        text: The text to split.
        tokenizer: Object providing ``tokenize(text)`` and
            ``convert_tokens_to_string(tokens)``.
        chunk_size: Maximum number of tokens per chunk; must be positive.
        chunk_overlap: Number of tokens shared by consecutive chunks; must
            be smaller than ``chunk_size``.

    Returns:
        A list of chunk strings, empty when ``text`` yields no tokens.

    Raises:
        ValueError: If the window cannot advance (``chunk_size <= 0`` or
            ``chunk_overlap >= chunk_size``).  Previously this looped forever.
    """
    if chunk_size <= 0 or chunk_overlap >= chunk_size:
        raise ValueError("chunk_size must be positive and larger than chunk_overlap")

    tokens = tokenizer.tokenize(text)
    step = chunk_size - chunk_overlap
    chunks = []
    for start in range(0, len(tokens), step):
        end = min(start + chunk_size, len(tokens))
        chunks.append(tokenizer.convert_tokens_to_string(tokens[start:end]))
        # Once a chunk reaches the last token, any further window would only
        # repeat the tail of this chunk, so stop here.
        if end == len(tokens):
            break
    return chunks
46
 
47
def retrieve_chunks(question, index, embed_model, text_chunks, k=3):
    """Return up to ``k`` chunks whose embeddings are nearest to ``question``.

    Args:
        question: The query text.
        index: Object whose ``search(queries, k)`` returns
            ``(distances, indices)`` with one row per query.
        embed_model: Object whose ``encode([text])`` returns one vector per
            text.
        text_chunks: The chunk list the index was built from.
        k: Number of neighbours to request.

    Returns:
        The matching chunks in the order the index returned them; an empty
        list when ``text_chunks`` is empty.
    """
    if len(text_chunks) == 0:
        return []

    question_embedding = np.asarray(embed_model.encode([question]), dtype="float32")
    _, neighbours = index.search(question_embedding, k)
    # FAISS pads missing neighbours with -1 when it holds fewer than k
    # vectors; indexing a list with -1 would silently return the last chunk.
    return [text_chunks[i] for i in neighbours[0] if 0 <= i < len(text_chunks)]
 
 
 
 
 
 
 
51
 
52
def generate_answer_with_groq(question, context, model="llama-3-3b-8192"):
    """Ask a Groq chat model to answer ``question`` using ``context``.

    Args:
        question: The user's question.
        context: Retrieved text the answer should be based on.
        model: Groq model id to query.  NOTE(review): the default is kept
            from the original code, but it does not look like a published
            Groq model id (the previous revision of this file used
            "llama-3.3-70b-versatile") - confirm before relying on it.

    Returns:
        The text of the first completion choice.

    Raises:
        KeyError: If the ``GROQ_API_KEY`` environment variable is missing
            or empty.  Errors raised by the Groq client propagate unchanged.
    """
    # Validate the key first so a misconfiguration fails with a clear
    # message before any client object is created.
    api_key = os.environ.get("GROQ_API_KEY")
    if not api_key:
        raise KeyError("GROQ_API_KEY environment variable is not set")

    prompt = f"Based on the following context, answer the question: '{question}'\n\nContext:\n{context}"
    groq_client = Groq(api_key=api_key)
    response = groq_client.chat.completions.create(
        model=model,
        messages=[
            {"role": "system", "content": "You are an AI Assistant for Small Businesses."},
            {"role": "user", "content": prompt},
        ],
    )
    return response.choices[0].message.content
63
+
64
+ # --- Twilio Chat Handlers ---
65
def fetch_latest_incoming_message(account_sid, auth_token, conversation_sid):
    """Return the newest WhatsApp-authored message of a Twilio conversation.

    Args:
        account_sid: Twilio account SID.
        auth_token: Twilio auth token.
        conversation_sid: SID of the conversation to inspect.

    Returns:
        ``(body, author, index)`` of the most recent message, among the ten
        newest, whose author starts with ``"whatsapp:"``; ``(None, None,
        None)`` when there is none.
    """
    client = Client(account_sid, auth_token)
    # Messages are listed oldest-first by default, so ``limit=10`` alone
    # returns the ten OLDEST messages and never sees new ones once the
    # conversation grows.  Request newest-first instead.
    messages = client.conversations.v1.conversations(conversation_sid).messages.list(
        order="desc", limit=10
    )
    for msg in messages:
        # Guard against a missing author rather than assuming a string.
        if msg.author and msg.author.startswith("whatsapp:"):
            return msg.body, msg.author, msg.index
    return None, None, None
72
+
73
def send_twilio_message(account_sid, auth_token, conversation_sid, to, body):
    """Post ``body`` to a Twilio conversation with ``to`` as the author.

    Returns the ``sid`` of the created message.  If anything raises, the
    exception's string form is returned instead, so both outcomes are plain
    strings for the caller.
    """
    try:
        conversation = Client(account_sid, auth_token).conversations.v1.conversations(conversation_sid)
        created = conversation.messages.create(body=body, author=to)
        return created.sid
    except Exception as err:
        return str(err)
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
80
 
81
  # --- Streamlit UI ---
82
st.set_page_config(page_title="SMEHelpBot – WhatsApp Integration", layout="wide")
st.title("📱 SMEHelpBot + WhatsApp (via Twilio)")


def _read_secret(name):
    """Return the configured secret ``name``, or None when it is not set.

    Looks in ``st.secrets`` first and then in the process environment.
    """
    try:
        value = st.secrets.get(name)
    except Exception:
        # NOTE(review): st.secrets appears to raise when no secrets file
        # exists; treat that as "not configured" so the text inputs below
        # can still be used - confirm against the deployed Streamlit version.
        value = None
    return value or os.getenv(name)


@st.cache_resource
def setup_knowledge_base():
    """Read every document in ``docs``, chunk it and build a FAISS index.

    Cached so that Streamlit reruns reuse the models and the index.

    Returns:
        ``(index, model, chunks)``: the FAISS index, the sentence-embedding
        model used to build it, and the list of text chunks it covers.

    Raises:
        ValueError: If no text could be extracted from the folder.
    """
    folder_path = "docs"
    all_text = ""
    # Sorted so the chunk order does not depend on directory listing order.
    for file in sorted(os.listdir(folder_path)):
        if file.endswith(".pdf"):
            all_text += extract_text_from_pdf(os.path.join(folder_path, file)) + "\n"
        elif file.endswith((".docx", ".doc")):
            all_text += extract_text_from_docx(os.path.join(folder_path, file)) + "\n"
    tokenizer = AutoTokenizer.from_pretrained('bert-base-uncased')
    chunks = chunk_text(all_text, tokenizer)
    if not chunks:
        # Without this, the dimension lookup below fails with an IndexError.
        raise ValueError(f"No text could be extracted from the '{folder_path}' folder")
    model = SentenceTransformer('all-mpnet-base-v2')
    embeddings = model.encode(chunks)
    dim = embeddings[0].shape[0]
    index = faiss.IndexFlatL2(dim)
    index.add(np.array(embeddings))
    return index, model, chunks


# Secrets and config
account_sid = _read_secret("TWILIO_ACCOUNT_SID") or st.text_input("Twilio Account SID", "")
auth_token = _read_secret("TWILIO_AUTH_TOKEN") or st.text_input("Twilio Auth Token", type="password")
conversation_sid = st.text_input("Twilio Conversation SID", "")
GROQ_API_KEY = _read_secret("GROQ_API_KEY") or st.text_input("GROQ API Key", type="password")

if all([account_sid, auth_token, conversation_sid, GROQ_API_KEY]):
    os.environ["GROQ_API_KEY"] = GROQ_API_KEY

    index, embedding_model, text_chunks = setup_knowledge_base()

    st.success("✅ Knowledge base ready. Monitoring WhatsApp...")

    if st.button("🔍 Check for New WhatsApp Query"):
        with st.spinner("Checking messages..."):
            question, sender, msg_index = fetch_latest_incoming_message(account_sid, auth_token, conversation_sid)
            # Remember which message was answered last so repeated clicks do
            # not answer (and re-send) the same question again.
            message_key = (conversation_sid, msg_index)
            if question and st.session_state.get("last_answered_message") != message_key:
                st.info(f"📥 New Question from {sender}:\n\n> {question}")
                relevant_chunks = retrieve_chunks(question, index, embedding_model, text_chunks)
                context = "\n\n".join(relevant_chunks)
                answer = generate_answer_with_groq(question, context)
                result = send_twilio_message(account_sid, auth_token, conversation_sid, "system", answer)
                # send_twilio_message returns either the message SID or the
                # error text.  NOTE(review): Conversation message SIDs are
                # assumed to be 34 characters starting with "IM" - confirm.
                if isinstance(result, str) and result.startswith("IM") and len(result) == 34:
                    st.session_state["last_answered_message"] = message_key
                    st.success("📤 Answer sent via WhatsApp!")
                else:
                    st.error(f"Failed to send the answer via WhatsApp: {result}")
                st.markdown(f"### ✨ Answer:\n\n{answer}")
            else:
                st.warning("No new messages from users found.")
else:
    st.warning("Please fill all required credentials.")