masadonline committed on
Commit
f51c85c
·
verified ·
1 Parent(s): 7d4af79

Update app.py

Browse files
Files changed (1) hide show
  1. app.py +56 -41
app.py CHANGED
@@ -2,7 +2,6 @@ import os
2
  import time
3
  import streamlit as st
4
  from twilio.rest import Client
5
- from twilio.base.exceptions import TwilioRestException
6
  from pdfminer.high_level import extract_text
7
  from sentence_transformers import SentenceTransformer
8
  from transformers import AutoTokenizer
@@ -12,14 +11,17 @@ import docx
12
  from groq import Groq
13
  import PyPDF2
14
  import requests
 
15
 
 
16
  # --- Document Loaders ---
17
  def extract_text_from_pdf(pdf_path):
18
  try:
19
  text = ""
20
  with open(pdf_path, 'rb') as file:
21
  pdf_reader = PyPDF2.PdfReader(file)
22
- for page in pdf_reader.pages:
 
23
  page_text = page.extract_text()
24
  if page_text:
25
  text += page_text
@@ -49,6 +51,7 @@ def retrieve_chunks(question, index, embed_model, text_chunks, k=3):
49
  D, I = index.search(np.array([question_embedding]), k)
50
  return [text_chunks[i] for i in I[0]]
51
 
 
52
  def generate_answer_with_groq(question, context, retries=3, delay=2):
53
  url = "https://api.groq.com/openai/v1/chat/completions"
54
  api_key = os.environ.get("GROQ_API_KEY")
@@ -97,7 +100,7 @@ def generate_answer_with_groq(question, context, retries=3, delay=2):
97
  except Exception as e:
98
  return f"⚠️ Groq API Error: {e}"
99
 
100
- # --- Twilio Chat Handlers ---
101
  def fetch_latest_incoming_message(account_sid, auth_token, conversation_sid):
102
  client = Client(account_sid, auth_token)
103
  messages = client.conversations.v1.conversations(conversation_sid).messages.list(limit=10)
@@ -114,60 +117,72 @@ def send_twilio_message(account_sid, auth_token, conversation_sid, body):
114
  except Exception as e:
115
  return str(e)
116
 
117
- # --- Streamlit UI ---
118
- st.set_page_config(page_title="SMEHelpBot – WhatsApp Integration", layout="wide")
119
- st.title("πŸ“± SMEHelpBot + WhatsApp (via Twilio)")
 
 
 
120
 
121
- # Load from Hugging Face secrets or fallback to manual input
122
  account_sid = st.secrets.get("TWILIO_SID")
123
  auth_token = st.secrets.get("TWILIO_TOKEN")
124
  GROQ_API_KEY = st.secrets.get("GROQ_API_KEY")
125
 
126
  if not all([account_sid, auth_token, GROQ_API_KEY]):
127
  st.warning("⚠️ Some secrets not found. Please enter missing credentials below:")
128
- account_sid = st.text_input("πŸ” Twilio SID", value=account_sid or "")
129
- auth_token = st.text_input("πŸ” Twilio Auth Token", type="password", value=auth_token or "")
130
- GROQ_API_KEY = st.text_input("πŸ” GROQ API Key", type="password", value=GROQ_API_KEY or "")
 
 
 
 
 
 
131
 
132
- if all([account_sid, auth_token, conversation_sid, GROQ_API_KEY]):
133
  os.environ["GROQ_API_KEY"] = GROQ_API_KEY
134
 
135
- @st.cache_resource
136
  def setup_knowledge_base():
137
  folder_path = "docs"
138
  all_text = ""
139
- for file in os.listdir(folder_path):
140
- if file.endswith(".pdf"):
141
- all_text += extract_text_from_pdf(os.path.join(folder_path, file)) + "\n"
142
- elif file.endswith((".docx", ".doc")):
143
- all_text += extract_text_from_docx(os.path.join(folder_path, file)) + "\n"
144
- tokenizer = AutoTokenizer.from_pretrained('bert-base-uncased')
145
- chunks = chunk_text(all_text, tokenizer)
146
- model = SentenceTransformer('all-mpnet-base-v2')
147
- embeddings = model.encode(chunks)
148
- dim = embeddings[0].shape[0]
149
- index = faiss.IndexFlatL2(dim)
150
- index.add(np.array(embeddings))
151
- return index, model, chunks
 
 
 
 
152
 
153
  index, embedding_model, text_chunks = setup_knowledge_base()
 
 
154
 
155
  st.success("βœ… Knowledge base ready. Monitoring WhatsApp...")
156
 
157
- conversation_sid = st.text_input("πŸ’¬ Enter Twilio Conversation SID", placeholder="e.g. CHxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxx")
158
-
159
- if st.button("πŸ” Check for New WhatsApp Query"):
160
- with st.spinner("Checking messages..."):
161
- question, sender, msg_index = fetch_latest_incoming_message(account_sid, auth_token, conversation_sid)
162
- if question:
163
- st.info(f"πŸ“₯ New Question from {sender}:\n\n> {question}")
164
- relevant_chunks = retrieve_chunks(question, index, embedding_model, text_chunks)
165
- context = "\n\n".join(relevant_chunks)
166
- answer = generate_answer_with_groq(question, context)
167
- send_twilio_message(account_sid, auth_token, conversation_sid, answer)
168
- st.success("πŸ“€ Answer sent via WhatsApp!")
169
- st.markdown(f"### ✨ Answer:\n\n{answer}")
170
- else:
171
- st.warning("No new messages from users found.")
172
  else:
173
- st.warning("❗ Please provide all required credentials including Conversation SID.")
 
2
  import time
3
  import streamlit as st
4
  from twilio.rest import Client
 
5
  from pdfminer.high_level import extract_text
6
  from sentence_transformers import SentenceTransformer
7
  from transformers import AutoTokenizer
 
11
  from groq import Groq
12
  import PyPDF2
13
  import requests
14
+ from streamlit_autorefresh import st_autorefresh
15
 
16
+ # Extract text from PDF with fallback
17
  # --- Document Loaders ---
18
  def extract_text_from_pdf(pdf_path):
19
  try:
20
  text = ""
21
  with open(pdf_path, 'rb') as file:
22
  pdf_reader = PyPDF2.PdfReader(file)
23
+ for page_num in range(len(pdf_reader.pages)):
24
+ page = pdf_reader.pages[page_num]
25
  page_text = page.extract_text()
26
  if page_text:
27
  text += page_text
 
51
  D, I = index.search(np.array([question_embedding]), k)
52
  return [text_chunks[i] for i in I[0]]
53
 
54
+ # Generate answer using Groq API with retries and timeout
55
  def generate_answer_with_groq(question, context, retries=3, delay=2):
56
  url = "https://api.groq.com/openai/v1/chat/completions"
57
  api_key = os.environ.get("GROQ_API_KEY")
 
100
  except Exception as e:
101
  return f"⚠️ Groq API Error: {e}"
102
 
103
+ # Twilio message fetch and send
104
  def fetch_latest_incoming_message(account_sid, auth_token, conversation_sid):
105
  client = Client(account_sid, auth_token)
106
  messages = client.conversations.v1.conversations(conversation_sid).messages.list(limit=10)
 
117
  except Exception as e:
118
  return str(e)
119
 
120
# --- Streamlit UI ---
# Top-level script section: collects credentials, builds the knowledge base
# and, on each run, checks the Twilio conversation for a new question to
# answer. With auto-refresh enabled the component below triggers periodic
# re-runs of this script, so the message check repeats at that interval.
st.set_page_config(page_title="Quasa – A Smart WhatsApp Chatbot", layout="wide")
st.title("📱 Quasa – A Smart WhatsApp Chatbot")

# Index of the last message that was already answered. Kept in session state
# so that a refresh does not answer the same message again.
if "last_index" not in st.session_state:
    st.session_state.last_index = -1

account_sid = st.secrets.get("TWILIO_SID")
auth_token = st.secrets.get("TWILIO_TOKEN")
GROQ_API_KEY = st.secrets.get("GROQ_API_KEY")

if not all([account_sid, auth_token, GROQ_API_KEY]):
    st.warning("⚠️ Some secrets not found. Please enter missing credentials below:")
    account_sid = st.text_input("Twilio SID", value=account_sid or "")
    auth_token = st.text_input("Twilio Auth Token", type="password", value=auth_token or "")
    GROQ_API_KEY = st.text_input("GROQ API Key", type="password", value=GROQ_API_KEY or "")

# Fix: `conversation_sid` was referenced below but never defined in this
# section, which raised NameError on every run. Ask the user for it here.
conversation_sid = st.text_input(
    "Twilio Conversation SID",
    placeholder="e.g. CHxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxx",
)

enable_autorefresh = st.checkbox("🔄 Enable Auto-Refresh", value=True)
# Fix: the original passed index=5, which is out of range for a five-item
# list (valid indices are 0-4).
# NOTE(review): index=0 assumes the intended default was the 5-second option;
# use index=4 if the 60-second option was meant.
interval_seconds = st.selectbox(
    "Refresh Interval (seconds)",
    options=[5, 10, 15, 30, 60],
    index=0,
)

if enable_autorefresh:
    # st_autorefresh expects the interval in milliseconds.
    st_autorefresh(interval=interval_seconds * 1000, key="auto-refresh")

if all([account_sid, auth_token, GROQ_API_KEY, conversation_sid]):
    # generate_answer_with_groq reads the key from the environment.
    os.environ["GROQ_API_KEY"] = GROQ_API_KEY

    # Fix: st.cache_data serializes return values, which does not work for a
    # FAISS index or a SentenceTransformer model; st.cache_resource stores the
    # objects themselves and is meant for exactly this kind of resource.
    @st.cache_resource(show_spinner=False)
    def setup_knowledge_base():
        """Build the retrieval index from the documents in the ``docs`` folder.

        Reads every ``.pdf``, ``.docx`` and ``.doc`` file in ``docs``,
        concatenates the extracted text, splits it into chunks, embeds the
        chunks and stores the embeddings in a FAISS L2 index.

        Returns:
            A tuple ``(index, model, chunks)``: the FAISS index, the embedding
            model used to build it, and the list of text chunks.

        Raises:
            ValueError: if no text chunks could be produced from ``docs``.
            Any exception raised by the loaders, tokenizer, model or FAISS is
            propagated so that a failed setup is not cached.
        """
        folder_path = "docs"
        all_text = ""
        for file in os.listdir(folder_path):
            if file.endswith(".pdf"):
                all_text += extract_text_from_pdf(os.path.join(folder_path, file)) + "\n"
            elif file.endswith((".docx", ".doc")):
                all_text += extract_text_from_docx(os.path.join(folder_path, file)) + "\n"
        tokenizer = AutoTokenizer.from_pretrained('bert-base-uncased')
        chunks = chunk_text(all_text, tokenizer)
        if not chunks:
            # Without this check the failure surfaces as an opaque IndexError
            # on embeddings[0] below.
            raise ValueError(f"no text could be extracted from '{folder_path}'")
        model = SentenceTransformer('all-mpnet-base-v2')
        embeddings = model.encode(chunks)
        dim = embeddings[0].shape[0]
        index = faiss.IndexFlatL2(dim)
        # FAISS requires float32 input.
        index.add(np.array(embeddings).astype('float32'))
        return index, model, chunks

    # Handle errors at the call site rather than inside the cached function,
    # so a transient failure is reported but not memoized as (None, None, None).
    try:
        index, embedding_model, text_chunks = setup_knowledge_base()
    except Exception as e:
        st.error(f"Error setting up knowledge base: {e}")
        st.stop()

    st.success("✅ Knowledge base ready. Monitoring WhatsApp...")

    with st.spinner("⏳ Checking for new WhatsApp messages..."):
        question, sender, msg_index = fetch_latest_incoming_message(account_sid, auth_token, conversation_sid)
        # Only answer messages newer than the last one handled in this session.
        if question and msg_index > st.session_state.last_index:
            st.session_state.last_index = msg_index
            st.info(f"📥 New Question from {sender}:\n\n> {question}")
            relevant_chunks = retrieve_chunks(question, index, embedding_model, text_chunks)
            context = "\n\n".join(relevant_chunks)
            answer = generate_answer_with_groq(question, context)
            # NOTE(review): send_twilio_message appears to return the error
            # text on failure; its result is ignored here, so the success
            # banner is shown even if sending failed — confirm and handle.
            send_twilio_message(account_sid, auth_token, conversation_sid, answer)
            st.success("📤 Answer sent via WhatsApp!")
            st.markdown(f"### ✨ Answer:\n\n{answer}")
        else:
            st.caption("✅ No new message yet. Waiting for refresh...")
else:
    st.warning("❗ Please provide all required credentials and conversation SID.")