masadonline committed on
Commit
19cd752
·
verified ·
1 Parent(s): b94cf70

Update app.py

Browse files
Files changed (1) hide show
  1. app.py +82 -104
app.py CHANGED
@@ -1,5 +1,8 @@
 
 
1
  import os
2
  import time
 
3
  import streamlit as st
4
  from twilio.rest import Client
5
  from pdfminer.high_level import extract_text
@@ -11,16 +14,14 @@ import docx
11
  from groq import Groq
12
  import PyPDF2
13
  import requests
14
- from streamlit_autorefresh import st_autorefresh
15
 
16
- # Extract text from PDF
17
  def extract_text_from_pdf(pdf_path):
18
  try:
19
  text = ""
20
  with open(pdf_path, 'rb') as file:
21
- pdf_reader = PyPDF2.PdfReader(file)
22
- for page_num in range(len(pdf_reader.pages)):
23
- page = pdf_reader.pages[page_num]
24
  page_text = page.extract_text()
25
  if page_text:
26
  text += page_text
@@ -35,6 +36,7 @@ def extract_text_from_docx(docx_path):
35
  except:
36
  return ""
37
 
 
38
  def chunk_text(text, tokenizer, chunk_size=150, chunk_overlap=30):
39
  tokens = tokenizer.tokenize(text)
40
  chunks, start = [], 0
@@ -46,16 +48,14 @@ def chunk_text(text, tokenizer, chunk_size=150, chunk_overlap=30):
46
  return chunks
47
 
48
  def retrieve_chunks(question, index, embed_model, text_chunks, k=3):
49
- question_embedding = embed_model.encode([question])[0]
50
- D, I = index.search(np.array([question_embedding]), k)
51
  return [text_chunks[i] for i in I[0]]
52
 
53
- def generate_answer_with_groq(question, context, retries=3, delay=2):
 
54
  url = "https://api.groq.com/openai/v1/chat/completions"
55
  api_key = os.environ.get("GROQ_API_KEY")
56
- if not api_key:
57
- return "⚠️ GROQ_API_KEY not set."
58
-
59
  headers = {
60
  "Authorization": f"Bearer {api_key}",
61
  "Content-Type": "application/json",
@@ -74,7 +74,6 @@ def generate_answer_with_groq(question, context, retries=3, delay=2):
74
  "You are ToyBot, a friendly and helpful WhatsApp assistant for an online toy shop. "
75
  "Your goal is to politely answer customer questions, help them choose the right toys, "
76
  "provide order or delivery information, explain return policies, and guide them through purchases. "
77
- "Always sound warm, helpful, and trustworthy like a professional customer support agent."
78
  )
79
  },
80
  {"role": "user", "content": prompt},
@@ -82,124 +81,103 @@ def generate_answer_with_groq(question, context, retries=3, delay=2):
82
  "temperature": 0.5,
83
  "max_tokens": 300,
84
  }
 
 
 
85
 
86
- for attempt in range(retries):
87
- try:
88
- response = requests.post(url, headers=headers, json=payload, timeout=10)
89
- response.raise_for_status()
90
- result = response.json()
91
- return result['choices'][0]['message']['content'].strip()
92
- except requests.exceptions.HTTPError as e:
93
- if response.status_code == 503 and attempt < retries - 1:
94
- time.sleep(delay)
95
- continue
96
- else:
97
- return f"⚠️ Groq API HTTPError: {e}"
98
- except Exception as e:
99
- return f"⚠️ Groq API Error: {e}"
100
-
101
- def fetch_latest_incoming_message(client, conversation_sid):
102
- messages = client.conversations.v1.conversations(conversation_sid).messages.list(limit=10)
103
- for msg in reversed(messages):
104
- if msg.author.startswith("whatsapp:"):
105
- return msg.body, msg.author, msg.index
106
- return None, None, None
107
-
108
- def send_twilio_message(client, conversation_sid, body):
109
- try:
110
- message = client.conversations.v1.conversations(conversation_sid).messages.create(author="system", body=body)
111
- return message.sid
112
- except Exception as e:
113
- return str(e)
114
-
115
- # Automatically get the latest WhatsApp conversation SID
116
  def get_latest_whatsapp_conversation_sid(client):
117
  conversations = client.conversations.v1.conversations.list(limit=20)
118
  for convo in conversations:
119
  try:
120
  participants = client.conversations.v1.conversations(convo.sid).participants.list()
121
  for p in participants:
122
- if p.identity and p.identity.startswith("whatsapp:"):
123
- return convo.sid
124
- if p.messaging_binding and p.messaging_binding.get("address", "").startswith("whatsapp:"):
125
  return convo.sid
126
- except Exception as e:
127
  continue
128
  return conversations[0].sid if conversations else None
129
 
 
 
 
 
 
 
 
 
 
 
 
130
 
131
- # Streamlit UI
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
132
  st.set_page_config(page_title="Quasa – A Smart WhatsApp Chatbot", layout="wide")
133
  st.title("πŸ“± Quasa – A Smart WhatsApp Chatbot")
134
 
135
- if "last_index" not in st.session_state:
136
- st.session_state.last_index = -1
137
-
138
  account_sid = st.secrets.get("TWILIO_SID")
139
  auth_token = st.secrets.get("TWILIO_TOKEN")
140
  GROQ_API_KEY = st.secrets.get("GROQ_API_KEY")
141
 
142
  if not all([account_sid, auth_token, GROQ_API_KEY]):
143
- st.warning("⚠️ Some secrets not found. Please enter missing credentials below:")
144
  account_sid = st.text_input("Twilio SID", value=account_sid or "")
145
- auth_token = st.text_input("Twilio Auth Token", type="password", value=auth_token or "")
146
  GROQ_API_KEY = st.text_input("GROQ API Key", type="password", value=GROQ_API_KEY or "")
147
 
148
- enable_autorefresh = st.checkbox("πŸ”„ Enable Auto-Refresh", value=True)
149
- interval_seconds = st.selectbox("Refresh Interval (seconds)", options=[5, 10, 15, 30, 60], index=4)
150
-
151
- if enable_autorefresh:
152
- st_autorefresh(interval=interval_seconds * 1000, key="auto-refresh")
153
-
154
  if all([account_sid, auth_token, GROQ_API_KEY]):
155
  os.environ["GROQ_API_KEY"] = GROQ_API_KEY
156
  client = Client(account_sid, auth_token)
157
  conversation_sid = get_latest_whatsapp_conversation_sid(client)
158
 
159
- if conversation_sid is None:
 
 
 
 
 
160
  st.error("❌ No WhatsApp conversation found.")
161
- st.stop()
162
-
163
- @st.cache_data(show_spinner=False)
164
- def setup_knowledge_base():
165
- folder_path = "docs"
166
- all_text = ""
167
- try:
168
- for file in os.listdir(folder_path):
169
- if file.endswith(".pdf"):
170
- all_text += extract_text_from_pdf(os.path.join(folder_path, file)) + "\n"
171
- elif file.endswith((".docx", ".doc")):
172
- all_text += extract_text_from_docx(os.path.join(folder_path, file)) + "\n"
173
- tokenizer = AutoTokenizer.from_pretrained('bert-base-uncased')
174
- chunks = chunk_text(all_text, tokenizer)
175
- model = SentenceTransformer('all-mpnet-base-v2')
176
- embeddings = model.encode(chunks)
177
- dim = embeddings[0].shape[0]
178
- index = faiss.IndexFlatL2(dim)
179
- index.add(np.array(embeddings).astype('float32'))
180
- return index, model, chunks
181
- except Exception as e:
182
- st.error(f"Error setting up knowledge base: {e}")
183
- return None, None, None
184
-
185
- index, embedding_model, text_chunks = setup_knowledge_base()
186
- if index is None:
187
- st.stop()
188
-
189
- st.success("βœ… Knowledge base ready. Monitoring WhatsApp...")
190
-
191
- with st.spinner("⏳ Checking for new WhatsApp messages..."):
192
- question, sender, msg_index = fetch_latest_incoming_message(client, conversation_sid)
193
- if question and msg_index > st.session_state.last_index:
194
- st.session_state.last_index = msg_index
195
- st.info(f"πŸ“₯ New Question from {sender}:\n\n> {question}")
196
- relevant_chunks = retrieve_chunks(question, index, embedding_model, text_chunks)
197
- context = "\n\n".join(relevant_chunks)
198
- answer = generate_answer_with_groq(question, context)
199
- send_twilio_message(client, conversation_sid, answer)
200
- st.success("πŸ“€ Answer sent via WhatsApp!")
201
- st.markdown(f"### ✨ Answer:\n\n{answer}")
202
- else:
203
- st.caption("βœ… No new message yet. Waiting for refresh...")
204
- else:
205
- st.warning("❗ Please provide all required credentials.")
 
1
+ # app.py
2
+
3
  import os
4
  import time
5
+ import threading
6
  import streamlit as st
7
  from twilio.rest import Client
8
  from pdfminer.high_level import extract_text
 
14
  from groq import Groq
15
  import PyPDF2
16
  import requests
 
17
 
18
+ # --- Text Extraction Utilities ---
19
  def extract_text_from_pdf(pdf_path):
20
  try:
21
  text = ""
22
  with open(pdf_path, 'rb') as file:
23
+ reader = PyPDF2.PdfReader(file)
24
+ for page in reader.pages:
 
25
  page_text = page.extract_text()
26
  if page_text:
27
  text += page_text
 
36
  except:
37
  return ""
38
 
39
+ # --- Chunking & Retrieval ---
40
  def chunk_text(text, tokenizer, chunk_size=150, chunk_overlap=30):
41
  tokens = tokenizer.tokenize(text)
42
  chunks, start = [], 0
 
48
  return chunks
49
 
def retrieve_chunks(question, index, embed_model, text_chunks, k=3):
    """Return up to ``k`` chunks of ``text_chunks`` closest to ``question``.

    Args:
        question: Query text; it is embedded with ``embed_model``.
        index: Vector index exposing ``search(vectors, k)`` and returning a
            ``(distances, ids)`` pair (this file builds a FAISS ``IndexFlatL2``).
        embed_model: Object exposing ``encode(list_of_str)``.
        text_chunks: Chunks in the order their embeddings were added to ``index``.
        k: Maximum number of chunks to return.

    Returns:
        A list of chunks in the order the index returned them. The list may
        hold fewer than ``k`` items and is empty when there is nothing to search.
    """
    if not text_chunks or k <= 0:
        return []

    # Never ask for more neighbours than there are chunks.
    k = min(k, len(text_chunks))

    # FAISS works on float32 matrices; cast explicitly so any encoder dtype is accepted.
    query = np.asarray([embed_model.encode([question])[0]], dtype="float32")
    _distances, neighbour_ids = index.search(query, k)

    # FAISS pads missing neighbours with -1. Indexing a list with -1 would
    # silently return the last chunk, so out-of-range ids are dropped.
    return [text_chunks[i] for i in neighbour_ids[0] if 0 <= i < len(text_chunks)]
54
 
55
+ # --- Groq Answer Generator ---
56
+ def generate_answer_with_groq(question, context):
57
  url = "https://api.groq.com/openai/v1/chat/completions"
58
  api_key = os.environ.get("GROQ_API_KEY")
 
 
 
59
  headers = {
60
  "Authorization": f"Bearer {api_key}",
61
  "Content-Type": "application/json",
 
74
  "You are ToyBot, a friendly and helpful WhatsApp assistant for an online toy shop. "
75
  "Your goal is to politely answer customer questions, help them choose the right toys, "
76
  "provide order or delivery information, explain return policies, and guide them through purchases. "
 
77
  )
78
  },
79
  {"role": "user", "content": prompt},
 
81
  "temperature": 0.5,
82
  "max_tokens": 300,
83
  }
84
+ response = requests.post(url, headers=headers, json=payload)
85
+ response.raise_for_status()
86
+ return response.json()['choices'][0]['message']['content'].strip()
87
 
88
+ # --- Twilio Functions ---
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
def _is_whatsapp_participant(participant):
    """Tell whether a participant's identity or bound address starts with ``whatsapp:``."""
    identity = participant.identity or ""
    binding = participant.messaging_binding or {}
    # ``address`` can be present but None; ``or ""`` keeps startswith() safe.
    address = binding.get("address") or ""
    return identity.startswith("whatsapp:") or address.startswith("whatsapp:")


def get_latest_whatsapp_conversation_sid(client):
    """Pick the conversation this bot should monitor.

    Lists up to 20 conversations and returns the SID of the first one, in the
    order Twilio returned them, that has a WhatsApp participant.

    Args:
        client: Twilio REST client.

    Returns:
        The SID of the first conversation with a WhatsApp participant. If none
        has one, the SID of the first listed conversation. ``None`` when the
        account has no conversations.
    """
    conversations = client.conversations.v1.conversations.list(limit=20)
    for convo in conversations:
        try:
            participants = client.conversations.v1.conversations(convo.sid).participants.list()
        except Exception:
            # Best effort: a conversation whose participants cannot be read is
            # skipped rather than aborting the whole lookup.
            continue
        if any(_is_whatsapp_participant(p) for p in participants):
            return convo.sid
    return conversations[0].sid if conversations else None
102
 
def fetch_latest_incoming_message(client, conversation_sid):
    """Return the newest WhatsApp-authored message of a conversation.

    Args:
        client: Twilio REST client.
        conversation_sid: SID of the conversation to inspect.

    Returns:
        ``(body, author, index)`` of the message with the highest ``index``
        among the 10 most recent messages whose author starts with
        ``"whatsapp:"``, or ``(None, None, None)`` when there is none.
    """
    # Twilio lists messages oldest-first unless ``order`` is given. Without
    # ``order="desc"``, ``limit=10`` returns the first ten messages ever sent,
    # so replies stop once the conversation grows past ten messages.
    messages = client.conversations.v1.conversations(conversation_sid).messages.list(
        order="desc", limit=10
    )
    incoming = [msg for msg in messages if (msg.author or "").startswith("whatsapp:")]
    if not incoming:
        return None, None, None
    # Pick by index instead of relying on list position.
    newest = max(incoming, key=lambda msg: msg.index)
    return newest.body, newest.author, newest.index
109
+
def send_twilio_message(client, conversation_sid, body):
    """Post ``body`` into the conversation with the author set to ``"system"``.

    Args:
        client: Twilio REST client.
        conversation_sid: SID of the conversation to post into.
        body: Text of the message.

    Returns:
        Whatever ``messages.create`` returns for the new message.
        Exceptions raised by the client are not caught here.
    """
    conversation = client.conversations.v1.conversations(conversation_sid)
    created = conversation.messages.create(author="system", body=body)
    return created
114
 
115
+ # --- Load Knowledge Base ---
def setup_knowledge_base():
    """Build the retrieval index from the documents in the ``docs`` folder.

    Text is extracted from every ``.pdf``, ``.docx`` and ``.doc`` file,
    split into token chunks, embedded with ``all-mpnet-base-v2`` and stored
    in a FAISS ``IndexFlatL2``.

    Returns:
        ``(index, model, chunks)``: the FAISS index, the embedding model and
        the chunk list, whose order matches the vectors in the index.

    Raises:
        FileNotFoundError: If the ``docs`` folder does not exist.
        ValueError: If no text could be extracted, so nothing can be indexed.
    """
    folder_path = "docs"
    all_text = ""
    # os.listdir() order is arbitrary; sorting keeps chunk order stable between runs.
    for file_name in sorted(os.listdir(folder_path)):
        path = os.path.join(folder_path, file_name)
        suffix = file_name.lower()  # accept ".PDF" / ".Docx" as well
        if suffix.endswith(".pdf"):
            all_text += extract_text_from_pdf(path) + "\n"
        elif suffix.endswith((".docx", ".doc")):
            all_text += extract_text_from_docx(path) + "\n"

    # Fail early with a clear message, before downloading any model, instead
    # of hitting an IndexError on ``embeddings[0]`` further down.
    if not all_text.strip():
        raise ValueError(
            f"No text could be extracted from PDF/DOCX files in '{folder_path}'."
        )

    tokenizer = AutoTokenizer.from_pretrained('bert-base-uncased')
    chunks = chunk_text(all_text, tokenizer)
    if not chunks:
        raise ValueError(f"Documents in '{folder_path}' produced no text chunks.")

    model = SentenceTransformer('all-mpnet-base-v2')
    # FAISS stores float32 vectors; one row per chunk.
    embeddings = np.asarray(model.encode(chunks), dtype='float32')
    index = faiss.IndexFlatL2(embeddings.shape[1])
    index.add(embeddings)
    return index, model, chunks
133
+
134
+ # --- Background Polling Thread ---
def start_message_monitor(client, convo_sid, index, embed_model, text_chunks):
    """Start a daemon thread that answers new WhatsApp messages.

    The thread polls the conversation every 3 seconds (5 seconds after an
    error). For each new incoming message it retrieves context, asks Groq for
    an answer and sends the answer back through Twilio.

    At most one monitor runs per conversation SID within the process. A
    second call for the same conversation returns the running thread instead
    of starting another. Streamlit re-executes the script on every
    interaction, so without this each rerun would add a poller and every
    message would be answered several times.

    Args:
        client: Twilio REST client.
        convo_sid: SID of the conversation to monitor.
        index: Vector index passed to ``retrieve_chunks``.
        embed_model: Embedding model passed to ``retrieve_chunks``.
        text_chunks: Chunk list passed to ``retrieve_chunks``.

    Returns:
        The monitor ``threading.Thread``, either newly started or already running.
    """
    thread_name = f"whatsapp-monitor-{convo_sid}"
    for running in threading.enumerate():
        if running.name == thread_name and running.is_alive():
            return running

    # NOTE: starting at -1 means the newest message already in the
    # conversation is answered once right after start-up.
    last_index = -1

    def poll_loop():
        nonlocal last_index
        while True:
            try:
                question, sender, msg_index = fetch_latest_incoming_message(client, convo_sid)
                if question and msg_index > last_index:
                    # Mark as handled before answering, so a failing reply is
                    # not retried forever on the same message.
                    last_index = msg_index
                    print(f"\n📥 New Message from {sender}: {question}")
                    context = "\n\n".join(retrieve_chunks(question, index, embed_model, text_chunks))
                    answer = generate_answer_with_groq(question, context)
                    send_twilio_message(client, convo_sid, answer)
                    print(f"📤 Sent Reply: {answer}")
                time.sleep(3)
            except Exception as e:
                # Keep the monitor alive through transient API or network failures.
                print("❌ Error in polling loop:", e)
                time.sleep(5)

    thread = threading.Thread(target=poll_loop, name=thread_name, daemon=True)
    thread.start()
    return thread
157
+
158
+ # --- Streamlit UI ---
# Page header.
st.set_page_config(page_title="Quasa – A Smart WhatsApp Chatbot", layout="wide")
st.title("📱 Quasa – A Smart WhatsApp Chatbot")

# Credentials come from Streamlit secrets; anything missing is asked for in the UI.
account_sid = st.secrets.get("TWILIO_SID")
auth_token = st.secrets.get("TWILIO_TOKEN")
GROQ_API_KEY = st.secrets.get("GROQ_API_KEY")

if not all([account_sid, auth_token, GROQ_API_KEY]):
    st.warning("⚠️ Provide all credentials below:")
    account_sid = st.text_input("Twilio SID", value=account_sid or "")
    auth_token = st.text_input("Twilio Token", type="password", value=auth_token or "")
    GROQ_API_KEY = st.text_input("GROQ API Key", type="password", value=GROQ_API_KEY or "")


@st.cache_resource(show_spinner=False)
def _launch_chatbot(_client, conversation_sid):
    """Build the knowledge base and start the poller once per conversation.

    Streamlit re-runs this script on every interaction. Caching the call
    keeps the models from being reloaded and stops a new polling thread from
    being spawned on each rerun. ``_client`` has a leading underscore so that
    Streamlit leaves it out of the cache key.
    """
    index, model, chunks = setup_knowledge_base()
    start_message_monitor(_client, conversation_sid, index, model, chunks)
    return True


if all([account_sid, auth_token, GROQ_API_KEY]):
    # generate_answer_with_groq reads the key from the environment.
    os.environ["GROQ_API_KEY"] = GROQ_API_KEY
    client = Client(account_sid, auth_token)

    try:
        conversation_sid = get_latest_whatsapp_conversation_sid(client)
    except Exception as exc:
        # Bad credentials or network trouble: show a message, not a traceback.
        st.error(f"❌ Could not query Twilio conversations: {exc}")
        st.stop()

    if conversation_sid:
        st.success("✅ WhatsApp connected. Initializing chatbot...")
        try:
            _launch_chatbot(client, conversation_sid)
        except Exception as exc:
            st.error(f"❌ Could not start the chatbot: {exc}")
            st.stop()
        st.success("🟢 Chatbot is running in background and will reply automatically.")
    else:
        st.error("❌ No WhatsApp conversation found.")