masadonline committed on
Commit
40ee9a0
·
verified ·
1 Parent(s): f6c29c9

Update app.py

Browse files
Files changed (1) hide show
  1. app.py +89 -91
app.py CHANGED
@@ -8,20 +8,19 @@ from transformers import AutoTokenizer
8
  import faiss
9
  import numpy as np
10
  import docx
11
- from groq import Groq
12
  import PyPDF2
13
  import requests
14
- from streamlit_autorefresh import st_autorefresh
15
 
16
- # Extract text from PDF with fallback
17
- # --- Document Loaders ---
 
 
18
  def extract_text_from_pdf(pdf_path):
19
  try:
20
  text = ""
21
  with open(pdf_path, 'rb') as file:
22
  pdf_reader = PyPDF2.PdfReader(file)
23
- for page_num in range(len(pdf_reader.pages)):
24
- page = pdf_reader.pages[page_num]
25
  page_text = page.extract_text()
26
  if page_text:
27
  text += page_text
@@ -36,6 +35,7 @@ def extract_text_from_docx(docx_path):
36
  except:
37
  return ""
38
 
 
39
  def chunk_text(text, tokenizer, chunk_size=150, chunk_overlap=30):
40
  tokens = tokenizer.tokenize(text)
41
  chunks, start = [], 0
@@ -46,18 +46,16 @@ def chunk_text(text, tokenizer, chunk_size=150, chunk_overlap=30):
46
  start += chunk_size - chunk_overlap
47
  return chunks
48
 
 
49
  def retrieve_chunks(question, index, embed_model, text_chunks, k=3):
50
  question_embedding = embed_model.encode([question])[0]
51
  D, I = index.search(np.array([question_embedding]), k)
52
  return [text_chunks[i] for i in I[0]]
53
 
54
- # Generate answer using Groq API with retries and timeout
55
  def generate_answer_with_groq(question, context, retries=3, delay=2):
56
  url = "https://api.groq.com/openai/v1/chat/completions"
57
  api_key = os.environ.get("GROQ_API_KEY")
58
- if not api_key:
59
- return "⚠️ GROQ_API_KEY not set."
60
-
61
  headers = {
62
  "Authorization": f"Bearer {api_key}",
63
  "Content-Type": "application/json",
@@ -75,8 +73,7 @@ def generate_answer_with_groq(question, context, retries=3, delay=2):
75
  "content": (
76
  "You are ToyBot, a friendly and helpful WhatsApp assistant for an online toy shop. "
77
  "Your goal is to politely answer customer questions, help them choose the right toys, "
78
- "provide order or delivery information, explain return policies, and guide them through purchases. "
79
- "Always sound warm, helpful, and trustworthy like a professional customer support agent."
80
  )
81
  },
82
  {"role": "user", "content": prompt},
@@ -87,20 +84,16 @@ def generate_answer_with_groq(question, context, retries=3, delay=2):
87
 
88
  for attempt in range(retries):
89
  try:
90
- response = requests.post(url, headers=headers, json=payload, timeout=10)
91
- response.raise_for_status()
92
  result = response.json()
93
  return result['choices'][0]['message']['content'].strip()
94
- except requests.exceptions.HTTPError as e:
95
- if response.status_code == 503 and attempt < retries - 1:
96
  time.sleep(delay)
97
  continue
98
- else:
99
- return f"⚠️ Groq API HTTPError: {e}"
100
- except Exception as e:
101
- return f"⚠️ Groq API Error: {e}"
102
 
103
- # Twilio message fetch and send
104
  def fetch_latest_incoming_message(account_sid, auth_token, conversation_sid):
105
  client = Client(account_sid, auth_token)
106
  messages = client.conversations.v1.conversations(conversation_sid).messages.list(limit=10)
@@ -112,79 +105,84 @@ def fetch_latest_incoming_message(account_sid, auth_token, conversation_sid):
112
  def send_twilio_message(account_sid, auth_token, conversation_sid, body):
113
  try:
114
  client = Client(account_sid, auth_token)
115
- message = client.conversations.v1.conversations(conversation_sid).messages.create(author="system", body=body)
 
 
 
116
  return message.sid
117
  except Exception as e:
118
  return str(e)
119
 
120
- # Streamlit UI
121
- st.set_page_config(page_title="Quasa – A Smart WhatsApp Chatbot", layout="wide")
122
- st.title("📱 Quasa – A Smart WhatsApp Chatbot")
123
-
124
- if "last_index" not in st.session_state:
125
- st.session_state.last_index = -1
126
-
127
- account_sid = st.secrets.get("TWILIO_SID")
128
- auth_token = st.secrets.get("TWILIO_TOKEN")
129
- GROQ_API_KEY = st.secrets.get("GROQ_API_KEY")
130
-
131
- if not all([account_sid, auth_token, GROQ_API_KEY]):
132
- st.warning("⚠️ Some secrets not found. Please enter missing credentials below:")
133
- account_sid = st.text_input("Twilio SID", value=account_sid or "")
134
- auth_token = st.text_input("Twilio Auth Token", type="password", value=auth_token or "")
135
- GROQ_API_KEY = st.text_input("GROQ API Key", type="password", value=GROQ_API_KEY or "")
136
-
137
- conversation_sid = st.text_input("Enter Conversation SID", value="")
138
-
139
- enable_autorefresh = st.checkbox("🔄 Enable Auto-Refresh", value=True)
140
- interval_seconds = st.selectbox("Refresh Interval (seconds)", options=[5, 10, 15, 30, 60], index=1)
141
-
142
- if enable_autorefresh:
143
- st_autorefresh(interval=interval_seconds * 1000, key="auto-refresh")
144
-
145
- if all([account_sid, auth_token, GROQ_API_KEY, conversation_sid]):
146
- os.environ["GROQ_API_KEY"] = GROQ_API_KEY
147
-
148
- @st.cache_data(show_spinner=False)
149
- def setup_knowledge_base():
150
- folder_path = "docs"
151
- all_text = ""
152
- try:
153
- for file in os.listdir(folder_path):
154
- if file.endswith(".pdf"):
155
- all_text += extract_text_from_pdf(os.path.join(folder_path, file)) + "\n"
156
- elif file.endswith((".docx", ".doc")):
157
- all_text += extract_text_from_docx(os.path.join(folder_path, file)) + "\n"
158
- tokenizer = AutoTokenizer.from_pretrained('bert-base-uncased')
159
- chunks = chunk_text(all_text, tokenizer)
160
- model = SentenceTransformer('all-mpnet-base-v2')
161
- embeddings = model.encode(chunks)
162
- dim = embeddings[0].shape[0]
163
- index = faiss.IndexFlatL2(dim)
164
- index.add(np.array(embeddings).astype('float32'))
165
- return index, model, chunks
166
- except Exception as e:
167
- st.error(f"Error setting up knowledge base: {e}")
168
- return None, None, None
169
-
170
- index, embedding_model, text_chunks = setup_knowledge_base()
171
- if index is None:
172
- st.stop()
173
-
174
- st.success("✅ Knowledge base ready. Monitoring WhatsApp...")
175
-
176
- with st.spinner("⏳ Checking for new WhatsApp messages..."):
177
- question, sender, msg_index = fetch_latest_incoming_message(account_sid, auth_token, conversation_sid)
178
- if question and msg_index > st.session_state.last_index:
179
- st.session_state.last_index = msg_index
180
- st.info(f"📥 New Question from {sender}:\n\n> {question}")
181
- relevant_chunks = retrieve_chunks(question, index, embedding_model, text_chunks)
182
- context = "\n\n".join(relevant_chunks)
183
- answer = generate_answer_with_groq(question, context)
184
- send_twilio_message(account_sid, auth_token, conversation_sid, answer)
185
- st.success("📤 Answer sent via WhatsApp!")
186
- st.markdown(f"### ✨ Answer:\n\n{answer}")
187
  else:
188
- st.caption("✅ No new message yet. Waiting for refresh...")
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
189
  else:
190
- st.warning("❗ Please provide all required credentials and conversation SID.")
 
8
  import faiss
9
  import numpy as np
10
  import docx
 
11
  import PyPDF2
12
  import requests
 
13
 
14
+ # --- Streamlit Config ---
15
+ st.set_page_config(page_title="Quasa – A Smart WhatsApp Chatbot", layout="wide")
16
+
17
+ # --- Utility: Extract Text ---
18
  def extract_text_from_pdf(pdf_path):
19
  try:
20
  text = ""
21
  with open(pdf_path, 'rb') as file:
22
  pdf_reader = PyPDF2.PdfReader(file)
23
+ for page in pdf_reader.pages:
 
24
  page_text = page.extract_text()
25
  if page_text:
26
  text += page_text
 
35
  except:
36
  return ""
37
 
38
+ # --- Chunking ---
39
  def chunk_text(text, tokenizer, chunk_size=150, chunk_overlap=30):
40
  tokens = tokenizer.tokenize(text)
41
  chunks, start = [], 0
 
46
  start += chunk_size - chunk_overlap
47
  return chunks
48
 
49
+ # --- Retrieve Relevant Chunks ---
50
  def retrieve_chunks(question, index, embed_model, text_chunks, k=3):
51
  question_embedding = embed_model.encode([question])[0]
52
  D, I = index.search(np.array([question_embedding]), k)
53
  return [text_chunks[i] for i in I[0]]
54
 
55
+ # --- GROQ API Call ---
56
  def generate_answer_with_groq(question, context, retries=3, delay=2):
57
  url = "https://api.groq.com/openai/v1/chat/completions"
58
  api_key = os.environ.get("GROQ_API_KEY")
 
 
 
59
  headers = {
60
  "Authorization": f"Bearer {api_key}",
61
  "Content-Type": "application/json",
 
73
  "content": (
74
  "You are ToyBot, a friendly and helpful WhatsApp assistant for an online toy shop. "
75
  "Your goal is to politely answer customer questions, help them choose the right toys, "
76
+ "provide order or delivery information, explain return policies, and guide them through purchases."
 
77
  )
78
  },
79
  {"role": "user", "content": prompt},
 
84
 
85
  for attempt in range(retries):
86
  try:
87
+ response = requests.post(url, headers=headers, json=payload)
 
88
  result = response.json()
89
  return result['choices'][0]['message']['content'].strip()
90
+ except Exception as e:
91
+ if "503" in str(e) and attempt < retries - 1:
92
  time.sleep(delay)
93
  continue
94
+ return f"⚠️ Groq API Error: {str(e)}"
 
 
 
95
 
96
+ # --- Twilio Helpers ---
97
  def fetch_latest_incoming_message(account_sid, auth_token, conversation_sid):
98
  client = Client(account_sid, auth_token)
99
  messages = client.conversations.v1.conversations(conversation_sid).messages.list(limit=10)
 
105
  def send_twilio_message(account_sid, auth_token, conversation_sid, body):
106
  try:
107
  client = Client(account_sid, auth_token)
108
+ message = client.conversations.v1.conversations(conversation_sid).messages.create(
109
+ author="system",
110
+ body=body
111
+ )
112
  return message.sid
113
  except Exception as e:
114
  return str(e)
115
 
116
+ # --- UI Styling ---
117
+ st.markdown("""
118
+ <style>
119
+ .big-font { font-size: 28px !important; font-weight: bold; }
120
+ .small-font { font-size: 16px; color: #555; }
121
+ .stButton > button {
122
+ background-color: #0066CC; color: white;
123
+ padding: 0.5em 1em; border-radius: 8px; font-size: 18px;
124
+ }
125
+ .stTextInput > div > input { font-size: 16px; }
126
+ </style>
127
+ """, unsafe_allow_html=True)
128
+
129
+ st.markdown('<div class="big-font">📱 Quasa – A Smart WhatsApp Chatbot</div>', unsafe_allow_html=True)
130
+ st.markdown('<div class="small-font">Talk to your documents using WhatsApp. Powered by Groq, Twilio, and RAG.</div>', unsafe_allow_html=True)
131
+
132
+ # --- Credentials ---
133
+ account_sid = st.secrets.get("TWILIO_SID") or st.text_input("🔐 Twilio SID", "")
134
+ auth_token = st.secrets.get("TWILIO_TOKEN") or st.text_input("🔐 Twilio Auth Token", type="password")
135
+ groq_key = st.secrets.get("GROQ_API_KEY") or st.text_input("🔐 GROQ API Key", type="password")
136
+
137
+ if all([account_sid, auth_token, groq_key]):
138
+ os.environ["GROQ_API_KEY"] = groq_key
139
+
140
+ # Conversation SID Input
141
+ conversation_sid = st.text_input("💬 Enter Twilio Conversation SID", key="conv_sid")
142
+
143
+ if st.button("🔄 Load Conversation"):
144
+ if not conversation_sid:
145
+ st.warning("Please enter a valid Conversation SID.")
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
146
  else:
147
+ st.success(f"Conversation SID `{conversation_sid}` loaded!")
148
+
149
+ @st.cache_resource
150
+ def setup_knowledge_base():
151
+ folder_path = "docs"
152
+ all_text = ""
153
+ for file in os.listdir(folder_path):
154
+ if file.endswith(".pdf"):
155
+ all_text += extract_text_from_pdf(os.path.join(folder_path, file)) + "\n"
156
+ elif file.endswith((".docx", ".doc")):
157
+ all_text += extract_text_from_docx(os.path.join(folder_path, file)) + "\n"
158
+ tokenizer = AutoTokenizer.from_pretrained('bert-base-uncased')
159
+ chunks = chunk_text(all_text, tokenizer)
160
+ model = SentenceTransformer('all-mpnet-base-v2')
161
+ embeddings = model.encode(chunks)
162
+ dim = embeddings[0].shape[0]
163
+ index = faiss.IndexFlatL2(dim)
164
+ index.add(np.array(embeddings))
165
+ return index, model, chunks
166
+
167
+ index, embedding_model, text_chunks = setup_knowledge_base()
168
+ st.success("✅ Knowledge base loaded!")
169
+
170
+ if "last_processed_index" not in st.session_state:
171
+ st.session_state.last_processed_index = -1
172
+
173
+ if st.button("📲 Check WhatsApp for New Message"):
174
+ with st.spinner("Checking messages..."):
175
+ question, sender, msg_index = fetch_latest_incoming_message(account_sid, auth_token, conversation_sid)
176
+ if question and msg_index != st.session_state.last_processed_index:
177
+ st.session_state.last_processed_index = msg_index
178
+ st.info(f"📥 New message from **{sender}**:\n\n> {question}")
179
+ relevant_chunks = retrieve_chunks(question, index, embedding_model, text_chunks)
180
+ context = "\n\n".join(relevant_chunks)
181
+ answer = generate_answer_with_groq(question, context)
182
+ send_twilio_message(account_sid, auth_token, conversation_sid, answer)
183
+ st.success("📤 Answer sent to user!")
184
+ st.markdown(f"### 🧠 Answer:\n\n{answer}")
185
+ else:
186
+ st.warning("No new messages found.")
187
  else:
188
+ st.warning("Please enter all required credentials.")