masadonline committed on
Commit
7088627
·
verified ·
1 Parent(s): eff646e

Update app.py

Browse files
Files changed (1) hide show
  1. app.py +109 -202
app.py CHANGED
@@ -1,219 +1,126 @@
1
- import os
2
- import time
3
  import streamlit as st
4
  from twilio.rest import Client
5
- from pdfminer.high_level import extract_text
6
- from sentence_transformers import SentenceTransformer
7
- from transformers import AutoTokenizer
8
- import faiss
9
- import numpy as np
10
- import docx
11
- import PyPDF2
12
  import requests
 
 
 
 
 
 
 
 
 
 
 
 
13
 
14
# --- Page configuration (kept as the first Streamlit call in the script) ---
st.set_page_config(
    page_title="Quasa – A Smart WhatsApp Chatbot",
    layout="wide",
)

# --- Refresh bookkeeping ---
# First run of a session: just remember when we started.
# Later runs: if more than 10 seconds have elapsed since the remembered time,
# reset the timestamp and ask Streamlit to rerun the script.
if "last_refresh" in st.session_state:
    if time.time() - st.session_state["last_refresh"] > 10:
        st.session_state["last_refresh"] = time.time()
        st.experimental_rerun()
else:
    st.session_state["last_refresh"] = time.time()
23
 
24
- # --- Document Loaders ---
25
def extract_text_from_pdf(pdf_path):
    """Return the text of the PDF stored at *pdf_path*.

    PyPDF2 is tried first. If it raises for any reason, the path is handed to
    pdfminer's ``extract_text`` instead and that result is returned.

    Args:
        pdf_path: Filesystem path of the PDF to read.

    Returns:
        The concatenated text of all pages that produced any text.
    """
    try:
        with open(pdf_path, "rb") as file:
            pdf_reader = PyPDF2.PdfReader(file)
            # Pages that yield nothing (empty or None) contribute no text.
            return "".join(page.extract_text() or "" for page in pdf_reader.pages)
    except Exception:
        # Previously a bare `except:`, which also trapped KeyboardInterrupt and
        # SystemExit. Any ordinary PyPDF2 failure still falls back to pdfminer.
        return extract_text(pdf_path)
38
-
39
def extract_text_from_docx(docx_path):
    """Return the paragraph text of a Word document, one paragraph per line.

    This is deliberately best-effort: any error while opening or reading the
    document results in an empty string instead of an exception.

    Args:
        docx_path: Filesystem path of the document to read.

    Returns:
        The paragraphs joined with newlines, or ``""`` if the file could not
        be read.
    """
    try:
        doc = docx.Document(docx_path)
        return "\n".join(para.text for para in doc.paragraphs)
    except Exception:
        # Narrowed from a bare `except:` so that KeyboardInterrupt/SystemExit
        # are no longer swallowed; ordinary read failures still yield "".
        return ""
45
-
46
def chunk_text(text, tokenizer, chunk_size=150, chunk_overlap=30):
    """Split *text* into overlapping token windows and return them as strings.

    Args:
        text: The text to split.
        tokenizer: Object providing ``tokenize(text)`` and
            ``convert_tokens_to_string(tokens)``.
        chunk_size: Maximum number of tokens per chunk.
        chunk_overlap: Number of tokens shared by consecutive chunks.

    Returns:
        A list of chunk strings; empty when the text has no tokens.

    Raises:
        ValueError: If ``chunk_size`` is not positive or ``chunk_overlap`` is
            not smaller than ``chunk_size``. The window would never advance in
            that case (the original loop spun forever).
    """
    if chunk_size <= 0:
        raise ValueError("chunk_size must be a positive integer")
    if chunk_overlap >= chunk_size:
        raise ValueError("chunk_overlap must be smaller than chunk_size")

    tokens = tokenizer.tokenize(text)
    stride = chunk_size - chunk_overlap
    # Slicing past the end of the list is safe, so no explicit min() is needed.
    return [
        tokenizer.convert_tokens_to_string(tokens[start:start + chunk_size])
        for start in range(0, len(tokens), stride)
    ]
55
-
56
def retrieve_chunks(question, index, embed_model, text_chunks, k=3):
    """Return the text chunks nearest to *question* in the vector index.

    Args:
        question: The query string.
        index: Vector index exposing ``search(vectors, k)`` that returns
            ``(distances, ids)``.
        embed_model: Model exposing ``encode(list_of_texts)``.
        text_chunks: Chunk strings, positioned to match the ids in *index*.
        k: Number of neighbours to request.

    Returns:
        Up to *k* chunk strings in the order the index returned them.
    """
    question_embedding = embed_model.encode([question])[0]
    _, neighbour_ids = index.search(np.array([question_embedding]), k)
    # FAISS pads the id row with -1 when it holds fewer than k vectors; used as
    # a Python index that would silently return the last chunk. Drop any id
    # that does not address a real chunk.
    return [text_chunks[i] for i in neighbour_ids[0] if 0 <= i < len(text_chunks)]
60
-
61
- # --- GROQ Answer Generation ---
62
def generate_answer_with_groq(question, context, retries=3, delay=2):
    """Ask Groq's chat-completions endpoint to answer *question* using *context*.

    The API key is read from the ``GROQ_API_KEY`` environment variable.

    Args:
        question: The customer's question.
        context: Retrieved reference text to include in the prompt.
        retries: Maximum number of HTTP attempts.
        delay: Seconds to wait before retrying after an HTTP 503.

    Returns:
        The model's reply with surrounding whitespace stripped, or a string
        starting with ``"⚠️ Groq API Error:"`` when the request failed.

    Raises:
        KeyError: If ``GROQ_API_KEY`` is not set in the environment.
    """
    url = "https://api.groq.com/openai/v1/chat/completions"
    api_key = os.environ["GROQ_API_KEY"]
    headers = {
        "Authorization": f"Bearer {api_key}",
        "Content-Type": "application/json",
    }
    prompt = (
        f"Customer asked: '{question}'\n\n"
        f"Here is the relevant product or policy info to help:\n{context}\n\n"
        f"Respond in a friendly and helpful tone as a toy shop support agent."
    )
    payload = {
        "model": "llama3-8b-8192",
        "messages": [
            {
                "role": "system",
                "content": (
                    "You are ToyBot, a friendly and helpful WhatsApp assistant for an online toy shop. "
                    "Your goal is to politely answer customer questions, help them choose the right toys, "
                    "provide order or delivery information, explain return policies, and guide them through purchases. "
                    "Always sound warm, helpful, and trustworthy like a professional customer support agent."
                ),
            },
            {"role": "user", "content": prompt},
        ],
        "temperature": 0.5,
        "max_tokens": 300,
    }

    for attempt in range(retries):
        try:
            # A timeout keeps a stalled connection from hanging the app forever.
            response = requests.post(url, headers=headers, json=payload, timeout=30)
            # requests does not raise on HTTP error codes by itself, so the old
            # `"503" in str(e)` check never saw a 503. Inspect the status here.
            if response.status_code == 503 and attempt < retries - 1:
                time.sleep(delay)
                continue
            response.raise_for_status()
            result = response.json()
            return result['choices'][0]['message']['content'].strip()
        except Exception as e:
            # Boundary handler: callers expect a string back, never an exception.
            return f"⚠️ Groq API Error: {str(e)}"

    # Only reachable when retries <= 0; previously this fell through to None.
    return "⚠️ Groq API Error: no request was attempted (retries must be at least 1)"
103
-
104
- # --- Twilio Chat Handlers ---
105
def fetch_latest_conversation_sid(account_sid, auth_token):
    """Return the SID of the first conversation Twilio lists for the account.

    The listing is requested with ``limit=1``. Any exception raised while
    talking to Twilio is shown with ``st.error`` and results in ``None``.

    Args:
        account_sid: Twilio account SID.
        auth_token: Twilio auth token.

    Returns:
        The conversation SID, or ``None`` if nothing was listed or the lookup
        failed.
    """
    try:
        twilio = Client(account_sid, auth_token)
        listed = twilio.conversations.v1.conversations.list(limit=1)
        # At most one entry comes back; return its SID if there is one.
        for conversation in listed:
            return conversation.sid
    except Exception as e:
        st.error(f"⚠️ Could not fetch conversation SID: {e}")
    return None
114
 
115
def fetch_latest_incoming_message(account_sid, auth_token, conversation_sid):
    """Return the newest message in the conversation written by a WhatsApp author.

    Args:
        account_sid: Twilio account SID.
        auth_token: Twilio auth token.
        conversation_sid: SID of the conversation to inspect.

    Returns:
        A ``(body, author, index)`` tuple for the most recent of the last ten
        messages whose author starts with ``"whatsapp:"``, or
        ``(None, None, None)`` when there is none.
    """
    client = Client(account_sid, auth_token)
    # Twilio lists messages oldest-first by default, so `limit=10` alone returns
    # the FIRST ten messages of the conversation and newer ones are never seen.
    # Ask for descending order to get the ten most recent, newest first.
    messages = client.conversations.v1.conversations(conversation_sid).messages.list(
        order="desc", limit=10
    )
    for msg in messages:
        # author can be missing on some messages; treat that as "not WhatsApp".
        if (msg.author or "").startswith("whatsapp:"):
            return msg.body, msg.author, msg.index
    return None, None, None
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
122
 
123
def send_twilio_message(account_sid, auth_token, conversation_sid, body):
    """Post *body* into a Twilio conversation, authored by a WhatsApp participant.

    The author is the identity of the first participant whose identity starts
    with ``"whatsapp:"``.

    Args:
        account_sid: Twilio account SID.
        auth_token: Twilio auth token.
        conversation_sid: SID of the conversation to post into.
        body: Message text.

    Returns:
        The new message's SID on success. On failure, a human-readable string:
        either the "identity not found" notice or the text of the exception.
    """
    try:
        client = Client(account_sid, auth_token)
        conversation = client.conversations.v1.conversations(conversation_sid)
        # identity may be None for participants that are not chat users; calling
        # .startswith on it raised and masked the explicit notice below.
        bot_identity = next(
            (
                p.identity
                for p in conversation.participants.list()
                if (p.identity or "").startswith("whatsapp:")
            ),
            None,
        )
        if not bot_identity:
            return "⚠️ Bot identity with whatsapp: prefix not found in participants."

        message = conversation.messages.create(
            author=bot_identity,
            body=body
        )
        return message.sid
    except Exception as e:
        return str(e)
142
-
143
# --- Streamlit UI ---
st.markdown("""
<style>
.big-font { font-size: 28px !important; font-weight: bold; }
.small-font { font-size: 16px; color: #555; }
.stButton > button {
  background-color: #0066CC; color: white;
  padding: 0.5em 1em; border-radius: 8px; font-size: 18px;
}
.stTextInput > div > input { font-size: 16px; }
</style>
""", unsafe_allow_html=True)

# The phone emoji in the title was mis-decoded ("πŸ“±"); restored here.
st.markdown('<div class="big-font">📱 Quasa – A Smart WhatsApp Chatbot</div>', unsafe_allow_html=True)
st.markdown('<div class="small-font">Talk to your documents using WhatsApp. Powered by Groq, Twilio, and RAG.</div>', unsafe_allow_html=True)


def _read_secret(name):
    """Return the Streamlit secret *name*, or None when it cannot be read."""
    try:
        return st.secrets.get(name)
    except Exception:
        # NOTE(review): st.secrets access is expected to raise when no secrets
        # file is configured — confirm for the deployed Streamlit version. Treat
        # that as "not provided" so the manual-entry fallback below can run.
        return None


# Load secrets or fallback
account_sid = _read_secret("TWILIO_SID")
auth_token = _read_secret("TWILIO_TOKEN")
GROQ_API_KEY = _read_secret("GROQ_API_KEY")

if not all([account_sid, auth_token, GROQ_API_KEY]):
    st.warning("⚠️ Some secrets are missing. Please provide them manually:")
    # Pre-fill each field with whatever was found so only the gaps need typing.
    account_sid = st.text_input("Twilio SID", value=account_sid or "")
    auth_token = st.text_input("Twilio Auth Token", type="password", value=auth_token or "")
    GROQ_API_KEY = st.text_input("GROQ API Key", type="password", value=GROQ_API_KEY or "")
169
-
170
# Main flow: with all credentials present, locate a conversation, build the
# knowledge base once, then answer the newest unprocessed WhatsApp message.
# Mis-decoded emoji in the status strings ("βœ…", "πŸ“₯", "πŸ“€") are restored.
if all([account_sid, auth_token, GROQ_API_KEY]):
    # generate_answer_with_groq reads the key from the environment.
    os.environ["GROQ_API_KEY"] = GROQ_API_KEY

    conversation_sid = fetch_latest_conversation_sid(account_sid, auth_token)

    if conversation_sid:

        @st.cache_resource
        def setup_knowledge_base():
            """Index every PDF/Word file in ``docs`` and return (index, model, chunks).

            Raises:
                OSError: If the ``docs`` folder cannot be listed.
                ValueError: If no text could be extracted from the folder.
            """
            folder_path = "docs"
            pieces = []
            for file in os.listdir(folder_path):
                if file.endswith(".pdf"):
                    pieces.append(extract_text_from_pdf(os.path.join(folder_path, file)) + "\n")
                elif file.endswith((".docx", ".doc")):
                    pieces.append(extract_text_from_docx(os.path.join(folder_path, file)) + "\n")
            all_text = "".join(pieces)
            tokenizer = AutoTokenizer.from_pretrained('bert-base-uncased')
            chunks = chunk_text(all_text, tokenizer)
            if not chunks:
                # Without this, `embeddings[0]` below fails with an opaque IndexError.
                raise ValueError(f"no text could be extracted from the files in '{folder_path}'")
            model = SentenceTransformer('all-mpnet-base-v2')
            embeddings = model.encode(chunks)
            dim = embeddings[0].shape[0]
            index = faiss.IndexFlatL2(dim)
            index.add(np.array(embeddings))
            return index, model, chunks

        try:
            index, embedding_model, text_chunks = setup_knowledge_base()
        except (OSError, ValueError) as e:
            # Show a readable message instead of a traceback and end this run.
            st.error(f"⚠️ Could not build the knowledge base: {e}")
            st.stop()

        st.success(f"✅ Knowledge base ready. Monitoring WhatsApp messages for conversation: `{conversation_sid}`")

        if "last_processed_index" not in st.session_state:
            st.session_state.last_processed_index = -1

        with st.spinner("Checking for new WhatsApp messages..."):
            question, sender, msg_index = fetch_latest_incoming_message(account_sid, auth_token, conversation_sid)

        if question and msg_index != st.session_state.last_processed_index:
            # Record the index first so the same message is not answered twice.
            st.session_state.last_processed_index = msg_index
            st.info(f"📥 New question from **{sender}**:\n\n> {question}")
            relevant_chunks = retrieve_chunks(question, index, embedding_model, text_chunks)
            context = "\n\n".join(relevant_chunks)
            answer = generate_answer_with_groq(question, context)
            send_result = send_twilio_message(account_sid, auth_token, conversation_sid, answer)
            # send_twilio_message returns either a message SID or an error text.
            # Conversation message SIDs start with "IM"; anything else is a
            # failure and must not be reported as sent.
            if str(send_result).startswith("IM"):
                st.success("📤 Answer sent back to user on WhatsApp!")
            else:
                st.error(f"⚠️ Answer could not be sent to WhatsApp: {send_result}")
            st.markdown(f"### ✨ Answer:\n\n{answer}")
        else:
            st.warning("No new messages found.")
    else:
        st.warning("❗ No active conversation found.")
else:
    st.warning("❗ Please provide all required credentials.")
 
 
 
1
  import streamlit as st
2
  from twilio.rest import Client
 
 
 
 
 
 
 
3
  import requests
4
+ from PyPDF2 import PdfReader
5
+ from groq import Groq
6
+ from langchain.embeddings import HuggingFaceEmbeddings
7
+ from langchain.vectorstores import FAISS
8
+ from langchain.text_splitter import RecursiveCharacterTextSplitter
9
+ from langchain.docstore.document import Document
10
+ from langchain.prompts import PromptTemplate
11
+ from langchain.chains import RetrievalQA
12
+ from langchain.llms.base import LLM
13
+ from langchain_core.outputs import Generation
14
+ import tempfile
15
+ import os
16
 
17
# ---- CONFIG ---- #
st.set_page_config(
    page_title="Quasa – Smart WhatsApp Chatbot",
    layout="wide",
)

# ---- SESSION STATE ---- #
# Give every per-session slot an empty-string value the first time the script
# runs in a session; values already present are left untouched.
for _slot in ("conversation_sid", "user_message", "response"):
    if _slot not in st.session_state:
        st.session_state[_slot] = ""
29
+
30
# ---- SIDEBAR ---- #
# Collects the credentials and the knowledge PDF. The emoji in these labels were
# mis-decoded in the source ("πŸ“±", "πŸ”‘", ...) and are restored here; the
# lock icon on the auth-token field is a best guess at the original glyph.
with st.sidebar:
    st.title("📱 Quasa Setup")
    # Pasted secrets often carry stray whitespace, which makes authentication
    # fail for no visible reason; strip it at the point of entry.
    groq_api_key = st.text_input("🔑 GROQ API Key", type="password").strip()
    twilio_sid = st.text_input("🧩 Twilio Account SID", type="password").strip()
    twilio_token = st.text_input("🔐 Twilio Auth Token", type="password").strip()
    twilio_conv_sid = st.text_input("💬 Twilio Conversation SID").strip()

    uploaded_file = st.file_uploader("📄 Upload Knowledge PDF", type=["pdf"])

    if uploaded_file:
        st.success("PDF uploaded. Ready to chat!")
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
42
 
43
+ # ---- LLM Setup ---- #
44
class SimpleGroqLLM(LLM):
    """LangChain LLM wrapper that sends each prompt to Groq as one user message.

    Attributes are declared as class-level fields and set through
    ``super().__init__`` because LangChain's ``LLM`` base is a pydantic model:
    assigning undeclared attributes in ``__init__`` without initialising the
    base raises before the object can be used.
    """

    # Groq client instance; typed loosely so pydantic accepts it unchanged.
    client: object = None
    # Name of the Groq chat model to query.
    model_name: str = "llama3-8b-8192"

    def __init__(self, api_key: str, model_name="llama3-8b-8192", **kwargs):
        """Create the Groq client for *api_key* and remember *model_name*."""
        super().__init__(
            client=Groq(api_key=api_key),
            model_name=model_name,
            **kwargs,
        )

    def _call(self, prompt: str, stop=None, run_manager=None, **kwargs) -> str:
        """Return Groq's reply to *prompt*.

        ``stop``, ``run_manager`` and extra keyword arguments are accepted for
        compatibility with LangChain's calling convention and are not forwarded.
        """
        response = self.client.chat.completions.create(
            messages=[{"role": "user", "content": prompt}],
            model=self.model_name
        )
        # content can be None on an empty completion; LangChain expects a str.
        return response.choices[0].message.content or ""

    @property
    def _llm_type(self) -> str:
        """Identifier LangChain uses for this LLM implementation."""
        return "simple_groq"
59
+
60
+ # ---- HELPER FUNCTIONS ---- #
61
def extract_text_from_pdf(file) -> str:
    """Return the text of every page in *file*, each page followed by a newline.

    Args:
        file: A path or binary file-like object accepted by ``PdfReader``.

    Returns:
        The concatenated page texts. Pages that yield no text contribute only
        their trailing newline.
    """
    reader = PdfReader(file)
    # Guard against a page yielding None (e.g. no text layer): `None + "\n"`
    # would raise TypeError and abort the whole document.
    return "".join((page.extract_text() or "") + "\n" for page in reader.pages)
67
+
68
def create_vector_store(text: str):
    """Split *text* into chunks, embed them, and return a FAISS vector store.

    Chunks are at most 500 characters with a 100-character overlap and are
    embedded with the default ``HuggingFaceEmbeddings`` model.

    Args:
        text: The full document text to index.

    Returns:
        A FAISS vector store built from the chunks.

    Raises:
        ValueError: If *text* produces no chunks (for example a scanned PDF
            with no text layer). Building a store from zero documents would
            otherwise fail with an opaque IndexError.
    """
    splitter = RecursiveCharacterTextSplitter(chunk_size=500, chunk_overlap=100)
    chunks = splitter.split_text(text)
    if not chunks:
        raise ValueError("No text could be extracted from the document; nothing to index.")
    docs = [Document(page_content=chunk) for chunk in chunks]
    embeddings = HuggingFaceEmbeddings()
    return FAISS.from_documents(docs, embeddings)
74
+
75
def get_response_from_rag(query, vectorstore, groq_api_key):
    """Answer *query* with a retrieval-QA chain over *vectorstore*.

    Args:
        query: The question to answer.
        vectorstore: Vector store whose retriever supplies the context.
        groq_api_key: API key used to construct the Groq-backed LLM.

    Returns:
        Whatever the chain's ``run`` call returns for *query*.
    """
    # Keyword order keeps the original construction order: retriever, then LLM.
    chain = RetrievalQA.from_chain_type(
        retriever=vectorstore.as_retriever(),
        llm=SimpleGroqLLM(api_key=groq_api_key),
        return_source_documents=False,
    )
    return chain.run(query)
85
 
86
def send_twilio_message(account_sid, auth_token, conversation_sid, body):
    """Post *body* into a Twilio conversation under the author name 'ChatBot'.

    Args:
        account_sid: Twilio account SID.
        auth_token: Twilio auth token.
        conversation_sid: SID of the conversation to post into.
        body: Message text.

    Returns:
        The created message's SID, or a string beginning with
        "⚠️ Failed to send message:" if anything raised.
    """
    try:
        twilio = Client(account_sid, auth_token)
        conversation = twilio.conversations.v1.conversations(conversation_sid)
        # A fixed author label is used instead of a WhatsApp number.
        sent = conversation.messages.create(body=body, author='ChatBot')
        return sent.sid
    except Exception as e:
        return f"⚠️ Failed to send message: {e}"
96
+
97
# ---- MAIN ---- #
# Mis-decoded emoji in the user-facing strings below are restored; the
# magnifying glass on the indexing spinner is a best guess at the original glyph.
st.title("🤖 Quasa – Smart WhatsApp Chatbot")

if uploaded_file and groq_api_key and twilio_sid and twilio_token and twilio_conv_sid:
    st.session_state.conversation_sid = twilio_conv_sid

    # Streamlit re-executes this script on every widget interaction. Keep the
    # index in session state, keyed by the upload's name and size, so the PDF is
    # only re-read and re-embedded when a different file is provided.
    upload_key = (uploaded_file.name, uploaded_file.size)
    if st.session_state.get("vectorstore_key") != upload_key:
        with st.spinner("🔍 Reading and indexing document..."):
            text = extract_text_from_pdf(uploaded_file)
            st.session_state["vectorstore"] = create_vector_store(text)
            st.session_state["vectorstore_key"] = upload_key
    vectorstore = st.session_state["vectorstore"]

    # Input + response area
    user_input = st.text_input("💬 Ask a question (from WhatsApp user):", key="input")

    if st.button("📩 Respond & Send"):
        if user_input:
            with st.spinner("🤖 Generating response..."):
                answer = get_response_from_rag(user_input, vectorstore, groq_api_key)
                st.success("✅ Response Generated:")
                st.write(answer)

            # Send to Twilio
            with st.spinner("📤 Sending to WhatsApp..."):
                msg_sid = send_twilio_message(twilio_sid, twilio_token, twilio_conv_sid, answer)
            # send_twilio_message returns a message SID on success and an error
            # text otherwise. Conversation message SIDs start with "IM", so
            # anything else is shown as an error rather than as a SID.
            if str(msg_sid).startswith("IM"):
                st.info(f"📨 Message SID: `{msg_sid}`")
            else:
                st.error(msg_sid)
        else:
            st.warning("❗Please enter a question to proceed.")

else:
    st.warning("🚧 Please upload a PDF and fill in all credentials in the sidebar to proceed.")