masadonline committed on
Commit
e992967
·
verified ·
1 Parent(s): 40ee9a0

Update app.py

Browse files
Files changed (1) hide show
  1. app.py +71 -100
app.py CHANGED
@@ -2,25 +2,25 @@ import os
2
  import time
3
  import streamlit as st
4
  from twilio.rest import Client
 
5
  from pdfminer.high_level import extract_text
6
  from sentence_transformers import SentenceTransformer
7
  from transformers import AutoTokenizer
8
  import faiss
9
  import numpy as np
10
  import docx
 
11
  import PyPDF2
12
  import requests
13
 
14
- # --- Streamlit Config ---
15
- st.set_page_config(page_title="Quasa – A Smart WhatsApp Chatbot", layout="wide")
16
-
17
- # --- Utility: Extract Text ---
18
  def extract_text_from_pdf(pdf_path):
19
  try:
20
  text = ""
21
  with open(pdf_path, 'rb') as file:
22
  pdf_reader = PyPDF2.PdfReader(file)
23
- for page in pdf_reader.pages:
 
24
  page_text = page.extract_text()
25
  if page_text:
26
  text += page_text
@@ -35,7 +35,6 @@ def extract_text_from_docx(docx_path):
35
  except:
36
  return ""
37
 
38
- # --- Chunking ---
39
  def chunk_text(text, tokenizer, chunk_size=150, chunk_overlap=30):
40
  tokens = tokenizer.tokenize(text)
41
  chunks, start = [], 0
@@ -46,36 +45,24 @@ def chunk_text(text, tokenizer, chunk_size=150, chunk_overlap=30):
46
  start += chunk_size - chunk_overlap
47
  return chunks
48
 
49
- # --- Retrieve Relevant Chunks ---
50
  def retrieve_chunks(question, index, embed_model, text_chunks, k=3):
51
  question_embedding = embed_model.encode([question])[0]
52
  D, I = index.search(np.array([question_embedding]), k)
53
  return [text_chunks[i] for i in I[0]]
54
 
55
- # --- GROQ API Call ---
56
  def generate_answer_with_groq(question, context, retries=3, delay=2):
57
  url = "https://api.groq.com/openai/v1/chat/completions"
58
- api_key = os.environ.get("GROQ_API_KEY")
59
  headers = {
60
  "Authorization": f"Bearer {api_key}",
61
  "Content-Type": "application/json",
62
  }
63
- prompt = (
64
- f"Customer asked: '{question}'\n\n"
65
- f"Here is the relevant product or policy info to help:\n{context}\n\n"
66
- f"Respond in a friendly and helpful tone as a toy shop support agent."
67
- )
68
  payload = {
69
  "model": "llama3-8b-8192",
70
  "messages": [
71
- {
72
- "role": "system",
73
- "content": (
74
- "You are ToyBot, a friendly and helpful WhatsApp assistant for an online toy shop. "
75
- "Your goal is to politely answer customer questions, help them choose the right toys, "
76
- "provide order or delivery information, explain return policies, and guide them through purchases."
77
- )
78
- },
79
  {"role": "user", "content": prompt},
80
  ],
81
  "temperature": 0.5,
@@ -91,9 +78,10 @@ def generate_answer_with_groq(question, context, retries=3, delay=2):
91
  if "503" in str(e) and attempt < retries - 1:
92
  time.sleep(delay)
93
  continue
94
- return f"⚠️ Groq API Error: {str(e)}"
 
95
 
96
- # --- Twilio Helpers ---
97
  def fetch_latest_incoming_message(account_sid, auth_token, conversation_sid):
98
  client = Client(account_sid, auth_token)
99
  messages = client.conversations.v1.conversations(conversation_sid).messages.list(limit=10)
@@ -105,84 +93,67 @@ def fetch_latest_incoming_message(account_sid, auth_token, conversation_sid):
105
  def send_twilio_message(account_sid, auth_token, conversation_sid, body):
106
  try:
107
  client = Client(account_sid, auth_token)
108
- message = client.conversations.v1.conversations(conversation_sid).messages.create(
109
- author="system",
110
- body=body
111
- )
112
  return message.sid
113
  except Exception as e:
114
  return str(e)
115
 
116
- # --- UI Styling ---
117
- st.markdown("""
118
- <style>
119
- .big-font { font-size: 28px !important; font-weight: bold; }
120
- .small-font { font-size: 16px; color: #555; }
121
- .stButton > button {
122
- background-color: #0066CC; color: white;
123
- padding: 0.5em 1em; border-radius: 8px; font-size: 18px;
124
- }
125
- .stTextInput > div > input { font-size: 16px; }
126
- </style>
127
- """, unsafe_allow_html=True)
128
-
129
- st.markdown('<div class="big-font">πŸ“± Quasa – A Smart WhatsApp Chatbot</div>', unsafe_allow_html=True)
130
- st.markdown('<div class="small-font">Talk to your documents using WhatsApp. Powered by Groq, Twilio, and RAG.</div>', unsafe_allow_html=True)
131
-
132
- # --- Credentials ---
133
- account_sid = st.secrets.get("TWILIO_SID") or st.text_input("πŸ” Twilio SID", "")
134
- auth_token = st.secrets.get("TWILIO_TOKEN") or st.text_input("πŸ” Twilio Auth Token", type="password")
135
- groq_key = st.secrets.get("GROQ_API_KEY") or st.text_input("πŸ” GROQ API Key", type="password")
136
-
137
- if all([account_sid, auth_token, groq_key]):
138
- os.environ["GROQ_API_KEY"] = groq_key
139
-
140
- # Conversation SID Input
141
- conversation_sid = st.text_input("πŸ’¬ Enter Twilio Conversation SID", key="conv_sid")
142
-
143
- if st.button("πŸ”„ Load Conversation"):
144
- if not conversation_sid:
145
- st.warning("Please enter a valid Conversation SID.")
146
- else:
147
- st.success(f"Conversation SID `{conversation_sid}` loaded!")
148
-
149
- @st.cache_resource
150
- def setup_knowledge_base():
151
- folder_path = "docs"
152
- all_text = ""
153
- for file in os.listdir(folder_path):
154
- if file.endswith(".pdf"):
155
- all_text += extract_text_from_pdf(os.path.join(folder_path, file)) + "\n"
156
- elif file.endswith((".docx", ".doc")):
157
- all_text += extract_text_from_docx(os.path.join(folder_path, file)) + "\n"
158
- tokenizer = AutoTokenizer.from_pretrained('bert-base-uncased')
159
- chunks = chunk_text(all_text, tokenizer)
160
- model = SentenceTransformer('all-mpnet-base-v2')
161
- embeddings = model.encode(chunks)
162
- dim = embeddings[0].shape[0]
163
- index = faiss.IndexFlatL2(dim)
164
- index.add(np.array(embeddings))
165
- return index, model, chunks
166
-
167
- index, embedding_model, text_chunks = setup_knowledge_base()
168
- st.success("βœ… Knowledge base loaded!")
169
-
170
- if "last_processed_index" not in st.session_state:
171
- st.session_state.last_processed_index = -1
172
-
173
- if st.button("πŸ“² Check WhatsApp for New Message"):
174
- with st.spinner("Checking messages..."):
175
- question, sender, msg_index = fetch_latest_incoming_message(account_sid, auth_token, conversation_sid)
176
- if question and msg_index != st.session_state.last_processed_index:
177
- st.session_state.last_processed_index = msg_index
178
- st.info(f"πŸ“₯ New message from **{sender}**:\n\n> {question}")
179
- relevant_chunks = retrieve_chunks(question, index, embedding_model, text_chunks)
180
- context = "\n\n".join(relevant_chunks)
181
- answer = generate_answer_with_groq(question, context)
182
- send_twilio_message(account_sid, auth_token, conversation_sid, answer)
183
- st.success("πŸ“€ Answer sent to user!")
184
- st.markdown(f"### 🧠 Answer:\n\n{answer}")
185
- else:
186
- st.warning("No new messages found.")
187
  else:
188
- st.warning("Please enter all required credentials.")
 
2
  import time
3
  import streamlit as st
4
  from twilio.rest import Client
5
+ from twilio.base.exceptions import TwilioRestException
6
  from pdfminer.high_level import extract_text
7
  from sentence_transformers import SentenceTransformer
8
  from transformers import AutoTokenizer
9
  import faiss
10
  import numpy as np
11
  import docx
12
+ from groq import Groq
13
  import PyPDF2
14
  import requests
15
 
16
+ # --- Document Loaders ---
 
 
 
17
  def extract_text_from_pdf(pdf_path):
18
  try:
19
  text = ""
20
  with open(pdf_path, 'rb') as file:
21
  pdf_reader = PyPDF2.PdfReader(file)
22
+ for page_num in range(len(pdf_reader.pages)):
23
+ page = pdf_reader.pages[page_num]
24
  page_text = page.extract_text()
25
  if page_text:
26
  text += page_text
 
35
  except:
36
  return ""
37
 
 
38
  def chunk_text(text, tokenizer, chunk_size=150, chunk_overlap=30):
39
  tokens = tokenizer.tokenize(text)
40
  chunks, start = [], 0
 
45
  start += chunk_size - chunk_overlap
46
  return chunks
47
 
 
48
  def retrieve_chunks(question, index, embed_model, text_chunks, k=3):
49
  question_embedding = embed_model.encode([question])[0]
50
  D, I = index.search(np.array([question_embedding]), k)
51
  return [text_chunks[i] for i in I[0]]
52
 
53
+ # --- GROQ Answer Generator ---
54
  def generate_answer_with_groq(question, context, retries=3, delay=2):
55
  url = "https://api.groq.com/openai/v1/chat/completions"
56
+ api_key = os.environ["GROQ_API_KEY"]
57
  headers = {
58
  "Authorization": f"Bearer {api_key}",
59
  "Content-Type": "application/json",
60
  }
61
+ prompt = f"Based on the following context, answer the question: '{question}'\n\nContext:\n{context}"
 
 
 
 
62
  payload = {
63
  "model": "llama3-8b-8192",
64
  "messages": [
65
+ {"role": "system", "content": "You are a real human and you will respond as real human. You will be asked questions and you will respond like a human."},
 
 
 
 
 
 
 
66
  {"role": "user", "content": prompt},
67
  ],
68
  "temperature": 0.5,
 
78
  if "503" in str(e) and attempt < retries - 1:
79
  time.sleep(delay)
80
  continue
81
+ else:
82
+ return f"⚠️ Groq API Error: {str(e)}"
83
 
84
+ # --- Twilio Chat Handlers ---
85
  def fetch_latest_incoming_message(account_sid, auth_token, conversation_sid):
86
  client = Client(account_sid, auth_token)
87
  messages = client.conversations.v1.conversations(conversation_sid).messages.list(limit=10)
 
93
  def send_twilio_message(account_sid, auth_token, conversation_sid, body):
94
  try:
95
  client = Client(account_sid, auth_token)
96
+ message = client.conversations.v1.conversations(conversation_sid).messages.create(author="system", body=body)
 
 
 
97
  return message.sid
98
  except Exception as e:
99
  return str(e)
100
 
101
+ # --- Streamlit UI ---
102
+ st.set_page_config(page_title="SMEHelpBot – WhatsApp Integration", layout="wide")
103
+ st.title("πŸ“± SMEHelpBot + WhatsApp (via Twilio)")
104
+
105
+ # Load from secrets
106
+ account_sid = st.secrets.get("TWILIO_SID")
107
+ auth_token = st.secrets.get("TWILIO_TOKEN")
108
+ GROQ_API_KEY = st.secrets.get("GROQ_API_KEY")
109
+
110
+ # Fallback for manual input
111
+ if not all([account_sid, auth_token, GROQ_API_KEY]):
112
+ st.warning("⚠️ Some secrets not found. Please enter missing credentials below:")
113
+ account_sid = st.text_input("Twilio SID", value=account_sid or "")
114
+ auth_token = st.text_input("Twilio Auth Token", type="password", value=auth_token or "")
115
+ GROQ_API_KEY = st.text_input("GROQ API Key", type="password", value=GROQ_API_KEY or "")
116
+
117
+ # New: Let user enter conversation_sid manually
118
+ conversation_sid = st.text_input("Twilio Conversation SID")
119
+
120
+ if all([account_sid, auth_token, GROQ_API_KEY, conversation_sid]):
121
+ os.environ["GROQ_API_KEY"] = GROQ_API_KEY
122
+
123
+ @st.cache_resource
124
+ def setup_knowledge_base():
125
+ folder_path = "docs"
126
+ all_text = ""
127
+ for file in os.listdir(folder_path):
128
+ if file.endswith(".pdf"):
129
+ all_text += extract_text_from_pdf(os.path.join(folder_path, file)) + "\n"
130
+ elif file.endswith((".docx", ".doc")):
131
+ all_text += extract_text_from_docx(os.path.join(folder_path, file)) + "\n"
132
+ tokenizer = AutoTokenizer.from_pretrained('bert-base-uncased')
133
+ chunks = chunk_text(all_text, tokenizer)
134
+ model = SentenceTransformer('all-mpnet-base-v2')
135
+ embeddings = model.encode(chunks)
136
+ dim = embeddings[0].shape[0]
137
+ index = faiss.IndexFlatL2(dim)
138
+ index.add(np.array(embeddings))
139
+ return index, model, chunks
140
+
141
+ index, embedding_model, text_chunks = setup_knowledge_base()
142
+
143
+ st.success("βœ… Knowledge base ready. Monitoring WhatsApp...")
144
+
145
+ if st.button("πŸ” Check for New WhatsApp Query"):
146
+ with st.spinner("Checking messages..."):
147
+ question, sender, msg_index = fetch_latest_incoming_message(account_sid, auth_token, conversation_sid)
148
+ if question:
149
+ st.info(f"πŸ“₯ New Question from {sender}:\n\n> {question}")
150
+ relevant_chunks = retrieve_chunks(question, index, embedding_model, text_chunks)
151
+ context = "\n\n".join(relevant_chunks)
152
+ answer = generate_answer_with_groq(question, context)
153
+ send_twilio_message(account_sid, auth_token, conversation_sid, answer)
154
+ st.success("πŸ“€ Answer sent via WhatsApp!")
155
+ st.markdown(f"### ✨ Answer:\n\n{answer}")
156
+ else:
157
+ st.warning("No new messages from users found.")
 
 
 
 
 
 
 
 
 
 
 
 
 
 
158
  else:
159
+ st.warning("❗ Please provide all required credentials including Conversation SID.")