masadonline committed
Commit c0270e5 · verified · 1 Parent(s): e1f9b2f

Update app.py

Files changed (1)
  1. app.py +44 -44
app.py CHANGED
@@ -13,16 +13,18 @@ import PyPDF2
 import requests
 from streamlit_autorefresh import st_autorefresh
 
-# --- Text Extraction ---
+# Extract text from PDF with fallback
+# --- Document Loaders ---
 def extract_text_from_pdf(pdf_path):
     try:
         text = ""
         with open(pdf_path, 'rb') as file:
             pdf_reader = PyPDF2.PdfReader(file)
-            for page in pdf_reader.pages:
-                content = page.extract_text()
-                if content:
-                    text += content
+            for page_num in range(len(pdf_reader.pages)):
+                page = pdf_reader.pages[page_num]
+                page_text = page.extract_text()
+                if page_text:
+                    text += page_text
         return text
     except:
         return extract_text(pdf_path)
@@ -34,7 +36,6 @@ def extract_text_from_docx(docx_path):
     except:
         return ""
 
-# --- Chunking ---
 def chunk_text(text, tokenizer, chunk_size=150, chunk_overlap=30):
     tokens = tokenizer.tokenize(text)
     chunks, start = [], 0
@@ -45,13 +46,12 @@ def chunk_text(text, tokenizer, chunk_size=150, chunk_overlap=30):
         start += chunk_size - chunk_overlap
     return chunks
 
-# --- Retrieval ---
 def retrieve_chunks(question, index, embed_model, text_chunks, k=3):
     question_embedding = embed_model.encode([question])[0]
     D, I = index.search(np.array([question_embedding]), k)
     return [text_chunks[i] for i in I[0]]
 
-# --- Answer Generation ---
+# Generate answer using Groq API with retries and timeout
 def generate_answer_with_groq(question, context, retries=3, delay=2):
     url = "https://api.groq.com/openai/v1/chat/completions"
     api_key = os.environ.get("GROQ_API_KEY")
@@ -100,7 +100,7 @@ def generate_answer_with_groq(question, context, retries=3, delay=2):
     except Exception as e:
         return f"⚠️ Groq API Error: {e}"
 
-# --- Twilio Messaging ---
+# Twilio message fetch and send
 def fetch_latest_incoming_message(account_sid, auth_token, conversation_sid):
     client = Client(account_sid, auth_token)
     messages = client.conversations.v1.conversations(conversation_sid).messages.list(limit=10)
@@ -117,59 +117,59 @@ def send_twilio_message(account_sid, auth_token, conversation_sid, body):
     except Exception as e:
         return str(e)
 
-# --- Streamlit UI ---
+# Streamlit UI
 st.set_page_config(page_title="Quasa – A Smart WhatsApp Chatbot", layout="wide")
 st.title("📱 Quasa – A Smart WhatsApp Chatbot")
 
 if "last_index" not in st.session_state:
     st.session_state.last_index = -1
 
-# --- Credentials ---
 account_sid = st.secrets.get("TWILIO_SID")
 auth_token = st.secrets.get("TWILIO_TOKEN")
 GROQ_API_KEY = st.secrets.get("GROQ_API_KEY")
 
-account_sid = st.text_input("🔐 Twilio SID", value=account_sid or "")
-auth_token = st.text_input("🔐 Twilio Auth Token", type="password", value=auth_token or "")
-GROQ_API_KEY = st.text_input("🔐 GROQ API Key", type="password", value=GROQ_API_KEY or "")
-conversation_sid = st.text_input("💬 Twilio Conversation SID")
+if not all([account_sid, auth_token, GROQ_API_KEY]):
+    st.warning("⚠️ Some secrets not found. Please enter missing credentials below:")
+    account_sid = st.text_input("Twilio SID", value=account_sid or "")
+    auth_token = st.text_input("Twilio Auth Token", type="password", value=auth_token or "")
+    GROQ_API_KEY = st.text_input("GROQ API Key", type="password", value=GROQ_API_KEY or "")
+
+enable_autorefresh = st.checkbox("🔄 Enable Auto-Refresh", value=True)
+interval_seconds = st.selectbox("Refresh Interval (seconds)", options=[5, 10, 15, 30, 60], index=5)
+
+if enable_autorefresh:
+    st_autorefresh(interval=interval_seconds * 1000, key="auto-refresh")
 
 if all([account_sid, auth_token, GROQ_API_KEY, conversation_sid]):
     os.environ["GROQ_API_KEY"] = GROQ_API_KEY
 
-    @st.cache_resource(show_spinner=True)
+    @st.cache_data(show_spinner=False)
     def setup_knowledge_base():
         folder_path = "docs"
         all_text = ""
-        for file in os.listdir(folder_path):
-            full_path = os.path.join(folder_path, file)
-            if file.endswith(".pdf"):
-                all_text += extract_text_from_pdf(full_path) + "\n"
-            elif file.endswith((".docx", ".doc")):
-                all_text += extract_text_from_docx(full_path) + "\n"
-
-        tokenizer = AutoTokenizer.from_pretrained('bert-base-uncased')
-        chunks = chunk_text(all_text, tokenizer)
-        model = SentenceTransformer('all-mpnet-base-v2')
-        embeddings = model.encode(chunks)
-        dim = embeddings[0].shape[0]
-        index = faiss.IndexFlatL2(dim)
-        index.add(np.array(embeddings).astype('float32'))
-        return index, model, chunks
-
-    st.info("⚙️ Preparing knowledge base...")
-    try:
-        index, embedding_model, text_chunks = setup_knowledge_base()
-        st.success("✅ Knowledge base ready. Monitoring WhatsApp...")
-    except Exception as e:
-        st.error(f"❌ Failed to prepare knowledge base: {e}")
+        try:
+            for file in os.listdir(folder_path):
+                if file.endswith(".pdf"):
+                    all_text += extract_text_from_pdf(os.path.join(folder_path, file)) + "\n"
+                elif file.endswith((".docx", ".doc")):
+                    all_text += extract_text_from_docx(os.path.join(folder_path, file)) + "\n"
+            tokenizer = AutoTokenizer.from_pretrained('bert-base-uncased')
+            chunks = chunk_text(all_text, tokenizer)
+            model = SentenceTransformer('all-mpnet-base-v2')
+            embeddings = model.encode(chunks)
+            dim = embeddings[0].shape[0]
+            index = faiss.IndexFlatL2(dim)
+            index.add(np.array(embeddings).astype('float32'))
+            return index, model, chunks
+        except Exception as e:
+            st.error(f"Error setting up knowledge base: {e}")
+            return None, None, None
+
+    index, embedding_model, text_chunks = setup_knowledge_base()
+    if index is None:
         st.stop()
 
-    # --- Auto Refresh ---
-    enable_autorefresh = st.checkbox("🔄 Enable Auto-Refresh", value=True)
-    interval_seconds = st.selectbox("Refresh Interval (seconds)", options=[5, 10, 15, 30, 60], index=4)
-    if enable_autorefresh:
-        st_autorefresh(interval=interval_seconds * 1000, key="auto-refresh")
+    st.success("✅ Knowledge base ready. Monitoring WhatsApp...")
 
     with st.spinner("⏳ Checking for new WhatsApp messages..."):
        question, sender, msg_index = fetch_latest_incoming_message(account_sid, auth_token, conversation_sid)
@@ -185,4 +185,4 @@ if all([account_sid, auth_token, GROQ_API_KEY, conversation_sid]):
     else:
         st.caption("✅ No new message yet. Waiting for refresh...")
 else:
-    st.warning("❗ Please provide all required credentials and conversation SID.")
+    st.warning("❗ Please provide all required credentials and conversation SID.")
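A note on the fallback path above: the except branch calls extract_text(pdf_path), whose import sits outside the hunks shown here. A minimal sketch of that fallback, assuming it comes from pdfminer.six's high-level API (an assumption, not confirmed by this diff; the file path is a placeholder):

# Sketch only: assumed pdfminer.six source of extract_text(), import not shown in the diff.
from pdfminer.high_level import extract_text  # assumed import

# Layout-based extraction used when the PyPDF2 pass raises or yields nothing.
fallback_text = extract_text("docs/sample.pdf")  # placeholder path
print(fallback_text[:200])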
 
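For readers tracing the retrieval path, here is a minimal, self-contained sketch of the embed-then-search flow that setup_knowledge_base() and retrieve_chunks() implement together (chunk strings and the question are placeholders; assumes sentence-transformers, faiss and numpy are installed):

# Sketch of the embed -> FAISS index -> nearest-chunk lookup flow.
import numpy as np
import faiss
from sentence_transformers import SentenceTransformer

chunks = [
    "Refunds are accepted within 30 days of purchase.",    # placeholder text
    "Support is available Monday to Friday, 9am to 5pm.",  # placeholder text
]

model = SentenceTransformer('all-mpnet-base-v2')
embeddings = model.encode(chunks)                  # (n_chunks, 768) array

index = faiss.IndexFlatL2(embeddings.shape[1])     # exact L2 search, as in the app
index.add(np.array(embeddings).astype('float32'))

query_vec = model.encode(["When can I return an item?"])
D, I = index.search(np.array(query_vec).astype('float32'), 1)
print([chunks[i] for i in I[0]])                   # nearest chunk(s) for the question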