masadonline committed on
Commit
e1f9b2f
·
verified ·
1 Parent(s): a0b543c

Update app.py

Browse files
Files changed (1) hide show
  1. app.py +44 -44
app.py CHANGED
@@ -13,18 +13,16 @@ import PyPDF2
13
  import requests
14
  from streamlit_autorefresh import st_autorefresh
15
 
16
- # Extract text from PDF with fallback
17
- # --- Document Loaders ---
18
  def extract_text_from_pdf(pdf_path):
19
  try:
20
  text = ""
21
  with open(pdf_path, 'rb') as file:
22
  pdf_reader = PyPDF2.PdfReader(file)
23
- for page_num in range(len(pdf_reader.pages)):
24
- page = pdf_reader.pages[page_num]
25
- page_text = page.extract_text()
26
- if page_text:
27
- text += page_text
28
  return text
29
  except:
30
  return extract_text(pdf_path)
@@ -36,6 +34,7 @@ def extract_text_from_docx(docx_path):
36
  except:
37
  return ""
38
 
 
39
  def chunk_text(text, tokenizer, chunk_size=150, chunk_overlap=30):
40
  tokens = tokenizer.tokenize(text)
41
  chunks, start = [], 0
@@ -46,12 +45,13 @@ def chunk_text(text, tokenizer, chunk_size=150, chunk_overlap=30):
46
  start += chunk_size - chunk_overlap
47
  return chunks
48
 
 
49
  def retrieve_chunks(question, index, embed_model, text_chunks, k=3):
50
  question_embedding = embed_model.encode([question])[0]
51
  D, I = index.search(np.array([question_embedding]), k)
52
  return [text_chunks[i] for i in I[0]]
53
 
54
- # Generate answer using Groq API with retries and timeout
55
  def generate_answer_with_groq(question, context, retries=3, delay=2):
56
  url = "https://api.groq.com/openai/v1/chat/completions"
57
  api_key = os.environ.get("GROQ_API_KEY")
@@ -100,7 +100,7 @@ def generate_answer_with_groq(question, context, retries=3, delay=2):
100
  except Exception as e:
101
  return f"⚠️ Groq API Error: {e}"
102
 
103
- # Twilio message fetch and send
104
  def fetch_latest_incoming_message(account_sid, auth_token, conversation_sid):
105
  client = Client(account_sid, auth_token)
106
  messages = client.conversations.v1.conversations(conversation_sid).messages.list(limit=10)
@@ -117,59 +117,59 @@ def send_twilio_message(account_sid, auth_token, conversation_sid, body):
117
  except Exception as e:
118
  return str(e)
119
 
120
- # Streamlit UI
121
  st.set_page_config(page_title="Quasa – A Smart WhatsApp Chatbot", layout="wide")
122
  st.title("📱 Quasa – A Smart WhatsApp Chatbot")
123
 
124
  if "last_index" not in st.session_state:
125
  st.session_state.last_index = -1
126
 
 
127
  account_sid = st.secrets.get("TWILIO_SID")
128
  auth_token = st.secrets.get("TWILIO_TOKEN")
129
  GROQ_API_KEY = st.secrets.get("GROQ_API_KEY")
130
 
131
- if not all([account_sid, auth_token, GROQ_API_KEY]):
132
- st.warning("⚠️ Some secrets not found. Please enter missing credentials below:")
133
- account_sid = st.text_input("Twilio SID", value=account_sid or "")
134
- auth_token = st.text_input("Twilio Auth Token", type="password", value=auth_token or "")
135
- GROQ_API_KEY = st.text_input("GROQ API Key", type="password", value=GROQ_API_KEY or "")
136
-
137
- enable_autorefresh = st.checkbox("🔄 Enable Auto-Refresh", value=True)
138
- interval_seconds = st.selectbox("Refresh Interval (seconds)", options=[5, 10, 15, 30, 60], index=4)
139
-
140
- if enable_autorefresh:
141
- st_autorefresh(interval=interval_seconds * 1000, key="auto-refresh")
142
 
143
  if all([account_sid, auth_token, GROQ_API_KEY, conversation_sid]):
144
  os.environ["GROQ_API_KEY"] = GROQ_API_KEY
145
 
146
- @st.cache_data(show_spinner=False)
147
  def setup_knowledge_base():
148
  folder_path = "docs"
149
  all_text = ""
150
- try:
151
- for file in os.listdir(folder_path):
152
- if file.endswith(".pdf"):
153
- all_text += extract_text_from_pdf(os.path.join(folder_path, file)) + "\n"
154
- elif file.endswith((".docx", ".doc")):
155
- all_text += extract_text_from_docx(os.path.join(folder_path, file)) + "\n"
156
- tokenizer = AutoTokenizer.from_pretrained('bert-base-uncased')
157
- chunks = chunk_text(all_text, tokenizer)
158
- model = SentenceTransformer('all-mpnet-base-v2')
159
- embeddings = model.encode(chunks)
160
- dim = embeddings[0].shape[0]
161
- index = faiss.IndexFlatL2(dim)
162
- index.add(np.array(embeddings).astype('float32'))
163
- return index, model, chunks
164
- except Exception as e:
165
- st.error(f"Error setting up knowledge base: {e}")
166
- return None, None, None
167
-
168
- index, embedding_model, text_chunks = setup_knowledge_base()
169
- if index is None:
 
 
170
  st.stop()
171
 
172
- st.success("✅ Knowledge base ready. Monitoring WhatsApp...")
 
 
 
 
173
 
174
  with st.spinner("⏳ Checking for new WhatsApp messages..."):
175
  question, sender, msg_index = fetch_latest_incoming_message(account_sid, auth_token, conversation_sid)
@@ -185,4 +185,4 @@ if all([account_sid, auth_token, GROQ_API_KEY, conversation_sid]):
185
  else:
186
  st.caption("✅ No new message yet. Waiting for refresh...")
187
  else:
188
- st.warning("❗ Please provide all required credentials and conversation SID.")
 
13
  import requests
14
  from streamlit_autorefresh import st_autorefresh
15
 
16
+ # --- Text Extraction ---
 
17
  def extract_text_from_pdf(pdf_path):
18
  try:
19
  text = ""
20
  with open(pdf_path, 'rb') as file:
21
  pdf_reader = PyPDF2.PdfReader(file)
22
+ for page in pdf_reader.pages:
23
+ content = page.extract_text()
24
+ if content:
25
+ text += content
 
26
  return text
27
  except:
28
  return extract_text(pdf_path)
 
34
  except:
35
  return ""
36
 
37
+ # --- Chunking ---
38
  def chunk_text(text, tokenizer, chunk_size=150, chunk_overlap=30):
39
  tokens = tokenizer.tokenize(text)
40
  chunks, start = [], 0
 
45
  start += chunk_size - chunk_overlap
46
  return chunks
47
 
48
+ # --- Retrieval ---
49
  def retrieve_chunks(question, index, embed_model, text_chunks, k=3):
50
  question_embedding = embed_model.encode([question])[0]
51
  D, I = index.search(np.array([question_embedding]), k)
52
  return [text_chunks[i] for i in I[0]]
53
 
54
+ # --- Answer Generation ---
55
  def generate_answer_with_groq(question, context, retries=3, delay=2):
56
  url = "https://api.groq.com/openai/v1/chat/completions"
57
  api_key = os.environ.get("GROQ_API_KEY")
 
100
  except Exception as e:
101
  return f"⚠️ Groq API Error: {e}"
102
 
103
+ # --- Twilio Messaging ---
104
  def fetch_latest_incoming_message(account_sid, auth_token, conversation_sid):
105
  client = Client(account_sid, auth_token)
106
  messages = client.conversations.v1.conversations(conversation_sid).messages.list(limit=10)
 
117
  except Exception as e:
118
  return str(e)
119
 
120
+ # --- Streamlit UI ---
121
  st.set_page_config(page_title="Quasa – A Smart WhatsApp Chatbot", layout="wide")
122
  st.title("📱 Quasa – A Smart WhatsApp Chatbot")
123
 
124
  if "last_index" not in st.session_state:
125
  st.session_state.last_index = -1
126
 
127
+ # --- Credentials ---
128
  account_sid = st.secrets.get("TWILIO_SID")
129
  auth_token = st.secrets.get("TWILIO_TOKEN")
130
  GROQ_API_KEY = st.secrets.get("GROQ_API_KEY")
131
 
132
+ account_sid = st.text_input("🔐 Twilio SID", value=account_sid or "")
133
+ auth_token = st.text_input("🔐 Twilio Auth Token", type="password", value=auth_token or "")
134
+ GROQ_API_KEY = st.text_input("🔐 GROQ API Key", type="password", value=GROQ_API_KEY or "")
135
+ conversation_sid = st.text_input("💬 Twilio Conversation SID")
 
 
 
 
 
 
 
136
 
137
  if all([account_sid, auth_token, GROQ_API_KEY, conversation_sid]):
138
  os.environ["GROQ_API_KEY"] = GROQ_API_KEY
139
 
140
+ @st.cache_resource(show_spinner=True)
141
  def setup_knowledge_base():
142
  folder_path = "docs"
143
  all_text = ""
144
+ for file in os.listdir(folder_path):
145
+ full_path = os.path.join(folder_path, file)
146
+ if file.endswith(".pdf"):
147
+ all_text += extract_text_from_pdf(full_path) + "\n"
148
+ elif file.endswith((".docx", ".doc")):
149
+ all_text += extract_text_from_docx(full_path) + "\n"
150
+
151
+ tokenizer = AutoTokenizer.from_pretrained('bert-base-uncased')
152
+ chunks = chunk_text(all_text, tokenizer)
153
+ model = SentenceTransformer('all-mpnet-base-v2')
154
+ embeddings = model.encode(chunks)
155
+ dim = embeddings[0].shape[0]
156
+ index = faiss.IndexFlatL2(dim)
157
+ index.add(np.array(embeddings).astype('float32'))
158
+ return index, model, chunks
159
+
160
+ st.info("⚙️ Preparing knowledge base...")
161
+ try:
162
+ index, embedding_model, text_chunks = setup_knowledge_base()
163
+ st.success("✅ Knowledge base ready. Monitoring WhatsApp...")
164
+ except Exception as e:
165
+ st.error(f"❌ Failed to prepare knowledge base: {e}")
166
  st.stop()
167
 
168
+ # --- Auto Refresh ---
169
+ enable_autorefresh = st.checkbox("🔄 Enable Auto-Refresh", value=True)
170
+ interval_seconds = st.selectbox("Refresh Interval (seconds)", options=[5, 10, 15, 30, 60], index=4)
171
+ if enable_autorefresh:
172
+ st_autorefresh(interval=interval_seconds * 1000, key="auto-refresh")
173
 
174
  with st.spinner("⏳ Checking for new WhatsApp messages..."):
175
  question, sender, msg_index = fetch_latest_incoming_message(account_sid, auth_token, conversation_sid)
 
185
  else:
186
  st.caption("✅ No new message yet. Waiting for refresh...")
187
  else:
188
+ st.warning("❗ Please provide all required credentials and conversation SID.")