masadonline committed on
Commit
60c8a15
·
verified ·
1 Parent(s): 7088627

Update app.py

Browse files
Files changed (1) hide show
  1. app.py +153 -115
app.py CHANGED
@@ -1,126 +1,164 @@
 
 
1
  import streamlit as st
2
  from twilio.rest import Client
3
- import requests
4
- from PyPDF2 import PdfReader
 
 
 
 
 
5
  from groq import Groq
6
- from langchain.embeddings import HuggingFaceEmbeddings
7
- from langchain.vectorstores import FAISS
8
- from langchain.text_splitter import RecursiveCharacterTextSplitter
9
- from langchain.docstore.document import Document
10
- from langchain.prompts import PromptTemplate
11
- from langchain.chains import RetrievalQA
12
- from langchain.llms.base import LLM
13
- from langchain_core.outputs import Generation
14
- import tempfile
15
- import os
16
 
17
# ---- CONFIG ---- #
st.set_page_config(page_title="Quasa – Smart WhatsApp Chatbot", layout="wide")

# ---- SESSION STATE ---- #
# Seed every per-session slot with an empty string the first time the script runs;
# later reruns leave existing values untouched.
for _state_key in ("conversation_sid", "user_message", "response"):
    if _state_key not in st.session_state:
        st.session_state[_state_key] = ""

# ---- SIDEBAR ---- #
# NOTE(review): the original indentation was lost; the uploader is assumed to
# live inside the sidebar together with the credential inputs - confirm.
with st.sidebar:
    st.title("πŸ“± Quasa Setup")
    groq_api_key = st.text_input("πŸ”‘ GROQ API Key", type="password")
    twilio_sid = st.text_input("🧩 Twilio Account SID", type="password")
    twilio_token = st.text_input("πŸ” Twilio Auth Token", type="password")
    twilio_conv_sid = st.text_input("πŸ’¬ Twilio Conversation SID")

    uploaded_file = st.file_uploader("πŸ“„ Upload Knowledge PDF", type=["pdf"])

    if uploaded_file:
        st.success("PDF uploaded. Ready to chat!")
42
-
43
- # ---- LLM Setup ---- #
44
class SimpleGroqLLM(LLM):
    """Minimal LangChain ``LLM`` adapter around the Groq chat-completions client.

    Each prompt is sent as a single user message and the text of the first
    returned choice is handed back to LangChain.
    """

    # Declared as fields because the LangChain ``LLM`` base class is a pydantic
    # model: assigning attributes that were never declared is rejected at runtime.
    client: object = None
    model_name: str = "llama3-8b-8192"

    def __init__(self, api_key: str, model_name="llama3-8b-8192", **kwargs):
        """Create the Groq client for ``api_key`` and remember the model to use."""
        super().__init__(client=Groq(api_key=api_key), model_name=model_name, **kwargs)

    def _call(self, prompt: str, stop=None, run_manager=None, **kwargs) -> str:
        """Send ``prompt`` to Groq and return the first completion's text.

        ``run_manager`` and extra keyword arguments are accepted so the method
        matches what LangChain may pass; they are not forwarded to Groq.
        """
        request = {
            "messages": [{"role": "user", "content": prompt}],
            "model": self.model_name,
        }
        if stop:
            # Forward stop sequences only when the caller actually supplied some.
            request["stop"] = stop
        response = self.client.chat.completions.create(**request)
        return response.choices[0].message.content

    @property
    def _llm_type(self) -> str:
        """Identifier LangChain uses to label this LLM implementation."""
        return "simple_groq"
59
-
60
- # ---- HELPER FUNCTIONS ---- #
61
def extract_text_from_pdf(file) -> str:
    """Return the text of every page of a PDF, one newline after each page.

    Args:
        file: Path or binary file-like object accepted by ``PdfReader``.

    Returns:
        The concatenated page text. Pages for which no text could be extracted
        contribute only their trailing newline.
    """
    reader = PdfReader(file)
    # extract_text() may return None (e.g. image-only pages); treat that as an
    # empty page instead of failing on ``None + "\n"``.
    return "".join((page.extract_text() or "") + "\n" for page in reader.pages)
67
-
68
def create_vector_store(text: str):
    """Split ``text`` into overlapping chunks and index them in a FAISS store.

    Args:
        text: Raw document text to index.

    Returns:
        A FAISS vector store built from 500-character chunks (100 overlap)
        embedded with the default ``HuggingFaceEmbeddings`` model.

    Raises:
        ValueError: If ``text`` yields no chunks (for example a scanned PDF
            with no extractable text), since an empty index cannot be built.
    """
    splitter = RecursiveCharacterTextSplitter(chunk_size=500, chunk_overlap=100)
    chunks = splitter.split_text(text or "")
    if not chunks:
        # Fail with a clear message instead of an opaque error from inside FAISS.
        raise ValueError(
            "No extractable text found; cannot build a vector store from an empty document."
        )
    docs = [Document(page_content=chunk) for chunk in chunks]
    embeddings = HuggingFaceEmbeddings()
    return FAISS.from_documents(docs, embeddings)
74
-
75
def get_response_from_rag(query, vectorstore, groq_api_key):
    """Answer ``query`` with a RetrievalQA chain backed by ``vectorstore`` and Groq.

    Args:
        query: The user's question.
        vectorstore: Vector store whose retriever supplies the context.
        groq_api_key: API key used to construct the Groq-backed LLM.

    Returns:
        Whatever the chain's ``run`` call returns for ``query``.
    """
    chain_options = {
        "llm": SimpleGroqLLM(api_key=groq_api_key),
        "retriever": vectorstore.as_retriever(),
        "return_source_documents": False,
    }
    return RetrievalQA.from_chain_type(**chain_options).run(query)
 
 
 
 
 
 
 
 
85
 
def send_twilio_message(account_sid, auth_token, conversation_sid, body):
    """Post ``body`` into a Twilio conversation under the author name 'ChatBot'.

    Returns:
        The SID of the created message, or a human-readable failure string if
        anything goes wrong while creating the client or sending.
    """
    try:
        conversation = Client(account_sid, auth_token).conversations.v1.conversations(conversation_sid)
        # Fixed: use static name instead of WhatsApp number
        sent = conversation.messages.create(author='ChatBot', body=body)
        return sent.sid
    except Exception as e:
        return f"⚠️ Failed to send message: {e}"
96
-
97
# ---- MAIN ---- #
st.title("πŸ€– Quasa – Smart WhatsApp Chatbot")

# Everything below needs the PDF plus all four credentials from the sidebar.
_setup_complete = all([uploaded_file, groq_api_key, twilio_sid, twilio_token, twilio_conv_sid])

if not _setup_complete:
    st.warning("🚧 Please upload a PDF and fill in all credentials in the sidebar to proceed.")
else:
    st.session_state.conversation_sid = twilio_conv_sid

    # Extract and vectorize
    with st.spinner("πŸ” Reading and indexing document..."):
        text = extract_text_from_pdf(uploaded_file)
        vectorstore = create_vector_store(text)

    # Input + response area
    user_input = st.text_input("πŸ’¬ Ask a question (from WhatsApp user):", key="input")

    if st.button("πŸ“© Respond & Send"):
        if not user_input:
            st.warning("❗Please enter a question to proceed.")
        else:
            with st.spinner("πŸ€– Generating response..."):
                answer = get_response_from_rag(user_input, vectorstore, groq_api_key)
            st.success("βœ… Response Generated:")
            st.write(answer)

            # Send to Twilio
            with st.spinner("πŸ“€ Sending to WhatsApp..."):
                msg_sid = send_twilio_message(twilio_sid, twilio_token, twilio_conv_sid, answer)
            st.info(f"πŸ“¨ Message SID: `{msg_sid}`")
 
1
+ import os
2
+ import time
3
  import streamlit as st
4
  from twilio.rest import Client
5
+ from twilio.base.exceptions import TwilioRestException
6
+ from pdfminer.high_level import extract_text
7
+ from sentence_transformers import SentenceTransformer
8
+ from transformers import AutoTokenizer
9
+ import faiss
10
+ import numpy as np
11
+ import docx
12
  from groq import Groq
13
+ import PyPDF2
14
+ import requests
 
 
 
 
 
 
 
 
15
 
16
+ # --- Document Loaders ---
17
def extract_text_from_pdf(pdf_path):
    """Extract text from the PDF at ``pdf_path``.

    PyPDF2 is tried first; if it raises for any reason, the file is handed to
    pdfminer's ``extract_text`` instead.

    Args:
        pdf_path: Filesystem path of the PDF to read.

    Returns:
        The concatenated text of all pages (pages yielding no text are skipped).

    Raises:
        Whatever pdfminer's ``extract_text`` raises if the fallback also fails.
    """
    try:
        with open(pdf_path, 'rb') as file:
            pdf_reader = PyPDF2.PdfReader(file)
            # Falsy page results (None / "") are skipped rather than concatenated.
            return "".join(page.extract_text() or "" for page in pdf_reader.pages)
    except Exception:
        # ``Exception`` rather than a bare except, so Ctrl-C and interpreter
        # shutdown are not swallowed and rerouted into the fallback parser.
        return extract_text(pdf_path)
30
+
31
def extract_text_from_docx(docx_path):
    """Return the paragraphs of a Word document joined by newlines.

    This is a best-effort loader: if the document cannot be opened or parsed,
    the failure is logged and an empty string is returned, so one bad file
    does not abort loading the rest of the knowledge base.

    Args:
        docx_path: Filesystem path of the document to read.

    Returns:
        The document text, or ``""`` when the file could not be read.
    """
    import logging

    try:
        doc = docx.Document(docx_path)
        return '\n'.join(para.text for para in doc.paragraphs)
    except Exception as exc:
        # ``Exception`` instead of a bare except (which would also trap
        # KeyboardInterrupt/SystemExit); log so skipped files are visible.
        logging.getLogger(__name__).warning("Could not read %s: %s", docx_path, exc)
        return ""
37
+
38
def chunk_text(text, tokenizer, chunk_size=150, chunk_overlap=30):
    """Split ``text`` into overlapping chunks measured in tokenizer tokens.

    Args:
        text: The text to split.
        tokenizer: Object providing ``tokenize(text)`` and
            ``convert_tokens_to_string(tokens)``.
        chunk_size: Maximum number of tokens per chunk; must be positive.
        chunk_overlap: Number of tokens shared by consecutive chunks; must be
            non-negative and smaller than ``chunk_size``.

    Returns:
        A list of chunk strings in document order; empty when ``text`` yields
        no tokens.

    Raises:
        ValueError: If the size/overlap combination would not advance through
            the token list (which previously caused an endless loop).
    """
    if chunk_size <= 0:
        raise ValueError("chunk_size must be a positive integer")
    if not 0 <= chunk_overlap < chunk_size:
        raise ValueError("chunk_overlap must satisfy 0 <= chunk_overlap < chunk_size")

    tokens = tokenizer.tokenize(text)
    step = chunk_size - chunk_overlap
    chunks = []
    for start in range(0, len(tokens), step):
        end = start + chunk_size
        chunks.append(tokenizer.convert_tokens_to_string(tokens[start:end]))
        if end >= len(tokens):
            # The window already reached the last token; another iteration
            # would only emit a tail fully contained in this chunk.
            break
    return chunks
47
+
48
def retrieve_chunks(question, index, embed_model, text_chunks, k=3):
    """Return the stored chunks closest to ``question`` according to ``index``.

    Args:
        question: The query text.
        index: Search index exposing ``search(vectors, k)`` and returning
            ``(distances, indices)``.
        embed_model: Model exposing ``encode(list_of_texts)``.
        text_chunks: Chunks in the same order in which they were indexed.
        k: Maximum number of chunks to return.

    Returns:
        Up to ``k`` chunks in the order the index ranked them; an empty list
        when there are no chunks.
    """
    if not text_chunks:
        return []

    # The query is passed as a single-row float32 matrix (the dtype FAISS expects).
    query = np.asarray(embed_model.encode([question])[0], dtype=np.float32).reshape(1, -1)
    _, neighbour_ids = index.search(query, k)

    # When fewer than k vectors exist the index pads results with -1, which
    # plain list indexing would silently turn into "the last chunk".
    return [text_chunks[int(i)] for i in neighbour_ids[0] if 0 <= int(i) < len(text_chunks)]
52
+
53
+ # --- GROQ Answer Generation ---
54
def generate_answer_with_groq(question, context, retries=3, delay=2):
    """Ask the Groq chat-completions endpoint to answer ``question`` from ``context``.

    Args:
        question: The user's question.
        context: Retrieved text the answer should be based on.
        retries: Maximum number of HTTP attempts.
        delay: Seconds to wait between attempts.

    Returns:
        The stripped answer text, or a string starting with
        "Groq API Error" (after the warning prefix) describing the failure.

    Raises:
        KeyError: If the ``GROQ_API_KEY`` environment variable is not set.
    """
    url = "https://api.groq.com/openai/v1/chat/completions"
    api_key = os.environ["GROQ_API_KEY"]
    headers = {
        "Authorization": f"Bearer {api_key}",
        "Content-Type": "application/json",
    }
    prompt = f"Based on the following context, answer the question: '{question}'\n\nContext:\n{context}"
    payload = {
        "model": "llama3-8b-8192",
        "messages": [
            {"role": "system", "content": "Hey there! I'm designed to respond just like a real person would. Ask me anything, and I'll do my best to give you a thoughtful and courteous answer."},
            {"role": "user", "content": prompt},
        ],
        "temperature": 0.5,
        "max_tokens": 300,
    }

    for attempt in range(retries):
        is_last_attempt = attempt >= retries - 1
        try:
            # A timeout keeps a stalled connection from hanging the app forever.
            response = requests.post(url, headers=headers, json=payload, timeout=30)
            # requests does not raise on HTTP error codes by itself, so the
            # status must be inspected explicitly for the retry to ever happen.
            if response.status_code in (429, 503) and not is_last_attempt:
                time.sleep(delay)
                continue
            response.raise_for_status()
            result = response.json()
            return result['choices'][0]['message']['content'].strip()
        except (requests.exceptions.ConnectionError, requests.exceptions.Timeout) as e:
            # Transient network trouble: retry while attempts remain.
            if not is_last_attempt:
                time.sleep(delay)
                continue
            return f"⚠️ Groq API Error: {str(e)}"
        except (requests.exceptions.RequestException, KeyError, IndexError,
                TypeError, ValueError, AttributeError) as e:
            # HTTP errors and malformed/unexpected response bodies are not retried.
            return f"⚠️ Groq API Error: {str(e)}"

    # Only reachable when retries <= 0; previously this path returned None.
    return "⚠️ Groq API Error: no request attempts were made"
83
+
84
+ # --- Twilio Chat Handlers ---
85
def fetch_latest_incoming_message(account_sid, auth_token, conversation_sid):
    """Return the newest WhatsApp-authored message of a Twilio conversation.

    Only the 10 most recent messages are inspected.

    Args:
        account_sid: Twilio account SID.
        auth_token: Twilio auth token.
        conversation_sid: SID of the conversation to read.

    Returns:
        ``(body, author, index)`` of the most recent message whose author
        starts with ``"whatsapp:"``, or ``(None, None, None)`` if none of the
        inspected messages qualifies.
    """
    client = Client(account_sid, auth_token)
    # Request newest-first: with the default ascending order, ``limit=10``
    # yields the ten OLDEST messages, so new messages in a long conversation
    # would never be seen.
    messages = client.conversations.v1.conversations(conversation_sid).messages.list(
        order="desc", limit=10
    )
    for msg in messages:
        # Guard against a missing author before the prefix check.
        if (msg.author or "").startswith("whatsapp:"):
            return msg.body, msg.author, msg.index
    return None, None, None
92
 
def send_twilio_message(account_sid, auth_token, conversation_sid, body):
    """Post ``body`` into a Twilio conversation with the author set to "system".

    Returns:
        The SID of the created message on success; otherwise the text of the
        exception that was raised.
    """
    try:
        conversation = Client(account_sid, auth_token).conversations.v1.conversations(conversation_sid)
        sent = conversation.messages.create(author="system", body=body)
        return sent.sid
    except Exception as e:
        return str(e)
100
+
101
# --- Streamlit UI ---
st.set_page_config(page_title="Quasa – A Smart WhatsApp Chatbot", layout="wide")
st.title("πŸ“± Quasa – A Smart WhatsApp Chatbot")

# Load from Hugging Face secrets
account_sid = st.secrets.get("TWILIO_SID")
auth_token = st.secrets.get("TWILIO_TOKEN")
GROQ_API_KEY = st.secrets.get("GROQ_API_KEY")

# Fallback for testing
if not all([account_sid, auth_token, GROQ_API_KEY]):
    st.warning("⚠️ Some secrets not found. Please enter missing credentials below:")
    account_sid = st.text_input("Twilio SID", value=account_sid or "")
    auth_token = st.text_input("Twilio Auth Token", type="password", value=auth_token or "")
    GROQ_API_KEY = st.text_input("GROQ API Key", type="password", value=GROQ_API_KEY or "")

# Always show conversation SID input
conversation_sid = st.text_input("Enter Conversation SID", value="")

# Initialize session state to track last message
if "last_index" not in st.session_state:
    st.session_state.last_index = -1

if all([account_sid, auth_token, GROQ_API_KEY, conversation_sid]):
    # generate_answer_with_groq reads the key from the environment.
    os.environ["GROQ_API_KEY"] = GROQ_API_KEY

    @st.cache_resource
    def setup_knowledge_base():
        """Load every PDF/Word file in ``docs`` and build the search index.

        Returns:
            ``(index, model, chunks)``: the FAISS L2 index, the sentence
            embedding model, and the chunk list in index order.

        Raises:
            FileNotFoundError: If the ``docs`` folder does not exist.
            ValueError: If no text could be extracted from any document.
        """
        folder_path = "docs"
        if not os.path.isdir(folder_path):
            raise FileNotFoundError(f"Knowledge folder '{folder_path}' does not exist.")

        parts = []
        # Sorted so the chunk order (and thus the index) is deterministic.
        for file in sorted(os.listdir(folder_path)):
            full_path = os.path.join(folder_path, file)
            if file.endswith(".pdf"):
                parts.append(extract_text_from_pdf(full_path))
            elif file.endswith((".docx", ".doc")):
                parts.append(extract_text_from_docx(full_path))
        all_text = "".join(part + "\n" for part in parts)

        tokenizer = AutoTokenizer.from_pretrained('bert-base-uncased')
        chunks = chunk_text(all_text, tokenizer)
        if not chunks:
            # Without this guard the code below fails on an empty embedding array.
            raise ValueError(f"No text could be extracted from the files in '{folder_path}'.")

        model = SentenceTransformer('all-mpnet-base-v2')
        embeddings = np.asarray(model.encode(chunks), dtype="float32")
        index = faiss.IndexFlatL2(embeddings.shape[1])
        index.add(embeddings)
        return index, model, chunks

    try:
        index, embedding_model, text_chunks = setup_knowledge_base()
    except (FileNotFoundError, ValueError) as exc:
        st.error(f"Knowledge base could not be built: {exc}")
        st.stop()

    st.success("βœ… Knowledge base ready. Monitoring WhatsApp...")

    if st.button("πŸ” Check for New WhatsApp Query"):
        with st.spinner("Checking messages..."):
            question, sender, msg_index = fetch_latest_incoming_message(account_sid, auth_token, conversation_sid)
            if question and msg_index > st.session_state.last_index:
                st.session_state.last_index = msg_index
                st.info(f"πŸ“₯ New Question from {sender}:\n\n> {question}")
                relevant_chunks = retrieve_chunks(question, index, embedding_model, text_chunks)
                context = "\n\n".join(relevant_chunks)
                answer = generate_answer_with_groq(question, context)
                send_result = send_twilio_message(account_sid, auth_token, conversation_sid, answer)
                # send_twilio_message returns the message SID on success and the
                # exception text on failure; Conversation message SIDs start with "IM".
                if str(send_result).startswith("IM"):
                    st.success("πŸ“€ Answer sent via WhatsApp!")
                else:
                    st.error(f"Failed to send answer via WhatsApp: {send_result}")
                st.markdown(f"### ✨ Answer:\n\n{answer}")
            else:
                st.warning("No new messages from users found.")
else:
    st.warning("❗ Please provide all required credentials and conversation SID.")