Spaces:
Sleeping
Sleeping
Update app.py
Browse files
app.py
CHANGED
@@ -16,7 +16,7 @@ from io import StringIO
|
|
16 |
from pdfminer.high_level import extract_text_to_fp
|
17 |
from pdfminer.layout import LAParams
|
18 |
|
19 |
-
# --- PDF Extraction
|
20 |
def extract_text_from_pdf(pdf_path):
|
21 |
output_string = StringIO()
|
22 |
with open(pdf_path, 'rb') as file:
|
@@ -29,7 +29,7 @@ def clean_extracted_text(text):
|
|
29 |
for line in lines:
|
30 |
line = line.strip()
|
31 |
if line:
|
32 |
-
line = ' '.join(line.split())
|
33 |
cleaned.append(line)
|
34 |
return '\n'.join(cleaned)
|
35 |
|
@@ -41,7 +41,7 @@ def extract_text_from_docx(docx_path):
|
|
41 |
except:
|
42 |
return ""
|
43 |
|
44 |
-
# --- Chunking
|
45 |
def chunk_text(text, tokenizer, chunk_size=128, chunk_overlap=32, max_tokens=512):
|
46 |
tokens = tokenizer.tokenize(text)
|
47 |
chunks = []
|
@@ -95,21 +95,6 @@ def generate_answer_with_groq(question, context):
|
|
95 |
return response.json()['choices'][0]['message']['content'].strip()
|
96 |
|
97 |
# --- Twilio Functions ---
|
98 |
-
def get_latest_whatsapp_conversation_sid(client):
    """Return the SID of the first listed conversation with a WhatsApp participant.

    Up to 10 conversations are listed through ``client``. A conversation
    matches when any of its participants has an ``identity`` or a
    ``messaging_binding["address"]`` starting with ``"whatsapp:"``.

    Args:
        client: Object exposing ``conversations.v1.conversations`` with the
            ``list(limit=...)`` and ``(sid).participants.list()`` calls used
            below (a Twilio REST client in the surrounding app).

    Returns:
        The matching conversation's ``sid``, or ``None`` when no listed
        conversation has a WhatsApp participant.
    """
    prefix = "whatsapp:"
    conversations = client.conversations.v1.conversations.list(limit=10)
    for convo in conversations:
        try:
            participants = client.conversations.v1.conversations(convo.sid).participants.list()
            has_whatsapp = any(
                (p.identity and p.identity.startswith(prefix))
                or (
                    p.messaging_binding
                    and p.messaging_binding.get("address", "").startswith(prefix)
                )
                for p in participants
            )
        except Exception:
            # Best effort: a conversation whose participants cannot be read is
            # skipped. Catching Exception (not a bare except) lets
            # KeyboardInterrupt and SystemExit propagate.
            continue
        if has_whatsapp:
            return convo.sid
    return None
|
111 |
-
|
112 |
-
|
113 |
def fetch_latest_incoming_message(client, conversation_sid):
|
114 |
messages = client.conversations.v1.conversations(conversation_sid).messages.list(limit=10)
|
115 |
for msg in reversed(messages):
|
@@ -149,31 +134,50 @@ def setup_knowledge_base():
|
|
149 |
return index, model, chunks
|
150 |
|
151 |
# --- Monitor Conversations ---
|
152 |
-
def
|
153 |
-
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
154 |
|
155 |
def poll_new_conversations():
|
156 |
-
|
157 |
while True:
|
158 |
try:
|
159 |
conversations = client.conversations.v1.conversations.list(limit=20)
|
160 |
for convo in conversations:
|
161 |
if convo.sid not in processed_convos:
|
162 |
-
# Check if WhatsApp participant exists
|
163 |
participants = client.conversations.v1.conversations(convo.sid).participants.list()
|
164 |
for p in participants:
|
165 |
address = p.messaging_binding.get("address", "") if p.messaging_binding else ""
|
166 |
if address.startswith("whatsapp:"):
|
|
|
167 |
processed_convos.add(convo.sid)
|
168 |
-
print(f"π New conversation detected: {convo.sid}")
|
169 |
threading.Thread(target=poll_conversation, args=(convo.sid,), daemon=True).start()
|
170 |
except Exception as e:
|
171 |
-
print("β Error
|
172 |
time.sleep(5)
|
173 |
|
174 |
threading.Thread(target=poll_new_conversations, daemon=True).start()
|
175 |
|
176 |
-
|
177 |
# --- Streamlit UI ---
|
178 |
st.set_page_config(page_title="Quasa β A Smart WhatsApp Chatbot", layout="wide")
|
179 |
st.title("π± Quasa β A Smart WhatsApp Chatbot")
|
@@ -191,8 +195,8 @@ if not all([account_sid, auth_token, GROQ_API_KEY]):
|
|
191 |
if all([account_sid, auth_token, GROQ_API_KEY]):
|
192 |
os.environ["GROQ_API_KEY"] = GROQ_API_KEY
|
193 |
client = Client(account_sid, auth_token)
|
194 |
-
latest_sid = get_latest_whatsapp_conversation_sid(client)
|
195 |
|
196 |
-
st.success("π’ Monitoring new WhatsApp conversations...
|
197 |
-
index, model, chunks = setup_knowledge_base()
|
198 |
-
threading.Thread(target=start_conversation_monitor, args=(client, index, model, chunks), daemon=True).start()
|
|
|
|
16 |
from pdfminer.high_level import extract_text_to_fp
|
17 |
from pdfminer.layout import LAParams
|
18 |
|
19 |
+
# --- PDF Extraction ---
|
20 |
def extract_text_from_pdf(pdf_path):
|
21 |
output_string = StringIO()
|
22 |
with open(pdf_path, 'rb') as file:
|
|
|
29 |
for line in lines:
|
30 |
line = line.strip()
|
31 |
if line:
|
32 |
+
line = ' '.join(line.split())
|
33 |
cleaned.append(line)
|
34 |
return '\n'.join(cleaned)
|
35 |
|
|
|
41 |
except:
|
42 |
return ""
|
43 |
|
44 |
+
# --- Chunking ---
|
45 |
def chunk_text(text, tokenizer, chunk_size=128, chunk_overlap=32, max_tokens=512):
|
46 |
tokens = tokenizer.tokenize(text)
|
47 |
chunks = []
|
|
|
95 |
return response.json()['choices'][0]['message']['content'].strip()
|
96 |
|
97 |
# --- Twilio Functions ---
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
98 |
def fetch_latest_incoming_message(client, conversation_sid):
|
99 |
messages = client.conversations.v1.conversations(conversation_sid).messages.list(limit=10)
|
100 |
for msg in reversed(messages):
|
|
|
134 |
return index, model, chunks
|
135 |
|
136 |
# --- Monitor Conversations ---
|
137 |
+
def start_conversation_monitor(client, index, embed_model, text_chunks):
    """Start background polling that answers incoming WhatsApp messages.

    A daemon thread lists conversations every 5 seconds. For each
    conversation not seen before that has a participant whose
    ``messaging_binding["address"]`` starts with ``"whatsapp:"``, one
    dedicated daemon thread is started to poll that conversation and reply
    to new incoming messages.

    Args:
        client: Client used for all conversation, participant and message
            calls (a Twilio REST client in the surrounding app).
        index: Passed through to ``retrieve_chunks``.
        embed_model: Passed through to ``retrieve_chunks``.
        text_chunks: Passed through to ``retrieve_chunks``.

    Returns:
        None. The function returns right after starting the discovery thread.
    """
    processed_convos = set()
    last_processed_timestamp = {}

    def poll_conversation(convo_sid):
        """Poll one conversation forever, answering each new incoming message."""
        while True:
            try:
                latest_msg = fetch_latest_incoming_message(client, convo_sid)
                if latest_msg:
                    msg_time = latest_msg["timestamp"]
                    if convo_sid not in last_processed_timestamp or msg_time > last_processed_timestamp[convo_sid]:
                        # Record the timestamp before replying, so a failing
                        # reply is not retried on every poll.
                        last_processed_timestamp[convo_sid] = msg_time
                        question = latest_msg["body"]
                        sender = latest_msg["author"]
                        print(f"\nπ₯ New message from {sender} in {convo_sid}: {question}")
                        context = "\n\n".join(retrieve_chunks(question, index, embed_model, text_chunks))
                        answer = generate_answer_with_groq(question, context)
                        send_twilio_message(client, convo_sid, answer)
                        print(f"π€ Replied to {sender}: {answer}")
                time.sleep(3)
            except Exception as e:
                print(f"β Error in convo {convo_sid} polling:", e)
                time.sleep(5)

    def poll_new_conversations():
        """Discover WhatsApp conversations and start one poller thread for each."""
        print("β‘οΈ Monitoring for new WhatsApp conversations...")
        while True:
            try:
                conversations = client.conversations.v1.conversations.list(limit=20)
                for convo in conversations:
                    if convo.sid in processed_convos:
                        continue
                    participants = client.conversations.v1.conversations(convo.sid).participants.list()
                    # any() stops at the first WhatsApp participant, so a
                    # conversation with several of them still gets exactly one
                    # poller thread and each message is answered once.
                    has_whatsapp = any(
                        (p.messaging_binding.get("address", "") if p.messaging_binding else "").startswith("whatsapp:")
                        for p in participants
                    )
                    if has_whatsapp:
                        print(f"π New WhatsApp convo found: {convo.sid}")
                        processed_convos.add(convo.sid)
                        threading.Thread(target=poll_conversation, args=(convo.sid,), daemon=True).start()
            except Exception as e:
                print("β Error polling conversations:", e)
            time.sleep(5)

    threading.Thread(target=poll_new_conversations, daemon=True).start()
|
180 |
|
|
|
181 |
# --- Streamlit UI ---
|
182 |
st.set_page_config(page_title="Quasa β A Smart WhatsApp Chatbot", layout="wide")
|
183 |
st.title("π± Quasa β A Smart WhatsApp Chatbot")
|
|
|
195 |
if all([account_sid, auth_token, GROQ_API_KEY]):
|
196 |
os.environ["GROQ_API_KEY"] = GROQ_API_KEY
|
197 |
client = Client(account_sid, auth_token)
|
|
|
198 |
|
199 |
+
st.success("π’ Monitoring new WhatsApp conversations...")
|
200 |
+
index, model, chunks = setup_knowledge_base()
|
201 |
+
threading.Thread(target=start_conversation_monitor, args=(client, index, model, chunks), daemon=True).start()
|
202 |
+
st.info("β³ Waiting for new messages...")
|