import os
import time

import docx
import faiss
import numpy as np
import PyPDF2
import requests
import streamlit as st
from pdfminer.high_level import extract_text
from sentence_transformers import SentenceTransformer
from transformers import AutoTokenizer
from twilio.rest import Client

# --- Auto-refresh every 10 seconds ---
# Streamlit only re-executes the script on an interaction, so this rerun
# fires on the first event after the 10-second window has elapsed.
# (st.experimental_rerun() was renamed st.rerun() in newer Streamlit releases.)
if "last_refresh" not in st.session_state:
    st.session_state.last_refresh = time.time()
elif time.time() - st.session_state.last_refresh > 10:
    st.session_state.last_refresh = time.time()
    st.experimental_rerun()
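# A timer-based alternative, sketched under the assumption that the community
# package streamlit-autorefresh is installed; its st_autorefresh() schedules
# reruns without waiting for user input:
#
#   from streamlit_autorefresh import st_autorefresh
#   st_autorefresh(interval=10_000, key="message_poll")  # rerun every 10 s
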
# --- Document Loaders ---
def extract_text_from_pdf(pdf_path):
    """Extract text from a PDF with PyPDF2, falling back to pdfminer."""
    try:
        text = ""
        with open(pdf_path, 'rb') as file:
            pdf_reader = PyPDF2.PdfReader(file)
            for page in pdf_reader.pages:
                page_text = page.extract_text()
                if page_text:
                    text += page_text
        return text
    except Exception:
        # PyPDF2 chokes on some PDFs; pdfminer is slower but more tolerant.
        return extract_text(pdf_path)

def extract_text_from_docx(docx_path):
    """Extract text from a Word document; return "" if it cannot be parsed."""
    try:
        doc = docx.Document(docx_path)
        return '\n'.join(para.text for para in doc.paragraphs)
    except Exception:
        return ""

def chunk_text(text, tokenizer, chunk_size=150, chunk_overlap=30):
    """Split text into overlapping windows of chunk_size tokens."""
    tokens = tokenizer.tokenize(text)
    chunks, start = [], 0
    while start < len(tokens):
        end = min(start + chunk_size, len(tokens))
        chunks.append(tokenizer.convert_tokens_to_string(tokens[start:end]))
        start += chunk_size - chunk_overlap
    return chunks
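# Example: with the defaults the window advances 150 - 30 = 120 tokens per
# step, so a 390-token document yields chunks starting at tokens 0, 120, 240,
# and 360, each overlapping its neighbor by 30 tokens.
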
def retrieve_chunks(question, index, embed_model, text_chunks, k=3):
    """Embed the question and return the k nearest chunks from the index."""
    question_embedding = embed_model.encode([question])[0]
    # FAISS returns (distances, indices); I[0] indexes back into text_chunks.
    D, I = index.search(np.array([question_embedding]), k)
    return [text_chunks[i] for i in I[0]]

# --- Groq Answer Generation ---
def generate_answer_with_groq(question, context, retries=3, delay=2):
    """Ask the Groq chat API to answer the question from the retrieved context."""
    url = "https://api.groq.com/openai/v1/chat/completions"
    api_key = os.environ["GROQ_API_KEY"]
    headers = {
        "Authorization": f"Bearer {api_key}",
        "Content-Type": "application/json",
    }
    prompt = f"Thank you for reaching out to us! Based on your question: '{question}'\n\n\n{context}"
    payload = {
        "model": "llama3-8b-8192",
        "messages": [
            {"role": "system", "content": "Hey there! I'm designed to respond just like a real person would. Ask me anything, and I'll do my best to give you a thoughtful and courteous answer."},
            {"role": "user", "content": prompt},
        ],
        "temperature": 0.5,
        "max_tokens": 300,
    }
    for attempt in range(retries):
        try:
            response = requests.post(url, headers=headers, json=payload, timeout=30)
            # Raise on HTTP errors so a 503 reaches the retry check below;
            # without this, requests would hand back the error body silently.
            response.raise_for_status()
            result = response.json()
            return result['choices'][0]['message']['content'].strip()
        except Exception as e:
            if "503" in str(e) and attempt < retries - 1:
                time.sleep(delay)
                continue
            return f"⚠️ Groq API Error: {e}"
# --- Twilio Chat Handlers ---
def fetch_latest_incoming_message(account_sid, auth_token, conversation_sid):
    """Return (body, author, index) of the newest WhatsApp message, or Nones."""
    client = Client(account_sid, auth_token)
    messages = client.conversations.v1.conversations(conversation_sid).messages.list(limit=10)
    # The list is oldest-first by default, so walk it backwards to find the
    # latest message authored by a WhatsApp participant.
    for msg in reversed(messages):
        if msg.author and msg.author.startswith("whatsapp:"):
            return msg.body, msg.author, msg.index
    return None, None, None

def send_twilio_message(account_sid, auth_token, conversation_sid, body):
    """Post a reply into the conversation; return its SID, or the error text."""
    try:
        client = Client(account_sid, auth_token)
        message = client.conversations.v1.conversations(conversation_sid).messages.create(author="system", body=body)
        return message.sid
    except Exception as e:
        return str(e)
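# A message created through the Conversations API is delivered to every
# participant in the conversation, so the WhatsApp sender receives this reply
# without a separate call to the Messages API.
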
# --- Streamlit UI ---
st.set_page_config(page_title="Quasa - A Smart WhatsApp Chatbot", layout="wide")

# Styling
st.markdown("""
<style>
.big-font {
    font-size: 28px !important;
    font-weight: bold;
}
.small-font {
    font-size: 16px !important;
    color: #555;
}
.stButton > button {
    background-color: #0066CC;
    color: white;
    padding: 0.5em 1em;
    border-radius: 8px;
    font-size: 18px;
}
.stTextInput > div > input {
    font-size: 16px;
}
</style>
""", unsafe_allow_html=True)

st.markdown('<div class="big-font">📱 Quasa - A Smart WhatsApp Chatbot</div>', unsafe_allow_html=True)
st.markdown('<div class="small-font">Talk to your documents using WhatsApp. Powered by Groq, Twilio, and RAG.</div>', unsafe_allow_html=True)

# Load secrets, falling back to manual input for any that are missing
account_sid = st.secrets.get("TWILIO_SID")
auth_token = st.secrets.get("TWILIO_TOKEN")
GROQ_API_KEY = st.secrets.get("GROQ_API_KEY")

if not all([account_sid, auth_token, GROQ_API_KEY]):
    st.warning("⚠️ Some secrets are missing. Please provide them manually:")
    account_sid = st.text_input("Twilio SID", value=account_sid or "")
    auth_token = st.text_input("Twilio Auth Token", type="password", value=auth_token or "")
    GROQ_API_KEY = st.text_input("GROQ API Key", type="password", value=GROQ_API_KEY or "")

conversation_sid = st.text_input("Enter Twilio Conversation SID", value="")

if all([account_sid, auth_token, GROQ_API_KEY, conversation_sid]):
    # generate_answer_with_groq() reads the key from the environment
    os.environ["GROQ_API_KEY"] = GROQ_API_KEY

    @st.cache_resource
    def setup_knowledge_base():
        """Read docs/, chunk the text, embed it, and build a FAISS index."""
        folder_path = "docs"
        all_text = ""
        for file in os.listdir(folder_path):
            if file.endswith(".pdf"):
                all_text += extract_text_from_pdf(os.path.join(folder_path, file)) + "\n"
            elif file.endswith((".docx", ".doc")):
                all_text += extract_text_from_docx(os.path.join(folder_path, file)) + "\n"
        tokenizer = AutoTokenizer.from_pretrained('bert-base-uncased')
        chunks = chunk_text(all_text, tokenizer)
        model = SentenceTransformer('all-mpnet-base-v2')
        embeddings = model.encode(chunks)
        dim = embeddings[0].shape[0]
        index = faiss.IndexFlatL2(dim)
        index.add(np.array(embeddings))
        return index, model, chunks

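    # st.cache_resource builds the index once per server process and reuses it
    # across reruns, so models and embeddings are not recomputed on every poll.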
    index, embedding_model, text_chunks = setup_knowledge_base()
    st.success("✅ Knowledge base ready. Monitoring WhatsApp messages...")

if "last_processed_index" not in st.session_state:
st.session_state.last_processed_index = -1
with st.spinner("Checking for new WhatsApp messages..."):
question, sender, msg_index = fetch_latest_incoming_message(account_sid, auth_token, conversation_sid)
if question and msg_index != st.session_state.last_processed_index:
st.session_state.last_processed_index = msg_index
st.info(f"π₯ New question from **{sender}**:\n\n> {question}")
relevant_chunks = retrieve_chunks(question, index, embedding_model, text_chunks)
context = "\n\n".join(relevant_chunks)
answer = generate_answer_with_groq(question, context)
send_twilio_message(account_sid, auth_token, conversation_sid, answer)
st.success("π€ Answer sent back to user on WhatsApp!")
st.markdown(f"### β¨ Answer:\n\n{answer}")
else:
st.warning("No new messages found.")
else:
    st.warning("❌ Please provide all required credentials and conversation SID.")