Spaces:
Sleeping
Sleeping
Update app.py
Browse files
app.py
CHANGED
@@ -50,19 +50,72 @@ def retrieve_chunks(question, index, embed_model, text_chunks, k=3):
|
|
50 |
D, I = index.search(np.array([question_embedding]), k)
|
51 |
return [text_chunks[i] for i in I[0]]
|
52 |
|
53 |
-
#
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
54 |
def generate_answer_with_groq(question, context, retries=3, delay=2):
|
55 |
url = "https://api.groq.com/openai/v1/chat/completions"
|
56 |
-
api_key = os.environ
|
|
|
|
|
|
|
57 |
headers = {
|
58 |
"Authorization": f"Bearer {api_key}",
|
59 |
"Content-Type": "application/json",
|
60 |
}
|
61 |
-
prompt =
|
|
|
|
|
|
|
|
|
62 |
payload = {
|
63 |
"model": "llama3-8b-8192",
|
64 |
"messages": [
|
65 |
-
{
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
66 |
{"role": "user", "content": prompt},
|
67 |
],
|
68 |
"temperature": 0.5,
|
@@ -71,15 +124,18 @@ def generate_answer_with_groq(question, context, retries=3, delay=2):
|
|
71 |
|
72 |
for attempt in range(retries):
|
73 |
try:
|
74 |
-
response = requests.post(url, headers=headers, json=payload)
|
|
|
75 |
result = response.json()
|
76 |
return result['choices'][0]['message']['content'].strip()
|
77 |
-
except
|
78 |
-
if
|
79 |
time.sleep(delay)
|
80 |
continue
|
81 |
else:
|
82 |
-
return f"⚠️ Groq API
|
|
|
|
|
83 |
|
84 |
# --- Twilio Chat Handlers ---
|
85 |
def fetch_latest_incoming_message(account_sid, auth_token, conversation_sid):
|
|
|
50 |
D, I = index.search(np.array([question_embedding]), k)
|
51 |
return [text_chunks[i] for i in I[0]]
|
52 |
|
53 |
+
# Extract text from PDF with fallback
|
54 |
+
# --- Document Loaders ---
|
55 |
+
def extract_text_from_pdf(pdf_path):
    """Extract the text of a PDF page by page, with a fallback extractor.

    The file is opened in binary mode and read with ``PyPDF2.PdfReader``; the
    text of every page that yields any is concatenated in page order. If that
    path raises, the whole document is handed to ``extract_text`` instead.

    Args:
        pdf_path: Path of the PDF file to read.

    Returns:
        The concatenated page text, or the result of ``extract_text(pdf_path)``
        when the PyPDF2 path fails.
    """
    try:
        with open(pdf_path, 'rb') as file:
            pdf_reader = PyPDF2.PdfReader(file)
            # A page may produce no text at all; such pages are skipped.
            page_texts = (page.extract_text() for page in pdf_reader.pages)
            return "".join(text for text in page_texts if text)
    except Exception:
        # Catch Exception rather than using a bare ``except`` so that
        # KeyboardInterrupt / SystemExit are not swallowed by the fallback.
        # NOTE(review): ``extract_text`` is defined outside this block
        # (presumably a whole-document PDF extractor) — confirm its origin.
        return extract_text(pdf_path)
|
68 |
+
|
69 |
+
def extract_text_from_docx(docx_path):
    """Return the paragraph text of a .docx file, joined with newlines.

    This is a best-effort loader: any error raised while opening or reading
    the document results in an empty string instead of an exception.

    Args:
        docx_path: Path of the .docx file to read.

    Returns:
        The text of all paragraphs separated by ``"\\n"``, or ``""`` when the
        document cannot be read.
    """
    try:
        doc = docx.Document(docx_path)
        return '\n'.join(para.text for para in doc.paragraphs)
    except Exception:
        # Deliberately lenient, but limited to Exception so that
        # KeyboardInterrupt / SystemExit still propagate.
        return ""
|
75 |
+
|
76 |
+
def chunk_text(text, tokenizer, chunk_size=150, chunk_overlap=30):
    """Split text into overlapping chunks measured in tokenizer tokens.

    The text is tokenized once, then windows of ``chunk_size`` tokens are
    taken every ``chunk_size - chunk_overlap`` tokens and converted back to
    strings. The final window may be shorter than ``chunk_size``.

    Args:
        text: The text to split.
        tokenizer: Object providing ``tokenize(text)`` and
            ``convert_tokens_to_string(tokens)``.
        chunk_size: Maximum number of tokens per chunk; must be positive.
        chunk_overlap: Number of tokens shared between consecutive chunks;
            must be smaller than ``chunk_size``.

    Returns:
        A list of chunk strings in document order (empty for empty input).

    Raises:
        ValueError: If ``chunk_size`` is not positive or ``chunk_overlap`` is
            not smaller than ``chunk_size``.
    """
    if chunk_size <= 0:
        raise ValueError("chunk_size must be a positive integer")
    if chunk_overlap >= chunk_size:
        # A non-positive stride would never advance through the tokens.
        raise ValueError("chunk_overlap must be smaller than chunk_size")

    tokens = tokenizer.tokenize(text)
    stride = chunk_size - chunk_overlap
    # Slicing clamps at the end of the list, so the last window is simply
    # shorter when fewer than chunk_size tokens remain.
    return [
        tokenizer.convert_tokens_to_string(tokens[start:start + chunk_size])
        for start in range(0, len(tokens), stride)
    ]
|
85 |
+
|
86 |
+
def retrieve_chunks(question, index, embed_model, text_chunks, k=3):
    """Return the text chunks whose embeddings are nearest to the question.

    The question is embedded with ``embed_model.encode``, the resulting
    vector is passed as a one-row batch to ``index.search``, and the returned
    indices are mapped back onto ``text_chunks``.

    Args:
        question: The query string.
        index: Object whose ``search(vectors, k)`` returns a
            ``(distances, indices)`` pair with one row per query vector.
        embed_model: Object whose ``encode(list_of_str)`` returns one
            embedding per input string.
        text_chunks: Sequence of chunks, positionally aligned with the
            vectors stored in ``index``.
        k: Number of neighbours to request.

    Returns:
        Up to ``k`` chunks in the order the index returned them.
    """
    question_embedding = embed_model.encode([question])[0]
    _, neighbour_ids = index.search(np.array([question_embedding]), k)
    # Skip ids that do not address a chunk. Some vector indexes pad missing
    # neighbours with -1 (presumably the case here — confirm against the
    # index type), which as a list index would silently return the last chunk.
    chunk_count = len(text_chunks)
    return [text_chunks[i] for i in neighbour_ids[0] if 0 <= i < chunk_count]
|
90 |
+
|
91 |
+
# Generate answer using Groq API with retries and timeout
|
92 |
def generate_answer_with_groq(question, context, retries=3, delay=2):
|
93 |
url = "https://api.groq.com/openai/v1/chat/completions"
|
94 |
+
api_key = os.environ.get("GROQ_API_KEY")
|
95 |
+
if not api_key:
|
96 |
+
return "⚠️ GROQ_API_KEY not set."
|
97 |
+
|
98 |
headers = {
|
99 |
"Authorization": f"Bearer {api_key}",
|
100 |
"Content-Type": "application/json",
|
101 |
}
|
102 |
+
prompt = (
|
103 |
+
f"Customer asked: '{question}'\n\n"
|
104 |
+
f"Here is the relevant product or policy info to help:\n{context}\n\n"
|
105 |
+
f"Respond in a friendly and helpful tone as a toy shop support agent."
|
106 |
+
)
|
107 |
payload = {
|
108 |
"model": "llama3-8b-8192",
|
109 |
"messages": [
|
110 |
+
{
|
111 |
+
"role": "system",
|
112 |
+
"content": (
|
113 |
+
"You are ToyBot, a friendly and helpful WhatsApp assistant for an online toy shop. "
|
114 |
+
"Your goal is to politely answer customer questions, help them choose the right toys, "
|
115 |
+
"provide order or delivery information, explain return policies, and guide them through purchases. "
|
116 |
+
"Always sound warm, helpful, and trustworthy like a professional customer support agent."
|
117 |
+
)
|
118 |
+
},
|
119 |
{"role": "user", "content": prompt},
|
120 |
],
|
121 |
"temperature": 0.5,
|
|
|
124 |
|
125 |
for attempt in range(retries):
|
126 |
try:
|
127 |
+
response = requests.post(url, headers=headers, json=payload, timeout=10)
|
128 |
+
response.raise_for_status()
|
129 |
result = response.json()
|
130 |
return result['choices'][0]['message']['content'].strip()
|
131 |
+
except requests.exceptions.HTTPError as e:
|
132 |
+
if response.status_code == 503 and attempt < retries - 1:
|
133 |
time.sleep(delay)
|
134 |
continue
|
135 |
else:
|
136 |
+
return f"⚠️ Groq API HTTPError: {e}"
|
137 |
+
except Exception as e:
|
138 |
+
return f"⚠️ Groq API Error: {e}"
|
139 |
|
140 |
# --- Twilio Chat Handlers ---
|
141 |
def fetch_latest_incoming_message(account_sid, auth_token, conversation_sid):
|