# handlers/whatsapp_handlers.py
import logging
import os
import re
from typing import Optional, Dict
from fastapi.responses import JSONResponse
from components.gateways.headlines_to_wa import fetch_cached_headlines, send_to_whatsapp
from components.LLMs.Mistral import MistralTogetherClient, build_messages
# ------------------------------------------------------------
# Utilities
# ------------------------------------------------------------
def _safe_send(text: str, to: str) -> dict:
    """Deliver *text* to WhatsApp number *to*, logging the outcome.

    Never raises: any exception from the gateway is caught and reported
    as an error-shaped result dict, so handlers can always inspect
    ``result["status"]``.
    """
    try:
        outcome = send_to_whatsapp(text, destination_number=to)
        if outcome.get("status") != "success":
            logging.error(f"Failed to send message to {to}: {outcome}")
        else:
            logging.info(f"Sent message to {to}")
        return outcome
    except Exception as exc:
        logging.exception(f"Exception while sending WhatsApp message to {to}: {exc}")
        return {"status": "error", "error": str(exc)}
# ------------------------------------------------------------
# Headlines
# ------------------------------------------------------------
def handle_headlines(from_number: str) -> JSONResponse:
    """Fetch today's cached digest and forward it to *from_number*.

    Returns:
        200 JSONResponse when the digest was delivered; 500 when it could
        not be fetched or sent. The user is notified (best-effort) in both
        failure cases.
    """
    full_message_text = fetch_cached_headlines()
    # The gateway signals fetch failures in-band via an emoji prefix.
    # Also treat a None/empty digest as a failure instead of crashing on
    # .startswith (fetch_cached_headlines may return nothing on error).
    if not full_message_text or full_message_text.startswith(("❌", "⚠️")):
        logging.error(f"Failed to fetch digest for {from_number}: {full_message_text}")
        _safe_send(f"Sorry, I couldn't fetch the news digest today. {full_message_text}", to=from_number)
        return JSONResponse(status_code=500, content={"status": "error", "message": "Failed to fetch digest"})
    result = _safe_send(full_message_text, to=from_number)
    if result.get("status") == "success":
        return JSONResponse(status_code=200, content={"status": "success", "message": "Digest sent"})
    # Delivery failed: apologize to the user, then surface a 500 upstream.
    _safe_send(
        f"Sorry, I couldn't send the news digest to you. Error: {result.get('error', 'unknown')}",
        to=from_number,
    )
    return JSONResponse(status_code=500, content={"status": "error", "message": "Failed to send digest"})
# ------------------------------------------------------------
# Preferences / Greeting / Help / Unsubscribe / Small Talk
# ------------------------------------------------------------
def handle_preferences(from_number: str) -> JSONResponse:
    """Prompt the user to pick the news topics they care about."""
    prompt = "\n".join(
        [
            "Let’s tune your feed. Reply with topics you like:",
            "• world • india • finance • sports • entertainment",
            "",
            "You can send multiple, e.g.: india, finance",
        ]
    )
    _safe_send(prompt, to=from_number)
    return JSONResponse(status_code=200, content={"status": "success", "message": "Preferences prompt sent"})
def handle_greeting(from_number: str) -> JSONResponse:
    """Send the welcome message listing the bot's main commands."""
    welcome = "\n".join(
        [
            "Hey! 👋 I’m NuseAI.",
            "• Type *headlines* to get today’s digest.",
            "• Type *preferences* to set your topics.",
            "• Type *help* to see what I can do.",
        ]
    )
    _safe_send(welcome, to=from_number)
    return JSONResponse(status_code=200, content={"status": "success", "message": "Greeting sent"})
def handle_help(from_number: str) -> JSONResponse:
    """Send the help text describing every supported command."""
    help_text = "\n".join(
        [
            "Here’s how I can help:",
            "• *Headlines* — today’s 🗞️ Daily Digest 🟡",
            "• *Preferences* — choose topics/regions",
            "• *Unsubscribe* — stop messages",
            "Ask me a question anytime (e.g., “What’s India’s CPI outlook?”).",
        ]
    )
    _safe_send(help_text, to=from_number)
    return JSONResponse(status_code=200, content={"status": "success", "message": "Help sent"})
def handle_unsubscribe(from_number: str) -> JSONResponse:
    """Acknowledge an unsubscribe request with a friendly goodbye."""
    farewell = "You’re unsubscribed. If you change your mind, just say *hi*."
    _safe_send(farewell, to=from_number)
    return JSONResponse(status_code=200, content={"status": "success", "message": "Unsubscribed"})
def handle_small_talk(from_number: str) -> JSONResponse:
    """Reply to chit-chat with a gentle nudge toward the digest."""
    nudge = "🙂 Got it. If you’d like the news, just say *headlines*."
    _safe_send(nudge, to=from_number)
    return JSONResponse(status_code=200, content={"status": "success", "message": "Small talk"})
# ------------------------------------------------------------
# Chat Question → “Explain by number” flow (structured + quality-guarded)
# ------------------------------------------------------------
# Matches one rendered digest line of the form "<num>. <headline text>":
# group 1 = the headline number, group 2 = the rest of the line.
_HEADLINE_LINE_RE = re.compile(r"^\s*(\d+)\.\s+(.*)$")
def _extract_number_ref(text: str) -> Optional[int]:
"""
Find a referenced headline number in free text, e.g.:
'explain number 14', 'no. 7 please', '#9', '14', 'explain 14 like I am 5'
Returns int or None.
"""
s = (text or "").lower()
# explicit forms
m = re.search(r"(?:number|no\.?|num|#)\s*(\d+)", s)
if m:
return int(m.group(1))
# a bare number (avoid picking up years like 2025; cap at 1..200)
m2 = re.search(r"\b(\d{1,3})\b", s)
if m2:
n = int(m2.group(1))
if 1 <= n <= 200:
return n
return None
def _parse_rendered_digest(rendered: str) -> Dict[int, str]:
"""
Parse the same rendered digest string you send on WhatsApp and build a map:
{ number -> headline_line_text }
"""
mapping: Dict[int, str] = {}
for line in (rendered or "").splitlines():
m = _HEADLINE_LINE_RE.match(line)
if not m:
continue
num = int(m.group(1))
headline_txt = m.group(2).strip()
mapping[num] = headline_txt
return mapping
def _retrieve_context_for_headline(headline_text: str, top_k: int = 15) -> str:
    """Pull contextual passages for *headline_text* from the vector index.

    Args:
        headline_text: The rendered digest line to gather context for.
        top_k: ``similarity_top_k`` for retrieval — deliberately high to
            widen coverage (quality over speed).

    Returns:
        The query engine's response rendered as a string, or "" when the
        index module is missing, the index isn't built yet, or retrieval
        fails for any reason. Callers treat "" as "no context available".
    """
    # Defer the import so a missing/invalid index module won't break imports
    try:
        from components.indexers.news_indexer import load_news_index  # type: ignore
    except Exception as e:
        logging.warning(f"Index module not available yet: {e}")
        return ""
    # Try to load the index; if persist_dir is wrong/missing, swallow and return ""
    try:
        index = load_news_index()
        try:
            # LlamaIndex v0.10+
            qe = index.as_query_engine(similarity_top_k=top_k)
        except Exception:
            # Older API fallback
            from llama_index.core.query_engine import RetrievalQueryEngine  # type: ignore
            qe = RetrievalQueryEngine(index=index, similarity_top_k=top_k)
        query = (
            "Retrieve concise, factual context that best explains this headline:\n"
            f"{headline_text}\n"
            "Focus on who/what/when/where/why, include crucial numbers, avoid speculation."
        )
        resp = qe.query(query)
        return str(resp)
    except Exception as e:
        # Avoid noisy tracebacks in normal operation; index may simply not exist yet
        # NOTE(review): persist_dir is looked up here for logging only — presumably
        # load_news_index() reads the same env vars; verify they stay in sync.
        persist_dir = os.getenv("NEWS_INDEX_PERSIST_DIR") or os.getenv("PERSIST_DIR") or "<unset>"
        logging.warning(f"Vector retrieval skipped (no index at {persist_dir}): {e}")
        return ""
def _build_eli5_prompts(question: str, context: str, headline_only: Optional[str]) -> tuple:
    """Return ``(system_prompt, user_prompt)`` for the structured ELI5 request.

    With non-empty context the model is instructed to use ONLY that
    context (120–180 words); with no context it must answer from the
    headline alone (90–140 words) and mark missing specifics as
    'Not in context'.
    """
    sys_prompt = (
        "You are a rigorous, concise explainer for a news assistant. "
        "Produce clear, structured outputs with bullet points. "
        "If any detail is not present in context, write 'Not in context'. "
        "Avoid flowery language; be factual and neutral."
    )
    # Guard against a None context (retrieval helpers return "" on failure,
    # but be defensive so a None never crashes prompt construction).
    if (context or "").strip():
        user_prompt = (
            f"QUESTION:\n{question}\n\n"
            f"CONTEXT (may be partial, use ONLY this):\n{context}\n\n"
            "Write 120–180 words in this exact structure:\n"
            "Headline:\n"
            "Key points:\n"
            "• ...\n• ...\n• ...\n"
            "Numbers & facts:\n"
            "• ...\n• ...\n"
            "Why it matters:\n"
            "• ...\n"
            "Caveats:\n"
            "• ...\n"
            "Confidence: High | Medium | Low\n"
            "Rules:\n"
            "- If you can't find a detail in CONTEXT, write 'Not in context'.\n"
            "- Do NOT add sources or links unless they appear in CONTEXT.\n"
            "- Keep it short, precise, and neutral.\n"
        )
    else:
        # Fallback: rely on the headline only.
        headline_text = headline_only or question
        user_prompt = (
            "CONTEXT is empty. You must base the answer ONLY on the HEADLINE below; "
            "write 'Not in context' for any missing specifics.\n\n"
            f"HEADLINE:\n{headline_text}\n\n"
            "Write 90–140 words in this exact structure:\n"
            "Headline:\n"
            "Key points:\n"
            "• ...\n• ...\n"
            "Numbers & facts:\n"
            "• Not in context\n"
            "Why it matters:\n"
            "• ...\n"
            "Caveats:\n"
            "• Limited details available\n"
            "Confidence: Low\n"
        )
    return sys_prompt, user_prompt
def _eli5_answer_structured(question: str, context: str, headline_only: Optional[str] = None) -> str:
    """Generate a structured, quality-guarded ELI5 answer via the LLM.

    Args:
        question: The user-facing question (usually "Explain headline #N: ...").
        context: Retrieved passages to ground the answer; may be empty.
        headline_only: Fallback text used when *context* is empty.

    Returns:
        The model's structured answer (Headline / Key points / Numbers &
        facts / Why it matters / Caveats / Confidence), or a fixed
        low-confidence "system error" answer if the LLM call fails.
    """
    sys_prompt, user_prompt = _build_eli5_prompts(question, context, headline_only)
    try:
        llm = MistralTogetherClient()
        msgs = build_messages(user_prompt, sys_prompt)
        # Low temperature keeps the structured output consistent and factual.
        out, _usage = llm.chat(msgs, temperature=0.2, max_tokens=400)
        return out.strip()
    except Exception as e:
        logging.exception(f"Mistral structured ELI5 generation failed: {e}")
        # Static fallback in the same structure so downstream formatting holds.
        return (
            "Headline:\n"
            "Key points:\n"
            "• I couldn’t generate an explanation right now.\n"
            "Numbers & facts:\n"
            "• Not in context\n"
            "Why it matters:\n"
            "• Not in context\n"
            "Caveats:\n"
            "• System error\n"
            "Confidence: Low"
        )
def handle_chat_question(from_number: str, message_text: str) -> JSONResponse:
    """Answer a free-form chat message.

    When the message references a digest headline by number ("explain 14
    like I'm 5"), look that line up in today's rendered digest, retrieve
    vector context for it, and reply with a structured ELI5 explanation.
    Messages without a number get a usage hint instead.
    """
    logging.info(f"Chat question from {from_number}: {message_text}")
    number = _extract_number_ref(message_text or "")
    # Guard clause: no headline reference → point the user at the commands.
    if number is None:
        _safe_send(
            "Ask me about a specific headline by number, e.g., *explain 7 like I’m 5*.\n"
            "Or type *headlines* for today’s digest.",
            to=from_number,
        )
        return JSONResponse(status_code=200, content={"status": "success", "message": "Generic reply"})
    # Map numbers to lines using the same rendered digest the user received.
    digest_lines = _parse_rendered_digest(fetch_cached_headlines())
    target_line = digest_lines.get(number)
    if not target_line:
        _safe_send(
            f"I couldn’t find headline *{number}* in today’s digest. "
            "Try another number or say *headlines* to see today’s list.",
            to=from_number,
        )
        return JSONResponse(status_code=200, content={"status": "success", "message": "Number not found"})
    # Widened top_k retrieval, then a structured answer (works with empty context too).
    context = _retrieve_context_for_headline(target_line, top_k=15)
    answer = _eli5_answer_structured(
        f"Explain headline #{number}: {target_line}", context, headline_only=target_line
    )
    _safe_send(answer, to=from_number)
    return JSONResponse(status_code=200, content={"status": "success", "message": "ELI5 sent"})