|
|
|
import logging |
|
import os |
|
import re |
|
from typing import Optional, Dict |
|
|
|
from fastapi.responses import JSONResponse |
|
|
|
from components.gateways.headlines_to_wa import fetch_cached_headlines, send_to_whatsapp |
|
from components.LLMs.Mistral import MistralTogetherClient, build_messages |
|
|
|
|
|
|
|
|
|
|
|
def _safe_send(text: str, to: str) -> dict:
    """Deliver *text* to WhatsApp number *to*, logging the outcome.

    Never raises: any exception from the gateway is converted into an
    error-shaped result dict so callers can branch on "status".
    """
    try:
        result = send_to_whatsapp(text, destination_number=to)
        if result.get("status") == "success":
            logging.info(f"Sent message to {to}")
            return result
        logging.error(f"Failed to send message to {to}: {result}")
        return result
    except Exception as exc:
        logging.exception(f"Exception while sending WhatsApp message to {to}: {exc}")
        return {"status": "error", "error": str(exc)}
|
|
|
|
|
|
|
|
|
|
|
|
|
def handle_headlines(from_number: str) -> JSONResponse:
    """Fetch today's cached digest and deliver it to the requester.

    Returns a 200 response when the digest was sent, 500 when fetching
    or sending failed (the user is notified via WhatsApp in both cases).
    """
    digest = fetch_cached_headlines()

    # The cache layer signals failure by prefixing the text with an emoji marker.
    if digest.startswith(("❌", "⚠️")):
        logging.error(f"Failed to fetch digest for {from_number}: {digest}")
        _safe_send(f"Sorry, I couldn't fetch the news digest today. {digest}", to=from_number)
        return JSONResponse(status_code=500, content={"status": "error", "message": "Failed to fetch digest"})

    outcome = _safe_send(digest, to=from_number)
    if outcome.get("status") != "success":
        _safe_send(
            f"Sorry, I couldn't send the news digest to you. Error: {outcome.get('error', 'unknown')}",
            to=from_number,
        )
        return JSONResponse(status_code=500, content={"status": "error", "message": "Failed to send digest"})

    return JSONResponse(status_code=200, content={"status": "success", "message": "Digest sent"})
|
|
|
|
|
|
|
|
|
|
|
|
|
def handle_preferences(from_number: str) -> JSONResponse:
    """Prompt the user to pick the topics they want in their feed."""
    prompt_lines = [
        "Let’s tune your feed. Reply with topics you like:",
        "• world • india • finance • sports • entertainment",
        "",
        "You can send multiple, e.g.: india, finance",
    ]
    _safe_send("\n".join(prompt_lines), to=from_number)
    return JSONResponse(status_code=200, content={"status": "success", "message": "Preferences prompt sent"})
|
|
|
|
|
def handle_greeting(from_number: str) -> JSONResponse:
    """Send the welcome message listing the bot's commands."""
    greeting_lines = [
        "Hey! 👋 I’m NuseAI.",
        "• Type *headlines* to get today’s digest.",
        "• Type *preferences* to set your topics.",
        "• Type *help* to see what I can do.",
    ]
    _safe_send("\n".join(greeting_lines), to=from_number)
    return JSONResponse(status_code=200, content={"status": "success", "message": "Greeting sent"})
|
|
|
|
|
def handle_help(from_number: str) -> JSONResponse:
    """Send the help text describing every supported command."""
    help_lines = [
        "Here’s how I can help:",
        "• *Headlines* — today’s 🗞️ Daily Digest 🟡",
        "• *Preferences* — choose topics/regions",
        "• *Unsubscribe* — stop messages",
        "Ask me a question anytime (e.g., “What’s India’s CPI outlook?”).",
    ]
    _safe_send("\n".join(help_lines), to=from_number)
    return JSONResponse(status_code=200, content={"status": "success", "message": "Help sent"})
|
|
|
|
|
def handle_unsubscribe(from_number: str) -> JSONResponse:
    """Acknowledge an unsubscribe request."""
    farewell = "You’re unsubscribed. If you change your mind, just say *hi*."
    _safe_send(farewell, to=from_number)
    return JSONResponse(status_code=200, content={"status": "success", "message": "Unsubscribed"})
|
|
|
|
|
def handle_small_talk(from_number: str) -> JSONResponse:
    """Reply politely to chit-chat and nudge the user toward the digest."""
    nudge = "🙂 Got it. If you’d like the news, just say *headlines*."
    _safe_send(nudge, to=from_number)
    return JSONResponse(status_code=200, content={"status": "success", "message": "Small talk"})
|
|
|
|
|
|
|
|
|
|
|
|
|
# Matches one numbered digest line, e.g. " 3. Some headline" -> groups (number, text).
_HEADLINE_LINE_RE = re.compile(r"^\s*(\d+)\.\s+(.*)$")
|
|
|
def _extract_number_ref(text: str) -> Optional[int]: |
|
""" |
|
Find a referenced headline number in free text, e.g.: |
|
'explain number 14', 'no. 7 please', '#9', '14', 'explain 14 like I am 5' |
|
Returns int or None. |
|
""" |
|
s = (text or "").lower() |
|
|
|
|
|
m = re.search(r"(?:number|no\.?|num|#)\s*(\d+)", s) |
|
if m: |
|
return int(m.group(1)) |
|
|
|
|
|
m2 = re.search(r"\b(\d{1,3})\b", s) |
|
if m2: |
|
n = int(m2.group(1)) |
|
if 1 <= n <= 200: |
|
return n |
|
|
|
return None |
|
|
|
|
|
def _parse_rendered_digest(rendered: str) -> Dict[int, str]: |
|
""" |
|
Parse the same rendered digest string you send on WhatsApp and build a map: |
|
{ number -> headline_line_text } |
|
""" |
|
mapping: Dict[int, str] = {} |
|
for line in (rendered or "").splitlines(): |
|
m = _HEADLINE_LINE_RE.match(line) |
|
if not m: |
|
continue |
|
num = int(m.group(1)) |
|
headline_txt = m.group(2).strip() |
|
mapping[num] = headline_txt |
|
return mapping |
|
|
|
|
|
def _retrieve_context_for_headline(headline_text: str, top_k: int = 15) -> str: |
|
""" |
|
Use the vector index to pull contextual passages related to the headline. |
|
- Uses a higher top_k to widen coverage (quality over speed). |
|
- Gracefully degrades if index is unavailable or not yet built. |
|
""" |
|
|
|
try: |
|
from components.indexers.news_indexer import load_news_index |
|
except Exception as e: |
|
logging.warning(f"Index module not available yet: {e}") |
|
return "" |
|
|
|
|
|
try: |
|
index = load_news_index() |
|
try: |
|
|
|
qe = index.as_query_engine(similarity_top_k=top_k) |
|
except Exception: |
|
|
|
from llama_index.core.query_engine import RetrievalQueryEngine |
|
qe = RetrievalQueryEngine(index=index, similarity_top_k=top_k) |
|
|
|
query = ( |
|
"Retrieve concise, factual context that best explains this headline:\n" |
|
f"{headline_text}\n" |
|
"Focus on who/what/when/where/why, include crucial numbers, avoid speculation." |
|
) |
|
resp = qe.query(query) |
|
return str(resp) |
|
except Exception as e: |
|
|
|
persist_dir = os.getenv("NEWS_INDEX_PERSIST_DIR") or os.getenv("PERSIST_DIR") or "<unset>" |
|
logging.warning(f"Vector retrieval skipped (no index at {persist_dir}): {e}") |
|
return "" |
|
|
|
|
|
def _eli5_answer_structured(question: str, context: str, headline_only: Optional[str] = None) -> str: |
|
""" |
|
Generate a structured, quality-guarded ELI5 answer. |
|
Format: |
|
Headline #N — <short title> |
|
Key points: |
|
• ... |
|
• ... |
|
Numbers & facts: |
|
• ... |
|
Why it matters: |
|
• ... |
|
Caveats: |
|
• ... |
|
Confidence: High/Medium/Low |
|
|
|
Rules: |
|
- 120–180 words total. |
|
- Use ONLY the provided context/headline; if missing, write “Not in context”. |
|
- No speculation; keep neutral tone; be brief. |
|
""" |
|
sys_prompt = ( |
|
"You are a rigorous, concise explainer for a news assistant. " |
|
"Produce clear, structured outputs with bullet points. " |
|
"If any detail is not present in context, write 'Not in context'. " |
|
"Avoid flowery language; be factual and neutral." |
|
) |
|
|
|
if context.strip(): |
|
user_prompt = ( |
|
f"QUESTION:\n{question}\n\n" |
|
f"CONTEXT (may be partial, use ONLY this):\n{context}\n\n" |
|
"Write 120–180 words in this exact structure:\n" |
|
"Headline:\n" |
|
"Key points:\n" |
|
"• ...\n• ...\n• ...\n" |
|
"Numbers & facts:\n" |
|
"• ...\n• ...\n" |
|
"Why it matters:\n" |
|
"• ...\n" |
|
"Caveats:\n" |
|
"• ...\n" |
|
"Confidence: High | Medium | Low\n" |
|
"Rules:\n" |
|
"- If you can't find a detail in CONTEXT, write 'Not in context'.\n" |
|
"- Do NOT add sources or links unless they appear in CONTEXT.\n" |
|
"- Keep it short, precise, and neutral.\n" |
|
) |
|
else: |
|
|
|
headline_text = headline_only or question |
|
user_prompt = ( |
|
"CONTEXT is empty. You must base the answer ONLY on the HEADLINE below; " |
|
"write 'Not in context' for any missing specifics.\n\n" |
|
f"HEADLINE:\n{headline_text}\n\n" |
|
"Write 90–140 words in this exact structure:\n" |
|
"Headline:\n" |
|
"Key points:\n" |
|
"• ...\n• ...\n" |
|
"Numbers & facts:\n" |
|
"• Not in context\n" |
|
"Why it matters:\n" |
|
"• ...\n" |
|
"Caveats:\n" |
|
"• Limited details available\n" |
|
"Confidence: Low\n" |
|
) |
|
|
|
try: |
|
llm = MistralTogetherClient() |
|
msgs = build_messages(user_prompt, sys_prompt) |
|
out, _usage = llm.chat(msgs, temperature=0.2, max_tokens=400) |
|
return out.strip() |
|
except Exception as e: |
|
logging.exception(f"Mistral structured ELI5 generation failed: {e}") |
|
return ( |
|
"Headline:\n" |
|
"Key points:\n" |
|
"• I couldn’t generate an explanation right now.\n" |
|
"Numbers & facts:\n" |
|
"• Not in context\n" |
|
"Why it matters:\n" |
|
"• Not in context\n" |
|
"Caveats:\n" |
|
"• System error\n" |
|
"Confidence: Low" |
|
) |
|
|
|
|
|
def handle_chat_question(from_number: str, message_text: str) -> JSONResponse:
    """Answer a free-form chat message.

    When the text references a headline number ("explain 14 like I'm 5"):
    parse the number, look up that numbered line in today's rendered
    digest, retrieve vector context (widened top_k for coverage), and send
    a structured ELI5 answer with quality guardrails. Any other message
    gets a gentle usage hint (for now).
    """
    logging.info(f"Chat question from {from_number}: {message_text}")

    ref = _extract_number_ref(message_text or "")
    if ref is None:
        # No headline number referenced — steer the user toward the commands.
        _safe_send(
            "Ask me about a specific headline by number, e.g., *explain 7 like I’m 5*.\n"
            "Or type *headlines* for today’s digest.",
            to=from_number,
        )
        return JSONResponse(status_code=200, content={"status": "success", "message": "Generic reply"})

    # Resolve the number against today's rendered digest.
    digest_map = _parse_rendered_digest(fetch_cached_headlines())
    headline = digest_map.get(ref)
    if not headline:
        _safe_send(
            f"I couldn’t find headline *{ref}* in today’s digest. "
            "Try another number or say *headlines* to see today’s list.",
            to=from_number,
        )
        return JSONResponse(status_code=200, content={"status": "success", "message": "Number not found"})

    # Retrieve context and generate the structured explanation.
    context = _retrieve_context_for_headline(headline, top_k=15)
    answer = _eli5_answer_structured(
        f"Explain headline #{ref}: {headline}", context, headline_only=headline
    )

    _safe_send(answer, to=from_number)
    return JSONResponse(status_code=200, content={"status": "success", "message": "ELI5 sent"})
|
|