# handlers/whatsapp_handlers.py
import logging
import os
import re
from typing import Optional, Dict

from fastapi.responses import JSONResponse

from components.gateways.headlines_to_wa import fetch_cached_headlines, send_to_whatsapp
from components.LLMs.Mistral import MistralTogetherClient, build_messages


# ------------------------------------------------------------
# Utilities
# ------------------------------------------------------------
def _safe_send(text: str, to: str) -> dict:
    """Wrap send_to_whatsapp with logging and safe error handling."""
    try:
        res = send_to_whatsapp(text, destination_number=to)
        if res.get("status") == "success":
            logging.info(f"Sent message to {to}")
        else:
            logging.error(f"Failed to send message to {to}: {res}")
        return res
    except Exception as e:
        logging.exception(f"Exception while sending WhatsApp message to {to}: {e}")
        return {"status": "error", "error": str(e)}


# ------------------------------------------------------------
# Headlines
# ------------------------------------------------------------
def handle_headlines(from_number: str) -> JSONResponse:
    full_message_text = fetch_cached_headlines()

    if full_message_text.startswith("❌") or full_message_text.startswith("⚠️"):
        logging.error(f"Failed to fetch digest for {from_number}: {full_message_text}")
        _safe_send(f"Sorry, I couldn't fetch the news digest today. {full_message_text}", to=from_number)
        return JSONResponse(status_code=500, content={"status": "error", "message": "Failed to fetch digest"})

    result = _safe_send(full_message_text, to=from_number)
    if result.get("status") == "success":
        return JSONResponse(status_code=200, content={"status": "success", "message": "Digest sent"})
    else:
        _safe_send(
            f"Sorry, I couldn't send the news digest to you. Error: {result.get('error', 'unknown')}",
            to=from_number,
        )
        return JSONResponse(status_code=500, content={"status": "error", "message": "Failed to send digest"})


# ------------------------------------------------------------
# Preferences / Greeting / Help / Unsubscribe / Small Talk
# ------------------------------------------------------------
def handle_preferences(from_number: str) -> JSONResponse:
    msg = (
        "Let’s tune your feed. Reply with topics you like:\n"
        "• world • india • finance • sports • entertainment\n\n"
        "You can send multiple, e.g.: india, finance"
    )
    _safe_send(msg, to=from_number)
    return JSONResponse(status_code=200, content={"status": "success", "message": "Preferences prompt sent"})


def handle_greeting(from_number: str) -> JSONResponse:
    msg = (
        "Hey! 👋 I’m NuseAI.\n"
        "• Type *headlines* to get today’s digest.\n"
        "• Type *preferences* to set your topics.\n"
        "• Type *help* to see what I can do."
    )
    _safe_send(msg, to=from_number)
    return JSONResponse(status_code=200, content={"status": "success", "message": "Greeting sent"})


def handle_help(from_number: str) -> JSONResponse:
    msg = (
        "Here’s how I can help:\n"
        "• *Headlines* — today’s 🗞️ Daily Digest 🟡\n"
        "• *Preferences* — choose topics/regions\n"
        "• *Unsubscribe* — stop messages\n"
        "Ask me a question anytime (e.g., “What’s India’s CPI outlook?”)."
    )
    _safe_send(msg, to=from_number)
    return JSONResponse(status_code=200, content={"status": "success", "message": "Help sent"})


def handle_unsubscribe(from_number: str) -> JSONResponse:
    _safe_send("You’re unsubscribed. If you change your mind, just say *hi*.", to=from_number)
    return JSONResponse(status_code=200, content={"status": "success", "message": "Unsubscribed"})


def handle_small_talk(from_number: str) -> JSONResponse:
    _safe_send("🙂 Got it. If you’d like the news, just say *headlines*.", to=from_number)
    return JSONResponse(status_code=200, content={"status": "success", "message": "Small talk"})
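
# ------------------------------------------------------------
# Example wiring (illustrative sketch)
# ------------------------------------------------------------
# A minimal sketch of how an inbound webhook might dispatch to the handlers
# above. The keyword map below is an assumption for illustration, not the
# production routing logic; the real routing lives wherever the webhook
# parses incoming WhatsApp payloads.
def route_message(from_number: str, message_text: str) -> JSONResponse:
    """Hypothetical keyword router; shown only to illustrate how the
    handlers in this module compose."""
    text = (message_text or "").strip().lower()
    if text in {"hi", "hello", "hey"}:
        return handle_greeting(from_number)
    if "headline" in text:
        return handle_headlines(from_number)
    if "preference" in text:
        return handle_preferences(from_number)
    if "help" in text:
        return handle_help(from_number)
    if "unsubscribe" in text or text == "stop":
        return handle_unsubscribe(from_number)
    # Anything else falls through to the chat-question flow defined below.
    return handle_chat_question(from_number, message_text)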

# ------------------------------------------------------------
# Chat Question → “Explain by number” flow (structured + quality-guarded)
# ------------------------------------------------------------
_HEADLINE_LINE_RE = re.compile(r"^\s*(\d+)\.\s+(.*)$")


def _extract_number_ref(text: str) -> Optional[int]:
    """
    Find a referenced headline number in free text, e.g.:
      'explain number 14', 'no. 7 please', '#9', '14', 'explain 14 like I am 5'
    Returns int or None.
    """
    s = (text or "").lower()

    # explicit forms
    m = re.search(r"(?:number|no\.?|num|#)\s*(\d+)", s)
    if m:
        return int(m.group(1))

    # a bare number (avoid picking up years like 2025; cap at 1..200)
    m2 = re.search(r"\b(\d{1,3})\b", s)
    if m2:
        n = int(m2.group(1))
        if 1 <= n <= 200:
            return n

    return None


def _parse_rendered_digest(rendered: str) -> Dict[int, str]:
    """
    Parse the same rendered digest string sent on WhatsApp and build a map:
      { number -> headline_line_text }
    """
    mapping: Dict[int, str] = {}
    for line in (rendered or "").splitlines():
        m = _HEADLINE_LINE_RE.match(line)
        if not m:
            continue
        num = int(m.group(1))
        headline_txt = m.group(2).strip()
        mapping[num] = headline_txt
    return mapping


def _retrieve_context_for_headline(headline_text: str, top_k: int = 15) -> str:
    """
    Use the vector index to pull contextual passages related to the headline.
    - Uses a higher top_k to widen coverage (quality over speed).
    - Gracefully degrades if the index is unavailable or not yet built.
    """
    # Defer the import so a missing/invalid index module won't break imports
    try:
        from components.indexers.news_indexer import load_news_index  # type: ignore
    except Exception as e:
        logging.warning(f"Index module not available yet: {e}")
        return ""

    # Try to load the index; if persist_dir is wrong/missing, swallow and return ""
    try:
        index = load_news_index()
        try:
            # LlamaIndex v0.10+
            qe = index.as_query_engine(similarity_top_k=top_k)
        except Exception:
            # Fallback: build a retriever explicitly and wrap it in a query engine
            from llama_index.core.query_engine import RetrieverQueryEngine  # type: ignore

            retriever = index.as_retriever(similarity_top_k=top_k)
            qe = RetrieverQueryEngine(retriever=retriever)

        query = (
            "Retrieve concise, factual context that best explains this headline:\n"
            f"{headline_text}\n"
            "Focus on who/what/when/where/why, include crucial numbers, avoid speculation."
        )
        resp = qe.query(query)
        return str(resp)
    except Exception as e:
        # Avoid noisy tracebacks in normal operation; the index may simply not exist yet
        persist_dir = os.getenv("NEWS_INDEX_PERSIST_DIR") or os.getenv("PERSIST_DIR") or ""
        logging.warning(f"Vector retrieval skipped (no index at {persist_dir}): {e}")
        return ""
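
# For reference, a hedged sketch of what a persisted-index loader like
# components.indexers.news_indexer.load_news_index() often looks like with
# LlamaIndex v0.10+. This stand-in is illustrative only and is not called by
# the handlers in this module; it reuses the same persist-dir env vars
# checked in the except branch above.
def _load_news_index_sketch():
    """Illustrative loader for a persisted LlamaIndex index (assumption, not
    the project's real loader)."""
    from llama_index.core import StorageContext, load_index_from_storage  # type: ignore

    # The "./news_index" default is an assumption for illustration.
    persist_dir = os.getenv("NEWS_INDEX_PERSIST_DIR") or os.getenv("PERSIST_DIR") or "./news_index"
    storage_context = StorageContext.from_defaults(persist_dir=persist_dir)
    return load_index_from_storage(storage_context)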

def _eli5_answer_structured(question: str, context: str, headline_only: Optional[str] = None) -> str:
    """
    Generate a structured, quality-guarded ELI5 answer.

    Format:
      Headline #N —
      Key points:
      • ...
      • ...
      Numbers & facts:
      • ...
      Why it matters:
      • ...
      Caveats:
      • ...
      Confidence: High/Medium/Low

    Rules:
      - 120–180 words total.
      - Use ONLY the provided context/headline; if missing, write “Not in context”.
      - No speculation; keep neutral tone; be brief.
    """
    sys_prompt = (
        "You are a rigorous, concise explainer for a news assistant. "
        "Produce clear, structured outputs with bullet points. "
        "If any detail is not present in context, write 'Not in context'. "
        "Avoid flowery language; be factual and neutral."
    )

    if context.strip():
        user_prompt = (
            f"QUESTION:\n{question}\n\n"
            f"CONTEXT (may be partial, use ONLY this):\n{context}\n\n"
            "Write 120–180 words in this exact structure:\n"
            "Headline:\n"
            "Key points:\n"
            "• ...\n• ...\n• ...\n"
            "Numbers & facts:\n"
            "• ...\n• ...\n"
            "Why it matters:\n"
            "• ...\n"
            "Caveats:\n"
            "• ...\n"
            "Confidence: High | Medium | Low\n"
            "Rules:\n"
            "- If you can't find a detail in CONTEXT, write 'Not in context'.\n"
            "- Do NOT add sources or links unless they appear in CONTEXT.\n"
            "- Keep it short, precise, and neutral.\n"
        )
    else:
        # Fallback: rely on the headline only
        headline_text = headline_only or question
        user_prompt = (
            "CONTEXT is empty. You must base the answer ONLY on the HEADLINE below; "
            "write 'Not in context' for any missing specifics.\n\n"
            f"HEADLINE:\n{headline_text}\n\n"
            "Write 90–140 words in this exact structure:\n"
            "Headline:\n"
            "Key points:\n"
            "• ...\n• ...\n"
            "Numbers & facts:\n"
            "• Not in context\n"
            "Why it matters:\n"
            "• ...\n"
            "Caveats:\n"
            "• Limited details available\n"
            "Confidence: Low\n"
        )

    try:
        llm = MistralTogetherClient()
        msgs = build_messages(user_prompt, sys_prompt)
        out, _usage = llm.chat(msgs, temperature=0.2, max_tokens=400)
        return out.strip()
    except Exception as e:
        logging.exception(f"Mistral structured ELI5 generation failed: {e}")
        return (
            "Headline:\n"
            "Key points:\n"
            "• I couldn’t generate an explanation right now.\n"
            "Numbers & facts:\n"
            "• Not in context\n"
            "Why it matters:\n"
            "• Not in context\n"
            "Caveats:\n"
            "• System error\n"
            "Confidence: Low"
        )


def handle_chat_question(from_number: str, message_text: str) -> JSONResponse:
    """
    Smart handler:
    - If the user references a headline number (“explain 14 like I’m 5”):
        1) Parse the number
        2) Look up that numbered line from the rendered digest
        3) Retrieve vector context (top_k widened for coverage)
        4) Generate a STRUCTURED ELI5 answer (with quality guardrails)
    - Otherwise, provide a gentle hint (for now).
    """
    logging.info(f"Chat question from {from_number}: {message_text}")

    # 1) Try to find a headline number reference
    number = _extract_number_ref(message_text or "")

    if number is not None:
        # 2) Load the rendered digest and map numbers to lines
        rendered = fetch_cached_headlines()
        mapping = _parse_rendered_digest(rendered)
        target_line = mapping.get(number)

        if not target_line:
            _safe_send(
                f"I couldn’t find headline *{number}* in today’s digest. "
                "Try another number or say *headlines* to see today’s list.",
                to=from_number,
            )
            return JSONResponse(status_code=200, content={"status": "success", "message": "Number not found"})

        # 3) Retrieve broader context from the vector index using the headline line
        ctx = _retrieve_context_for_headline(target_line, top_k=15)

        # 4) Generate a STRUCTURED ELI5 answer (works even if ctx == "")
        question = f"Explain headline #{number}: {target_line}"
        answer = _eli5_answer_structured(question, ctx, headline_only=target_line)

        # 5) Send back
        _safe_send(answer, to=from_number)
        return JSONResponse(status_code=200, content={"status": "success", "message": "ELI5 sent"})

    # No number found → for now, guide the user
    _safe_send(
        "Ask me about a specific headline by number, e.g., *explain 7 like I’m 5*.\n"
        "Or type *headlines* for today’s digest.",
        to=from_number,
    )
    return JSONResponse(status_code=200, content={"status": "success", "message": "Generic reply"})
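
# ------------------------------------------------------------
# Offline smoke test (illustration only)
# ------------------------------------------------------------
# Exercises just the pure parsing steps of the "explain by number" flow with
# a made-up sample digest; the network-bound pieces (WhatsApp send, vector
# retrieval, the LLM call) are deliberately not invoked.
if __name__ == "__main__":
    sample_digest = (
        "🗞️ Daily Digest 🟡\n"
        "1. RBI holds repo rate at 6.5%\n"
        "2. Monsoon arrives early in Kerala"
    )
    assert _parse_rendered_digest(sample_digest) == {
        1: "RBI holds repo rate at 6.5%",
        2: "Monsoon arrives early in Kerala",
    }
    assert _extract_number_ref("explain number 2 like I’m 5") == 2
    assert _extract_number_ref("#1 please") == 1
    # Four-digit years fall outside the 1..200 bare-number window.
    assert _extract_number_ref("what happened in 2025?") is None
    print("whatsapp_handlers parsing smoke test passed")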