File size: 12,277 Bytes
8d4ff93
 
1d4183c
5908e3c
 
 
8d4ff93
5908e3c
8d4ff93
5908e3c
 
 
 
 
8d4ff93
 
 
 
 
 
 
 
 
 
 
 
 
 
 
5908e3c
 
 
 
8d4ff93
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
5908e3c
 
 
 
8d4ff93
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
5908e3c
1d4183c
5908e3c
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1d4183c
5908e3c
 
1d4183c
 
5908e3c
1d4183c
 
 
 
 
 
 
 
5908e3c
 
 
1d4183c
5908e3c
 
1d4183c
 
5908e3c
 
 
 
 
 
 
 
 
 
1d4183c
 
 
5908e3c
 
 
1d4183c
5908e3c
1d4183c
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
5908e3c
 
1d4183c
 
 
 
5908e3c
 
1d4183c
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
5908e3c
 
 
1d4183c
5908e3c
 
1d4183c
 
 
 
 
 
 
 
 
 
 
 
 
5908e3c
 
8d4ff93
5908e3c
 
 
 
 
1d4183c
 
5908e3c
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1d4183c
 
5908e3c
1d4183c
5908e3c
1d4183c
5908e3c
 
 
 
 
 
8d4ff93
5908e3c
 
8d4ff93
 
5908e3c
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
202
203
204
205
206
207
208
209
210
211
212
213
214
215
216
217
218
219
220
221
222
223
224
225
226
227
228
229
230
231
232
233
234
235
236
237
238
239
240
241
242
243
244
245
246
247
248
249
250
251
252
253
254
255
256
257
258
259
260
261
262
263
264
265
266
267
268
269
270
271
272
273
274
275
276
277
278
279
280
281
282
283
284
285
286
287
288
289
290
291
292
293
294
295
296
297
298
299
300
301
302
303
304
305
306
307
308
309
310
311
312
313
314
315
316
317
318
# handlers/whatsapp_handlers.py
import logging
import os
import re
from typing import Optional, Dict

from fastapi.responses import JSONResponse

from components.gateways.headlines_to_wa import fetch_cached_headlines, send_to_whatsapp
from components.LLMs.Mistral import MistralTogetherClient, build_messages

# ------------------------------------------------------------
# Utilities
# ------------------------------------------------------------

def _safe_send(text: str, to: str) -> dict:
    """Send *text* to WhatsApp number *to*, logging the outcome.

    Never raises: any exception from the gateway (or a malformed
    response object) is caught, logged with a traceback, and folded
    into an error-status dict so callers can branch on "status".
    """
    try:
        result = send_to_whatsapp(text, destination_number=to)
        if result.get("status") == "success":
            logging.info(f"Sent message to {to}")
        else:
            logging.error(f"Failed to send message to {to}: {result}")
        return result
    except Exception as e:
        logging.exception(f"Exception while sending WhatsApp message to {to}: {e}")
        return {"status": "error", "error": str(e)}


# ------------------------------------------------------------
# Headlines
# ------------------------------------------------------------

def handle_headlines(from_number: str) -> JSONResponse:
    """Fetch today's cached digest and deliver it to *from_number*.

    Returns 200 on successful delivery, 500 when the digest could not
    be fetched or sent (after notifying the user about the failure).
    """
    digest = fetch_cached_headlines()

    # The gateway signals fetch failure by prefixing the text with an error emoji.
    if digest.startswith(("❌", "⚠️")):
        logging.error(f"Failed to fetch digest for {from_number}: {digest}")
        _safe_send(f"Sorry, I couldn't fetch the news digest today. {digest}", to=from_number)
        return JSONResponse(status_code=500, content={"status": "error", "message": "Failed to fetch digest"})

    send_result = _safe_send(digest, to=from_number)
    if send_result.get("status") != "success":
        _safe_send(
            f"Sorry, I couldn't send the news digest to you. Error: {send_result.get('error', 'unknown')}",
            to=from_number,
        )
        return JSONResponse(status_code=500, content={"status": "error", "message": "Failed to send digest"})

    return JSONResponse(status_code=200, content={"status": "success", "message": "Digest sent"})


# ------------------------------------------------------------
# Preferences / Greeting / Help / Unsubscribe / Small Talk
# ------------------------------------------------------------

def handle_preferences(from_number: str) -> JSONResponse:
    """Prompt the user to pick their preferred news topics."""
    prompt = "\n".join(
        [
            "Let’s tune your feed. Reply with topics you like:",
            "• world • india • finance • sports • entertainment",
            "",
            "You can send multiple, e.g.: india, finance",
        ]
    )
    _safe_send(prompt, to=from_number)
    return JSONResponse(status_code=200, content={"status": "success", "message": "Preferences prompt sent"})


def handle_greeting(from_number: str) -> JSONResponse:
    """Send the welcome message listing the supported commands."""
    greeting = "\n".join(
        [
            "Hey! 👋 I’m NuseAI.",
            "• Type *headlines* to get today’s digest.",
            "• Type *preferences* to set your topics.",
            "• Type *help* to see what I can do.",
        ]
    )
    _safe_send(greeting, to=from_number)
    return JSONResponse(status_code=200, content={"status": "success", "message": "Greeting sent"})


def handle_help(from_number: str) -> JSONResponse:
    """Send the help text describing every supported command."""
    help_text = "\n".join(
        [
            "Here’s how I can help:",
            "• *Headlines* — today’s 🗞️ Daily Digest 🟡",
            "• *Preferences* — choose topics/regions",
            "• *Unsubscribe* — stop messages",
            "Ask me a question anytime (e.g., “What’s India’s CPI outlook?”).",
        ]
    )
    _safe_send(help_text, to=from_number)
    return JSONResponse(status_code=200, content={"status": "success", "message": "Help sent"})


def handle_unsubscribe(from_number: str) -> JSONResponse:
    """Acknowledge an unsubscribe request with a farewell message."""
    farewell = "You’re unsubscribed. If you change your mind, just say *hi*."
    _safe_send(farewell, to=from_number)
    return JSONResponse(status_code=200, content={"status": "success", "message": "Unsubscribed"})


def handle_small_talk(from_number: str) -> JSONResponse:
    """Reply politely to chit-chat and nudge the user toward *headlines*."""
    reply = "🙂 Got it. If you’d like the news, just say *headlines*."
    _safe_send(reply, to=from_number)
    return JSONResponse(status_code=200, content={"status": "success", "message": "Small talk"})


# ------------------------------------------------------------
# Chat Question → “Explain by number” flow (structured + quality-guarded)
# ------------------------------------------------------------

# Matches one numbered digest line, e.g. "12. Some headline":
# group(1) = the number, group(2) = the headline text after "N. ".
_HEADLINE_LINE_RE = re.compile(r"^\s*(\d+)\.\s+(.*)$")

def _extract_number_ref(text: str) -> Optional[int]:
    """
    Find a referenced headline number in free text, e.g.:
    'explain number 14', 'no. 7 please', '#9', '14', 'explain 14 like I am 5'
    Returns int or None.
    """
    s = (text or "").lower()

    # explicit forms
    m = re.search(r"(?:number|no\.?|num|#)\s*(\d+)", s)
    if m:
        return int(m.group(1))

    # a bare number (avoid picking up years like 2025; cap at 1..200)
    m2 = re.search(r"\b(\d{1,3})\b", s)
    if m2:
        n = int(m2.group(1))
        if 1 <= n <= 200:
            return n

    return None


def _parse_rendered_digest(rendered: str) -> Dict[int, str]:
    """Build a {number: headline_text} map from the rendered digest string.

    Parses the exact text sent on WhatsApp; lines that don't look like
    "N. headline" (titles, separators, blanks) are silently skipped.
    Headline text is whitespace-stripped.
    """
    matches = (_HEADLINE_LINE_RE.match(line) for line in (rendered or "").splitlines())
    return {int(m.group(1)): m.group(2).strip() for m in matches if m}


def _retrieve_context_for_headline(headline_text: str, top_k: int = 15) -> str:
    """Pull supporting passages for *headline_text* from the vector index.

    Never raises: returns "" when the index module is missing, the
    persisted index cannot be loaded, or the query fails — callers treat
    an empty string as "no context available". `top_k` is deliberately
    generous to widen coverage (quality over speed).
    """
    # Import lazily so a missing/broken index module can't break this module's import.
    try:
        from components.indexers.news_indexer import load_news_index  # type: ignore
    except Exception as e:
        logging.warning(f"Index module not available yet: {e}")
        return ""

    try:
        index = load_news_index()
        try:
            # LlamaIndex v0.10+ API
            engine = index.as_query_engine(similarity_top_k=top_k)
        except Exception:
            # Fall back to the older query-engine constructor
            from llama_index.core.query_engine import RetrievalQueryEngine  # type: ignore
            engine = RetrievalQueryEngine(index=index, similarity_top_k=top_k)

        prompt = (
            "Retrieve concise, factual context that best explains this headline:\n"
            f"{headline_text}\n"
            "Focus on who/what/when/where/why, include crucial numbers, avoid speculation."
        )
        return str(engine.query(prompt))
    except Exception as e:
        # The index may simply not have been built yet; warn without a noisy traceback.
        persist_dir = os.getenv("NEWS_INDEX_PERSIST_DIR") or os.getenv("PERSIST_DIR") or "<unset>"
        logging.warning(f"Vector retrieval skipped (no index at {persist_dir}): {e}")
        return ""


def _eli5_answer_structured(question: str, context: str, headline_only: Optional[str] = None) -> str:
    """
    Generate a structured, quality-guarded ELI5 answer via the Mistral client.

    Args:
        question: The user-facing question (typically "Explain headline #N: ...").
        context: Retrieved passages to ground the answer; may be "" when the
            vector index had nothing (see _retrieve_context_for_headline).
        headline_only: Fallback text used instead of `question` when `context`
            is empty, so the model at least sees the raw headline.

    Returns:
        A bullet-structured answer string; on any LLM failure, a canned
        "Confidence: Low" template so the caller can still reply.

    Format:
    Headline #N — <short title>
    Key points:
    • ...
    • ...
    Numbers & facts:
    • ...
    Why it matters:
    • ...
    Caveats:
    • ...
    Confidence: High/Medium/Low

    Rules:
    - 120–180 words total.
    - Use ONLY the provided context/headline; if missing, write “Not in context”.
    - No speculation; keep neutral tone; be brief.
    """
    # System prompt pins tone and the "Not in context" guardrail.
    sys_prompt = (
        "You are a rigorous, concise explainer for a news assistant. "
        "Produce clear, structured outputs with bullet points. "
        "If any detail is not present in context, write 'Not in context'. "
        "Avoid flowery language; be factual and neutral."
    )

    if context.strip():
        user_prompt = (
            f"QUESTION:\n{question}\n\n"
            f"CONTEXT (may be partial, use ONLY this):\n{context}\n\n"
            "Write 120–180 words in this exact structure:\n"
            "Headline:\n"
            "Key points:\n"
            "• ...\n• ...\n• ...\n"
            "Numbers & facts:\n"
            "• ...\n• ...\n"
            "Why it matters:\n"
            "• ...\n"
            "Caveats:\n"
            "• ...\n"
            "Confidence: High | Medium | Low\n"
            "Rules:\n"
            "- If you can't find a detail in CONTEXT, write 'Not in context'.\n"
            "- Do NOT add sources or links unless they appear in CONTEXT.\n"
            "- Keep it short, precise, and neutral.\n"
        )
    else:
        # fallback: rely on the headline only — smaller word budget and
        # Confidence pinned to Low since there is nothing to ground specifics in
        headline_text = headline_only or question
        user_prompt = (
            "CONTEXT is empty. You must base the answer ONLY on the HEADLINE below; "
            "write 'Not in context' for any missing specifics.\n\n"
            f"HEADLINE:\n{headline_text}\n\n"
            "Write 90–140 words in this exact structure:\n"
            "Headline:\n"
            "Key points:\n"
            "• ...\n• ...\n"
            "Numbers & facts:\n"
            "• Not in context\n"
            "Why it matters:\n"
            "• ...\n"
            "Caveats:\n"
            "• Limited details available\n"
            "Confidence: Low\n"
        )

    try:
        # Low temperature keeps the output factual and stable.
        llm = MistralTogetherClient()
        msgs = build_messages(user_prompt, sys_prompt)
        out, _usage = llm.chat(msgs, temperature=0.2, max_tokens=400)
        return out.strip()
    except Exception as e:
        logging.exception(f"Mistral structured ELI5 generation failed: {e}")
        # Canned degradation: same structure the prompt demands, so the user
        # reply format stays uniform even when the LLM call fails.
        return (
            "Headline:\n"
            "Key points:\n"
            "• I couldn’t generate an explanation right now.\n"
            "Numbers & facts:\n"
            "• Not in context\n"
            "Why it matters:\n"
            "• Not in context\n"
            "Caveats:\n"
            "• System error\n"
            "Confidence: Low"
        )


def handle_chat_question(from_number: str, message_text: str) -> JSONResponse:
    """Answer a free-form chat message.

    When the message references a headline number (e.g. "explain 14 like
    I'm 5"): parse the number, resolve it against today's rendered digest,
    retrieve vector context (widened top_k for coverage), and reply with a
    structured, quality-guarded ELI5 answer. Any other message gets a
    usage hint for now.
    """
    logging.info(f"Chat question from {from_number}: {message_text}")

    number = _extract_number_ref(message_text or "")
    if number is None:
        # No headline reference — guide the user toward supported commands.
        _safe_send(
            "Ask me about a specific headline by number, e.g., *explain 7 like I’m 5*.\n"
            "Or type *headlines* for today’s digest.",
            to=from_number,
        )
        return JSONResponse(status_code=200, content={"status": "success", "message": "Generic reply"})

    # Map digest numbers to headline lines and look up the requested one.
    headline_map = _parse_rendered_digest(fetch_cached_headlines())
    headline_line = headline_map.get(number)
    if not headline_line:
        _safe_send(
            f"I couldn’t find headline *{number}* in today’s digest. "
            "Try another number or say *headlines* to see today’s list.",
            to=from_number,
        )
        return JSONResponse(status_code=200, content={"status": "success", "message": "Number not found"})

    # Retrieve broader context; the answer generator degrades gracefully
    # to headline-only mode when the context comes back empty.
    context = _retrieve_context_for_headline(headline_line, top_k=15)
    answer = _eli5_answer_structured(
        f"Explain headline #{number}: {headline_line}",
        context,
        headline_only=headline_line,
    )

    _safe_send(answer, to=from_number)
    return JSONResponse(status_code=200, content={"status": "success", "message": "ELI5 sent"})