|
|
|
import logging |
|
import os |
|
import re |
|
from typing import Optional, Dict |
|
|
|
from fastapi.responses import JSONResponse |
|
|
|
from components.gateways.headlines_to_wa import fetch_cached_headlines, send_to_whatsapp |
|
from components.LLMs.Mistral import MistralTogetherClient, build_messages |
|
|
|
|
|
|
|
|
|
|
|
def _safe_send(text: str, to: str) -> dict:
    """Deliver *text* to WhatsApp number *to*, logging the outcome.

    Never raises: any exception from the gateway is converted into an
    error-shaped result dict so callers can branch on "status".
    """
    try:
        result = send_to_whatsapp(text, destination_number=to)
        if result.get("status") == "success":
            logging.info(f"Sent message to {to}")
            return result
        logging.error(f"Failed to send message to {to}: {result}")
        return result
    except Exception as exc:
        logging.exception(f"Exception while sending WhatsApp message to {to}: {exc}")
        return {"status": "error", "error": str(exc)}
|
|
|
|
|
|
|
|
|
|
|
|
|
def handle_headlines(from_number: str) -> JSONResponse:
    """Fetch today's cached digest and deliver it to the requester.

    Returns a 200 response when the digest was sent, 500 when fetching
    or sending failed (the user is notified via WhatsApp in both cases).
    """
    digest = fetch_cached_headlines()

    # The cache layer signals failure by prefixing the text with an emoji marker.
    if digest.startswith(("❌", "⚠️")):
        logging.error(f"Failed to fetch digest for {from_number}: {digest}")
        _safe_send(f"Sorry, I couldn't fetch the news digest today. {digest}", to=from_number)
        return JSONResponse(status_code=500, content={"status": "error", "message": "Failed to fetch digest"})

    outcome = _safe_send(digest, to=from_number)
    if outcome.get("status") != "success":
        _safe_send(
            f"Sorry, I couldn't send the news digest to you. Error: {outcome.get('error', 'unknown')}",
            to=from_number,
        )
        return JSONResponse(status_code=500, content={"status": "error", "message": "Failed to send digest"})

    return JSONResponse(status_code=200, content={"status": "success", "message": "Digest sent"})
|
|
|
|
|
|
|
|
|
|
|
|
|
def handle_preferences(from_number: str) -> JSONResponse:
    """Prompt the user to pick the topics they want in their feed."""
    prompt_lines = [
        "Let’s tune your feed. Reply with topics you like:",
        "• world • india • finance • sports • entertainment",
        "",
        "You can send multiple, e.g.: india, finance",
    ]
    _safe_send("\n".join(prompt_lines), to=from_number)
    return JSONResponse(status_code=200, content={"status": "success", "message": "Preferences prompt sent"})
|
|
|
|
|
def handle_greeting(from_number: str) -> JSONResponse:
    """Send the welcome message listing the bot's commands."""
    greeting_lines = [
        "Hey! 👋 I’m NuseAI.",
        "• Type *headlines* to get today’s digest.",
        "• Type *preferences* to set your topics.",
        "• Type *help* to see what I can do.",
    ]
    _safe_send("\n".join(greeting_lines), to=from_number)
    return JSONResponse(status_code=200, content={"status": "success", "message": "Greeting sent"})
|
|
|
|
|
def handle_help(from_number: str) -> JSONResponse:
    """Send the help text describing every supported command."""
    help_lines = [
        "Here’s how I can help:",
        "• *Headlines* — today’s 🗞️ Daily Digest 🟡",
        "• *Preferences* — choose topics/regions",
        "• *Unsubscribe* — stop messages",
        "Ask me a question anytime (e.g., “What’s India’s CPI outlook?”).",
    ]
    _safe_send("\n".join(help_lines), to=from_number)
    return JSONResponse(status_code=200, content={"status": "success", "message": "Help sent"})
|
|
|
|
|
def handle_unsubscribe(from_number: str) -> JSONResponse:
    """Acknowledge an unsubscribe request."""
    farewell = "You’re unsubscribed. If you change your mind, just say *hi*."
    _safe_send(farewell, to=from_number)
    return JSONResponse(status_code=200, content={"status": "success", "message": "Unsubscribed"})
|
|
|
|
|
def handle_small_talk(from_number: str) -> JSONResponse:
    """Reply politely to chit-chat and nudge the user toward the digest."""
    nudge = "🙂 Got it. If you’d like the news, just say *headlines*."
    _safe_send(nudge, to=from_number)
    return JSONResponse(status_code=200, content={"status": "success", "message": "Small talk"})
|
|
|
|
|
|
|
|
|
|
|
|
|
# Matches one numbered digest line, e.g. " 3. Some headline" -> groups (number, text).
_HEADLINE_LINE_RE = re.compile(r"^\s*(\d+)\.\s+(.*)$")
|
|
|
def _extract_number_ref(text: str) -> Optional[int]: |
|
""" |
|
Find a referenced headline number in free text, e.g.: |
|
'explain number 14', 'no. 7 please', '#9', '14', 'explain 14 like I am 5' |
|
Returns int or None. |
|
""" |
|
s = (text or "").lower() |
|
|
|
|
|
m = re.search(r"(?:number|no\.?|num|#)\s*(\d+)", s) |
|
if m: |
|
return int(m.group(1)) |
|
|
|
|
|
m2 = re.search(r"\b(\d{1,3})\b", s) |
|
if m2: |
|
n = int(m2.group(1)) |
|
if 1 <= n <= 200: |
|
return n |
|
|
|
return None |
|
|
|
|
|
def _parse_rendered_digest(rendered: str) -> Dict[int, str]: |
|
""" |
|
Parse the same rendered digest string you send on WhatsApp and build a map: |
|
{ number -> headline_line_text } |
|
""" |
|
mapping: Dict[int, str] = {} |
|
for line in (rendered or "").splitlines(): |
|
m = _HEADLINE_LINE_RE.match(line) |
|
if not m: |
|
continue |
|
num = int(m.group(1)) |
|
headline_txt = m.group(2).strip() |
|
mapping[num] = headline_txt |
|
return mapping |
|
|
|
|
|
def _retrieve_context_for_headline(headline_text: str, top_k: int = 15) -> str: |
|
""" |
|
Use the vector index to pull contextual passages related to the headline. |
|
- Uses a higher top_k to widen coverage (quality over speed). |
|
- Gracefully degrades if index is unavailable or not yet built. |
|
""" |
|
|
|
try: |
|
from components.indexers.news_indexer import load_news_index |
|
except Exception as e: |
|
logging.warning(f"Index module not available yet: {e}") |
|
return "" |
|
|
|
|
|
try: |
|
index = load_news_index() |
|
try: |
|
|
|
qe = index.as_query_engine(similarity_top_k=top_k) |
|
except Exception: |
|
|
|
from llama_index.core.query_engine import RetrievalQueryEngine |
|
qe = RetrievalQueryEngine(index=index, similarity_top_k=top_k) |
|
|
|
query = ( |
|
"Retrieve concise, factual context that best explains this headline:\n" |
|
f"{headline_text}\n" |
|
"Focus on who/what/when/where/why, include crucial numbers, avoid speculation." |
|
) |
|
resp = qe.query(query) |
|
return str(resp) |
|
except Exception as e: |
|
|
|
persist_dir = os.getenv("NEWS_INDEX_PERSIST_DIR") or os.getenv("PERSIST_DIR") or "<unset>" |
|
logging.warning(f"Vector retrieval skipped (no index at {persist_dir}): {e}") |
|
return "" |
|
|
|
|
|
def _eli5_answer_structured(question: str, context: str, headline_only: Optional[str] = None) -> str: |
|
""" |
|
Generate a structured, quality-guarded ELI5 answer. |
|
Format: |
|
Headline #N — <short title> |
|
Key points: |
|
• ... |
|
• ... |
|
Numbers & facts: |
|
• ... |
|
Why it matters: |
|
• ... |
|
Caveats: |
|
• ... |
|
Confidence: High/Medium/Low |
|
|
|
Rules: |
|
- 120–180 words total. |
|
- Use ONLY the provided context/headline; if missing, write “Not in context”. |
|
- No speculation; keep neutral tone; be brief. |
|
""" |
|
sys_prompt = ( |
|
"You are a rigorous, concise explainer for a news assistant. " |
|
"Produce clear, structured outputs with bullet points. " |
|
"If any detail is not present in context, write 'Not in context'. " |
|
"Avoid flowery language; be factual and neutral." |
|
) |
|
|
|
if context.strip(): |
|
user_prompt = ( |
|
f"QUESTION:\n{question}\n\n" |
|
f"CONTEXT (may be partial, use ONLY this):\n{context}\n\n" |
|
"Write 120–180 words in this exact structure:\n" |
|
"Headline:\n" |
|
"Key points:\n" |
|
"• ...\n• ...\n• ...\n" |
|
"Numbers & facts:\n" |
|
"• ...\n• ...\n" |
|
"Why it matters:\n" |
|
"• ...\n" |
|
"Caveats:\n" |
|
"• ...\n" |
|
"Confidence: High | Medium | Low\n" |
|
"Rules:\n" |
|
"- If you can't find a detail in CONTEXT, write 'Not in context'.\n" |
|
"- Do NOT add sources or links unless they appear in CONTEXT.\n" |
|
"- Keep it short, precise, and neutral.\n" |
|
) |
|
else: |
|
|
|
headline_text = headline_only or question |
|
user_prompt = ( |
|
"CONTEXT is empty. You must base the answer ONLY on the HEADLINE below; " |
|
"write 'Not in context' for any missing specifics.\n\n" |
|
f"HEADLINE:\n{headline_text}\n\n" |
|
"Write 90–140 words in this exact structure:\n" |
|
"Headline:\n" |
|
"Key points:\n" |
|
"• ...\n• ...\n" |
|
"Numbers & facts:\n" |
|
"• Not in context\n" |
|
"Why it matters:\n" |
|
"• ...\n" |
|
"Caveats:\n" |
|
"• Limited details available\n" |
|
"Confidence: Low\n" |
|
) |
|
|
|
try: |
|
llm = MistralTogetherClient() |
|
msgs = build_messages(user_prompt, sys_prompt) |
|
out, _usage = llm.chat(msgs, temperature=0.2, max_tokens=400) |
|
return out.strip() |
|
except Exception as e: |
|
logging.exception(f"Mistral structured ELI5 generation failed: {e}") |
|
return ( |
|
"Headline:\n" |
|
"Key points:\n" |
|
"• I couldn’t generate an explanation right now.\n" |
|
"Numbers & facts:\n" |
|
"• Not in context\n" |
|
"Why it matters:\n" |
|
"• Not in context\n" |
|
"Caveats:\n" |
|
"• System error\n" |
|
"Confidence: Low" |
|
) |
|
|
|
|
|
def handle_chat_question(from_number: str, message_text: str) -> JSONResponse:
    """Answer a free-form chat message.

    When the text references a headline number ("explain 14 like I'm 5"):
    parse the number, look up that numbered line in today's rendered
    digest, retrieve vector context (widened top_k for coverage), and send
    a structured ELI5 answer with quality guardrails. Any other message
    gets a gentle usage hint (for now).
    """
    logging.info(f"Chat question from {from_number}: {message_text}")

    ref = _extract_number_ref(message_text or "")
    if ref is None:
        # No headline number referenced — steer the user toward the commands.
        _safe_send(
            "Ask me about a specific headline by number, e.g., *explain 7 like I’m 5*.\n"
            "Or type *headlines* for today’s digest.",
            to=from_number,
        )
        return JSONResponse(status_code=200, content={"status": "success", "message": "Generic reply"})

    # Resolve the number against today's rendered digest.
    digest_map = _parse_rendered_digest(fetch_cached_headlines())
    headline = digest_map.get(ref)
    if not headline:
        _safe_send(
            f"I couldn’t find headline *{ref}* in today’s digest. "
            "Try another number or say *headlines* to see today’s list.",
            to=from_number,
        )
        return JSONResponse(status_code=200, content={"status": "success", "message": "Number not found"})

    # Retrieve context and generate the structured explanation.
    context = _retrieve_context_for_headline(headline, top_k=15)
    answer = _eli5_answer_structured(
        f"Explain headline #{ref}: {headline}", context, headline_only=headline
    )

    _safe_send(answer, to=from_number)
    return JSONResponse(status_code=200, content={"status": "success", "message": "ELI5 sent"})
|
|