File size: 12,277 Bytes
8d4ff93
 
1d4183c
5908e3c
 
 
8d4ff93
5908e3c
8d4ff93
5908e3c
 
 
 
 
8d4ff93
 
 
 
 
 
 
 
 
 
 
 
 
 
 
5908e3c
 
 
 
8d4ff93
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
5908e3c
 
 
 
8d4ff93
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
5908e3c
1d4183c
5908e3c
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1d4183c
5908e3c
 
1d4183c
 
5908e3c
1d4183c
 
 
 
 
 
 
 
5908e3c
 
 
1d4183c
5908e3c
 
1d4183c
 
5908e3c
 
 
 
 
 
 
 
 
 
1d4183c
 
 
5908e3c
 
 
1d4183c
5908e3c
1d4183c
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
5908e3c
 
1d4183c
 
 
 
5908e3c
 
1d4183c
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
5908e3c
 
 
1d4183c
5908e3c
 
1d4183c
 
 
 
 
 
 
 
 
 
 
 
 
5908e3c
 
8d4ff93
5908e3c
 
 
 
 
1d4183c
 
5908e3c
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1d4183c
 
5908e3c
1d4183c
5908e3c
1d4183c
5908e3c
 
 
 
 
 
8d4ff93
5908e3c
 
8d4ff93
 
5908e3c
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
202
203
204
205
206
207
208
209
210
211
212
213
214
215
216
217
218
219
220
221
222
223
224
225
226
227
228
229
230
231
232
233
234
235
236
237
238
239
240
241
242
243
244
245
246
247
248
249
250
251
252
253
254
255
256
257
258
259
260
261
262
263
264
265
266
267
268
269
270
271
272
273
274
275
276
277
278
279
280
281
282
283
284
285
286
287
288
289
290
291
292
293
294
295
296
297
298
299
300
301
302
303
304
305
306
307
308
309
310
311
312
313
314
315
316
317
318
# handlers/whatsapp_handlers.py
import logging
import os
import re
from typing import Optional, Dict

from fastapi.responses import JSONResponse

from components.gateways.headlines_to_wa import fetch_cached_headlines, send_to_whatsapp
from components.LLMs.Mistral import MistralTogetherClient, build_messages

# ------------------------------------------------------------
# Utilities
# ------------------------------------------------------------

def _safe_send(text: str, to: str) -> dict:
    """Send *text* to WhatsApp number *to*, logging the outcome.

    Never raises: any exception from the gateway (or a malformed
    response object) is caught, logged with a traceback, and folded
    into an error-status dict so callers can branch on "status".
    """
    try:
        result = send_to_whatsapp(text, destination_number=to)
        if result.get("status") == "success":
            logging.info(f"Sent message to {to}")
        else:
            logging.error(f"Failed to send message to {to}: {result}")
        return result
    except Exception as e:
        logging.exception(f"Exception while sending WhatsApp message to {to}: {e}")
        return {"status": "error", "error": str(e)}


# ------------------------------------------------------------
# Headlines
# ------------------------------------------------------------

def handle_headlines(from_number: str) -> JSONResponse:
    """Fetch today's cached digest and deliver it to *from_number*.

    Returns 200 on successful delivery, 500 when the digest could not
    be fetched or sent (after notifying the user about the failure).
    """
    digest = fetch_cached_headlines()

    # The gateway signals fetch failure by prefixing the text with an error emoji.
    if digest.startswith(("❌", "⚠️")):
        logging.error(f"Failed to fetch digest for {from_number}: {digest}")
        _safe_send(f"Sorry, I couldn't fetch the news digest today. {digest}", to=from_number)
        return JSONResponse(status_code=500, content={"status": "error", "message": "Failed to fetch digest"})

    send_result = _safe_send(digest, to=from_number)
    if send_result.get("status") != "success":
        _safe_send(
            f"Sorry, I couldn't send the news digest to you. Error: {send_result.get('error', 'unknown')}",
            to=from_number,
        )
        return JSONResponse(status_code=500, content={"status": "error", "message": "Failed to send digest"})

    return JSONResponse(status_code=200, content={"status": "success", "message": "Digest sent"})


# ------------------------------------------------------------
# Preferences / Greeting / Help / Unsubscribe / Small Talk
# ------------------------------------------------------------

def handle_preferences(from_number: str) -> JSONResponse:
    """Prompt the user to pick their preferred news topics."""
    prompt = "\n".join(
        [
            "Let’s tune your feed. Reply with topics you like:",
            "• world • india • finance • sports • entertainment",
            "",
            "You can send multiple, e.g.: india, finance",
        ]
    )
    _safe_send(prompt, to=from_number)
    return JSONResponse(status_code=200, content={"status": "success", "message": "Preferences prompt sent"})


def handle_greeting(from_number: str) -> JSONResponse:
    """Send the welcome message listing the supported commands."""
    greeting = "\n".join(
        [
            "Hey! 👋 I’m NuseAI.",
            "• Type *headlines* to get today’s digest.",
            "• Type *preferences* to set your topics.",
            "• Type *help* to see what I can do.",
        ]
    )
    _safe_send(greeting, to=from_number)
    return JSONResponse(status_code=200, content={"status": "success", "message": "Greeting sent"})


def handle_help(from_number: str) -> JSONResponse:
    """Send the help text describing every supported command."""
    help_text = "\n".join(
        [
            "Here’s how I can help:",
            "• *Headlines* — today’s 🗞️ Daily Digest 🟡",
            "• *Preferences* — choose topics/regions",
            "• *Unsubscribe* — stop messages",
            "Ask me a question anytime (e.g., “What’s India’s CPI outlook?”).",
        ]
    )
    _safe_send(help_text, to=from_number)
    return JSONResponse(status_code=200, content={"status": "success", "message": "Help sent"})


def handle_unsubscribe(from_number: str) -> JSONResponse:
    """Acknowledge an unsubscribe request with a farewell message."""
    farewell = "You’re unsubscribed. If you change your mind, just say *hi*."
    _safe_send(farewell, to=from_number)
    return JSONResponse(status_code=200, content={"status": "success", "message": "Unsubscribed"})


def handle_small_talk(from_number: str) -> JSONResponse:
    """Reply politely to chit-chat and nudge the user toward *headlines*."""
    reply = "🙂 Got it. If you’d like the news, just say *headlines*."
    _safe_send(reply, to=from_number)
    return JSONResponse(status_code=200, content={"status": "success", "message": "Small talk"})


# ------------------------------------------------------------
# Chat Question → “Explain by number” flow (structured + quality-guarded)
# ------------------------------------------------------------

# Matches one numbered digest line, e.g. "12. Some headline":
# group(1) = the number, group(2) = the headline text after "N. ".
_HEADLINE_LINE_RE = re.compile(r"^\s*(\d+)\.\s+(.*)$")

def _extract_number_ref(text: str) -> Optional[int]:
    """
    Find a referenced headline number in free text, e.g.:
    'explain number 14', 'no. 7 please', '#9', '14', 'explain 14 like I am 5'
    Returns int or None.
    """
    s = (text or "").lower()

    # explicit forms
    m = re.search(r"(?:number|no\.?|num|#)\s*(\d+)", s)
    if m:
        return int(m.group(1))

    # a bare number (avoid picking up years like 2025; cap at 1..200)
    m2 = re.search(r"\b(\d{1,3})\b", s)
    if m2:
        n = int(m2.group(1))
        if 1 <= n <= 200:
            return n

    return None


def _parse_rendered_digest(rendered: str) -> Dict[int, str]:
    """Build a {number: headline_text} map from the rendered digest string.

    Parses the exact text sent on WhatsApp; lines that don't look like
    "N. headline" (titles, separators, blanks) are silently skipped.
    Headline text is whitespace-stripped.
    """
    matches = (_HEADLINE_LINE_RE.match(line) for line in (rendered or "").splitlines())
    return {int(m.group(1)): m.group(2).strip() for m in matches if m}


def _retrieve_context_for_headline(headline_text: str, top_k: int = 15) -> str:
    """Pull supporting passages for *headline_text* from the vector index.

    Never raises: returns "" when the index module is missing, the
    persisted index cannot be loaded, or the query fails — callers treat
    an empty string as "no context available". `top_k` is deliberately
    generous to widen coverage (quality over speed).
    """
    # Import lazily so a missing/broken index module can't break this module's import.
    try:
        from components.indexers.news_indexer import load_news_index  # type: ignore
    except Exception as e:
        logging.warning(f"Index module not available yet: {e}")
        return ""

    try:
        index = load_news_index()
        try:
            # LlamaIndex v0.10+ API
            engine = index.as_query_engine(similarity_top_k=top_k)
        except Exception:
            # Fall back to the older query-engine constructor
            from llama_index.core.query_engine import RetrievalQueryEngine  # type: ignore
            engine = RetrievalQueryEngine(index=index, similarity_top_k=top_k)

        prompt = (
            "Retrieve concise, factual context that best explains this headline:\n"
            f"{headline_text}\n"
            "Focus on who/what/when/where/why, include crucial numbers, avoid speculation."
        )
        return str(engine.query(prompt))
    except Exception as e:
        # The index may simply not have been built yet; warn without a noisy traceback.
        persist_dir = os.getenv("NEWS_INDEX_PERSIST_DIR") or os.getenv("PERSIST_DIR") or "<unset>"
        logging.warning(f"Vector retrieval skipped (no index at {persist_dir}): {e}")
        return ""


def _eli5_answer_structured(question: str, context: str, headline_only: Optional[str] = None) -> str:
    """
    Generate a structured, quality-guarded ELI5 answer via the Mistral client.

    Args:
        question: The user-facing question (typically "Explain headline #N: ...").
        context: Retrieved passages to ground the answer; may be "" when the
            vector index had nothing (see _retrieve_context_for_headline).
        headline_only: Fallback text used instead of `question` when `context`
            is empty, so the model at least sees the raw headline.

    Returns:
        A bullet-structured answer string; on any LLM failure, a canned
        "Confidence: Low" template so the caller can still reply.

    Format:
    Headline #N — <short title>
    Key points:
    • ...
    • ...
    Numbers & facts:
    • ...
    Why it matters:
    • ...
    Caveats:
    • ...
    Confidence: High/Medium/Low

    Rules:
    - 120–180 words total.
    - Use ONLY the provided context/headline; if missing, write “Not in context”.
    - No speculation; keep neutral tone; be brief.
    """
    # System prompt pins tone and the "Not in context" guardrail.
    sys_prompt = (
        "You are a rigorous, concise explainer for a news assistant. "
        "Produce clear, structured outputs with bullet points. "
        "If any detail is not present in context, write 'Not in context'. "
        "Avoid flowery language; be factual and neutral."
    )

    if context.strip():
        user_prompt = (
            f"QUESTION:\n{question}\n\n"
            f"CONTEXT (may be partial, use ONLY this):\n{context}\n\n"
            "Write 120–180 words in this exact structure:\n"
            "Headline:\n"
            "Key points:\n"
            "• ...\n• ...\n• ...\n"
            "Numbers & facts:\n"
            "• ...\n• ...\n"
            "Why it matters:\n"
            "• ...\n"
            "Caveats:\n"
            "• ...\n"
            "Confidence: High | Medium | Low\n"
            "Rules:\n"
            "- If you can't find a detail in CONTEXT, write 'Not in context'.\n"
            "- Do NOT add sources or links unless they appear in CONTEXT.\n"
            "- Keep it short, precise, and neutral.\n"
        )
    else:
        # fallback: rely on the headline only — smaller word budget and
        # Confidence pinned to Low since there is nothing to ground specifics in
        headline_text = headline_only or question
        user_prompt = (
            "CONTEXT is empty. You must base the answer ONLY on the HEADLINE below; "
            "write 'Not in context' for any missing specifics.\n\n"
            f"HEADLINE:\n{headline_text}\n\n"
            "Write 90–140 words in this exact structure:\n"
            "Headline:\n"
            "Key points:\n"
            "• ...\n• ...\n"
            "Numbers & facts:\n"
            "• Not in context\n"
            "Why it matters:\n"
            "• ...\n"
            "Caveats:\n"
            "• Limited details available\n"
            "Confidence: Low\n"
        )

    try:
        # Low temperature keeps the output factual and stable.
        llm = MistralTogetherClient()
        msgs = build_messages(user_prompt, sys_prompt)
        out, _usage = llm.chat(msgs, temperature=0.2, max_tokens=400)
        return out.strip()
    except Exception as e:
        logging.exception(f"Mistral structured ELI5 generation failed: {e}")
        # Canned degradation: same structure the prompt demands, so the user
        # reply format stays uniform even when the LLM call fails.
        return (
            "Headline:\n"
            "Key points:\n"
            "• I couldn’t generate an explanation right now.\n"
            "Numbers & facts:\n"
            "• Not in context\n"
            "Why it matters:\n"
            "• Not in context\n"
            "Caveats:\n"
            "• System error\n"
            "Confidence: Low"
        )


def handle_chat_question(from_number: str, message_text: str) -> JSONResponse:
    """Answer a free-form chat message.

    When the message references a headline number (e.g. "explain 14 like
    I'm 5"): parse the number, resolve it against today's rendered digest,
    retrieve vector context (widened top_k for coverage), and reply with a
    structured, quality-guarded ELI5 answer. Any other message gets a
    usage hint for now.
    """
    logging.info(f"Chat question from {from_number}: {message_text}")

    number = _extract_number_ref(message_text or "")
    if number is None:
        # No headline reference — guide the user toward supported commands.
        _safe_send(
            "Ask me about a specific headline by number, e.g., *explain 7 like I’m 5*.\n"
            "Or type *headlines* for today’s digest.",
            to=from_number,
        )
        return JSONResponse(status_code=200, content={"status": "success", "message": "Generic reply"})

    # Map digest numbers to headline lines and look up the requested one.
    headline_map = _parse_rendered_digest(fetch_cached_headlines())
    headline_line = headline_map.get(number)
    if not headline_line:
        _safe_send(
            f"I couldn’t find headline *{number}* in today’s digest. "
            "Try another number or say *headlines* to see today’s list.",
            to=from_number,
        )
        return JSONResponse(status_code=200, content={"status": "success", "message": "Number not found"})

    # Retrieve broader context; the answer generator degrades gracefully
    # to headline-only mode when the context comes back empty.
    context = _retrieve_context_for_headline(headline_line, top_k=15)
    answer = _eli5_answer_structured(
        f"Explain headline #{number}: {headline_line}",
        context,
        headline_only=headline_line,
    )

    _safe_send(answer, to=from_number)
    return JSONResponse(status_code=200, content={"status": "success", "message": "ELI5 sent"})