File size: 3,130 Bytes
2a79ea8
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
"""
evo_inference.py
Step 5: Evo synthesis hook.

(Objective)
- Define `synthesize_with_evo(user_query, lang, hits)` that returns a clean,
  step-by-step style answer grounded in the retrieved chunks.
- For now, we DO NOT use a neural generator; we synthesize from the hits to
  avoid hallucinations. Later, you can plug your Evo model here.

How to integrate your real Evo model later (Objective):
1) Load your Evo weights once at module import time.
2) Build a prompt with the top retrieved chunks.
3) Generate a response (max_new_tokens ~ 200–300).
4) Always include the key fields users expect (docs required, fees, where to apply, timing).
5) Return the generated text.
"""

from typing import Dict, List, Optional

from utils_lang import L, normalize_lang

# (Objective) Keep answers concise: maximum number of characters kept from a
# snippet by `_bulletize` before it is cut and "..." is appended.
MAX_SNIPPET_CHARS = 400


def _bulletize(snippet: str) -> str:
    """
    (Objective) Clean a text snippet for bullet display.
    """
    snippet = " ".join(snippet.split())
    if len(snippet) > MAX_SNIPPET_CHARS:
        snippet = snippet[:MAX_SNIPPET_CHARS] + "..."
    return f"- {snippet}"


def synthesize_with_evo(user_query: str, lang: str, hits: List[Dict]) -> str:
    """
    (Objective)
    Build a grounded answer in the user's language from the retrieved hits.
    This is extractive + templated; swap with your Evo generator later.

    Up to four of the top hits are turned into bullets via `_bulletize`,
    followed by a fixed four-step checklist in the requested language.

    Inputs:
      user_query: the user's question (string); echoed as-is in the answer.
      lang: language code, passed through `normalize_lang` first. Step
            headings exist for 'en', 'fr' and 'mfe'; any other normalized
            value falls back to the English headings.
      hits: list of dicts; only the 'text' key is read here. Among the top
            four hits, those whose 'text' is missing, not a string, or blank
            are skipped.

    Output:
      A markdown string to show in the UI. When `hits` is empty, or none of
      the top four hits has usable text, this is `L(lang, "intro_err")`.
    """
    lang = normalize_lang(lang)

    # Take the top ~4 chunks and present them as actionable bullets. Hits
    # without usable text are dropped so the answer never contains an empty
    # "- " bullet and a missing 'text' key does not raise.
    top_texts = [h.get("text") for h in (hits or [])[:4]]
    bullets = [
        _bulletize(text)
        for text in top_texts
        if isinstance(text, str) and text.strip()
    ]
    if not bullets:
        return L(lang, "intro_err")

    # Tiny language-specific headings (Objective)
    headings_by_lang = {
        "en": [
            "• Step 1: Check eligibility & gather required documents.",
            "• Step 2: Confirm fees & payment options.",
            "• Step 3: Apply online or at the indicated office.",
            "• Step 4: Keep reference/receipt; track processing time.",
        ],
        "fr": [
            "• Étape 1 : Vérifiez l’éligibilité et rassemblez les documents requis.",
            "• Étape 2 : Confirmez les frais et les moyens de paiement.",
            "• Étape 3 : Déposez la demande en ligne ou au bureau indiqué.",
            "• Étape 4 : Conservez le reçu/la référence et suivez le délai de traitement.",
        ],
        "mfe": [
            "• Step 1: Get dokiman neseser ek verifie si to elegib.",
            "• Step 2: Konfirm fre ek manyer peyman.",
            "• Step 3: Fer demand online ouswa dan biro ki indike.",
            "• Step 4: Gard referans/reso; swiv letan tretman.",
        ],
    }
    # NOTE(review): normalize_lang presumably returns one of the keys above --
    # confirm. Fall back to English rather than raising KeyError if it does not.
    headings = headings_by_lang.get(lang, headings_by_lang["en"])

    # Compose final answer (Objective)
    intro = L(lang, "intro_ok")
    bullets_md = "\n".join(bullets)
    steps_md = "\n".join(headings)
    return (
        f"**{intro}**\n\n"
        f"**Q:** {user_query}\n\n"
        f"**Key information:**\n{bullets_md}\n\n"
        f"**Suggested steps:**\n{steps_md}"
    )