HemanM committed on
Commit
2a79ea8
·
verified ·
1 Parent(s): fe75261

Create evo_inference.py

Browse files
Files changed (1) hide show
  1. evo_inference.py +88 -0
evo_inference.py ADDED
@@ -0,0 +1,88 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ """
2
+ evo_inference.py
3
+ Step 5: Evo synthesis hook.
4
+
5
+ (Objective)
6
+ - Define `synthesize_with_evo(user_query, lang, hits)` that returns a clean,
7
+ step-by-step style answer grounded in the retrieved chunks.
8
+ - For now, we DO NOT use a neural generator; we synthesize from the hits to
9
+ avoid hallucinations. Later, you can plug your Evo model here.
10
+
11
+ How to integrate your real Evo model later (Objective):
12
+ 1) Load your Evo weights once at module import time.
13
+ 2) Build a prompt with the top retrieved chunks.
14
+ 3) Generate a response (max_new_tokens ~ 200–300).
15
+ 4) Always include the key fields users expect (docs required, fees, where to apply, timing).
16
+ 5) Return the generated text.
17
+ """
18
+
19
+ from typing import List, Dict
20
+ from utils_lang import L, normalize_lang
21
+
22
+ MAX_SNIPPET_CHARS = 400 # (Objective) keep answer concise
23
+
24
+
25
+ def _bulletize(snippet: str) -> str:
26
+ """
27
+ (Objective) Clean a text snippet for bullet display.
28
+ """
29
+ snippet = " ".join(snippet.split())
30
+ if len(snippet) > MAX_SNIPPET_CHARS:
31
+ snippet = snippet[:MAX_SNIPPET_CHARS] + "..."
32
+ return f"- {snippet}"
33
+
34
+
35
def synthesize_with_evo(user_query: str, lang: str, hits: List[Dict]) -> str:
    """
    (Objective)
    Build a grounded answer in the user's language from the retrieved hits.
    This is extractive + templated; swap with your Evo generator later.

    Inputs:
      user_query: the user's question (string)
      lang: 'en' | 'fr' | 'mfe'
      hits: list of dicts with keys: 'text', 'meta', 'score'
            (only 'text' is read here; hits without usable text are skipped)

    Output:
      A markdown string to show in the UI. When there are no hits, or none
      of them carries any text, the localized "intro_err" message is
      returned instead.
    """
    # NOTE(review): normalize_lang is assumed to map the input onto one of the
    # codes used as keys below - confirm against utils_lang.
    lang = normalize_lang(lang)

    # Keep only hits that actually carry text, so a malformed hit neither
    # raises KeyError nor produces an empty "- " bullet.
    texts = []
    for hit in hits or []:
        text = hit.get("text") if isinstance(hit, dict) else None
        if isinstance(text, str) and text.strip():
            texts.append(text)

    if not texts:
        return L(lang, "intro_err")

    # Take the top ~4 chunks and present them as actionable bullets.
    bullets_md = "\n".join(_bulletize(text) for text in texts[:4])

    # Tiny language-specific headings (Objective)
    headings_by_lang = {
        "en": [
            "• Step 1: Check eligibility & gather required documents.",
            "• Step 2: Confirm fees & payment options.",
            "• Step 3: Apply online or at the indicated office.",
            "• Step 4: Keep reference/receipt; track processing time.",
        ],
        "fr": [
            "• Étape 1 : Vérifiez l’éligibilité et rassemblez les documents requis.",
            "• Étape 2 : Confirmez les frais et les moyens de paiement.",
            "• Étape 3 : Déposez la demande en ligne ou au bureau indiqué.",
            "• Étape 4 : Conservez le reçu/la référence et suivez le délai de traitement.",
        ],
        "mfe": [
            "• Step 1: Get dokiman neseser ek verifie si to elegib.",
            "• Step 2: Konfirm fre ek manyer peyman.",
            "• Step 3: Fer demand online ouswa dan biro ki indike.",
            "• Step 4: Gard referans/reso; swiv letan tretman.",
        ],
    }
    # Fall back to English rather than raising KeyError if the normalized
    # code is not one of the languages templated above.
    headings = headings_by_lang.get(lang, headings_by_lang["en"])

    # Compose final answer (Objective)
    intro = L(lang, "intro_ok")
    md = (
        f"**{intro}**\n\n"
        f"**Q:** {user_query}\n\n"
        f"**Key information:**\n{bullets_md}\n\n"
        f"**Suggested steps:**\n" + "\n".join(headings)
    )
    return md