Update evo_inference.py
evo_inference.py  CHANGED  (+38 -54)
@@ -1,9 +1,10 @@
 """
-evo_inference.py —
+evo_inference.py — FLAN-optimized + anti-echo
+- FLAN-friendly prompt with explicit bullet structure
 - Filters placeholder chunks
-- Cleans
+- Cleans prompt-echo lines
+- Anti-echo guard: if the model repeats the question or outputs too little, we fall back to Extractive
+- Labeled outputs: [Generative] / [Extractive]
 """
 
 from typing import List, Dict
@@ -13,28 +14,24 @@ from utils_lang import L, normalize_lang
 # Try to load your real Evo plugin first; else use the example; else None.
 _GENERATOR = None
 try:
     from evo_plugin import load_model as _load_real
     _GENERATOR = _load_real()
 except Exception:
     try:
         from evo_plugin_example import load_model as _load_example
         _GENERATOR = _load_example()
     except Exception:
         _GENERATOR = None
 
 MAX_SNIPPET_CHARS = 400
 
-
 def _snippet(text: str) -> str:
     text = " ".join(text.split())
     return text[:MAX_SNIPPET_CHARS] + ("..." if len(text) > MAX_SNIPPET_CHARS else "")
 
-
 def _extractive_answer(user_query: str, lang: str, hits: List[Dict]) -> str:
-    """Old safe mode: show top snippets + standard steps, now labeled."""
     if not hits:
         return "**[Extractive]**\n\n" + L(lang, "intro_err")
-
     bullets = [f"- {_snippet(h['text'])}" for h in hits[:4]]
     steps = {
         "en": [
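The loader above only implies the plugin contract; the call in the last hunk passes max_new_tokens and temperature. A minimal sketch of what evo_plugin.load_model is assumed to provide; the class name and stub output here are hypothetical, not part of this Space:

# Hypothetical sketch only: the real evo_plugin module is not in this diff.
# The loader plus the generate(...) call in the last hunk imply this shape.
class _StubModel:
    def generate(self, prompt: str, max_new_tokens: int = 192, temperature: float = 0.4) -> str:
        # A real plugin would run a language model here.
        return "- Required documents: see context\n- Fees: see context"

def load_model():
    return _StubModel()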
@@ -56,7 +53,6 @@ def _extractive_answer(user_query: str, lang: str, hits: List[Dict]) -> str:
             "• Step 4: Gard referans/reso; swiv letan tretman.",
         ],
     }[normalize_lang(lang)]
-
     return (
         "**[Extractive]**\n\n"
         f"**{L(lang, 'intro_ok')}**\n\n"
@@ -65,71 +61,58 @@ def _extractive_answer(user_query: str, lang: str, hits: List[Dict]) -> str:
         f"**Suggested steps:**\n" + "\n".join(steps)
     )
 
-
 def _lang_name(code: str) -> str:
     return {"en": "English", "fr": "French", "mfe": "Kreol Morisien"}.get(code, "English")
 
-
 def _filter_hits(hits: List[Dict], keep: int = 6) -> List[Dict]:
-    filtered = [
-        h for h in hits
-        if "placeholder" not in h["text"].lower() and "disclaimer" not in h["text"].lower()
-    ]
+    # Prefer non-placeholder chunks; if all are placeholders, return originals.
+    filtered = [h for h in hits if "placeholder" not in h["text"].lower() and "disclaimer" not in h["text"].lower()]
     if not filtered:
         filtered = hits
     return filtered[:keep]
 
-
 def _build_grounded_prompt(question: str, lang: str, hits: List[Dict]) -> str:
     """
     FLAN-style prompt:
-      Instruction:
-      Context:
-        1) ...
-        2) ...
+      Instruction: (clear constraints)
+      Context: 1) ... 2) ...
       Question: ...
-      Answer:
+      Answer: - bullet - bullet ...
     """
     lang = normalize_lang(lang)
     lang_readable = _lang_name(lang)
 
-    instruction = (
-        "You are the Mauritius Government Copilot. Answer ONLY using the provided context. "
-        "If a detail is missing (fees, required docs, office or processing time), say so clearly. "
-        "Structure the answer as short bullet points with: Required documents, Fees, Where to apply, "
-        "Processing time, and Steps. Keep it concise (6–10 lines)."
-    )
     if lang == "fr":
         instruction = (
-            "Tu es le Copilote Gouvernemental de Maurice. Réponds UNIQUEMENT à partir du contexte
-            "Reste concis (6–10 lignes)."
+            "Tu es le Copilote Gouvernemental de Maurice. Réponds UNIQUEMENT à partir du contexte. "
+            "Ne répète pas la question. Donne 6–10 puces courtes couvrant: Documents requis, Frais, "
+            "Où postuler, Délai de traitement, Étapes. Si une info manque, dis-le. Pas d'autres sections."
         )
     elif lang == "mfe":
         instruction = (
-            "To enn Copilot Gouv Moris. Reponn zis lor konteks
+            "To enn Copilot Gouv Moris. Reponn zis lor konteks. Pa repete kestyon. Donn 6–10 pwin kout "
+            "lor: Dokiman, Fre, Kot pou al, Letan tretman, Steps. Si info manke, dir li. Pa azout seksion anplis."
+        )
+    else:
+        instruction = (
+            "You are the Mauritius Government Copilot. Use ONLY the context. Do not repeat the question. "
+            "Write 6–10 short bullet points covering: Required documents, Fees, Where to apply, "
+            "Processing time, and Steps. If something is missing, say so. No extra sections."
         )
 
     chosen = _filter_hits(hits, keep=6)
     ctx_lines = [f"{i+1}) {_snippet(h['text'])}" for i, h in enumerate(chosen)]
     ctx_block = "\n".join(ctx_lines) if ctx_lines else "(none)"
 
+    # Prime with a leading dash to encourage bullets.
     prompt = (
         f"Instruction ({lang_readable}): {instruction}\n\n"
         f"Context:\n{ctx_block}\n\n"
         f"Question: {question}\n\n"
-        f"Answer ({lang_readable})
+        f"Answer ({lang_readable}):\n- "
     )
     return prompt
 
-
 _ECHO_PATTERNS = [
     r"^\s*Instruction.*$", r"^\s*Context:.*$", r"^\s*Question:.*$", r"^\s*Answer.*$",
     r"^\s*\[Instructions?\].*$", r"^\s*Be concise.*$", r"^\s*Do not invent.*$",
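For a concrete sense of the prompt shape, here is roughly what _build_grounded_prompt assembles for one retrieved chunk. The hit text is invented; the hit dict's "text" key, the numbering, and the trailing "- " primer follow the code above:

hits = [{"text": "Apply at the Civil Status Office. Fee: Rs 100. Processing: 2 days."}]
print(_build_grounded_prompt("How do I get a birth certificate?", "en", hits))
# Instruction (English): You are the Mauritius Government Copilot. Use ONLY the context. ...
#
# Context:
# 1) Apply at the Civil Status Office. Fee: Rs 100. Processing: 2 days.
#
# Question: How do I get a birth certificate?
#
# Answer (English):
# -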
@@ -137,9 +120,7 @@ _ECHO_PATTERNS = [
 ]
 
 def _clean_generated(text: str) -> str:
-    """
-    Remove common echoed lines from the model output.
-    """
+    # Remove common echoed lines from the model output.
     lines = [ln.strip() for ln in text.strip().splitlines()]
     out = []
     for ln in lines:
@@ -147,10 +128,18 @@ def _clean_generated(text: str) -> str:
             continue
         out.append(ln)
     cleaned = "\n".join(out).strip()
-    # extra guard: collapse repeated blank lines
     cleaned = re.sub(r"\n{3,}", "\n\n", cleaned)
     return cleaned
 
-
+def _is_echo_or_too_short(ans: str, question: str) -> bool:
+    # Normalize and check if answer is basically the question or too short.
+    a = re.sub(r"\W+", " ", (ans or "").lower()).strip()
+    q = re.sub(r"\W+", " ", (question or "").lower()).strip()
+    if len(a) < 40:
+        return True
+    if q and (a.startswith(q) or q in a[: max(80, len(q) + 10)]):
+        return True
+    return False
+
 def synthesize_with_evo(
     user_query: str,
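Assuming the elided loop body skips any line matching _ECHO_PATTERNS (which is what the pattern list and the continue above suggest), the cleaner and the new guard behave roughly like this on invented inputs:

raw = "Instruction (English): ...\n- Fee: Rs 100\n\n\n- Step 1: Visit the office"
print(_clean_generated(raw))
# - Fee: Rs 100
#
# - Step 1: Visit the office   (echoed "Instruction..." line dropped, blank run collapsed)

q = "How do I get a birth certificate?"
_is_echo_or_too_short("How do I get a birth certificate in Mauritius please", q)  # True: echoes the question
_is_echo_or_too_short("short answer", q)                                          # True: under 40 chars
_is_echo_or_too_short("- Fee: Rs 100\n- Apply in person at the Civil Status Office", q)  # False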
@@ -160,16 +149,12 @@ def synthesize_with_evo(
     max_new_tokens: int = 192,
     temperature: float = 0.4,
 ) -> str:
-    """
-    If mode=='generative' and a generator exists, generate a grounded answer
-    (labeled [Generative]). Otherwise, return the labeled extractive fallback.
-    """
+    # No context → safe fallback
     lang = normalize_lang(lang)
-
-    # No retrieved context? Stay safe.
     if not hits:
         return _extractive_answer(user_query, lang, hits)
 
+    # Extractive path or no generator available
     if mode != "generative" or _GENERATOR is None:
         return _extractive_answer(user_query, lang, hits)
 
@@ -181,8 +166,7 @@ def synthesize_with_evo(
             temperature=float(temperature),
         )
         text = _clean_generated(text)
-
-        if not text or len(text) < 20:
+        if _is_echo_or_too_short(text, user_query):
             return _extractive_answer(user_query, lang, hits)
         return "**[Generative]**\n\n" + text
     except Exception:
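End to end, the public entry point keeps its extractive safety net; only the guard changed. A usage sketch with invented hits; keyword arguments are used because the middle of the signature is elided in this diff:

from evo_inference import synthesize_with_evo

hits = [
    {"text": "Passport renewals are handled by the Passport and Immigration Office."},
    {"text": "Fee: Rs 700. Processing time: about five working days."},
]
answer = synthesize_with_evo(
    "How do I renew my passport?",
    lang="en",
    hits=hits,
    mode="generative",  # silently falls back to extractive if no plugin loaded
)
print(answer)  # begins with "**[Generative]**" or "**[Extractive]**"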