evo-gov-copilot-mu / evo_inference.py
HemanM's picture
Update evo_inference.py
af358ab verified
raw
history blame
6.66 kB
"""
evo_inference.py — FLAN-optimized + anti-echo
- FLAN-friendly prompt with explicit bullet structure
- Filters placeholder chunks
- Cleans prompt-echo lines
- Anti-echo guard: if the model repeats the question or outputs too little, we fall back to Extractive
- Labeled outputs: [Generative] / [Extractive]
"""
from typing import List, Dict
import re
from utils_lang import L, normalize_lang
# Generator backend resolution, in order of preference:
#   1. the real Evo plugin (evo_plugin),
#   2. the example plugin (evo_plugin_example),
#   3. None when neither can be imported and loaded.
def _load_generator():
    """Return the first generator that imports and loads cleanly, else None."""
    try:
        from evo_plugin import load_model as load_real

        return load_real()
    except Exception:
        # Any import or load failure sends us on to the example plugin.
        pass
    try:
        from evo_plugin_example import load_model as load_example

        return load_example()
    except Exception:
        return None


_GENERATOR = _load_generator()
MAX_SNIPPET_CHARS = 400


def _snippet(text: str) -> str:
    """Collapse whitespace in *text* and cap it at MAX_SNIPPET_CHARS.

    Runs of whitespace (including newlines) become single spaces. When the
    collapsed text is longer than the cap it is cut there and "..." is
    appended; otherwise it is returned as is.
    """
    collapsed = " ".join(text.split())
    if len(collapsed) <= MAX_SNIPPET_CHARS:
        return collapsed
    return collapsed[:MAX_SNIPPET_CHARS] + "..."
# Generic next-step checklists, keyed by normalized language code.
_SUGGESTED_STEPS = {
    "en": [
        "• Step 1: Check eligibility & gather required documents.",
        "• Step 2: Confirm fees & payment options.",
        "• Step 3: Apply online or at the indicated office.",
        "• Step 4: Keep reference/receipt; track processing time.",
    ],
    "fr": [
        "• Étape 1 : Vérifiez l’éligibilité et rassemblez les documents requis.",
        "• Étape 2 : Confirmez les frais et les moyens de paiement.",
        "• Étape 3 : Déposez la demande en ligne ou au bureau indiqué.",
        "• Étape 4 : Conservez le reçu/la référence et suivez le délai de traitement.",
    ],
    "mfe": [
        "• Step 1: Get dokiman neseser ek verifie si to elegib.",
        "• Step 2: Konfirm fre ek manyer peyman.",
        "• Step 3: Fer demand online ouswa dan biro ki indike.",
        "• Step 4: Gard referans/reso; swiv letan tretman.",
    ],
}


def _extractive_answer(user_query: str, lang: str, hits: List[Dict]) -> str:
    """Build the "[Extractive]" answer straight from the retrieved hits.

    Args:
        user_query: The user's question; echoed back after "Q:".
        lang: Language code. It is passed to ``normalize_lang`` to pick the
            step checklist and to ``L`` for the localized intro line.
        hits: Retrieved chunks, each a dict with a ``"text"`` entry. Only
            the first four are quoted.

    Returns:
        Markdown text starting with the "[Extractive]" label. With no hits
        it is the label plus the ``intro_err`` message; otherwise it holds
        the ``intro_ok`` line, the question, up to four snippet bullets and
        the suggested-steps checklist.
    """
    if not hits:
        return "**[Extractive]**\n\n" + L(lang, "intro_err")

    bullets = [f"- {_snippet(h['text'])}" for h in hits[:4]]

    # This function is the fallback path for failed generation, so it must
    # not raise on a language code outside the table. Unknown codes get the
    # English checklist, mirroring the English default in _lang_name.
    steps = _SUGGESTED_STEPS.get(normalize_lang(lang), _SUGGESTED_STEPS["en"])

    sections = [
        "**[Extractive]**\n\n",
        f"**{L(lang, 'intro_ok')}**\n\n",
        f"**Q:** {user_query}\n\n",
        "**Key information:**\n",
        "\n".join(bullets),
        "\n\n",
        "**Suggested steps:**\n",
        "\n".join(steps),
    ]
    return "".join(sections)
def _lang_name(code: str) -> str:
    """Return the human-readable name for a language code.

    Codes other than "en", "fr" and "mfe" map to "English".
    """
    names = {"en": "English", "fr": "French", "mfe": "Kreol Morisien"}
    if code in names:
        return names[code]
    return "English"
def _filter_hits(hits: List[Dict], keep: int = 6) -> List[Dict]:
    """Return at most *keep* hits, preferring real content.

    A hit is skipped when its text contains "placeholder" or "disclaimer"
    (case-insensitive). If that would leave nothing, the original hits are
    used instead so the caller still gets some context.
    """
    usable = []
    for hit in hits:
        lowered = hit["text"].lower()
        if "placeholder" in lowered or "disclaimer" in lowered:
            continue
        usable.append(hit)
    return (usable or hits)[:keep]
def _build_grounded_prompt(question: str, lang: str, hits: List[Dict]) -> str:
    """Assemble the FLAN-style grounded prompt for the generator.

    Layout of the returned text:
        Instruction (<language>): constraints, in the target language
        Context: numbered snippets "1) ... 2) ...", or "(none)"
        Question: the user's question
        Answer (<language>): primed with a leading "- "

    The instruction is French for "fr", Kreol for "mfe" and English for any
    other normalized code. At most six filtered hits feed the context.
    """
    lang = normalize_lang(lang)
    lang_readable = _lang_name(lang)

    english_instruction = (
        "You are the Mauritius Government Copilot. Use ONLY the context. Do not repeat the question. "
        "Write 6–10 short bullet points covering: Required documents, Fees, Where to apply, "
        "Processing time, and Steps. If something is missing, say so. No extra sections."
    )
    localized_instructions = {
        "fr": (
            "Tu es le Copilote Gouvernemental de Maurice. Réponds UNIQUEMENT à partir du contexte. "
            "Ne répète pas la question. Donne 6–10 puces courtes couvrant: Documents requis, Frais, "
            "Où postuler, Délai de traitement, Étapes. Si une info manque, dis-le. Pas d'autres sections."
        ),
        "mfe": (
            "To enn Copilot Gouv Moris. Reponn zis lor konteks. Pa repete kestyon. Donn 6–10 pwin kout "
            "lor: Dokiman, Fre, Kot pou al, Letan tretman, Steps. Si info manke, dir li. Pa azout seksion anplis."
        ),
    }
    instruction = localized_instructions.get(lang, english_instruction)

    numbered = [
        f"{n}) {_snippet(hit['text'])}"
        for n, hit in enumerate(_filter_hits(hits, keep=6), start=1)
    ]
    ctx_block = "\n".join(numbered) or "(none)"

    # The trailing "- " primes the model to start with a bullet.
    return "".join(
        [
            f"Instruction ({lang_readable}): {instruction}\n\n",
            f"Context:\n{ctx_block}\n\n",
            f"Question: {question}\n\n",
            f"Answer ({lang_readable}):\n- ",
        ]
    )
# Lines matching any of these (case-insensitively) are treated as echoed
# prompt scaffolding and dropped from the model output.
_ECHO_PATTERNS = [
    r"^\s*Instruction.*$",
    r"^\s*Context:.*$",
    r"^\s*Question:.*$",
    r"^\s*Answer.*$",
    r"^\s*\[Instructions?\].*$",
    r"^\s*Be concise.*$",
    r"^\s*Do not invent.*$",
    r"^\s*(en|fr|mfe)\s*$",
]


def _clean_generated(text: str) -> str:
    """Strip echoed prompt lines from generated *text*.

    Each line is trimmed, lines matching an entry of _ECHO_PATTERNS are
    removed, and runs of three or more newlines are squeezed to two.
    """

    def _is_echo(line: str) -> bool:
        return any(
            re.match(pattern, line, flags=re.IGNORECASE)
            for pattern in _ECHO_PATTERNS
        )

    trimmed = (raw.strip() for raw in text.strip().splitlines())
    kept = [line for line in trimmed if not _is_echo(line)]
    return re.sub(r"\n{3,}", "\n\n", "\n".join(kept).strip())
def _is_echo_or_too_short(ans: str, question: str) -> bool:
    """Tell whether *ans* is unusable: too short or an echo of *question*.

    Both strings are lowercased and every run of non-word characters is
    replaced by one space before comparing. The answer is rejected when
    its normalized form is under 40 characters, or when the normalized
    question opens it or appears within its first max(80, len(question) + 10)
    characters. An empty question never counts as an echo.
    """

    def _normalize(value):
        return re.sub(r"\W+", " ", (value or "").lower()).strip()

    answer_norm = _normalize(ans)
    question_norm = _normalize(question)

    if len(answer_norm) < 40:
        return True
    if not question_norm:
        return False

    head = answer_norm[: max(80, len(question_norm) + 10)]
    return answer_norm.startswith(question_norm) or question_norm in head
def synthesize_with_evo(
    user_query: str,
    lang: str,
    hits: List[Dict],
    mode: str = "extractive",
    max_new_tokens: int = 192,
    temperature: float = 0.4,
) -> str:
    """Answer *user_query* from *hits*, generatively when possible.

    The extractive answer is returned when there are no hits, when *mode*
    is not "generative", when no generator is loaded, when generation or
    cleaning raises, or when the cleaned output is too short or echoes the
    question. Otherwise the cleaned model output is returned under a
    "[Generative]" label.

    Args:
        user_query: The user's question.
        lang: Language code; normalized with ``normalize_lang`` first.
        hits: Retrieved chunks, each a dict with a ``"text"`` entry.
        mode: "generative" to try the generator; anything else is extractive.
        max_new_tokens: Passed to the generator after ``int()`` conversion.
        temperature: Passed to the generator after ``float()`` conversion.

    Returns:
        Markdown text labeled "[Generative]" or "[Extractive]".
    """
    lang = normalize_lang(lang)

    def _fallback() -> str:
        return _extractive_answer(user_query, lang, hits)

    # No context, extractive mode requested, or no generator available.
    if not hits or mode != "generative" or _GENERATOR is None:
        return _fallback()

    prompt = _build_grounded_prompt(user_query, lang, hits)
    try:
        raw = _GENERATOR.generate(
            prompt,
            max_new_tokens=int(max_new_tokens),
            temperature=float(temperature),
        )
        answer = _clean_generated(raw)
        if not _is_echo_or_too_short(answer, user_query):
            return "**[Generative]**\n\n" + answer
        # Anti-echo guard tripped: the model gave nothing usable.
        return _fallback()
    except Exception:
        # Best effort: any generation failure degrades to the extractive answer.
        return _fallback()