""" evo_inference.py — Step 8 (FLAN-optimized) - Generative path uses a FLAN-friendly prompt: Instruction / Context / Question / Answer - Filters placeholder chunks - Cleans common prompt-echo lines - Keeps labeled [Generative] / [Extractive] outputs with safe fallback """ from typing import List, Dict import re from utils_lang import L, normalize_lang # Try to load your real Evo plugin first; else use the example; else None. _GENERATOR = None try: from evo_plugin import load_model as _load_real # your future file (optional) _GENERATOR = _load_real() except Exception: try: from evo_plugin_example import load_model as _load_example _GENERATOR = _load_example() except Exception: _GENERATOR = None # no generator available MAX_SNIPPET_CHARS = 400 def _snippet(text: str) -> str: text = " ".join(text.split()) return text[:MAX_SNIPPET_CHARS] + ("..." if len(text) > MAX_SNIPPET_CHARS else "") def _extractive_answer(user_query: str, lang: str, hits: List[Dict]) -> str: """Old safe mode: show top snippets + standard steps, now labeled.""" if not hits: return "**[Extractive]**\n\n" + L(lang, "intro_err") bullets = [f"- {_snippet(h['text'])}" for h in hits[:4]] steps = { "en": [ "• Step 1: Check eligibility & gather required documents.", "• Step 2: Confirm fees & payment options.", "• Step 3: Apply online or at the indicated office.", "• Step 4: Keep reference/receipt; track processing time.", ], "fr": [ "• Étape 1 : Vérifiez l’éligibilité et rassemblez les documents requis.", "• Étape 2 : Confirmez les frais et les moyens de paiement.", "• Étape 3 : Déposez la demande en ligne ou au bureau indiqué.", "• Étape 4 : Conservez le reçu/la référence et suivez le délai de traitement.", ], "mfe": [ "• Step 1: Get dokiman neseser ek verifie si to elegib.", "• Step 2: Konfirm fre ek manyer peyman.", "• Step 3: Fer demand online ouswa dan biro ki indike.", "• Step 4: Gard referans/reso; swiv letan tretman.", ], }[normalize_lang(lang)] return ( "**[Extractive]**\n\n" f"**{L(lang, 'intro_ok')}**\n\n" f"**Q:** {user_query}\n\n" f"**Key information:**\n" + "\n".join(bullets) + "\n\n" f"**Suggested steps:**\n" + "\n".join(steps) ) def _lang_name(code: str) -> str: return {"en": "English", "fr": "French", "mfe": "Kreol Morisien"}.get(code, "English") def _filter_hits(hits: List[Dict], keep: int = 6) -> List[Dict]: """ Prefer non-placeholder chunks; if all are placeholders, return originals. """ filtered = [ h for h in hits if "placeholder" not in h["text"].lower() and "disclaimer" not in h["text"].lower() ] if not filtered: filtered = hits return filtered[:keep] def _build_grounded_prompt(question: str, lang: str, hits: List[Dict]) -> str: """ FLAN-style prompt: Instruction: ... Context: 1) ... 2) ... Question: ... Answer: """ lang = normalize_lang(lang) lang_readable = _lang_name(lang) instruction = ( "You are the Mauritius Government Copilot. Answer ONLY using the provided context. " "If a detail is missing (fees, required docs, office or processing time), say so clearly. " "Structure the answer as short bullet points with: Required documents, Fees, Where to apply, " "Processing time, and Steps. Keep it concise (6–10 lines)." ) if lang == "fr": instruction = ( "Tu es le Copilote Gouvernemental de Maurice. Réponds UNIQUEMENT à partir du contexte fourni. " "Si une information manque (frais, documents requis, bureau ou délai), dis-le clairement. " "Structure en puces courtes : Documents requis, Frais, Où postuler, Délai de traitement, Étapes. " "Reste concis (6–10 lignes)." 
) elif lang == "mfe": instruction = ( "To enn Copilot Gouv Moris. Reponn zis lor konteks ki donn. " "Si enn detay manke (fre, dokiman, biro, letan tretman), dir li kler. " "Servi pwen kout: Dokiman, Fre, Kot pou al, Letan tretman, Steps. " "Reste kout (6–10 ligner)." ) chosen = _filter_hits(hits, keep=6) ctx_lines = [f"{i+1}) {_snippet(h['text'])}" for i, h in enumerate(chosen)] ctx_block = "\n".join(ctx_lines) if ctx_lines else "(none)" prompt = ( f"Instruction ({lang_readable}): {instruction}\n\n" f"Context:\n{ctx_block}\n\n" f"Question: {question}\n\n" f"Answer ({lang_readable}):" ) return prompt _ECHO_PATTERNS = [ r"^\s*Instruction.*$", r"^\s*Context:.*$", r"^\s*Question:.*$", r"^\s*Answer.*$", r"^\s*\[Instructions?\].*$", r"^\s*Be concise.*$", r"^\s*Do not invent.*$", r"^\s*(en|fr|mfe)\s*$", ] def _clean_generated(text: str) -> str: """ Remove common echoed lines from the model output. """ lines = [ln.strip() for ln in text.strip().splitlines()] out = [] for ln in lines: if any(re.match(pat, ln, flags=re.IGNORECASE) for pat in _ECHO_PATTERNS): continue out.append(ln) cleaned = "\n".join(out).strip() # extra guard: collapse repeated blank lines cleaned = re.sub(r"\n{3,}", "\n\n", cleaned) return cleaned def synthesize_with_evo( user_query: str, lang: str, hits: List[Dict], mode: str = "extractive", max_new_tokens: int = 192, temperature: float = 0.4, ) -> str: """ If mode=='generative' and a generator exists, generate a grounded answer (labeled [Generative]). Otherwise, return the labeled extractive fallback. """ lang = normalize_lang(lang) # No retrieved context? Stay safe. if not hits: return _extractive_answer(user_query, lang, hits) if mode != "generative" or _GENERATOR is None: return _extractive_answer(user_query, lang, hits) prompt = _build_grounded_prompt(user_query, lang, hits) try: text = _GENERATOR.generate( prompt, max_new_tokens=int(max_new_tokens), temperature=float(temperature), ) text = _clean_generated(text) # Fallback if empty or suspiciously short if not text or len(text) < 20: return _extractive_answer(user_query, lang, hits) return "**[Generative]**\n\n" + text except Exception: return _extractive_answer(user_query, lang, hits)
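

# --- Illustrative stub generator (added sketch; not part of the original
# module). The only contract this file assumes of a plugin is a module-level
# load_model() factory returning an object whose
# generate(prompt, max_new_tokens=..., temperature=...) -> str matches the
# call made in synthesize_with_evo above. _StubGenerator is a hypothetical
# stand-in that satisfies that contract without a real model.
class _StubGenerator:
    def generate(self, prompt: str, max_new_tokens: int = 192,
                 temperature: float = 0.4) -> str:
        # A real Evo plugin would run an LLM here; return fixed bullets so the
        # [Generative] path can be exercised offline.
        return "• Required documents: see context\n• Fees: see context"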
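

# --- Usage sketch (added for illustration). Assumes retrieval hits are dicts
# with at least a "text" key, which is how the code above indexes them; the
# sample question and hit texts are made up.
if __name__ == "__main__":
    demo_hits = [
        {"text": "Apply for a birth certificate at the Civil Status Office; "
                 "bring the parent's NIC. Fee: Rs 100."},
        {"text": "Processing usually takes 1-2 working days."},
    ]

    # Extractive fallback (works even when no generator plugin is installed):
    print(synthesize_with_evo("How do I get a birth certificate?", "en",
                              demo_hits, mode="extractive"))

    # Generative path via the stub (a real run would import evo_plugin instead):
    _GENERATOR = _StubGenerator()
    print(synthesize_with_evo("How do I get a birth certificate?", "en",
                              demo_hits, mode="generative"))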