""" app.py — Step 9 Adds: - Feedback logger (Helpful / Not helpful) with optional note → data/feedback.csv - Per-session ID so you can trace usage (no cookies needed) - Auto-build index on first question (from Step 8 refined) Everything else (RAG + Extractive/Generative toggle) stays the same. """ from pathlib import Path from typing import List import secrets import time import gradio as gr from indexer import build_index from rag_search import ( RAGSearcher, format_sources, extract_links, split_form_links, links_markdown, ) from evo_inference import synthesize_with_evo from utils_lang import SUPPORTED, normalize_lang, L from feedback import log_feedback # NEW _searcher = None DATA_SEED_DIR = Path("data/seed") ALLOWED_EXTS = {".txt", ".pdf", ".html", ".htm", ".xhtml"} def ensure_searcher(): """Create (or return) a global searcher bound to the current index.""" global _searcher if _searcher is None: _searcher = RAGSearcher() # may raise RuntimeError if index missing return _searcher def on_build_index(): """Build the FAISS index and reset the cached searcher.""" try: msg = build_index() global _searcher _searcher = None # force reload next time except Exception as e: msg = f"Error while building index: {e}" return msg def _save_files(files: List[gr.File]) -> List[str]: """Save uploaded files (txt/pdf/html) to data/seed/.""" saved = [] DATA_SEED_DIR.mkdir(parents=True, exist_ok=True) if not files: return saved for f in files: try: path = None if isinstance(f, dict) and "name" in f: path = Path(f["name"]) elif hasattr(f, "name"): path = Path(getattr(f, "name")) elif isinstance(f, str): path = Path(f) if not path or not path.exists(): continue if path.suffix.lower() not in ALLOWED_EXTS: continue (DATA_SEED_DIR / path.name).write_bytes(path.read_bytes()) saved.append(path.name) except Exception: continue return saved def on_upload_and_reindex(files): """Save uploaded files then rebuild the index.""" saved = _save_files(files or []) if not saved: return "No valid .txt/.pdf/.html uploaded." status = on_build_index() return f"Uploaded: {', '.join(saved)}\n{status}" def on_ask( question: str, top_k: int, lang_code: str, mode_label: str, temp: float, max_tokens: int, history: list, session_id: str, # NEW: State input ): """ Handle a user question end-to-end: - Ensure index exists (auto-build on first use) - Retrieve top-k chunks - Synthesize answer (Extractive or Generative) - Show sources + extracted links/forms - Append to chat history - Return a structured "last answer" payload for feedback logging """ lang = normalize_lang(lang_code) if not question or len(question.strip()) < 3: last_payload = {} return history, L(lang, "intro_err"), f"{L(lang, 'sources')}: (none)", "", "", last_payload try: # Ensure searcher; if index missing, build once automatically then retry try: searcher = ensure_searcher() except RuntimeError: on_build_index() searcher = ensure_searcher() hits = searcher.search(question, k=int(top_k)) # Map UI label to internal mode flag mode = "generative" if mode_label.startswith("Generative") else "extractive" answer = synthesize_with_evo( question, lang, hits, mode=mode, max_new_tokens=int(max_tokens), temperature=float(temp), ) # Localized "Sources" header srcs = format_sources(hits) if srcs.startswith("Sources:"): srcs = srcs.replace("Sources:", f"{L(lang, 'sources')}:") # Extract URLs and form links all_links = extract_links(hits) form_links, other_links = split_form_links(all_links) links_md = links_markdown(L(lang, "links"), other_links) forms_md = links_markdown(L(lang, "forms"), form_links) # Build "last answer" payload for feedback last_payload = { "ts_question": int(time.time()), "session": session_id or "", "lang": lang, "mode": mode, "temperature": float(temp), "max_tokens": int(max_tokens), "top_k": int(top_k), "question": question, "answer": answer, "sources": srcs, "links": links_md, "forms": forms_md, } except Exception as e: answer = f"Error: {e}\n\n{L(lang, 'tip_build')}" srcs = f"{L(lang, 'sources')}: (none)" links_md = f"**{L(lang, 'links')}:** (none)" forms_md = f"**{L(lang, 'forms')}:** (none)" last_payload = {} history = history + [(question, answer)] return history, answer, srcs, links_md, forms_md, last_payload def on_feedback_submit(kind: str, note: str, last_payload: dict) -> str: """ Log feedback to CSV. 'kind' is 'helpful' or 'not_helpful'. """ if not last_payload: return "Ask a question first, then rate the answer." entry = dict(last_payload) # copy entry["feedback"] = kind entry["note"] = note or "" return log_feedback(entry) def _new_session_id(): """Generate a short session id on app load.""" return secrets.token_hex(8) with gr.Blocks(title=L("en", "title")) as demo: gr.Markdown(f"# 🇲🇺 **{L('en','title')}** — Step 9") # --- Hidden state: session id + last answer payload session = gr.State() last_answer = gr.State() # Generate a session id when the app loads demo.load(fn=_new_session_id, inputs=None, outputs=session) # --- Admin: build index / upload & reindex with gr.Row(): build_btn = gr.Button(L("en", "build_btn")) status = gr.Markdown(L("en", "status_idle")) build_btn.click(fn=on_build_index, outputs=status) with gr.Accordion("Admin: upload TXT / PDF / HTML and auto-reindex", open=False): upload = gr.File( file_count="multiple", file_types=["file"], label="Upload .txt / .pdf / .html", ) save_btn = gr.Button("Save & Reindex") upload_status = gr.Markdown() save_btn.click(fn=on_upload_and_reindex, inputs=upload, outputs=upload_status) # --- User controls gr.Markdown("### Language / Langue / Langaz") lang = gr.Dropdown(choices=list(SUPPORTED.keys()), value="en", label="EN / FR / MFE") gr.Markdown("### Answer mode & controls") with gr.Row(): mode = gr.Radio( choices=["Extractive (safe)", "Generative (Evo)"], value="Extractive (safe)", label="Mode", ) temp = gr.Slider(0.0, 1.2, value=0.4, step=0.05, label="Temperature (gen)") max_tokens = gr.Slider(64, 384, value=192, step=16, label="Max new tokens (gen)") # --- Q&A gr.Markdown("### Ask a question / Posez une question / Poz enn kestyon") with gr.Row(): q = gr.Textbox(placeholder=L("en", "ask_placeholder"), label=L("en", "ask_label")) k = gr.Slider(3, 12, step=1, value=6, label=L("en", "k_label")) ask_btn = gr.Button("Ask / Demander / Rode") chat = gr.Chatbot(label="Assistant", height=360) answer_md = gr.Markdown() sources_md = gr.Markdown() links_md = gr.Markdown() forms_md = gr.Markdown() ask_btn.click( fn=on_ask, inputs=[q, k, lang, mode, temp, max_tokens, chat, session], outputs=[chat, answer_md, sources_md, links_md, forms_md, last_answer], ) q.submit( fn=on_ask, inputs=[q, k, lang, mode, temp, max_tokens, chat, session], outputs=[chat, answer_md, sources_md, links_md, forms_md, last_answer], ) # --- Feedback UI gr.Markdown("### Rate this answer") with gr.Row(): note = gr.Textbox(placeholder="Optional: tell us what was good or missing", label="Your note", lines=2) with gr.Row(): helpful_btn = gr.Button("👍 Helpful") not_helpful_btn = gr.Button("👎 Not helpful") fb_status = gr.Markdown() helpful_btn.click( fn=lambda n, last: on_feedback_submit("helpful", n, last), inputs=[note, last_answer], outputs=[fb_status], ) not_helpful_btn.click( fn=lambda n, last: on_feedback_submit("not_helpful", n, last), inputs=[note, last_answer], outputs=[fb_status], ) demo.launch()