Spaces:
Sleeping
Sleeping
""" | |
app.py — Step 9
Adds:
- Feedback logger (Helpful / Not helpful) with optional note → data/feedback.csv
- Per-session ID so you can trace usage (no cookies needed)
- Auto-build index on first question (from Step 8 refined)
Everything else (RAG + Extractive/Generative toggle) stays the same.
""" | |
import logging
import secrets
import time
from pathlib import Path
from typing import List

import gradio as gr

from indexer import build_index
from rag_search import (
    RAGSearcher,
    format_sources,
    extract_links,
    split_form_links,
    links_markdown,
)
from evo_inference import synthesize_with_evo
from utils_lang import SUPPORTED, normalize_lang, L
from feedback import log_feedback  # NEW
# Process-wide searcher cache: None until ensure_searcher() creates one, and
# reset to None by on_build_index() after a successful rebuild.
_searcher = None

# Directory that uploaded documents are copied into.
DATA_SEED_DIR = Path("data") / "seed"

# Lower-case file suffixes accepted by the upload handler.
ALLOWED_EXTS = {
    ".txt",
    ".pdf",
    ".html",
    ".htm",
    ".xhtml",
}
def ensure_searcher():
    """Return the shared RAGSearcher, constructing it on first use.

    The instance is cached in the module-level ``_searcher`` so later calls
    reuse it. Any exception raised by ``RAGSearcher()`` propagates to the
    caller and nothing is cached in that case.
    """
    global _searcher
    if _searcher is not None:
        return _searcher
    # The constructor may raise RuntimeError if the index is missing.
    _searcher = RAGSearcher()
    return _searcher
def on_build_index():
    """Rebuild the index and drop the cached searcher.

    Returns the status message from ``build_index()`` on success. On any
    exception the cache is left untouched and an error string is returned
    instead, so the UI always receives text to display.
    """
    global _searcher
    try:
        status_text = build_index()
    except Exception as exc:
        return f"Error while building index: {exc}"
    # Forget the old searcher so the next ensure_searcher() call reloads
    # against the freshly built index.
    _searcher = None
    return status_text
def _save_files(files: List[gr.File]) -> List[str]:
    """Copy uploaded documents into ``DATA_SEED_DIR`` and report what was kept.

    Each entry of *files* may be a dict with a ``"name"`` key, an object with
    a ``name`` attribute, or a plain path string; any other entry is ignored.
    Entries that do not point at an existing regular file, or whose suffix
    (compared case-insensitively) is not in ``ALLOWED_EXTS``, are skipped.
    A file that cannot be read or written is skipped with a logged warning,
    so one bad upload never aborts the rest of the batch.

    Returns:
        Base names of the files that were saved, in input order.
    """
    saved: List[str] = []
    DATA_SEED_DIR.mkdir(parents=True, exist_ok=True)
    if not files:
        return saved
    log = logging.getLogger(__name__)
    for f in files:
        try:
            if isinstance(f, dict) and "name" in f:
                path = Path(f["name"])
            elif hasattr(f, "name"):
                path = Path(f.name)
            elif isinstance(f, str):
                path = Path(f)
            else:
                continue
            # is_file() rather than exists(): a directory is rejected here
            # instead of failing later inside read_bytes().
            if not path.is_file():
                continue
            if path.suffix.lower() not in ALLOWED_EXTS:
                continue
            # Only the base name is used, so the copy always lands directly
            # inside DATA_SEED_DIR.
            (DATA_SEED_DIR / path.name).write_bytes(path.read_bytes())
        except (OSError, TypeError, ValueError) as exc:
            # Best effort: keep going, but leave a trace of what was dropped.
            log.warning("Skipping upload %r: %s", f, exc)
            continue
        saved.append(path.name)
    return saved
def on_upload_and_reindex(files):
    """Store the uploaded files, then rebuild the index if any were kept.

    Returns a status string: the saved file names followed by the index
    build status, or a short notice when no upload was accepted.
    """
    saved_names = _save_files(files or [])
    if saved_names:
        build_status = on_build_index()
        return f"Uploaded: {', '.join(saved_names)}\n{build_status}"
    return "No valid .txt/.pdf/.html uploaded."
def on_ask(
    question: str,
    top_k: int,
    lang_code: str,
    mode_label: str,
    temp: float,
    max_tokens: int,
    history: list,
    session_id: str,  # NEW: State input
):
    """
    Handle a user question end-to-end:
    - Ensure index exists (auto-build on first use)
    - Retrieve top-k chunks
    - Synthesize answer (Extractive or Generative)
    - Show sources + extracted links/forms
    - Append to chat history
    - Return a structured "last answer" payload for feedback logging

    Args:
        question: Raw user text; fewer than 3 non-blank characters is rejected.
        top_k: Number of chunks to retrieve (coerced with ``int``).
        lang_code: Language code, normalised with ``normalize_lang``.
        mode_label: UI label; one starting with "Generative" selects
            generative mode, anything else selects extractive mode.
        temp: Sampling temperature forwarded to the synthesizer.
        max_tokens: Maximum new tokens forwarded to the synthesizer.
        history: Existing chat history as (question, answer) pairs; ``None``
            is treated as empty. The caller's list is never mutated.
        session_id: Session identifier stored in the feedback payload.

    Returns:
        A 6-tuple ``(history, answer, sources, links_md, forms_md, payload)``.
        ``payload`` is an empty dict when the question was rejected or an
        error occurred, so no feedback can be logged for that turn.
    """
    lang = normalize_lang(lang_code)
    # Work on a copy and tolerate a missing history, so appending below can
    # neither raise on None nor alter the caller's list.
    history = list(history or [])
    if not question or len(question.strip()) < 3:
        return history, L(lang, "intro_err"), f"{L(lang, 'sources')}: (none)", "", "", {}
    try:
        # Ensure searcher; if index missing, build once automatically then retry
        try:
            searcher = ensure_searcher()
        except RuntimeError:
            on_build_index()
            searcher = ensure_searcher()
        hits = searcher.search(question, k=int(top_k))
        # Map UI label to internal mode flag
        mode = "generative" if mode_label.startswith("Generative") else "extractive"
        answer = synthesize_with_evo(
            question,
            lang,
            hits,
            mode=mode,
            max_new_tokens=int(max_tokens),
            temperature=float(temp),
        )
        # Localized "Sources" header
        srcs = format_sources(hits)
        if srcs.startswith("Sources:"):
            # count=1: swap only the leading header and leave any later
            # "Sources:" text inside the listing untouched.
            srcs = srcs.replace("Sources:", f"{L(lang, 'sources')}:", 1)
        # Extract URLs and form links
        all_links = extract_links(hits)
        form_links, other_links = split_form_links(all_links)
        links_md = links_markdown(L(lang, "links"), other_links)
        forms_md = links_markdown(L(lang, "forms"), form_links)
        # Build "last answer" payload for feedback
        last_payload = {
            "ts_question": int(time.time()),
            "session": session_id or "",
            "lang": lang,
            "mode": mode,
            "temperature": float(temp),
            "max_tokens": int(max_tokens),
            "top_k": int(top_k),
            "question": question,
            "answer": answer,
            "sources": srcs,
            "links": links_md,
            "forms": forms_md,
        }
    except Exception as e:
        # UI boundary: show the failure in the chat instead of raising.
        answer = f"Error: {e}\n\n{L(lang, 'tip_build')}"
        srcs = f"{L(lang, 'sources')}: (none)"
        links_md = f"**{L(lang, 'links')}:** (none)"
        forms_md = f"**{L(lang, 'forms')}:** (none)"
        last_payload = {}
    history.append((question, answer))
    return history, answer, srcs, links_md, forms_md, last_payload
def on_feedback_submit(kind: str, note: str, last_payload: dict) -> str:
    """
    Record a rating for the most recent answer.

    'kind' is 'helpful' or 'not_helpful'. When there is no answer payload
    yet (empty or None), a prompt to ask a question first is returned and
    nothing is logged. Otherwise a copy of the payload, extended with the
    rating and the note (empty string if none), is handed to
    ``log_feedback`` and its return value is passed back.
    """
    if last_payload:
        record = dict(last_payload)  # copy so the stored state is not mutated
        record.update(feedback=kind, note=note or "")
        return log_feedback(record)
    return "Ask a question first, then rate the answer."
def _new_session_id():
    """Create a short random session id when the app loads.

    The id is 8 random bytes rendered as 16 lowercase hexadecimal characters.
    """
    random_bytes = secrets.token_bytes(8)
    return random_bytes.hex()
# Build the Gradio UI. Component nesting inside rows/accordions follows the
# order of the original statements.
# NOTE(review): the flag, dash and thumbs emoji below were restored from
# mis-encoded text; confirm they match the intended labels.
with gr.Blocks(title=L("en", "title")) as demo:
    gr.Markdown(f"# 🇲🇺 **{L('en','title')}** — Step 9")

    # --- Hidden state: session id + last answer payload
    session = gr.State()
    last_answer = gr.State()
    # Generate a session id when the app loads
    demo.load(fn=_new_session_id, inputs=None, outputs=session)

    # --- Admin: build index / upload & reindex
    with gr.Row():
        build_btn = gr.Button(L("en", "build_btn"))
        status = gr.Markdown(L("en", "status_idle"))
    build_btn.click(fn=on_build_index, outputs=status)

    with gr.Accordion("Admin: upload TXT / PDF / HTML and auto-reindex", open=False):
        upload = gr.File(
            file_count="multiple",
            file_types=["file"],
            label="Upload .txt / .pdf / .html",
        )
        save_btn = gr.Button("Save & Reindex")
        upload_status = gr.Markdown()
        save_btn.click(fn=on_upload_and_reindex, inputs=upload, outputs=upload_status)

    # --- User controls
    gr.Markdown("### Language / Langue / Langaz")
    lang = gr.Dropdown(choices=list(SUPPORTED.keys()), value="en", label="EN / FR / MFE")
    gr.Markdown("### Answer mode & controls")
    with gr.Row():
        mode = gr.Radio(
            choices=["Extractive (safe)", "Generative (Evo)"],
            value="Extractive (safe)",
            label="Mode",
        )
        temp = gr.Slider(0.0, 1.2, value=0.4, step=0.05, label="Temperature (gen)")
        max_tokens = gr.Slider(64, 384, value=192, step=16, label="Max new tokens (gen)")

    # --- Q&A
    gr.Markdown("### Ask a question / Posez une question / Poz enn kestyon")
    with gr.Row():
        q = gr.Textbox(placeholder=L("en", "ask_placeholder"), label=L("en", "ask_label"))
        k = gr.Slider(3, 12, step=1, value=6, label=L("en", "k_label"))
    ask_btn = gr.Button("Ask / Demander / Rode")
    chat = gr.Chatbot(label="Assistant", height=360)
    answer_md = gr.Markdown()
    sources_md = gr.Markdown()
    links_md = gr.Markdown()
    forms_md = gr.Markdown()

    # The button click and pressing Enter in the textbox run the same handler
    # with the same wiring, so the component lists are defined once.
    ask_inputs = [q, k, lang, mode, temp, max_tokens, chat, session]
    ask_outputs = [chat, answer_md, sources_md, links_md, forms_md, last_answer]
    ask_btn.click(fn=on_ask, inputs=ask_inputs, outputs=ask_outputs)
    q.submit(fn=on_ask, inputs=ask_inputs, outputs=ask_outputs)

    # --- Feedback UI
    gr.Markdown("### Rate this answer")
    with gr.Row():
        note = gr.Textbox(placeholder="Optional: tell us what was good or missing", label="Your note", lines=2)
    with gr.Row():
        helpful_btn = gr.Button("👍 Helpful")
        not_helpful_btn = gr.Button("👎 Not helpful")
    fb_status = gr.Markdown()
    helpful_btn.click(
        fn=lambda n, last: on_feedback_submit("helpful", n, last),
        inputs=[note, last_answer],
        outputs=[fb_status],
    )
    not_helpful_btn.click(
        fn=lambda n, last: on_feedback_submit("not_helpful", n, last),
        inputs=[note, last_answer],
        outputs=[fb_status],
    )

demo.launch()