Spaces:
Sleeping
Sleeping
""" | |
app.py
Step 7: Allow uploading .txt, .pdf and .html files from the Admin panel, then auto-reindex.
(Objective)
- Accept multiple files (txt/pdf/html) in the Admin section.
- Save them into data/seed/ (an existing file with the same name is overwritten).
- Rebuild the index and clear the cached searcher.
""" | |
from pathlib import Path | |
from typing import List | |
import gradio as gr | |
from indexer import build_index | |
from rag_search import ( | |
RAGSearcher, | |
format_sources, | |
extract_links, | |
split_form_links, | |
links_markdown, | |
) | |
from evo_inference import synthesize_with_evo | |
from utils_lang import SUPPORTED, normalize_lang, L | |
# Cached RAGSearcher instance; created on first use and reset to None after a reindex.
_searcher = None

# Folder that receives uploaded seed documents.
DATA_SEED_DIR = Path("data", "seed")

# (Objective) File extensions accepted by the Admin upload.
ALLOWED_EXTS = {".htm", ".html", ".pdf", ".txt", ".xhtml"}
def ensure_searcher():
    """Return the module-wide RAGSearcher, constructing it on first use.

    The instance is stored in the module-level ``_searcher`` so that later
    calls reuse it until it is reset to None.
    """
    global _searcher
    if _searcher is not None:
        return _searcher
    _searcher = RAGSearcher()
    return _searcher
def on_build_index():
    """Rebuild the index and drop the cached searcher.

    Returns:
        Whatever ``build_index()`` returns on success, otherwise an error
        message that includes the exception text. The cached searcher is only
        cleared when the build succeeds.
    """
    global _searcher
    try:
        result = build_index()
    except Exception as exc:
        return f"Error while building index: {exc}"
    # Force the next ensure_searcher() call to create a fresh searcher.
    _searcher = None
    return result
def _save_files(files: List[gr.File]) -> List[str]:
    """
    (Objective) Save uploaded files with allowed extensions into data/seed/.

    Supports different Gradio file object shapes: a dict with a "name" key,
    an object exposing a ``name`` attribute, or a plain path string.

    Args:
        files: Uploaded file objects; may be empty or None.

    Returns:
        Base names of the files that were copied, in upload order. Entries
        that do not resolve to an existing regular file, whose extension is
        not in ALLOWED_EXTS, or that fail to copy are skipped (best effort).
    """
    import shutil  # function-scope import keeps this block self-contained

    saved: List[str] = []
    DATA_SEED_DIR.mkdir(parents=True, exist_ok=True)
    if not files:
        return saved
    for f in files:
        # Resolve a path from the gradio file object.
        if isinstance(f, dict) and "name" in f:
            raw = f["name"]
        elif hasattr(f, "name"):
            raw = f.name
        elif isinstance(f, str):
            raw = f
        else:
            continue
        try:
            path = Path(raw)
            # Only regular files can be copied; directories are skipped.
            if not path.is_file():
                continue
            if path.suffix.lower() not in ALLOWED_EXTS:
                continue
            dest = DATA_SEED_DIR / path.name
            try:
                # Stream the copy instead of loading the whole upload in memory.
                shutil.copyfile(path, dest)
            except shutil.SameFileError:
                # The upload already lives at the destination; nothing to copy.
                pass
        except (OSError, TypeError, ValueError):
            # Best effort: an unreadable or malformed entry must not abort
            # the remaining uploads.
            continue
        saved.append(path.name)
    return saved
def on_upload_and_reindex(files) -> str:
    """Save the uploaded files, then rebuild the index.

    Args:
        files: Uploaded file objects from the Admin panel; may be None.

    Returns:
        A status message: the saved file names followed by the result of
        ``on_build_index()``, or a notice when nothing valid was uploaded.
    """
    saved_names = _save_files(files if files else [])
    if saved_names:
        index_status = on_build_index()
        uploaded = ", ".join(saved_names)
        return f"Uploaded: {uploaded}\n{index_status}"
    return "No valid .txt/.pdf/.html uploaded."
def on_ask(question: str, top_k: int, lang_code: str, history: list):
    """Answer a question and return the updated chat plus supporting panels.

    Args:
        question: The user's question; fewer than 3 non-blank characters is
            rejected with the localized "intro_err" message.
        top_k: Number of hits to request from the searcher (coerced to int).
        lang_code: Language code, normalized via ``normalize_lang``.
        history: Chat history as a list of (question, answer) pairs; None is
            treated as an empty history.

    Returns:
        A 5-tuple ``(history, answer, sources, links_md, forms_md)``. When the
        question is rejected, ``history`` is returned unchanged and the two
        link panels are empty strings.
    """
    lang = normalize_lang(lang_code)
    if not question or len(question.strip()) < 3:
        return history, L(lang, "intro_err"), f"{L(lang, 'sources')}: (none)", "", ""
    try:
        searcher = ensure_searcher()
        hits = searcher.search(question, k=int(top_k))
        answer = synthesize_with_evo(question, lang, hits)
        srcs = format_sources(hits)
        if srcs.startswith("Sources:"):
            # Localize only the leading label; later occurrences of the same
            # text inside the sources list must stay as they are.
            srcs = srcs.replace("Sources:", f"{L(lang, 'sources')}:", 1)
        all_links = extract_links(hits)
        form_links, other_links = split_form_links(all_links)
        links_md = links_markdown(L(lang, "links"), other_links)
        forms_md = links_markdown(L(lang, "forms"), form_links)
    except Exception as e:
        # Any failure is surfaced in the answer panel together with the
        # localized "tip_build" hint.
        answer = f"Error: {e}\n\n{L(lang, 'tip_build')}"
        srcs = f"{L(lang, 'sources')}: (none)"
        links_md = f"**{L(lang, 'links')}:** (none)"
        forms_md = f"**{L(lang, 'forms')}:** (none)"
    # The chat component may pass None before the first message.
    history = (history or []) + [(question, answer)]
    return history, answer, srcs, links_md, forms_md
# Gradio UI: index building, admin upload, language choice and the Q&A panel.
with gr.Blocks(title=L("en", "title")) as demo:
    # Title line: Mauritius flag emoji and an em dash, written as escapes so
    # the text survives re-encoding of this file.
    gr.Markdown(f"# \U0001F1F2\U0001F1FA **{L('en','title')}** \u2014 Step 7")

    with gr.Row():
        build_btn = gr.Button(L("en", "build_btn"))
        status = gr.Markdown(L("en", "status_idle"))
    build_btn.click(fn=on_build_index, outputs=status)

    # Admin: upload txt/pdf/html, then reindex
    with gr.Accordion("Admin: upload TXT / PDF / HTML and auto-reindex", open=False):
        upload = gr.File(
            file_count="multiple",
            file_types=["file"],  # allow any file; we'll filter by extension
            label="Upload .txt / .pdf / .html",
        )
        save_btn = gr.Button("Save & Reindex")
        upload_status = gr.Markdown()
        save_btn.click(fn=on_upload_and_reindex, inputs=upload, outputs=upload_status)

    gr.Markdown("### Language / Langue / Langaz")
    lang = gr.Dropdown(choices=list(SUPPORTED.keys()), value="en", label="EN / FR / MFE")

    gr.Markdown("### Ask a question / Posez une question / Poz enn kestyon")
    with gr.Row():
        q = gr.Textbox(placeholder=L("en", "ask_placeholder"), label=L("en", "ask_label"))
        k = gr.Slider(3, 12, step=1, value=6, label=L("en", "k_label"))
    ask_btn = gr.Button("Ask / Demander / Rode")

    chat = gr.Chatbot(label="Assistant", height=360)
    answer_md = gr.Markdown()
    sources_md = gr.Markdown()
    links_md = gr.Markdown()
    forms_md = gr.Markdown()

    # The button click and pressing Enter in the textbox run the same handler
    # with the same components.
    ask_inputs = [q, k, lang, chat]
    ask_outputs = [chat, answer_md, sources_md, links_md, forms_md]
    ask_btn.click(fn=on_ask, inputs=ask_inputs, outputs=ask_outputs)
    q.submit(fn=on_ask, inputs=ask_inputs, outputs=ask_outputs)

demo.launch()