Spaces:
Sleeping
Sleeping
File size: 5,155 Bytes
07f8c0c 57774c2 8d7e557 57774c2 8d7e557 07f8c0c ced2a62 1849681 ced2a62 e55c124 ced2a62 57774c2 ced2a62 8d7e557 e55c124 1849681 57774c2 1849681 e55c124 ced2a62 1849681 07f8c0c 57774c2 8d7e557 ced2a62 8d7e557 ced2a62 8d7e557 ced2a62 8d7e557 ced2a62 8d7e557 ced2a62 8d7e557 ced2a62 8d7e557 ced2a62 57774c2 e55c124 ced2a62 e55c124 57774c2 ced2a62 57774c2 ced2a62 e55c124 57774c2 ced2a62 e55c124 ced2a62 e55c124 57774c2 8d7e557 07f8c0c 1849681 57774c2 1849681 8d7e557 ced2a62 57774c2 e55c124 57774c2 e55c124 ced2a62 e55c124 07f8c0c |
1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32 33 34 35 36 37 38 39 40 41 42 43 44 45 46 47 48 49 50 51 52 53 54 55 56 57 58 59 60 61 62 63 64 65 66 67 68 69 70 71 72 73 74 75 76 77 78 79 80 81 82 83 84 85 86 87 88 89 90 91 92 93 94 95 96 97 98 99 100 101 102 103 104 105 106 107 108 109 110 111 112 113 114 115 116 117 118 119 120 121 122 123 124 125 126 127 128 129 130 131 132 133 134 135 136 137 138 139 140 141 142 143 144 145 146 147 148 149 150 151 152 153 154 155 156 157 158 159 160 161 162 163 164 165 166 |
"""
app.py
Step 7: Allow uploading .txt + .pdf + .html files from the Admin panel (then auto-reindex).
(Objective)
- Accept multiple files (txt/pdf/html) in the Admin section.
- Save them into data/seed/ (overwrites by filename).
- Rebuild index and clear the cached searcher.
"""
from pathlib import Path
from typing import List
import gradio as gr
from indexer import build_index
from rag_search import (
RAGSearcher,
format_sources,
extract_links,
split_form_links,
links_markdown,
)
from evo_inference import synthesize_with_evo
from utils_lang import SUPPORTED, normalize_lang, L
# Lazily created RAGSearcher shared by the handlers; set back to None after a
# rebuild so the next query constructs a fresh one.
_searcher = None

# Directory that receives uploaded seed documents.
DATA_SEED_DIR = Path("data") / "seed"

# Lower-case file suffixes accepted from the Admin upload panel.  # (Objective)
ALLOWED_EXTS = {
    ".txt",
    ".pdf",
    ".html",
    ".htm",
    ".xhtml",
}
def ensure_searcher():
    """Return the module-wide RAGSearcher, constructing it on first use.

    The instance is kept in the module-level ``_searcher`` variable so later
    calls reuse it until that variable is reset to ``None``.
    """
    global _searcher
    if _searcher is not None:
        return _searcher
    _searcher = RAGSearcher()
    return _searcher
def on_build_index():
    """Rebuild the index and drop the cached searcher.

    Returns:
        Whatever ``build_index()`` returns on success, otherwise an
        ``"Error while building index: ..."`` message describing the failure.
    """
    global _searcher
    try:
        outcome = build_index()
    except Exception as exc:
        # UI boundary: report the failure as text instead of raising.
        return f"Error while building index: {exc}"
    # Only a successful rebuild invalidates the cached searcher.
    _searcher = None
    return outcome
def _resolve_upload_path(f):
    """Return a ``Path`` for one uploaded item, or ``None`` for an unknown shape."""
    if isinstance(f, dict) and "name" in f:
        return Path(f["name"])
    if hasattr(f, "name"):
        return Path(getattr(f, "name"))
    if isinstance(f, str):
        return Path(f)
    return None


def _save_files(files: List[object]) -> List[str]:
    """
    (Objective) Save uploaded files with allowed extensions into data/seed/.

    Supports different Gradio file object shapes: a dict with a ``"name"``
    key, an object exposing a ``name`` attribute, or a plain path string.

    Args:
        files: Uploaded items as handed over by the upload component; may be
            empty or ``None``.

    Returns:
        The file names (not full paths) that were copied into
        ``DATA_SEED_DIR``, in upload order. Items that are missing, are not
        regular files, have a suffix outside ``ALLOWED_EXTS``, or fail to
        copy are skipped; copy failures are logged as warnings.
    """
    import logging  # local import: keeps this block self-contained

    saved: List[str] = []
    DATA_SEED_DIR.mkdir(parents=True, exist_ok=True)
    for f in files or []:
        try:
            path = _resolve_upload_path(f)
            # is_file() also rejects directories, which exists() would let through.
            if path is None or not path.is_file():
                continue
            if path.suffix.lower() not in ALLOWED_EXTS:
                continue
            # An existing file with the same name is overwritten.
            (DATA_SEED_DIR / path.name).write_bytes(path.read_bytes())
        except (OSError, TypeError, ValueError) as exc:
            # Best effort: one bad upload must not abort the others, but the
            # failure should be visible in the logs.
            logging.getLogger(__name__).warning("Skipping upload %r: %s", f, exc)
            continue
        saved.append(path.name)
    return saved
def on_upload_and_reindex(files) -> str:
    """Store the uploaded files, then rebuild the index.

    Returns:
        A notice when nothing was stored; otherwise the list of stored file
        names followed by the status text from ``on_build_index()``.
    """
    stored_names = _save_files(files or [])
    if stored_names:
        index_status = on_build_index()
        uploaded = ", ".join(stored_names)
        return f"Uploaded: {uploaded}\n{index_status}"
    return "No valid .txt/.pdf/.html uploaded."
def on_ask(question: str, top_k: int, lang_code: str, history: list):
    """Answer a question and produce the values for the five output widgets.

    Args:
        question: Text typed by the user; fewer than 3 non-blank characters
            is rejected with the localized ``intro_err`` message.
        top_k: Number of hits to request from the searcher (coerced to int).
        lang_code: Language code, normalized with ``normalize_lang``.
        history: Chat history as a list of ``(question, answer)`` pairs; may
            be ``None`` or empty.

    Returns:
        A 5-tuple ``(history, answer, sources, links_md, forms_md)``.
        ``history`` is a new list; the caller's list is never mutated.
        Any exception raised while searching or synthesizing is reported in
        ``answer`` instead of being propagated.
    """
    lang = normalize_lang(lang_code)
    # The chat component may hand over None when it has no value yet;
    # concatenating a list to None would raise TypeError.
    history = list(history or [])

    if not question or len(question.strip()) < 3:
        return history, L(lang, "intro_err"), f"{L(lang, 'sources')}: (none)", "", ""

    try:
        searcher = ensure_searcher()
        hits = searcher.search(question, k=int(top_k))
        answer = synthesize_with_evo(question, lang, hits)

        srcs = format_sources(hits)
        # Localize only the leading label; a "Sources:" appearing later in
        # the text (e.g. inside a document title) must stay untouched.
        label = "Sources:"
        if srcs.startswith(label):
            srcs = f"{L(lang, 'sources')}:" + srcs[len(label):]

        all_links = extract_links(hits)
        form_links, other_links = split_form_links(all_links)
        links_md = links_markdown(L(lang, "links"), other_links)
        forms_md = links_markdown(L(lang, "forms"), form_links)
    except Exception as e:
        # UI boundary: show the error together with the "build the index" tip.
        answer = f"Error: {e}\n\n{L(lang, 'tip_build')}"
        srcs = f"{L(lang, 'sources')}: (none)"
        links_md = f"**{L(lang, 'links')}:** (none)"
        forms_md = f"**{L(lang, 'forms')}:** (none)"

    history.append((question, answer))
    return history, answer, srcs, links_md, forms_md
# Build the Gradio UI and start the app.
# NOTE(review): leading indentation is missing in this copy of the file, so the
# extent of each `with` body (Blocks / Row / Accordion) is not visible here and
# must be restored from the original before this can run — TODO confirm layout.
# All static labels are rendered with the "en" strings; the language dropdown
# is only passed to on_ask as an input.
with gr.Blocks(title=L("en", "title")) as demo:
gr.Markdown(f"# 🇲🇺 **{L('en','title')}** — Step 7")
with gr.Row():
build_btn = gr.Button(L("en", "build_btn"))
status = gr.Markdown(L("en", "status_idle"))
# Manual rebuild: the message returned by on_build_index is shown in `status`.
build_btn.click(fn=on_build_index, outputs=status)
# Admin: upload txt/pdf/html, then reindex
with gr.Accordion("Admin: upload TXT / PDF / HTML and auto-reindex", open=False):
upload = gr.File(
file_count="multiple",
file_types=["file"], # allow any file; we'll filter by extension
label="Upload .txt / .pdf / .html"
)
save_btn = gr.Button("Save & Reindex")
upload_status = gr.Markdown()
# Save the uploads and rebuild; the combined message goes to `upload_status`.
save_btn.click(fn=on_upload_and_reindex, inputs=upload, outputs=upload_status)
gr.Markdown("### Language / Langue / Langaz")
# Choices are the keys of SUPPORTED; "en" is preselected.
lang = gr.Dropdown(choices=list(SUPPORTED.keys()), value="en", label="EN / FR / MFE")
gr.Markdown("### Ask a question / Posez une question / Poz enn kestyon")
with gr.Row():
q = gr.Textbox(placeholder=L("en", "ask_placeholder"), label=L("en", "ask_label"))
# Passed to on_ask as top_k: range 3-12, step 1, default 6.
k = gr.Slider(3, 12, step=1, value=6, label=L("en", "k_label"))
ask_btn = gr.Button("Ask / Demander / Rode")
chat = gr.Chatbot(label="Assistant", height=360)
# One Markdown widget per remaining on_ask return value, in return order.
answer_md = gr.Markdown()
sources_md = gr.Markdown()
links_md = gr.Markdown()
forms_md = gr.Markdown()
# Clicking the button and pressing Enter in the textbox trigger the same
# handler with identical inputs and outputs.
ask_btn.click(
fn=on_ask,
inputs=[q, k, lang, chat],
outputs=[chat, answer_md, sources_md, links_md, forms_md],
)
q.submit(
fn=on_ask,
inputs=[q, k, lang, chat],
outputs=[chat, answer_md, sources_md, links_md, forms_md],
)
# Called unconditionally at module level (there is no __main__ guard).
demo.launch()
|