Spaces:
Sleeping
Sleeping
Update app.py
Browse files
app.py
CHANGED
@@ -1,13 +1,13 @@
|
|
1 |
"""
|
2 |
app.py
|
3 |
-
Step
|
4 |
|
5 |
-
|
6 |
-
-
|
7 |
-
-
|
|
|
8 |
"""
|
9 |
|
10 |
-
import os
|
11 |
from pathlib import Path
|
12 |
from typing import List
|
13 |
|
@@ -24,13 +24,12 @@ from rag_search import (
|
|
24 |
from evo_inference import synthesize_with_evo
|
25 |
from utils_lang import SUPPORTED, normalize_lang, L
|
26 |
|
27 |
-
# Global (Objective) lazy searcher instance
|
28 |
_searcher = None
|
29 |
DATA_SEED_DIR = Path("data/seed")
|
|
|
30 |
|
31 |
|
32 |
def ensure_searcher():
|
33 |
-
"""(Objective) Create/return a global RAGSearcher bound to the current index."""
|
34 |
global _searcher
|
35 |
if _searcher is None:
|
36 |
_searcher = RAGSearcher()
|
@@ -38,7 +37,6 @@ def ensure_searcher():
|
|
38 |
|
39 |
|
40 |
def on_build_index():
|
41 |
-
"""(Objective) Build FAISS index, reset searcher so it reloads next query."""
|
42 |
try:
|
43 |
msg = build_index()
|
44 |
global _searcher
|
@@ -48,11 +46,10 @@ def on_build_index():
|
|
48 |
return msg
|
49 |
|
50 |
|
51 |
-
def
|
52 |
"""
|
53 |
-
(Objective) Save uploaded
|
54 |
-
|
55 |
-
Returns a list of saved filenames.
|
56 |
"""
|
57 |
saved = []
|
58 |
DATA_SEED_DIR.mkdir(parents=True, exist_ok=True)
|
@@ -61,47 +58,37 @@ def save_uploaded_txt(files: List[gr.File]) -> List[str]:
|
|
61 |
|
62 |
for f in files:
|
63 |
try:
|
64 |
-
#
|
65 |
-
path =
|
66 |
-
# Some gradio versions pass dicts with 'name'
|
67 |
if isinstance(f, dict) and "name" in f:
|
68 |
-
path = f["name"]
|
69 |
-
|
|
|
|
|
|
|
|
|
70 |
continue
|
71 |
-
|
72 |
-
|
73 |
-
|
74 |
-
|
75 |
-
|
76 |
-
dest
|
77 |
-
|
78 |
-
saved.append(src.name)
|
79 |
except Exception:
|
80 |
-
# Skip problematic files silently for robustness
|
81 |
continue
|
82 |
return saved
|
83 |
|
84 |
|
85 |
def on_upload_and_reindex(files) -> str:
|
86 |
-
|
87 |
-
(Objective) Save uploaded .txt files and rebuild the index immediately.
|
88 |
-
"""
|
89 |
-
saved = save_uploaded_txt(files or [])
|
90 |
if not saved:
|
91 |
-
return "No valid .txt uploaded."
|
92 |
status = on_build_index()
|
93 |
return f"Uploaded: {', '.join(saved)}\n{status}"
|
94 |
|
95 |
|
96 |
def on_ask(question: str, top_k: int, lang_code: str, history: list):
|
97 |
-
"""
|
98 |
-
(Objective) Handle user question end-to-end:
|
99 |
-
- Ensure index exists
|
100 |
-
- Retrieve top-k hits
|
101 |
-
- Synthesize grounded answer (templated Evo)
|
102 |
-
- Extract and render links & forms
|
103 |
-
- Append to chat history
|
104 |
-
"""
|
105 |
lang = normalize_lang(lang_code)
|
106 |
if not question or len(question.strip()) < 3:
|
107 |
return history, L(lang, "intro_err"), f"{L(lang, 'sources')}: (none)", "", ""
|
@@ -111,12 +98,10 @@ def on_ask(question: str, top_k: int, lang_code: str, history: list):
|
|
111 |
hits = searcher.search(question, k=int(top_k))
|
112 |
answer = synthesize_with_evo(question, lang, hits)
|
113 |
|
114 |
-
# Sources (localized header)
|
115 |
srcs = format_sources(hits)
|
116 |
if srcs.startswith("Sources:"):
|
117 |
srcs = srcs.replace("Sources:", f"{L(lang, 'sources')}:")
|
118 |
|
119 |
-
# Links & Forms
|
120 |
all_links = extract_links(hits)
|
121 |
form_links, other_links = split_form_links(all_links)
|
122 |
links_md = links_markdown(L(lang, "links"), other_links)
|
@@ -133,21 +118,24 @@ def on_ask(question: str, top_k: int, lang_code: str, history: list):
|
|
133 |
|
134 |
|
135 |
with gr.Blocks(title=L("en", "title")) as demo:
|
136 |
-
gr.Markdown(f"# 🇲🇺 **{L('en','title')}** — Step
|
137 |
|
138 |
-
# --- Admin controls
|
139 |
with gr.Row():
|
140 |
build_btn = gr.Button(L("en", "build_btn"))
|
141 |
status = gr.Markdown(L("en", "status_idle"))
|
142 |
build_btn.click(fn=on_build_index, outputs=status)
|
143 |
|
144 |
-
|
145 |
-
|
|
|
|
|
|
|
|
|
|
|
146 |
save_btn = gr.Button("Save & Reindex")
|
147 |
upload_status = gr.Markdown()
|
148 |
save_btn.click(fn=on_upload_and_reindex, inputs=upload, outputs=upload_status)
|
149 |
|
150 |
-
# --- User Q&A
|
151 |
gr.Markdown("### Language / Langue / Langaz")
|
152 |
lang = gr.Dropdown(choices=list(SUPPORTED.keys()), value="en", label="EN / FR / MFE")
|
153 |
|
|
|
1 |
"""
|
2 |
app.py
|
3 |
+
Step 7: Allow uploading .txt + .pdf + .html files from the Admin panel (then auto-reindex).
|
4 |
|
5 |
+
(Objective)
|
6 |
+
- Accept multiple files (txt/pdf/html) in the Admin section.
|
7 |
+
- Save them into data/seed/ (overwrites by filename).
|
8 |
+
- Rebuild index and clear the cached searcher.
|
9 |
"""
|
10 |
|
|
|
11 |
from pathlib import Path
|
12 |
from typing import List
|
13 |
|
|
|
24 |
from evo_inference import synthesize_with_evo
|
25 |
from utils_lang import SUPPORTED, normalize_lang, L
|
26 |
|
|
|
27 |
_searcher = None
|
28 |
DATA_SEED_DIR = Path("data/seed")
|
29 |
+
ALLOWED_EXTS = {".txt", ".pdf", ".html", ".htm", ".xhtml"} # (Objective)
|
30 |
|
31 |
|
32 |
def ensure_searcher():
|
|
|
33 |
global _searcher
|
34 |
if _searcher is None:
|
35 |
_searcher = RAGSearcher()
|
|
|
37 |
|
38 |
|
39 |
def on_build_index():
|
|
|
40 |
try:
|
41 |
msg = build_index()
|
42 |
global _searcher
|
|
|
46 |
return msg
|
47 |
|
48 |
|
49 |
+
def _save_files(files: List[gr.File]) -> List[str]:
|
50 |
"""
|
51 |
+
(Objective) Save uploaded files with allowed extensions into data/seed/.
|
52 |
+
Supports different Gradio file object shapes.
|
|
|
53 |
"""
|
54 |
saved = []
|
55 |
DATA_SEED_DIR.mkdir(parents=True, exist_ok=True)
|
|
|
58 |
|
59 |
for f in files:
|
60 |
try:
|
61 |
+
# Resolve a path from the gradio file object
|
62 |
+
path = None
|
|
|
63 |
if isinstance(f, dict) and "name" in f:
|
64 |
+
path = Path(f["name"])
|
65 |
+
elif hasattr(f, "name"):
|
66 |
+
path = Path(getattr(f, "name"))
|
67 |
+
elif isinstance(f, str):
|
68 |
+
path = Path(f)
|
69 |
+
if not path or not path.exists():
|
70 |
continue
|
71 |
+
|
72 |
+
if path.suffix.lower() not in ALLOWED_EXTS:
|
73 |
+
continue
|
74 |
+
|
75 |
+
dest = DATA_SEED_DIR / path.name
|
76 |
+
dest.write_bytes(path.read_bytes())
|
77 |
+
saved.append(path.name)
|
|
|
78 |
except Exception:
|
|
|
79 |
continue
|
80 |
return saved
|
81 |
|
82 |
|
83 |
def on_upload_and_reindex(files) -> str:
    """Save the uploaded files, then rebuild the index.

    ``files`` is handed to ``_save_files`` (an empty list is substituted
    when it is falsy). If nothing was saved, a fixed "no valid upload"
    message is returned and the index is left untouched. Otherwise
    ``on_build_index`` is invoked and its status text is appended, on a new
    line, to the comma-separated list of saved filenames.
    """
    stored_names = _save_files(files if files else [])
    if stored_names:
        # Only rebuild when at least one file actually landed on disk.
        rebuild_status = on_build_index()
        uploaded = ", ".join(stored_names)
        return f"Uploaded: {uploaded}\n{rebuild_status}"
    return "No valid .txt/.pdf/.html uploaded."
|
89 |
|
90 |
|
91 |
def on_ask(question: str, top_k: int, lang_code: str, history: list):
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
92 |
lang = normalize_lang(lang_code)
|
93 |
if not question or len(question.strip()) < 3:
|
94 |
return history, L(lang, "intro_err"), f"{L(lang, 'sources')}: (none)", "", ""
|
|
|
98 |
hits = searcher.search(question, k=int(top_k))
|
99 |
answer = synthesize_with_evo(question, lang, hits)
|
100 |
|
|
|
101 |
srcs = format_sources(hits)
|
102 |
if srcs.startswith("Sources:"):
|
103 |
srcs = srcs.replace("Sources:", f"{L(lang, 'sources')}:")
|
104 |
|
|
|
105 |
all_links = extract_links(hits)
|
106 |
form_links, other_links = split_form_links(all_links)
|
107 |
links_md = links_markdown(L(lang, "links"), other_links)
|
|
|
118 |
|
119 |
|
120 |
with gr.Blocks(title=L("en", "title")) as demo:
|
121 |
+
gr.Markdown(f"# 🇲🇺 **{L('en','title')}** — Step 7")
|
122 |
|
|
|
123 |
with gr.Row():
|
124 |
build_btn = gr.Button(L("en", "build_btn"))
|
125 |
status = gr.Markdown(L("en", "status_idle"))
|
126 |
build_btn.click(fn=on_build_index, outputs=status)
|
127 |
|
128 |
+
# Admin: upload txt/pdf/html, then reindex
|
129 |
+
with gr.Accordion("Admin: upload TXT / PDF / HTML and auto-reindex", open=False):
|
130 |
+
upload = gr.File(
|
131 |
+
file_count="multiple",
|
132 |
+
file_types=["file"], # allow any file; we'll filter by extension
|
133 |
+
label="Upload .txt / .pdf / .html"
|
134 |
+
)
|
135 |
save_btn = gr.Button("Save & Reindex")
|
136 |
upload_status = gr.Markdown()
|
137 |
save_btn.click(fn=on_upload_and_reindex, inputs=upload, outputs=upload_status)
|
138 |
|
|
|
139 |
gr.Markdown("### Language / Langue / Langaz")
|
140 |
lang = gr.Dropdown(choices=list(SUPPORTED.keys()), value="en", label="EN / FR / MFE")
|
141 |
|