File size: 5,155 Bytes
07f8c0c
57774c2
8d7e557
57774c2
8d7e557
 
 
 
07f8c0c
 
ced2a62
 
 
1849681
ced2a62
e55c124
ced2a62
 
 
 
 
 
 
57774c2
 
 
ced2a62
 
8d7e557
e55c124
 
 
 
 
 
 
1849681
57774c2
1849681
 
 
e55c124
ced2a62
1849681
 
 
07f8c0c
57774c2
8d7e557
ced2a62
8d7e557
 
ced2a62
 
 
 
 
 
 
 
8d7e557
 
ced2a62
8d7e557
 
 
 
 
 
ced2a62
8d7e557
 
 
 
 
 
 
ced2a62
 
 
 
 
 
8d7e557
ced2a62
8d7e557
ced2a62
 
 
 
57774c2
 
e55c124
ced2a62
e55c124
 
 
 
57774c2
ced2a62
57774c2
 
 
ced2a62
 
 
 
 
 
e55c124
57774c2
 
ced2a62
 
e55c124
 
ced2a62
e55c124
 
57774c2
8d7e557
07f8c0c
1849681
57774c2
 
1849681
 
8d7e557
 
 
 
 
 
 
ced2a62
 
 
 
57774c2
 
 
 
e55c124
57774c2
 
 
 
e55c124
 
 
ced2a62
 
 
 
 
 
 
 
 
 
 
 
 
e55c124
07f8c0c
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
"""
app.py
Step 7: Allow uploading .txt + .pdf + .html files from the Admin panel (then auto-reindex).

(Objective)
- Accept multiple files (txt/pdf/html) in the Admin section.
- Save them into data/seed/ (overwrites by filename).
- Rebuild index and clear the cached searcher.
"""

import logging
import shutil
from pathlib import Path
from typing import List

import gradio as gr

from indexer import build_index
from rag_search import (
    RAGSearcher,
    format_sources,
    extract_links,
    split_form_links,
    links_markdown,
)
from evo_inference import synthesize_with_evo
from utils_lang import SUPPORTED, normalize_lang, L

# Shared RAGSearcher instance, created on first use by ensure_searcher().
# on_build_index() resets it to None after a successful rebuild so the next
# call to ensure_searcher() constructs a fresh one.
_searcher = None
# Directory that _save_files() writes accepted uploads into.
DATA_SEED_DIR = Path("data/seed")
# File suffixes accepted for upload; compared against the lower-cased suffix.
ALLOWED_EXTS = {".txt", ".pdf", ".html", ".htm", ".xhtml"}


def ensure_searcher():
    """Return the shared RAGSearcher, constructing it on first use."""
    global _searcher
    if _searcher is not None:
        return _searcher
    # Nothing cached yet (or the cache was cleared): build a new searcher.
    _searcher = RAGSearcher()
    return _searcher


def on_build_index():
    """
    Rebuild the index and drop the cached searcher.

    Returns the message produced by build_index() on success, or an
    "Error while building index: ..." string if it raised. The cached
    searcher is only cleared when the build succeeded.
    """
    global _searcher
    try:
        result = build_index()
    except Exception as exc:
        return f"Error while building index: {exc}"
    # Force ensure_searcher() to create a new searcher on the next query.
    _searcher = None
    return result


def _save_files(files: List[gr.File]) -> List[str]:
    """
    Copy uploaded files with an allowed extension into DATA_SEED_DIR.

    Each entry of ``files`` may be a dict with a "name" key, an object with a
    ``name`` attribute, or a plain path string; any other shape is skipped.
    A file already present under the same name is overwritten. Saving is
    best-effort: an entry that fails is logged and skipped so the rest of the
    batch is still processed.

    Args:
        files: Uploaded file objects; may be empty or None.

    Returns:
        Names of the files saved into DATA_SEED_DIR, in upload order, each
        name listed once even if it was uploaded several times.
    """
    saved: List[str] = []
    DATA_SEED_DIR.mkdir(parents=True, exist_ok=True)
    if not files:
        return saved

    for f in files:
        try:
            # Resolve a filesystem path from whichever shape we were given.
            if isinstance(f, dict) and "name" in f:
                src = Path(f["name"])
            elif hasattr(f, "name"):
                src = Path(f.name)
            elif isinstance(f, str):
                src = Path(f)
            else:
                continue

            # Only regular files with a whitelisted suffix are accepted.
            if not src.is_file():
                continue
            if src.suffix.lower() not in ALLOWED_EXTS:
                continue

            # Using only the base name keeps the destination inside DATA_SEED_DIR.
            dest = DATA_SEED_DIR / src.name
            # Stream the copy instead of loading the whole file into memory;
            # skip it when source and destination are already the same file.
            if src.resolve() != dest.resolve():
                shutil.copyfile(src, dest)
        except Exception as exc:
            # Best-effort: keep going, but leave a trace of what was dropped.
            logging.getLogger(__name__).warning("Skipping upload %r: %s", f, exc)
            continue

        if src.name not in saved:
            saved.append(src.name)
    return saved


def on_upload_and_reindex(files) -> str:
    """
    Save the uploaded files, then rebuild the index.

    Args:
        files: Uploaded file objects as passed by the upload component; may
            be None or empty.

    Returns:
        A Markdown status string: either a notice that nothing valid was
        uploaded, or the list of saved file names followed by the status
        returned by on_build_index().
    """
    saved = _save_files(files or [])
    if not saved:
        return "No valid .txt/.pdf/.html uploaded."
    status = on_build_index()
    # The result is rendered as Markdown, where a single newline is only a
    # soft break; a blank line puts the index status in its own paragraph.
    return f"Uploaded: {', '.join(saved)}\n\n{status}"


def on_ask(question: str, top_k: int, lang_code: str, history: list):
    """
    Answer a question from the index and update the chat history.

    Args:
        question: The user's question; rejected if shorter than 3 characters
            after stripping whitespace.
        top_k: Number of hits to request from the searcher (coerced to int).
        lang_code: Language code, normalised with normalize_lang().
        history: Existing chat history as a list of (question, answer)
            pairs; None is treated as an empty history.

    Returns:
        A 5-tuple ``(history, answer, sources, links_md, forms_md)``. On a
        too-short question the history is returned unchanged together with a
        localised error message. If searching or synthesis raises, the answer
        carries the error text and the other fields show "(none)".
    """
    lang = normalize_lang(lang_code)
    # Guard against a missing history so the concatenation below cannot fail.
    history = history or []
    if not question or len(question.strip()) < 3:
        return history, L(lang, "intro_err"), f"{L(lang, 'sources')}: (none)", "", ""

    try:
        searcher = ensure_searcher()
        hits = searcher.search(question, k=int(top_k))
        answer = synthesize_with_evo(question, lang, hits)

        srcs = format_sources(hits)
        default_label = "Sources:"
        if srcs.startswith(default_label):
            # Localise only the leading label; any later "Sources:" text
            # inside the listed sources must be left untouched.
            srcs = f"{L(lang, 'sources')}:" + srcs[len(default_label):]

        all_links = extract_links(hits)
        form_links, other_links = split_form_links(all_links)
        links_md = links_markdown(L(lang, "links"), other_links)
        forms_md = links_markdown(L(lang, "forms"), form_links)

    except Exception as e:
        # UI boundary: surface the failure in the answer pane instead of raising.
        answer = f"Error: {e}\n\n{L(lang, 'tip_build')}"
        srcs = f"{L(lang, 'sources')}: (none)"
        links_md = f"**{L(lang, 'links')}:** (none)"
        forms_md = f"**{L(lang, 'forms')}:** (none)"

    history = history + [(question, answer)]
    return history, answer, srcs, links_md, forms_md


# UI definition. Components are created in display order inside the Blocks
# context, so statement order here determines the page layout.
# Static labels are looked up with the "en" language code regardless of the
# language later selected in the dropdown.
with gr.Blocks(title=L("en", "title")) as demo:
    gr.Markdown(f"# 🇲🇺 **{L('en','title')}** — Step 7")

    # Manual index rebuild: button plus a status line showing the result.
    with gr.Row():
        build_btn = gr.Button(L("en", "build_btn"))
        status = gr.Markdown(L("en", "status_idle"))
    build_btn.click(fn=on_build_index, outputs=status)

    # Admin: upload txt/pdf/html, then reindex
    with gr.Accordion("Admin: upload TXT / PDF / HTML and auto-reindex", open=False):
        upload = gr.File(
            file_count="multiple",
            file_types=["file"],  # allow any file; we'll filter by extension
            label="Upload .txt / .pdf / .html"
        )
        save_btn = gr.Button("Save & Reindex")
        upload_status = gr.Markdown()
        # Saves the accepted files and rebuilds the index in one step.
        save_btn.click(fn=on_upload_and_reindex, inputs=upload, outputs=upload_status)

    # Language selector; its value is passed to on_ask as lang_code.
    gr.Markdown("### Language / Langue / Langaz")
    lang = gr.Dropdown(choices=list(SUPPORTED.keys()), value="en", label="EN / FR / MFE")

    # Question input, number of hits to retrieve (3-12, default 6), and the ask button.
    gr.Markdown("### Ask a question / Posez une question / Poz enn kestyon")
    with gr.Row():
        q = gr.Textbox(placeholder=L("en", "ask_placeholder"), label=L("en", "ask_label"))
        k = gr.Slider(3, 12, step=1, value=6, label=L("en", "k_label"))
        ask_btn = gr.Button("Ask / Demander / Rode")

    # Output area: chat history plus separate panes for the latest answer,
    # its sources, general links and form links (the five values on_ask returns).
    chat = gr.Chatbot(label="Assistant", height=360)
    answer_md = gr.Markdown()
    sources_md = gr.Markdown()
    links_md = gr.Markdown()
    forms_md = gr.Markdown()

    # The button click and the textbox submit event are wired to the same
    # handler with identical inputs and outputs.
    ask_btn.click(
        fn=on_ask,
        inputs=[q, k, lang, chat],
        outputs=[chat, answer_md, sources_md, links_md, forms_md],
    )
    q.submit(
        fn=on_ask,
        inputs=[q, k, lang, chat],
        outputs=[chat, answer_md, sources_md, links_md, forms_md],
    )

# NOTE(review): launch() runs unconditionally at module level, so importing
# this module also starts the app; consider whether a __main__ guard is wanted.
demo.launch()