File size: 5,845 Bytes
07f8c0c
57774c2
ced2a62
57774c2
ced2a62
 
 
07f8c0c
 
ced2a62
 
 
 
1849681
ced2a62
e55c124
ced2a62
 
 
 
 
 
 
57774c2
 
 
ced2a62
 
 
e55c124
 
 
ced2a62
e55c124
 
 
 
1849681
57774c2
1849681
ced2a62
1849681
 
e55c124
ced2a62
1849681
 
 
07f8c0c
57774c2
ced2a62
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
57774c2
e55c124
ced2a62
 
 
 
 
 
e55c124
57774c2
e55c124
ced2a62
e55c124
 
 
 
57774c2
ced2a62
 
57774c2
 
 
ced2a62
 
 
 
 
 
 
e55c124
57774c2
 
ced2a62
 
e55c124
 
ced2a62
e55c124
 
57774c2
ced2a62
07f8c0c
ced2a62
1849681
57774c2
 
1849681
 
ced2a62
 
 
 
 
 
 
57774c2
 
 
 
e55c124
57774c2
 
 
 
e55c124
 
 
ced2a62
 
 
 
 
 
 
 
 
 
 
 
 
e55c124
07f8c0c
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
"""
app.py
Step 6: Add official links & forms display + Admin upload (TXT) with auto-reindex.

New in Step 6 (Objective):
- After answering, we extract URLs from retrieved chunks, split "forms", and show both as clickable lists.
- Admin can upload new TXT files from the UI; we save them to data/seed/ and rebuild the index automatically.
"""

import os
from pathlib import Path
from typing import List

import gradio as gr

from indexer import build_index
from rag_search import (
    RAGSearcher,
    format_sources,
    extract_links,
    split_form_links,
    links_markdown,
)
from evo_inference import synthesize_with_evo
from utils_lang import SUPPORTED, normalize_lang, L

# (Objective) Process-wide RAGSearcher slot; stays None until a searcher is
# first requested, and is set back to None when the index is rebuilt.
_searcher = None
# Folder that receives the uploaded seed .txt files.
DATA_SEED_DIR = Path("data") / "seed"


def ensure_searcher():
    """(Objective) Return the shared RAGSearcher, constructing it on first use."""
    global _searcher
    # Fast path: an instance already exists, hand it back unchanged.
    if _searcher is not None:
        return _searcher
    _searcher = RAGSearcher()
    return _searcher


def on_build_index():
    """
    (Objective) Rebuild the index and drop the cached searcher.

    Returns the status message produced by ``build_index()``; if the build
    raises, returns an error message instead and leaves the cached searcher
    untouched.
    """
    global _searcher
    try:
        result = build_index()
    except Exception as err:
        return f"Error while building index: {err}"
    # Forget the old searcher so the next query creates a fresh one.
    _searcher = None
    return result


def save_uploaded_txt(files: List[gr.File]) -> List[str]:
    """
    (Objective) Save uploaded .txt files into data/seed/.

    Each entry of ``files`` may be:
    - a path string or ``os.PathLike`` object,
    - a dict carrying the path under the ``"name"`` key,
    - any object exposing the path as a ``.name`` attribute.

    Entries without a usable path or without a ``.txt`` suffix are skipped.
    An entry that fails to copy is also skipped, but the failure is logged
    so an admin can see why a file did not arrive.

    Returns a list of saved filenames (base names, in input order).
    """
    import logging  # function-scope import keeps this block self-contained

    log = logging.getLogger(__name__)

    saved: List[str] = []
    DATA_SEED_DIR.mkdir(parents=True, exist_ok=True)
    if not files:
        return saved

    for f in files:
        try:
            if isinstance(f, dict):
                # Some gradio versions pass dicts with 'name'
                path = f.get("name")
            elif isinstance(f, (str, os.PathLike)):
                # Use the full path; a Path's ``.name`` is only its base name.
                path = os.fspath(f)
            else:
                path = getattr(f, "name", None)
            if not path:
                continue

            src = Path(path)
            if src.suffix.lower() != ".txt":
                continue  # Only .txt for this step

            # The source is read before the destination is opened, so a
            # failed read never leaves an empty file in the seed folder.
            dest = DATA_SEED_DIR / src.name
            dest.write_bytes(src.read_bytes())
            saved.append(src.name)
        except Exception:
            # Best-effort: keep processing the remaining files, but record
            # what went wrong instead of dropping the error silently.
            log.warning("Skipping uploaded file %r: could not save it", f, exc_info=True)
            continue
    return saved


def on_upload_and_reindex(files) -> str:
    """
    (Objective) Store the uploaded .txt files, then rebuild the index.

    Returns a notice when nothing was saved; otherwise the saved filenames
    followed by the index build status on the next line.
    """
    stored = save_uploaded_txt(files if files else [])
    if len(stored) == 0:
        return "No valid .txt uploaded."
    build_status = on_build_index()
    uploaded = ", ".join(stored)
    return f"Uploaded: {uploaded}\n{build_status}"


def on_ask(question: str, top_k: int, lang_code: str, history: list):
    """
    (Objective) Handle user question end-to-end:
    - Ensure index exists
    - Retrieve top-k hits
    - Synthesize grounded answer (templated Evo)
    - Extract and render links & forms
    - Append to chat history

    Parameters:
    - question: the user's question; rejected when it has fewer than 3
      non-blank characters.
    - top_k: number of hits to retrieve (coerced with ``int``).
    - lang_code: language code, normalized via ``normalize_lang``.
    - history: list of (question, answer) pairs; ``None`` is treated as an
      empty chat.

    Returns a 5-tuple: (history, answer, sources, links markdown,
    forms markdown). Any exception raised while searching or synthesizing
    is reported inside the answer text rather than propagated.
    """
    lang = normalize_lang(lang_code)
    # A missing history would make ``history + [...]`` raise TypeError below.
    if history is None:
        history = []

    if not question or len(question.strip()) < 3:
        return history, L(lang, "intro_err"), f"{L(lang, 'sources')}: (none)", "", ""

    try:
        searcher = ensure_searcher()
        hits = searcher.search(question, k=int(top_k))
        answer = synthesize_with_evo(question, lang, hits)

        # Sources (localized header)
        srcs = format_sources(hits)
        if srcs.startswith("Sources:"):
            # count=1: only the leading header is localized; any later
            # "Sources:" inside the listed text is left untouched.
            srcs = srcs.replace("Sources:", f"{L(lang, 'sources')}:", 1)

        # Links & Forms
        all_links = extract_links(hits)
        form_links, other_links = split_form_links(all_links)
        links_md = links_markdown(L(lang, "links"), other_links)
        forms_md = links_markdown(L(lang, "forms"), form_links)

    except Exception as e:
        answer = f"Error: {e}\n\n{L(lang, 'tip_build')}"
        srcs = f"{L(lang, 'sources')}: (none)"
        links_md = f"**{L(lang, 'links')}:** (none)"
        forms_md = f"**{L(lang, 'forms')}:** (none)"

    # Build a new list so the caller's history object is not mutated.
    history = history + [(question, answer)]
    return history, answer, srcs, links_md, forms_md


# (Objective) Declare the whole UI, then start the app.
with gr.Blocks(title=L("en", "title")) as demo:
    gr.Markdown(f"# 🇲🇺 **{L('en','title')}** — Step 6")

    # --- Admin: manual index rebuild, with its status line alongside
    with gr.Row():
        build_btn = gr.Button(L("en", "build_btn"))
        status = gr.Markdown(L("en", "status_idle"))
    build_btn.click(fn=on_build_index, outputs=status)

    # --- Admin: upload TXT files; saving them also triggers a reindex
    with gr.Accordion("Admin: upload TXT and auto-reindex", open=False):
        upload = gr.File(
            file_count="multiple",
            file_types=["text"],
            label="Upload .txt files",
        )
        save_btn = gr.Button("Save & Reindex")
        upload_status = gr.Markdown()
        save_btn.click(
            fn=on_upload_and_reindex,
            inputs=upload,
            outputs=upload_status,
        )

    # --- User Q&A: language selector
    gr.Markdown("### Language / Langue / Langaz")
    lang = gr.Dropdown(
        choices=list(SUPPORTED.keys()),
        value="en",
        label="EN / FR / MFE",
    )

    # --- User Q&A: question box, top-k slider and ask button
    gr.Markdown("### Ask a question / Posez une question / Poz enn kestyon")
    with gr.Row():
        q = gr.Textbox(
            placeholder=L("en", "ask_placeholder"),
            label=L("en", "ask_label"),
        )
        k = gr.Slider(3, 12, step=1, value=6, label=L("en", "k_label"))
        ask_btn = gr.Button("Ask / Demander / Rode")

    # --- Output panels, in the order on_ask returns its values
    chat = gr.Chatbot(label="Assistant", height=360)
    answer_md = gr.Markdown()
    sources_md = gr.Markdown()
    links_md = gr.Markdown()
    forms_md = gr.Markdown()

    # The button click and Enter in the textbox share one handler and the
    # same wiring; fresh input/output lists are built for each registration.
    for _register in (ask_btn.click, q.submit):
        _register(
            fn=on_ask,
            inputs=[q, k, lang, chat],
            outputs=[chat, answer_md, sources_md, links_md, forms_md],
        )

demo.launch()