HemanM committed on
Commit
5bab10b
·
verified ·
1 Parent(s): 0c48a7c

Update app.py

Browse files
Files changed (1) hide show
  1. app.py +99 -35
app.py CHANGED
@@ -1,7 +1,8 @@
1
  """
2
- app.py — Step 8
3
- Adds a simple toggle to choose Extractive (safe) or Generative (Evo),
4
- plus temperature and max_tokens controls.
 
5
  """
6
 
7
  from pathlib import Path
@@ -9,7 +10,13 @@ from typing import List
9
  import gradio as gr
10
 
11
  from indexer import build_index
12
- from rag_search import RAGSearcher, format_sources, extract_links, split_form_links, links_markdown
 
 
 
 
 
 
13
  from evo_inference import synthesize_with_evo
14
  from utils_lang import SUPPORTED, normalize_lang, L
15
 
@@ -17,83 +24,140 @@ _searcher = None
17
  DATA_SEED_DIR = Path("data/seed")
18
  ALLOWED_EXTS = {".txt", ".pdf", ".html", ".htm", ".xhtml"}
19
 
 
20
  def ensure_searcher():
 
21
  global _searcher
22
- if _searcher is None: _searcher = RAGSearcher()
 
23
  return _searcher
24
 
 
25
  def on_build_index():
 
26
  try:
27
  msg = build_index()
28
- global _searcher; _searcher = None
 
29
  except Exception as e:
30
  msg = f"Error while building index: {e}"
31
  return msg
32
 
 
33
  def _save_files(files: List[gr.File]) -> List[str]:
34
- saved=[]; DATA_SEED_DIR.mkdir(parents=True, exist_ok=True)
35
- if not files: return saved
 
 
 
36
  for f in files:
37
  try:
38
  path = None
39
- if isinstance(f, dict) and "name" in f: path = Path(f["name"])
40
- elif hasattr(f, "name"): path = Path(getattr(f, "name"))
41
- elif isinstance(f, str): path = Path(f)
42
- if not path or not path.exists(): continue
43
- if path.suffix.lower() not in ALLOWED_EXTS: continue
 
 
 
 
 
44
  (DATA_SEED_DIR / path.name).write_bytes(path.read_bytes())
45
  saved.append(path.name)
46
- except: continue
 
47
  return saved
48
 
 
49
  def on_upload_and_reindex(files):
 
50
  saved = _save_files(files or [])
51
- if not saved: return "No valid .txt/.pdf/.html uploaded."
 
52
  status = on_build_index()
53
  return f"Uploaded: {', '.join(saved)}\n{status}"
54
 
55
- def on_ask(question: str, top_k: int, lang_code: str, mode_label: str, temp: float, max_tokens: int, history: list):
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
56
  lang = normalize_lang(lang_code)
57
  if not question or len(question.strip()) < 3:
58
- return history, L(lang,"intro_err"), f"{L(lang,'sources')}: (none)", "", ""
 
59
  try:
60
- searcher = ensure_searcher()
 
 
 
 
 
 
61
  hits = searcher.search(question, k=int(top_k))
62
- # Map UI label to internal arg
 
63
  mode = "generative" if mode_label.startswith("Generative") else "extractive"
64
 
65
  answer = synthesize_with_evo(
66
- question, lang, hits,
 
 
67
  mode=mode,
68
  max_new_tokens=int(max_tokens),
69
  temperature=float(temp),
70
  )
71
 
 
72
  srcs = format_sources(hits)
73
- if srcs.startswith("Sources:"): srcs = srcs.replace("Sources:", f"{L(lang,'sources')}:")
 
 
 
74
  all_links = extract_links(hits)
75
  form_links, other_links = split_form_links(all_links)
76
- links_md = links_markdown(L(lang,"links"), other_links)
77
- forms_md = links_markdown(L(lang,"forms"), form_links)
 
78
  except Exception as e:
79
- answer = f"Error: {e}\n\n{L(lang,'tip_build')}"
80
- srcs = f"{L(lang,'sources')}: (none)"
81
- links_md = f"**{L(lang,'links')}:** (none)"
82
- forms_md = f"**{L(lang,'forms')}:** (none)"
 
83
  history = history + [(question, answer)]
84
  return history, answer, srcs, links_md, forms_md
85
 
86
- with gr.Blocks(title=L("en","title")) as demo:
 
87
  gr.Markdown(f"# 🇲🇺 **{L('en','title')}** — Step 8")
88
 
89
- # Admin
90
  with gr.Row():
91
- build_btn = gr.Button(L("en","build_btn"))
92
- status = gr.Markdown(L("en","status_idle"))
93
  build_btn.click(fn=on_build_index, outputs=status)
94
 
95
  with gr.Accordion("Admin: upload TXT / PDF / HTML and auto-reindex", open=False):
96
- upload = gr.File(file_count="multiple", file_types=["file"], label="Upload .txt / .pdf / .html")
 
 
 
 
97
  save_btn = gr.Button("Save & Reindex")
98
  upload_status = gr.Markdown()
99
  save_btn.click(fn=on_upload_and_reindex, inputs=upload, outputs=upload_status)
@@ -107,15 +171,15 @@ with gr.Blocks(title=L("en","title")) as demo:
107
  mode = gr.Radio(
108
  choices=["Extractive (safe)", "Generative (Evo)"],
109
  value="Extractive (safe)",
110
- label="Mode"
111
  )
112
  temp = gr.Slider(0.0, 1.2, value=0.4, step=0.05, label="Temperature (gen)")
113
  max_tokens = gr.Slider(64, 384, value=192, step=16, label="Max new tokens (gen)")
114
 
115
  gr.Markdown("### Ask a question / Posez une question / Poz enn kestyon")
116
  with gr.Row():
117
- q = gr.Textbox(placeholder=L("en","ask_placeholder"), label=L("en","ask_label"))
118
- k = gr.Slider(3, 12, step=1, value=6, label=L("en","k_label"))
119
  ask_btn = gr.Button("Ask / Demander / Rode")
120
 
121
  chat = gr.Chatbot(label="Assistant", height=360)
 
1
  """
2
+ app.py — Step 8 (refined)
3
+ Adds:
4
+ - Auto-build of the index on first question (prevents "Index not found" error)
5
+ - Same Extractive/Generative toggle + temperature/max_tokens controls
6
  """
7
 
8
  from pathlib import Path
 
10
  import gradio as gr
11
 
12
  from indexer import build_index
13
+ from rag_search import (
14
+ RAGSearcher,
15
+ format_sources,
16
+ extract_links,
17
+ split_form_links,
18
+ links_markdown,
19
+ )
20
  from evo_inference import synthesize_with_evo
21
  from utils_lang import SUPPORTED, normalize_lang, L
22
 
 
24
  DATA_SEED_DIR = Path("data/seed")
25
  ALLOWED_EXTS = {".txt", ".pdf", ".html", ".htm", ".xhtml"}
26
 
27
+
def ensure_searcher():
    """Return the shared searcher, constructing it on first use.

    The instance is cached in the module-level ``_searcher`` so later calls
    reuse it. Constructing ``RAGSearcher`` may raise (the original comment
    notes a RuntimeError when the index is missing); that error is not
    handled here and propagates to the caller.
    """
    global _searcher
    if _searcher is not None:
        return _searcher
    _searcher = RAGSearcher()
    return _searcher
34
 
35
+
def on_build_index():
    """Rebuild the index and drop the cached searcher.

    Returns whatever ``build_index()`` returns on success, or an
    "Error while building index: ..." string if it raises. The cached
    searcher is cleared only after a successful build, so the next
    ``ensure_searcher()`` call creates a fresh one.
    """
    global _searcher
    try:
        build_report = build_index()
    except Exception as exc:
        return f"Error while building index: {exc}"
    _searcher = None  # force a reload on next use
    return build_report
45
 
46
+
def _save_files(files: List[gr.File]) -> List[str]:
    """Copy uploaded files with an allowed extension into ``DATA_SEED_DIR``.

    Each entry may be a dict with a ``"name"`` key, an object exposing a
    ``name`` attribute, or a plain path string. Entries that cannot be
    resolved to an existing file, or whose suffix is not in
    ``ALLOWED_EXTS``, are skipped. A failure on one entry is logged and does
    not abort the rest of the batch.

    Returns:
        Base names of the files that were written, in input order.
    """
    import logging  # local import keeps this block self-contained

    log = logging.getLogger(__name__)
    saved: List[str] = []
    DATA_SEED_DIR.mkdir(parents=True, exist_ok=True)
    if not files:
        return saved
    for f in files:
        try:
            path = None
            if isinstance(f, dict) and "name" in f:
                path = Path(f["name"])
            elif hasattr(f, "name"):
                path = Path(getattr(f, "name"))
            elif isinstance(f, str):
                path = Path(f)
            if path is None or not path.exists():
                continue
            if path.suffix.lower() not in ALLOWED_EXTS:
                continue
            # Only the base name is used, so the copy always lands directly
            # inside DATA_SEED_DIR.
            (DATA_SEED_DIR / path.name).write_bytes(path.read_bytes())
            saved.append(path.name)
        except Exception:
            # Best-effort by design: keep going, but record why the entry
            # was skipped instead of discarding the error silently.
            log.warning("Skipping uploaded entry %r", f, exc_info=True)
            continue
    return saved
71
 
72
+
def on_upload_and_reindex(files):
    """Store the uploaded files, then rebuild the index.

    Returns a fixed "No valid ..." message when nothing was saved;
    otherwise a two-line status: the saved file names followed by the
    result of ``on_build_index()``.
    """
    stored_names = _save_files(files if files else [])
    if stored_names:
        build_report = on_build_index()
        joined_names = ", ".join(stored_names)
        return f"Uploaded: {joined_names}\n{build_report}"
    return "No valid .txt/.pdf/.html uploaded."
80
 
81
+
def on_ask(
    question: str,
    top_k: int,
    lang_code: str,
    mode_label: str,
    temp: float,
    max_tokens: int,
    history: list,
):
    """
    Handle a user question end-to-end.

    Steps:
    - Obtain the searcher; on RuntimeError, build the index once and retry
    - Retrieve the top-k hits for the question
    - Synthesize an answer (extractive or generative, chosen from the UI label)
    - Format the sources and the links/forms extracted from the hits
    - Append the (question, answer) pair to the chat history

    Returns a 5-tuple: (history, answer, sources text, links markdown,
    forms markdown). A question with fewer than 3 characters after
    stripping returns early with the localized "intro_err" message and no
    new history entry. Any exception raised while answering is reported in
    the answer text instead of propagating.
    """
    # Guard against a None history: list concatenation below would raise
    # TypeError outside the try block and crash the handler.
    history = history or []

    lang = normalize_lang(lang_code)
    if not question or len(question.strip()) < 3:
        return history, L(lang, "intro_err"), f"{L(lang, 'sources')}: (none)", "", ""

    try:
        # Ensure searcher; if index missing, build once automatically then retry
        try:
            searcher = ensure_searcher()
        except RuntimeError:
            on_build_index()
            searcher = ensure_searcher()

        hits = searcher.search(question, k=int(top_k))

        # Map UI label to internal mode flag
        mode = "generative" if mode_label.startswith("Generative") else "extractive"

        answer = synthesize_with_evo(
            question,
            lang,
            hits,
            mode=mode,
            max_new_tokens=int(max_tokens),
            temperature=float(temp),
        )

        # Localized "Sources" header
        srcs = format_sources(hits)
        if srcs.startswith("Sources:"):
            srcs = srcs.replace("Sources:", f"{L(lang, 'sources')}:")

        # Extract URLs and split them into form links and other links
        all_links = extract_links(hits)
        form_links, other_links = split_form_links(all_links)
        links_md = links_markdown(L(lang, "links"), other_links)
        forms_md = links_markdown(L(lang, "forms"), form_links)

    except Exception as e:
        # UI boundary: show the failure to the user rather than raising
        answer = f"Error: {e}\n\n{L(lang, 'tip_build')}"
        srcs = f"{L(lang, 'sources')}: (none)"
        links_md = f"**{L(lang, 'links')}:** (none)"
        forms_md = f"**{L(lang, 'forms')}:** (none)"

    history = history + [(question, answer)]
    return history, answer, srcs, links_md, forms_md
144
 
145
+
146
+ with gr.Blocks(title=L("en", "title")) as demo:
147
  gr.Markdown(f"# 🇲🇺 **{L('en','title')}** — Step 8")
148
 
149
+ # Admin: build index / upload & reindex
150
  with gr.Row():
151
+ build_btn = gr.Button(L("en", "build_btn"))
152
+ status = gr.Markdown(L("en", "status_idle"))
153
  build_btn.click(fn=on_build_index, outputs=status)
154
 
155
  with gr.Accordion("Admin: upload TXT / PDF / HTML and auto-reindex", open=False):
156
+ upload = gr.File(
157
+ file_count="multiple",
158
+ file_types=["file"],
159
+ label="Upload .txt / .pdf / .html",
160
+ )
161
  save_btn = gr.Button("Save & Reindex")
162
  upload_status = gr.Markdown()
163
  save_btn.click(fn=on_upload_and_reindex, inputs=upload, outputs=upload_status)
 
171
  mode = gr.Radio(
172
  choices=["Extractive (safe)", "Generative (Evo)"],
173
  value="Extractive (safe)",
174
+ label="Mode",
175
  )
176
  temp = gr.Slider(0.0, 1.2, value=0.4, step=0.05, label="Temperature (gen)")
177
  max_tokens = gr.Slider(64, 384, value=192, step=16, label="Max new tokens (gen)")
178
 
179
  gr.Markdown("### Ask a question / Posez une question / Poz enn kestyon")
180
  with gr.Row():
181
+ q = gr.Textbox(placeholder=L("en", "ask_placeholder"), label=L("en", "ask_label"))
182
+ k = gr.Slider(3, 12, step=1, value=6, label=L("en", "k_label"))
183
  ask_btn = gr.Button("Ask / Demander / Rode")
184
 
185
  chat = gr.Chatbot(label="Assistant", height=360)