shukdevdattaEX committed on
Commit
46c2b45
·
verified ·
1 Parent(s): fd142b1

Update app.py

Browse files
Files changed (1) hide show
  1. app.py +301 -102
app.py CHANGED
@@ -97,97 +97,265 @@ def safe_load_phpmyadmin_like_json(raw_text: str) -> List[Dict[str, Any]]:
97
  return objs
98
 
99
  # -----------------------------
100
- # Build a retriever-friendly corpus
101
  # -----------------------------
102
def flatten_json_to_corpus(docs: List[Dict[str, Any]], max_value_len: int = 500) -> List[Dict[str, Any]]:
    """
    Turn the exported structure into small searchable text chunks.

    Every dict row of a ``type == "table"`` object becomes one passage of the
    form ``[table=name idx=i] key=value ; ...``; each stringified value is cut
    to ``max_value_len`` characters (an ellipsis marks the cut). Any other
    top-level entry is kept as a JSON dump capped at 2000 characters.

    Args:
        docs: Parsed top-level objects of the export.
        max_value_len: Maximum number of characters kept per field value.

    Returns:
        A list of ``{"table", "idx", "text"}`` dicts, in input order.
    """
    corpus = []
    for obj in docs:
        # Top-level entries are not guaranteed to be dicts; only dicts carry a
        # "type", so anything else falls through to the metadata branch
        # instead of raising AttributeError on .get().
        otype = obj.get("type") if isinstance(obj, dict) else None

        if otype != "table":
            # Non-table entries (headers, etc.): keep a small representation.
            text = json.dumps(obj, ensure_ascii=False)[:2000]
            corpus.append({"table": otype or "meta", "idx": -1, "text": text})
            continue

        tname = obj.get("name", "unknown_table")
        rows = obj.get("data", [])
        if not isinstance(rows, list):
            continue

        for i, row in enumerate(rows):
            if not isinstance(row, dict):
                continue
            parts = []
            for k, v in row.items():
                val = str(v)
                if len(val) > max_value_len:
                    val = val[:max_value_len] + "…"
                parts.append(f"{k}={val}")
            text = f"[table={tname} idx={i}] " + " ; ".join(parts)
            corpus.append({"table": tname, "idx": i, "text": text})
    return corpus
129
 
130
  # -----------------------------
131
- # Super-simple keyword retriever
132
  # -----------------------------
133
- def _tokenize(s: str) -> List[str]:
134
- return re.findall(r"[A-Za-z0-9_]+", s.lower())
135
-
136
def score_doc(query: str, doc_text: str) -> float:
    """
    Score a passage by how many of its tokens also occur in the query.

    Every document token found in the query's token set counts once (repeats
    in the document count again); the total is divided by
    ``log2(len(doc_tokens) + 2)`` so long passages do not win on size alone.
    A passage with no tokens scores 0.0.
    """
    query_terms = _tokenize(query)
    doc_terms = _tokenize(doc_text)
    if not doc_terms:
        return 0.0

    wanted = set(query_terms)
    hits = 0
    for term in doc_terms:
        if term in wanted:
            hits += 1

    # Length normalization.
    return hits / math.log2(len(doc_terms) + 2)
148
-
149
def retrieve_top_k(query: str, corpus: List[Dict[str, Any]], k: int = 10, per_table_cap: int = 5) -> List[Dict[str, Any]]:
    """
    Return up to ``k`` passages ranked by ``score_doc`` against ``query``.

    Only passages with a positive score are selected, and at most
    ``per_table_cap`` passages are taken from any one table so a single table
    cannot flood the context. If nothing scores above zero, the first ``k``
    entries of the ranked list are returned instead.
    """
    ranked = sorted(
        ((score_doc(query, entry["text"]), entry) for entry in corpus),
        key=lambda pair: pair[0],
        reverse=True,
    )

    taken_per_table = {}
    picked = []
    for points, entry in ranked:
        if points <= 0:
            continue
        table = entry.get("table", "unknown")
        already = taken_per_table.get(table, 0)
        if already >= per_table_cap:
            continue
        picked.append(entry)
        taken_per_table[table] = already + 1
        if len(picked) >= k:
            break

    if picked:
        return picked
    # Nothing scored positive: hand back the head of the ranked list anyway.
    return [entry for _, entry in ranked[:k]]
 
 
 
 
170
 
171
  # -----------------------------
172
- # Compose prompt for Together model
173
  # -----------------------------
174
def build_prompt(query: str, passages: List[Dict[str, Any]]) -> str:
    """
    Compose the model prompt from the user question and retrieved passages.

    The ``"text"`` of each passage is placed, in order and separated by blank
    lines, under the context heading; the question and the fixed instruction
    list surround it.
    """
    context = "\n\n".join(passage["text"] for passage in passages)
    return f"""You are a strict JSON-knowledge assistant. Answer ONLY using the provided context from the JSON export.
If the answer is not present, say you could not find it in the JSON.

# User question
{query}

# Context (JSON-derived snippets)
{context}

# Instructions
- Cite table names and ids if helpful (e.g., table=admission_acceptance_lists idx=12).
- Do not invent any data that is not in the context."""
193
 
@@ -197,96 +365,120 @@ If the answer is not present, say you could not find it in the JSON.
def call_together(api_key: str, prompt: str) -> str:
    """
    Send ``prompt`` as a single user message to the Together chat API.

    Returns the first choice's message content, or a warning string when no
    API key was supplied. Exceptions raised by the SDK are not caught here.
    """
    key = api_key.strip() if api_key else ""
    if not key:
        return "⚠️ Please enter your Together API key."

    # The key is both exported to the process environment and passed to the
    # client explicitly, so the SDK picks it up everywhere.
    os.environ["TOGETHER_API_KEY"] = key
    client = Together(api_key=key)
    completion = client.chat.completions.create(
        model="lgai/exaone-3-5-32b-instruct",
        messages=[{"role": "user", "content": prompt}],
        temperature=0.2,
    )
    first_choice = completion.choices[0]
    return first_choice.message.content
 
 
 
 
 
 
209
 
210
  # -----------------------------
211
  # Gradio App
212
  # -----------------------------
213
- with gr.Blocks(title="JSON Chatbot (Together)") as demo:
214
- gr.Markdown("## πŸ“š JSON Chatbot on Your Dump (Together Exaone 3.5 32B)\nUpload your JSON export and ask questions. The app safely loads imperfect JSON and retrieves the most relevant rows to answer your query.")
215
 
216
  with gr.Row():
217
  api_key_tb = gr.Textbox(label="Together API Key", type="password", placeholder="Paste your TOGETHER_API_KEY here")
218
- topk_slider = gr.Slider(3, 20, value=10, step=1, label="Top-K JSON Passages")
219
 
220
  with gr.Row():
221
  json_file = gr.File(label="Upload JSON export (e.g., phpMyAdmin export)", file_count="single", file_types=[".json"])
222
  fallback_path = gr.Textbox(label="Or fixed path on disk (optional)", placeholder="e.g., sultanbr_innovativeskills.json")
223
 
224
- with gr.Accordion("Advanced", open=False):
225
- per_table_cap = gr.Slider(1, 10, value=5, step=1, label="Max passages per table")
226
- max_val_len = gr.Slider(100, 2000, value=500, step=50, label="Max value length per field (truncation)")
227
 
228
- status = gr.Markdown("")
229
- chatbot = gr.Chatbot(height=420)
230
- user_box = gr.Textbox(label="Ask something about the JSON...", placeholder="e.g., What are the admission criteria?")
231
- clear_btn = gr.Button("Clear", variant="secondary")
 
 
 
 
 
 
 
 
 
232
 
233
  # States
234
- state_corpus = gr.State([]) # list of {"table","idx","text"}
235
- state_docs = gr.State([]) # raw list of parsed json objects
236
 
def load_json_to_corpus(file_obj, path_text, max_value_len):
    """
    Load JSON from the uploaded file (preferred) or from a disk path (fallback)
    and build the retrieval corpus.

    Returns a ``(status_text, corpus, docs)`` tuple; on any failure the status
    carries the error text and both lists are empty.
    """
    try:
        if file_obj is None:
            location = (path_text or "").strip()
            if not location:
                return ("⚠️ Please upload a JSON file or provide a valid path.", [], [])
        else:
            location = file_obj.name

        with open(location, "r", encoding="utf-8", errors="replace") as handle:
            raw = handle.read()

        docs = safe_load_phpmyadmin_like_json(raw)
        # Some exports might be a single object: normalize to a list.
        if not isinstance(docs, list):
            docs = [docs]

        corpus = flatten_json_to_corpus(docs, max_value_len=int(max_value_len))
        return (f"βœ… Loaded {len(docs)} top-level objects; built {len(corpus)} passages.", corpus, docs)
    except Exception as e:
        return (f"❌ Load error: {e}", [], [])
265
 
266
def ask(api_key, query, history, corpus, k, cap):
    """
    Answer one chat turn and return the extended history.

    Appends a warning turn when no corpus is loaded or the question is blank;
    otherwise retrieves passages, builds the prompt, calls the model and
    appends ``[query, answer]``. API failures become an error message turn.
    """
    if not corpus:
        return history + [[query, "⚠️ Please upload/load the JSON first."]]

    is_blank = not query or not query.strip()
    if is_blank:
        return history + [["", "⚠️ Please enter a question."]]

    # Retrieve relevant snippets and wrap them into the prompt.
    passages = retrieve_top_k(query, corpus, k=int(k), per_table_cap=int(cap))
    prompt = build_prompt(query, passages)

    try:
        reply = call_together(api_key, prompt)
    except Exception as e:
        reply = f"❌ API error: {e}"

    return history + [[query, reply]]
283
 
284
- # Wire events
285
  json_file.upload(
286
  load_json_to_corpus,
287
  inputs=[json_file, fallback_path, max_val_len],
288
  outputs=[status, state_corpus, state_docs],
289
  )
 
290
  fallback_path.change(
291
  load_json_to_corpus,
292
  inputs=[json_file, fallback_path, max_val_len],
@@ -294,14 +486,21 @@ with gr.Blocks(title="JSON Chatbot (Together)") as demo:
294
  )
295
 
296
  user_box.submit(
297
- ask,
298
  inputs=[api_key_tb, user_box, chatbot, state_corpus, topk_slider, per_table_cap],
299
  outputs=[chatbot],
 
 
 
 
 
 
300
  )
301
 
302
- clear_btn.click(lambda: ([], "", "πŸ”„ Ready. Upload JSON or set a path, then ask a question."),
303
- inputs=[],
304
- outputs=[chatbot, user_box, status])
 
305
 
306
  if __name__ == "__main__":
307
  demo.launch()
 
97
  return objs
98
 
99
  # -----------------------------
100
+ # Enhanced corpus building with better indexing
101
  # -----------------------------
102
def flatten_json_to_corpus(docs: List[Dict[str, Any]], max_value_len: int = 1000) -> List[Dict[str, Any]]:
    """
    Turn the exported structure into searchable text chunks.

    For every dict row of a ``type == "table"`` object two passages are
    emitted: a ``key=value`` listing (``type="row"``) and, when the row has at
    least one non-blank value, a values-only line (``type="values"``). Each
    non-empty table additionally gets one ``type="summary"`` passage with its
    row count and up to ten field names taken from the first row. Every other
    top-level entry becomes a single ``type="meta"`` passage built from its
    leaf values (first 20 leaves, capped at 2000 characters).

    Values that are empty or equal to ``null``/``none`` (case-insensitive)
    after stripping are left out of the passage text.

    Args:
        docs: Parsed top-level objects of the export.
        max_value_len: Maximum number of characters kept per row field value.

    Returns:
        A list of ``{"table", "idx", "text", "type", "raw_data"}`` dicts.
    """
    corpus: List[Dict[str, Any]] = []

    def is_meaningful(text: str) -> bool:
        """True for text that is neither empty nor a null/none placeholder."""
        return bool(text) and text.lower() not in ("null", "none")

    def extract_all_text_values(node: Any) -> List[str]:
        """Recursively collect leaf values from nested dicts/lists as text."""
        texts: List[str] = []
        if isinstance(node, dict):
            for k, v in node.items():
                if isinstance(v, (dict, list)):
                    texts.extend(extract_all_text_values(v))
                else:
                    val_str = str(v).strip()
                    if is_meaningful(val_str):
                        texts.append(f"{k}: {val_str}")
        elif isinstance(node, list):
            for item in node:
                texts.extend(extract_all_text_values(item))
        else:
            val_str = str(node).strip()
            if is_meaningful(val_str):
                texts.append(val_str)
        return texts

    for obj_idx, obj in enumerate(docs):
        # Top-level entries are not guaranteed to be dicts; anything else is
        # indexed as "unknown" metadata instead of raising on .get().
        obj_type = obj.get("type", "unknown") if isinstance(obj, dict) else "unknown"

        if obj_type != "table":
            # Non-table entries: extract all textual content.
            all_texts = extract_all_text_values(obj)
            if all_texts:
                text = f"[{obj_type}] " + " | ".join(all_texts[:20])  # limit to keep it short
                if len(text) > 2000:
                    text = text[:2000] + "…"
                corpus.append({
                    "table": obj_type,
                    "idx": obj_idx,
                    "text": text,
                    "type": "meta",
                    "raw_data": obj,
                })
            continue

        table_name = obj.get("name", f"table_{obj_idx}")
        rows = obj.get("data", [])
        if not isinstance(rows, list):
            continue

        for row_idx, row in enumerate(rows):
            if not isinstance(row, dict):
                continue

            parts = []
            all_values = []
            for k, v in row.items():
                val = str(v).strip()
                if len(val) > max_value_len:
                    val = val[:max_value_len] + "…"
                if is_meaningful(val):
                    parts.append(f"{k}={val}")
                    all_values.append(val)

            # Main key=value representation of the row.
            corpus.append({
                "table": table_name,
                "idx": row_idx,
                "text": f"[table={table_name} row={row_idx}] " + " | ".join(parts),
                "type": "row",
                "raw_data": row,
            })

            # Values-only variant so bare names match without their column.
            if all_values:
                corpus.append({
                    "table": table_name,
                    "idx": row_idx,
                    "text": f"[table={table_name} row={row_idx}] Contains: " + " ".join(all_values),
                    "type": "values",
                    "raw_data": row,
                })

        # One summary passage per non-empty table.
        if rows:
            sample_keys = list(rows[0].keys())[:10] if isinstance(rows[0], dict) else []
            field_list = ", ".join(str(key) for key in sample_keys)
            corpus.append({
                "table": table_name,
                "idx": -1,
                "text": f"[table={table_name} summary] Table with {len(rows)} rows. Fields: {field_list}",
                "type": "summary",
                "raw_data": {"row_count": len(rows), "fields": sample_keys},
            })

    return corpus
204
 
205
  # -----------------------------
206
+ # Enhanced retrieval with multiple scoring methods
207
  # -----------------------------
208
+ def _tokenize_enhanced(s: str) -> List[str]:
209
+ """Enhanced tokenization that handles names and phrases better"""
210
+ # Keep original words, lowercase versions, and partial matches
211
+ tokens = []
212
+
213
+ # Get word tokens
214
+ words = re.findall(r"[A-Za-z0-9_]+", s)
215
+ for word in words:
216
+ tokens.append(word.lower())
217
+ if len(word) > 3:
218
+ # Add partial tokens for name matching
219
+ tokens.append(word[:4].lower())
220
+
221
+ # Also extract quoted phrases and camelCase splits
222
+ quoted = re.findall(r'"([^"]*)"', s)
223
+ for q in quoted:
224
+ tokens.extend(q.lower().split())
225
+
226
+ return tokens
227
+
228
def calculate_enhanced_score(query: str, doc_text: str, doc_data: Dict) -> float:
    """
    Score how well a passage matches a query by combining several signals.

    Signals, in order:
      1. +10 when the whole lowercase query occurs verbatim in the passage.
      2. +2 per distinct token shared by query and passage, plus +0.5 for
         every (query token, passage token) pair in which one contains the
         other; the running score is then divided by
         ``log2(len(passage_tokens) + 2)``.
      3. Flat boosts applied after normalization: +5 when both sides mention
         "instructor", +3 for "batch", and +2 for ``type == "row"`` passages
         when the query contains "who", "name" or "person".

    Args:
        query: The user's question.
        doc_text: The passage text to score.
        doc_data: The corpus entry for the passage; only ``"type"`` is read.

    Returns:
        A non-negative float; 0.0 means no signal fired.
    """
    q_lower = query.lower()
    d_lower = doc_text.lower()

    score = 0.0

    # 1. Exact phrase matching (highest weight). The empty string is a
    #    substring of everything, so a blank query must not count as a hit.
    if q_lower.strip() and q_lower in d_lower:
        score += 10.0

    # 2. Token-based matching.
    q_tokens = _tokenize_enhanced(query)
    d_tokens = _tokenize_enhanced(doc_text)

    if d_tokens:
        score += len(set(q_tokens) & set(d_tokens)) * 2.0

        # Partial (substring) matches for names. Tokens of one or two
        # characters are ignored on BOTH sides: a passage token such as "1"
        # or "a" is contained in almost any query token and would otherwise
        # inflate the score of unrelated rows.
        long_d_tokens = [tok for tok in d_tokens if len(tok) > 2]
        for q_tok in q_tokens:
            if len(q_tok) <= 2:
                continue
            for d_tok in long_d_tokens:
                if q_tok in d_tok or d_tok in q_tok:
                    score += 0.5

        # Length normalization uses the full token count of the passage.
        score = score / math.log2(len(d_tokens) + 2)

    # 3. Boosts for certain kinds of content.
    if "instructor" in q_lower and "instructor" in d_lower:
        score += 5.0

    if "batch" in q_lower and "batch" in d_lower:
        score += 3.0

    # Prefer concrete rows over summaries when the query asks about people.
    if any(word in q_lower for word in ("who", "name", "person")):
        if doc_data.get("type") == "row":
            score += 2.0

    return score
274
+
275
def retrieve_top_k_enhanced(query: str, corpus: List[Dict[str, Any]], k: int = 15, per_table_cap: int = 8) -> List[Dict[str, Any]]:
    """
    Return up to ``k`` passages for ``query``, ranked and diversified.

    Passages are scored with ``calculate_enhanced_score``; only positive
    scores are ranked. Selection walks the ranking and skips a passage when
    its table already contributed ``per_table_cap`` passages, or when its
    passage type already holds ``k // 3`` slots and more than ``k // 2``
    results are chosen.

    If fewer than three passages survive, the result is topped up with the
    best-scoring passages not yet chosen (each passage appears at most once).
    If nothing matched at all, the first ``k`` corpus entries are returned so
    the caller still has some context to show the model.

    Args:
        query: The user's question.
        corpus: Entries with at least a ``"text"`` key.
        k: Maximum number of passages to return.
        per_table_cap: Maximum passages taken from one table.

    Returns:
        A list of corpus entries (the same dict objects), best first.
    """
    if k <= 0 or not corpus:
        return []

    scored = []
    for doc in corpus:
        score = calculate_enhanced_score(query, doc["text"], doc)
        if score > 0:
            scored.append((score, doc))
    scored.sort(key=lambda pair: pair[0], reverse=True)

    # Apply diversity constraints while walking the ranking.
    table_counts: Dict[str, int] = {}
    type_counts: Dict[str, int] = {}
    result: List[Dict[str, Any]] = []

    for _, doc in scored:
        table_name = doc.get("table", "unknown")
        doc_type = doc.get("type", "unknown")

        if table_counts.get(table_name, 0) >= per_table_cap:
            continue

        # Prefer diverse content types once the result is half full.
        if type_counts.get(doc_type, 0) >= k // 3 and len(result) > k // 2:
            continue

        result.append(doc)
        table_counts[table_name] = table_counts.get(table_name, 0) + 1
        type_counts[doc_type] = type_counts.get(doc_type, 0) + 1

        if len(result) >= k:
            break

    if len(result) < 3:
        # Too few survivors: relax the caps, but never add a passage twice.
        # Identity is used because corpus entries are plain (unhashable) dicts.
        chosen = {id(doc) for doc in result}
        for _, doc in scored:
            if len(result) >= k:
                break
            if id(doc) not in chosen:
                result.append(doc)
                chosen.add(id(doc))

        # Nothing scored positive: fall back to the head of the corpus.
        if not result:
            result = list(corpus[:k])

    return result
319
 
320
  # -----------------------------
321
+ # Enhanced prompt building
322
  # -----------------------------
323
def build_enhanced_prompt(query: str, passages: List[Dict[str, Any]]) -> str:
    """
    Assemble the model prompt from the question and retrieved passages.

    Passages whose ``"type"`` is ``"summary"`` are listed one per line under
    the tables heading; all other passages go, separated by blank lines, under
    the detailed-context heading. Relative order within each group is kept.
    """
    summaries = []
    details = []
    for entry in passages:
        bucket = summaries if entry.get("type") == "summary" else details
        bucket.append(entry["text"])

    tables_block = "\n".join(summaries)
    details_block = "\n\n".join(details)

    return f"""You are a thorough JSON database assistant. Answer using ONLY the provided context from the JSON export.

# User Question
{query}

# Available Tables Summary
{tables_block}

# Detailed Context (Most Relevant Entries)
{details_block}

# Instructions
- Search through ALL provided context thoroughly
- For person names, look for partial matches and variations
- For roles like "instructor" or "teacher", check all relevant entries
- If asking about people, include their roles, associations, and related info
- Cite specific table names and row indices when possible
- If information exists in the context but seems incomplete, mention what you found
- Only say "not found" if you genuinely cannot locate relevant information after thorough checking
- Be comprehensive - don't just return the first match you find"""
361
 
 
def call_together(api_key: str, prompt: str) -> str:
    """
    Send ``prompt`` as a single user message to the Together chat API.

    Args:
        api_key: Together API key; surrounding whitespace is ignored.
        prompt: Full prompt text for the model.

    Returns:
        The model's reply text. A warning string is returned instead when the
        key is missing or the reply is empty, and an error string when the
        request raises; this function itself never raises.
    """
    key = (api_key or "").strip()
    if not key:
        return "⚠️ Please enter your Together API key."

    try:
        # Set env and client to ensure the SDK picks it up everywhere.
        # NOTE(review): os.environ is process-wide, so the most recently
        # submitted key is visible to everything running in this process.
        os.environ["TOGETHER_API_KEY"] = key
        client = Together(api_key=key)

        resp = client.chat.completions.create(
            model="lgai/exaone-3-5-32b-instruct",
            messages=[{"role": "user", "content": prompt}],
            temperature=0.1,  # lower temperature for more focused responses
            max_tokens=1000,
        )

        # Guard against an empty choice list or a null content field so the
        # caller always receives a string.
        content = resp.choices[0].message.content if resp.choices else None
        return content or "⚠️ The model returned an empty response."
    except Exception as e:
        return f"❌ API Error: {str(e)}"
383
 
384
  # -----------------------------
385
  # Gradio App
386
  # -----------------------------
387
+ with gr.Blocks(title="Enhanced JSON Chatbot") as demo:
388
+ gr.Markdown("## πŸ“š Enhanced JSON Chatbot (Together Exaone 3.5 32B)\nUpload your JSON export and ask questions. Enhanced retrieval system for better name and role matching.")
389
 
390
  with gr.Row():
391
  api_key_tb = gr.Textbox(label="Together API Key", type="password", placeholder="Paste your TOGETHER_API_KEY here")
392
+ topk_slider = gr.Slider(5, 30, value=15, step=1, label="Top-K JSON Passages")
393
 
394
  with gr.Row():
395
  json_file = gr.File(label="Upload JSON export (e.g., phpMyAdmin export)", file_count="single", file_types=[".json"])
396
  fallback_path = gr.Textbox(label="Or fixed path on disk (optional)", placeholder="e.g., sultanbr_innovativeskills.json")
397
 
398
+ with gr.Accordion("Advanced Settings", open=False):
399
+ per_table_cap = gr.Slider(3, 15, value=8, step=1, label="Max passages per table")
400
+ max_val_len = gr.Slider(200, 2000, value=1000, step=100, label="Max value length per field")
401
 
402
+ status = gr.Markdown("πŸ”„ Ready. Upload JSON file to begin.")
403
+
404
+ with gr.Row():
405
+ with gr.Column(scale=4):
406
+ chatbot = gr.Chatbot(height=500)
407
+ user_box = gr.Textbox(
408
+ label="Ask about your JSON data...",
409
+ placeholder="e.g., Who are the batch instructors? or Who is Shukdev Datta?",
410
+ lines=2
411
+ )
412
+ with gr.Column(scale=1):
413
+ clear_btn = gr.Button("Clear Chat", variant="secondary", size="sm")
414
+ reload_btn = gr.Button("Reload JSON", variant="secondary", size="sm")
415
 
416
  # States
417
+ state_corpus = gr.State([])
418
+ state_docs = gr.State([])
419
 
def load_json_to_corpus(file_obj, path_text, max_value_len):
    """
    Load JSON and build the enhanced corpus.

    The uploaded file is preferred; the typed path is used only when no file
    was uploaded.

    Args:
        file_obj: Uploaded file object exposing ``.name``, or ``None``.
        path_text: Fallback path typed by the user; may be empty or ``None``.
        max_value_len: Per-field truncation length passed to
            ``flatten_json_to_corpus`` (converted with ``int``).

    Returns:
        ``(status_text, corpus, docs)``. On any failure the status carries the
        error text and both lists are empty.
    """
    try:
        if file_obj is not None:
            location = file_obj.name
            source = f"uploaded file: {location}"
        else:
            location = (path_text or "").strip()
            if not location:
                return ("⚠️ Please upload a JSON file or provide a valid path.", [], [])
            # NOTE(review): the path is opened exactly as typed, so any file
            # readable by this process can be loaded through the text box.
            source = f"file path: {location}"

        with open(location, "r", encoding="utf-8", errors="replace") as f:
            raw = f.read()

        docs = safe_load_phpmyadmin_like_json(raw)

        # A single top-level object is normalized to a one-element list.
        if not isinstance(docs, list):
            docs = [docs]

        corpus = flatten_json_to_corpus(docs, max_value_len=int(max_value_len))

        # Count tables vs other objects; non-dict entries cannot be tables
        # and must not be asked for .get().
        tables = [d for d in docs if isinstance(d, dict) and d.get("type") == "table"]

        status_msg = f"βœ… Loaded from {source}\n"
        status_msg += f"πŸ“Š {len(docs)} objects total, {len(tables)} tables\n"
        status_msg += f"πŸ” Built {len(corpus)} searchable passages\n"
        status_msg += f"πŸ’¬ Ready for questions!"

        return (status_msg, corpus, docs)

    except Exception as e:
        return (f"❌ Load error: {str(e)}", [], [])
454
 
455
def ask_enhanced(api_key, query, history, corpus, k, cap):
    """
    Answer one chat turn and return the extended history.

    A warning turn is appended when no corpus is loaded or the question is
    blank. Otherwise the stripped question drives retrieval and prompt
    building, the model is called, and ``[query, answer]`` is appended with
    the question exactly as the user typed it.
    """
    if not corpus:
        return history + [[query, "⚠️ Please upload and load the JSON file first."]]

    question = query.strip() if query else ""
    if not question:
        return history + [["", "⚠️ Please enter a question."]]

    # Retrieval and prompt construction both use the stripped question.
    passages = retrieve_top_k_enhanced(question, corpus, k=int(k), per_table_cap=int(cap))
    prompt = build_enhanced_prompt(question, passages)

    try:
        reply = call_together(api_key, prompt)
    except Exception as e:
        reply = f"❌ API error: {str(e)}"

    return history + [[query, reply]]
474
 
475
+ # Event handlers
476
  json_file.upload(
477
  load_json_to_corpus,
478
  inputs=[json_file, fallback_path, max_val_len],
479
  outputs=[status, state_corpus, state_docs],
480
  )
481
+
482
  fallback_path.change(
483
  load_json_to_corpus,
484
  inputs=[json_file, fallback_path, max_val_len],
 
486
  )
487
 
488
  user_box.submit(
489
+ ask_enhanced,
490
  inputs=[api_key_tb, user_box, chatbot, state_corpus, topk_slider, per_table_cap],
491
  outputs=[chatbot],
492
+ ).then(lambda: "", outputs=[user_box]) # Clear input after submit
493
+
494
+ reload_btn.click(
495
+ load_json_to_corpus,
496
+ inputs=[json_file, fallback_path, max_val_len],
497
+ outputs=[status, state_corpus, state_docs],
498
  )
499
 
500
# Reset the conversation. The lambda returns one value per output component,
# in order: an empty chat, an empty input box, and the confirmation text for
# the status area (so the message no longer lands in the user's input box).
clear_btn.click(
    lambda: ([], "", "πŸ”„ Chat cleared. Ready for new questions."),
    outputs=[chatbot, user_box, status],
)
504
 
505
  if __name__ == "__main__":
506
  demo.launch()