milwright committed on
Commit
4fde71b
·
1 Parent(s): 430b464

Fix RAG preview integration and clean dependencies

Browse files

- Fix RAG context integration in preview tab (was showing placeholder)
- Remove unused web scraping dependencies (crawl4ai, playwright) from requirements.txt
- Update preview functionality to actually retrieve RAG context

Files changed (2) hide show
  1. app.py +58 -28
  2. requirements.txt +0 -4
app.py CHANGED
@@ -1073,7 +1073,7 @@ def update_sandbox_preview(config_data):
1073
 
1074
  return preview_text, preview_html
1075
 
1076
- def on_preview_combined(name, description, system_prompt, enable_research_assistant, model, temperature, max_tokens, examples_text, enable_dynamic_urls, enable_vector_rag):
1077
  """Generate configuration and return preview updates"""
1078
  if not name or not name.strip():
1079
  return (
@@ -1084,7 +1084,7 @@ def on_preview_combined(name, description, system_prompt, enable_research_assist
1084
  )
1085
 
1086
  try:
1087
- # Use the system prompt directly (research assistant toggle already updates it)
1088
  if not system_prompt or not system_prompt.strip():
1089
  return (
1090
  {},
@@ -1104,7 +1104,12 @@ def on_preview_combined(name, description, system_prompt, enable_research_assist
1104
  'temperature': temperature,
1105
  'max_tokens': max_tokens,
1106
  'enable_dynamic_urls': enable_dynamic_urls,
 
 
 
 
1107
  'enable_vector_rag': enable_vector_rag,
 
1108
  'examples_text': examples_text,
1109
  'preview_ready': True
1110
  }
@@ -1237,14 +1242,31 @@ Once you set your API key, you'll be able to test real conversations in this pre
1237
  return "", history
1238
 
1239
  try:
1240
- # Get grounding context from URLs if configured
1241
- grounding_urls = [url1, url2, url3, url4]
 
 
 
 
 
 
 
1242
  grounding_context = get_cached_grounding_context([url for url in grounding_urls if url and url.strip()])
1243
 
1244
- # Add RAG context if available (simplified for preview)
1245
  rag_context = ""
1246
- if config_data.get('enable_vector_rag'):
1247
- rag_context = "\n\n[RAG context would be retrieved here based on similarity search]\n\n"
 
 
 
 
 
 
 
 
 
 
1248
 
1249
  # If dynamic URLs are enabled, check message for URLs to fetch
1250
  dynamic_context = ""
@@ -1353,7 +1375,7 @@ def export_preview_conversation(history):
1353
 
1354
  return gr.update(value=temp_file, visible=True)
1355
 
1356
- def on_generate(name, description, system_prompt, enable_research_assistant, model, api_key_var, temperature, max_tokens, examples_text, access_code, enable_dynamic_urls, url1, url2, url3, url4, enable_vector_rag, rag_tool_state):
1357
  if not name or not name.strip():
1358
  return gr.update(value="Error: Please provide a Space Title", visible=True), gr.update(visible=False), {}
1359
 
@@ -1364,7 +1386,7 @@ def on_generate(name, description, system_prompt, enable_research_assistant, mod
1364
  if enable_vector_rag and rag_tool_state:
1365
  rag_data = rag_tool_state.get_serialized_data()
1366
 
1367
- # Use the system prompt directly (research assistant toggle already updates it)
1368
  if not system_prompt or not system_prompt.strip():
1369
  return gr.update(value="Error: Please provide a System Prompt for the assistant", visible=True), gr.update(visible=False), {}
1370
 
@@ -1610,17 +1632,23 @@ def perform_web_search(query, description="Web search"):
1610
 
1611
  # Code execution functionality removed - no longer supported
1612
 
1613
- def toggle_research_assistant(enable_research):
1614
- """Toggle research assistant system prompt and dynamic URL fetching"""
1615
- if enable_research:
1616
- combined_prompt = "You are a research aid specializing in academic literature search and analysis. Your expertise spans discovering peer-reviewed sources, assessing research methodologies, synthesizing findings across studies, and delivering properly formatted citations. When responding, anchor claims in specific sources from provided URL contexts, differentiate between direct evidence and interpretive analysis, and note any limitations or contradictory results. Employ clear, accessible language that demystifies complex research, and propose connected research directions when appropriate. Your purpose is to serve as an informed research tool supporting users through initial concept development, exploratory investigation, information collection, and source compilation."
1617
  return (
1618
- gr.update(value=combined_prompt), # Update main system prompt
1619
  gr.update(value=True) # Enable dynamic URL fetching for research template
1620
  )
1621
- else:
 
 
 
 
 
 
1622
  return (
1623
- gr.update(value=""), # Clear main system prompt when disabling
1624
  gr.update(value=False) # Disable dynamic URL setting
1625
  )
1626
 
@@ -1795,12 +1823,14 @@ with gr.Blocks(
1795
  info="Define the assistant's role, purpose, and behavior in a single prompt"
1796
  )
1797
 
1798
- # Assistant configuration options
1799
- enable_research_assistant = gr.Checkbox(
1800
- label="Research Template",
1801
- value=False,
1802
- info="Enable to use pre-configured research assistant settings"
1803
- )
 
 
1804
 
1805
 
1806
 
@@ -1905,10 +1935,10 @@ with gr.Blocks(
1905
 
1906
 
1907
 
1908
- # Connect the research assistant checkbox
1909
- enable_research_assistant.change(
1910
- toggle_research_assistant,
1911
- inputs=[enable_research_assistant],
1912
  outputs=[system_prompt, enable_dynamic_urls]
1913
  )
1914
 
@@ -1947,7 +1977,7 @@ with gr.Blocks(
1947
  # Connect the generate button
1948
  generate_btn.click(
1949
  on_generate,
1950
- inputs=[name, description, system_prompt, enable_research_assistant, model, api_key_var, temperature, max_tokens, examples_text, access_code, enable_dynamic_urls, url1, url2, url3, url4, enable_vector_rag, rag_tool_state],
1951
  outputs=[status, download_file, sandbox_state]
1952
  )
1953
 
@@ -2066,7 +2096,7 @@ with gr.Blocks(
2066
  # Connect cross-tab functionality after all components are defined
2067
  preview_btn.click(
2068
  on_preview_combined,
2069
- inputs=[name, description, system_prompt, enable_research_assistant, model, temperature, max_tokens, examples_text, enable_dynamic_urls, enable_vector_rag],
2070
  outputs=[preview_config_state, preview_status_comp, preview_chat_section_comp, config_display_comp]
2071
  )
2072
 
 
1073
 
1074
  return preview_text, preview_html
1075
 
1076
+ def on_preview_combined(name, description, system_prompt, model, temperature, max_tokens, examples_text, enable_dynamic_urls, enable_vector_rag, rag_tool_state, url1="", url2="", url3="", url4=""):
1077
  """Generate configuration and return preview updates"""
1078
  if not name or not name.strip():
1079
  return (
 
1084
  )
1085
 
1086
  try:
1087
+ # Use the system prompt directly (template selector already updates it)
1088
  if not system_prompt or not system_prompt.strip():
1089
  return (
1090
  {},
 
1104
  'temperature': temperature,
1105
  'max_tokens': max_tokens,
1106
  'enable_dynamic_urls': enable_dynamic_urls,
1107
+ 'url1': url1,
1108
+ 'url2': url2,
1109
+ 'url3': url3,
1110
+ 'url4': url4,
1111
  'enable_vector_rag': enable_vector_rag,
1112
+ 'rag_tool_state': rag_tool_state,
1113
  'examples_text': examples_text,
1114
  'preview_ready': True
1115
  }
 
1242
  return "", history
1243
 
1244
  try:
1245
+ # Get grounding context from URLs - prioritize config_data URLs, fallback to preview tab URLs
1246
+ config_urls = [
1247
+ config_data.get('url1', ''),
1248
+ config_data.get('url2', ''),
1249
+ config_data.get('url3', ''),
1250
+ config_data.get('url4', '')
1251
+ ]
1252
+ # Use config URLs if available, otherwise use preview tab URLs
1253
+ grounding_urls = config_urls if any(url for url in config_urls if url) else [url1, url2, url3, url4]
1254
  grounding_context = get_cached_grounding_context([url for url in grounding_urls if url and url.strip()])
1255
 
1256
+ # Add RAG context if available (actual retrieval for preview)
1257
  rag_context = ""
1258
+ if config_data.get('enable_vector_rag') and HAS_RAG:
1259
+ try:
1260
+ # Get RAG tool from config_data if available
1261
+ rag_tool_state = config_data.get('rag_tool_state')
1262
+ if rag_tool_state:
1263
+ rag_context = rag_tool_state.get_relevant_context(message, max_chunks=2)
1264
+ if rag_context:
1265
+ rag_context = f"\n\n**RAG Context (Preview):**\n{rag_context}\n\n"
1266
+ else:
1267
+ rag_context = "\n\n[RAG: No processed documents available for context]\n\n"
1268
+ except Exception as e:
1269
+ rag_context = f"\n\n[RAG context error: {str(e)}]\n\n"
1270
 
1271
  # If dynamic URLs are enabled, check message for URLs to fetch
1272
  dynamic_context = ""
 
1375
 
1376
  return gr.update(value=temp_file, visible=True)
1377
 
1378
+ def on_generate(name, description, system_prompt, model, api_key_var, temperature, max_tokens, examples_text, access_code, enable_dynamic_urls, url1, url2, url3, url4, enable_vector_rag, rag_tool_state):
1379
  if not name or not name.strip():
1380
  return gr.update(value="Error: Please provide a Space Title", visible=True), gr.update(visible=False), {}
1381
 
 
1386
  if enable_vector_rag and rag_tool_state:
1387
  rag_data = rag_tool_state.get_serialized_data()
1388
 
1389
+ # Use the system prompt directly (template selector already updates it)
1390
  if not system_prompt or not system_prompt.strip():
1391
  return gr.update(value="Error: Please provide a System Prompt for the assistant", visible=True), gr.update(visible=False), {}
1392
 
 
1632
 
1633
  # Code execution functionality removed - no longer supported
1634
 
1635
def toggle_template(template_choice):
    """Build the UI updates for the selected assistant template.

    Returns a 2-tuple of ``gr.update`` objects: the first carries the new
    system prompt text, the second carries the dynamic-URL flag. A choice
    other than "Research Template" or "Socratic Template" (including
    "None") yields an empty prompt with the flag set to False.
    """
    # Each preset: (radio label, system prompt text, dynamic-URL flag)
    presets = (
        (
            "Research Template",
            "You are a research aid specializing in academic literature search and analysis. Your expertise spans discovering peer-reviewed sources, assessing research methodologies, synthesizing findings across studies, and delivering properly formatted citations. When responding, anchor claims in specific sources from provided URL contexts, differentiate between direct evidence and interpretive analysis, and note any limitations or contradictory results. Employ clear, accessible language that demystifies complex research, and propose connected research directions when appropriate. Your purpose is to serve as an informed research tool supporting users through initial concept development, exploratory investigation, information collection, and source compilation.",
            True,  # research template turns dynamic URL fetching on
        ),
        (
            "Socratic Template",
            "You are a pedagogically-minded academic assistant designed for introductory courses. Your approach follows constructivist learning principles: build on students' prior knowledge, scaffold complex concepts through graduated questioning, and use Socratic dialogue to guide discovery. Provide concise, evidence-based explanations that connect theory to lived experiences. Each response should model critical thinking by acknowledging multiple perspectives, identifying assumptions, and revealing conceptual relationships. Conclude with open-ended questions that promote higher-order thinking—analysis, synthesis, or evaluation—rather than recall.",
            False,  # Socratic template leaves dynamic URLs off by default
        ),
    )

    for label, prompt_text, dynamic_urls in presets:
        if template_choice == label:
            return (
                gr.update(value=prompt_text),   # main system prompt
                gr.update(value=dynamic_urls),  # dynamic URL setting
            )

    # "None" or any unrecognized value: clear the prompt, disable dynamic URLs
    return (
        gr.update(value=""),
        gr.update(value=False),
    )
1654
 
 
1823
  info="Define the assistant's role, purpose, and behavior in a single prompt"
1824
  )
1825
 
1826
+ # Template selection
1827
+ with gr.Row():
1828
+ template_selector = gr.Radio(
1829
+ label="Assistant Template",
1830
+ choices=["None", "Research Template", "Socratic Template"],
1831
+ value="None",
1832
+ info="Select a pre-configured template or use custom system prompt"
1833
+ )
1834
 
1835
 
1836
 
 
1935
 
1936
 
1937
 
1938
+ # Connect the template selector
1939
+ template_selector.change(
1940
+ toggle_template,
1941
+ inputs=[template_selector],
1942
  outputs=[system_prompt, enable_dynamic_urls]
1943
  )
1944
 
 
1977
  # Connect the generate button
1978
  generate_btn.click(
1979
  on_generate,
1980
+ inputs=[name, description, system_prompt, model, api_key_var, temperature, max_tokens, examples_text, access_code, enable_dynamic_urls, url1, url2, url3, url4, enable_vector_rag, rag_tool_state],
1981
  outputs=[status, download_file, sandbox_state]
1982
  )
1983
 
 
2096
  # Connect cross-tab functionality after all components are defined
2097
  preview_btn.click(
2098
  on_preview_combined,
2099
+ inputs=[name, description, system_prompt, model, temperature, max_tokens, examples_text, enable_dynamic_urls, enable_vector_rag, rag_tool_state, url1, url2, url3, url4],
2100
  outputs=[preview_config_state, preview_status_comp, preview_chat_section_comp, config_display_comp]
2101
  )
2102
 
requirements.txt CHANGED
@@ -3,10 +3,6 @@ requests>=2.32.3
3
  beautifulsoup4>=4.12.3
4
  python-dotenv>=1.0.0
5
 
6
- # Web Scraping service
7
- crawl4ai==0.7.0
8
- playwright==1.53.0
9
-
10
  # Vector RAG dependencies (optional)
11
  sentence-transformers>=2.2.2
12
  faiss-cpu>=1.11.0
 
3
  beautifulsoup4>=4.12.3
4
  python-dotenv>=1.0.0
5
 
 
 
 
 
6
  # Vector RAG dependencies (optional)
7
  sentence-transformers>=2.2.2
8
  faiss-cpu>=1.11.0