milwright committed
Commit dad1ccd · 1 Parent(s): aaf6589

Fix BeautifulSoup lambda type error in content extraction

Files changed (1)
  1. app.py +138 -16
app.py CHANGED
@@ -1,3 +1,6 @@
+import warnings
+warnings.filterwarnings("ignore", message="The 'tuples' format for chatbot messages is deprecated")
+
 import gradio as gr
 import json
  import zipfile
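
Note: `warnings.filterwarnings` treats `message` as a regular expression matched against the *start* of the warning text, so the prefix used above is enough to silence Gradio's chatbot deprecation notice. A minimal standalone sketch (not part of the commit):

```python
import warnings

with warnings.catch_warnings(record=True) as caught:
    warnings.simplefilter("always")
    # Same filter as in app.py: a prefix of the deprecation message is sufficient.
    warnings.filterwarnings("ignore", message="The 'tuples' format for chatbot messages is deprecated")
    warnings.warn("The 'tuples' format for chatbot messages is deprecated. Use 'messages' instead.")
    warnings.warn("some unrelated warning")

print([str(w.message) for w in caught])  # ['some unrelated warning']
```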
@@ -97,7 +100,7 @@ def enhanced_fetch_url_content(url, enable_search_validation=False):
             element.decompose()
 
         # Extract main content preferentially
-        main_content = soup.find('main') or soup.find('article') or soup.find('div', class_=lambda x: x and 'content' in x.lower()) or soup
+        main_content = soup.find('main') or soup.find('article') or soup.find('div', class_=lambda x: x and 'content' in x.lower() if x else False) or soup
         text = main_content.get_text()
 
  # Enhanced text cleaning
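
BeautifulSoup invokes a `class_` filter for every candidate tag and passes `None` when a tag has no `class` attribute, so the callable should handle `None` and return a plain bool — which is what the `if x else False` guard above ensures. A small standalone sketch (the HTML is invented for illustration):

```python
from bs4 import BeautifulSoup

html = """
<html><body>
  <div>no class attribute here</div>
  <div class="page-content">hello</div>
</body></html>
"""
soup = BeautifulSoup(html, "html.parser")

# The filter runs against the first <div> with x=None and against the second
# with x="page-content"; the guard keeps the return value a bool in both cases.
main_content = (
    soup.find('main')
    or soup.find('article')
    or soup.find('div', class_=lambda x: x and 'content' in x.lower() if x else False)
    or soup
)
print(main_content.get_text(strip=True))  # hello
```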
@@ -719,16 +722,19 @@ Generated on {datetime.now().strftime('%Y-%m-%d %H:%M:%S')} with Chat U/I Helper
 
     return readme_content
 
-def create_requirements(enable_vector_rag=False):
+def create_requirements(enable_vector_rag=False, enable_code_execution=False):
     """Generate requirements.txt"""
     base_requirements = "gradio>=5.35.0\nrequests>=2.32.3\nbeautifulsoup4>=4.12.3"
 
     if enable_vector_rag:
         base_requirements += "\nfaiss-cpu==1.7.4\nnumpy==1.24.3"
 
+    if enable_code_execution:
+        base_requirements += "\ngradio_client>=0.15.0"
+
     return base_requirements
 
-def generate_zip(name, description, system_prompt, model, api_key_var, temperature, max_tokens, examples_text, access_code="", enable_dynamic_urls=False, url1="", url2="", url3="", url4="", enable_vector_rag=False, rag_data=None):
+def generate_zip(name, description, system_prompt, model, api_key_var, temperature, max_tokens, examples_text, access_code="", enable_dynamic_urls=False, url1="", url2="", url3="", url4="", enable_vector_rag=False, rag_data=None, enable_code_execution=False):
     """Generate deployable zip file"""
 
  # Process examples
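
For reference, what the updated `create_requirements` emits for each flag combination (the function body is copied verbatim from the hunk above):

```python
def create_requirements(enable_vector_rag=False, enable_code_execution=False):
    """Generate requirements.txt"""
    base_requirements = "gradio>=5.35.0\nrequests>=2.32.3\nbeautifulsoup4>=4.12.3"

    if enable_vector_rag:
        base_requirements += "\nfaiss-cpu==1.7.4\nnumpy==1.24.3"

    if enable_code_execution:
        base_requirements += "\ngradio_client>=0.15.0"

    return base_requirements

print(create_requirements())
# gradio>=5.35.0
# requests>=2.32.3
# beautifulsoup4>=4.12.3

print(create_requirements(enable_code_execution=True).splitlines()[-1])
# gradio_client>=0.15.0
```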
@@ -773,7 +779,7 @@ def generate_zip(name, description, system_prompt, model, api_key_var, temperatu
     readme_config = config.copy()
     readme_config['access_code'] = access_code or ""
     readme_content = create_readme(readme_config)
-    requirements_content = create_requirements(enable_vector_rag)
+    requirements_content = create_requirements(enable_vector_rag, enable_code_execution)
 
     # Create zip file with clean naming
     filename = f"{name.lower().replace(' ', '_').replace('-', '_')}.zip"
@@ -889,7 +895,7 @@ def update_sandbox_preview(config_data):
 
     return preview_text, preview_html
 
-def on_preview_combined(name, description, system_prompt, enable_research_assistant, model, temperature, max_tokens, examples_text, enable_dynamic_urls, enable_vector_rag):
+def on_preview_combined(name, description, system_prompt, enable_research_assistant, model, temperature, max_tokens, examples_text, enable_dynamic_urls, enable_vector_rag, enable_code_execution):
     """Generate configuration and return preview updates"""
     if not name or not name.strip():
         return (
@@ -921,6 +927,7 @@ def on_preview_combined(name, description, system_prompt, enable_research_assist
         'max_tokens': max_tokens,
         'enable_dynamic_urls': enable_dynamic_urls,
         'enable_vector_rag': enable_vector_rag,
+        'enable_code_execution': enable_code_execution,
         'examples_text': examples_text,
         'preview_ready': True
     }
@@ -1071,8 +1078,32 @@ def preview_chat_response(message, history, config_data, url1="", url2="", url3=
     if dynamic_context_parts:
         dynamic_context = "\n".join(dynamic_context_parts)
 
+    # Check for code execution request if enabled
+    code_execution_result = ""
+    if config_data.get('enable_code_execution'):
+        # Simple pattern to detect code execution requests
+        code_patterns = [
+            r'```python\n(.*?)\n```',
+            r'```\n(.*?)\n```',
+            r'from\s+\w+\s+import|import\s+\w+',
+            r'def\s+\w+\s*\(',
+            r'print\s*\(',
+            r'for\s+\w+\s+in\s+',
+            r'if\s+.*:'
+        ]
+
+        for pattern in code_patterns:
+            if re.search(pattern, message, re.DOTALL | re.IGNORECASE):
+                # Extract code from code blocks
+                code_match = re.search(r'```(?:python)?\n(.*?)\n```', message, re.DOTALL)
+                if code_match:
+                    code_to_execute = code_match.group(1)
+                    execution_result = execute_python_code(code_to_execute, "Code execution requested")
+                    code_execution_result = f"\n\n{execution_result}\n\n"
+                break
+
     # Build enhanced system prompt with all contexts
-    enhanced_system_prompt = config_data.get('system_prompt', '') + grounding_context + rag_context + dynamic_context
+    enhanced_system_prompt = config_data.get('system_prompt', '') + grounding_context + rag_context + dynamic_context + code_execution_result
 
     # Build messages array for the API
  messages = [{"role": "system", "content": enhanced_system_prompt}]
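
A quick standalone check of the extraction regex used above (the sample message is invented); it pulls the body of the first fenced block whether or not it carries a `python` tag:

```python
import re

message = "Can you run this?\n```python\nprint(2 + 2)\n```\nthanks"

code_match = re.search(r'```(?:python)?\n(.*?)\n```', message, re.DOTALL)
if code_match:
    print(repr(code_match.group(1)))  # 'print(2 + 2)'
```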
@@ -1095,7 +1126,8 @@ def preview_chat_response(message, history, config_data, url1="", url2="", url3=
         "model": config_data.get('model', 'google/gemini-2.0-flash-001'),
         "messages": messages,
         "temperature": config_data.get('temperature', 0.7),
-        "max_tokens": config_data.get('max_tokens', 500)
+        "max_tokens": config_data.get('max_tokens', 500),
+        "tools": None  # Explicitly disable tool/function calling
     }
 
  # Make API request to OpenRouter
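
For context, a hedged sketch of the request this payload feeds into. The endpoint URL and headers are assumptions based on OpenRouter's public chat-completions API; they are not shown in this diff.

```python
import requests

payload = {
    "model": "google/gemini-2.0-flash-001",
    "messages": [
        {"role": "system", "content": "You are a helpful assistant."},
        {"role": "user", "content": "Hello"},
    ],
    "temperature": 0.7,
    "max_tokens": 500,
    "tools": None,  # explicitly disable tool/function calling, as in the diff
}

# Endpoint and auth header are assumptions, not part of this commit.
resp = requests.post(
    "https://openrouter.ai/api/v1/chat/completions",
    headers={"Authorization": "Bearer YOUR_OPENROUTER_API_KEY"},
    json=payload,
    timeout=30,
)
print(resp.status_code)
```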
@@ -1161,7 +1193,7 @@ def export_preview_conversation(history):
 
     return gr.update(value=temp_file, visible=True)
 
-def on_generate(name, description, system_prompt, enable_research_assistant, model, api_key_var, temperature, max_tokens, examples_text, access_code, enable_dynamic_urls, url1, url2, url3, url4, enable_vector_rag, rag_tool_state):
+def on_generate(name, description, system_prompt, enable_research_assistant, model, api_key_var, temperature, max_tokens, examples_text, access_code, enable_dynamic_urls, url1, url2, url3, url4, enable_vector_rag, rag_tool_state, enable_code_execution):
     if not name or not name.strip():
         return gr.update(value="Error: Please provide a Space Title", visible=True), gr.update(visible=False)
 
@@ -1178,7 +1210,7 @@ def on_generate(name, description, system_prompt, enable_research_assistant, mod
 
     final_system_prompt = system_prompt.strip()
 
-    filename = generate_zip(name, description, final_system_prompt, model, api_key_var, temperature, max_tokens, examples_text, access_code, enable_dynamic_urls, url1, url2, url3, url4, enable_vector_rag, rag_data)
+    filename = generate_zip(name, description, final_system_prompt, model, api_key_var, temperature, max_tokens, examples_text, access_code, enable_dynamic_urls, url1, url2, url3, url4, enable_vector_rag, rag_data, enable_code_execution)
 
     success_msg = f"""**Deployment package ready!**
 
@@ -1390,10 +1422,68 @@ def remove_chat_urls(count):
     return (gr.update(), gr.update(), gr.update(), gr.update(), count)
 
 
+def toggle_code_execution(enable_code):
+    """Toggle visibility of code execution space field"""
+    return gr.update(visible=enable_code)
+
+def toggle_web_search(enable_search):
+    """Toggle visibility of web search space field"""
+    return gr.update(visible=enable_search)
+
+def perform_web_search(query, description="Web search"):
+    """Perform web search using HuggingFace Space"""
+    try:
+        from gradio_client import Client
+
+        # Try to connect to a web search space (you can change this to any search space)
+        client = Client("huggingface-projects/web-search")
+
+        # Submit the search query
+        result = client.predict(
+            query,
+            api_name="/predict"
+        )
+
+        return f"**{description}**\n\nQuery: {query}\n\n**Search Results:**\n{result}"
+
+    except ImportError:
+        return f"**Web Search Error:** gradio_client not installed. Install with: `pip install gradio_client`"
+    except Exception as e:
+        # Fallback to simple URL extraction and fetching
+        urls = extract_urls_from_text(query)
+        if urls:
+            results = []
+            for url in urls[:2]:  # Limit to 2 URLs for fallback
+                content = enhanced_fetch_url_content(url)
+                results.append(f"Content from {url}:\n{content[:500]}...")
+            return f"**Web Search Fallback:** {description}\n\n" + "\n\n".join(results)
+        return f"**Web Search Error:** {str(e)}\n\nQuery: {query}"
+
+def execute_python_code(code, description="Code execution"):
+    """Execute Python code using HuggingFace Space"""
+    try:
+        from gradio_client import Client
+
+        # Try to connect to the code execution space
+        client = Client("huggingface-projects/code-execution")
+
+        # Submit the code for execution
+        result = client.predict(
+            code,
+            api_name="/predict"
+        )
+
+        return f"**{description}**\n\n```python\n{code}\n```\n\n**Output:**\n```\n{result}\n```"
+
+    except ImportError:
+        return f"**Code Execution Error:** gradio_client not installed. Install with: `pip install gradio_client`"
+    except Exception as e:
+        return f"**Code Execution Error:** {str(e)}\n\nNote: You can try running this code manually:\n\n```python\n{code}\n```"
+
 def toggle_research_assistant(enable_research):
     """Toggle research assistant system prompt"""
     if enable_research:
-        combined_prompt = "You are a search tool that provides link-grounded information through web fetching, limiting source criteria to DOI-verified articles from academic databases and official sources. Use https://libkey.io/ to cross-reference and validate article DOIs for inclusion. This tool is designed for students and researchers conducting academic inquiry. Additional responsibilities include analyzing academic sources, fact-checking claims with evidence, providing properly cited research summaries, and helping users navigate scholarly information. Ground all responses in provided URL contexts and any additional URLs you're instructed to fetch. Never rely on memory for factual claims."
+        combined_prompt = "You are a search tool that provides link-grounded information through web fetching, limiting source criteria to peer-reviewed articles from academic databases and official repositories. Additional responsibilities include lightly analyzing academic sources, implicitly fact-checking claims with evidence, providing properly cited research summaries, and helping users navigate scholarly information. Ground all responses in provided URL contexts and any additional URLs you're instructed to fetch. Never rely on memory for factual claims."
         return (
             gr.update(value=combined_prompt),  # Update main system prompt
  gr.update(value=True) # Enable dynamic URL fetching for research template
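
Both new helpers above follow the same `gradio_client` call pattern. A minimal sketch of that pattern in isolation; whether the referenced Spaces (`huggingface-projects/code-execution`, `huggingface-projects/web-search`) exist and expose a `/predict` endpoint is an assumption carried over from the code, not something verified here.

```python
from gradio_client import Client

def call_space(space_id: str, payload: str) -> str:
    """Send a single input to a hosted Gradio Space and return its output."""
    client = Client(space_id)  # connect to the Space
    return client.predict(payload, api_name="/predict")

# Example (Space name taken from the diff; it may not be available):
# print(call_space("huggingface-projects/code-execution", "print('hi')"))
```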
@@ -1484,12 +1574,38 @@ with gr.Blocks(title="Chat U/I Helper") as demo:
                 info="These will appear as clickable examples in the chat interface"
             )
 
-            with gr.Accordion("Tool Settings", open=False):
+            with gr.Accordion("Tool Settings", open=True):
+                enable_code_execution = gr.Checkbox(
+                    label="Enable Code Execution",
+                    value=False,
+                    info="Allow the assistant to execute Python code via external HuggingFace Space"
+                )
+
+                code_execution_space = gr.Textbox(
+                    label="Code Execution Space",
+                    value="huggingface-projects/code-execution",
+                    info="HuggingFace Space for Python code execution",
+                    visible=False
+                )
 
                 enable_dynamic_urls = gr.Checkbox(
                     label="Enable Dynamic URL Fetching",
+                    value=True,  # Enabled by default
+                    info="Allow the assistant to fetch additional URLs mentioned in conversations (enabled by default)",
+                    visible=False  # Hidden since it's always enabled
+                )
+
+                enable_web_search = gr.Checkbox(
+                    label="Enable Web Search",
                     value=False,
-                    info="Allow the assistant to fetch additional URLs mentioned in conversations"
+                    info="Allow the assistant to search the web using external HuggingFace Space"
+                )
+
+                web_search_space = gr.Textbox(
+                    label="Web Search Space",
+                    value="huggingface-projects/web-search",
+                    info="HuggingFace Space for web search functionality",
+                    visible=False
                 )
 
                 enable_vector_rag = gr.Checkbox(
@@ -1584,6 +1700,13 @@ with gr.Blocks(title="Chat U/I Helper") as demo:
         outputs=[system_prompt, enable_dynamic_urls]
     )
 
+    # Connect the code execution checkbox
+    enable_code_execution.change(
+        toggle_code_execution,
+        inputs=[enable_code_execution],
+        outputs=[code_execution_space]
+    )
+
 
 
  # Connect the URL management buttons
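
The wiring above follows Gradio's standard show/hide pattern: the checkbox's `.change` event returns `gr.update(visible=...)` for the dependent textbox. A self-contained sketch of just that interaction:

```python
import gradio as gr

def toggle_code_execution(enable_code):
    # Show or hide the dependent field based on the checkbox state
    return gr.update(visible=enable_code)

with gr.Blocks() as demo:
    enable_code_execution = gr.Checkbox(label="Enable Code Execution", value=False)
    code_execution_space = gr.Textbox(
        label="Code Execution Space",
        value="huggingface-projects/code-execution",
        visible=False,
    )
    enable_code_execution.change(
        toggle_code_execution,
        inputs=[enable_code_execution],
        outputs=[code_execution_space],
    )

# demo.launch()  # uncomment to try the toggle locally
```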
@@ -1616,7 +1739,7 @@ with gr.Blocks(title="Chat U/I Helper") as demo:
     # Connect the generate button
     generate_btn.click(
         on_generate,
-        inputs=[name, description, system_prompt, enable_research_assistant, model, api_key_var, temperature, max_tokens, examples_text, access_code, enable_dynamic_urls, url1, url2, url3, url4, enable_vector_rag, rag_tool_state],
+        inputs=[name, description, system_prompt, enable_research_assistant, model, api_key_var, temperature, max_tokens, examples_text, access_code, enable_dynamic_urls, url1, url2, url3, url4, enable_vector_rag, rag_tool_state, enable_code_execution],
         outputs=[status, download_file, sandbox_state]
     )
 
@@ -1634,8 +1757,7 @@ with gr.Blocks(title="Chat U/I Helper") as demo:
             preview_chatbot = gr.Chatbot(
                 value=[],
                 label="Preview Chat Interface",
-                height=400,
-                type="tuples"
+                height=400
             )
             preview_msg = gr.Textbox(
                 label="Test your assistant",
@@ -1735,7 +1857,7 @@ with gr.Blocks(title="Chat U/I Helper") as demo:
     # Connect cross-tab functionality after all components are defined
     preview_btn.click(
         on_preview_combined,
-        inputs=[name, description, system_prompt, enable_research_assistant, model, temperature, max_tokens, examples_text, enable_dynamic_urls, enable_vector_rag],
+        inputs=[name, description, system_prompt, enable_research_assistant, model, temperature, max_tokens, examples_text, enable_dynamic_urls, enable_vector_rag, enable_code_execution],
         outputs=[preview_config_state, preview_status_comp, preview_chat_section_comp, config_display_comp]
     )
 
 