Fix BeautifulSoup lambda type error in content extraction
app.py
CHANGED
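For context on the fix: BeautifulSoup calls a class_ filter function with a tag's class value, and that value can be None (or otherwise not a plain string) when a tag has no usable class attribute; the commit guards the filter so .lower() is only reached when a value is present. A minimal, standalone sketch of the guarded filter follows; only the lambda is taken from the commit, while the HTML snippet and variable names are hypothetical.

# Standalone sketch of the guarded class_ filter from enhanced_fetch_url_content().
# Only the lambda is taken from the commit; the HTML below is hypothetical.
from bs4 import BeautifulSoup

html = """
<html><body>
  <div class="page-content"><p>kept</p></div>
  <div><p>this div has no class attribute</p></div>
</body></html>
"""
soup = BeautifulSoup(html, "html.parser")

# The filter reaches .lower() only when a truthy class value is present
# (BeautifulSoup may pass None for tags without a class attribute).
main_content = (
    soup.find('main')
    or soup.find('article')
    or soup.find('div', class_=lambda x: x and 'content' in x.lower() if x else False)
    or soup
)
print(main_content.get_text(strip=True))  # -> kept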
@@ -1,3 +1,6 @@
+import warnings
+warnings.filterwarnings("ignore", message="The 'tuples' format for chatbot messages is deprecated")
+
 import gradio as gr
 import json
 import zipfile
@@ -97,7 +100,7 @@ def enhanced_fetch_url_content(url, enable_search_validation=False):
         element.decompose()

     # Extract main content preferentially
-    main_content = soup.find('main') or soup.find('article') or soup.find('div', class_=lambda x: x and 'content' in x.lower()) or soup
+    main_content = soup.find('main') or soup.find('article') or soup.find('div', class_=lambda x: x and 'content' in x.lower() if x else False) or soup
     text = main_content.get_text()

     # Enhanced text cleaning
@@ -719,16 +722,19 @@ Generated on {datetime.now().strftime('%Y-%m-%d %H:%M:%S')} with Chat U/I Helper

     return readme_content

-def create_requirements(enable_vector_rag=False):
+def create_requirements(enable_vector_rag=False, enable_code_execution=False):
     """Generate requirements.txt"""
     base_requirements = "gradio>=5.35.0\nrequests>=2.32.3\nbeautifulsoup4>=4.12.3"

     if enable_vector_rag:
         base_requirements += "\nfaiss-cpu==1.7.4\nnumpy==1.24.3"

+    if enable_code_execution:
+        base_requirements += "\ngradio_client>=0.15.0"
+
     return base_requirements

-def generate_zip(name, description, system_prompt, model, api_key_var, temperature, max_tokens, examples_text, access_code="", enable_dynamic_urls=False, url1="", url2="", url3="", url4="", enable_vector_rag=False, rag_data=None):
+def generate_zip(name, description, system_prompt, model, api_key_var, temperature, max_tokens, examples_text, access_code="", enable_dynamic_urls=False, url1="", url2="", url3="", url4="", enable_vector_rag=False, rag_data=None, enable_code_execution=False):
     """Generate deployable zip file"""

     # Process examples
@@ -773,7 +779,7 @@ def generate_zip(name, description, system_prompt, model, api_key_var, temperatu
     readme_config = config.copy()
     readme_config['access_code'] = access_code or ""
     readme_content = create_readme(readme_config)
-    requirements_content = create_requirements(enable_vector_rag)
+    requirements_content = create_requirements(enable_vector_rag, enable_code_execution)

     # Create zip file with clean naming
     filename = f"{name.lower().replace(' ', '_').replace('-', '_')}.zip"
@@ -889,7 +895,7 @@ def update_sandbox_preview(config_data):

     return preview_text, preview_html

-def on_preview_combined(name, description, system_prompt, enable_research_assistant, model, temperature, max_tokens, examples_text, enable_dynamic_urls, enable_vector_rag):
+def on_preview_combined(name, description, system_prompt, enable_research_assistant, model, temperature, max_tokens, examples_text, enable_dynamic_urls, enable_vector_rag, enable_code_execution):
     """Generate configuration and return preview updates"""
     if not name or not name.strip():
         return (
@@ -921,6 +927,7 @@ def on_preview_combined(name, description, system_prompt, enable_research_assist
         'max_tokens': max_tokens,
         'enable_dynamic_urls': enable_dynamic_urls,
         'enable_vector_rag': enable_vector_rag,
+        'enable_code_execution': enable_code_execution,
         'examples_text': examples_text,
         'preview_ready': True
     }
@@ -1071,8 +1078,32 @@ def preview_chat_response(message, history, config_data, url1="", url2="", url3=
     if dynamic_context_parts:
         dynamic_context = "\n".join(dynamic_context_parts)

+    # Check for code execution request if enabled
+    code_execution_result = ""
+    if config_data.get('enable_code_execution'):
+        # Simple pattern to detect code execution requests
+        code_patterns = [
+            r'```python\n(.*?)\n```',
+            r'```\n(.*?)\n```',
+            r'from\s+\w+\s+import|import\s+\w+',
+            r'def\s+\w+\s*\(',
+            r'print\s*\(',
+            r'for\s+\w+\s+in\s+',
+            r'if\s+.*:'
+        ]
+
+        for pattern in code_patterns:
+            if re.search(pattern, message, re.DOTALL | re.IGNORECASE):
+                # Extract code from code blocks
+                code_match = re.search(r'```(?:python)?\n(.*?)\n```', message, re.DOTALL)
+                if code_match:
+                    code_to_execute = code_match.group(1)
+                    execution_result = execute_python_code(code_to_execute, "Code execution requested")
+                    code_execution_result = f"\n\n{execution_result}\n\n"
+                break
+
     # Build enhanced system prompt with all contexts
-    enhanced_system_prompt = config_data.get('system_prompt', '') + grounding_context + rag_context + dynamic_context
+    enhanced_system_prompt = config_data.get('system_prompt', '') + grounding_context + rag_context + dynamic_context + code_execution_result

     # Build messages array for the API
     messages = [{"role": "system", "content": enhanced_system_prompt}]
@@ -1095,7 +1126,8 @@ def preview_chat_response(message, history, config_data, url1="", url2="", url3=
         "model": config_data.get('model', 'google/gemini-2.0-flash-001'),
         "messages": messages,
         "temperature": config_data.get('temperature', 0.7),
-        "max_tokens": config_data.get('max_tokens', 500)
+        "max_tokens": config_data.get('max_tokens', 500),
+        "tools": None  # Explicitly disable tool/function calling
     }

     # Make API request to OpenRouter
@@ -1161,7 +1193,7 @@ def export_preview_conversation(history):

     return gr.update(value=temp_file, visible=True)

-def on_generate(name, description, system_prompt, enable_research_assistant, model, api_key_var, temperature, max_tokens, examples_text, access_code, enable_dynamic_urls, url1, url2, url3, url4, enable_vector_rag, rag_tool_state):
+def on_generate(name, description, system_prompt, enable_research_assistant, model, api_key_var, temperature, max_tokens, examples_text, access_code, enable_dynamic_urls, url1, url2, url3, url4, enable_vector_rag, rag_tool_state, enable_code_execution):
     if not name or not name.strip():
         return gr.update(value="Error: Please provide a Space Title", visible=True), gr.update(visible=False)

@@ -1178,7 +1210,7 @@ def on_generate(name, description, system_prompt, enable_research_assistant, mod

     final_system_prompt = system_prompt.strip()

-    filename = generate_zip(name, description, final_system_prompt, model, api_key_var, temperature, max_tokens, examples_text, access_code, enable_dynamic_urls, url1, url2, url3, url4, enable_vector_rag, rag_data)
+    filename = generate_zip(name, description, final_system_prompt, model, api_key_var, temperature, max_tokens, examples_text, access_code, enable_dynamic_urls, url1, url2, url3, url4, enable_vector_rag, rag_data, enable_code_execution)

     success_msg = f"""**Deployment package ready!**

@@ -1390,10 +1422,68 @@ def remove_chat_urls(count):
     return (gr.update(), gr.update(), gr.update(), gr.update(), count)


+def toggle_code_execution(enable_code):
+    """Toggle visibility of code execution space field"""
+    return gr.update(visible=enable_code)
+
+def toggle_web_search(enable_search):
+    """Toggle visibility of web search space field"""
+    return gr.update(visible=enable_search)
+
+def perform_web_search(query, description="Web search"):
+    """Perform web search using HuggingFace Space"""
+    try:
+        from gradio_client import Client
+
+        # Try to connect to a web search space (you can change this to any search space)
+        client = Client("huggingface-projects/web-search")
+
+        # Submit the search query
+        result = client.predict(
+            query,
+            api_name="/predict"
+        )
+
+        return f"**{description}**\n\nQuery: {query}\n\n**Search Results:**\n{result}"
+
+    except ImportError:
+        return f"**Web Search Error:** gradio_client not installed. Install with: `pip install gradio_client`"
+    except Exception as e:
+        # Fallback to simple URL extraction and fetching
+        urls = extract_urls_from_text(query)
+        if urls:
+            results = []
+            for url in urls[:2]:  # Limit to 2 URLs for fallback
+                content = enhanced_fetch_url_content(url)
+                results.append(f"Content from {url}:\n{content[:500]}...")
+            return f"**Web Search Fallback:** {description}\n\n" + "\n\n".join(results)
+        return f"**Web Search Error:** {str(e)}\n\nQuery: {query}"
+
+def execute_python_code(code, description="Code execution"):
+    """Execute Python code using HuggingFace Space"""
+    try:
+        from gradio_client import Client
+
+        # Try to connect to the code execution space
+        client = Client("huggingface-projects/code-execution")
+
+        # Submit the code for execution
+        result = client.predict(
+            code,
+            api_name="/predict"
+        )
+
+        return f"**{description}**\n\n```python\n{code}\n```\n\n**Output:**\n```\n{result}\n```"
+
+    except ImportError:
+        return f"**Code Execution Error:** gradio_client not installed. Install with: `pip install gradio_client`"
+    except Exception as e:
+        return f"**Code Execution Error:** {str(e)}\n\nNote: You can try running this code manually:\n\n```python\n{code}\n```"
+
 def toggle_research_assistant(enable_research):
     """Toggle research assistant system prompt"""
     if enable_research:
-        combined_prompt = "You are a search tool that provides link-grounded information through web fetching, limiting source criteria to
+        combined_prompt = "You are a search tool that provides link-grounded information through web fetching, limiting source criteria to peer-reviewed articles from academic databases and official repositories. Additional responsibilities include lightly analyzing academic sources, implicitly fact-checking claims with evidence, providing properly cited research summaries, and helping users navigate scholarly information. Ground all responses in provided URL contexts and any additional URLs you're instructed to fetch. Never rely on memory for factual claims."
         return (
             gr.update(value=combined_prompt),  # Update main system prompt
             gr.update(value=True)  # Enable dynamic URL fetching for research template
@@ -1484,12 +1574,38 @@ with gr.Blocks(title="Chat U/I Helper") as demo:
                 info="These will appear as clickable examples in the chat interface"
             )

-        with gr.Accordion("Tool Settings", open=
+        with gr.Accordion("Tool Settings", open=True):
+            enable_code_execution = gr.Checkbox(
+                label="Enable Code Execution",
+                value=False,
+                info="Allow the assistant to execute Python code via external HuggingFace Space"
+            )
+
+            code_execution_space = gr.Textbox(
+                label="Code Execution Space",
+                value="huggingface-projects/code-execution",
+                info="HuggingFace Space for Python code execution",
+                visible=False
+            )

             enable_dynamic_urls = gr.Checkbox(
                 label="Enable Dynamic URL Fetching",
+                value=True,  # Enabled by default
+                info="Allow the assistant to fetch additional URLs mentioned in conversations (enabled by default)",
+                visible=False  # Hidden since it's always enabled
+            )
+
+            enable_web_search = gr.Checkbox(
+                label="Enable Web Search",
                 value=False,
-                info="Allow the assistant to
+                info="Allow the assistant to search the web using external HuggingFace Space"
+            )
+
+            web_search_space = gr.Textbox(
+                label="Web Search Space",
+                value="huggingface-projects/web-search",
+                info="HuggingFace Space for web search functionality",
+                visible=False
             )

             enable_vector_rag = gr.Checkbox(
@@ -1584,6 +1700,13 @@ with gr.Blocks(title="Chat U/I Helper") as demo:
         outputs=[system_prompt, enable_dynamic_urls]
     )

+    # Connect the code execution checkbox
+    enable_code_execution.change(
+        toggle_code_execution,
+        inputs=[enable_code_execution],
+        outputs=[code_execution_space]
+    )
+


     # Connect the URL management buttons
@@ -1616,7 +1739,7 @@ with gr.Blocks(title="Chat U/I Helper") as demo:
     # Connect the generate button
     generate_btn.click(
         on_generate,
-        inputs=[name, description, system_prompt, enable_research_assistant, model, api_key_var, temperature, max_tokens, examples_text, access_code, enable_dynamic_urls, url1, url2, url3, url4, enable_vector_rag, rag_tool_state],
+        inputs=[name, description, system_prompt, enable_research_assistant, model, api_key_var, temperature, max_tokens, examples_text, access_code, enable_dynamic_urls, url1, url2, url3, url4, enable_vector_rag, rag_tool_state, enable_code_execution],
         outputs=[status, download_file, sandbox_state]
     )

@@ -1634,8 +1757,7 @@ with gr.Blocks(title="Chat U/I Helper") as demo:
             preview_chatbot = gr.Chatbot(
                 value=[],
                 label="Preview Chat Interface",
-                height=400
-                type="tuples"
+                height=400
             )
             preview_msg = gr.Textbox(
                 label="Test your assistant",
@@ -1735,7 +1857,7 @@ with gr.Blocks(title="Chat U/I Helper") as demo:
     # Connect cross-tab functionality after all components are defined
     preview_btn.click(
         on_preview_combined,
-        inputs=[name, description, system_prompt, enable_research_assistant, model, temperature, max_tokens, examples_text, enable_dynamic_urls, enable_vector_rag],
+        inputs=[name, description, system_prompt, enable_research_assistant, model, temperature, max_tokens, examples_text, enable_dynamic_urls, enable_vector_rag, enable_code_execution],
         outputs=[preview_config_state, preview_status_comp, preview_chat_section_comp, config_display_comp]
     )
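The new perform_web_search() and execute_python_code() helpers share one gradio_client call pattern: connect to a hard-coded Space and call its /predict endpoint. A minimal sketch of that pattern in isolation follows; it is not part of the commit, and the Space ID and endpoint name are the values hard-coded in the diff, whose availability is an assumption inherited from it.

# Sketch of the gradio_client pattern the new helpers rely on (not part of the commit).
# The Space ID and api_name are the values hard-coded in app.py; whether that Space
# exists and exposes /predict is an assumption inherited from the diff.
from gradio_client import Client

client = Client("huggingface-projects/code-execution")
result = client.predict(
    "print(2 + 2)",      # the code string, as execute_python_code() would pass it
    api_name="/predict",
)
print(result)

As in the helpers themselves, wrapping this call in except ImportError / except Exception blocks keeps the preview chat usable when gradio_client is missing or the Space is unreachable.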