Spaces:
Running
Running
Update app2.py
Browse files
app2.py
CHANGED
@@ -769,8 +769,6 @@ class EnhancedFileProcessor:
|
|
769 |
elif extracted_file_path.suffix.lower() in self.archive_extensions:
|
770 |
logger.info(f"Found nested archive '{member.name}', processing recursively.")
|
771 |
dataset.extend(self._process_archive(extracted_file_path, extract_to))
|
772 |
-
else:
|
773 |
-
logger.debug(f"Skipping unsupported file in archive: '{member.name}'")
|
774 |
else:
|
775 |
logger.warning(f"Could not get file-like object for {member.name} from tar.")
|
776 |
|
@@ -1413,6 +1411,7 @@ def respond_to_chat(
|
|
1413 |
"I'm equipped to filter your data. Try 'find entries where name contains widget'."
|
1414 |
])
|
1415 |
|
|
|
1416 |
except Exception as e:
|
1417 |
logger.error(f"Chatbot runtime error: {e}")
|
1418 |
response = f"An internal error occurred while processing your request: {e}"
|
@@ -1420,6 +1419,12 @@ def respond_to_chat(
|
|
1420 |
# On unexpected error, preserve the current_filtered_df_state rather than clearing or modifying it.
|
1421 |
# new_filtered_df_state = current_filtered_df_state # This line is effectively already done by initialization
|
1422 |
|
|
|
|
|
|
|
|
|
|
|
|
|
1423 |
if not response: # Final safety net for response, if it's somehow still empty
|
1424 |
response = "I'm not sure how to respond to that. Please try rephrasing or ask for help on available commands."
|
1425 |
|
@@ -1646,49 +1651,205 @@ def create_modern_interface():
|
|
1646 |
clear_chat_btn = gr.Button("Clear Chat History")
|
1647 |
|
1648 |
# Event handlers must be defined within the Blocks context
|
1649 |
-
example_btn.click(load_example, inputs=[], outputs=text_input)
|
1650 |
-
clear_btn.click(clear_input, inputs=[], outputs=[url_input, file_input, text_input, chatbot_data])
|
1651 |
|
1652 |
-
|
1653 |
-
|
1654 |
-
|
1655 |
-
|
1656 |
-
|
1657 |
-
|
1658 |
-
|
1659 |
-
|
1660 |
-
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
1661 |
|
1662 |
-
|
|
|
|
|
|
|
1663 |
|
1664 |
-
|
1665 |
-
|
1666 |
-
|
1667 |
-
|
1668 |
-
).then(
|
1669 |
-
lambda: "",
|
1670 |
-
inputs=None,
|
1671 |
-
outputs=chat_input
|
1672 |
-
)
|
1673 |
|
1674 |
-
|
1675 |
-
|
1676 |
-
inputs=[chat_input, chat_history, chatbot_data, filtered_chatbot_df_state], # Pass filtered_chatbot_df_state here too
|
1677 |
-
outputs=[chatbot, chatbot_data, filtered_chatbot_df_state] # And return it
|
1678 |
-
).then(
|
1679 |
-
lambda: "",
|
1680 |
-
inputs=None,
|
1681 |
-
outputs=chat_input
|
1682 |
-
)
|
1683 |
|
1684 |
-
|
1685 |
-
|
1686 |
-
|
1687 |
-
|
1688 |
-
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
1689 |
|
1690 |
# --- Download Logic ---
|
1691 |
def download_json_data(data_df: Optional[pd.DataFrame], filename_prefix: str) -> Optional[str]:
|
|
|
1692 |
if data_df is None or data_df.empty:
|
1693 |
logger.info(f"No data provided for download with prefix '{filename_prefix}'.")
|
1694 |
return None
|
@@ -1715,13 +1876,14 @@ def create_modern_interface():
|
|
1715 |
return None
|
1716 |
|
1717 |
def handle_download_full_json(current_chatbot_data_state: Optional[List[Dict]]) -> Optional[str]:
|
1718 |
-
|
|
|
1719 |
if not current_chatbot_data_state:
|
1720 |
logger.info("No full data available to download.")
|
1721 |
return None
|
1722 |
|
1723 |
try:
|
1724 |
-
# Attempt to create a DataFrame from the full data state
|
1725 |
# This uses the same flattening logic as the chatbot
|
1726 |
flat_data = []
|
1727 |
def flatten_item_for_download(d, parent_key='', sep='_'):
|
@@ -1743,8 +1905,10 @@ def create_modern_interface():
|
|
1743 |
for item in current_chatbot_data_state:
|
1744 |
if isinstance(item, dict):
|
1745 |
flat_data.append(flatten_item_for_download(item))
|
|
|
1746 |
elif isinstance(item, (list, str, int, float, bool, type(None))):
|
1747 |
-
|
|
|
1748 |
|
1749 |
if not flat_data:
|
1750 |
logger.info("Full data flattened to empty list. Nothing to download.")
|
@@ -1765,24 +1929,77 @@ def create_modern_interface():
|
|
1765 |
|
1766 |
|
1767 |
def handle_download_filtered_json(current_filtered_df_state: Optional[pd.DataFrame]) -> Optional[str]:
    """Handle the 'Download Filtered JSON' button.

    Args:
        current_filtered_df_state: The filtered DataFrame held in state, or
            None when no filter has produced data.

    Returns:
        Whatever ``download_json_data`` returns for the DataFrame, or None
        when the state is missing or empty.
    """
    has_rows = current_filtered_df_state is not None and not current_filtered_df_state.empty
    if has_rows:
        # Hand the DataFrame straight to the generic download helper.
        return download_json_data(current_filtered_df_state, "filtered_data")

    logger.info("No filtered data available to download.")
    return None
|
1774 |
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
1775 |
download_full_json_btn.click(
|
1776 |
fn=handle_download_full_json,
|
1777 |
-
inputs=[chatbot_data],
|
1778 |
-
outputs=[download_file_output]
|
1779 |
)
|
1780 |
download_filtered_json_btn.click(
|
1781 |
fn=handle_download_filtered_json,
|
1782 |
-
inputs=[filtered_chatbot_df_state],
|
1783 |
-
outputs=[download_file_output]
|
1784 |
)
|
1785 |
|
|
|
1786 |
gr.Markdown("""
|
1787 |
### π Features
|
1788 |
- **Enhanced URL Scraping**: Extracts HTML text, title, meta description, links, and attempts parsing JSON/XML from URLs based on content type. Supports crawling links up to a specified depth. **(Now performs real fetching)**
|
@@ -1818,10 +2035,10 @@ def main():
|
|
1818 |
mimetypes.init()
|
1819 |
interface = create_modern_interface()
|
1820 |
interface.launch(
|
1821 |
-
share=False,
|
1822 |
-
debug=False,
|
1823 |
-
show_error=True,
|
1824 |
-
show_api=False
|
1825 |
)
|
1826 |
except Exception as e:
|
1827 |
logger.error(f"Application startup error: {e}")
|
@@ -1829,4 +2046,5 @@ def main():
|
|
1829 |
raise
|
1830 |
|
1831 |
if __name__ == "__main__":
|
|
|
1832 |
main()
|
|
|
769 |
elif extracted_file_path.suffix.lower() in self.archive_extensions:
|
770 |
logger.info(f"Found nested archive '{member.name}', processing recursively.")
|
771 |
dataset.extend(self._process_archive(extracted_file_path, extract_to))
|
|
|
|
|
772 |
else:
|
773 |
logger.warning(f"Could not get file-like object for {member.name} from tar.")
|
774 |
|
|
|
1411 |
"I'm equipped to filter your data. Try 'find entries where name contains widget'."
|
1412 |
])
|
1413 |
|
1414 |
+
# --- End of main try block ---
|
1415 |
except Exception as e:
|
1416 |
logger.error(f"Chatbot runtime error: {e}")
|
1417 |
response = f"An internal error occurred while processing your request: {e}"
|
|
|
1419 |
# On unexpected error, preserve the current_filtered_df_state rather than clearing or modifying it.
|
1420 |
# new_filtered_df_state = current_filtered_df_state # This line is effectively already done by initialization
|
1421 |
|
1422 |
+
# --- Finally block (optional, but good practice if cleanup is needed) ---
|
1423 |
+
# finally:
|
1424 |
+
# # Any cleanup code can go here
|
1425 |
+
# pass
|
1426 |
+
|
1427 |
+
|
1428 |
if not response: # Final safety net for response, if it's somehow still empty
|
1429 |
response = "I'm not sure how to respond to that. Please try rephrasing or ask for help on available commands."
|
1430 |
|
|
|
1651 |
clear_chat_btn = gr.Button("Clear Chat History")
|
1652 |
|
1653 |
# Event handlers must be defined within the Blocks context
|
|
|
|
|
1654 |
|
1655 |
+
def load_example():
    """Return a sample product catalog as pretty-printed JSON text.

    The payload has a ``type`` marker, two example ``items`` and a
    ``metadata`` section stamped with the current local time.
    """
    premium_widget = {
        "id": "123",
        "name": "Premium Widget",
        "description": "High-quality widget with advanced features",
        "price": 299.99,
        "category": "electronics",
        "tags": ["premium", "featured", "new"],
    }
    basic_widget = {
        "id": "456",
        "name": "Basic Widget",
        "description": "Reliable widget for everyday use",
        "price": 149.99,
        "category": "electronics",
        "tags": ["basic", "popular"],
    }
    catalog_metadata = {
        "timestamp": datetime.now().isoformat(),
        "version": "2.0",
        "source": "example",
    }

    # Key order matters for the rendered text: type, items, metadata.
    catalog = {"type": "product_catalog"}
    catalog["items"] = [premium_widget, basic_widget]
    catalog["metadata"] = catalog_metadata
    return json.dumps(catalog, indent=2)
|
1683 |
+
|
1684 |
+
def clear_input():
    """Return reset values for the four outputs wired to the clear button.

    The tuple is ordered as (url text, file upload, JSON text, chatbot data
    state): text boxes get an empty string, the other two get None.
    """
    blank_text, no_value = "", None
    return blank_text, no_value, blank_text, no_value
|
1687 |
+
|
1688 |
+
def update_viewport(paths, enabled_states):
    """Render the QR-code viewport as an HTML grid.

    Args:
        paths: Sequence of QR image paths. A falsy value yields a placeholder
            paragraph instead of a grid.
        enabled_states: Collection of indices into ``paths`` that are
            currently enabled, or None. When it is None, or contains anything
            that is not a valid index for ``paths``, every QR code is treated
            as enabled.

    Returns:
        An HTML string: one ``viewport-item`` per path containing the image
        and an "Enable" checkbox whose ``onchange`` calls the page's
        ``updateEnabledStates`` JavaScript function.
    """
    if not paths:
        return "<p>No QR codes generated yet.</p>"

    num_qr_codes = len(paths)
    # Aim for a roughly square layout, clamped to between 1 and 6 columns.
    cols = max(1, min(math.ceil(math.sqrt(num_qr_codes)), 6))

    # enabled_states is a list of *indices*, so a partial selection is
    # legitimately shorter than paths. Only fall back to "all enabled" when
    # the state is missing or refers to positions that do not exist (stale).
    valid_indices = range(num_qr_codes)
    if enabled_states is None or not all(idx in valid_indices for idx in enabled_states):
        enabled = set(valid_indices)
    else:
        enabled = set(enabled_states)

    parts = [
        f'<div class="viewport-container" style="grid-template-columns: repeat({cols}, 1fr);">'
    ]
    for i, path in enumerate(paths):
        is_enabled = i in enabled
        border = "border: 2px solid green;" if is_enabled else "border: 2px solid lightgray;"
        opacity = "opacity: 1.0;" if is_enabled else "opacity: 0.5;"
        parts.append(f'<div class="viewport-item" id="qr_item_{i}">')
        # The /file= prefix lets Gradio serve the local image file.
        parts.append(f'<img src="/file={path}" style="{border} {opacity}" alt="QR Code {i+1}">')
        # data-index tells the page script which QR code this checkbox controls.
        parts.append(
            f'<label><input type="checkbox" data-index="{i}" {"checked" if is_enabled else ""} onchange="updateEnabledStates(this)"> Enable</label>'
        )
        parts.append('</div>')
    parts.append('</div>')

    return "".join(parts)
|
1716 |
+
|
1717 |
+
def on_qr_generation(qr_paths_list):
    """Initialise the enabled-QR state after QR generation.

    Args:
        qr_paths_list: The generated QR paths, or None.

    Returns:
        A pair ``(qr_paths_list, enabled_indices)``: the input passed through
        unchanged and a list with one index per path, so every QR code starts
        out enabled. None is treated as zero QR codes.
    """
    qr_total = len(qr_paths_list) if qr_paths_list is not None else 0
    every_index = [*range(qr_total)]
    return qr_paths_list, every_index
|
1728 |
|
1729 |
+
def process_inputs(urls, files, text, combine, crawl_depth, generate_qr_enabled):
    """Process all inputs and generate QR codes based on the toggle.

    Data is collected from three optional sources, in this order: direct JSON
    text, a list of URLs, and uploaded files. A human-readable status line is
    recorded for every step.

    Args:
        urls: String of URLs separated by commas and/or newlines; may be
            empty or None.
        files: Iterable of uploaded file objects; may be empty or None.
        text: Raw JSON text; may be empty or None.
        combine: Passed through unchanged to ``generate_qr_codes``.
        crawl_depth: Passed as ``max_steps`` to
            ``EnhancedURLProcessor.fetch_content_with_depth``.
        generate_qr_enabled: When truthy, QR codes are generated from the
            collected results.

    Returns:
        A 4-tuple ``(data, qr_paths, status_text, data)``: the collected
        results (an empty list when nothing was collected or an unexpected
        error occurred), the QR paths converted to strings, the status lines
        joined with newlines, and the collected results again for the chatbot
        data state.
    """
    # NOTE(review): the status glyphs were mis-encoded in the text this block
    # was recovered from (some even split their string literal across lines).
    # The check mark, cross and warning sign are confident reconstructions;
    # the URL, file and QR glyphs are best guesses - confirm against the
    # original file.
    ok_mark, fail_mark, warn_mark = "✅", "❌", "⚠️"
    url_mark, file_mark = "🌐", "📁"
    qr_on_mark, qr_off_mark = "⚙️", "☑️"

    def count_successful_fetches(crawl_result):
        """Recursively count fetches with a 2xx status in a crawl result."""
        if not crawl_result:
            return 0
        count = 0
        fetch_result = crawl_result.get('fetch_result')
        if fetch_result is not None:
            status = fetch_result.get('status_code')
            if status is not None and 200 <= status < 300:
                count += 1
        # "or []" also covers a key that is present but set to None.
        for linked_result in crawl_result.get('linked_extractions') or []:
            count += count_successful_fetches(linked_result)
        return count

    results = []
    processing_status_messages = []
    qr_paths = []
    final_json_output = []

    url_processor = EnhancedURLProcessor()
    file_processor = EnhancedFileProcessor()

    try:
        # --- Direct JSON text ---
        if text and text.strip():
            try:
                json_data = json.loads(text)
                results.append({
                    'source': 'json_input',
                    'extracted_data': json_data,
                    'timestamp': datetime.now().isoformat(),
                    'processing_notes': ['Parsed from direct JSON input.']
                })
                processing_status_messages.append(f"{ok_mark} Successfully parsed direct JSON input.")
            except json.JSONDecodeError as e:
                processing_status_messages.append(f"{fail_mark} Invalid JSON format in text input: {str(e)}")
                logger.error(f"Invalid JSON format in text input: {e}")
            except Exception as e:
                processing_status_messages.append(f"{fail_mark} Error processing direct JSON input: {str(e)}")
                logger.error(f"Error processing direct JSON input: {e}")

        # --- URLs ---
        if urls and urls.strip():
            url_list = [url.strip() for url in re.split(r'[,\n]', urls) if url.strip()]
            for url in url_list:
                processing_status_messages.append(f"{url_mark} Processing URL: {url} with crawl depth {crawl_depth}...")
                # fetch_content_with_depth handles the recursion and returns
                # the main fetch_result together with linked_extractions.
                content_result = url_processor.fetch_content_with_depth(url, max_steps=crawl_depth)

                if not content_result:
                    processing_status_messages.append(f"{fail_mark} Failed to process URL: {url} (No result returned)")
                    continue

                results.append(content_result)
                # A fetch_result that is present but None must not raise here:
                # the outer handler would otherwise discard every result.
                main_fetch_status = (content_result.get('fetch_result') or {}).get('status_code')
                fetched_ok = main_fetch_status is not None and 200 <= main_fetch_status < 300

                if fetched_ok:
                    processing_status_messages.append(f"{ok_mark} Processed URL: {url} (Level 0, Status: {main_fetch_status})")
                else:
                    processing_status_messages.append(f"{fail_mark} Failed to fetch or process URL: {url} (Status: {main_fetch_status})")

                notes = content_result.get('processing_notes')
                if notes:
                    processing_status_messages.append(f" Notes for {url}: {'; '.join(notes)}")

                if fetched_ok:
                    linked_extractions = content_result.get('linked_extractions') or []
                    total_attempted_links = len(linked_extractions)
                    # Wrap the links so the root fetch itself is not counted.
                    total_successful_linked = count_successful_fetches({'linked_extractions': linked_extractions})
                    if total_attempted_links > 0:
                        processing_status_messages.append(f" Processed {total_successful_linked}/{total_attempted_links} linked pages up to depth {crawl_depth}.")

        # --- Uploaded files ---
        if files:
            for file in files:
                # Fall back to the object's string form when it has no .name,
                # so a status line can always be produced.
                file_label = getattr(file, 'name', str(file))
                processing_status_messages.append(f"{file_mark} Processing file: {file_label}...")
                file_results = file_processor.process_file(file)
                if file_results:
                    results.extend(file_results)
                    processing_status_messages.append(f"{ok_mark} Processed file: {file_label}")
                    for res in file_results:
                        if res.get('processing_notes'):
                            processing_status_messages.append(f" Notes for {res.get('filename', 'item')}: {'; '.join(res['processing_notes'])}")
                else:
                    processing_status_messages.append(f"{fail_mark} Failed to process file: {file_label}")
                    processing_status_messages.append(f" No results returned for file: {file_label}")

        # --- QR generation ---
        if results:
            final_json_output = results  # Processed data is returned regardless of QR generation
            if generate_qr_enabled:
                processing_status_messages.append(f"{qr_on_mark} Generating QR codes as requested...")
                # "or []" keeps the return statement safe if nothing comes back.
                qr_paths = generate_qr_codes(results, combine) or []
                if qr_paths:
                    processing_status_messages.append(f"{ok_mark} Successfully generated {len(qr_paths)} QR codes.")
                else:
                    processing_status_messages.append(f"{fail_mark} Failed to generate QR codes (empty result or error). Check logs.")
            else:
                processing_status_messages.append(f"{qr_off_mark} QR code generation was disabled. Processed data is available.")
                qr_paths = []
        else:
            processing_status_messages.append(f"{warn_mark} No valid content collected from inputs.")
            final_json_output = []

    except Exception as e:
        # Top-level boundary: log with traceback and clear both outputs.
        logger.exception(f"Overall processing error in process_inputs: {e}")
        processing_status_messages.append(f"{fail_mark} An unexpected error occurred during processing: {str(e)}")
        final_json_output = []
        qr_paths = []

    return (
        final_json_output,
        [str(path) for path in qr_paths],  # Paths as strings for the gallery output
        "\n".join(processing_status_messages),
        final_json_output  # Also updates the chatbot_data state
    )
|
1849 |
|
1850 |
# --- Download Logic ---
|
1851 |
def download_json_data(data_df: Optional[pd.DataFrame], filename_prefix: str) -> Optional[str]:
|
1852 |
+
"""Helper function to convert DataFrame to JSON file for download."""
|
1853 |
if data_df is None or data_df.empty:
|
1854 |
logger.info(f"No data provided for download with prefix '{filename_prefix}'.")
|
1855 |
return None
|
|
|
1876 |
return None
|
1877 |
|
1878 |
def handle_download_full_json(current_chatbot_data_state: Optional[List[Dict]]) -> Optional[str]:
|
1879 |
+
"""Handler for the 'Download Full JSON' button."""
|
1880 |
+
# This function receives the full processed data (List[Dict]) from the chatbot_data state
|
1881 |
if not current_chatbot_data_state:
|
1882 |
logger.info("No full data available to download.")
|
1883 |
return None
|
1884 |
|
1885 |
try:
|
1886 |
+
# Attempt to create a DataFrame from the full data state for consistent output structure
|
1887 |
# This uses the same flattening logic as the chatbot
|
1888 |
flat_data = []
|
1889 |
def flatten_item_for_download(d, parent_key='', sep='_'):
|
|
|
1905 |
for item in current_chatbot_data_state:
|
1906 |
if isinstance(item, dict):
|
1907 |
flat_data.append(flatten_item_for_download(item))
|
1908 |
+
# Handle cases where top-level items might not be dicts, wrap them
|
1909 |
elif isinstance(item, (list, str, int, float, bool, type(None))):
|
1910 |
+
flat_data.append({'item_value': item})
|
1911 |
+
|
1912 |
|
1913 |
if not flat_data:
|
1914 |
logger.info("Full data flattened to empty list. Nothing to download.")
|
|
|
1929 |
|
1930 |
|
1931 |
def handle_download_filtered_json(current_filtered_df_state: Optional[pd.DataFrame]) -> Optional[str]:
    """Handle the 'Download Filtered JSON' button.

    Args:
        current_filtered_df_state: The already-filtered DataFrame held in
            state, or None when no filter has produced data.

    Returns:
        Whatever ``download_json_data`` returns for the DataFrame, or None
        when the state is missing or empty.
    """
    has_rows = current_filtered_df_state is not None and not current_filtered_df_state.empty
    if has_rows:
        # Hand the DataFrame straight to the generic download helper.
        return download_json_data(current_filtered_df_state, "filtered_data")

    logger.info("No filtered data available to download.")
    return None
|
1939 |
|
1940 |
+
|
1941 |
+
# Connect event handlers within the Blocks context
|
1942 |
+
example_btn.click(load_example, inputs=[], outputs=text_input)
|
1943 |
+
clear_btn.click(clear_input, inputs=[], outputs=[url_input, file_input, text_input, chatbot_data])
|
1944 |
+
|
1945 |
+
process_btn.click(
|
1946 |
+
process_inputs,
|
1947 |
+
inputs=[url_input, file_input, text_input, combine_data, crawl_depth_slider, generate_qr_toggle],
|
1948 |
+
outputs=[output_json, output_gallery, output_text, chatbot_data]
|
1949 |
+
).then(
|
1950 |
+
# This .then() is triggered after process_inputs completes and updates output_gallery
|
1951 |
+
on_qr_generation,
|
1952 |
+
inputs=[output_gallery], # Pass the list of QR paths from the gallery output
|
1953 |
+
outputs=[qr_code_paths, enabled_qr_codes] # Update the state variables
|
1954 |
+
)
|
1955 |
+
|
1956 |
+
# When the viewport tab is selected, update the viewport HTML
|
1957 |
+
viewport_tab.select(update_viewport, inputs=[qr_code_paths, enabled_qr_codes], outputs=[viewport_output])
|
1958 |
+
|
1959 |
+
# Chatbot send button and text input submit events
|
1960 |
+
send_msg_btn.click(
|
1961 |
+
respond_to_chat,
|
1962 |
+
inputs=[chat_input, chat_history, chatbot_data, filtered_chatbot_df_state],
|
1963 |
+
outputs=[chatbot, chatbot_data, filtered_chatbot_df_state]
|
1964 |
+
).then(
|
1965 |
+
# Clear the chat input box after sending message
|
1966 |
+
lambda: "",
|
1967 |
+
inputs=None,
|
1968 |
+
outputs=chat_input
|
1969 |
+
)
|
1970 |
+
|
1971 |
+
chat_input.submit( # Allow submitting by pressing Enter in the text box
|
1972 |
+
respond_to_chat,
|
1973 |
+
inputs=[chat_input, chat_history, chatbot_data, filtered_chatbot_df_state], # Pass filtered_chatbot_df_state here too
|
1974 |
+
outputs=[chatbot, chatbot_data, filtered_chatbot_df_state] # And return it
|
1975 |
+
).then(
|
1976 |
+
# Clear the chat input box after submitting
|
1977 |
+
lambda: "",
|
1978 |
+
inputs=None,
|
1979 |
+
outputs=chat_input
|
1980 |
+
)
|
1981 |
+
|
1982 |
+
# Clear chat history button
|
1983 |
+
clear_chat_btn.click(
|
1984 |
+
# Clear chat history component and the filtered data state
|
1985 |
+
lambda: ([], None),
|
1986 |
+
inputs=None,
|
1987 |
+
outputs=[chatbot, filtered_chatbot_df_state]
|
1988 |
+
)
|
1989 |
+
|
1990 |
+
# Download buttons
|
1991 |
download_full_json_btn.click(
|
1992 |
fn=handle_download_full_json,
|
1993 |
+
inputs=[chatbot_data], # chatbot_data is the gr.State holding the full dataset (List[Dict])
|
1994 |
+
outputs=[download_file_output] # The File component acts as the download trigger
|
1995 |
)
|
1996 |
download_filtered_json_btn.click(
|
1997 |
fn=handle_download_filtered_json,
|
1998 |
+
inputs=[filtered_chatbot_df_state], # This state holds the filtered DataFrame
|
1999 |
+
outputs=[download_file_output] # The File component acts as the download trigger
|
2000 |
)
|
2001 |
|
2002 |
+
|
2003 |
gr.Markdown("""
|
2004 |
### π Features
|
2005 |
- **Enhanced URL Scraping**: Extracts HTML text, title, meta description, links, and attempts parsing JSON/XML from URLs based on content type. Supports crawling links up to a specified depth. **(Now performs real fetching)**
|
|
|
2035 |
mimetypes.init()
|
2036 |
interface = create_modern_interface()
|
2037 |
interface.launch(
|
2038 |
+
share=False, # Set to True to create a public link (requires auth token)
|
2039 |
+
debug=False, # Set to True for detailed debug output
|
2040 |
+
show_error=True, # Show errors in the UI
|
2041 |
+
show_api=False # Hide API endpoint details
|
2042 |
)
|
2043 |
except Exception as e:
|
2044 |
logger.error(f"Application startup error: {e}")
|
|
|
2046 |
raise
|
2047 |
|
2048 |
if __name__ == "__main__":
|
2049 |
+
# Ensure the script is run directly (not imported)
|
2050 |
main()
|