Spaces:
Sleeping
Sleeping
# app.py | |
import gradio as gr | |
from selectolax.parser import HTMLParser | |
# Upper bound (seconds) for the HTTP request; without one, requests waits forever.
_REQUEST_TIMEOUT_SECONDS = 10


def parse_url(url):
    """Fetch *url* and return its text content as a single display string.

    The page is downloaded with ``requests`` and parsed with Selectolax.
    The result contains the text of the whole document followed by the
    text of every ``<p>`` element, one per line.

    Args:
        url: Address of the page to fetch, as typed into the UI text box.

    Returns:
        A string of the form ``"Text Content:\\n...\\n\\nParagraphs:\\n..."``
        on success, or ``"Error: <message>"`` if the input is empty or the
        request/parsing fails. This function never raises; the UI shows
        whatever string is returned.
    """
    # Reject blank input up front so the user gets a clear message instead
    # of an opaque "invalid URL" error from the HTTP library.
    if not isinstance(url, str) or not url.strip():
        return "Error: Please enter a URL."

    try:
        import requests

        response = requests.get(url.strip(), timeout=_REQUEST_TIMEOUT_SECONDS)
        response.raise_for_status()

        parser = HTMLParser(response.text)
        # Text of the entire document.
        text_content = parser.text()
        # Text of each paragraph element, in the order selected.
        paragraphs = [node.text() for node in parser.css("p")]

        # Join outside the f-string: a backslash inside an f-string
        # replacement field is a SyntaxError before Python 3.12.
        paragraph_text = "\n".join(paragraphs)
        return f"Text Content:\n{text_content}\n\nParagraphs:\n{paragraph_text}"
    except Exception as e:
        # Deliberately broad: this is the UI boundary, so any failure
        # (network, HTTP status, parsing, missing dependency) is reported
        # back to the user as text rather than crashing the handler.
        return f"Error: {e}"
# Wire parse_url into a minimal web UI: a single text box takes the URL and
# a single text box shows the parsed page content (or the error message).
iface = gr.Interface(
    parse_url,
    inputs="text",
    outputs="text",
    description="Enter a URL to parse the page using Selectolax and output the information.",
    title="URL Parser",
)

# Start the app only when this file is run as a script, not when imported.
if __name__ == "__main__":
    iface.launch()