# Spaces: Sleeping
# app.py
import gradio as gr | |
from selectolax.parser import HTMLParser | |
import requests | |
def parse_url(url):
    """Fetch a web page and return its text as a plain-text report.

    The page at *url* is downloaded with ``requests`` and parsed with
    Selectolax. The report holds the text extracted from the parsed
    document, followed by the text of every ``<p>`` element, one per line.

    Args:
        url: Address of the page to download.

    Returns:
        The formatted report. If fetching or parsing fails, a string of the
        form ``"Error: <message>"`` is returned instead of raising, so the
        message can be shown directly in the output text box.
    """
    try:
        # Without a timeout, requests waits indefinitely on an unresponsive
        # server, which would hang the UI call.
        response = requests.get(url, timeout=10)
        response.raise_for_status()

        parser = HTMLParser(response.text)
        text_content = parser.text()
        paragraphs = [node.text() for node in parser.css("p")]
    except Exception as e:
        # Deliberately broad: this function is the UI boundary, and every
        # failure (network, HTTP status, parsing) is reported as text.
        return f"Error: {e}"

    # Join outside the f-string: a backslash inside an f-string expression
    # is a syntax error before Python 3.12.
    paragraph_text = "\n".join(paragraphs)
    return (
        "Text Content:\n"
        f"{text_content}\n"
        "Paragraphs:\n"
        f"{paragraph_text}"
    )
# Gradio UI wiring: a single text box takes the URL and a single text box
# shows whatever parse_url returns.
iface = gr.Interface(
    title="URL Parser",
    description=(
        "Enter a URL to parse the page using Selectolax "
        "and output the information."
    ),
    fn=parse_url,
    inputs="text",
    outputs="text",
)

# Start the app only when this file is executed as a script, not on import.
if __name__ == "__main__":
    iface.launch()