Spaces:

MegaTronX
/

HTML_to_Gradio_Input

Sleeping

MegaTronX committed on Nov 23, 2024

Commit

b874025

verified ·

1 Parent(s): dcfa1e5

Create app.py

Files changed (1) hide show

app.py ADDED Viewed

+# app.py
+import gradio as gr
+from selectolax.parser import HTMLParser
def parse_url(url):
    """Fetch *url* and return its text content as one formatted string.

    The page is downloaded with ``requests`` and parsed with Selectolax's
    ``HTMLParser``. The result holds the parser's full text output followed
    by the text of every ``<p>`` element, one paragraph per line.

    Args:
        url: Address of the page to fetch. Surrounding whitespace is ignored.

    Returns:
        The formatted text on success, or a string starting with
        ``"Error: "`` that describes the failure. Exceptions are reported
        through the return value instead of being raised, because the
        result is shown directly in the output text box.
    """
    url = (url or "").strip()
    if not url:
        # Fail fast with a readable message rather than a library traceback text.
        return "Error: Please enter a URL."

    # Broad catch is deliberate: this is the UI boundary, and any failure
    # (missing dependency, network error, HTTP error status, parse error)
    # should surface as text in the output box.
    try:
        import requests

        # Without a timeout an unresponsive host would block this call forever.
        response = requests.get(url, timeout=10)
        response.raise_for_status()

        parser = HTMLParser(response.text)
        text_content = parser.text()

        # Join outside the f-string: a backslash inside an f-string
        # replacement field is a SyntaxError before Python 3.12.
        paragraphs = "\n".join(node.text() for node in parser.css("p"))

        return f"Text Content:\n{text_content}\n\nParagraphs:\n{paragraphs}"
    except Exception as e:
        return f"Error: {e}"
# Gradio UI definition: a single text box takes the URL, and a single text
# box shows whatever parse_url returns for it.
iface = gr.Interface(
    parse_url,  # callback invoked with the submitted URL
    "text",  # input component: plain text box
    "text",  # output component: plain text box
    description="Enter a URL to parse the page using Selectolax and output the information.",
    title="URL Parser",
)

# Start the web server only when this file is executed as a script, so that
# importing the module does not launch the app.
if __name__ == "__main__":
    iface.launch()