MegaTronX committed on
Commit
b874025
·
verified ·
1 Parent(s): dcfa1e5

Create app.py

Browse files
Files changed (1) hide show
  1. app.py +39 -0
app.py ADDED
@@ -0,0 +1,39 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ # app.py
2
+
3
+ import gradio as gr
4
+ from selectolax.parser import HTMLParser
5
+
6
def parse_url(url):
    """Fetch a web page and return its text content as a single string.

    The page is downloaded with ``requests`` and parsed with Selectolax's
    ``HTMLParser``. The result contains the text of the whole document
    followed by the text of every ``<p>`` element, one per line.

    Args:
        url: Address of the page to fetch. Leading and trailing whitespace
            is ignored.

    Returns:
        The formatted page text on success, or a message starting with
        ``"Error: "`` when the URL is empty or when fetching/parsing fails.
        Failures are returned as text rather than raised so that the
        caller always receives a displayable string.
    """
    try:
        # Reject empty input up front with a clear message instead of
        # letting the HTTP library fail on it.
        url = (url or "").strip()
        if not url:
            return "Error: no URL provided"

        import requests

        # Without a timeout a stalled server would block the call forever.
        response = requests.get(url, timeout=10)
        response.raise_for_status()
        parser = HTMLParser(response.text)

        # Text of the whole document.
        text_content = parser.text()

        # Text of each paragraph element.
        paragraphs = [node.text() for node in parser.css("p")]

        # Join outside the f-string: a backslash inside an f-string
        # expression is a SyntaxError before Python 3.12.
        paragraph_text = "\n".join(paragraphs)
        return f"Text Content:\n{text_content}\n\nParagraphs:\n{paragraph_text}"
    except Exception as e:
        # Top-level boundary for the UI: report the failure as text.
        return f"Error: {e}"
27
+
28
# Gradio UI: a single text box takes the URL and a single text box shows
# whatever parse_url returns for it.
iface = gr.Interface(
    title="URL Parser",
    description="Enter a URL to parse the page using Selectolax and output the information.",
    fn=parse_url,
    inputs="text",
    outputs="text",
)


# Start the web app only when this file is run as a script.
if __name__ == "__main__":
    iface.launch()