MegaTronX committed on
Commit
0606278
·
verified ·
1 Parent(s): 0508087

Update app.py

Browse files
Files changed (1) hide show
  1. app.py +12 -35
app.py CHANGED
@@ -1,41 +1,18 @@
1
- # app.py
2
-
3
  import gradio as gr
4
- from selectolax.parser import HTMLParser
5
  import requests
 
6
 
7
- def parse_url(url):
8
- try:
9
- response = requests.get(url)
10
- response.raise_for_status()
11
- parser = HTMLParser(response.text)
12
-
13
- # Extract the desired information from the parsed HTML
14
- text_content = parser.text()
15
-
16
- # Extract specific elements if needed, for example, all paragraph texts
17
- paragraphs = [node.text() for node in parser.css('p')]
18
-
19
- # Combine the extracted information into a single string
20
- output = r"""Text Content:
21
- {text_content}
22
-
23
- Paragraphs:
24
- {'\n'.join(paragraphs)}"""
25
-
26
- return output
27
- except Exception as e:
28
- return f"Error: {str(e)}"
29
 
30
- # Create the Gradio interface
31
- iface = gr.Interface(
32
- fn=parse_url,
33
- inputs="text", # Input is a text box for the URL
34
- outputs="text", # Output is a text box for the parsed content
35
- title="URL Parser",
36
- description="Enter a URL to parse the page using Selectolax and output the information."
37
  )
38
 
39
- # Launch the Gradio app
40
- if __name__ == "__main__":
41
- iface.launch()
 
 
 
1
  import gradio as gr
 
2
  import requests
3
+ from selectolax.parser import HTMLParser
4
 
5
+ def get_web_page_data(url):
6
+ response = requests.get(url)
7
+ parser = HTMLParser(html=response.text)
8
+ return parser.html
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
9
 
10
+ demo = gr.Interface(
11
+ fn=get_web_page_data,
12
+ inputs="text",
13
+ outputs="text",
14
+ title="Web Page Data Extractor",
15
+ description="Enter a URL to extract its web page data"
 
16
  )
17
 
18
+ demo.launch()