Spaces:
Sleeping
Sleeping
Update app.py
Browse files
app.py
CHANGED
@@ -2,24 +2,26 @@
|
|
2 |
|
3 |
import gradio as gr
|
4 |
from selectolax.parser import HTMLParser
|
|
|
5 |
|
6 |
def parse_url(url):
|
7 |
try:
|
8 |
-
import requests
|
9 |
response = requests.get(url)
|
10 |
response.raise_for_status()
|
11 |
parser = HTMLParser(response.text)
|
12 |
|
13 |
# Extract the desired information from the parsed HTML
|
14 |
-
# For example, let's extract all the text content
|
15 |
text_content = parser.text()
|
16 |
|
17 |
-
#
|
18 |
-
# For example, all paragraph texts
|
19 |
paragraphs = [node.text() for node in parser.css('p')]
|
20 |
|
21 |
# Combine the extracted information into a single string
|
22 |
-
output = f"Text Content
|
|
|
|
|
|
|
|
|
23 |
|
24 |
return output
|
25 |
except Exception as e:
|
|
|
2 |
|
3 |
import gradio as gr
|
4 |
from selectolax.parser import HTMLParser
|
5 |
+
import requests
|
6 |
|
7 |
def parse_url(url):
|
8 |
try:
|
|
|
9 |
response = requests.get(url)
|
10 |
response.raise_for_status()
|
11 |
parser = HTMLParser(response.text)
|
12 |
|
13 |
# Extract the desired information from the parsed HTML
|
|
|
14 |
text_content = parser.text()
|
15 |
|
16 |
+
# Extract specific elements if needed, for example, all paragraph texts
|
|
|
17 |
paragraphs = [node.text() for node in parser.css('p')]
|
18 |
|
19 |
# Combine the extracted information into a single string
|
20 |
+
output = f"""Text Content:
|
21 |
+
{text_content}
|
22 |
+
|
23 |
+
Paragraphs:
|
24 |
+
{'\n'.join(paragraphs)}"""
|
25 |
|
26 |
return output
|
27 |
except Exception as e:
|