MegaTronX committed on
Commit
b25b52b
·
verified ·
1 Parent(s): b874025

Update app.py

Browse files
Files changed (1) hide show
  1. app.py +7 -5
app.py CHANGED
@@ -2,24 +2,26 @@
2
 
3
  import gradio as gr
4
  from selectolax.parser import HTMLParser
 
5
 
6
  def parse_url(url):
7
  try:
8
- import requests
9
  response = requests.get(url)
10
  response.raise_for_status()
11
  parser = HTMLParser(response.text)
12
 
13
  # Extract the desired information from the parsed HTML
14
- # For example, let's extract all the text content
15
  text_content = parser.text()
16
 
17
- # You can also extract specific elements if needed
18
- # For example, all paragraph texts
19
  paragraphs = [node.text() for node in parser.css('p')]
20
 
21
  # Combine the extracted information into a single string
22
- output = f"Text Content:\n{text_content}\n\nParagraphs:\n{'\n'.join(paragraphs)}"
 
 
 
 
23
 
24
  return output
25
  except Exception as e:
 
2
 
3
  import gradio as gr
4
  from selectolax.parser import HTMLParser
5
+ import requests
6
 
7
  def parse_url(url):
8
  try:
 
9
  response = requests.get(url)
10
  response.raise_for_status()
11
  parser = HTMLParser(response.text)
12
 
13
  # Extract the desired information from the parsed HTML
 
14
  text_content = parser.text()
15
 
16
+ # Extract specific elements if needed, for example, all paragraph texts
 
17
  paragraphs = [node.text() for node in parser.css('p')]
18
 
19
  # Combine the extracted information into a single string
20
+ output = f"""Text Content:
21
+ {text_content}
22
+
23
+ Paragraphs:
24
+ {'\n'.join(paragraphs)}"""
25
 
26
  return output
27
  except Exception as e: