Spaces:

MegaTronX
/

HTML_to_Gradio_Input

Sleeping

MegaTronX committed on Nov 23, 2024

Commit

b25b52b

verified ·

1 Parent(s): b874025

Update app.py

Files changed (1) hide show

app.py CHANGED Viewed

@@ -2,24 +2,26 @@
 import gradio as gr
 from selectolax.parser import HTMLParser
 def parse_url(url):
     try:
-        import requests
         response = requests.get(url)
         response.raise_for_status()
         parser = HTMLParser(response.text)
         # Extract the desired information from the parsed HTML
-        # For example, let's extract all the text content
         text_content = parser.text()
-        # You can also extract specific elements if needed
-        # For example, all paragraph texts
         paragraphs = [node.text() for node in parser.css('p')]
         # Combine the extracted information into a single string
-        output = f"Text Content:\n{text_content}\n\nParagraphs:\n{'\n'.join(paragraphs)}"
         return output
     except Exception as e:

 import gradio as gr
 from selectolax.parser import HTMLParser
+import requests
 def parse_url(url):
     try:
         response = requests.get(url)
         response.raise_for_status()
         parser = HTMLParser(response.text)
         # Extract the desired information from the parsed HTML
         text_content = parser.text()
+        # Extract specific elements if needed, for example, all paragraph texts
         paragraphs = [node.text() for node in parser.css('p')]
         # Combine the extracted information into a single string
+        output = f"""Text Content:
+{text_content}
+Paragraphs:
+{'\n'.join(paragraphs)}"""
         return output
     except Exception as e: