Spaces:

KSh100
/

websearch

Sleeping

KSh100 committed on Apr 11

Commit

0deeeca

verified ·

1 Parent(s): b8a4185

Update app.py

Files changed (1) hide show

app.py CHANGED Viewed

@@ -50,20 +50,21 @@ def extract_images(soup, base_url):
         images.append({"Alt Text": alt_text, "Image URL": full_img_url})
     return images
-def format_detailed_output(structured_data):
     """Formats the structured data into a Markdown string."""
-    result = "### Structured Page Content\n\n"
-    result += "**Texts:**\n" + (" ".join(structured_data["Texts"]) if structured_data["Texts"] else "No textual content found.") + "\n\n"
-    result += "**Links:**\n"
     if structured_data["Links"]:
-        result += "\n".join(f"[{link['Text']}]({link['URL']})" for link in structured_data["Links"]) + "\n"
     else:
-        result += "No links found.\n"
-    result += "**Images:**\n"
     if structured_data["Images"]:
-        result += "\n".join(f"![{img['Alt Text']}]({img['Image URL']})" for img in structured_data["Images"]) + "\n"
     else:
-        result += "No images found.\n"
     return result
 # Web Page Processing Function
@@ -75,12 +76,16 @@ def download_and_process_web_page(url):
     try:
         response = get(url)
         soup = response.soup()
         structured_data = {
             "Texts": extract_texts(soup),
             "Links": extract_links(soup, url),
             "Images": extract_images(soup, url)
         }
-        return format_detailed_output(structured_data)
     except urllib3.exceptions.HTTPError as e:
         return f"Error: {e}"

         images.append({"Alt Text": alt_text, "Image URL": full_img_url})
     return images
+def format_detailed_output(structured_data, page_title):
     """Formats the structured data into a Markdown string."""
+    result = f"## Page Title: {page_title}\n\n"
+    result += "### Texts\n\n"
+    result += " ".join(structured_data["Texts"]) if structured_data["Texts"] else "No textual content found."
+    result += "\n\n### Links\n\n"
     if structured_data["Links"]:
+        result += "\n".join(f"[{link['Text']}]({link['URL']})" for link in structured_data["Links"])
     else:
+        result += "No links found."
+    result += "\n\n### Images\n\n"
     if structured_data["Images"]:
+        result += "\n".join(f"![{img['Alt Text']}]({img['Image URL']})" for img in structured_data["Images"])
     else:
+        result += "No images found."
     return result
 # Web Page Processing Function
     try:
         response = get(url)
         soup = response.soup()
+        # Extract page title
+        page_title = soup.title.string if soup.title else "No Title Found"
         structured_data = {
             "Texts": extract_texts(soup),
             "Links": extract_links(soup, url),
             "Images": extract_images(soup, url)
         }
+        return format_detailed_output(structured_data, page_title)
     except urllib3.exceptions.HTTPError as e:
         return f"Error: {e}"