Spaces:
Runtime error
Runtime error
# scraper.py | |
import requests | |
from bs4 import BeautifulSoup | |
from urllib.parse import urljoin | |
def scrape_url(url):
    """Fetch a web page and return its text plus absolute image URLs.

    Args:
        url: Address of the page to download.

    Returns:
        A ``(text, images)`` tuple. ``text`` is the page text with each
        stripped string on its own line. ``images`` holds the ``src`` of
        every ``<img>`` tag that has one, resolved to an absolute URL.

        This function never raises. On any failure (network error, HTTP
        error status, parse problem) it returns
        ``("[Error scraping <url>: <reason>]", [])`` instead.
    """
    try:
        res = requests.get(url, timeout=10)
        res.raise_for_status()

        # Parse the raw bytes rather than ``res.text``: when the server sends
        # a text/* Content-Type without a charset, requests falls back to
        # ISO-8859-1, which garbles UTF-8 pages. Only trust ``res.encoding``
        # when the header really declared a charset; otherwise let the parser
        # sniff the encoding from the document itself (BOM / <meta> tags).
        content_type = res.headers.get('Content-Type', '')
        declared = res.encoding if 'charset' in content_type.lower() else None
        soup = BeautifulSoup(res.content, 'html.parser', from_encoding=declared)

        text = soup.get_text(separator='\n', strip=True)

        # Relative image paths must be resolved against the URL that was
        # actually served, which differs from ``url`` after a redirect.
        base_url = res.url or url

        images = []
        for img in soup.find_all('img'):
            # Strip padding so a whitespace-only src is skipped instead of
            # being resolved to the page URL itself.
            src = (img.get('src') or '').strip()
            if src:
                images.append(urljoin(base_url, src))
        return text, images
    except Exception as e:
        # Deliberate best-effort boundary: callers get an error message in
        # place of the page text rather than an exception.
        return f"[Error scraping {url}: {e}]", []