Scaper_search / scraper.py
gaur3009's picture
Update scraper.py
b5cde6a verified
raw
history blame
690 Bytes
# scraper.py
import requests
from bs4 import BeautifulSoup
from urllib.parse import urljoin
def scrape_url(url):
    """Fetch the text and image URLs of a web page.

    Args:
        url: Address of the page to download. It is also the base against
            which relative image paths are resolved.

    Returns:
        A ``(text, images)`` tuple. ``text`` is the page text, with each
        string stripped and joined by newlines. ``images`` holds the absolute
        URL of every ``<img>`` that has a non-empty ``src``, in document
        order. On any error, ``text`` is an ``"[Error scraping ...]"``
        message and ``images`` is an empty list; errors are reported this
        way instead of being raised.
    """
    try:
        res = requests.get(url, timeout=10)
        res.raise_for_status()

        # Parse the raw bytes rather than ``res.text``: when the server sends
        # a text Content-Type without a charset, Requests decodes as
        # ISO-8859-1, which garbles UTF-8 pages. Given bytes, BeautifulSoup
        # can detect the encoding itself (e.g. from a <meta charset> tag).
        # A charset that the server did declare is still passed on as a hint.
        content_type = res.headers.get('Content-Type', '')
        declared = res.encoding if 'charset' in content_type.lower() else None
        soup = BeautifulSoup(res.content, 'html.parser', from_encoding=declared)

        text = soup.get_text(separator='\n', strip=True)

        # Strip surrounding whitespace so blank or padded ``src`` values do
        # not produce bogus URLs, then resolve each one against the page URL.
        sources = ((img.get('src') or '').strip() for img in soup.find_all('img'))
        images = [urljoin(url, src) for src in sources if src]

        return text, images
    except Exception as e:
        # Deliberate best-effort boundary: callers always get a
        # (text, images) pair, with the failure described in the text slot.
        return f"[Error scraping {url}: {e}]", []