Spaces:

gaur3009
/

Scaper_search

Runtime error

Scaper_search / scraper.py

Update scraper.py

b5cde6a verified 3 months ago

690 Bytes

	# scraper.py
	import requests
	from bs4 import BeautifulSoup
	from urllib.parse import urljoin

	def scrape_url(url, *, timeout=10):
	    """Fetch the text and absolute image URLs of a web page.

	    This is a best-effort helper: it does not raise on failure. Any
	    exception (network error, HTTP error status, parsing problem) is
	    reported through the returned text instead.

	    Args:
	        url: Address of the page to download. It is also the base used
	            to resolve relative image paths.
	        timeout: Value passed to ``requests.get`` as its timeout, in
	            seconds. Defaults to 10.

	    Returns:
	        A ``(text, images)`` tuple. ``text`` is the page text with
	        fragments joined by newlines; ``images`` is a list of absolute
	        URLs built from the ``src`` of each ``<img>`` tag, in document
	        order. On failure the tuple is
	        ``("[Error scraping <url>: <reason>]", [])``.
	    """
	    try:
	        res = requests.get(url, timeout=timeout)
	        res.raise_for_status()

	        # Parse the raw bytes rather than ``res.text``: when the server
	        # sends no charset, requests decodes text/* as ISO-8859-1, which
	        # garbles UTF-8 pages. Given bytes, BeautifulSoup can detect the
	        # encoding itself (e.g. from <meta charset>). A charset that the
	        # server declares explicitly is still honoured.
	        content_type = res.headers.get('Content-Type', '')
	        declared = res.encoding if 'charset' in content_type.lower() else None
	        soup = BeautifulSoup(res.content, 'html.parser', from_encoding=declared)

	        # get text
	        text = soup.get_text(separator='\n', strip=True)

	        # get image URLs (absolute); HTML permits whitespace around the
	        # src value, so strip it before joining and skip empty values.
	        images = [
	            urljoin(url, src.strip())
	            for src in (img.get('src') for img in soup.find_all('img'))
	            if src and src.strip()
	        ]

	        return text, images
	    except Exception as e:
	        # Deliberately broad: callers rely on always getting a
	        # (text, images) tuple back, with the failure described in text.
	        return f"[Error scraping {url}: {e}]", []