Spaces:

Agents-MCP-Hackathon
/

WebpageCreator

Running

WebpageCreator / myTools /ExtractWikipediaSection.py

Upload 29 files

77c658d verified 2 months ago

1.33 kB

	from smolagents import Tool
	import requests
	import re
	from markdownify import markdownify as md

	class ExtractWikipediaSection(Tool):
	    """Tool that returns one section of a Wikipedia page as Markdown.

	    The page is downloaded, converted to Markdown with ATX (``#``-prefixed)
	    headings, and the text found under the heading whose title equals
	    ``section`` is returned.
	    """

	    name = "extract_wikipedia_section"
	    description = "Extracts a specific section from a Wikipedia page in Markdown format."

	    inputs = {
	        "url": {
	            "type": "string",
	            "description": "URL of the Wikipedia page"
	        },
	        "section": {
	            "type": "string",
	            "description": "Title of the section to extract"
	        },
	    }

	    output_type = "string"

	    def forward(self, url: str, section: str) -> str:
	        """Fetch ``url`` and return the Markdown body of ``section``.

	        Args:
	            url: Address of the page to download. It is requested as given;
	                no check is made that it points at Wikipedia.
	            section: Title of the section heading to look for. Matching is
	                case-sensitive; surrounding whitespace is ignored.

	        Returns:
	            The stripped Markdown between the matching heading and the next
	            heading of the same or a shallower level (or the end of the
	            page), so nested sub-sections are included. If no heading
	            matches, a "not found" message string is returned instead.

	        Raises:
	            RuntimeError: If the HTTP request fails or returns an error status.
	        """
	        headers = {
	            "User-Agent": "Mozilla/5.0 (compatible; WebScraper/1.0; +https://example.com/bot)"
	        }

	        try:
	            response = requests.get(url, headers=headers, timeout=10)
	            response.raise_for_status()
	        except requests.RequestException as e:
	            # Keep the RuntimeError contract but preserve the original cause.
	            raise RuntimeError(f"Failed to fetch page: {e}") from e

	        markdown = md(response.text, heading_style="ATX")

	        # Locate the section heading: two or more '#', the title, then only
	        # optional blanks up to the end of that line.
	        heading = re.search(
	            rf"^(#{{2,}})[ \t]+{re.escape(section.strip())}[ \t]*$",
	            markdown,
	            re.MULTILINE,
	        )
	        if heading is None:
	            return f"❌ Section '{section}' not found on page."

	        # The section ends at the next heading that is not deeper than the
	        # one found, so sub-headings stay inside the extracted text. Single
	        # '#' lines are not treated as headings here, which avoids cutting
	        # the section at '# ...' lines inside code samples.
	        level = len(heading.group(1))
	        closing = re.compile(rf"^#{{2,{level}}}[ \t]", re.MULTILINE)
	        end = closing.search(markdown, heading.end())

	        # No closing heading means the section runs to the end of the page.
	        stop = end.start() if end else len(markdown)
	        return markdown[heading.end():stop].strip()