import re

import requests
from markdownify import markdownify as md
from smolagents import Tool


class ExtractWikipediaSection(Tool):
    """Tool that fetches a web page and returns one of its sections as Markdown.

    The page HTML is converted to Markdown with ATX-style headings
    (``## Title``) and the requested section is located by its heading text.
    """

    name = "extract_wikipedia_section"
    description = "Extracts a specific section from a Wikipedia page in Markdown format."
    inputs = {
        "url": {
            "type": "string",
            "description": "URL of the Wikipedia page",
        },
        "section": {
            "type": "string",
            "description": "Title of the section to extract",
        },
    }
    output_type = "string"

    def forward(self, url: str, section: str) -> str:
        """Return the Markdown body of ``section`` from the page at ``url``.

        The section starts right after the heading (level 2 or deeper) whose
        text equals ``section`` and runs until the next heading of the same
        or a higher level, or until the end of the document. Nested
        subsections are therefore part of the returned text.

        Args:
            url: Address of the page to download.
            section: Exact heading text of the section to extract.

        Returns:
            The stripped Markdown content of the section, or a
            "not found" message when no heading matches.

        Raises:
            RuntimeError: If the page cannot be downloaded or the server
                answers with an error status.
        """
        headers = {
            "User-Agent": "Mozilla/5.0 (compatible; WebScraper/1.0; +https://example.com/bot)"
        }

        try:
            response = requests.get(url, headers=headers, timeout=10)
            response.raise_for_status()
        except Exception as e:
            # Chain the original error so the root cause stays in the traceback.
            raise RuntimeError(f"Failed to fetch page: {e}") from e

        markdown = md(response.text, heading_style="ATX")

        # Locate the Markdown heading line of the requested section.
        heading = re.search(
            rf"^(##+)\s*{re.escape(section)}\s*$",
            markdown,
            re.MULTILINE,
        )
        if heading is None:
            return f"❌ Section '{section}' not found on page."

        # The section ends at the next heading that is not deeper than its
        # own heading; deeper headings are subsections and belong to it.
        level = len(heading.group(1))
        remainder = markdown[heading.end():]
        next_heading = re.search(rf"^#{{1,{level}}}(?!#)", remainder, re.MULTILINE)

        # No following heading means the section runs to the end of the page.
        if next_heading is None:
            return remainder.strip()
        return remainder[:next_heading.start()].strip()