Spaces:
Running
Running
docs: improve check_url_official tool docstring for LLM usage
Browse files
app.py
CHANGED
|
@@ -61,5 +61,55 @@ async def get_public_org_domains_csv() -> str:
|
|
| 61 |
response.raise_for_status()
|
| 62 |
return response.text
|
| 63 |
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 64 |
# Script entry point: when this file is executed directly (not imported),
# start the MCP server using the SSE transport with host 0.0.0.0, port 7860.
if __name__ == "__main__":
    mcp.run(
        port=7860,
        host="0.0.0.0",
        transport="sse",
    )
|
|
|
|
| 61 |
response.raise_for_status()
|
| 62 |
return response.text
|
| 63 |
|
| 64 |
+
@mcp.tool(
    name="check_url_official",
    description="Check if a given URL is present in the official French government URL list (sitesgouv.txt)."
)
async def check_url_official(url: str) -> dict:
    """Check whether a URL appears in the official French government URL list.

    Tool: check_url_official
    -------------------------
    Downloads https://www.auracom.fr/gouv/sitesgouv.txt and reports whether
    the given URL, after normalization, matches one of its entries.

    Parameters:
        url (str): The URL to check. It may be given with or without a
            protocol (http/https) and with or without trailing slashes.
            Example: "https://www.gouvernement.fr" or "gouvernement.fr".

    Returns:
        dict: A dictionary with the following keys:
            - url (str): The input URL exactly as provided.
            - is_official (bool): True if the normalized URL is present in
              the normalized official list, False otherwise. An input that
              normalizes to an empty string is never official.

    Raises:
        requests.RequestException: If the list cannot be downloaded
            (network error, timeout after 10 seconds, or a non-success
            HTTP status reported by ``raise_for_status``).

    Normalization:
        Both the input URL and every non-blank line of the official list
        are normalized by:
            * Stripping surrounding whitespace
            * Lowercasing all characters
            * Removing one leading "http://" or "https://" prefix
            * Removing any trailing slashes
        Nothing else is rewritten: a "www." prefix, a path or a query string
        is kept as-is, so "gouvernement.fr" and "www.gouvernement.fr" are
        compared as two different entries.

    Example usage (actual results depend on the live list contents):
        await check_url_official("https://www.gouvernement.fr/")
        # -> {"url": "https://www.gouvernement.fr/", "is_official": <bool>}

        await check_url_official("example.com")
        # -> {"url": "example.com", "is_official": <bool>}

    Notes for LLMs:
        - Use this tool to verify if a website is listed as an official
          French government site.
        - The official list is fetched live from
          https://www.auracom.fr/gouv/sitesgouv.txt on every call.
        - This tool is useful for compliance, trust, or filtering tasks
          involving French government domains.
    """
    # Local import keeps this block self-contained regardless of the
    # file-level import list.
    import asyncio

    def normalize(u):
        # Only a *leading* scheme is removed; a scheme that appears later in
        # the string (e.g. inside a query parameter) is part of the URL and
        # must not be deleted.
        u = u.strip().lower()
        for scheme in ("https://", "http://"):
            if u.startswith(scheme):
                u = u[len(scheme):]
                break
        return u.rstrip("/")

    def fetch_official_list():
        response = requests.get("https://www.auracom.fr/gouv/sitesgouv.txt", timeout=10)
        response.raise_for_status()
        return response.text

    # requests is a blocking client: run the download in the default executor
    # so the event loop stays free to serve other requests in the meantime.
    loop = asyncio.get_running_loop()
    listing = await loop.run_in_executor(None, fetch_official_list)

    normalized_official = {
        normalize(line) for line in listing.splitlines() if line.strip()
    }
    # A list line such as "https://" or "/" normalizes to ""; drop it so that
    # an empty or scheme-only input can never be reported as official.
    normalized_official.discard("")

    return {
        "url": url,
        "is_official": normalize(url) in normalized_official,
    }
|
| 113 |
+
|
| 114 |
# Script entry point: when this file is executed directly (not imported),
# start the MCP server using the SSE transport with host 0.0.0.0, port 7860.
if __name__ == "__main__":
    mcp.run(
        port=7860,
        host="0.0.0.0",
        transport="sse",
    )
|