kokluch committed on
Commit
6fba1cf
·
1 Parent(s): 7bb2406

docs: improve check_url_official tool docstring for LLM usage

Browse files
Files changed (1) hide show
  1. app.py +50 -0
app.py CHANGED
@@ -61,5 +61,55 @@ async def get_public_org_domains_csv() -> str:
61
  response.raise_for_status()
62
  return response.text
63
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
64
  # When executed as a script, call mcp.run with the SSE transport,
  # bound to host 0.0.0.0 on port 7860.
  if __name__ == "__main__":
65
  mcp.run(transport="sse", host="0.0.0.0", port=7860)
 
61
  response.raise_for_status()
62
  return response.text
63
 
64
_OFFICIAL_LIST_URL = "https://www.auracom.fr/gouv/sitesgouv.txt"
_URL_SCHEMES = ("https://", "http://")


def _normalize_url(raw: str) -> str:
    """Return *raw* stripped, lowercased, without a leading scheme or trailing slashes."""
    cleaned = raw.strip().lower()
    # Only a *leading* scheme is removed; "http://" occurring later in the
    # string (e.g. inside a query parameter) is part of the URL and is kept.
    for scheme in _URL_SCHEMES:
        if cleaned.startswith(scheme):
            cleaned = cleaned[len(scheme):]
            break
    return cleaned.rstrip("/")


@mcp.tool(
    name="check_url_official",
    description="Check if a given URL is present in the official French government URL list (sitesgouv.txt)."
)
async def check_url_official(url: str) -> dict:
    """Check whether a URL appears in the official French government URL list.

    The list (sitesgouv.txt) is downloaded from
    https://www.auracom.fr/gouv/sitesgouv.txt on every call, and the
    normalized input is looked up among the normalized list entries.

    Parameters:
        url (str): The URL to check, with or without a protocol
            (http/https) and with or without a trailing slash.
            Example: "https://www.gouvernement.fr" or "gouvernement.fr".

    Returns:
        dict: A dictionary with the following keys:
            - url (str): The input URL exactly as provided.
            - is_official (bool): True if the normalized URL is present in
              the normalized official list, False otherwise. An input that
              normalizes to an empty string is never official.

    Raises:
        requests.HTTPError: If the list server answers with an error status
            (raised by ``response.raise_for_status()``). Other ``requests``
            errors such as connection failures or the 10-second timeout
            propagate unchanged.

    Normalization (applied to both the input and every list entry):
        * Surrounding whitespace is stripped
        * All characters are lowercased
        * A leading "http://" or "https://" is removed
        * Trailing slashes are removed

    Example usage:
        >>> await check_url_official("https://www.gouvernement.fr/")
        {"url": "https://www.gouvernement.fr/", "is_official": True}

        >>> await check_url_official("example.com")
        {"url": "example.com", "is_official": False}

    Notes for LLMs:
        - Use this tool to verify if a website is officially recognized as
          a French government site.
        - The official list is fetched live for each call.
        - This tool is useful for compliance, trust, or filtering tasks
          involving French government domains.
    """
    import asyncio
    import functools

    # requests is synchronous: run the download in the default executor so
    # the event loop keeps serving other requests while waiting on the network.
    loop = asyncio.get_running_loop()
    response = await loop.run_in_executor(
        None,
        functools.partial(requests.get, _OFFICIAL_LIST_URL, timeout=10),
    )
    response.raise_for_status()

    normalized_official = {
        _normalize_url(line)
        for line in response.text.splitlines()
        if line.strip()
    }
    # A degenerate entry such as "/" or "https://" normalizes to "", which
    # must not make an empty input look official.
    normalized_official.discard("")

    return {
        "url": url,
        "is_official": _normalize_url(url) in normalized_official,
    }
113
+
114
  # When executed as a script, call mcp.run with the SSE transport,
  # bound to host 0.0.0.0 on port 7860.
  if __name__ == "__main__":
115
  mcp.run(transport="sse", host="0.0.0.0", port=7860)