pedrobento988 committed on
Commit
f1d068a
·
verified ·
1 Parent(s): 0efeabf

Add RDAP tool (#6)

Browse files

- feat: Add RDAP tool (b7c5e0bed5b4dcd56ecb5e31f60107b517b49002)

app.py CHANGED
@@ -8,9 +8,12 @@ from tdagent.tools.lookup_company_cloud_account_information import (
8
  gr_lookup_company_cloud_account_information,
9
  )
10
  from tdagent.tools.query_abuse_ip_db import gr_query_abuseipdb
 
11
  from tdagent.tools.send_email import gr_send_email
12
  from tdagent.tools.virus_total import gr_virus_total_url_info
13
- from tdagent.tools.whois import gr_query_whois
 
 
14
 
15
 
16
  ## Tools to load into the application interface ##
@@ -26,7 +29,9 @@ class ToolInfo(NamedTuple):
26
  TOOLS = (
27
  ToolInfo("Get URL Content", gr_get_url_http_content),
28
  ToolInfo("Query AbuseIPDB", gr_query_abuseipdb),
29
- ToolInfo("Query WHOIS", gr_query_whois),
 
 
30
  ToolInfo("Virus Total URL info", gr_virus_total_url_info),
31
  ## Fake tools
32
  ToolInfo("Fake company directory", gr_internal_company),
 
8
  gr_lookup_company_cloud_account_information,
9
  )
10
  from tdagent.tools.query_abuse_ip_db import gr_query_abuseipdb
11
+ from tdagent.tools.rdap import gr_query_rdap
12
  from tdagent.tools.send_email import gr_send_email
13
  from tdagent.tools.virus_total import gr_virus_total_url_info
14
+
15
+
16
+ # from tdagent.tools.whois import gr_query_whois
17
 
18
 
19
  ## Tools to load into the application interface ##
 
29
  TOOLS = (
30
  ToolInfo("Get URL Content", gr_get_url_http_content),
31
  ToolInfo("Query AbuseIPDB", gr_query_abuseipdb),
32
+ # Whois does not work from Spaces (port 43 blocked)
33
+ # ToolInfo("Query WHOIS", gr_query_whois),
34
+ ToolInfo("Query RDAP", gr_query_rdap),
35
  ToolInfo("Virus Total URL info", gr_virus_total_url_info),
36
  ## Fake tools
37
  ToolInfo("Fake company directory", gr_internal_company),
pyproject.toml CHANGED
@@ -128,4 +128,5 @@ convention = "google"
128
  [tool.ruff.lint.per-file-ignores]
129
  "*/__init__.py" = ["F401"]
130
  "tdagent/cli/**/*.py" = ["D103", "T201"]
 
131
  "tests/*.py" = ["D103", "PLR2004", "S101"]
 
128
  [tool.ruff.lint.per-file-ignores]
129
  "*/__init__.py" = ["F401"]
130
  "tdagent/cli/**/*.py" = ["D103", "T201"]
131
+ "tdagent/tools/rdap.py" = ["PLR2004"]
132
  "tests/*.py" = ["D103", "PLR2004", "S101"]
tdagent/constants.py ADDED
@@ -0,0 +1,8 @@
 
 
 
 
 
 
 
 
 
1
+ import enum
2
+
3
+
4
@enum.unique
class HttpContentType(str, enum.Enum):
    """MIME types used for HTTP ``Accept`` / ``Content-Type`` values.

    Members also subclass ``str``, so each one compares equal to its raw
    MIME string (for example ``HttpContentType.JSON == "application/json"``).
    """

    # HTML documents.
    HTML = "text/html"
    # JSON payloads.
    JSON = "application/json"
tdagent/tools/get_url_content.py CHANGED
@@ -1,15 +1,9 @@
1
- import enum
2
  from collections.abc import Sequence
3
 
4
  import gradio as gr
5
  import requests
6
 
7
-
8
- class HttpContentType(str, enum.Enum):
9
- """Http content type values."""
10
-
11
- HTML = "text/html"
12
- JSON = "application/json"
13
 
14
 
15
  def get_url_http_content(
 
 
1
  from collections.abc import Sequence
2
 
3
  import gradio as gr
4
  import requests
5
 
6
+ from tdagent.constants import HttpContentType
 
 
 
 
 
7
 
8
 
9
  def get_url_http_content(
tdagent/tools/rdap.py ADDED
@@ -0,0 +1,109 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ import enum
2
+
3
+ import cachetools
4
+ import gradio as gr
5
+ import requests
6
+ import whois
7
+
8
+ from tdagent.constants import HttpContentType
9
+
10
+
11
# Lookup endpoint template. ``rdap_type`` is one of domain, ip, autnum,
# entity etc; ``rdap_object`` is the resource being looked up.
_RDAP_URL_TEMPLATE = r"https://rdap.org/{rdap_type}/{rdap_object}"

# Sizing of the in-memory result cache: maximum number of entries and how
# long (in seconds) an entry stays valid.
_CACHE_MAX_SIZE = 4096
_CACHE_TTL_SECONDS = 60 * 60
15
+
16
+
17
@enum.unique
class RdapTypes(str, enum.Enum):
    """Kinds of objects that can be looked up through RDAP.

    Each member's value is the path segment used for that object type in
    an RDAP query URL. Members also subclass ``str``, so they compare equal
    to their raw string values.
    """

    # Domain names.
    DOMAIN = "domain"
    # IP addresses.
    IP = "ip"
    # Autonomous system numbers.
    AUTNUM = "autnum"
    # Entities.
    ENTITY = "entity"
24
+
25
+
26
# Error messages reported for the HTTP error statuses handled explicitly.
_RDAP_HTTP_ERRORS = {
    400: (
        "Invalid request (malformed path, unsupported object "
        " type, invalid IP address, etc)"
    ),
    403: "You've been blocked due to abuse or other misbehavior",
    404: (
        "RDAP.org doesn't know of an RDAP service which is"
        " authoritative for the requested resource. RDAP.org"
        " only knows about servers that are registered with IANA"
    ),
}


@cachetools.cached(
    cache=cachetools.TTLCache(maxsize=_CACHE_MAX_SIZE, ttl=_CACHE_TTL_SECONDS),
)
def query_rdap(
    url_or_ip: str,
    timeout: int = 30,
) -> dict[str, str | int | float]:
    """Query RDAP to get information about Internet resources.

    The Registration Data Access Protocol (RDAP) is the successor to WHOIS.
    Like WHOIS, RDAP provides access to information about Internet resources
    (domain names, autonomous system numbers, and IP addresses).

    Inputs matching an IP address are queried as ``ip`` objects; anything
    else is reduced to its domain and queried as a ``domain`` object.
    Returned values (including error dictionaries) are kept in a TTL cache
    keyed by the call arguments.

    Args:
        url_or_ip: URL, domain or IP to query for RDAP information.
            Surrounding whitespace is ignored.
        timeout: Request timeout in seconds. Defaults to 30.

    Returns:
        A dictionary with the parsed JSON body of the RDAP response. If
        there is an HTTP error, the dictionary will contain the key
        "error" with an error message.

    Raises:
        requests.RequestException: If the request cannot be completed
            (connection failure, timeout, ...).
        ValueError: If the response body is not valid JSON.
    """
    url_or_ip = url_or_ip.strip()
    if whois.IPV4_OR_V6.match(url_or_ip):
        rdap_type = RdapTypes.IP
        rdap_object = url_or_ip
    else:
        rdap_type = RdapTypes.DOMAIN
        rdap_object = whois.extract_domain(url_or_ip)

    # Use ``.value`` explicitly: on Python 3.11+ formatting a ``(str, Enum)``
    # member renders "RdapTypes.DOMAIN" rather than "domain", which would
    # produce an invalid query URL.
    query_url = _RDAP_URL_TEMPLATE.format(
        rdap_type=rdap_type.value,
        rdap_object=rdap_object,
    )
    response = requests.get(
        query_url,
        timeout=timeout,
        headers={"Accept": HttpContentType.JSON.value},
    )

    # ``requests`` follows redirects on its own and ``raise_for_status`` only
    # raises for 4xx/5xx responses, so a redirect that was left unfollowed
    # has to be detected here rather than in the ``HTTPError`` handler.
    if response.status_code == 302:
        if "Location" in response.headers:
            return {
                "message": "Follow the location to find RDAP information",
                "location": response.headers["Location"],
            }
        return {
            "error": (
                "Information not found in RDAP.org but it knows of"
                " a service which is authoritative for the requested resource."
            ),
        }

    try:
        response.raise_for_status()
    except requests.HTTPError as err:
        # Fall back to the exception text for statuses without a
        # dedicated explanation.
        return {"error": _RDAP_HTTP_ERRORS.get(response.status_code, str(err))}

    return response.json()
101
+
102
+
103
# Gradio interface exposing ``query_rdap``. A single text input is declared
# and the result is rendered as JSON.
gr_query_rdap = gr.Interface(
    title="Get RDAP information for a given URL.",
    description="Query a RDAP database to gather information about a url or domain.",
    fn=query_rdap,
    inputs=["text"],
    outputs="json",
)