Spaces:
Runtime error
Runtime error
feat: add tools and interfaces to inspect domains
Browse files
tdagent/tools/get_domain_information.py
ADDED
@@ -0,0 +1,320 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
1 |
+
import json
|
2 |
+
import os
|
3 |
+
from concurrent.futures import ThreadPoolExecutor
|
4 |
+
from pathlib import Path
|
5 |
+
from typing import Any
|
6 |
+
|
7 |
+
import dns.resolver as dnsenum
|
8 |
+
import gradio as gr
|
9 |
+
import requests
|
10 |
+
import urllib3
|
11 |
+
|
12 |
+
|
13 |
+
# DNS record types queried, in this order, when enumerating a domain.
_DNS_RECORD_TYPES = [
    # Addressing and aliasing.
    "A", "AAAA", "CNAME",
    # Mail routing, delegation and zone authority.
    "MX", "NS", "SOA",
    # Free-form and informational records.
    "TXT", "RP", "LOC",
    # Certificate-issuance and mail-sender policy.
    "CAA", "SPF",
    # Service location.
    "SRV",
    # DNSSEC.
    "NSEC", "RRSIG",
]  # fmt: skip

# Wordlist of subdomain prefixes, read one prefix per line. The path is
# relative, so it is looked up in the process's current working directory.
_COMMON_SUBDOMAINS_TXT_PATH = Path("subdomains.txt")
|
31 |
+
|
32 |
+
|
33 |
+
def get_geolocation(ip: str) -> dict[str, Any] | str:
|
34 |
+
"""Get location information from an ip address.
|
35 |
+
|
36 |
+
Returns the following information on an ip address:
|
37 |
+
1. IPv4
|
38 |
+
2. city
|
39 |
+
4. country_code
|
40 |
+
5. country_name
|
41 |
+
6. latitude
|
42 |
+
7. longitude
|
43 |
+
8. postal
|
44 |
+
9. state
|
45 |
+
|
46 |
+
Example:
|
47 |
+
>>> from pprint import pprint
|
48 |
+
>>> pprint(get_location("103.100.104.0"))
|
49 |
+
... {'IPv4': '103.100.104.0',
|
50 |
+
'city': None,
|
51 |
+
'country_code': 'NZ',
|
52 |
+
'country_name': 'New Zealand',
|
53 |
+
'latitude': -41,
|
54 |
+
'longitude': 174,
|
55 |
+
'postal': None,
|
56 |
+
'state': None}
|
57 |
+
|
58 |
+
Args:
|
59 |
+
ip: ip address
|
60 |
+
|
61 |
+
Returns:
|
62 |
+
Location information on the ip address.
|
63 |
+
"""
|
64 |
+
try:
|
65 |
+
return requests.get(
|
66 |
+
f"https://geolocation-db.com/json/{ip}",
|
67 |
+
timeout=0.5,
|
68 |
+
).json()
|
69 |
+
except Exception as e: # noqa: BLE001
|
70 |
+
return str(e)
|
71 |
+
|
72 |
+
# see: https://thepythoncode.com/article/dns-enumeration-with-python
|
73 |
+
# https://dnspython.readthedocs.io
|
74 |
+
def enumerate_dns(domain_name: str) -> dict[str, Any] | None:
    r"""Enumerates information about a specific domain's DNS configuration.

    One query is issued per record type listed in ``_DNS_RECORD_TYPES``:
        1. A records: the IPv4 associated with the domain
        2. AAAA records: the IPv6 associated with the domain
        3. CAA records: used by owners to specify which Certificate
            Authorities are authorized to issue SSL/TLS certificates
            for their domains.
        4. CNAME records: alias of one name to another - the DNS lookup
            will continue by retrying the lookup with the new name.
        5. LOC records: geographic location associated with a domain name.
        6. MX records: associated email servers to the domain.
        7. NS records: DNS servers that are authoritative for a particular
            domain. These may be used to inquire information about the
            domain.
        8. SOA records: defines authoritative information about a DNS zone,
            including zone transfers and cache expiration.
        9. TXT records: used for domain verification and email security.
        10. RP records: the responsible person for a domain.
        11. SPF records: defines authorized email servers.
        12. SRV records: specifies location of specific services
            (port and host) for the domain.
        13. NSEC records: proves non-existence of DNS records
            and prevents zone enumeration.
        14. RRSIG records: contains cryptographic signatures for
            DNSSEC-signed records, providing authentication and integrity.

    Example:
        >>> from pprint import pprint
        >>> pprint(enumerate_dns("youtube.com"))
        ... {'A': 'youtube.com. 300 IN A 142.250.200.142',
             'MX': 'youtube.com. 300 IN MX 0 smtp.google.com.',
             'NS': 'youtube.com. 21600 IN NS ns4.google.com.\n'
                   'youtube.com. 21600 IN NS ns1.google.com.',
             ...}

    Args:
        domain_name: domain name for which to
            enumerate the DNS configuration.

    Returns:
        A mapping from record type to the textual form of the answer's
        record set. A record type whose lookup raises maps to the error
        message instead. ``None`` is returned when nothing was collected.
    """
    collected: dict[str, Any] = {}
    dns_resolver = dnsenum.Resolver()
    # NOTE(review): queries are sent to port 443 rather than the usual DNS
    # port 53 - confirm this is intentional for the deployment environment.
    dns_resolver.port = 443
    for rtype in _DNS_RECORD_TYPES:
        try:
            answer = dns_resolver.resolve(domain_name, rtype)
            rrset = answer.rrset
            if rrset:
                collected[rtype] = rrset.to_text()
        except Exception as err:  # noqa: BLE001, PERF203
            # Keep going: one failing record type must not abort the rest.
            collected[rtype] = str(err)
    if not collected:
        return None
    return collected
|
148 |
+
|
149 |
+
|
150 |
+
|
151 |
+
def resolve_subdomain(domain: str) -> str | None:
|
152 |
+
"""Resolve the IPv4 address of a domain.
|
153 |
+
|
154 |
+
Args:
|
155 |
+
domain: domain name
|
156 |
+
|
157 |
+
Returns:
|
158 |
+
The domain is returned provided
|
159 |
+
it was resolved. Otherwise nothing
|
160 |
+
is returned.
|
161 |
+
"""
|
162 |
+
try:
|
163 |
+
dnsenum.resolve(
|
164 |
+
domain,
|
165 |
+
"A",
|
166 |
+
lifetime=0.1,
|
167 |
+
)
|
168 |
+
return domain # noqa: TRY300
|
169 |
+
except Exception: # noqa: BLE001
|
170 |
+
return None
|
171 |
+
|
172 |
+
|
173 |
+
def scrap_subdomains_for_domain(domain_name: str) -> list[str]:
    """Retrieves subdomains associated to a domain if any.

    The information retrieved from a domain is its subdomains
    provided they are the top 1000 subdomain prefixes as
    indicated by https://github.com/rbsec/dnscan/tree/master

    Importantly, it finds subdomains only if their prefixes
    are among the most common ones listed in the wordlist file.
    Hence, it may not yield all the subdomains associated to the domain.

    Example:
        >>> scrap_subdomains_for_domain("github.com")
        ... ['www.github.com', 'smtp.github.com', 'ns1.github.com',
             'ns2.github.com', 'autodiscover.github.com', 'test.github.com',
             'blog.github.com', 'admin.github.com', 'support.github.com',
             'docs.github.com', 'api.github.com', ...]

    Args:
        domain_name: domain name for which to retrieve a
            list of subdomains

    Returns:
        List of subdomains that resolved, if any. An empty list is
        returned when ``domain_name`` is blank or the wordlist file
        cannot be read.
    """
    domain_name = domain_name.strip() if domain_name else ""
    if not domain_name:
        # Nothing to probe; avoids issuing a lookup for every bare prefix.
        return []

    try:
        # Explicit encoding so the result does not depend on the platform
        # default; any OS-level read failure is treated like a missing file.
        with _COMMON_SUBDOMAINS_TXT_PATH.open(encoding="utf-8") as file:
            prefixes = [line.strip() for line in file if line.strip()]
    except OSError:
        return []

    candidates = [f"{prefix}.{domain_name}" for prefix in prefixes]
    with ThreadPoolExecutor(max_workers=5) as executor:
        resolved = list(executor.map(resolve_subdomain, candidates))
    return [domain for domain in resolved if domain]
|
213 |
+
|
214 |
+
def retrieve_ioc_from_threatfox(potentially_ioc: str) -> str:
    r"""Retrieves information about a potential IoC from ThreatFox.

    It may be used to retrieve information of indicators of compromise
    (IOCs) associated with malware, with the infosec community, AV
    vendors and cyber threat intelligence providers.

    The API key is read from the ``THREATFOX_APIKEY`` environment variable.

    Examples:
        >>> retrieve_ioc_from_threatfox("139.180.203.104")
        ... {
            "query_status": "ok",
            "data": [
                {
                    "id": "12",
                    "ioc": "139.180.203.104:443",
                    "threat_type": "botnet_cc",
                    "ioc_type": "ip:port",
                    "malware": "win.cobalt_strike",
                    "malware_printable": "Cobalt Strike",
                    "confidence_level": 75,
                    "first_seen": "2020-12-06 09:10:23 UTC",
                    "reporter": "abuse_ch",
                    ...
                }
            ]
        }

    Args:
        potentially_ioc: this can be a url, a domain, a hash,
            or any other type of IoC.

    Returns:
        The raw response body as text. It carries information of the input
        as an IoC: threat type, malware type and samples, confidence level,
        first/last seen dates, and more IoC information. If the API key is
        not configured or the request fails, an error message is returned
        instead.
    """
    api_key = os.environ.get("THREATFOX_APIKEY")
    if not api_key:
        # Report the misconfiguration as text, like every other failure of
        # this function, instead of raising KeyError to the caller.
        return "THREATFOX_APIKEY environment variable is not set"

    json_data = json.dumps(
        {
            "query": "search_ioc",
            "search_term": potentially_ioc,
        },
    )
    try:
        # The context manager closes the pool (and its socket) on exit.
        # A single request is made, so one pooled connection is enough.
        # The timeout keeps the tool from hanging on a stalled connection.
        with urllib3.HTTPSConnectionPool(
            "threatfox-api.abuse.ch",
            port=443,
            maxsize=1,
            headers={"Auth-Key": api_key},
            timeout=10.0,
        ) as pool:
            response = pool.request("POST", "/api/v1/", body=json_data)
            return response.data.decode("utf-8", "ignore")
    except Exception as e:  # noqa: BLE001
        return str(e)
|
281 |
+
|
282 |
+
|
283 |
+
# Gradio front-ends: each one exposes a single tool function above through
# one text input and one text output.

# IP address -> geolocation lookup.
geo_location_tool = gr.Interface(
    fn=get_geolocation,
    inputs=["text"],
    outputs=["text"],
    title="Domain Associated Geolocation Finder",
    description="Retrieves the geolocation associated to an input ip address",
    theme="default",
)

# Domain name -> DNS records of several types.
dns_enumeration_tool = gr.Interface(
    fn=enumerate_dns,
    inputs=["text"],
    outputs=["text"],
    title="DNS record enumerator of domains",
    description="Retrieves several dns record types for the input domain names",
    theme="default",
)

# Domain name -> resolvable subdomains built from the common-prefix wordlist.
scrap_subdomains_tool = gr.Interface(
    fn=scrap_subdomains_for_domain,
    inputs=["text"],
    outputs=["text"],
    title="Subdomains Extractor of domains",
    description="Retrieves the subdomains for the input domain if they are common",
    theme="default",
)

# URL / domain / hash -> ThreatFox indicator-of-compromise report.
extractor_of_ioc_from_threatfox_tool = gr.Interface(
    fn=retrieve_ioc_from_threatfox,
    inputs=["text"],
    outputs=["text"],
    title="IoC information extractor associated to particular entities",
    description=(
        # Trailing space matters: adjacent literals are joined verbatim.
        "If information as an Indicator of Compromise (IoC) exists "
        "for the input url, domain or hash, it retrieves it"
    ),
    theme="default",
)
|