RodDoSanz committed on
Commit dc8fed0 · 1 Parent(s): 1c6e88d

feat: add tools and interfaces to inspect domains

tdagent/tools/get_domain_information.py ADDED
@@ -0,0 +1,320 @@
+ import json
+ import os
+ from concurrent.futures import ThreadPoolExecutor
+ from pathlib import Path
+ from typing import Any
+
+ import dns.resolver as dnsenum
+ import gradio as gr
+ import requests
+ import urllib3
+
+
+ _DNS_RECORD_TYPES = [
+     "A",
+     "AAAA",
+     "CNAME",
+     "MX",
+     "NS",
+     "SOA",
+     "TXT",
+     "RP",
+     "LOC",
+     "CAA",
+     "SPF",
+     "SRV",
+     "NSEC",
+     "RRSIG",
+ ]
+
+ _COMMON_SUBDOMAINS_TXT_PATH = Path("subdomains.txt")
+
+
+ def get_geolocation(ip: str) -> dict[str, Any] | str:
+     """Get location information from an IP address.
+
+     Returns the following information on an IP address:
+     1. IPv4
+     2. city
+     3. country_code
+     4. country_name
+     5. latitude
+     6. longitude
+     7. postal
+     8. state
+
+     Example:
+         >>> from pprint import pprint
+         >>> pprint(get_geolocation("103.100.104.0"))
+         ... {'IPv4': '103.100.104.0',
+              'city': None,
+              'country_code': 'NZ',
+              'country_name': 'New Zealand',
+              'latitude': -41,
+              'longitude': 174,
+              'postal': None,
+              'state': None}
+
+     Args:
+         ip: IP address
+
+     Returns:
+         Location information on the IP address.
+     """
+     try:
+         return requests.get(
+             f"https://geolocation-db.com/json/{ip}",
+             timeout=0.5,
+         ).json()
+     except Exception as e:  # noqa: BLE001
+         return str(e)
+
+ # see: https://thepythoncode.com/article/dns-enumeration-with-python
+ # https://dnspython.readthedocs.io
+ def enumerate_dns(domain_name: str) -> dict[str, Any] | None:
+     r"""Enumerates information about a specific domain's DNS configuration.
+
+     Information collected about the domain name:
+     1. A records: the IPv4 associated with the domain
+     2. AAAA records: the IPv6 associated with the domain
+     3. CAA records: used by owners to specify which Certificate Authorities
+        are authorized to issue SSL/TLS certificates for their domains.
+     4. CNAME records: alias of one name to another - the DNS lookup will
+        continue by retrying the lookup with the new name.
+     5. LOC records: geographic location associated with a domain name.
+     6. MX records: email servers associated with the domain.
+     7. NS records: DNS servers that are authoritative for a particular domain.
+        These may be used to query further information about the domain.
+     8. SOA records: defines authoritative information about a DNS zone,
+        including zone transfers and cache expiration.
+     9. TXT records: used for domain verification and email security.
+     10. RP records: the responsible person for a domain.
+     11. SPF records: defines authorized email servers.
+     12. SRV records: specifies location of specific services
+         (port and host) for the domain.
+     13. NSEC records: proves non-existence of DNS records
+         and prevents zone enumeration.
+     14. RRSIG records: contains cryptographic signatures for DNSSEC-signed
+         records, providing authentication and integrity.
+
+     Example:
+         >>> from pprint import pprint
+         >>> pprint(enumerate_dns("youtube.com"))
+         ... {'A': 'youtube.com. 300 IN A 142.250.200.142',
+              'AAAA': 'youtube.com. 286 IN AAAA 2a00:1450:4003:80f::200e',
+              'CAA': 'youtube.com. 14352 IN CAA 0 issue "pki.goog"',
+              'CNAME': None,
+              'LOC': None,
+              'MX': 'youtube.com. 300 IN MX 0 smtp.google.com.',
+              'NS': 'youtube.com. 21600 IN NS ns4.google.com.\n'
+                    'youtube.com. 21600 IN NS ns1.google.com.\n'
+                    'youtube.com. 21600 IN NS ns2.google.com.\n'
+                    'youtube.com. 21600 IN NS ns3.google.com.',
+              'NSEC': None,
+              'RP': None,
+              'RRSIG': None,
+              'SOA': 'youtube.com. 60 IN SOA ns1.google.com. dns-admin.google.com. '
+                     '766113658 900 900 1800 60',
+              'SPF': None,
+              'SRV': None,
+              'TXT': 'youtube.com. 3586 IN TXT "v=spf1 include:google.com mx -all"\n'
+                     'youtube.com. 3586 IN TXT '
+                     '"facebook-domain-verification=64jdes7le4h7e7lfpi22rijygx58j1"\n'
+                     'youtube.com. 3586 IN TXT '
+                     '"google-site-verification=QtQWEwHWM8tHiJ4s-jJWzEQrD_fF3luPnpzNDH-Nw-w"'}
+
+     Args:
+         domain_name: domain name for which to
+             enumerate the DNS configuration.
+
+     Returns:
+         The domain's DNS configuration.
+     """
+     enumeration = {}
+     resolver = dnsenum.Resolver()
+     resolver.port = 53  # standard DNS port; plain DNS is not served on 443
+     for record_type in _DNS_RECORD_TYPES:
+         try:
+             record = resolver.resolve(
+                 domain_name,
+                 record_type,
+             ).rrset
+             if record:
+                 enumeration[record_type] = record.to_text()
+         except Exception as e:  # noqa: BLE001, PERF203
+             enumeration[record_type] = str(e)
+             continue
+     return enumeration if enumeration else None
+
+
+
+ def resolve_subdomain(domain: str) -> str | None:
+     """Check whether a domain name resolves to an IPv4 (A) record.
+
+     Args:
+         domain: domain name
+
+     Returns:
+         The domain is returned provided
+         it was resolved. Otherwise nothing
+         is returned.
+     """
+     try:
+         dnsenum.resolve(
+             domain,
+             "A",
+             lifetime=0.1,
+         )
+         return domain  # noqa: TRY300
+     except Exception:  # noqa: BLE001
+         return None
+
+
+ def scrap_subdomains_for_domain(domain_name: str) -> list[str]:
+     """Retrieves subdomains associated with a domain, if any.
+
+     The information retrieved from a domain is its subdomains,
+     provided their prefixes are among the top 1000 subdomain prefixes
+     listed by https://github.com/rbsec/dnscan/tree/master
+
+     Importantly, it finds subdomains only if their prefixes
+     are among the top 1000 most common. Hence, it may not
+     yield all the subdomains associated with the domain.
+
+     Example:
+         >>> scrap_subdomains_for_domain("github.com")
+         ... ['www.github.com', 'smtp.github.com', 'ns1.github.com',
+              'ns2.github.com', 'autodiscover.github.com', 'test.github.com',
+              'blog.github.com', 'admin.github.com', 'support.github.com',
+              'docs.github.com', 'shop.github.com', 'wiki.github.com',
+              'api.github.com', 'live.github.com', 'help.github.com',
+              'jobs.github.com', 'services.github.com', 'de.github.com',
+              'cs.github.com', 'fr.github.com', 'ssh.github.com',
+              'partner.github.com', 'community.github.com',
+              'mailer.github.com', 'training.github.com', ...]
+
+     Args:
+         domain_name: domain name for which to retrieve a
+             list of subdomains
+
+     Returns:
+         List of subdomains if any.
+     """
+     try:
+         with open(_COMMON_SUBDOMAINS_TXT_PATH) as file:  # noqa: PTH123
+             subdomains = [line.strip() for line in file if line.strip()]
+     except FileNotFoundError:
+         return []
+
+     potential_subdomains = [f"{subdomain}.{domain_name}" for subdomain in subdomains]
+     with ThreadPoolExecutor(max_workers=5) as executor:
+         results = executor.map(resolve_subdomain, potential_subdomains)
+     return [domain for domain in results if domain]
+
+ def retrieve_ioc_from_threatfox(potentially_ioc: str) -> str:
+     r"""Retrieves information about a potential IoC from ThreatFox.
+
+     It may be used to retrieve information on indicators of compromise
+     (IoCs) associated with malware, as shared by the infosec community,
+     AV vendors and cyber threat intelligence providers.
+
+     Examples:
+         >>> retrieve_ioc_from_threatfox("139.180.203.104")
+         ... {
+             "query_status": "ok",
+             "data": [
+                 {
+                     "id": "12",
+                     "ioc": "139.180.203.104:443",
+                     "threat_type": "botnet_cc",
+                     "threat_type_desc": "Indicator that identifies a botnet command&control...",
+                     "ioc_type": "ip:port",
+                     "ioc_type_desc": "ip:port combination that is used for botnet Command&...",
+                     "malware": "win.cobalt_strike",
+                     "malware_printable": "Cobalt Strike",
+                     "malware_alias": "Agentemis,BEACON,CobaltStrike",
+                     "malware_malpedia": "https:\/\/malpedia.caad.fkie.fraunhofer.de\/...",
+                     "confidence_level": 75,
+                     "first_seen": "2020-12-06 09:10:23 UTC",
+                     "last_seen": null,
+                     "reference": null,
+                     "reporter": "abuse_ch",
+                     "tags": null,
+                     "malware_samples": [
+                         {
+                             "time_stamp": "2021-03-23 08:18:06 UTC",
+                             "md5_hash": "5b7e82e051ade4b14d163eea2a17bf8b",
+                             "sha256_hash": "b325c92fa540edeb89b95dbfd4400c1cb33599c66859....",
+                             "malware_bazaar": "https:\/\/bazaar.abuse.ch\/sample\/b325c...\/"
+                         },
+                     ]
+
+                 }
+             ]
+         }
+
+     Args:
+         potentially_ioc: this can be a URL, a domain, a hash,
+             or any other type of IoC.
+
+     Returns:
+         Information on the input as an IoC: threat type, malware type and samples,
+         confidence level, first/last seen dates, and more IoC information.
+     """
+     headers = {"Auth-Key": os.environ["THREATFOX_APIKEY"]}
+     pool = urllib3.HTTPSConnectionPool(
+         "threatfox-api.abuse.ch",
+         port=443,
+         maxsize=50,
+         headers=headers,
+     )
+     data = {
+         "query": "search_ioc",
+         "search_term": potentially_ioc,
+     }
+     json_data = json.dumps(data)
+     try:
+         response = pool.request("POST", "/api/v1/", body=json_data)
+         return response.data.decode("utf-8", "ignore")
+     except Exception as e:  # noqa: BLE001
+         return str(e)
+
+
+ geo_location_tool = gr.Interface(
+     fn=get_geolocation,
+     inputs=["text"],
+     outputs=["text"],
+     title="Domain Associated Geolocation Finder",
+     description="Retrieves the geolocation associated with an input IP address",
+     theme="default",
+ )
+
+ dns_enumeration_tool = gr.Interface(
+     fn=enumerate_dns,
+     inputs=["text"],
+     outputs=["text"],
+     title="DNS record enumerator of domains",
+     description="Retrieves several DNS record types for the input domain name",
+     theme="default",
+ )
+
+ scrap_subdomains_tool = gr.Interface(
+     fn=scrap_subdomains_for_domain,
+     inputs=["text"],
+     outputs=["text"],
+     title="Subdomains Extractor of domains",
+     description="Retrieves the subdomains for the input domain if they are common",
+     theme="default",
+ )
+
+ extractor_of_ioc_from_threatfox_tool = gr.Interface(
+     fn=retrieve_ioc_from_threatfox,
+     inputs=["text"],
+     outputs=["text"],
+     title="IoC information extractor for particular entities",
+     description=(
+         "If information as an Indicator of Compromise (IoC) exists "
+         "for the input URL, domain or hash, it retrieves it"
+     ),
+     theme="default",
+ )
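
The commit only defines the four gr.Interface objects; it does not show how they are mounted or launched. A minimal usage sketch, assuming the file is importable as tdagent.tools.get_domain_information and that the TabbedInterface wrapper and tab names are illustrative choices rather than part of this commit:

import gradio as gr

from tdagent.tools.get_domain_information import (
    dns_enumeration_tool,
    extractor_of_ioc_from_threatfox_tool,
    geo_location_tool,
    scrap_subdomains_tool,
)

# Group the four tools into a single tabbed Gradio app (tab names are illustrative).
demo = gr.TabbedInterface(
    [
        geo_location_tool,
        dns_enumeration_tool,
        scrap_subdomains_tool,
        extractor_of_ioc_from_threatfox_tool,
    ],
    tab_names=["Geolocation", "DNS records", "Subdomains", "ThreatFox IoC"],
)

if __name__ == "__main__":
    # THREATFOX_APIKEY must be set in the environment for the ThreatFox tool to work.
    demo.launch()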