iptvrd committed on
Commit
f788bbf
verified
1 Parent(s): 3af868f

Create extract_m3u8.py

Browse files
Files changed (1) hide show
  1. extract_m3u8.py +147 -0
extract_m3u8.py ADDED
@@ -0,0 +1,147 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ import time
2
+ import json
3
+ import re
4
+ from selenium import webdriver
5
+ from selenium.webdriver.chrome.options import Options
6
+ from selenium.webdriver.chrome.service import Service
7
+ from webdriver_manager.chrome import ChromeDriverManager
8
+
9
def extract_m3u8_urls(url, wait_time=10, headers=None):
    """
    Load a web page in headless Chrome and collect the m3u8 URLs it exposes.

    After navigating to ``url`` and sleeping for ``wait_time`` seconds, three
    sources are scanned: the request/response URLs recorded in Chrome's
    performance log, the page source together with the contents of every
    ``<script>`` element, and string-valued properties of ``window``.

    Args:
        url (str): URL of the page to analyse.
        wait_time (int): Seconds to sleep after navigation before collecting.
        headers (dict): Optional custom request headers. ``User-Agent`` is
            applied through Chrome's ``--user-agent`` switch; every other
            header is attached to all requests through the DevTools
            ``Network.setExtraHTTPHeaders`` command.

    Returns:
        list: Sorted list of the distinct m3u8 URLs that were found.
    """
    # Configure Chrome.
    chrome_options = Options()
    chrome_options.add_argument("--headless")
    chrome_options.add_argument("--disable-gpu")
    # Chrome documents this switch as "width,height" (comma-separated).
    chrome_options.add_argument("--window-size=1920,1080")
    chrome_options.add_argument("--no-sandbox")
    chrome_options.add_argument("--disable-dev-shm-usage")

    # Split the custom headers: the user agent has a dedicated Chrome switch,
    # everything else is sent through DevTools once the browser is running
    # (Chrome has no "--header" command-line switch).
    extra_headers = {}
    for name, value in (headers or {}).items():
        if name.lower() == "user-agent":
            chrome_options.add_argument(f"--user-agent={value}")
        else:
            # The DevTools protocol requires header values to be strings.
            extra_headers[name] = str(value)

    # Enable the performance log so network events can be read back later.
    chrome_options.set_capability("goog:loggingPrefs", {"performance": "ALL"})

    service = Service()
    driver = webdriver.Chrome(service=service, options=chrome_options)

    try:
        # Register the headers BEFORE navigating so that they also apply to
        # the initial document request, not only to later XHR/fetch calls.
        if extra_headers:
            driver.execute_cdp_cmd("Network.enable", {})
            driver.execute_cdp_cmd(
                "Network.setExtraHTTPHeaders", {"headers": extra_headers}
            )

        driver.get(url)

        # Give the page time to run its scripts and start its streams.
        time.sleep(wait_time)

        m3u8_urls = set()

        # 1. Network traffic recorded in the performance log.
        for entry in driver.get_log("performance"):
            try:
                message = json.loads(entry["message"])["message"]
                method = message["method"]
                params = message["params"]
            except (KeyError, TypeError, ValueError):
                # Malformed or unexpected log entry: skip it.
                continue

            if (
                "Network.responseReceived" not in method
                and "Network.requestWillBeSent" not in method
            ):
                continue
            if not isinstance(params, dict):
                continue

            for section in ("request", "response"):
                details = params.get(section)
                if not isinstance(details, dict):
                    continue
                logged_url = details.get("url")
                if isinstance(logged_url, str) and ".m3u8" in logged_url:
                    m3u8_urls.add(logged_url)

        # 2. Page source and inline scripts.
        m3u8_pattern = re.compile(r'https?://[^"\'\s]+\.m3u8[^"\'\s]*')

        m3u8_urls.update(m3u8_pattern.findall(driver.page_source))

        for script in driver.find_elements("tag name", "script"):
            # get_attribute may return None; treat that as empty content.
            content = script.get_attribute("innerHTML") or ""
            m3u8_urls.update(m3u8_pattern.findall(content))

        # 3. String-valued properties of the window object.
        js_variables = driver.execute_script("""
            var allVars = {};
            for (var key in window) {
                try {
                    if (typeof window[key] === 'string' && window[key].includes('.m3u8')) {
                        allVars[key] = window[key];
                    }
                } catch(e) {}
            }
            return allVars;
        """) or {}

        for value in js_variables.values():
            if isinstance(value, str):
                m3u8_urls.update(m3u8_pattern.findall(value))

        # Sorted so that repeated runs return the URLs in a stable order.
        return sorted(m3u8_urls)

    finally:
        # Always shut the browser down, even if collection failed.
        driver.quit()