Spaces:
Sleeping
Sleeping
File size: 5,684 Bytes
f788bbf e7c470d f788bbf e7c470d f788bbf e7c470d f788bbf e7c470d f788bbf |
1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32 33 34 35 36 37 38 39 40 41 42 43 44 45 46 47 48 49 50 51 52 53 54 55 56 57 58 59 60 61 62 63 64 65 66 67 68 69 70 71 72 73 74 75 76 77 78 79 80 81 82 83 84 85 86 87 88 89 90 91 92 93 94 95 96 97 98 99 100 101 102 103 104 105 106 107 108 109 110 111 112 113 114 115 116 117 118 119 120 121 122 123 124 125 126 127 128 129 130 131 132 133 134 135 136 137 138 139 140 141 142 143 144 145 146 147 148 149 150 151 152 153 |
import time
import json
import re
import os
from selenium import webdriver
from selenium.webdriver.chrome.options import Options
from selenium.webdriver.chrome.service import Service
from webdriver_manager.chrome import ChromeDriverManager
from webdriver_manager.core.os_manager import ChromeType
# Absolute http(s) URLs that contain ".m3u8", delimited by quotes or whitespace.
_M3U8_URL_RE = re.compile(r'https?://[^"\'\s]+\.m3u8[^"\'\s]*')

# DevTools events whose params carry a request or response URL.
_NETWORK_EVENTS = ("Network.requestWillBeSent", "Network.responseReceived")


def _install_chromedriver(cache_dir):
    """Download (or reuse) chromedriver under *cache_dir* and return its path."""
    try:
        # webdriver_manager 4.x no longer accepts ``path=``; the cache
        # location is configured through a DriverCacheManager instead.
        from webdriver_manager.core.driver_cache import DriverCacheManager
    except ImportError:
        # Older webdriver_manager releases: keep the legacy keyword.
        return ChromeDriverManager(path=cache_dir).install()
    cache_manager = DriverCacheManager(cache_dir)
    return ChromeDriverManager(cache_manager=cache_manager).install()


def _m3u8_urls_from_performance_logs(entries):
    """Return the set of .m3u8 URLs seen in Chrome performance-log entries."""
    found = set()
    for entry in entries:
        try:
            message = json.loads(entry["message"])["message"]
            method = message["method"]
            params = message["params"]
        except (KeyError, TypeError, ValueError):
            # Malformed or unrelated log entry: skip it, keep scanning.
            continue
        if not isinstance(method, str) or not method.startswith(_NETWORK_EVENTS):
            continue
        if not isinstance(params, dict):
            continue
        for side in ("request", "response"):
            payload = params.get(side)
            candidate = payload.get("url") if isinstance(payload, dict) else None
            if isinstance(candidate, str) and ".m3u8" in candidate:
                found.add(candidate)
    return found


def extract_m3u8_urls(url, wait_time=10, headers=None):
    """
    Load a web page in headless Chrome and return the m3u8 URLs it exposes.

    URLs are collected from three places: the browser's network log, the
    page HTML (including inline scripts), and string-valued global
    JavaScript variables.

    Args:
        url (str): Address of the page to analyse.
        wait_time (int): Seconds to wait after navigation so the page can
            issue its network requests.
        headers (dict): Optional custom request headers. ``User-Agent`` is
            applied through Chrome's ``--user-agent`` switch; every other
            header is sent with all requests, including the initial
            navigation.

    Returns:
        list: Sorted list of unique m3u8 URLs found (empty if none).

    Exceptions raised by Selenium or webdriver_manager (driver download,
    browser start-up, navigation) propagate to the caller; the browser is
    always closed once it has been started.
    """
    chrome_options = Options()
    for flag in (
        "--headless",
        "--disable-gpu",
        # Chrome expects "width,height"; the "1920x1080" form is not parsed.
        "--window-size=1920,1080",
        "--no-sandbox",
        "--disable-dev-shm-usage",
    ):
        chrome_options.add_argument(flag)

    # Chrome has no "--header" switch, so only User-Agent can be set on the
    # command line; the remaining headers are applied through DevTools below.
    extra_headers = {}
    for key, value in (headers or {}).items():
        if key.lower() == "user-agent":
            chrome_options.add_argument(f"--user-agent={value}")
        else:
            extra_headers[key] = str(value)

    # Enable the performance log so network events can be read back later.
    chrome_options.set_capability("goog:loggingPrefs", {"performance": "ALL"})

    # Keep downloaded drivers in a cache directory under the user's home.
    cache_dir = os.path.join(os.path.expanduser("~"), ".wdm_cache")
    os.makedirs(cache_dir, exist_ok=True)

    service = Service(_install_chromedriver(cache_dir))
    driver = webdriver.Chrome(service=service, options=chrome_options)
    try:
        if extra_headers:
            # Set the headers before navigating so the very first request
            # carries them too; no string-built JavaScript is involved.
            driver.execute_cdp_cmd("Network.enable", {})
            driver.execute_cdp_cmd(
                "Network.setExtraHTTPHeaders", {"headers": extra_headers}
            )

        driver.get(url)

        # Give the page time to load and start its media requests.
        time.sleep(wait_time)

        # 1. Network requests and responses.
        found = _m3u8_urls_from_performance_logs(driver.get_log("performance"))

        # 2. Page HTML and script bodies.
        found.update(_M3U8_URL_RE.findall(driver.page_source or ""))
        for script in driver.find_elements("tag name", "script"):
            found.update(
                _M3U8_URL_RE.findall(script.get_attribute("innerHTML") or "")
            )

        # 3. String-valued global JavaScript variables.
        js_strings = driver.execute_script("""
            var allVars = {};
            for (var key in window) {
                try {
                    if (typeof window[key] === 'string' && window[key].includes('.m3u8')) {
                        allVars[key] = window[key];
                    }
                } catch(e) {}
            }
            return allVars;
        """) or {}
        for value in js_strings.values():
            found.update(_M3U8_URL_RE.findall(value))

        return sorted(found)
    finally:
        # Always shut the browser down, even if scraping failed.
        driver.quit()