# File size: 5,342 Bytes
# f788bbf
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
import time
import json
import re
from selenium import webdriver
from selenium.webdriver.chrome.options import Options
from selenium.webdriver.chrome.service import Service
from webdriver_manager.chrome import ChromeDriverManager

def extract_m3u8_urls(url, wait_time=10, headers=None):
    """
    Captura URLs m3u8 de una página web y las devuelve directamente.
    
    Args:
        url (str): URL de la página a analizar
        wait_time (int): Tiempo de espera en segundos
        headers (dict): Headers personalizados
        
    Returns:
        list: Lista de URLs m3u8 encontradas
    """
    # Configurar opciones de Chrome
    chrome_options = Options()
    chrome_options.add_argument("--headless")
    chrome_options.add_argument("--disable-gpu")
    chrome_options.add_argument("--window-size=1920x1080")
    chrome_options.add_argument("--no-sandbox")
    chrome_options.add_argument("--disable-dev-shm-usage")
    
    # Agregar headers personalizados
    if headers:
        for key, value in headers.items():
            if key.lower() == 'user-agent':
                chrome_options.add_argument(f"--user-agent={value}")
            else:
                chrome_options.add_argument(f"--header={key}: {value}")
    
    # Habilitar registro de red
    chrome_options.set_capability("goog:loggingPrefs", {"performance": "ALL"})
    
    # Iniciar navegador silenciosamente
    service = Service()
    driver = webdriver.Chrome(service=service, options=chrome_options)
    
    try:
        # Navegar a la URL
        driver.get(url)
        
        # Aplicar headers a futuras peticiones XHR y fetch
        if headers:
            header_script = """
            // Headers para XHR
            (function(open) {
                XMLHttpRequest.prototype.open = function(method, url) {
                    var xhr = this;
                    var args = arguments;
                    open.apply(xhr, args);
            """
            
            for key, value in headers.items():
                if key.lower() != 'user-agent':
                    header_script += f'xhr.setRequestHeader("{key}", "{value}");'
            
            header_script += """
                };
            })(XMLHttpRequest.prototype.open);
            
            // Headers para fetch
            (function(fetch) {
                window.fetch = function(url, options) {
                    options = options || {};
                    options.headers = options.headers || {};
            """
            
            for key, value in headers.items():
                header_script += f'options.headers["{key}"] = "{value}";'
            
            header_script += """
                    return fetch.call(this, url, options);
                };
            })(window.fetch);
            """
            
            driver.execute_script(header_script)
        
        # Esperar a que la página cargue completamente
        time.sleep(wait_time)
        
        # Colección para todas las URLs m3u8
        m3u8_urls = set()
        
        # 1. Buscar en peticiones de red
        logs = driver.get_log("performance")
        for log in logs:
            try:
                log_entry = json.loads(log["message"])["message"]
                
                # Filtrar peticiones de red
                if "Network.responseReceived" in log_entry["method"] or "Network.requestWillBeSent" in log_entry["method"]:
                    if "request" in log_entry["params"] and "url" in log_entry["params"]["request"]:
                        url = log_entry["params"]["request"]["url"]
                        if ".m3u8" in url:
                            m3u8_urls.add(url)
                    
                    # También buscar en respuestas
                    elif "response" in log_entry["params"] and "url" in log_entry["params"]["response"]:
                        url = log_entry["params"]["response"]["url"]
                        if ".m3u8" in url:
                            m3u8_urls.add(url)
            except:
                continue
        
        # 2. Buscar en el contenido de la página y scripts
        m3u8_pattern = re.compile(r'https?://[^"\'\s]+\.m3u8[^"\'\s]*')
        
        # En el HTML
        for match in m3u8_pattern.finditer(driver.page_source):
            m3u8_urls.add(match.group(0))
        
        # En scripts
        scripts = driver.find_elements("tag name", "script")
        for script in scripts:
            content = script.get_attribute("innerHTML")
            for match in m3u8_pattern.finditer(content):
                m3u8_urls.add(match.group(0))
        
        # 3. Buscar en variables JavaScript
        js_variables = driver.execute_script("""
            var allVars = {};
            for (var key in window) {
                try {
                    if (typeof window[key] === 'string' && window[key].includes('.m3u8')) {
                        allVars[key] = window[key];
                    }
                } catch(e) {}
            }
            return allVars;
        """)
        
        for key, value in js_variables.items():
            for match in m3u8_pattern.finditer(value):
                m3u8_urls.add(match.group(0))
        
        return list(m3u8_urls)
    
    finally:
        # Cerrar navegador
        driver.quit()