"""Gradio app that bundles the MP3 files linked from a web page into a ZIP."""

import os
import re
import tempfile
import zipfile
from urllib.parse import urljoin

import gradio as gr
import requests

# Browser-like User-Agent sent with every HTTP request made by this script.
HEADERS = {
    'User-Agent': 'Mozilla/5.0 (X11; Linux x86_64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/120.0.0.0 Safari/537.36'
}

# (connect, read) timeouts in seconds. Without a timeout, requests waits
# indefinitely on a stalled server and the UI call never returns.
REQUEST_TIMEOUT = (10, 60)

# Matches `contentUrl": "<...>.mp3..."` / `url": "<...>.mp3..."` pairs in the
# page source. Only the part up to and including ".mp3" is captured; anything
# between ".mp3" and the closing quote (e.g. a query string) is dropped.
MP3_PATTERN = re.compile(r'(?:contentUrl|url)"\s*:\s*"([^"]+?\.mp3)(?:[^"]*)"')


def _extract_mp3_links(page_text, base_url):
    """Return the absolute MP3 URLs found in *page_text*, without duplicates.

    Relative matches are resolved against *base_url*. The order of first
    appearance is preserved.
    """
    resolved = (urljoin(base_url, match) for match in MP3_PATTERN.findall(page_text))
    # dict keys keep insertion order, which gives an ordered de-duplication.
    return list(dict.fromkeys(resolved))


def _download_file(file_url, filepath):
    """Stream *file_url* to *filepath*; raises on HTTP or I/O failure."""
    with requests.get(
        file_url, headers=HEADERS, stream=True, timeout=REQUEST_TIMEOUT
    ) as r:
        r.raise_for_status()
        with open(filepath, 'wb') as f:
            for chunk in r.iter_content(chunk_size=8192):
                if chunk:
                    f.write(chunk)


def _build_zip(filepaths):
    """Write *filepaths* into a new ``episodes.zip`` and return its path.

    The archive lives in its own temporary directory, which is deliberately
    not removed here because the caller still needs the file.
    """
    zip_path = os.path.join(tempfile.mkdtemp(), 'episodes.zip')
    with zipfile.ZipFile(zip_path, 'w') as zipf:
        for path in filepaths:
            zipf.write(path, arcname=os.path.basename(path))
    return zip_path


def process_url(url):
    """Download every MP3 referenced by the page at *url* and zip them.

    Args:
        url: Address of the page to scan. Surrounding whitespace is ignored.

    Returns:
        A ``(zip_path, error)`` tuple. On success ``zip_path`` is the path of
        the created archive and ``error`` is ``None``; on failure ``zip_path``
        is ``None`` and ``error`` is a user-facing message.
    """
    url = (url or "").strip()
    try:
        response = requests.get(url, headers=HEADERS, timeout=REQUEST_TIMEOUT)
        response.raise_for_status()
    except requests.RequestException as e:
        return None, f"Erreur de connexion : {str(e)}"

    # Use response.url (the final URL after redirects) as the base.
    mp3_links = _extract_mp3_links(response.text, response.url)
    if not mp3_links:
        return None, "Aucun lien MP3 détecté dans le code source"

    # The raw downloads go to a self-cleaning directory so that only the final
    # archive stays on disk once this function returns.
    with tempfile.TemporaryDirectory() as download_dir:
        filenames = []
        for idx, mp3_url in enumerate(mp3_links, 1):
            # The numeric prefix keeps page order and avoids name collisions.
            filename = f"{idx:02d}_{os.path.basename(mp3_url).split('?')[0]}"
            filepath = os.path.join(download_dir, filename)
            try:
                _download_file(mp3_url, filepath)
            except (requests.RequestException, OSError) as e:
                # Best effort: report the failure and go on with the next link.
                print(f"Échec du téléchargement {mp3_url} : {str(e)}")
                continue
            filenames.append(filepath)

        if not filenames:
            return None, "Échec de tous les téléchargements"

        zip_path = _build_zip(filenames)

    return zip_path, None


def download_podcast(url):
    """Gradio callback: return the ZIP path for *url* or raise ``gr.Error``."""
    zip_path, error = process_url(url)
    if error:
        raise gr.Error(error)
    return zip_path


iface = gr.Interface(
    fn=download_podcast,
    inputs=gr.Textbox(label="URL Radio France", placeholder="Collez l'URL ici..."),
    outputs=gr.File(label="Épisodes téléchargés"),
    examples=[[
        "https://www.radiofrance.fr/franceculture/podcasts/serie-le-secret-de-la-licorne-les-aventures-de-tintin"
    ]],
    title="Extracteur MP3 Radio France",
    description="Téléchargez les épisodes MP3 des podcasts Radio France directement depuis l'URL de la série",
)

iface.launch()