"""Gradio app that downloads every MP3 referenced by a Radio France page as a ZIP."""

import os
import re
import shutil
import tempfile
import zipfile
from urllib.parse import urljoin

import gradio as gr
import requests

# Browser-like headers sent with every request (page fetch and MP3 downloads).
_REQUEST_HEADERS = {
    'User-Agent': (
        'Mozilla/5.0 (Windows NT 10.0; Win64; x64) '
        'AppleWebKit/537.36 (KHTML, like Gecko) '
        'Chrome/120.0.0.0 Safari/537.36'
    ),
    'Referer': 'https://www.radiofrance.fr/',
}

# Timeout (seconds) applied to every HTTP request so a stalled server cannot
# block the worker forever.
_REQUEST_TIMEOUT = 10

# Body of each <script> element.
_SCRIPT_RE = re.compile(r'<script\b[^>]*>(.*?)</script>', re.DOTALL | re.IGNORECASE)
# Absolute https MP3 URLs embedded in script text.
_SCRIPT_MP3_RE = re.compile(r'(https://[^\s"\']+?\.mp3)')
# Quoted attribute values that contain ".mp3".
_ATTR_MP3_RE = re.compile(
    r'(?:href|src|rl|contentUrl)\s*=\s*["\'](.*?\.mp3.*?)["\']'
)


def _extract_mp3_links(html, base_url):
    """Return the de-duplicated MP3 URLs found in *html*, in discovery order.

    Links are collected first from the bodies of ``<script>`` elements, then
    from quoted HTML attribute values (resolved against *base_url*). Query
    strings are dropped before de-duplication.
    """
    candidates = []

    # 1. Absolute URLs embedded in inline scripts.
    for script_body in _SCRIPT_RE.findall(html):
        candidates.extend(_SCRIPT_MP3_RE.findall(script_body))

    # 2. Attribute values; these may be relative, hence urljoin.
    candidates.extend(
        urljoin(base_url, value.split('";')[0])
        for value in _ATTR_MP3_RE.findall(html)
    )

    # 3. Strip query parameters, then de-duplicate while keeping order.
    cleaned = (link.split('?')[0].split('";')[0] for link in candidates)
    return list(dict.fromkeys(cleaned))


def _download_file(mp3_url, filepath):
    """Stream *mp3_url* to *filepath*; return True on success, False on failure.

    A failed download is best-effort: the partial file is removed and the
    caller simply skips this episode.
    """
    try:
        with requests.get(
            mp3_url,
            headers=_REQUEST_HEADERS,
            stream=True,
            timeout=_REQUEST_TIMEOUT,
        ) as r:
            r.raise_for_status()
            with open(filepath, 'wb') as f:
                for chunk in r.iter_content(chunk_size=8192):
                    f.write(chunk)
    except (requests.RequestException, OSError):
        # Do not leave a truncated file behind.
        try:
            os.remove(filepath)
        except OSError:
            pass  # nothing was written, or it is already gone
        return False
    return True


def process_url(url):
    """Download all MP3s referenced by the page at *url* and bundle them in a ZIP.

    Args:
        url: Address of the page to scan for MP3 links.

    Returns:
        A ``(zip_path, error)`` tuple. On success ``zip_path`` is the path of
        the created archive and ``error`` is ``None``; on failure ``zip_path``
        is ``None`` and ``error`` is a user-facing message.
    """
    url = (url or "").strip()

    try:
        response = requests.get(
            url, headers=_REQUEST_HEADERS, timeout=_REQUEST_TIMEOUT
        )
        response.raise_for_status()
    except requests.RequestException as e:
        return None, f"Erreur de connexion : {e}"

    clean_links = _extract_mp3_links(response.text, url)
    if not clean_links:
        return None, "Aucun MP3 trouvé - Essayez avec l'URL complète d'une série"

    # The directory must outlive this function because the ZIP path is
    # returned to the caller; only the intermediate MP3s are removed below.
    temp_dir = tempfile.mkdtemp()
    filenames = []
    for idx, mp3_url in enumerate(clean_links, 1):
        filename = f"{idx:02d}_{os.path.basename(mp3_url)}"
        filepath = os.path.join(temp_dir, filename)
        if _download_file(mp3_url, filepath):
            filenames.append(filepath)

    if not filenames:
        # Nothing to return, so do not leak the directory.
        shutil.rmtree(temp_dir, ignore_errors=True)
        return None, "Tous les téléchargements ont échoué"

    zip_path = os.path.join(temp_dir, 'radiofrance_podcast.zip')
    with zipfile.ZipFile(zip_path, 'w') as zipf:
        for file in filenames:
            zipf.write(file, arcname=os.path.basename(file))

    # The MP3s now live inside the archive; free the duplicated disk space.
    for file in filenames:
        try:
            os.remove(file)
        except OSError:
            pass  # cleanup is best-effort

    return zip_path, None


def download_podcast(url):
    """Gradio callback: return the ZIP path for *url* or raise ``gr.Error``."""
    zip_path, error = process_url(url)
    if error:
        raise gr.Error(error)
    return zip_path


# NOTE(review): the original source was collapsed onto two lines, so the exact
# nesting under gr.Row() is reconstructed (textbox + button in the row) - confirm.
with gr.Blocks(title="RadioFrance Podcaster") as app:
    gr.Markdown("## 🎧 Téléchargement de podcasts Radio France")

    with gr.Row():
        url_input = gr.Textbox(
            label="URL de la série podcast",
            placeholder="Ex: https://www.radiofrance.fr/.../mon-podcast",
            max_lines=1
        )
        btn = gr.Button("Télécharger les épisodes", variant="primary")

    output = gr.File(label="Fichier ZIP contenant les MP3")

    examples = gr.Examples(
        examples=[[
            "https://www.radiofrance.fr/franceculture/podcasts/serie-le-secret-de-la-licorne-les-aventures-de-tintin"
        ]],
        inputs=[url_input],
        label="Exemple fonctionnel"
    )

    btn.click(
        fn=download_podcast,
        inputs=url_input,
        outputs=output,
        api_name="download"
    )

app.launch(show_error=True, share=False)