import os
import re
import sys
import subprocess
import importlib.util
import shutil
import tempfile

# === AUTOMATIC DEPENDENCY INSTALLATION ===
def install_if_missing(package_name, import_name=None):
    """Install a pip package at runtime if its import name cannot be found."""
    import_name = import_name or package_name
    if importlib.util.find_spec(import_name) is None:
        print(f"Installing {package_name}...")
        subprocess.check_call([sys.executable, "-m", "pip", "install", package_name])

for package in [
    ("requests",),
    ("beautifulsoup4", "bs4"),
    ("gradio",),
]:
    install_if_missing(*package)

# === IMPORTS ===
import requests
import gradio as gr
from bs4 import BeautifulSoup

# === UTILITIES ===
def slugify(text, max_length=50):
    """Turn an arbitrary title into a safe, underscore-separated filename stem."""
    text = text.lower()
    text = re.sub(r'[^\w\s-]', '', text)
    text = re.sub(r'[-\s]+', '_', text)
    return text[:max_length].strip('_')

def get_episode_links(main_url):
    """Collect all episode page URLs linked from the main series page."""
    response = requests.get(main_url)
    response.raise_for_status()
    soup = BeautifulSoup(response.text, 'html.parser')
    links = []
    for a in soup.find_all('a', href=True):
        href = a['href']
        if "/podcasts/" in href and href != main_url:
            full_url = href if href.startswith("http") else f"https://www.radiofrance.fr{href}"
            if full_url not in links:  # deduplicate while preserving order
                links.append(full_url)
    return links

def extract_mp3_from_episode(url):
    """Extract the MP3 link from an episode page, if one is present."""
    try:
        response = requests.get(url)
        response.raise_for_status()
        soup = BeautifulSoup(response.text, 'html.parser')
        audio_tag = soup.find("audio")
        if audio_tag and audio_tag.get("src", "").endswith(".mp3"):
            return audio_tag["src"]
    except Exception:
        pass
    return None

def get_podcast_title(url):
    """Extract the overall podcast title, slugified for use in filenames."""
    try:
        response = requests.get(url)
        soup = BeautifulSoup(response.text, 'html.parser')
        title_tag = soup.find('h1') or soup.find('title')
        return slugify(title_tag.get_text()) if title_tag else "podcast"
    except Exception:
        return "podcast"

def download_and_zip_podcast_series(main_url):
    """Download every episode MP3 of a series and bundle them into a ZIP archive."""
    try:
        title = get_podcast_title(main_url)
        episode_pages = get_episode_links(main_url)
        if not episode_pages:
            return "No episode page found.", None

        mp3_links = []
        for ep_url in episode_pages:
            mp3 = extract_mp3_from_episode(ep_url)
            if mp3:
                mp3_links.append(mp3)
        if not mp3_links:
            return "No MP3 file found in the episodes.", None

        # Stream each MP3 into a temporary download directory.
        download_dir = tempfile.mkdtemp()
        for i, mp3_url in enumerate(mp3_links, start=1):
            filename = f"{title}_{i:02}.mp3"
            filepath = os.path.join(download_dir, filename)
            with requests.get(mp3_url, stream=True) as r:
                r.raise_for_status()
                with open(filepath, 'wb') as f:
                    for chunk in r.iter_content(chunk_size=8192):
                        f.write(chunk)

        # Build the archive in a separate temporary directory so the ZIP is not
        # created inside the tree being archived.
        zip_base = os.path.join(tempfile.mkdtemp(), title)
        zip_path = shutil.make_archive(zip_base, 'zip', download_dir)
        return f"{len(mp3_links)} episode(s) downloaded.", zip_path
    except Exception as e:
        return f"Error: {e}", None

# === GRADIO INTERFACE ===
with gr.Blocks() as app:
    gr.Markdown("# MP3 Podcast Downloader - France Culture")
    with gr.Row():
        url_input = gr.Textbox(label="Series URL", placeholder="https://www.radiofrance.fr/...")
    download_button = gr.Button("Download and compress")
    output_text = gr.Textbox(label="Message")
    file_output = gr.File(label="ZIP file", file_types=[".zip"])

    def process(url):
        message, zip_file = download_and_zip_podcast_series(url)
        return message, zip_file

    download_button.click(fn=process, inputs=[url_input], outputs=[output_text, file_output])

# === LAUNCH ===
if __name__ == "__main__":
    app.launch(share=True)
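
# --- Example usage (illustrative sketch) ---
# download_and_zip_podcast_series() can also be called directly, without the
# Gradio UI, e.g. from a Python shell or another script. The series URL below
# is a hypothetical placeholder, not a real page.
#
#   message, zip_path = download_and_zip_podcast_series(
#       "https://www.radiofrance.fr/franceculture/podcasts/example-series"
#   )
#   print(message)       # e.g. "12 episode(s) downloaded."
#   if zip_path:
#       print(zip_path)  # filesystem path of the generated ZIP archive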