import os
import re
from urllib.parse import urljoin, urlparse
from zipfile import ZipFile

import gradio as gr
import requests
from bs4 import BeautifulSoup

# Headers sent with every HTTP request made by this script.
HTTP_HEADERS = {"User-Agent": "Mozilla/5.0"}
# Seconds to wait for a server to connect/respond before giving up, so a
# stalled server cannot block the UI forever.
HTTP_TIMEOUT = 30
# Working directory for temporary MP3 files and the final archive.
DOWNLOAD_DIR = "downloads"
ZIP_FILENAME = "episodes_radiofrance.zip"
# Size of the chunks read from the network while streaming an MP3 to disk.
_CHUNK_SIZE = 64 * 1024
# Characters that are path separators or invalid in Windows file names.
_UNSAFE_FILENAME_CHARS = re.compile(r'[\\/:*?"<>|\x00-\x1f]')


def _is_mp3_link(href):
    """Return True when the path component of *href* ends with ``.mp3``.

    Only the URL path is inspected, so a query string or fragment after the
    file name does not hide the extension; the comparison ignores case.
    """
    return urlparse(href).path.lower().endswith(".mp3")


def _safe_filename(name):
    """Return *name* with path separators and reserved characters replaced by ``_``."""
    return _UNSAFE_FILENAME_CHARS.sub("_", name)


def extract_podcast_episodes(url):
    """Scrape a podcast page and return the episodes that expose an MP3 link.

    The page is fetched and every ``<article>`` element is inspected. An
    article is kept when it has an ``<h3>`` title that contains at least one
    word of the page's ``<h1>`` title (case-insensitive substring match) and
    when it contains an ``<a>`` whose target is an MP3 file.

    Args:
        url: Address of the podcast page to scrape.

    Returns:
        A list of ``{"title": str, "url": str}`` dicts, in page order. The
        URL is absolute (relative links are resolved against the page URL).

    Raises:
        requests.RequestException: If the page cannot be fetched or the
            server answers with an HTTP error status.
    """
    response = requests.get(url, headers=HTTP_HEADERS, timeout=HTTP_TIMEOUT)
    response.raise_for_status()
    soup = BeautifulSoup(response.text, "html.parser")

    # Main podcast title, used to filter the episode titles. A page without
    # an <h1> yields no words, in which case no title filtering is applied
    # instead of crashing on a missing tag.
    heading = soup.find("h1")
    title_words = heading.get_text(strip=True).lower().split() if heading else []

    episodes = []
    for article in soup.select("article"):
        title_tag = article.find("h3")
        if not title_tag:
            continue

        title = title_tag.get_text(strip=True)
        lowered_title = title.lower()
        # NOTE: this is a substring match on every word of the main title,
        # so short common words can let unrelated articles through.
        if title_words and not any(word in lowered_title for word in title_words):
            continue

        # Look for an MP3 link directly inside the article; first match wins.
        mp3_url = next(
            (
                urljoin(response.url, link["href"])
                for link in article.find_all("a", href=True)
                if _is_mp3_link(link["href"])
            ),
            None,
        )
        if mp3_url:
            episodes.append({"title": title, "url": mp3_url})

    return episodes


def download_and_zip_episodes(url):
    """Download every episode found on a podcast page into a single ZIP file.

    Each MP3 is streamed to a temporary file inside ``downloads/``, added to
    the archive under the name ``"NN - <title><ext>"`` and then deleted. A
    failed download is reported on stdout and skipped; the remaining
    episodes are still processed.

    Args:
        url: Address of the podcast page to scrape.

    Returns:
        A ``(message, zip_path)`` tuple. ``zip_path`` is ``None`` when no
        episode was found or when none could be downloaded; otherwise it is
        the path of the archive and ``message`` states how many episodes
        were actually stored in it.

    Raises:
        requests.RequestException: If the podcast page itself cannot be
            fetched (errors on individual MP3 files are not propagated).
    """
    episodes = extract_podcast_episodes(url)
    if not episodes:
        return "Aucun épisode valide trouvé", None

    os.makedirs(DOWNLOAD_DIR, exist_ok=True)
    zip_path = os.path.join(DOWNLOAD_DIR, ZIP_FILENAME)

    downloaded = 0
    with ZipFile(zip_path, "w") as zipf:
        for i, episode in enumerate(episodes, start=1):
            mp3_url = episode["url"]
            ext = os.path.splitext(urlparse(mp3_url).path)[1]
            filename = _safe_filename(f"{i:02d} - {episode['title']}{ext}")
            local_path = os.path.join(DOWNLOAD_DIR, filename)
            try:
                # The context manager releases the connection even when the
                # transfer is interrupted half-way.
                with requests.get(
                    mp3_url,
                    headers=HTTP_HEADERS,
                    stream=True,
                    timeout=HTTP_TIMEOUT,
                ) as mp3_response:
                    mp3_response.raise_for_status()
                    with open(local_path, "wb") as f:
                        for chunk in mp3_response.iter_content(_CHUNK_SIZE):
                            f.write(chunk)
                zipf.write(local_path, arcname=filename)
                downloaded += 1
            except (requests.RequestException, OSError) as e:
                print(f"Erreur lors du téléchargement de {mp3_url}: {e}")
            finally:
                # Remove the temporary file on success and on failure alike,
                # so a partial download is never left behind.
                if os.path.exists(local_path):
                    os.remove(local_path)

    if downloaded == 0:
        # Do not hand an empty archive to the user.
        os.remove(zip_path)
        return "Aucun épisode n'a pu être téléchargé", None

    return f"{downloaded} épisode(s) téléchargé(s)", zip_path


# Gradio UI: one text input (page URL), a status message and the ZIP file.
iface = gr.Interface(
    fn=download_and_zip_episodes,
    inputs=gr.Textbox(label="URL de la page podcast de France Culture"),
    outputs=[
        gr.Text(label="Résultat"),
        gr.File(label="Fichier ZIP"),
    ],
    title="Téléchargeur de Podcast France Culture",
    description="Saisissez l’URL d’une série sur France Culture (ex: https://www.radiofrance.fr/franceculture/podcasts/...) pour télécharger uniquement les bons épisodes listés sur la page.",
)

if __name__ == "__main__":
    iface.launch()