|
import gradio as gr |
|
import requests |
|
from bs4 import BeautifulSoup |
|
import re |
|
import os |
|
from urllib.parse import urlparse |
|
from zipfile import ZipFile |
|
|
|
def extract_podcast_episodes(url):
    """Scrape a podcast page and return the episodes that expose an MP3 link.

    The page is fetched over HTTP and parsed with BeautifulSoup. Every
    ``<article>`` element is treated as a candidate episode and is kept only
    when it has an ``<h3>`` title sharing at least one word with the page's
    ``<h1>`` title and contains a link whose URL path ends in ``.mp3``.

    Args:
        url: Address of the podcast page to scrape.

    Returns:
        A list of ``{"title": ..., "url": ...}`` dicts in page order. The
        list is empty when nothing matches, including when the page has no
        ``<h1>`` to compare episode titles against.

    Raises:
        requests.RequestException: If the page cannot be fetched (network
            error, timeout) or the server answers with an error status.
    """
    # Local import: only this function needs urljoin.
    from urllib.parse import urljoin

    headers = {"User-Agent": "Mozilla/5.0"}
    # A timeout keeps an unresponsive server from blocking the caller forever.
    response = requests.get(url, headers=headers, timeout=30)
    response.raise_for_status()

    soup = BeautifulSoup(response.text, "html.parser")

    # The page heading supplies the keywords used to recognise episodes that
    # belong to this series; a page without <h1> yields no keywords.
    heading = soup.find("h1")
    main_title = heading.get_text(strip=True).lower() if heading is not None else ""
    keywords = main_title.split()
    if not keywords:
        return []

    # Relative links are resolved against the final URL (after redirects).
    base_url = response.url or url

    episodes = []
    for article in soup.select("article"):
        title_tag = article.find("h3")
        if title_tag is None:
            continue

        title = title_tag.get_text(strip=True)
        lowered_title = title.lower()
        if not any(word in lowered_title for word in keywords):
            continue

        # Take the first link whose *path* ends in .mp3, so that a query
        # string or fragment after the file name does not hide it.
        mp3_url = next(
            (
                urljoin(base_url, link["href"])
                for link in article.find_all("a", href=True)
                if urlparse(link["href"]).path.lower().endswith(".mp3")
            ),
            None,
        )

        if mp3_url:
            episodes.append({"title": title, "url": mp3_url})

    return episodes
|
|
|
def download_and_zip_episodes(url):
    """Download the episodes found on a podcast page into a single ZIP file.

    Episodes are listed with ``extract_podcast_episodes``; each MP3 is
    streamed to a temporary file under ``downloads/``, added to
    ``downloads/episodes_radiofrance.zip`` and the temporary file is removed.
    A failed download is reported on stdout and skipped, so one bad episode
    does not abort the whole archive.

    Args:
        url: Address of the podcast page whose episodes should be fetched.

    Returns:
        A ``(message, zip_path)`` tuple. ``zip_path`` is ``None`` when no
        episode was found or when none could be downloaded; otherwise the
        message states how many episodes actually made it into the archive.

    Raises:
        Any exception raised by ``extract_podcast_episodes`` while fetching
        the listing page propagates unchanged.
    """
    episodes = extract_podcast_episodes(url)

    if not episodes:
        return "Aucun épisode valide trouvé", None

    os.makedirs("downloads", exist_ok=True)
    zip_filename = "episodes_radiofrance.zip"
    zip_path = os.path.join("downloads", zip_filename)

    # Same User-Agent as the page request, for consistent server treatment.
    headers = {"User-Agent": "Mozilla/5.0"}
    downloaded = 0

    with ZipFile(zip_path, "w") as zipf:
        for i, episode in enumerate(episodes, start=1):
            mp3_url = episode["url"]
            title = episode["title"]
            ext = os.path.splitext(urlparse(mp3_url).path)[1]
            # Replace characters that are path separators or are rejected in
            # file names on common platforms (titles often contain ':' or '?').
            filename = re.sub(r'[\\/:*?"<>|]', "_", f"{i:02d} - {title}{ext}")
            local_path = os.path.join("downloads", filename)

            try:
                # The context manager releases the streamed connection even
                # when the transfer fails part-way through.
                with requests.get(
                    mp3_url, headers=headers, stream=True, timeout=30
                ) as mp3_response:
                    mp3_response.raise_for_status()
                    with open(local_path, "wb") as f:
                        for chunk in mp3_response.iter_content(chunk_size=65536):
                            f.write(chunk)
                zipf.write(local_path, arcname=filename)
                downloaded += 1
            except Exception as e:
                # Deliberate best-effort boundary: report and move on to the
                # next episode.
                print(f"Erreur lors du téléchargement de {mp3_url}: {e}")
            finally:
                # Never leave a (possibly partial) temporary file behind.
                try:
                    os.remove(local_path)
                except OSError:
                    # Nothing to clean up (file never created) or not removable.
                    pass

    if downloaded == 0:
        # Do not hand an empty archive back to the user.
        try:
            os.remove(zip_path)
        except OSError:
            pass
        return "Aucun épisode n'a pu être téléchargé", None

    # Report what was actually archived, not what was merely listed.
    return f"{downloaded} épisode(s) téléchargé(s)", zip_path
|
|
|
# Gradio front end: one text box for the page URL in, a status message and
# the resulting ZIP file out.
iface = gr.Interface(
    title="Téléchargeur de Podcast France Culture",
    description=(
        "Saisissez l’URL d’une série sur France Culture "
        "(ex: https://www.radiofrance.fr/franceculture/podcasts/...) "
        "pour télécharger uniquement les bons épisodes listés sur la page."
    ),
    fn=download_and_zip_episodes,
    inputs=gr.Textbox(label="URL de la page podcast de France Culture"),
    outputs=[gr.Text(label="Résultat"), gr.File(label="Fichier ZIP")],
)


# Start the web UI only when the file is executed as a script.
if __name__ == "__main__":
    iface.launch()
|
|