File size: 2,931 Bytes
eeae908 734bffb eeae908 734bffb eeae908 96682d9 eeae908 734bffb eeae908 734bffb eeae908 734bffb eeae908 734bffb eeae908 734bffb eeae908 734bffb eeae908 734bffb eeae908 734bffb eeae908 734bffb eeae908 734bffb eeae908 734bffb eeae908 734bffb eeae908 734bffb eeae908 734bffb 11fd592 eeae908 |
1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32 33 34 35 36 37 38 39 40 41 42 43 44 45 46 47 48 49 50 51 52 53 54 55 56 57 58 59 60 61 62 63 64 65 66 67 68 69 70 71 72 73 74 75 76 77 78 79 80 81 82 83 84 85 |
import gradio as gr
import requests
from bs4 import BeautifulSoup
import re
import os
from urllib.parse import urlparse
from zipfile import ZipFile
def extract_podcast_episodes(url):
    """Scrape a podcast page and list the episodes that expose an MP3 link.

    The page at ``url`` is downloaded and parsed. An ``<article>`` element is
    kept when it has an ``<h3>`` title that contains (case-insensitively, as a
    substring) at least one word of the page's ``<h1>`` title, and when it
    holds an ``<a href>`` pointing at an ``.mp3`` resource.

    Args:
        url: Address of the podcast page to scrape.

    Returns:
        A list of ``{"title": ..., "url": ...}`` dicts in page order, where
        ``url`` is absolute. The list is empty when the page has no usable
        ``<h1>`` title or when no article matches.

    Raises:
        requests.RequestException: If the page cannot be fetched, the request
            times out, or the server answers with an error status.
    """
    # Function-scope import keeps this block self-contained: the module only
    # imports ``urlparse`` from ``urllib.parse``.
    from urllib.parse import urljoin

    headers = {"User-Agent": "Mozilla/5.0"}
    # Without a timeout a stalled server would block the caller forever.
    response = requests.get(url, headers=headers, timeout=30)
    response.raise_for_status()
    soup = BeautifulSoup(response.text, "html.parser")

    # Main podcast title, used to filter the article titles.
    heading = soup.find("h1")
    if heading is None:
        # No title to filter against: report "no episodes" instead of
        # crashing on ``None.get_text``.
        return []
    title_words = heading.get_text(strip=True).lower().split()

    episodes = []
    for article in soup.select("article"):
        title_tag = article.find("h3")
        if not title_tag:
            continue
        title = title_tag.get_text(strip=True)
        lowered_title = title.lower()
        if not any(word in lowered_title for word in title_words):
            continue

        # Look for an MP3 link directly inside the article; first match wins.
        for anchor in article.find_all("a", href=True):
            href = anchor["href"]
            try:
                # Test the URL path as well as the raw href so that links
                # such as "episode.mp3?token=..." are recognised too.
                is_mp3 = href.endswith(".mp3") or (
                    urlparse(href).path.lower().endswith(".mp3")
                )
                if not is_mp3:
                    continue
                # Resolve relative links against the page address so the
                # returned URL can be downloaded as-is.
                mp3_url = urljoin(url, href)
            except ValueError:
                # Malformed href (e.g. broken IPv6 literal): ignore the link.
                continue
            episodes.append({"title": title, "url": mp3_url})
            break
    return episodes
def download_and_zip_episodes(url):
    """Download every episode found on a podcast page into a single ZIP file.

    Episodes are discovered with ``extract_podcast_episodes``. Each MP3 is
    streamed to a temporary file under ``downloads/``, added to
    ``downloads/episodes_radiofrance.zip`` and the temporary file is removed.
    A failed episode is logged and skipped; the others are still processed.

    Args:
        url: Address of the podcast page to scrape.

    Returns:
        A ``(message, zip_path)`` tuple. ``message`` is a user-facing status
        string giving the number of episodes actually stored in the archive.
        ``zip_path`` is ``None`` when no episode was found or when none could
        be downloaded.

    Raises:
        requests.RequestException: Propagated from
            ``extract_podcast_episodes`` when the page itself cannot be
            fetched.
    """
    episodes = extract_podcast_episodes(url)
    if not episodes:
        return "Aucun épisode valide trouvé", None

    os.makedirs("downloads", exist_ok=True)
    zip_filename = "episodes_radiofrance.zip"
    zip_path = os.path.join("downloads", zip_filename)
    # Same User-Agent as the page fetch, so both requests look alike to the
    # server.
    headers = {"User-Agent": "Mozilla/5.0"}
    downloaded = 0

    with ZipFile(zip_path, "w") as zipf:
        for i, episode in enumerate(episodes, start=1):
            mp3_url = episode["url"]
            title = episode["title"]
            local_path = None
            try:
                ext = os.path.splitext(urlparse(mp3_url).path)[1]
                # Titles come from scraped HTML: replace every character that
                # is invalid in a file name on common platforms, not only "/".
                filename = re.sub(
                    r'[\\/:*?"<>|\x00-\x1f]', "_", f"{i:02d} - {title}{ext}"
                )
                local_path = os.path.join("downloads", filename)
                # The context manager releases the connection even when the
                # transfer is interrupted.
                with requests.get(
                    mp3_url, headers=headers, stream=True, timeout=30
                ) as mp3_response:
                    mp3_response.raise_for_status()
                    with open(local_path, "wb") as f:
                        for chunk in mp3_response.iter_content(64 * 1024):
                            f.write(chunk)
                zipf.write(local_path, arcname=filename)
                downloaded += 1
            except Exception as e:
                # Deliberate best-effort: one broken episode must not abort
                # the whole batch.
                print(f"Erreur lors du téléchargement de {mp3_url}: {e}")
            finally:
                # Drop the temporary copy on success and on failure alike, so
                # partial downloads do not pile up in ``downloads/``.
                if local_path is not None and os.path.exists(local_path):
                    try:
                        os.remove(local_path)
                    except OSError as e:
                        print(f"Erreur lors du téléchargement de {mp3_url}: {e}")

    if downloaded == 0:
        # Do not hand an empty archive to the user.
        try:
            os.remove(zip_path)
        except OSError:
            pass
        return "Aucun épisode n'a pu être téléchargé", None

    # Report what is really in the archive, not what was merely listed.
    return f"{downloaded} épisode(s) téléchargé(s)", zip_path
# Gradio front end: one text box in, a status message and the ZIP archive out.
_url_box = gr.Textbox(label="URL de la page podcast de France Culture")
_result_widgets = [
    gr.Text(label="Résultat"),
    gr.File(label="Fichier ZIP"),
]

iface = gr.Interface(
    fn=download_and_zip_episodes,
    inputs=_url_box,
    outputs=_result_widgets,
    title="Téléchargeur de Podcast France Culture",
    description="Saisissez l’URL d’une série sur France Culture (ex: https://www.radiofrance.fr/franceculture/podcasts/...) pour télécharger uniquement les bons épisodes listés sur la page.",
)


if __name__ == "__main__":
    # Launch the interface only when the file is run as a script.
    iface.launch()
|