# PodMagic / app.py
# Ribot's picture
# Update app.py
# 88da9f3 verified
# raw
# history blame
# 2.3 kB
import os
import shutil
import tempfile
import zipfile
from urllib.parse import unquote, urljoin, urlparse

import gradio as gr
import requests
from bs4 import BeautifulSoup
# Size of the chunks streamed from the network to disk for each episode.
_DOWNLOAD_CHUNK_SIZE = 64 * 1024

# Characters replaced in generated file names because some filesystems reject them.
_UNSAFE_FILENAME_CHARS = '<>:"/\\|?*'


def _is_mp3_link(href):
    """Return True when *href* ends in ``.mp3``, either as written or in its URL path."""
    if href.lower().endswith('.mp3'):
        return True
    try:
        # "episode.mp3?token=abc" or "episode.mp3#t=10" only show the
        # extension once the query string and fragment are set aside.
        return urlparse(href).path.lower().endswith('.mp3')
    except ValueError:
        # urlparse rejects some malformed hrefs; treat those as non-MP3 links.
        return False


def _episode_filename(index, mp3_url):
    """Build the order-prefixed local file name used for *mp3_url*."""
    name = os.path.basename(unquote(urlparse(mp3_url).path))
    if not name.lower().endswith('.mp3'):
        # The link was recognised by its raw suffix (e.g. "?file=ep.mp3"),
        # so the path alone carries no usable name: use the full URL tail.
        name = os.path.basename(mp3_url)
    name = "".join(
        "_" if (ch in _UNSAFE_FILENAME_CHARS or ord(ch) < 32) else ch
        for ch in name
    ).strip()
    return f"{index:02d}_{name or 'episode.mp3'}"


def _download_to(mp3_url, destination, timeout):
    """Stream *mp3_url* into *destination*; remove the partial file if it fails."""
    try:
        with requests.get(mp3_url, stream=True, timeout=timeout) as mp3_response:
            mp3_response.raise_for_status()
            with open(destination, 'wb') as f:
                for chunk in mp3_response.iter_content(chunk_size=_DOWNLOAD_CHUNK_SIZE):
                    f.write(chunk)
    except (requests.RequestException, OSError):
        # Never leave a truncated MP3 behind for the archive step.
        try:
            os.remove(destination)
        except OSError:
            pass
        raise


def process_url(url, *, timeout=30):
    """Download every MP3 linked from a web page and bundle them into a ZIP.

    The page at *url* is fetched, each ``<a href>`` whose target ends in
    ``.mp3`` is collected (duplicates dropped, page order kept), and each file
    is saved into a fresh temporary directory with a two-digit order prefix.
    Episodes that fail to download are reported with ``print`` and skipped.

    Args:
        url: Address of the page listing the episodes.
        timeout: Timeout in seconds handed to every ``requests.get`` call.

    Returns:
        ``(zip_path, None)`` on success, or ``(None, message)`` when the page
        cannot be fetched, contains no MP3 link, or no episode could be
        downloaded.
    """
    url = (url or "").strip()
    try:
        response = requests.get(url, timeout=timeout)
        response.raise_for_status()
    except requests.RequestException as e:
        return None, f"Erreur lors de la récupération de la page : {e}"

    soup = BeautifulSoup(response.text, 'html.parser')
    candidates = []
    for link in soup.find_all('a', href=True):
        href = link['href'].strip()
        if _is_mp3_link(href):
            # Resolve against the final URL so redirects and relative links work.
            candidates.append(urljoin(response.url, href))

    # Remove duplicates while preserving order (dicts keep insertion order).
    mp3_links = list(dict.fromkeys(candidates))
    if not mp3_links:
        return None, "Aucun lien MP3 trouvé sur la page."

    temp_dir = tempfile.mkdtemp()
    filenames = []
    for idx, mp3_url in enumerate(mp3_links, start=1):
        filename = os.path.join(temp_dir, _episode_filename(idx, mp3_url))
        try:
            _download_to(mp3_url, filename, timeout)
        except (requests.RequestException, OSError) as e:
            # Best effort: one broken episode must not abort the whole batch.
            print(f"Erreur de téléchargement {mp3_url}: {e}")
            continue
        filenames.append(filename)

    if not filenames:
        # Nothing will be returned from this directory, so do not leak it.
        shutil.rmtree(temp_dir, ignore_errors=True)
        return None, "Aucun épisode téléchargé."

    zip_filename = os.path.join(temp_dir, 'podcast_episodes.zip')
    with zipfile.ZipFile(zip_filename, 'w') as zipf:
        for file in filenames:
            zipf.write(file, arcname=os.path.basename(file))
    return zip_filename, None
def download_podcast(url):
    """Gradio callback: return the ZIP path for *url* or raise a ``gr.Error``.

    Delegates to ``process_url`` and converts its ``(path, error)`` result
    into either a return value or a raised ``gr.Error`` carrying the message.
    """
    archive_path, failure = process_url(url)
    if not failure:
        return archive_path
    raise gr.Error(failure)
# Single-function UI: one text box for the page URL in, one downloadable file out.
iface = gr.Interface(
    title="Téléchargeur de Podcast",
    description="Entrez l'URL d'une page contenant des épisodes de podcast pour télécharger tous les MP3 dans un ZIP ordonné.",
    fn=download_podcast,
    inputs=gr.Textbox(
        placeholder="https://www.radiofrance.fr/...",
        label="URL de la page du podcast",
    ),
    outputs=gr.File(label="Télécharger le ZIP des épisodes"),
)

# Start the web server as soon as the module is executed.
iface.launch()