# PodMagic / app.py
# Ribot's picture
# Update app.py
# fea2cb5 verified
# raw
# history blame
# 3.65 kB
import os
import re
import shutil
import tempfile
import zipfile
from urllib.parse import urljoin

import gradio as gr
import requests
from bs4 import BeautifulSoup
from mutagen.id3 import ID3
from mutagen.mp3 import MP3
def get_clean_title(filepath):
    """Return a filesystem-safe title for the MP3 at *filepath*.

    The ID3 frames TIT2, TIT3 and TALB are tried in that order. The first
    one that still has text after sanitizing is returned, limited to 100
    characters. Characters that are illegal in file names on common
    platforms (and ASCII control characters) are removed.

    If the file cannot be read, has no usable tag, or every candidate tag
    sanitizes to an empty string, the file's base name without its final
    extension is returned instead.
    """
    # splitext keeps dots that are part of the name ("a.b.mp3" -> "a.b"),
    # whereas splitting on the first dot would truncate it to "a".
    fallback = os.path.splitext(os.path.basename(filepath))[0]
    try:
        audio = MP3(filepath, ID3=ID3)
        for tag in ('TIT2', 'TIT3', 'TALB'):
            if tag not in audio:
                continue
            values = audio[tag].text
            if not values:
                continue
            title = re.sub(r'[\\/*?:"<>|\x00-\x1f]', '', str(values[0])).strip()
            # A title made only of forbidden characters would yield an empty
            # name; move on to the next tag instead of returning "".
            if title:
                # rstrip again: truncation may leave trailing whitespace.
                return title[:100].rstrip()
    except Exception:
        # Deliberate best-effort: any read/parse failure means "no usable
        # tag", so the file-name fallback below is used.
        pass
    return fallback
def _extract_mp3_links(soup, base_url):
    """Return the de-duplicated absolute MP3 URLs found in a parsed page."""
    # A dict keeps insertion order and de-duplicates in O(1) per URL.
    links = {}

    # Method 1: "contentUrl" entries inside JSON-LD <script> tags.
    for script in soup.find_all('script', type='application/ld+json'):
        if not script.string:
            continue
        for match in re.findall(r'"contentUrl"\s*:\s*"([^"]+?\.mp3)', script.string):
            # JSON allows "/" to be written as "\/"; undo that escape so
            # urljoin receives a real URL.
            raw_url = match.replace('\\/', '/')
            links.setdefault(urljoin(base_url, raw_url.split('?')[0]), None)

    # Method 2: fall back to plain <a href="...mp3"> anchors.
    if not links:
        for anchor in soup.find_all('a', href=re.compile(r'\.mp3')):
            links.setdefault(urljoin(base_url, anchor['href'].split('?')[0]), None)

    return list(links)


def _download_episode(mp3_url, headers, temp_dir, idx):
    """Download one episode into *temp_dir*; return its final path or None."""
    temp_path = os.path.join(temp_dir, f"temp_{idx}.mp3")
    try:
        with requests.get(mp3_url, headers=headers, stream=True, timeout=20) as r:
            r.raise_for_status()
            with open(temp_path, 'wb') as f:
                for chunk in r.iter_content(chunk_size=8192):
                    f.write(chunk)
        # Rename after the title stored in the file's ID3 tags.
        title = get_clean_title(temp_path)
        final_path = os.path.join(temp_dir, f"{idx:02d} - {title}.mp3")
        os.rename(temp_path, final_path)
        return final_path
    except Exception:
        # Best-effort: a failed episode is skipped, but its partial download
        # must not be left behind in the temporary directory.
        try:
            os.remove(temp_path)
        except OSError:
            pass
        return None


def process_url(url):
    """Download every MP3 episode linked from *url* and bundle them in a ZIP.

    The page is fetched and scanned for MP3 URLs, first in JSON-LD
    ``contentUrl`` fields and, if none are found there, in ``<a href>``
    anchors. Each episode is downloaded, renamed to
    ``"NN - <title>.mp3"`` and stored in ``podcast.zip`` inside a fresh
    temporary directory. Episodes that fail to download are skipped.

    Returns:
        ``(zip_path, None)`` on success, or ``(None, message)`` when the
        page cannot be fetched, no episode is found, or every download
        fails. The temporary directory is removed when nothing could be
        downloaded or when building the archive raises; on success it is
        kept (it holds the ZIP) and only the individual MP3 files are
        deleted.
    """
    headers = {'User-Agent': 'Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/120.0.0.0 Safari/537.36'}
    try:
        response = requests.get(url, headers=headers, timeout=15)
        response.raise_for_status()
    except Exception as e:
        return None, f"Erreur : {str(e)}"

    soup = BeautifulSoup(response.text, 'html.parser')
    mp3_links = _extract_mp3_links(soup, url)
    if not mp3_links:
        return None, "Aucun épisode trouvé"

    temp_dir = tempfile.mkdtemp()
    filenames = []
    for idx, mp3_url in enumerate(mp3_links, 1):
        final_path = _download_episode(mp3_url, headers, temp_dir, idx)
        if final_path is not None:
            filenames.append(final_path)

    if not filenames:
        # Nothing to return: do not leak the empty temporary directory.
        shutil.rmtree(temp_dir, ignore_errors=True)
        return None, "Échec des téléchargements"

    zip_path = os.path.join(temp_dir, 'podcast.zip')
    try:
        with zipfile.ZipFile(zip_path, 'w') as zipf:
            for file in filenames:
                zipf.write(file, arcname=os.path.basename(file))
                # The episode now lives in the archive; free the disk space.
                os.remove(file)
    except Exception:
        # Archive creation failed: clean up, then let the error propagate
        # exactly as it did before.
        shutil.rmtree(temp_dir, ignore_errors=True)
        raise
    return zip_path, None
def download_podcast(url):
    """Gradio callback: return the ZIP path for *url* or raise ``gr.Error``.

    Delegates to ``process_url`` and converts its error message, if any,
    into a ``gr.Error`` carrying that message.
    """
    archive_path, failure = process_url(url)
    if not failure:
        return archive_path
    raise gr.Error(failure)
# Gradio UI: one URL field and a button that triggers the download; the
# resulting ZIP archive is offered through a file component.
with gr.Blocks() as app:
    gr.Markdown("## 🎙️ Téléchargeur Radio France")

    # Input row: series URL next to the action button.
    with gr.Row():
        url = gr.Textbox(
            placeholder="Collez l'URL ici...",
            label="URL de la série",
        )
        btn = gr.Button("Télécharger", variant="primary")

    # Receives the path returned by download_podcast.
    output = gr.File(label="Épisodes")

    # One sample URL that fills the URL textbox.
    examples = gr.Examples(
        inputs=[url],
        examples=[
            ["https://www.radiofrance.fr/franceculture/podcasts/serie-le-secret-de-la-licorne-les-aventures-de-tintin"],
        ],
    )

    btn.click(fn=download_podcast, inputs=url, outputs=output)

app.launch()