import gradio as gr
import requests
import re
import os
import zipfile
import tempfile
from urllib.parse import urljoin
from bs4 import BeautifulSoup
from mutagen.mp3 import MP3
from mutagen.id3 import ID3
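# Gradio app: scrape a Radio France series page, collect the episode MP3 URLs
# (from JSON-LD "contentUrl" entries, with a plain <a href> fallback), download
# each file, rename it from its ID3 title, and return everything as a ZIP.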
def get_clean_title(filepath):
    # Read the episode title from the ID3 tags and sanitize it for use as a filename.
    try:
        audio = MP3(filepath, ID3=ID3)
        for tag in ['TIT2', 'TIT3', 'TALB']:
            if tag in audio:
                title = audio[tag].text[0]
                title = re.sub(r'[\\/*?:"<>|]', '', title).strip()
                return title[:100]
        return os.path.basename(filepath).split('.')[0]
    except Exception:
        return os.path.basename(filepath).split('.')[0]
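# Illustration (hypothetical tag value): a TIT2 of 'Épisode 1 : "La Licorne"'
# loses the characters \ / * ? : " < > | and is capped at 100 characters,
# yielding a filesystem-safe name like 'Épisode 1  La Licorne'.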
def process_url(url):
    try:
        headers = {'User-Agent': 'Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/120.0.0.0 Safari/537.36'}
        response = requests.get(url, headers=headers, timeout=15)
        response.raise_for_status()
    except Exception as e:
        return None, f"Error: {str(e)}"
    soup = BeautifulSoup(response.text, 'html.parser')
    mp3_links = []
    # Method 1: JSON-LD script tags
    for script in soup.find_all('script', type='application/ld+json'):
        if script.string:
            matches = re.findall(r'"contentUrl"\s*:\s*"([^"]+?\.mp3)', script.string)
            for match in matches:
                clean_url = urljoin(url, match.split('?')[0])
                if clean_url not in mp3_links:
                    mp3_links.append(clean_url)
    # Method 2: HTML fallback
    if not mp3_links:
        for a in soup.find_all('a', href=re.compile(r'\.mp3')):
            mp3_url = urljoin(url, a['href'].split('?')[0])
            if mp3_url not in mp3_links:
                mp3_links.append(mp3_url)
    if not mp3_links:
        return None, "No episodes found"
    temp_dir = tempfile.mkdtemp()
    filenames = []
    for idx, mp3_url in enumerate(mp3_links, 1):
        try:
            temp_path = os.path.join(temp_dir, f"temp_{idx}.mp3")
            # Download
            with requests.get(mp3_url, headers=headers, stream=True, timeout=20) as r:
                r.raise_for_status()
                with open(temp_path, 'wb') as f:
                    for chunk in r.iter_content(chunk_size=8192):
                        f.write(chunk)
            # Rename using the ID3 title
            title = get_clean_title(temp_path)
            final_name = f"{idx:02d} - {title}.mp3"
            final_path = os.path.join(temp_dir, final_name)
            os.rename(temp_path, final_path)
            filenames.append(final_path)
        except Exception:
            continue
    if not filenames:
        return None, "All downloads failed"
    zip_path = os.path.join(temp_dir, 'podcast.zip')
    with zipfile.ZipFile(zip_path, 'w') as zipf:
        for file in filenames:
            zipf.write(file, arcname=os.path.basename(file))
    return zip_path, None
def download_podcast(url):
    zip_path, error = process_url(url)
    if error:
        raise gr.Error(error)
    return zip_path
with gr.Blocks() as app:
    gr.Markdown("## 🎙️ Radio France Downloader")
    with gr.Row():
        url = gr.Textbox(label="Series URL", placeholder="Paste the URL here...")
        btn = gr.Button("Download", variant="primary")
    output = gr.File(label="Episodes")
    examples = gr.Examples(
        examples=[[
            "https://www.radiofrance.fr/franceculture/podcasts/serie-le-secret-de-la-licorne-les-aventures-de-tintin"
        ]],
        inputs=[url]
    )
    btn.click(download_podcast, inputs=url, outputs=output)
app.launch()
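# Minimal standalone usage sketch (assumption: calling process_url directly,
# without the Gradio UI; the URL is the example wired into gr.Examples above):
#
#     zip_path, error = process_url(
#         "https://www.radiofrance.fr/franceculture/podcasts/serie-le-secret-de-la-licorne-les-aventures-de-tintin"
#     )
#     print(error or f"Archive ready: {zip_path}")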