File size: 4,501 Bytes
eeae908 734bffb c77e282 734bffb 6ca2249 88da9f3 6ca2249 31f35d4 52a320c 31f35d4 52a320c 31f35d4 52a320c 31f35d4 96682d9 88da9f3 2a11a1f 31f35d4 88da9f3 c77e282 734bffb 31f35d4 52a320c 31f35d4 0a6bdb5 52a320c 31f35d4 2a11a1f 31f35d4 2a11a1f 52a320c 31f35d4 734bffb 88da9f3 0a6bdb5 52a320c 6ca2249 31f35d4 0a6bdb5 31f35d4 0a6bdb5 52a320c 0a6bdb5 2a11a1f 52a320c 31f35d4 52a320c 0a6bdb5 88da9f3 734bffb 88da9f3 31f35d4 734bffb 31f35d4 0a6bdb5 88da9f3 6ca2249 734bffb 0a6bdb5 734bffb 88da9f3 734bffb 31f35d4 09d051e 31f35d4 2a11a1f 09d051e 31f35d4 09d051e 31f35d4 09d051e 2a11a1f 09d051e 2a11a1f 09d051e 11fd592 31f35d4 |
1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32 33 34 35 36 37 38 39 40 41 42 43 44 45 46 47 48 49 50 51 52 53 54 55 56 57 58 59 60 61 62 63 64 65 66 67 68 69 70 71 72 73 74 75 76 77 78 79 80 81 82 83 84 85 86 87 88 89 90 91 92 93 94 95 96 97 98 99 100 101 102 103 104 105 106 107 108 109 110 111 112 113 114 115 116 117 118 119 120 121 122 123 124 125 126 127 128 129 130 131 132 |
import gradio as gr
import requests
import re
import os
import zipfile
import tempfile
from urllib.parse import urljoin
from bs4 import BeautifulSoup
from mutagen.mp3 import MP3
from mutagen.id3 import ID3, TIT3
def get_clean_title(filepath):
    """Return a filesystem-safe episode title for the MP3 at *filepath*.

    The title is read from the file's ID3 tags, preferring ``TIT3``
    (subtitle) over ``TIT2`` (title). A trailing "- Radio France" suffix
    and characters that are not allowed in file names are removed, and the
    result is truncated to 100 characters.

    Args:
        filepath: Path of the MP3 file to inspect.

    Returns:
        The cleaned title. When the file has no usable title tag, the tags
        cannot be read, or the title is empty after cleaning, the file name
        without its extension is returned instead.
    """
    # Single fallback for every failure path, always without the extension:
    # the caller appends ".mp3" itself, so keeping it would give "x.mp3.mp3".
    fallback = os.path.splitext(os.path.basename(filepath))[0]
    try:
        audio = MP3(filepath, ID3=ID3)
        # TIT3 (subtitle) is preferred: it often carries the episode number.
        if 'TIT3' in audio:
            title = audio['TIT3'].text[0]
        elif 'TIT2' in audio:
            title = audio['TIT2'].text[0]
        else:
            return fallback
        # Advanced clean-up: drop the broadcaster suffix, then any character
        # that is invalid in a file name.
        title = re.sub(r'\s*-\s*Radio France$', '', str(title), flags=re.IGNORECASE)
        title = re.sub(r'[\\/*?:"<>|]', '', title).strip()
        # Limit the file-name length; an empty title falls back to the stem.
        return title[:100] or fallback
    except Exception:
        # Best effort: an unreadable or malformed tag must never abort the
        # download, so any failure degrades to the file-name fallback.
        return fallback
def _extract_mp3_links(html, base_url):
    """Return the de-duplicated MP3 URLs found in *html*, in page order."""
    content_url_re = re.compile(r'"contentUrl"\s*:\s*"([^"]+?\.mp3)')
    soup = BeautifulSoup(html, 'html.parser')
    main_content = soup.find('main') or soup
    episodes = main_content.find_all(
        ['article', 'div'],
        class_=re.compile(r'episode|podcast|card', re.IGNORECASE),
    )
    # A dict keeps insertion order and gives O(1) duplicate detection.
    found = {}
    for episode in episodes:
        script_tag = episode.find('script', type='application/ld+json')
        # .string is None for an empty tag or one with several children;
        # passing None to the regex would raise TypeError.
        if script_tag is None or not script_tag.string:
            continue
        match = content_url_re.search(script_tag.string)
        if match:
            # JSON may escape "/" as "\/"; undo that before resolving the URL.
            raw_url = match.group(1).replace('\\/', '/')
            found[urljoin(base_url, raw_url.split('?')[0])] = None
    # Fallback when structured detection fails: scan the raw page text.
    if not found:
        matches = re.findall(
            r'(https://media\.radiofrance-podcast\.net[^"\']+?\.mp3)', html
        )
        found = dict.fromkeys(matches)
    return list(found)


def _download_episode(mp3_url, headers, temp_dir, idx):
    """Download one episode into *temp_dir*; return its final path or None."""
    temp_path = os.path.join(temp_dir, f"temp_{idx}.mp3")
    try:
        # Temporary download, streamed so the file is never held in memory.
        with requests.get(mp3_url, headers=headers, stream=True, timeout=20) as r:
            r.raise_for_status()
            with open(temp_path, 'wb') as f:
                for chunk in r.iter_content(chunk_size=8192):
                    if chunk:
                        f.write(chunk)
        # Final rename, based on the title stored in the file's tags.
        clean_title = get_clean_title(temp_path)
        final_path = os.path.join(temp_dir, f"{idx:02d} - {clean_title}.mp3")
        os.rename(temp_path, final_path)
        return final_path
    except (requests.RequestException, OSError):
        # One failed episode must not abort the whole series; drop any
        # partial file so it does not linger in the temp directory.
        try:
            os.remove(temp_path)
        except OSError:
            pass
        return None


def process_url(url):
    """Download every episode linked from a podcast page into a ZIP archive.

    Args:
        url: Address of the podcast series page.

    Returns:
        A ``(zip_path, error)`` tuple. On success ``zip_path`` is the path
        of the archive and ``error`` is ``None``; on failure ``zip_path``
        is ``None`` and ``error`` is a user-facing message.
    """
    # Reject a missing/blank URL up front instead of sending it to the network.
    if not url or not url.strip():
        return None, "Veuillez saisir une URL"
    url = url.strip()

    headers = {
        'User-Agent': 'Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/120.0.0.0 Safari/537.36',
        'Referer': 'https://www.radiofrance.fr/'
    }
    try:
        response = requests.get(url, headers=headers, timeout=15)
        response.raise_for_status()
    except Exception as e:
        # Boundary handler: every fetch failure becomes an error message.
        return None, f"Erreur de connexion : {str(e)}"

    mp3_links = _extract_mp3_links(response.text, url)
    if not mp3_links:
        return None, "Aucun épisode trouvé - Vérifiez l'URL"

    temp_dir = tempfile.mkdtemp()
    filenames = []
    for idx, mp3_url in enumerate(mp3_links, 1):
        episode_path = _download_episode(mp3_url, headers, temp_dir, idx)
        if episode_path is not None:
            filenames.append(episode_path)

    if not filenames:
        # Nothing was kept, so do not leak the (now empty) temp directory.
        try:
            os.rmdir(temp_dir)
        except OSError:
            pass
        return None, "Échec du téléchargement"

    # Build the ZIP archive.
    zip_path = os.path.join(temp_dir, 'podcast.zip')
    with zipfile.ZipFile(zip_path, 'w') as zipf:
        for file in filenames:
            zipf.write(file, arcname=os.path.basename(file))

    # The archive now holds every episode; free the duplicated disk space.
    for file in filenames:
        try:
            os.remove(file)
        except OSError:
            pass
    return zip_path, None
def download_podcast(url):
    """Gradio callback: build the podcast archive for *url*.

    Delegates to ``process_url`` and returns the archive path it reports.

    Raises:
        gr.Error: With the message from ``process_url`` when it reports
            an error.
    """
    archive, failure = process_url(url)
    if not failure:
        return archive
    raise gr.Error(failure)
# Gradio interface: a URL field and a button that trigger download_podcast,
# plus a file component that receives the generated ZIP archive.
with gr.Blocks(title="RadioFrance Podcaster Pro") as app:
    gr.Markdown("## 🎧 Téléchargeur Intelligent Radio France")

    # NOTE(review): the original nesting was lost; the text box and button
    # are assumed to share this row - confirm against the deployed layout.
    with gr.Row():
        url_input = gr.Textbox(
            label="URL de la série podcast",
            placeholder="Ex: https://www.radiofrance.fr/...",
            max_lines=1
        )
        btn = gr.Button("Générer le ZIP", variant="primary")

    output = gr.File(label="Épisodes téléchargés")

    # One-click sample input for the URL field.
    examples = gr.Examples(
        examples=[[
            "https://www.radiofrance.fr/franceculture/podcasts/serie-le-secret-de-la-licorne-les-aventures-de-tintin"
        ]],
        inputs=[url_input]
    )

    btn.click(
        fn=download_podcast,
        inputs=url_input,
        outputs=output,
        api_name="download"
    )

# show_error=True is passed so errors raised by the callback are displayed.
app.launch(show_error=True)