Spaces:

Ribot
/

PodMagic

Running

App Files Files Community

PodMagic / app.py

Ribot

Update app.py

31f35d4 verified 17 days ago

raw

history blame

4.5 kB

	import logging
	import os
	import re
	import shutil
	import tempfile
	import zipfile
	from urllib.parse import urljoin

	import gradio as gr
	import requests
	from bs4 import BeautifulSoup
	from mutagen.id3 import ID3, TIT3
	from mutagen.mp3 import MP3

	def get_clean_title(filepath):
	    """Return a filesystem-safe episode title for the MP3 at *filepath*.

	    The title is read from the file's ID3 tags, preferring TIT3 (subtitle)
	    and falling back to TIT2. A trailing " - Radio France" suffix and the
	    characters that are not allowed in file names are removed, and the
	    result is limited to 100 characters.

	    Args:
	        filepath: Path of the MP3 file to inspect.

	    Returns:
	        The cleaned title. When the tags cannot be read, hold no usable
	        title, or the title is empty once cleaned, the file's base name
	        without its extension is returned instead.
	    """
	    # Extension-less base name, so the caller never ends up with
	    # "name.mp3.mp3" and dotted names are not truncated at the first dot.
	    fallback = os.path.splitext(os.path.basename(filepath))[0]

	    try:
	        audio = MP3(filepath, ID3=ID3)
	        # TIT3 (subtitle) often carries the episode number, so it is
	        # tried first; an empty frame falls through to the next one.
	        for frame_id in ('TIT3', 'TIT2'):
	            if frame_id in audio and audio[frame_id].text:
	                title = str(audio[frame_id].text[0])
	                break
	        else:
	            return fallback
	    except Exception:
	        # Best effort by design: an unreadable or untagged file must not
	        # abort the download, it just keeps its temporary name.
	        return fallback

	    title = re.sub(r'\s-\sRadio France$', '', title, flags=re.IGNORECASE)
	    title = re.sub(r'[\\/*?:"<>\|]', '', title).strip()
	    # Keep file names short; strip again in case the cut ends on a space.
	    title = title[:100].strip()
	    return title or fallback

	def _extract_mp3_links(html, base_url):
	    """Return the unique MP3 URLs found in *html*, in order of appearance.

	    Episode containers (``article``/``div`` elements whose class mentions
	    "episode", "podcast" or "card") are inspected first: the ``contentUrl``
	    of their embedded JSON-LD script is taken, without its query string and
	    resolved against *base_url*. If that yields nothing, every
	    ``media.radiofrance-podcast.net`` MP3 URL present in the raw page text
	    is used instead.
	    """
	    soup = BeautifulSoup(html, 'html.parser')
	    main_content = soup.find('main') or soup
	    episodes = main_content.find_all(
	        ['article', 'div'],
	        # Unescaped "|" so the pattern is a real alternation rather than
	        # the literal text "episode|podcast|card".
	        class_=re.compile(r'episode|podcast|card', re.IGNORECASE),
	    )

	    # A dict keeps insertion order while dropping duplicates.
	    links = {}
	    for episode in episodes:
	        script_tag = episode.find('script', type='application/ld+json')
	        if script_tag is None:
	            continue
	        # .string is None when the tag does not hold a single text node;
	        # \s* accepts both compact and pretty-printed JSON.
	        match = re.search(
	            r'"contentUrl"\s*:\s*"([^"]+?\.mp3)', script_tag.string or ''
	        )
	        if match:
	            links[urljoin(base_url, match.group(1).split('?')[0])] = None

	    # Fallback when the structured detection finds nothing.
	    if not links:
	        matches = re.findall(
	            r'(https://media\.radiofrance-podcast\.net[^"\']+?\.mp3)', html
	        )
	        links = dict.fromkeys(matches)

	    return list(links)


	def _download_episode(mp3_url, headers, temp_dir, idx):
	    """Download one episode into *temp_dir* and return its final path.

	    The file is streamed to a temporary name, then renamed to
	    ``"NN - <title>.mp3"`` using the title read from its tags. On failure
	    the partial file is removed and the error is re-raised.
	    """
	    temp_path = os.path.join(temp_dir, f"temp_{idx}.mp3")
	    try:
	        with requests.get(mp3_url, headers=headers, stream=True, timeout=20) as r:
	            r.raise_for_status()
	            with open(temp_path, 'wb') as f:
	                for chunk in r.iter_content(chunk_size=8192):
	                    f.write(chunk)

	        clean_title = get_clean_title(temp_path)
	        final_path = os.path.join(temp_dir, f"{idx:02d} - {clean_title}.mp3")
	        os.rename(temp_path, final_path)
	    except (requests.RequestException, OSError):
	        # Do not leave a truncated download next to the final archive.
	        if os.path.exists(temp_path):
	            os.remove(temp_path)
	        raise
	    return final_path


	def process_url(url):
	    """Download every episode linked from a podcast page into one ZIP file.

	    Args:
	        url: Address of the podcast series page.

	    Returns:
	        A ``(zip_path, error)`` tuple: ``(path, None)`` on success, or
	        ``(None, message)`` with a user-facing message when the page cannot
	        be fetched, no episode is found, or no episode could be downloaded.
	        Episodes that fail individually are logged and skipped.
	    """
	    log = logging.getLogger(__name__)
	    headers = {
	        'User-Agent': 'Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/120.0.0.0 Safari/537.36',
	        'Referer': 'https://www.radiofrance.fr/'
	    }
	    # Tolerate a missing value or stray whitespace from a pasted URL.
	    url = (url or '').strip()

	    try:
	        response = requests.get(url, headers=headers, timeout=15)
	        response.raise_for_status()
	    except requests.RequestException as e:
	        return None, f"Erreur de connexion : {str(e)}"

	    mp3_links = _extract_mp3_links(response.text, url)
	    if not mp3_links:
	        return None, "Aucun épisode trouvé - Vérifiez l'URL"

	    temp_dir = tempfile.mkdtemp()
	    filenames = []
	    for idx, mp3_url in enumerate(mp3_links, 1):
	        try:
	            filenames.append(_download_episode(mp3_url, headers, temp_dir, idx))
	        except (requests.RequestException, OSError) as e:
	            # One broken episode must not abort the whole series, but the
	            # failure is recorded instead of being silently dropped.
	            log.warning("Skipping episode %s: %s", mp3_url, e)

	    if not filenames:
	        # Nothing will be served from this directory: do not leak it.
	        shutil.rmtree(temp_dir, ignore_errors=True)
	        return None, "Échec du téléchargement"

	    # Build the ZIP archive next to the downloaded files.
	    zip_path = os.path.join(temp_dir, 'podcast.zip')
	    with zipfile.ZipFile(zip_path, 'w') as zipf:
	        for path in filenames:
	            zipf.write(path, arcname=os.path.basename(path))

	    return zip_path, None

	def download_podcast(url):
	    """Gradio callback: return the ZIP path for *url* or raise ``gr.Error``.

	    Delegates to ``process_url`` and converts its error message, if any,
	    into a ``gr.Error`` carrying that message.
	    """
	    archive, failure = process_url(url)
	    if not failure:
	        return archive
	    raise gr.Error(failure)

	# Build the Gradio interface: a heading, a URL textbox, a trigger button,
	# a file output and one example URL.
	# NOTE(review): the original indentation is lost in this copy, so which
	# components sit inside the gr.Row() below cannot be determined here —
	# confirm the intended layout against the deployed app.
	with gr.Blocks(title="RadioFrance Podcaster Pro") as app:
	gr.Markdown("## 🎧 Téléchargeur Intelligent Radio France")
	with gr.Row():
	url_input = gr.Textbox(
	label="URL de la série podcast",
	placeholder="Ex: https://www.radiofrance.fr/...",
	max_lines=1
	)
	btn = gr.Button("Générer le ZIP", variant="primary")
	output = gr.File(label="Épisodes téléchargés")

	# Example series URL, bound to the URL textbox.
	examples = gr.Examples(
	examples=[[
	"https://www.radiofrance.fr/franceculture/podcasts/serie-le-secret-de-la-licorne-les-aventures-de-tintin"
	]],
	inputs=[url_input]
	)

	# Clicking the button calls download_podcast with the textbox value and
	# sends its return value to the file component; registered with
	# api_name="download".
	btn.click(
	fn=download_podcast,
	inputs=url_input,
	outputs=output,
	api_name="download"
	)

	# Start the app; there is no __main__ guard, so this runs whenever the
	# file is executed or imported.
	app.launch(show_error=True)