Spaces:

Ribot
/

PodMagic

Running

File size: 4,501 Bytes

eeae908
734bffb
c77e282
734bffb
6ca2249
88da9f3
6ca2249
31f35d4
52a320c
31f35d4
52a320c
 
 
 
31f35d4
 
 
 
52a320c
31f35d4
 
 
 
 
 
 
 
 
 
96682d9
88da9f3
 
2a11a1f
 
 
 
31f35d4
88da9f3
c77e282
 
734bffb
31f35d4
52a320c
 
31f35d4
0a6bdb5
52a320c
 
 
 
 
 
31f35d4
 
 
2a11a1f
31f35d4
 
 
 
2a11a1f
52a320c
31f35d4
734bffb
88da9f3
 
0a6bdb5
52a320c
6ca2249
31f35d4
 
 
0a6bdb5
31f35d4
0a6bdb5
52a320c
0a6bdb5
2a11a1f
52a320c
31f35d4
52a320c
 
 
 
 
 
0a6bdb5
88da9f3
734bffb
88da9f3
31f35d4
734bffb
31f35d4
 
0a6bdb5
88da9f3
6ca2249
734bffb
0a6bdb5
734bffb
88da9f3
 
 
 
 
734bffb
31f35d4
 
09d051e
 
31f35d4
 
2a11a1f
09d051e
31f35d4
 
09d051e
 
 
 
 
31f35d4
09d051e
2a11a1f
 
09d051e
 
2a11a1f
09d051e
 
11fd592
31f35d4

import os
import re
import shutil
import tempfile
import zipfile
from urllib.parse import urljoin

import gradio as gr
import requests
from bs4 import BeautifulSoup
from mutagen.id3 import ID3, TIT3
from mutagen.mp3 import MP3

def get_clean_title(filepath):
    """Return a filesystem-safe episode title for the MP3 at *filepath*.

    The title is read from the ID3 ``TIT3`` frame when present, otherwise
    from ``TIT2``. A trailing "- Radio France" suffix and the characters
    ``\\ / * ? : " < > |`` are removed, and the result is capped at 100
    characters.

    Args:
        filepath: Path to an MP3 file.

    Returns:
        The cleaned title, without a file extension. When no usable tag is
        found, the tags cannot be read, or the cleaned title is empty, the
        file's base name without its extension is returned instead.
    """
    # Every fallback path strips the extension: the caller appends ".mp3"
    # itself, so returning "temp_1.mp3" would yield "... temp_1.mp3.mp3".
    fallback = os.path.splitext(os.path.basename(filepath))[0]

    try:
        audio = MP3(filepath, ID3=ID3)
        # Per the original author's note: TIT3 (subtitle) often carries the
        # episode number, so it is preferred over TIT2.
        if 'TIT3' in audio:
            title = audio['TIT3'].text[0]
        elif 'TIT2' in audio:
            title = audio['TIT2'].text[0]
        else:
            return fallback

        # Drop the broadcaster suffix, then characters that are unsafe in
        # file names.
        title = re.sub(r'\s*-\s*Radio France$', '', title, flags=re.IGNORECASE)
        title = re.sub(r'[\\/*?:"<>|]', '', title).strip()
        # Re-strip after truncation so the name never ends in whitespace,
        # and never return an empty name.
        return title[:100].strip() or fallback

    except Exception:
        # Best effort: any tag-reading failure falls back to the file name.
        return fallback

def _find_mp3_links(html, page_url):
    """Return the de-duplicated MP3 URLs found in *html*, in page order."""
    soup = BeautifulSoup(html, 'html.parser')
    main_content = soup.find('main') or soup
    episodes = main_content.find_all(
        ['article', 'div'],
        class_=re.compile(r'episode|podcast|card', re.IGNORECASE),
    )

    links = []
    for episode in episodes:
        script_tag = episode.find('script', type='application/ld+json')
        if not script_tag:
            continue
        # .string is None for an empty tag or one with several children;
        # fall back to '' so re.search does not raise TypeError.
        payload = script_tag.string or ''
        match = re.search(r'"contentUrl"\s*:\s*"([^"]+?\.mp3)', payload)
        if match:
            # JSON may escape forward slashes as "\/"; undo that before
            # resolving the URL against the page.
            raw_url = match.group(1).replace('\\/', '/')
            links.append(urljoin(page_url, raw_url.split('?')[0]))

    # Fallback when the structured detection finds nothing: scan the raw
    # page text for media-host URLs.
    if not links:
        links = re.findall(r'(https://media\.radiofrance-podcast\.net[^"\']+?\.mp3)', html)

    # dict.fromkeys removes duplicates while keeping first-seen order.
    return list(dict.fromkeys(links))


def _download_episode(mp3_url, headers, dest_dir, idx):
    """Download one episode into *dest_dir*; return its final path or None."""
    temp_path = os.path.join(dest_dir, f"temp_{idx}.mp3")
    try:
        with requests.get(mp3_url, headers=headers, stream=True, timeout=20) as r:
            r.raise_for_status()
            with open(temp_path, 'wb') as f:
                for chunk in r.iter_content(chunk_size=8192):
                    f.write(chunk)

        # Rename to "<index> - <title>.mp3" using the title from the tags.
        clean_title = get_clean_title(temp_path)
        final_path = os.path.join(dest_dir, f"{idx:02d} - {clean_title}.mp3")
        os.rename(temp_path, final_path)
    except (requests.RequestException, OSError):
        # Best effort: one failed episode must not abort the whole series.
        # Remove any partial download so it is not left on disk.
        try:
            if os.path.exists(temp_path):
                os.remove(temp_path)
        except OSError:
            pass
        return None
    return final_path


def process_url(url):
    """Download every episode linked from a podcast page and zip them.

    Args:
        url: Address of the podcast series page to fetch.

    Returns:
        A ``(zip_path, error)`` tuple. On success ``zip_path`` is the path of
        a ``podcast.zip`` inside a fresh temporary directory and ``error`` is
        ``None``. On failure ``zip_path`` is ``None`` and ``error`` is a
        user-facing message.
    """
    headers = {
        'User-Agent': 'Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/120.0.0.0 Safari/537.36',
        'Referer': 'https://www.radiofrance.fr/'
    }
    try:
        response = requests.get(url, headers=headers, timeout=15)
        response.raise_for_status()
    except requests.RequestException as e:
        return None, f"Erreur de connexion : {str(e)}"

    mp3_links = _find_mp3_links(response.text, url)
    if not mp3_links:
        return None, "Aucun épisode trouvé - Vérifiez l'URL"

    temp_dir = tempfile.mkdtemp()
    filenames = []
    for idx, mp3_url in enumerate(mp3_links, 1):
        episode_path = _download_episode(mp3_url, headers, temp_dir, idx)
        if episode_path:
            filenames.append(episode_path)

    if not filenames:
        # Nothing will be returned from this directory, so do not leak it.
        shutil.rmtree(temp_dir, ignore_errors=True)
        return None, "Échec du téléchargement"

    # Build the ZIP next to the episodes; the directory is kept because the
    # returned path points into it.
    zip_path = os.path.join(temp_dir, 'podcast.zip')
    with zipfile.ZipFile(zip_path, 'w') as zipf:
        for episode_file in filenames:
            zipf.write(episode_file, arcname=os.path.basename(episode_file))

    return zip_path, None

def download_podcast(url):
    """Gradio callback: return the ZIP path for *url* or raise ``gr.Error``.

    Delegates to ``process_url`` and converts its error message, when there
    is one, into a ``gr.Error`` exception.
    """
    archive_path, failure = process_url(url)
    if not failure:
        return archive_path
    raise gr.Error(failure)

# Build the Gradio interface: a URL text box, a trigger button, a file output
# and one example URL.
with gr.Blocks(title="RadioFrance Podcaster Pro") as app:
    gr.Markdown("## 🎧 Téléchargeur Intelligent Radio France")
    with gr.Row():
        # Single-line input holding the podcast series URL.
        url_input = gr.Textbox(
            label="URL de la série podcast",
            placeholder="Ex: https://www.radiofrance.fr/...",
            max_lines=1
        )
    btn = gr.Button("Générer le ZIP", variant="primary")
    # Receives the path returned by download_podcast.
    output = gr.File(label="Épisodes téléchargés")
    
    # One sample URL wired to the text box.
    examples = gr.Examples(
        examples=[[
            "https://www.radiofrance.fr/franceculture/podcasts/serie-le-secret-de-la-licorne-les-aventures-de-tintin"
        ]],
        inputs=[url_input]
    )

    # Clicking the button runs download_podcast on the text box value and
    # sends its return value to the file output.
    btn.click(
        fn=download_podcast,
        inputs=url_input,
        outputs=output,
        api_name="download"
    )

# Runs at module level (no __main__ guard), so the app starts on import too.
app.launch(show_error=True)