import gradio as gr
import requests
import re
import os
import zipfile
import tempfile
from urllib.parse import urljoin
from bs4 import BeautifulSoup
from mutagen.mp3 import MP3
from mutagen.id3 import ID3
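# Gradio app: scrape a Radio France series page, collect the episode MP3 URLs
# (from JSON-LD "contentUrl" entries, with a plain <a href> fallback), download
# each file, rename it from its ID3 title, and return everything as a ZIP.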
def get_clean_title(filepath):
    # Read the episode title from the ID3 tags and sanitize it for use as a filename.
    try:
        audio = MP3(filepath, ID3=ID3)
        for tag in ['TIT2', 'TIT3', 'TALB']:
            if tag in audio:
                title = audio[tag].text[0]
                title = re.sub(r'[\\/*?:"<>|]', '', title).strip()
                return title[:100]
        return os.path.basename(filepath).split('.')[0]
    except Exception:
        return os.path.basename(filepath).split('.')[0]
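# Illustration (hypothetical tag value): a TIT2 of 'Épisode 1 : "La Licorne"'
# loses the characters \ / * ? : " < > | and is capped at 100 characters,
# yielding a filesystem-safe name like 'Épisode 1  La Licorne'.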
def process_url(url):
    try:
        headers = {'User-Agent': 'Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/120.0.0.0 Safari/537.36'}
        response = requests.get(url, headers=headers, timeout=15)
        response.raise_for_status()
    except Exception as e:
        return None, f"Error: {str(e)}"
    soup = BeautifulSoup(response.text, 'html.parser')
    mp3_links = []
    # Method 1: JSON-LD script tags
    for script in soup.find_all('script', type='application/ld+json'):
        if script.string:
            matches = re.findall(r'"contentUrl"\s*:\s*"([^"]+?\.mp3)', script.string)
            for match in matches:
                clean_url = urljoin(url, match.split('?')[0])
                if clean_url not in mp3_links:
                    mp3_links.append(clean_url)
    # Method 2: HTML fallback
    if not mp3_links:
        for a in soup.find_all('a', href=re.compile(r'\.mp3')):
            mp3_url = urljoin(url, a['href'].split('?')[0])
            if mp3_url not in mp3_links:
                mp3_links.append(mp3_url)
    if not mp3_links:
        return None, "No episodes found"
    temp_dir = tempfile.mkdtemp()
    filenames = []
    for idx, mp3_url in enumerate(mp3_links, 1):
        try:
            temp_path = os.path.join(temp_dir, f"temp_{idx}.mp3")
            # Download
            with requests.get(mp3_url, headers=headers, stream=True, timeout=20) as r:
                r.raise_for_status()
                with open(temp_path, 'wb') as f:
                    for chunk in r.iter_content(chunk_size=8192):
                        f.write(chunk)
            # Rename using the ID3 title
            title = get_clean_title(temp_path)
            final_name = f"{idx:02d} - {title}.mp3"
            final_path = os.path.join(temp_dir, final_name)
            os.rename(temp_path, final_path)
            filenames.append(final_path)
        except Exception:
            continue
    if not filenames:
        return None, "All downloads failed"
    zip_path = os.path.join(temp_dir, 'podcast.zip')
    with zipfile.ZipFile(zip_path, 'w') as zipf:
        for file in filenames:
            zipf.write(file, arcname=os.path.basename(file))
    return zip_path, None
def download_podcast(url):
    zip_path, error = process_url(url)
    if error:
        raise gr.Error(error)
    return zip_path
with gr.Blocks() as app:
    gr.Markdown("## 🎙️ Radio France Downloader")
    with gr.Row():
        url = gr.Textbox(label="Series URL", placeholder="Paste the URL here...")
        btn = gr.Button("Download", variant="primary")
    output = gr.File(label="Episodes")
    examples = gr.Examples(
        examples=[[
            "https://www.radiofrance.fr/franceculture/podcasts/serie-le-secret-de-la-licorne-les-aventures-de-tintin"
        ]],
        inputs=[url]
    )
    btn.click(download_podcast, inputs=url, outputs=output)
app.launch()
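# Minimal standalone usage sketch (assumption: calling process_url directly,
# without the Gradio UI; the URL is the example wired into gr.Examples above):
#
#     zip_path, error = process_url(
#         "https://www.radiofrance.fr/franceculture/podcasts/serie-le-secret-de-la-licorne-les-aventures-de-tintin"
#     )
#     print(error or f"Archive ready: {zip_path}")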