File size: 3,032 Bytes
eeae908
734bffb
c77e282
734bffb
6ca2249
88da9f3
6ca2249
96682d9
88da9f3
 
09d051e
0a6bdb5
88da9f3
c77e282
 
734bffb
09d051e
88da9f3
09d051e
c77e282
0a6bdb5
c77e282
 
 
09d051e
 
734bffb
6ca2249
c77e282
734bffb
09d051e
88da9f3
 
0a6bdb5
c77e282
6ca2249
0a6bdb5
 
 
 
 
 
 
c77e282
 
0a6bdb5
 
88da9f3
734bffb
88da9f3
09d051e
734bffb
09d051e
 
0a6bdb5
88da9f3
6ca2249
734bffb
0a6bdb5
734bffb
88da9f3
 
 
 
 
734bffb
09d051e
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
11fd592
09d051e
 
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
import gradio as gr
import requests
import re
import os
import zipfile
import tempfile
from urllib.parse import urljoin

# Browser-like User-Agent sent with the page fetch and every MP3 download.
_REQUEST_HEADERS = {
    'User-Agent': 'Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/120.0.0.0 Safari/537.36'
}

# Captures the value of a JSON-style "contentUrl" or "url" key, up to and
# including the first ".mp3".
_MP3_URL_PATTERN = re.compile(r'(?:contentUrl|url)"\s*:\s*"([^"]+?\.mp3)')


def _extract_mp3_links(page_text, base_url):
    """Return the unique absolute MP3 URLs found in *page_text*, in page order."""
    links = {}
    for raw in _MP3_URL_PATTERN.findall(page_text):
        # JSON may escape "/" as "\/"; undo that so urljoin sees a real URL.
        links[urljoin(base_url, raw.replace('\\/', '/'))] = None
    # A dict keeps first-seen order while de-duplicating in O(1) per link.
    return list(links)


def _remove_quietly(path):
    """Delete *path*, ignoring the case where it is already gone or locked."""
    try:
        os.remove(path)
    except OSError:
        # Cleanup is best-effort; a leftover temp file must not fail the request.
        pass


def _download_file(file_url, destination, timeout):
    """Stream *file_url* to *destination*; return True on success, else False."""
    try:
        with requests.get(file_url, headers=_REQUEST_HEADERS, stream=True,
                          timeout=timeout) as resp:
            resp.raise_for_status()
            with open(destination, 'wb') as out:
                for chunk in resp.iter_content(chunk_size=8192):
                    if chunk:
                        out.write(chunk)
    except (requests.RequestException, OSError):
        # One failed episode should not abort the others, but a truncated
        # file must not be left behind either.
        _remove_quietly(destination)
        return False
    return True


def process_url(url, timeout=30):
    """Download every MP3 referenced by a web page and bundle them in a ZIP.

    The page at *url* is fetched, its source is scanned for "contentUrl"/"url"
    values ending in ".mp3", each distinct link is downloaded into a fresh
    temporary directory, and the successfully downloaded files are written to
    ``podcast.zip`` in that directory. The individual MP3 files are removed
    once they are inside the archive.

    Args:
        url: Address of the page to scan.
        timeout: Timeout in seconds passed to every ``requests.get`` call.

    Returns:
        A ``(zip_path, error)`` tuple. On success ``zip_path`` is the path of
        the archive and ``error`` is ``None``; on failure ``zip_path`` is
        ``None`` and ``error`` is a user-facing message.
    """
    url = url.strip() if isinstance(url, str) else ''
    if not url:
        return None, "Erreur de connexion : aucune URL fournie"

    try:
        response = requests.get(url, headers=_REQUEST_HEADERS, timeout=timeout)
        response.raise_for_status()
    except requests.RequestException as e:
        return None, f"Erreur de connexion : {e}"

    # response.url is the final URL after redirects, so relative links
    # resolve against the page that was actually served.
    mp3_links = _extract_mp3_links(response.text, response.url)
    if not mp3_links:
        return None, "Aucun lien MP3 détecté dans le code source"

    temp_dir = tempfile.mkdtemp()
    downloaded = []
    for idx, mp3_url in enumerate(mp3_links, 1):
        # The numeric prefix keeps page order and makes names unique.
        filename = f"{idx:02d}_{os.path.basename(mp3_url).split('?')[0]}"
        filepath = os.path.join(temp_dir, filename)
        if _download_file(mp3_url, filepath, timeout):
            downloaded.append(filepath)

    if not downloaded:
        # Nothing usable was produced: do not leak the empty temp directory.
        try:
            os.rmdir(temp_dir)
        except OSError:
            pass
        return None, "Échec des téléchargements"

    zip_path = os.path.join(temp_dir, 'podcast.zip')
    with zipfile.ZipFile(zip_path, 'w') as zipf:
        for path in downloaded:
            zipf.write(path, arcname=os.path.basename(path))

    # The archive now holds the data; drop the duplicate loose files.
    for path in downloaded:
        _remove_quietly(path)

    return zip_path, None

def download_podcast(url):
    """Gradio callback: return the ZIP path for *url* or raise ``gr.Error``.

    Delegates to ``process_url`` and converts its error message, when one is
    returned, into a ``gr.Error`` carrying that message.
    """
    archive_path, failure = process_url(url)
    if not failure:
        return archive_path
    raise gr.Error(failure)

# Gradio UI definition (described by the original author as Hugging Face compatible)
with gr.Blocks() as app:
    # Page title.
    gr.Markdown("# 📻 Téléchargeur de Podcasts Radio France")
    
    # Input row: the series URL textbox and the button that starts the download.
    with gr.Row():
        url_input = gr.Textbox(
            label="URL de la série podcast",
            placeholder="Collez l'URL Radio France ici..."
        )
        download_btn = gr.Button("Télécharger les épisodes")
    
    # File component that receives the path returned by download_podcast.
    output_file = gr.File(label="Fichier ZIP à télécharger")
    # Hidden textbox; not wired to any event in the code visible here.
    error_output = gr.Textbox(visible=False)
    
    # A single example URL offered as a value for url_input.
    examples = gr.Examples(
        examples=[[
            "https://www.radiofrance.fr/franceculture/podcasts/serie-le-secret-de-la-licorne-les-aventures-de-tintin"
        ]],
        inputs=[url_input]
    )
    
    # Clicking the button calls download_podcast with the textbox value and
    # sends its return value to output_file; the event is named "download".
    download_btn.click(
        fn=download_podcast,
        inputs=url_input,
        outputs=output_file,
        api_name="download"
    )

# Hugging Face specific launch settings (per the original note). This call is
# a module-level statement, so it runs whenever the module is executed or imported.
app.launch(debug=False, show_error=True)