File size: 4,194 Bytes
11fd592
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1f317db
 
 
11fd592
 
1f317db
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
11fd592
1f317db
 
11fd592
1f317db
 
 
 
 
11fd592
 
1f317db
11fd592
 
 
1f317db
11fd592
 
 
 
 
 
 
1f317db
11fd592
 
1f317db
11fd592
 
 
 
 
 
1f317db
11fd592
1f317db
11fd592
 
 
 
 
1f317db
11fd592
 
 
 
1f317db
11fd592
1f317db
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
import os
import re
import sys
import subprocess
import importlib.util
import shutil
import tempfile

# === AUTOMATIC DEPENDENCY INSTALLATION ===
def install_if_missing(package_name, import_name=None):
    """Install *package_name* with pip when *import_name* is not importable.

    ``import_name`` defaults to ``package_name``; pass it when the PyPI
    distribution name differs from the importable module name.
    """
    module = import_name if import_name is not None else package_name
    if importlib.util.find_spec(module) is not None:
        return  # already available, nothing to do
    print(f"Installation de {package_name}...")
    subprocess.check_call([sys.executable, "-m", "pip", "install", package_name])

# Ensure third-party dependencies are installed before they are imported below.
# NOTE: the PyPI distribution for BeautifulSoup is "beautifulsoup4"; the bare
# "bs4" name is a deprecated dummy package, so we install the real one while
# still probing for the "bs4" import name.
for package in [
    ("requests",),
    ("beautifulsoup4", "bs4"),
    ("gradio",),
]:
    install_if_missing(*package)

# === IMPORTS ===
import requests
import gradio as gr
from bs4 import BeautifulSoup

# === UTILITAIRES ===
def slugify(text, max_length=50):
    """Turn *text* into a filesystem-friendly slug.

    Lower-cases the input, drops characters that are not word chars,
    whitespace or hyphens, collapses runs of whitespace/hyphens into a
    single underscore, then truncates to *max_length* and trims any
    leading/trailing underscores.
    """
    cleaned = re.sub(r'[^\w\s-]', '', text.lower())
    underscored = re.sub(r'[-\s]+', '_', cleaned)
    return underscored[:max_length].strip('_')

def get_episode_links(main_url):
    """Collect the episode page URLs linked from the podcast's main page.

    Scans every anchor whose href contains "/podcasts/" (excluding the main
    page itself), makes relative links absolute against radiofrance.fr, and
    returns them deduplicated in discovery order.

    Raises requests.HTTPError on a non-2xx response.
    """
    # Explicit timeout: a bare requests.get() can hang indefinitely.
    response = requests.get(main_url, timeout=30)
    response.raise_for_status()
    soup = BeautifulSoup(response.text, 'html.parser')

    links = []
    seen = set()  # O(1) membership test instead of the former O(n) list scan
    for a in soup.find_all('a', href=True):
        href = a['href']
        if "/podcasts/" in href and href != main_url:
            full_url = href if href.startswith("http") else f"https://www.radiofrance.fr{href}"
            if full_url not in seen:
                seen.add(full_url)
                links.append(full_url)

    # Single dedupe pass above replaces the redundant dict.fromkeys() re-dedupe.
    return links

def extract_mp3_from_episode(url):
    """Return the MP3 URL found in an episode page's <audio> tag, or None.

    Network failures are swallowed deliberately (best effort): one broken
    episode must not abort the whole series download. The try block is kept
    to the network call only, so genuine parsing bugs are no longer hidden
    by the former blanket `except Exception: pass`.
    """
    try:
        response = requests.get(url, timeout=30)  # avoid hanging on a dead host
        response.raise_for_status()
    except requests.RequestException:
        return None

    soup = BeautifulSoup(response.text, 'html.parser')
    audio_tag = soup.find("audio")
    # Only accept a src that is actually an .mp3 file.
    if audio_tag and audio_tag.get("src", "").endswith(".mp3"):
        return audio_tag["src"]
    return None

def get_podcast_title(url):
    """Return a slugified title for the podcast series, or "podcast".

    Prefers the page's <h1>, falling back to <title>; any failure yields
    the generic "podcast" so callers always get a usable filename stem.
    """
    try:
        response = requests.get(url, timeout=30)  # explicit timeout: never hang
        # Bug fix: the original parsed error pages (404 etc.) for a title.
        response.raise_for_status()
        soup = BeautifulSoup(response.text, 'html.parser')
        title_tag = soup.find('h1') or soup.find('title')
        return slugify(title_tag.get_text()) if title_tag else "podcast"
    except Exception:
        # Deliberate best-effort fallback — a title is cosmetic here.
        return "podcast"

def download_and_zip_podcast_series(main_url):
    """Download every episode MP3 of a series and bundle them into a ZIP.

    Returns a ``(message, zip_path_or_None)`` tuple; the message is a
    French status string for the UI. All failures are reported through
    the message rather than raised, since this feeds a Gradio callback.
    """
    try:
        title = get_podcast_title(main_url)
        episode_pages = get_episode_links(main_url)

        if not episode_pages:
            return "Aucune page d’épisode trouvée.", None

        # Keep only episodes whose MP3 link could actually be extracted.
        mp3_links = []
        for ep_url in episode_pages:
            mp3 = extract_mp3_from_episode(ep_url)
            if mp3:
                mp3_links.append(mp3)

        if not mp3_links:
            return "Aucun fichier MP3 trouvé dans les épisodes.", None

        download_dir = tempfile.mkdtemp()
        for i, mp3_url in enumerate(mp3_links, start=1):
            filepath = os.path.join(download_dir, f"{title}_{i:02}.mp3")
            # Stream to disk in chunks so large episodes never sit in RAM.
            with requests.get(mp3_url, stream=True, timeout=60) as r:
                r.raise_for_status()
                with open(filepath, 'wb') as f:
                    for chunk in r.iter_content(chunk_size=8192):
                        f.write(chunk)

        # Bug fix: the archive must be created OUTSIDE the directory being
        # zipped — the original placed it inside temp_dir, so make_archive
        # could try to include the partially-written zip in itself.
        zip_dir = tempfile.mkdtemp()
        zip_path = shutil.make_archive(os.path.join(zip_dir, title), 'zip', download_dir)

        return f"{len(mp3_links)} épisode(s) téléchargé(s).", zip_path

    except Exception as e:
        # Boundary handler: surface the error as a UI message, never crash.
        return f"Erreur : {str(e)}", None

# === GRADIO INTERFACE ===
# Minimal UI: one URL input, a trigger button, a status message box and the
# resulting ZIP file for download.
with gr.Blocks() as app:
    gr.Markdown("# Téléchargeur de Podcasts MP3 - France Culture")
    with gr.Row():
        url_input = gr.Textbox(label="URL de la série", placeholder="https://www.radiofrance.fr/...")
    download_button = gr.Button("Télécharger et compresser")
    output_text = gr.Textbox(label="Message")
    file_output = gr.File(label="Fichier ZIP", file_types=[".zip"])

    def process(url):
        # Thin adapter: maps the single URL input to the two UI outputs
        # (status message, ZIP file path) expected by the click handler.
        message, zip_file = download_and_zip_podcast_series(url)
        return message, zip_file

    download_button.click(fn=process, inputs=[url_input], outputs=[output_text, file_output])

# === LAUNCH ===
if __name__ == "__main__":
    # share=True additionally publishes a temporary public Gradio URL
    # alongside the local server.
    app.launch(share=True)