Ribot committed on
Commit
88da9f3
·
verified ·
1 Parent(s): 6ca2249

Update app.py

Browse files
Files changed (1) hide show
  1. app.py +50 -52
app.py CHANGED
@@ -1,72 +1,70 @@
1
  import gradio as gr
2
  import requests
3
  from bs4 import BeautifulSoup
4
- import re
5
  import os
6
  import zipfile
 
7
  from urllib.parse import urljoin
8
- from pathlib import Path
9
 
10
def extract_mp3_links(url, timeout=30):
    """Collect the Radio France MP3 links found on a page.

    Every ``<a href=...>`` element is inspected; a link is kept when its
    ``href`` ends with ``.mp3`` and contains ``radiofrance``. Duplicates are
    dropped and the order of first appearance is preserved.

    Args:
        url: Address of the page to scan.
        timeout: Seconds to wait for the server before giving up. Without a
            timeout ``requests`` waits indefinitely on a stalled connection.

    Returns:
        A list of ``(absolute_url, title)`` tuples. The title is the anchor's
        ``title`` attribute, else its stripped text, else ``"episode"``.

    Raises:
        requests.RequestException: If the page cannot be fetched (including
            on timeout). The HTTP status code itself is not checked.
    """
    response = requests.get(url, timeout=timeout)
    soup = BeautifulSoup(response.content, "html.parser")

    seen = set()
    links = []
    for anchor in soup.find_all("a", href=True):
        href = anchor['href']
        if not (href.endswith('.mp3') and 'radiofrance' in href):
            continue

        # Relative links are resolved against the page address.
        full_url = href if href.startswith("http") else urljoin(url, href)
        if full_url in seen:
            continue

        seen.add(full_url)
        title = anchor.get("title") or anchor.text.strip() or "episode"
        links.append((full_url, title))

    return links
30
 
31
def download_episodes(podcast_url, timeout=30):
    """Download every episode linked from a podcast page and zip them.

    Each MP3 is saved under ``downloads/`` as ``NN_<sanitised title>.mp3``
    and the successfully saved files are then bundled into a single ZIP.

    Args:
        podcast_url: Address of the page listing the episodes.
        timeout: Seconds to wait for each episode download before giving up,
            so one stalled server cannot block the whole request forever.

    Returns:
        ``(zip_path, message)`` on success, or ``(None, message)`` when no
        link was found or no file could be downloaded.
    """
    # NOTE(review): "downloads" and the ZIP path below are fixed, shared
    # locations; concurrent calls would overwrite each other's output.
    os.makedirs("downloads", exist_ok=True)
    mp3_links = extract_mp3_links(podcast_url)

    if not mp3_links:
        return None, "Aucun épisode valide trouvé."

    valid_episodes = []
    for idx, (mp3_url, title) in enumerate(mp3_links, 1):
        # Best effort: a failure on one episode must not abort the others.
        try:
            response = requests.get(mp3_url, timeout=timeout)
            if response.status_code != 200:
                # Previously skipped silently; leave a trace for debugging.
                print(f"Erreur avec {mp3_url} : HTTP {response.status_code}")
                continue

            # Keep only word characters and dashes, capped at 80 characters.
            safe_title = re.sub(r'[^\w\d-]', '_', title)[:80]
            filename = f"{idx:02d}_{safe_title}.mp3"
            filepath = os.path.join("downloads", filename)
            with open(filepath, "wb") as f:
                f.write(response.content)
            valid_episodes.append(filepath)
        except Exception as e:
            print(f"Erreur avec {mp3_url} : {e}")

    if not valid_episodes:
        return None, "Aucun fichier mp3 téléchargé."

    zip_path = "/tmp/episodes_radiofrance.zip"
    with zipfile.ZipFile(zip_path, 'w') as zipf:
        for file in valid_episodes:
            # Store entries flat, without the "downloads/" prefix.
            zipf.write(file, arcname=os.path.basename(file))

    return zip_path, f"{len(valid_episodes)} épisode(s) téléchargé(s) avec succès."
61
 
62
# UI: one URL field and a button; the handler fills a ZIP download slot and
# a status text box.
with gr.Blocks() as app:
    gr.Markdown("# 🎧 Téléchargeur de Podcasts Radio France")

    # NOTE(review): the original indentation was lost in this view; the
    # input and the button are assumed to share the row -- confirm.
    with gr.Row():
        url_input = gr.Text(label="URL de la série du podcast")
        launch_btn = gr.Button("Télécharger les épisodes")

    output_file = gr.File(label="Fichier ZIP")
    output_message = gr.Textbox(label="Statut")

    # download_episodes returns (zip_path, message), matching the two outputs.
    launch_btn.click(
        fn=download_episodes,
        inputs=url_input,
        outputs=[output_file, output_message],
    )

app.launch()
 
1
  import gradio as gr
2
  import requests
3
  from bs4 import BeautifulSoup
 
4
  import os
5
  import zipfile
6
+ import tempfile
7
  from urllib.parse import urljoin
 
8
 
9
def process_url(url, timeout=30):
    """Download every MP3 linked from a page and bundle them into a ZIP.

    Args:
        url: Address of the page to scan for ``<a href="....mp3">`` links.
        timeout: Seconds to wait on each HTTP request before giving up.
            Without a timeout ``requests`` waits indefinitely on a stalled
            connection.

    Returns:
        ``(zip_path, None)`` on success, or ``(None, error_message)`` when the
        page cannot be fetched, contains no MP3 link, or no episode could be
        downloaded.
    """
    try:
        response = requests.get(url, timeout=timeout)
        response.raise_for_status()
    except requests.RequestException as e:
        return None, f"Erreur lors de la récupération de la page : {e}"

    soup = BeautifulSoup(response.text, 'html.parser')

    # Resolve links against the final (post-redirect) address. dict.fromkeys
    # removes duplicates while keeping the order of first appearance.
    mp3_links = list(dict.fromkeys(
        urljoin(response.url, link['href'])
        for link in soup.find_all('a', href=True)
        if link['href'].lower().endswith('.mp3')
    ))

    if not mp3_links:
        return None, "Aucun lien MP3 trouvé sur la page."

    # The individual MP3 files are only needed until they are zipped, so they
    # live in a directory that is removed when this block exits.
    with tempfile.TemporaryDirectory() as download_dir:
        filenames = []
        for idx, mp3_url in enumerate(mp3_links, start=1):
            filename = os.path.join(
                download_dir, f"{idx:02d}_{os.path.basename(mp3_url)}"
            )
            # Best effort: a network or disk failure on one episode must not
            # abort the remaining downloads.
            try:
                mp3_response = requests.get(mp3_url, timeout=timeout)
                mp3_response.raise_for_status()
                with open(filename, 'wb') as f:
                    f.write(mp3_response.content)
            except (requests.RequestException, OSError) as e:
                print(f"Erreur de téléchargement {mp3_url}: {e}")
                continue
            filenames.append(filename)

        if not filenames:
            return None, "Aucun épisode téléchargé."

        # The archive must outlive this function (the caller hands its path
        # to the UI), so it goes in its own directory that is not cleaned up.
        zip_filename = os.path.join(tempfile.mkdtemp(), 'podcast_episodes.zip')
        with zipfile.ZipFile(zip_filename, 'w') as zipf:
            for file in filenames:
                zipf.write(file, arcname=os.path.basename(file))

    return zip_filename, None
55
 
56
def download_podcast(url):
    """Gradio handler: return the ZIP path for ``url`` or raise a UI error.

    Delegates to ``process_url`` and converts its error message, when there
    is one, into a ``gr.Error``.
    """
    archive_path, failure = process_url(url)
    if not failure:
        return archive_path
    raise gr.Error(failure)
 
 
61
 
62
# Components are built first so the Interface call reads as plain wiring.
_url_input = gr.Textbox(
    label="URL de la page du podcast",
    placeholder="https://www.radiofrance.fr/...",
)
_zip_output = gr.File(label="Télécharger le ZIP des épisodes")

# Single-input, single-output form around download_podcast.
iface = gr.Interface(
    fn=download_podcast,
    inputs=_url_input,
    outputs=_zip_output,
    title="Téléchargeur de Podcast",
    description="Entrez l'URL d'une page contenant des épisodes de podcast pour télécharger tous les MP3 dans un ZIP ordonné.",
)

iface.launch()