Ribot committed
Commit 1f317db · verified · 1 Parent(s): 11fd592

Update app.py

Files changed (1)
  1. app.py +57 -21
app.py CHANGED
@@ -32,29 +32,65 @@ def slugify(text, max_length=50):
     text = re.sub(r'[-\s]+', '_', text)
     return text[:max_length].strip('_')
 
-def extract_mp3_links_and_title(url):
-    response = requests.get(url)
+def get_episode_links(main_url):
+    """Collect all episode page URLs from the main series page."""
+    response = requests.get(main_url)
     response.raise_for_status()
     soup = BeautifulSoup(response.text, 'html.parser')
+    links = []
+
+    for a in soup.find_all('a', href=True):
+        href = a['href']
+        if "/podcasts/" in href and href != main_url:
+            full_url = href if href.startswith("http") else f"https://www.radiofrance.fr{href}"
+            if full_url not in links:
+                links.append(full_url)
+
+    return list(dict.fromkeys(links))  # deduplicate
+
+def extract_mp3_from_episode(url):
+    """Extract the MP3 link from an episode page."""
+    try:
+        response = requests.get(url)
+        response.raise_for_status()
+        soup = BeautifulSoup(response.text, 'html.parser')
+        audio_tag = soup.find("audio")
+        if audio_tag and audio_tag.get("src", "").endswith(".mp3"):
+            return audio_tag["src"]
+    except Exception:
+        pass
+    return None
+
+def get_podcast_title(url):
+    """Extract the overall podcast title."""
+    try:
+        response = requests.get(url)
+        soup = BeautifulSoup(response.text, 'html.parser')
+        title_tag = soup.find('h1') or soup.find('title')
+        return slugify(title_tag.get_text()) if title_tag else "podcast"
+    except Exception:
+        return "podcast"
+
+def download_and_zip_podcast_series(main_url):
+    try:
+        title = get_podcast_title(main_url)
+        episode_pages = get_episode_links(main_url)
 
-    # Podcast title
-    title_tag = soup.find('h1') or soup.find('title')
-    podcast_title = slugify(title_tag.get_text()) if title_tag else "podcast"
-
-    # MP3 links
-    mp3_links = [a['href'] for a in soup.find_all('a', href=True) if a['href'].endswith('.mp3')]
+        if not episode_pages:
+            return "Aucune page d’épisode trouvée.", None
 
-    return podcast_title, mp3_links
+        mp3_links = []
+        for ep_url in episode_pages:
+            mp3 = extract_mp3_from_episode(ep_url)
+            if mp3:
+                mp3_links.append(mp3)
 
-def download_and_zip_podcast(url):
-    try:
-        podcast_title, mp3_links = extract_mp3_links_and_title(url)
         if not mp3_links:
-            return "Aucun fichier MP3 trouvé.", None
+            return "Aucun fichier MP3 trouvé dans les épisodes.", None
 
         temp_dir = tempfile.mkdtemp()
         for i, mp3_url in enumerate(mp3_links, start=1):
-            filename = f"{podcast_title}_{i:02}.mp3"
+            filename = f"{title}_{i:02}.mp3"
             filepath = os.path.join(temp_dir, filename)
             with requests.get(mp3_url, stream=True) as r:
                 r.raise_for_status()
@@ -62,29 +98,29 @@ def download_and_zip_podcast(url):
                 for chunk in r.iter_content(chunk_size=8192):
                     f.write(chunk)
 
-        zip_path = os.path.join(temp_dir, f"{podcast_title}.zip")
+        zip_path = os.path.join(temp_dir, f"{title}.zip")
         shutil.make_archive(zip_path.replace('.zip', ''), 'zip', temp_dir)
 
-        return f"{len(mp3_links)} fichiers téléchargés avec succès.", zip_path
+        return f"{len(mp3_links)} épisode(s) téléchargé(s).", zip_path
 
     except Exception as e:
         return f"Erreur : {str(e)}", None
 
 # === GRADIO INTERFACE ===
 with gr.Blocks() as app:
-    gr.Markdown("# Téléchargeur de Podcasts MP3")
+    gr.Markdown("# Téléchargeur de Podcasts MP3 - France Culture")
     with gr.Row():
-        url_input = gr.Textbox(label="URL de la page série", placeholder="https://www.radiofrance.fr/...")
+        url_input = gr.Textbox(label="URL de la série", placeholder="https://www.radiofrance.fr/...")
         download_button = gr.Button("Télécharger et compresser")
     output_text = gr.Textbox(label="Message")
     file_output = gr.File(label="Fichier ZIP", file_types=[".zip"])
 
     def process(url):
-        message, zip_file = download_and_zip_podcast(url)
+        message, zip_file = download_and_zip_podcast_series(url)
         return message, zip_file
 
     download_button.click(fn=process, inputs=[url_input], outputs=[output_text, file_output])
 
-# === LOCAL OR HUGGING FACE LAUNCH ===
+# === LAUNCH ===
 if __name__ == "__main__":
-    app.launch(share=True)  # `share=True` is useful on Hugging Face
+    app.launch(share=True)
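
The helpers added in this commit can also be exercised outside the Gradio UI with a short driver script. Below is a minimal sketch, assuming the script sits next to app.py so the functions import cleanly, and that the series URL is a placeholder to be swapped for a real Radio France series page; the file name try_scraper.py is hypothetical, not part of the commit.

# try_scraper.py: hypothetical driver for manually exercising the helpers in app.py.
# Assumes it lives in the same folder as app.py; the series URL below is a placeholder.
from app import (
    get_episode_links,
    extract_mp3_from_episode,
    download_and_zip_podcast_series,
)

series_url = "https://www.radiofrance.fr/franceculture/podcasts/..."  # placeholder, replace with a real series page

# List the episode pages discovered on the series page.
episodes = get_episode_links(series_url)
print(f"{len(episodes)} episode page(s) found")

# Probe the first few episodes for a direct MP3 URL.
for ep_url in episodes[:3]:
    print(ep_url, "->", extract_mp3_from_episode(ep_url))

# Full run: download every episode found and build the ZIP archive.
message, zip_path = download_and_zip_podcast_series(series_url)
print(message, zip_path)

Importing from app is safe here because app.launch() is guarded by the __main__ check, so the Gradio interface is built but not started.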