Update app.py
Browse files
app.py
CHANGED
@@ -4,81 +4,88 @@ import zipfile
|
|
4 |
import requests
|
5 |
import tempfile
|
6 |
import subprocess
|
7 |
-
from bs4 import BeautifulSoup
|
8 |
import gradio as gr
|
9 |
|
10 |
-
# Automatic installation of the third-party dependencies when bs4 is missing.
try:
    import bs4
except ImportError:
    import sys

    # Run pip through the current interpreter so the packages are installed
    # into the environment this script is actually running in; a bare "pip"
    # on PATH may belong to another Python installation or not exist at all.
    subprocess.run(
        [sys.executable, "-m", "pip", "install", "-q", "gradio", "beautifulsoup4", "requests"]
    )
|
15 |
|
16 |
def sanitize_filename(name):
    """Return *name* as a filename-safe string of at most 50 characters.

    Surrounding whitespace is stripped, then every character that is not a
    word character, hyphen, underscore or dot is replaced by an underscore.
    """
    stripped = name.strip()
    cleaned = re.sub(r"[^\w\-_.]", "_", stripped)
    return cleaned[:50]
|
18 |
|
19 |
-
def
|
20 |
-
|
21 |
-
|
|
|
22 |
|
23 |
-
|
24 |
-
|
25 |
-
|
26 |
-
|
27 |
-
|
|
|
28 |
|
29 |
-
|
30 |
-
|
|
|
|
|
|
|
|
|
|
|
31 |
try:
|
32 |
response = requests.get(url)
|
33 |
response.raise_for_status()
|
34 |
except Exception as e:
|
35 |
-
return f"Erreur de téléchargement : {e}", None
|
36 |
|
37 |
html_text = response.text
|
|
|
38 |
|
39 |
-
|
40 |
-
|
41 |
-
if not titles_links:
|
42 |
-
# fallback brut si les titres ne sont pas extraits
|
43 |
-
urls = extract_audio_links_from_html(html_text)
|
44 |
-
titles_links = [(f"track_{i+1:02d}", u) for i, u in enumerate(urls)]
|
45 |
-
|
46 |
-
if not titles_links:
|
47 |
-
return "Aucun fichier audio trouvé avec ITEMA dans l'URL", None
|
48 |
|
49 |
-
#
|
50 |
with tempfile.TemporaryDirectory() as tmpdir:
|
51 |
-
zip_path = os.path.join(tmpdir, "
|
52 |
with zipfile.ZipFile(zip_path, "w") as zipf:
|
53 |
-
for idx, (
|
54 |
-
|
55 |
-
|
|
|
|
|
56 |
filepath = os.path.join(tmpdir, filename)
|
57 |
try:
|
58 |
-
|
|
|
59 |
audio_resp.raise_for_status()
|
|
|
|
|
|
|
60 |
with open(filepath, "wb") as f:
|
61 |
f.write(audio_resp.content)
|
62 |
zipf.write(filepath, arcname=filename)
|
63 |
except Exception as e:
|
64 |
-
print(f"Erreur téléchargement {
|
65 |
|
66 |
-
return "Téléchargement terminé avec succès", zip_path
|
67 |
|
68 |
def gradio_interface(url):
|
69 |
-
message, zip_file =
|
70 |
return message, zip_file
|
71 |
|
72 |
# Interface Gradio
|
73 |
demo = gr.Interface(
|
74 |
fn=gradio_interface,
|
75 |
-
inputs=gr.Textbox(label="URL de la page
|
76 |
outputs=[
|
77 |
gr.Textbox(label="Message"),
|
78 |
-
gr.File(label="Fichier ZIP
|
79 |
],
|
80 |
-
title="
|
81 |
-
description="Collez une URL
|
82 |
)
|
83 |
|
84 |
if __name__ == "__main__":
|
|
|
4 |
import requests
|
5 |
import tempfile
|
6 |
import subprocess
|
|
|
7 |
import gradio as gr
|
8 |
|
9 |
+
# Automatic installation of the third-party dependencies if bs4 is missing.
try:
    import bs4
except ImportError:
    import sys

    # Run pip through the current interpreter so the packages are installed
    # into the environment this script is actually running in; a bare "pip"
    # on PATH may belong to another Python installation or not exist at all.
    subprocess.run(
        [sys.executable, "-m", "pip", "install", "-q", "gradio", "beautifulsoup4", "requests"]
    )
|
14 |
|
15 |
def sanitize_filename(name):
    """Make *name* usable as a filename on any OS, capped at 50 characters.

    Leading/trailing whitespace is removed first; any character other than a
    word character, hyphen, underscore or dot then becomes an underscore.
    """
    trimmed = name.strip()
    safe = re.sub(r"[^\w\-_.]", "_", trimmed)
    return safe[:50]
|
18 |
|
19 |
+
def extract_mp3_links_and_titles(html_text):
    """Find the MP3 URLs in *html_text* and pair each with a title if one matches.

    Args:
        html_text: Raw page source to scan.

    Returns:
        A list of ``(url, title)`` tuples, one per distinct URL, in order of
        first appearance. ``title`` is ``None`` when no title was matched for
        that URL.
    """
    # Pattern for any absolute URL ending in ".mp3".
    mp3_pattern = re.compile(r'https?://[^\s"\'<>]+\.mp3')
    # The same URL can appear several times in a page; keep only the first
    # occurrence of each so callers do not process a file more than once.
    mp3_links = list(dict.fromkeys(mp3_pattern.findall(html_text)))

    # Pattern that tries to capture a title followed by its MP3 URL. The title
    # group is lazy so that an escaping backslash before the closing quote is
    # consumed by the optional `\\?` instead of ending up inside the title.
    item_pattern = re.compile(
        r'title:"\\?"([^"]+?)\\?".*?url:"(https?://[^\s"\'<>]+\.mp3)"',
        re.DOTALL
    )
    # Map URL -> title; if a URL is matched more than once the last title wins.
    titled_links = {link: title for title, link in item_pattern.findall(html_text)}

    return [(link, titled_links.get(link)) for link in mp3_links]
|
36 |
+
|
37 |
+
def download_and_zip_mp3s(url):
    """Download every MP3 linked from the page at *url* into one ZIP archive.

    Args:
        url: Address of the web page to scan for ``.mp3`` links.

    Returns:
        A ``(message, zip_path)`` tuple. ``message`` is a user-facing status
        string; ``zip_path`` is the path of the archive that was written, or
        ``None`` when the page could not be fetched or contained no MP3 link.
    """
    # Without a timeout, requests may wait indefinitely on a stalled server.
    timeout = 30

    try:
        response = requests.get(url, timeout=timeout)
        response.raise_for_status()
    except Exception as e:
        return f"Erreur de téléchargement de la page : {e}", None

    mp3_entries = extract_mp3_links_and_titles(response.text)
    if not mp3_entries:
        return "Aucun lien .mp3 trouvé sur cette page.", None

    # The archive must still exist after this function returns, because the
    # caller reads the file from the returned path. TemporaryDirectory removes
    # its directory as soon as its `with` block exits, which left the returned
    # path dangling, so a persistent mkdtemp() directory is used instead.
    tmpdir = tempfile.mkdtemp()
    zip_path = os.path.join(tmpdir, "episodes_radiofrance.zip")

    with zipfile.ZipFile(zip_path, "w") as zipf:
        for idx, (mp3_url, title) in enumerate(mp3_entries, 1):
            if title:
                filename = f"{idx:02d}-{sanitize_filename(title)}.mp3"
            else:
                filename = f"{idx:02d}-episode.mp3"
            try:
                print(f"Téléchargement : {mp3_url}")
                audio_resp = requests.get(mp3_url, timeout=timeout)
                audio_resp.raise_for_status()
                # Skip responses under 30 000 bytes.
                if len(audio_resp.content) < 30_000:
                    print(f"Fichier trop petit, ignoré : {mp3_url}")
                    continue
                # Write straight into the archive so no loose .mp3 copies are
                # left behind in the (now persistent) temporary directory.
                zipf.writestr(filename, audio_resp.content)
            except Exception as e:
                # Best effort: a failed episode is reported and skipped.
                print(f"Erreur lors du téléchargement de {mp3_url} : {e}")

    # NOTE(review): the success message is returned even when every download
    # was skipped or failed and the archive is empty, as in the original.
    return "Téléchargement terminé avec succès.", zip_path
|
74 |
|
75 |
def gradio_interface(url):
    """Gradio callback: run the download for *url* and return its two results.

    Returns the ``(message, zip_file)`` pair produced by
    ``download_and_zip_mp3s``.
    """
    status, archive = download_and_zip_mp3s(url)
    return status, archive
|
78 |
|
79 |
# Gradio interface: one URL text box as input; a status message and the
# resulting ZIP file as outputs.
demo = gr.Interface(
    title="Extracteur MP3 Radio France (ou autre)",
    description="Collez une URL contenant des fichiers MP3, et récupérez-les dans un ZIP avec titres et numérotation.",
    fn=gradio_interface,
    inputs=gr.Textbox(label="URL de la page contenant des MP3"),
    outputs=[gr.Textbox(label="Message"), gr.File(label="Fichier ZIP")],
)
|
90 |
|
91 |
if __name__ == "__main__":
|