Spaces:
Sleeping
Sleeping
Update app.py
Browse files
app.py
CHANGED
@@ -6,7 +6,13 @@ import os
|
|
6 |
import uuid
|
7 |
import yt_dlp
|
8 |
|
9 |
-
model =
|
|
|
|
|
|
|
|
|
|
|
|
|
10 |
|
11 |
def download_video_from_url(url):
|
12 |
out_path = f"video_{uuid.uuid4()}.mp4"
|
@@ -20,7 +26,7 @@ def download_video_from_url(url):
|
|
20 |
return out_path
|
21 |
|
22 |
def extract_audio(video_file):
|
23 |
-
audio_path = "
|
24 |
cmd = [
|
25 |
"ffmpeg", "-i", video_file, "-vn",
|
26 |
"-acodec", "pcm_s16le", "-ac", "1", "-ar", "16000",
|
@@ -29,13 +35,14 @@ def extract_audio(video_file):
|
|
29 |
subprocess.run(cmd, stdout=subprocess.PIPE, stderr=subprocess.PIPE)
|
30 |
return audio_path
|
31 |
|
32 |
-
|
33 |
def classify_accent(input_file_or_url):
|
|
|
|
|
34 |
# Check if it's a URL
|
35 |
-
if input_file_or_url.startswith("http"):
|
36 |
video_path = download_video_from_url(input_file_or_url)
|
37 |
else:
|
38 |
-
video_path = input_file_or_url
|
39 |
|
40 |
audio_path = extract_audio(video_path)
|
41 |
out_probs, top_prob, top_idx, label = model.classify_file(audio_path)
|
@@ -45,3 +52,32 @@ def classify_accent(input_file_or_url):
|
|
45 |
result = "\n".join([f"{l}: {p*100:.2f}%" for l, p in zip(top_labels, confidences)])
|
46 |
|
47 |
return label[0], f"{top_prob.item()*100:.2f}%", result
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
6 |
import uuid
|
7 |
import yt_dlp
|
8 |
|
9 |
+
model = None  # Lazy-loaded model; filled in by the first get_model() call


def get_model():
    """Return the shared accent classifier, loading it on first use.

    The classifier is built with ``EncoderClassifier.from_hparams`` from the
    ``Jzuluaga/accent-id-commonaccent_ecapa`` source and cached in the
    module-level ``model`` variable, so later calls return the same object
    without loading it again.
    """
    global model
    if model is None:
        # Load only when first needed rather than at import time.
        model = EncoderClassifier.from_hparams("Jzuluaga/accent-id-commonaccent_ecapa")
    return model
|
16 |
|
17 |
def download_video_from_url(url):
|
18 |
out_path = f"video_{uuid.uuid4()}.mp4"
|
|
|
26 |
return out_path
|
27 |
|
28 |
def extract_audio(video_file):
|
29 |
+
audio_path = f"audio_{uuid.uuid4()}.wav"
|
30 |
cmd = [
|
31 |
"ffmpeg", "-i", video_file, "-vn",
|
32 |
"-acodec", "pcm_s16le", "-ac", "1", "-ar", "16000",
|
|
|
35 |
subprocess.run(cmd, stdout=subprocess.PIPE, stderr=subprocess.PIPE)
|
36 |
return audio_path
|
37 |
|
|
|
38 |
def classify_accent(input_file_or_url):
|
39 |
+
model = get_model()
|
40 |
+
|
41 |
# Check if it's a URL
|
42 |
+
if isinstance(input_file_or_url, str) and input_file_or_url.startswith("http"):
|
43 |
video_path = download_video_from_url(input_file_or_url)
|
44 |
else:
|
45 |
+
video_path = input_file_or_url.name if hasattr(input_file_or_url, "name") else input_file_or_url
|
46 |
|
47 |
audio_path = extract_audio(video_path)
|
48 |
out_probs, top_prob, top_idx, label = model.classify_file(audio_path)
|
|
|
52 |
result = "\n".join([f"{l}: {p*100:.2f}%" for l, p in zip(top_labels, confidences)])
|
53 |
|
54 |
return label[0], f"{top_prob.item()*100:.2f}%", result
|
55 |
+
|
56 |
+
# Gradio UI
# NOTE(review): the Row/Column nesting below is reconstructed from statement
# order (two columns side by side in one row) -- confirm against the real file.
with gr.Blocks() as demo:
    gr.Markdown("# Accent Identifier")
    gr.Markdown("Upload a video/audio file or paste a YouTube URL to identify the speaker's accent.")

    with gr.Row():
        # Left column: the two alternative inputs and the trigger button.
        with gr.Column():
            input_file = gr.File(label="Upload video/audio file", file_types=[".mp4", ".wav", ".mp3"])
            url_input = gr.Textbox(label="...or paste a YouTube URL")
            submit_btn = gr.Button("Classify Accent")

        # Right column: the three values returned by classify_accent.
        with gr.Column():
            label_output = gr.Textbox(label="Top Prediction")
            confidence_output = gr.Textbox(label="Confidence")
            top3_output = gr.Textbox(label="Top 3 Predictions")

    def handle_inputs(file, url):
        """Dispatch the submitted inputs to ``classify_accent``.

        A non-empty URL takes precedence over an uploaded file.

        Args:
            file: Value of the file-upload component, or a falsy value when
                nothing was uploaded.
            url: Text of the URL box; may be empty or ``None``.

        Returns:
            A 3-tuple matching the three output textboxes. When neither
            input is provided this is ``("No input", "", "")``; otherwise
            it is whatever ``classify_accent`` returns.
        """
        # Pasted text often carries stray whitespace. A leading space would
        # make the "http" prefix check in classify_accent fail, and a
        # whitespace-only value would otherwise shadow an uploaded file.
        if isinstance(url, str):
            url = url.strip()

        if url:
            return classify_accent(url)
        if file:
            return classify_accent(file)
        return "No input", "", ""

    submit_btn.click(
        handle_inputs,
        inputs=[input_file, url_input],
        outputs=[label_output, confidence_output, top3_output],
    )

if __name__ == "__main__":
    demo.launch()
|