Create app.py
app.py ADDED
@@ -0,0 +1,46 @@
import gradio as gr
from speechbrain.pretrained import EncoderClassifier
import subprocess
import os
import torch
import uuid
import yt_dlp

# Load the pretrained CommonAccent ECAPA accent-identification model.
model = EncoderClassifier.from_hparams("Jzuluaga/accent-id-commonaccent_ecapa")

def download_video_from_url(url):
    # Download the best available audio stream to a uniquely named file.
    out_path = f"video_{uuid.uuid4()}.mp4"
    ydl_opts = {
        'format': 'bestaudio/best',
        'outtmpl': out_path,
        'quiet': True,
    }
    with yt_dlp.YoutubeDL(ydl_opts) as ydl:
        ydl.download([url])
    return out_path

def extract_audio(video_file):
    # Convert to 16 kHz mono 16-bit PCM WAV, the format the classifier expects.
    audio_path = "audio.wav"
    cmd = [
        "ffmpeg", "-i", video_file, "-vn",
        "-acodec", "pcm_s16le", "-ac", "1", "-ar", "16000",
        audio_path, "-y"
    ]
    subprocess.run(cmd, stdout=subprocess.PIPE, stderr=subprocess.PIPE)
    return audio_path


def classify_accent(input_file_or_url):
    # Accept either a URL or a local video file path.
    if input_file_or_url.startswith("http"):
        video_path = download_video_from_url(input_file_or_url)
    else:
        video_path = input_file_or_url

    audio_path = extract_audio(video_path)
    out_probs, top_prob, top_idx, label = model.classify_file(audio_path)

    # Decode the three highest-scoring accent labels and their scores.
    top_labels = model.hparams.label_encoder.decode_ndim(torch.topk(out_probs, 3).indices.squeeze())
    confidences = torch.topk(out_probs, 3).values.squeeze().tolist()
    result = "\n".join([f"{l}: {p*100:.2f}%" for l, p in zip(top_labels, confidences)])

    return label[0], f"{top_prob.item()*100:.2f}%", result