Pheire committed on
Commit
9552602
·
verified ·
1 Parent(s): 2f93b69

Create app.py

Browse files
Files changed (1) hide show
  1. app.py +46 -0
app.py ADDED
@@ -0,0 +1,46 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ import gradio as gr
2
+ from speechbrain.pretrained import EncoderClassifier
3
+ import subprocess
4
+ import os
5
+ import uuid
6
+ import yt_dlp
7
+
8
# Load the pretrained accent classifier once, at import time, so that every
# call to classify_accent() reuses the same instance.
model = EncoderClassifier.from_hparams(
    source="Jzuluaga/accent-id-commonaccent_ecapa",
)
9
+
10
def download_video_from_url(url):
    """Download the media behind *url* with yt-dlp and return its local path.

    Args:
        url: Address of the video/audio page or file to fetch.

    Returns:
        The path the download was written to. The name is
        ``video_<uuid4>.mp4`` relative to the current working directory, so
        successive calls never overwrite each other.
    """
    # The output template has no extension placeholder, so the file keeps the
    # ".mp4" name given here regardless of which stream yt-dlp selects.
    destination = f"video_{uuid.uuid4()}.mp4"
    options = dict(
        format='bestaudio/best',  # yt-dlp format selector; see its docs
        outtmpl=destination,
        quiet=True,
    )
    with yt_dlp.YoutubeDL(options) as downloader:
        downloader.download([url])
    return destination
20
+
21
def extract_audio(video_file, audio_path="audio.wav"):
    """Convert the audio track of *video_file* to a 16 kHz mono WAV file.

    Args:
        video_file: Path of the input media file handed to ffmpeg.
        audio_path: Where to write the WAV output. Defaults to ``"audio.wav"``
            in the current working directory; an existing file is overwritten.

    Returns:
        ``audio_path``, once ffmpeg has exited successfully.

    Raises:
        RuntimeError: If ffmpeg exits with a non-zero status. The end of its
            stderr output is included in the message.
        OSError: If the ``ffmpeg`` executable cannot be launched.
    """
    cmd = [
        # "-y" (overwrite without asking) is a global option, so it goes
        # before the input instead of trailing after the output file.
        "ffmpeg", "-y", "-i", video_file,
        "-vn",                   # drop any video stream
        "-acodec", "pcm_s16le",  # 16-bit little-endian PCM
        "-ac", "1",              # one channel (mono)
        "-ar", "16000",          # 16 kHz sample rate
        audio_path,
    ]
    completed = subprocess.run(cmd, stdout=subprocess.PIPE, stderr=subprocess.PIPE)

    # Without this check a failed conversion would go unnoticed and the caller
    # could end up reading a missing file or one left over from an earlier run.
    if completed.returncode != 0:
        detail = completed.stderr.decode("utf-8", errors="replace").strip()
        raise RuntimeError(
            f"ffmpeg failed (exit code {completed.returncode}) "
            f"for {video_file!r}: {detail[-500:]}"
        )
    return audio_path
30
+
31
+
32
def classify_accent(input_file_or_url):
    """Classify the accent heard in a local media file or a remote video.

    Args:
        input_file_or_url: Either a path to a local media file, or an
            ``http://`` / ``https://`` URL, which is first downloaded with
            ``download_video_from_url``.

    Returns:
        A 3-tuple ``(label, confidence, top3)``:
        ``label`` is the first predicted label returned by the model,
        ``confidence`` is the model's top score formatted as a percentage
        string, and ``top3`` is a newline-separated ``"label: xx.xx%"``
        listing of the three highest-scoring classes.
        NOTE(review): the scores are formatted as percentages on the
        assumption that the model returns values in [0, 1] — confirm.

    Raises:
        ValueError: If ``input_file_or_url`` is empty or ``None``.
    """
    if not input_file_or_url:
        raise ValueError("input_file_or_url must be a non-empty path or URL")

    # Function-scope import: the module does not import torch at top level,
    # so the bare ``torch.topk`` calls used to raise NameError.
    import torch

    # Match real URL schemes only, so a local file whose name merely starts
    # with "http" is not sent to the downloader.
    is_remote = input_file_or_url.startswith(("http://", "https://"))
    if is_remote:
        video_path = download_video_from_url(input_file_or_url)
    else:
        video_path = input_file_or_url

    try:
        audio_path = extract_audio(video_path)
    finally:
        # Only delete what this function downloaded; never a user's own file.
        if is_remote:
            try:
                os.remove(video_path)
            except OSError:
                pass  # best-effort cleanup of the temporary download

    out_probs, top_prob, top_idx, label = model.classify_file(audio_path)

    # Compute the top-3 once and reuse both the indices and the values.
    top3 = torch.topk(out_probs, 3)
    top_labels = model.hparams.label_encoder.decode_ndim(top3.indices.squeeze())
    confidences = top3.values.squeeze().tolist()
    result = "\n".join(
        f"{name}: {score * 100:.2f}%"
        for name, score in zip(top_labels, confidences)
    )

    return label[0], f"{top_prob.item() * 100:.2f}%", result