Pheire committed on
Commit
2109747
·
verified ·
1 Parent(s): 5dd29f5

Update app.py

Browse files
Files changed (1) hide show
  1. app.py +41 -5
app.py CHANGED
@@ -6,7 +6,13 @@ import os
6
  import uuid
7
  import yt_dlp
8
 
9
- model = EncoderClassifier.from_hparams("Jzuluaga/accent-id-commonaccent_ecapa")
 
 
 
 
 
 
10
 
11
  def download_video_from_url(url):
12
  out_path = f"video_{uuid.uuid4()}.mp4"
@@ -20,7 +26,7 @@ def download_video_from_url(url):
20
  return out_path
21
 
22
  def extract_audio(video_file):
23
- audio_path = "audio.wav"
24
  cmd = [
25
  "ffmpeg", "-i", video_file, "-vn",
26
  "-acodec", "pcm_s16le", "-ac", "1", "-ar", "16000",
@@ -29,13 +35,14 @@ def extract_audio(video_file):
29
  subprocess.run(cmd, stdout=subprocess.PIPE, stderr=subprocess.PIPE)
30
  return audio_path
31
 
32
-
33
  def classify_accent(input_file_or_url):
 
 
34
  # Check if it's a URL
35
- if input_file_or_url.startswith("http"):
36
  video_path = download_video_from_url(input_file_or_url)
37
  else:
38
- video_path = input_file_or_url
39
 
40
  audio_path = extract_audio(video_path)
41
  out_probs, top_prob, top_idx, label = model.classify_file(audio_path)
@@ -45,3 +52,32 @@ def classify_accent(input_file_or_url):
45
  result = "\n".join([f"{l}: {p*100:.2f}%" for l, p in zip(top_labels, confidences)])
46
 
47
  return label[0], f"{top_prob.item()*100:.2f}%", result
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
6
  import uuid
7
  import yt_dlp
8
 
9
# Module-level cache for the classifier; stays None until first requested.
model = None


def get_model():
    """Return the shared accent classifier, loading it on first use.

    The classifier is created via ``EncoderClassifier.from_hparams`` the
    first time this function is called and stored in the module-level
    ``model`` variable; later calls return that same object.
    """
    global model
    if model is not None:
        return model
    model = EncoderClassifier.from_hparams("Jzuluaga/accent-id-commonaccent_ecapa")
    return model
16
 
17
  def download_video_from_url(url):
18
  out_path = f"video_{uuid.uuid4()}.mp4"
 
26
  return out_path
27
 
28
  def extract_audio(video_file):
29
+ audio_path = f"audio_{uuid.uuid4()}.wav"
30
  cmd = [
31
  "ffmpeg", "-i", video_file, "-vn",
32
  "-acodec", "pcm_s16le", "-ac", "1", "-ar", "16000",
 
35
  subprocess.run(cmd, stdout=subprocess.PIPE, stderr=subprocess.PIPE)
36
  return audio_path
37
 
 
38
  def classify_accent(input_file_or_url):
39
+ model = get_model()
40
+
41
  # Check if it's a URL
42
+ if isinstance(input_file_or_url, str) and input_file_or_url.startswith("http"):
43
  video_path = download_video_from_url(input_file_or_url)
44
  else:
45
+ video_path = input_file_or_url.name if hasattr(input_file_or_url, "name") else input_file_or_url
46
 
47
  audio_path = extract_audio(video_path)
48
  out_probs, top_prob, top_idx, label = model.classify_file(audio_path)
 
52
  result = "\n".join([f"{l}: {p*100:.2f}%" for l, p in zip(top_labels, confidences)])
53
 
54
  return label[0], f"{top_prob.item()*100:.2f}%", result
55
+
56
# Gradio UI: inputs (file upload or URL) on the left, predictions on the right.
with gr.Blocks() as demo:
    gr.Markdown("# Accent Identifier")
    gr.Markdown("Upload a video/audio file or paste a YouTube URL to identify the speaker's accent.")

    with gr.Row():
        with gr.Column():
            input_file = gr.File(label="Upload video/audio file", file_types=[".mp4", ".wav", ".mp3"])
            url_input = gr.Textbox(label="...or paste a YouTube URL")
            submit_btn = gr.Button("Classify Accent")

        with gr.Column():
            label_output = gr.Textbox(label="Top Prediction")
            confidence_output = gr.Textbox(label="Confidence")
            top3_output = gr.Textbox(label="Top 3 Predictions")

    def handle_inputs(file, url):
        """Dispatch the form values to ``classify_accent``.

        Args:
            file: Value of the file-upload component, or a falsy value
                when nothing was uploaded.
            url: Text from the URL box; may be empty, ``None`` or padded
                with whitespace.

        Returns:
            A 3-tuple matching the three output textboxes: the result of
            ``classify_accent`` for the chosen input, or
            ``("No input", "", "")`` when neither input was provided.
        """
        # Normalise the URL first: a whitespace-only value must not shadow an
        # uploaded file, and leading spaces would defeat the "http" prefix
        # check performed inside classify_accent.
        url = (url or "").strip()
        if url:
            return classify_accent(url)
        if file:
            return classify_accent(file)
        return "No input", "", ""

    submit_btn.click(handle_inputs, inputs=[input_file, url_input], outputs=[label_output, confidence_output, top3_output])

if __name__ == "__main__":
    demo.launch()