Spaces:

Pheire
/

accent-detector

Sleeping

App Files Community

Pheire committed on May 23

Commit

2109747

verified ·

1 Parent(s): 5dd29f5

Update app.py

Browse files

Files changed (1) hide show

app.py +41 -5

app.py CHANGED Viewed

@@ -6,7 +6,13 @@ import os
 import uuid
 import yt_dlp
-model = EncoderClassifier.from_hparams("Jzuluaga/accent-id-commonaccent_ecapa")
 def download_video_from_url(url):
     out_path = f"video_{uuid.uuid4()}.mp4"
@@ -20,7 +26,7 @@ def download_video_from_url(url):
     return out_path
 def extract_audio(video_file):
-    audio_path = "audio.wav"
     cmd = [
         "ffmpeg", "-i", video_file, "-vn",
         "-acodec", "pcm_s16le", "-ac", "1", "-ar", "16000",
@@ -29,13 +35,14 @@ def extract_audio(video_file):
     subprocess.run(cmd, stdout=subprocess.PIPE, stderr=subprocess.PIPE)
     return audio_path
 def classify_accent(input_file_or_url):
     # Check if it's a URL
-    if input_file_or_url.startswith("http"):
         video_path = download_video_from_url(input_file_or_url)
     else:
-        video_path = input_file_or_url
     audio_path = extract_audio(video_path)
     out_probs, top_prob, top_idx, label = model.classify_file(audio_path)
@@ -45,3 +52,32 @@ def classify_accent(input_file_or_url):
     result = "\n".join([f"{l}: {p*100:.2f}%" for l, p in zip(top_labels, confidences)])
     return label[0], f"{top_prob.item()*100:.2f}%", result

 import uuid
 import yt_dlp
+model = None  # Lazy-loaded model
+def get_model():
+    global model
+    if model is None:
+        model = EncoderClassifier.from_hparams("Jzuluaga/accent-id-commonaccent_ecapa")
+    return model
 def download_video_from_url(url):
     out_path = f"video_{uuid.uuid4()}.mp4"
     return out_path
 def extract_audio(video_file):
+    audio_path = f"audio_{uuid.uuid4()}.wav"
     cmd = [
         "ffmpeg", "-i", video_file, "-vn",
         "-acodec", "pcm_s16le", "-ac", "1", "-ar", "16000",
     subprocess.run(cmd, stdout=subprocess.PIPE, stderr=subprocess.PIPE)
     return audio_path
 def classify_accent(input_file_or_url):
+    model = get_model()
     # Check if it's a URL
+    if isinstance(input_file_or_url, str) and input_file_or_url.startswith("http"):
         video_path = download_video_from_url(input_file_or_url)
     else:
+        video_path = input_file_or_url.name if hasattr(input_file_or_url, "name") else input_file_or_url
     audio_path = extract_audio(video_path)
     out_probs, top_prob, top_idx, label = model.classify_file(audio_path)
     result = "\n".join([f"{l}: {p*100:.2f}%" for l, p in zip(top_labels, confidences)])
     return label[0], f"{top_prob.item()*100:.2f}%", result
+# Gradio UI
+with gr.Blocks() as demo:
+    gr.Markdown("# Accent Identifier")
+    gr.Markdown("Upload a video/audio file or paste a YouTube URL to identify the speaker's accent.")
+    with gr.Row():
+        with gr.Column():
+            input_file = gr.File(label="Upload video/audio file", file_types=[".mp4", ".wav", ".mp3"])
+            url_input = gr.Textbox(label="...or paste a YouTube URL")
+            submit_btn = gr.Button("Classify Accent")
+        with gr.Column():
+            label_output = gr.Textbox(label="Top Prediction")
+            confidence_output = gr.Textbox(label="Confidence")
+            top3_output = gr.Textbox(label="Top 3 Predictions")
+    def handle_inputs(file, url):
+        if url:
+            return classify_accent(url)
+        elif file:
+            return classify_accent(file)
+        else:
+            return "No input", "", ""
+    submit_btn.click(handle_inputs, inputs=[input_file, url_input], outputs=[label_output, confidence_output, top3_output])
+if __name__ == "__main__":
+    demo.launch()