Spaces:

Sajidahamed
/

AccentClassification

Sleeping

App Files Files Community

Sajidahamed committed on May 29

Commit

7cebfba

verified ·

1 Parent(s): 98c4440

Update app.py

Browse files

Files changed (1) hide show

app.py +39 -27

app.py CHANGED Viewed

@@ -1,32 +1,44 @@
-import streamlit as st
 import os
-import subprocess
 import torchaudio
 from speechbrain.pretrained import EncoderClassifier
-st.title("🗣️ English Accent Classifier (Proof of Concept)")
-url = st.text_input("Enter public video URL (YouTube or direct MP4):")
-if st.button("Analyze"):
-    with st.spinner("Downloading video..."):
-        if "youtube.com" in url or "youtu.be" in url:
-            os.system(f'yt-dlp -o input_video.mp4 "{url}"')
-        else:
-            os.system(f'wget -O input_video.mp4 "{url}"')
-    with st.spinner("Extracting audio..."):
-        os.system("ffmpeg -y -i input_video.mp4 -ar 16000 -ac 1 -vn audio.wav")
-    with st.spinner("Classifying accent..."):
-        accent_model = EncoderClassifier.from_hparams(
-            source="speechbrain/lang-id-commonlanguage_ecapa",
-            savedir="tmp_accent_model"
-        )
-        signal, fs = torchaudio.load("audio.wav")
-        if signal.shape[0] > 1:
-            signal = signal[0].unsqueeze(0)
-        prediction = accent_model.classify_batch(signal)
-        pred_label = prediction[3][0]
-        pred_scores = prediction[1][0]
-        confidence = float(pred_scores.max()) * 100
-        st.success(f"Predicted Accent: {pred_label} ({confidence:.1f}%)")
-        st.info(f"The model is {confidence:.0f}% confident this is a {pred_label} English accent.")

+import gradio as gr
 import os
 import torchaudio
 from speechbrain.pretrained import EncoderClassifier
+# Classifier instance shared across requests; populated lazily on first use.
+_ACCENT_MODEL = None
+def _get_accent_model():
+    """Return the SpeechBrain classifier, instantiating it only once."""
+    global _ACCENT_MODEL
+    if _ACCENT_MODEL is None:
+        _ACCENT_MODEL = EncoderClassifier.from_hparams(
+            source="speechbrain/lang-id-commonlanguage_ecapa",
+            savedir="tmp_accent_model"
+        )
+    return _ACCENT_MODEL
+def accent_detect(video_file):
+    """Extract the audio track of a video and describe the model's prediction.
+
+    Args:
+        video_file: The uploaded video. Accepted forms are a filesystem path
+            (``str`` or ``os.PathLike``), a tuple whose first element is the
+            path, or a binary file-like object exposing ``read()``.
+
+    Returns:
+        A human-readable string with the predicted label and a confidence
+        percentage, or an error message when no usable audio was obtained.
+    """
+    # Imported locally so this function does not depend on edits to the
+    # module's import section.
+    import subprocess
+    import tempfile
+    extraction_failed = "Audio extraction failed. Please check your file."
+    if video_file is None:
+        return "No video provided. Please upload a file."
+    # Work in a private scratch directory: a leftover audio.wav from an
+    # earlier request can then never be mistaken for this request's output.
+    with tempfile.TemporaryDirectory() as workdir:
+        if isinstance(video_file, tuple):
+            video_path = video_file[0]
+        elif isinstance(video_file, (str, os.PathLike)):
+            # A plain path has no read() method; use it directly.
+            video_path = os.fspath(video_file)
+        else:
+            video_path = os.path.join(workdir, "uploaded_input.mp4")
+            with open(video_path, "wb") as f:
+                f.write(video_file.read())
+        audio_path = os.path.join(workdir, "audio.wav")
+        # Argument list instead of a shell string: paths containing spaces or
+        # shell metacharacters are passed to ffmpeg verbatim.
+        ffmpeg_cmd = [
+            "ffmpeg", "-y", "-i", video_path,
+            "-ar", "16000", "-ac", "1", "-vn", audio_path,
+        ]
+        try:
+            completed = subprocess.run(ffmpeg_cmd, check=False)
+        except OSError:
+            # ffmpeg is missing or could not be started.
+            return extraction_failed
+        if (
+            completed.returncode != 0
+            or not os.path.exists(audio_path)
+            or os.path.getsize(audio_path) < 1000
+        ):
+            return extraction_failed
+        # Load the waveform before the scratch directory is removed.
+        signal, _sample_rate = torchaudio.load(audio_path)
+    # Keep only the first channel if the file is not mono.
+    if signal.shape[0] > 1:
+        signal = signal[0].unsqueeze(0)
+    prediction = _get_accent_model().classify_batch(signal)
+    # Index 3 is used as the text label and index 1 as the score of the first
+    # batch item -- TODO confirm against the SpeechBrain return tuple.
+    pred_label = prediction[3][0]
+    pred_scores = prediction[1][0]
+    # NOTE(review): assumes the score is a probability in [0, 1]; if the model
+    # emits log-posteriors this percentage is not meaningful -- confirm.
+    confidence = float(pred_scores.max()) * 100
+    # NOTE(review): the model id says "lang-id", which suggests language
+    # identification rather than accent classification -- confirm the wording.
+    explanation = f"Predicted Accent: {pred_label} ({confidence:.1f}%)\nThe model is {confidence:.0f}% confident this is a {pred_label} English accent."
+    return explanation
+# Gradio UI: a single video upload in, the plain-text prediction out.
+# gr.Video has no `type` parameter (that option belongs to components such as
+# gr.Audio and gr.File), so it is not passed here; the component already
+# hands the callback a path to the uploaded file.
+demo = gr.Interface(
+    fn=accent_detect,
+    inputs=gr.Video(label="Upload a Video File (MP4, WEBM, etc.)"),
+    outputs="text",
+    title="🗣️ English Accent Classifier (Gradio Demo)",
+    description="Upload a short video clip of English speech. This tool predicts the English accent and confidence."
+)
+if __name__ == "__main__":
+    # Start the web server only when executed as a script, not on import.
+    demo.launch()