"""Gradio app that predicts the English accent spoken in an uploaded video."""

import tempfile
from pathlib import Path
from typing import Optional

import gradio as gr
import librosa
import torch
from pydub import AudioSegment
from speechbrain.inference.classifiers import EncoderClassifier

# Sample rate (Hz) the extracted audio is resampled to before classification.
TARGET_SAMPLE_RATE = 16000

# Load model once
classifier = EncoderClassifier.from_hparams(
    source="Jzuluaga/accent-id-commonaccent_ecapa",
    savedir="/tmp/accent-id-commonaccent_ecapa",
)


def classify_accent(video: Optional[str]) -> str:
    """Classify the accent of the speech contained in an uploaded video.

    The audio track is extracted with pydub, written to a temporary WAV
    file, reloaded with librosa as mono audio at ``TARGET_SAMPLE_RATE`` and
    passed to the module-level ``classifier`` as a batch of one waveform.

    Args:
        video: The value handed over by the Gradio video input — presumably
            a filesystem path to the uploaded file (confirm against the
            Gradio version in use). A falsy value means nothing was
            uploaded.

    Returns:
        A display string with the predicted label and the classifier score
        formatted to two decimals.

    Raises:
        gr.Error: If no video was provided.
    """
    if not video:
        raise gr.Error("Please upload a video before submitting.")

    # A per-call temporary directory keeps concurrent requests from
    # overwriting each other's intermediate WAV and removes it afterwards.
    with tempfile.TemporaryDirectory() as workdir:
        wav_path = str(Path(workdir) / "output.wav")

        # No explicit container format: ffmpeg detects it, so MP4 keeps
        # working and other containers are accepted as well.
        audio = AudioSegment.from_file(video)

        # export() returns the open output handle; close it so the data is
        # flushed and the temporary directory can be deleted on every OS.
        audio.export(wav_path, format="wav").close()

        waveform, _ = librosa.load(wav_path, sr=TARGET_SAMPLE_RATE, mono=True)

    # Add a leading batch dimension: classify_batch is given one waveform.
    waveform_tensor = torch.tensor(waveform).unsqueeze(0)
    _, score, _, text_lab = classifier.classify_batch(waveform_tensor)

    # NOTE(review): "Confidence" prints the raw score returned by the
    # classifier; whether it is a probability or a log-score is not
    # established here — confirm against the model card.
    return f"Accent: {text_lab[0]} (Confidence: {score.item():.2f})"


app = gr.Interface(
    fn=classify_accent,
    inputs=gr.Video(label="Upload an MP4"),
    outputs=gr.Text(label="Prediction"),
    title="English Accent Classifier",
    description="Upload a short MP4 video of spoken English to detect accent.",
)