"""Gradio app that classifies the English accent spoken in a public .mp4 video."""

import contextlib
import os
import uuid

import gradio as gr
import requests
import torch
from moviepy import VideoFileClip
from speechbrain.pretrained.interfaces import foreign_class

# Load the pretrained model
classifier = foreign_class(
    source="Jzuluaga/accent-id-commonaccent_xlsr-en-english",
    pymodule_file="custom_interface.py",
    classname="CustomEncoderWav2vec2Classifier",
)


def extract_audio(video_path: str, output_wav: str = "output.wav") -> str:
    """Write the audio track of a video to a 16 kHz, 16-bit PCM WAV file.

    Args:
        video_path: Path of the video file to read.
        output_wav: Path of the WAV file to create.

    Returns:
        ``output_wav``, for convenience.

    Raises:
        ValueError: If the video has no audio track.
    """
    video = VideoFileClip(video_path)
    try:
        audio = video.audio
        if audio is None:
            # Without this guard the call below fails with an opaque
            # AttributeError on None.
            raise ValueError("The video does not contain an audio track.")
        audio.write_audiofile(output_wav, codec='pcm_s16le', fps=16000)
    finally:
        # Release the underlying readers; an open clip can keep the video
        # file locked and prevent the caller from deleting it afterwards.
        video.close()
    return output_wav


def download_video(url: str, filename: str = "temp.mp4") -> str:
    """Stream the resource at ``url`` into ``filename``.

    Args:
        url: Direct link to the file to download.
        filename: Destination path on local disk.

    Returns:
        ``filename``, for convenience.

    Raises:
        requests.RequestException: On connection problems, timeouts, or a
            non-success HTTP status.
    """
    # The timeout keeps a stalled server from hanging the UI indefinitely;
    # the context manager returns the streamed connection to the pool.
    with requests.get(url, stream=True, timeout=30) as response:
        # Fail fast instead of saving an HTTP error page as an ".mp4".
        response.raise_for_status()
        with open(filename, 'wb') as f:
            for chunk in response.iter_content(chunk_size=8192):
                if chunk:  # skip keep-alive chunks
                    f.write(chunk)
    return filename


def classify_video_accent(video_url: str) -> dict:
    """Download a video, extract its audio, and classify the spoken accent.

    Temporary files are given a random UUID-based name and are removed
    before returning, whether or not classification succeeds.

    Args:
        video_url: Direct link to a public .mp4 file.

    Returns:
        A dict with the keys ``"accent"``, ``"confidence_score"`` (a
        percentage formatted with two decimals) and ``"summary"``.

    Raises:
        ValueError: If ``video_url`` is empty or blank.
    """
    if not video_url or not video_url.strip():
        raise ValueError("Please provide a video URL.")
    video_url = video_url.strip()

    uid = str(uuid.uuid4())
    video_path = f"{uid}.mp4"
    wav_path = f"{uid}.wav"

    try:
        download_video(video_url, video_path)
        extract_audio(video_path, wav_path)

        out_prob, score, index, text_lab = classifier.classify_file(wav_path)
        confidence = torch.max(out_prob).item() * 100

        # SpeechBrain classifiers normally return the decoded labels as a
        # list (one entry per batch item); unwrap it so the output reads
        # "us" rather than "['us']". A plain string is used unchanged.
        if isinstance(text_lab, (list, tuple)) and text_lab:
            text_lab = text_lab[0]

        return {
            "accent": text_lab,
            "confidence_score": f"{confidence:.2f}%",
            "summary": f"The speaker is most likely using a(n) {text_lab} English accent."
        }
    finally:
        # Best-effort cleanup: a failure to delete a temp file must not mask
        # the classification result or the original exception.
        for path in (video_path, wav_path):
            with contextlib.suppress(OSError):
                os.remove(path)


def gradio_accent_classifier(video_url: str) -> str:
    """Gradio callback: classify the accent and format the result as text.

    Any exception is caught at this UI boundary and reported to the user as
    an ``"Error: ..."`` string instead of propagating.
    """
    try:
        result = classify_video_accent(video_url)
        return f"""Accent: {result['accent']} Confidence: {result['confidence_score']} Summary: {result['summary']}"""
    except Exception as e:
        return f"Error: {str(e)}"


iface = gr.Interface(
    fn=gradio_accent_classifier,
    inputs=gr.Textbox(label="Public .mp4 Video URL"),
    outputs="text",
    title="English Accent Classifier",
    description="Paste a direct link to a public .mp4 file to classify the English accent spoken in the video."
)

if __name__ == "__main__":
    iface.launch()