# English Accent Classifier — Hugging Face Space (Gradio app).
# (The "Spaces: Sleeping" lines were page-scrape residue, preserved here as a comment.)
import gradio as gr
import torch
import tempfile
import os
import requests
from moviepy import VideoFileClip
from transformers import pipeline, WhisperProcessor, WhisperForConditionalGeneration, Wav2Vec2Processor, Wav2Vec2Model
import torchaudio

# Whisper ASR pipeline, loaded once at import time. It is used to
# transcribe the extracted audio and confirm the clip contains
# intelligible speech. "whisper-tiny" on CPU keeps this runnable on
# free-tier Space hardware.
whisper_pipe = pipeline("automatic-speech-recognition", model="openai/whisper-tiny", device="cpu")
def classify_accent(audio_tensor, sample_rate):
    """Classify the speaker's English accent.

    Placeholder implementation for demonstration: the inputs are
    ignored and a fixed "American" result is returned. A real
    implementation would run the waveform through a fine-tuned
    classifier (e.g. wav2vec2 embeddings plus a classification head).

    Args:
        audio_tensor: waveform tensor (unused by this placeholder).
        sample_rate: sample rate in Hz (unused by this placeholder).

    Returns:
        dict with keys "accent" (str), "confidence" (float, percent),
        and "summary" (str, human-readable explanation).
    """
    return {
        "accent": "American",
        "confidence": 87.2,
        "summary": "The speaker uses rhotic pronunciation and North American intonation.",
    }
def download_video(url):
    """Download a video from *url* into a temporary .mp4 file.

    Streams the response in 1 MiB chunks so large files are never held
    fully in memory.

    Args:
        url: publicly reachable HTTP(S) URL of the video.

    Returns:
        Path of the downloaded temporary file; the caller is
        responsible for deleting it.

    Raises:
        requests.HTTPError: if the server responds with a 4xx/5xx status.
        requests.RequestException: on connection errors or timeout.
    """
    with tempfile.NamedTemporaryFile(suffix=".mp4", delete=False) as tmp:
        video_path = tmp.name
    # raise_for_status() turns HTTP errors into exceptions instead of
    # silently writing an error page to disk; the timeout prevents the
    # handler from hanging forever on a dead host. The `with` on the
    # response releases the connection back to the pool.
    with requests.get(url, stream=True, timeout=60) as response:
        response.raise_for_status()
        with open(video_path, "wb") as f:
            for chunk in response.iter_content(chunk_size=1024 * 1024):
                if chunk:
                    f.write(chunk)
    return video_path
def extract_audio(video_path):
    """Extract a video's audio track into a temporary 16-bit PCM WAV file.

    Args:
        video_path: path to a local video file.

    Returns:
        Path of the temporary .wav file; the caller is responsible for
        deleting it.

    Raises:
        ValueError: if the video has no audio track (the original code
            crashed with AttributeError in this case).
    """
    with tempfile.NamedTemporaryFile(suffix=".wav", delete=False) as tmp:
        audio_path = tmp.name
    clip = VideoFileClip(video_path)
    try:
        if clip.audio is None:
            raise ValueError("The video has no audio track.")
        clip.audio.write_audiofile(audio_path, codec='pcm_s16le')
    finally:
        # Always release the underlying ffmpeg reader processes, even
        # if extraction fails (the original leaked the clip).
        clip.close()
    return audio_path
def transcribe(audio_path):
    """Run Whisper ASR on the audio file at *audio_path* and return the transcript text."""
    return whisper_pipe(audio_path)['text']
def analyze_accent(url):
    """End-to-end handler: download video, extract audio, transcribe, classify accent.

    Args:
        url: public URL of a video file (e.g. MP4), as typed by the user.

    Returns:
        A Markdown-formatted result string, or a human-readable error
        message on failure (the Gradio UI renders either).
    """
    video_path = None
    audio_path = None
    try:
        video_path = download_video(url)
        audio_path = extract_audio(video_path)

        # Load audio with torchaudio for the accent classifier.
        waveform, sample_rate = torchaudio.load(audio_path)

        # Transcribe first: if Whisper produces (almost) nothing, the
        # clip probably contains no intelligible speech.
        transcript = transcribe(audio_path)
        if len(transcript.strip()) < 3:
            return "Could not understand speech. Please try another video."

        result = classify_accent(waveform, sample_rate)

        output = f"**Accent**: {result['accent']}\n\n"
        output += f"**Confidence**: {result['confidence']}%\n\n"
        output += f"**Explanation**: {result['summary']}\n\n"
        output += f"**Transcript** (first 200 chars): {transcript[:200]}..."
        return output
    except Exception as e:
        # Top-level boundary for the Gradio handler: surface the error
        # to the user instead of crashing the UI.
        return f"❌ Error: {str(e)}"
    finally:
        # Remove temp files on success AND failure — the original only
        # cleaned up on the success path, leaking files on any error.
        for path in (video_path, audio_path):
            if path and os.path.exists(path):
                os.remove(path)
# Build the UI at module level (the Hugging Face Spaces Gradio SDK
# discovers a module-level `demo` object) and launch only when the
# script is run directly.
demo = gr.Interface(
    fn=analyze_accent,
    inputs=gr.Textbox(label="Public Video URL (e.g. MP4)", placeholder="https://..."),
    outputs=gr.Markdown(label="Accent Analysis Result"),
    title="English Accent Classifier",
    description="Paste a video URL (MP4) to extract audio, transcribe speech, and classify the English accent (e.g., American, British, etc.).",
)

if __name__ == "__main__":
    demo.launch()