fahadqazi committed on
Commit
2b45ac4
·
1 Parent(s): 65089f3
Files changed (2) hide show
  1. app.py +87 -4
  2. requirements.txt +8 -0
app.py CHANGED
@@ -1,7 +1,90 @@
1
  import gradio as gr
 
 
 
 
 
 
 
 
2
 
3
- def greet(name):
4
- return "Hello " + name + "!!"
5
 
6
- demo = gr.Interface(fn=greet, inputs="text", outputs="text")
7
- demo.launch()
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
  import gradio as gr
2
+ import torch
3
+ import tempfile
4
+ import os
5
+ import requests
6
+ from moviepy import VideoFileClip
7
+ from transformers import pipeline, WhisperProcessor, WhisperForConditionalGeneration, Wav2Vec2Processor, Wav2Vec2Model
8
+ import torchaudio
9
+ import yt_dlp as youtube_dl
10
 
11
# Module-level speech-recognition pipeline (Whisper "tiny" checkpoint, CPU).
# It is built once at import time and reused by every transcription call.
whisper_pipe = pipeline(
    "automatic-speech-recognition",
    model="openai/whisper-tiny",
    device="cpu",
)
13
 
14
+ # Placeholder accent classifier (replace with real one or your own logic)
15
def classify_accent(audio_tensor, sample_rate):
    """Return a fixed, demonstration-only accent classification.

    This is a placeholder: both arguments are accepted but ignored, and the
    same result is returned for every input. Swap in a real classifier
    (e.g. a fine-tuned model or wav2vec2 embeddings) to make it meaningful.

    Args:
        audio_tensor: Audio samples (unused by the placeholder).
        sample_rate: Sampling rate of ``audio_tensor`` (unused).

    Returns:
        A dict with the keys ``"accent"``, ``"confidence"`` and ``"summary"``.
    """
    label = "American"
    score = 87.2
    rationale = "The speaker uses rhotic pronunciation and North American intonation."
    return dict(accent=label, confidence=score, summary=rationale)
23
+
24
def download_video(url):
    """Download the video at *url* into a temporary ``.mp4`` file.

    URLs containing ``youtube.com`` or ``youtu.be`` are fetched with yt-dlp;
    any other URL is streamed over HTTP with ``requests``.

    Args:
        url: Public URL of the video.

    Returns:
        Path of the downloaded file. The caller is responsible for deleting it.

    Raises:
        requests.RequestException: On connection errors, timeouts, or a
            non-2xx HTTP status for direct downloads.
        Exception: Whatever yt-dlp raises when a YouTube download fails.
    """
    # mkstemp returns an open descriptor; close it right away so the handle
    # is not left dangling while another library writes to the path.
    fd, video_path = tempfile.mkstemp(suffix=".mp4")
    os.close(fd)

    try:
        if "youtube.com" in url or "youtu.be" in url:
            ydl_opts = {
                'format': 'best[ext=mp4]',
                'outtmpl': video_path,
                'quiet': True,
                'noplaylist': True,
                # The placeholder file already exists on disk.
                # NOTE(review): yt-dlp appears to skip existing video files
                # unless 'overwrites' is set - confirm against its docs.
                'overwrites': True,
            }

            with youtube_dl.YoutubeDL(ydl_opts) as ydl:
                info = ydl.extract_info(url, download=True)
                return ydl.prepare_filename(info)

        # Direct download: bound the wait, reject HTTP error pages instead of
        # saving them as a "video", and release the connection when done.
        with requests.get(url, stream=True, timeout=30) as response:
            response.raise_for_status()
            with open(video_path, "wb") as f:
                for chunk in response.iter_content(chunk_size=1024 * 1024):
                    if chunk:
                        f.write(chunk)
        return video_path
    except Exception:
        # Do not leave the placeholder (or a partial download) behind.
        if os.path.exists(video_path):
            os.remove(video_path)
        raise
44
+
45
def extract_audio(video_path):
    """Extract the audio track of a video into a temporary WAV file.

    Args:
        video_path: Path of the video file to read.

    Returns:
        Path of the written ``.wav`` file (16-bit PCM). The caller is
        responsible for deleting it.

    Raises:
        ValueError: If the video has no audio track.
        Exception: Whatever moviepy raises when the file cannot be read or
            the audio cannot be written.
    """
    fd, audio_path = tempfile.mkstemp(suffix=".wav")
    os.close(fd)

    try:
        clip = VideoFileClip(video_path)
        try:
            # moviepy exposes ``audio`` as None for videos without sound;
            # fail with a clear message instead of an AttributeError.
            if clip.audio is None:
                raise ValueError("The video does not contain an audio track.")
            clip.audio.write_audiofile(audio_path, codec='pcm_s16le')
        finally:
            # Release the reader resources held by the clip.
            clip.close()
    except Exception:
        # Do not leave an empty or partial WAV file behind on failure.
        if os.path.exists(audio_path):
            os.remove(audio_path)
        raise
    return audio_path
50
+
51
def transcribe(audio_path):
    """Transcribe the audio file at *audio_path* with the Whisper pipeline.

    Args:
        audio_path: Path of the audio file to transcribe.

    Returns:
        The value stored under the ``'text'`` key of the pipeline output.
    """
    return whisper_pipe(audio_path)['text']
54
+
55
def analyze_accent(url):
    """Download a video, transcribe its speech, and report the accent.

    This is the Gradio callback: it never raises, and always returns a
    string suitable for display as Markdown.

    Args:
        url: Public URL of the video to analyze.

    Returns:
        A Markdown report (accent, confidence, explanation, transcript
        excerpt), or a human-readable message when the URL is missing, the
        speech could not be understood, or any step failed.
    """
    # Reject missing/blank input up front rather than surfacing a cryptic
    # error from the download libraries.
    if not url or not url.strip():
        return "❌ Error: Please provide a video URL."
    url = url.strip()

    video_path = None
    audio_path = None
    try:
        video_path = download_video(url)
        audio_path = extract_audio(video_path)

        # Load audio with torchaudio
        waveform, sample_rate = torchaudio.load(audio_path)

        # Transcription: a near-empty transcript means no usable speech.
        transcript = transcribe(audio_path)
        if len(transcript.strip()) < 3:
            return "Could not understand speech. Please try another video."

        # Accent classification
        result = classify_accent(waveform, sample_rate)

        # Only show an ellipsis when the transcript was actually truncated.
        excerpt = transcript[:200]
        suffix = "..." if len(transcript) > 200 else ""

        output = f"**Accent**: {result['accent']}\n\n"
        output += f"**Confidence**: {result['confidence']}%\n\n"
        output += f"**Explanation**: {result['summary']}\n\n"
        output += f"**Transcript** (first 200 chars): {excerpt}{suffix}"
        return output
    except Exception as e:
        # Top-level UI boundary: report the failure instead of crashing.
        return f"❌ Error: {str(e)}"
    finally:
        # Clean up temp files on every exit path (success, early return,
        # or error). Best effort: a failed delete must not mask the result.
        for path in (video_path, audio_path):
            if path and os.path.exists(path):
                try:
                    os.remove(path)
                except OSError:
                    pass
83
+
84
# Assemble the Gradio UI: one text box for the video URL, one Markdown pane
# for the analysis report, wired to analyze_accent. Then start the app.
url_input = gr.Textbox(
    label="Public Video URL (e.g. MP4, Loom)",
    placeholder="https://...",
)
result_view = gr.Markdown(label="Accent Analysis Result")

demo = gr.Interface(
    fn=analyze_accent,
    inputs=url_input,
    outputs=result_view,
    title="English Accent Classifier",
    description="Paste a video URL (MP4/Loom) to extract audio, transcribe speech, and classify the English accent (e.g., American, British, etc.).",
)
demo.launch()
requirements.txt ADDED
@@ -0,0 +1,8 @@
 
 
 
 
 
 
 
 
 
1
+ gradio
2
+ torch
3
+ transformers
4
+ torchaudio
5
+ moviepy
6
+ ffmpeg-python
7
+ requests
8
+ yt_dlp