fahadqazi committed on
Commit
9a3e0af
·
verified ·
1 Parent(s): 083846e

Upload app.py

Browse files
Files changed (1) hide show
  1. app.py +129 -122
app.py CHANGED
@@ -1,123 +1,130 @@
1
- import gradio as gr
2
- import torch
3
- import tempfile
4
- import os
5
- import requests
6
- from moviepy import VideoFileClip
7
- from transformers import pipeline, WhisperProcessor, WhisperForConditionalGeneration, Wav2Vec2Processor, Wav2Vec2Model
8
- import torchaudio
9
-
10
- # Load Whisper model to confirm English
11
- whisper_pipe = pipeline("automatic-speech-recognition", model="openai/whisper-tiny", device="cpu")
12
-
13
- # Placeholder accent classifier (replace with real one or your own logic)
14
- def classify_accent(audio_tensor, sample_rate):
15
- # In a real case, you'd use a fine-tuned model or wav2vec2 embeddings
16
- # We'll fake a classification here for demonstration
17
- return {
18
- "accent": "American",
19
- "confidence": 87.2,
20
- "summary": "The speaker uses rhotic pronunciation and North American intonation."
21
- }
22
-
23
- def download_video(url):
24
- video_path = tempfile.NamedTemporaryFile(suffix=".mp4", delete=False).name
25
- response = requests.get(url, stream=True)
26
- with open(video_path, "wb") as f:
27
- for chunk in response.iter_content(chunk_size=1024*1024):
28
- if chunk:
29
- f.write(chunk)
30
- return video_path
31
-
32
- def extract_audio(video_path):
33
- audio_path = tempfile.NamedTemporaryFile(suffix=".wav", delete=False).name
34
- clip = VideoFileClip(video_path)
35
- clip.audio.write_audiofile(audio_path, codec='pcm_s16le')
36
- return audio_path
37
-
38
- def transcribe(audio_path):
39
- result = whisper_pipe(audio_path)
40
- return result['text']
41
-
42
- def analyze_accent(url_or_file):
43
- try:
44
- if url_or_file.startswith("http"):
45
- video_path = download_video(url_or_file)
46
- else:
47
- video_path = url_or_file
48
-
49
- audio_path = extract_audio(video_path)
50
-
51
-
52
-
53
- # Load audio with torchaudio
54
- waveform, sample_rate = torchaudio.load(audio_path)
55
-
56
- # Transcription (to verify English)
57
- transcript = transcribe(audio_path)
58
- if len(transcript.strip()) < 3:
59
- return "Could not understand speech. Please try another video."
60
-
61
- # Accent classification
62
- result = classify_accent(waveform, sample_rate)
63
-
64
- output = f"**Accent**: {result['accent']}\n\n"
65
- output += f"**Confidence**: {result['confidence']}%\n\n"
66
- output += f"**Explanation**: {result['summary']}\n\n"
67
- output += f"**Transcript** (first 200 chars): {transcript[:200]}..."
68
-
69
- # Clean up temp files
70
- if isinstance(url_or_file, str):
71
- os.remove(video_path)
72
- if isinstance(url_or_file, str):
73
- os.remove(video_path)
74
- if isinstance(url_or_file, str):
75
- os.remove(video_path)
76
- os.remove(audio_path)
77
-
78
- return output
79
- except Exception as e:
80
- return f"❌ Error: {str(e)}"
81
-
82
-
83
- # gr.Interface(
84
- # fn=analyze_accent,
85
- # inputs=gr.Textbox(label="Public Video URL (e.g. MP4)", placeholder="https://..."),
86
- # outputs=gr.Markdown(label="Accent Analysis Result"),
87
- # title="English Accent Classifier",
88
- # description="Paste a video URL (MP4) to extract audio, transcribe speech, and classify the English accent (e.g., American, British, etc.).",
89
-
90
- # examples=[
91
- # ["https://example.com/sample.mp4"], # example URL
92
- # [open("cleo-abram.mp4", "rb")] # local file example
93
- # ],
94
- # live=True
95
- # ).launch()
96
-
97
-
98
-
99
- with gr.Blocks() as demo:
100
- gr.Markdown("# English Accent Classifier")
101
-
102
- with gr.Tab("From URL"):
103
- url_input = gr.Textbox(label="Video URL (MP4)")
104
- url_output = gr.Markdown()
105
- gr.Button("Analyze").click(fn=analyze_accent, inputs=url_input, outputs=url_output)
106
-
107
- with gr.Tab("From File"):
108
- file_input = gr.File(label="Upload MP4 Video", file_types=[".mp4"])
109
- file_output = gr.Markdown()
110
- gr.Button("Analyze").click(fn=analyze_accent, inputs=file_input, outputs=file_output)
111
-
112
-
113
- gr.Examples(
114
- examples=[
115
- ["cleo-abram.mp4"],
116
- ],
117
- inputs=file_input,
118
- outputs=file_output,
119
- fn=analyze_accent,
120
- label="Example MP4 Videos"
121
- )
122
-
 
 
 
 
 
 
 
123
  demo.launch()
 
1
+ import gradio as gr
2
+ import torch
3
+ import tempfile
4
+ import os
5
+ import requests
6
+ from moviepy import VideoFileClip
7
+ from transformers import pipeline, WhisperProcessor, WhisperForConditionalGeneration, Wav2Vec2Processor, Wav2Vec2Model
8
+ import torchaudio
9
+
10
# Whisper-tiny ASR pipeline on CPU. Used below to transcribe the extracted
# audio and confirm the clip contains intelligible speech before the accent
# classification step.
whisper_pipe = pipeline("automatic-speech-recognition", model="openai/whisper-tiny", device="cpu")
12
+
13
# Placeholder accent classifier (replace with real one or your own logic)
def classify_accent(audio_tensor, sample_rate):
    """Return a stubbed accent classification for the given audio.

    This is demo scaffolding: the audio is ignored and a fixed result is
    returned. Swap in a fine-tuned model (e.g. wav2vec2 embeddings) here.
    """
    return dict(
        accent="American",
        confidence=87.2,
        summary="The speaker uses rhotic pronunciation and North American intonation.",
    )
22
+
23
def download_video(url):
    """Download *url* into a temporary .mp4 file and return its path.

    Raises requests.HTTPError on a non-2xx response and
    requests.Timeout if the server stalls.
    """
    # Create the temp file via a context manager so the open handle is
    # released immediately (the original leaked the NamedTemporaryFile handle).
    with tempfile.NamedTemporaryFile(suffix=".mp4", delete=False) as tmp:
        video_path = tmp.name
    # stream=True avoids buffering the whole video in memory; timeout
    # prevents an unresponsive host from hanging the app forever.
    with requests.get(url, stream=True, timeout=30) as response:
        # Fail fast on 4xx/5xx instead of silently saving an error page as "video".
        response.raise_for_status()
        with open(video_path, "wb") as f:
            for chunk in response.iter_content(chunk_size=1024 * 1024):
                if chunk:  # skip keep-alive chunks
                    f.write(chunk)
    return video_path
31
+
32
def extract_audio(video_path):
    """Extract the audio track of *video_path* into a temporary 16-bit PCM .wav.

    Returns the path of the .wav file.
    Raises ValueError if the video has no audio track.
    """
    with tempfile.NamedTemporaryFile(suffix=".wav", delete=False) as tmp:
        audio_path = tmp.name
    clip = VideoFileClip(video_path)
    try:
        # Silent videos have clip.audio == None; the original crashed with
        # AttributeError here. Raise something actionable instead.
        if clip.audio is None:
            raise ValueError("Video has no audio track.")
        clip.audio.write_audiofile(audio_path, codec='pcm_s16le')
    finally:
        clip.close()  # release ffmpeg readers/file handles (original leaked them)
    return audio_path
37
+
38
def transcribe(audio_path):
    """Run the module-level Whisper pipeline on *audio_path* and return the text."""
    return whisper_pipe(audio_path)["text"]
41
+
42
def analyze_accent(url_or_file):
    """End-to-end pipeline: fetch video, extract audio, transcribe, classify accent.

    Accepts a public video URL (str), a local file path (str), or a gradio
    File object (anything exposing a ``.name`` path). Returns a
    markdown-formatted result string, or a human-readable error message —
    this function never raises, so it is safe to wire directly to the UI.
    """
    video_path = None
    audio_path = None
    downloaded = False
    try:
        # gr.File may hand us a file-like object rather than a str path.
        source = getattr(url_or_file, "name", url_or_file)

        if source.startswith("http"):
            video_path = download_video(source)
            downloaded = True  # only our downloaded copy may be deleted
        else:
            video_path = source

        print("Video path:", video_path)

        audio_path = extract_audio(video_path)

        print("Audio path:", audio_path)

        # Load audio with torchaudio
        waveform, sample_rate = torchaudio.load(audio_path)

        # Transcription (to verify English / intelligible speech)
        transcript = transcribe(audio_path)
        if len(transcript.strip()) < 3:
            return "Could not understand speech. Please try another video."

        # Accent classification
        result = classify_accent(waveform, sample_rate)

        output = f"**Accent**: {result['accent']}\n\n"
        output += f"**Confidence**: {result['confidence']}%\n\n"
        output += f"**Explanation**: {result['summary']}\n\n"
        output += f"**Transcript** (first 200 chars): {transcript[:200]}..."
        return output
    except Exception as e:
        return f"❌ Error: {str(e)}"
    finally:
        # Clean up exactly once, on success AND failure. The original called
        # os.remove(video_path) four times, so the second call raised
        # FileNotFoundError and the broad except discarded a successful
        # result; it also deleted user-supplied local files.
        if downloaded and video_path and os.path.exists(video_path):
            os.remove(video_path)
        if audio_path and os.path.exists(audio_path):
            os.remove(audio_path)
88
+
89
+
90
+ # gr.Interface(
91
+ # fn=analyze_accent,
92
+ # inputs=gr.Textbox(label="Public Video URL (e.g. MP4)", placeholder="https://..."),
93
+ # outputs=gr.Markdown(label="Accent Analysis Result"),
94
+ # title="English Accent Classifier",
95
+ # description="Paste a video URL (MP4) to extract audio, transcribe speech, and classify the English accent (e.g., American, British, etc.).",
96
+
97
+ # examples=[
98
+ # ["https://example.com/sample.mp4"], # example URL
99
+ # [open("cleo-abram.mp4", "rb")] # local file example
100
+ # ],
101
+ # live=True
102
+ # ).launch()
103
+
104
+
105
+
106
# Two-tab UI: analyze a video either from a public URL or an uploaded file.
with gr.Blocks() as demo:
    gr.Markdown("# English Accent Classifier")

    with gr.Tab("From URL"):
        url_input = gr.Textbox(label="Video URL (MP4)")
        url_output = gr.Markdown()
        url_button = gr.Button("Analyze")
        url_button.click(fn=analyze_accent, inputs=url_input, outputs=url_output)

    with gr.Tab("From File"):
        file_input = gr.File(label="Upload MP4 Video", file_types=[".mp4"])
        file_output = gr.Markdown()
        file_button = gr.Button("Analyze")
        file_button.click(fn=analyze_accent, inputs=file_input, outputs=file_output)

    # Bundled sample clip wired to the file tab.
    gr.Examples(
        examples=[["examples/cleo-abram.mp4"]],
        inputs=file_input,
        outputs=file_output,
        fn=analyze_accent,
        label="Example MP4 Videos",
    )

demo.launch()