fahadqazi committed on
Commit
bdbf652
·
verified ·
1 Parent(s): 0aad0cf

Upload app.py

Browse files
Files changed (1) hide show
  1. app.py +132 -124
app.py CHANGED
@@ -1,125 +1,133 @@
1
- import gradio as gr
2
- import torch
3
- import tempfile
4
- import os
5
- import requests
6
- from moviepy import VideoFileClip
7
- from transformers import pipeline, WhisperProcessor, WhisperForConditionalGeneration, Wav2Vec2Processor, Wav2Vec2Model
8
- import torchaudio
9
-
10
- # Load Whisper model to confirm English
11
- whisper_pipe = pipeline("automatic-speech-recognition", model="openai/whisper-tiny", device="cpu")
12
-
13
- # Placeholder accent classifier (replace with real one or your own logic)
14
- def classify_accent(audio_tensor, sample_rate):
15
- # In a real case, you'd use a fine-tuned model or wav2vec2 embeddings
16
- # We'll fake a classification here for demonstration
17
- return {
18
- "accent": "American",
19
- "confidence": 87.2,
20
- "summary": "The speaker uses rhotic pronunciation and North American intonation."
21
- }
22
-
23
- def download_video(url):
24
- video_path = tempfile.NamedTemporaryFile(suffix=".mp4", delete=False).name
25
- response = requests.get(url, stream=True)
26
- with open(video_path, "wb") as f:
27
- for chunk in response.iter_content(chunk_size=1024*1024):
28
- if chunk:
29
- f.write(chunk)
30
- return video_path
31
-
32
- def extract_audio(video_path):
33
- audio_path = tempfile.NamedTemporaryFile(suffix=".wav", delete=False).name
34
- clip = VideoFileClip(video_path)
35
- clip.audio.write_audiofile(audio_path, codec='pcm_s16le')
36
- return audio_path
37
-
38
- def transcribe(audio_path):
39
- result = whisper_pipe(audio_path)
40
- return result['text']
41
-
42
- def analyze_accent(url_or_file):
43
- try:
44
- if isinstance(url_or_file, str):
45
- video_path = download_video(url_or_file)
46
- else:
47
- video_path = url_or_file.name # local file upload
48
-
49
- if isinstance(url_or_file, str):
50
- video_path = download_video(url_or_file)
51
- else:
52
- video_path = url_or_file.name # local file upload
53
-
54
- audio_path = extract_audio(video_path)
55
-
56
-
57
- # Load audio with torchaudio
58
- waveform, sample_rate = torchaudio.load(audio_path)
59
-
60
- # Transcription (to verify English)
61
- transcript = transcribe(audio_path)
62
- if len(transcript.strip()) < 3:
63
- return "Could not understand speech. Please try another video."
64
-
65
- # Accent classification
66
- result = classify_accent(waveform, sample_rate)
67
-
68
- output = f"**Accent**: {result['accent']}\n\n"
69
- output += f"**Confidence**: {result['confidence']}%\n\n"
70
- output += f"**Explanation**: {result['summary']}\n\n"
71
- output += f"**Transcript** (first 200 chars): {transcript[:200]}..."
72
-
73
- # Clean up temp files
74
- if isinstance(url_or_file, str):
75
- os.remove(video_path)
76
- if isinstance(url_or_file, str):
77
- os.remove(video_path)
78
- os.remove(audio_path)
79
-
80
- return output
81
- except Exception as e:
82
- return f"❌ Error: {str(e)}"
83
-
84
-
85
- # gr.Interface(
86
- # fn=analyze_accent,
87
- # inputs=gr.Textbox(label="Public Video URL (e.g. MP4)", placeholder="https://..."),
88
- # outputs=gr.Markdown(label="Accent Analysis Result"),
89
- # title="English Accent Classifier",
90
- # description="Paste a video URL (MP4) to extract audio, transcribe speech, and classify the English accent (e.g., American, British, etc.).",
91
-
92
- # examples=[
93
- # ["https://example.com/sample.mp4"], # example URL
94
- # [open("cleo-abram.mp4", "rb")] # local file example
95
- # ],
96
- # live=True
97
- # ).launch()
98
-
99
-
100
-
101
- with gr.Blocks() as demo:
102
- gr.Markdown("# English Accent Classifier")
103
-
104
- with gr.Tab("From URL"):
105
- url_input = gr.Textbox(label="Video URL (MP4)")
106
- url_output = gr.Markdown()
107
- gr.Button("Analyze").click(fn=analyze_accent, inputs=url_input, outputs=url_output)
108
-
109
- with gr.Tab("From File"):
110
- file_input = gr.File(label="Upload MP4 Video", file_types=[".mp4"])
111
- file_output = gr.Markdown()
112
- gr.Button("Analyze").click(fn=analyze_accent, inputs=file_input, outputs=file_output)
113
-
114
-
115
- gr.Examples(
116
- examples=[
117
- ["cleo-abram.mp4"],
118
- ],
119
- inputs=file_input,
120
- outputs=file_output,
121
- fn=analyze_accent,
122
- label="Example MP4 Videos"
123
- )
124
-
 
 
 
 
 
 
 
 
125
  demo.launch()
 
1
+ import gradio as gr
2
+ import torch
3
+ import tempfile
4
+ import os
5
+ import requests
6
+ from moviepy import VideoFileClip
7
+ from transformers import pipeline, WhisperProcessor, WhisperForConditionalGeneration, Wav2Vec2Processor, Wav2Vec2Model
8
+ import torchaudio
9
+
10
+ # Load Whisper model to confirm English
11
+ whisper_pipe = pipeline("automatic-speech-recognition", model="openai/whisper-tiny", device="cpu")
12
+
13
+ # Placeholder accent classifier (replace with real one or your own logic)
14
def classify_accent(audio_tensor, sample_rate):
    """Return a hard-coded accent classification (placeholder).

    Neither ``audio_tensor`` nor ``sample_rate`` is inspected; the same
    demonstration result is returned for every input. Replace the body with a
    real classifier (e.g. a fine-tuned model or wav2vec2 embeddings).

    Returns:
        dict with the keys ``"accent"``, ``"confidence"`` and ``"summary"``.
    """
    placeholder_result = dict(
        accent="American",
        confidence=87.2,
        summary="The speaker uses rhotic pronunciation and North American intonation.",
    )
    return placeholder_result
22
+
23
def download_video(url, timeout=30):
    """Download ``url`` into a temporary ``.mp4`` file and return its path.

    Args:
        url: HTTP(S) address of the video to fetch.
        timeout: Seconds to wait for the server to connect/send data before
            giving up, so a stalled server cannot hang the app forever.

    Returns:
        Path of the downloaded file. The caller is responsible for deleting it.

    Raises:
        requests.RequestException: On connection problems, timeouts, or a
            non-2xx HTTP status (so an error page is never saved as a video).
    """
    tmp = tempfile.NamedTemporaryFile(suffix=".mp4", delete=False)
    try:
        # Both the temp file handle and the HTTP connection are released when
        # this block exits, whether or not the download succeeded.
        with tmp, requests.get(url, stream=True, timeout=timeout) as response:
            response.raise_for_status()
            for chunk in response.iter_content(chunk_size=1024 * 1024):
                if chunk:  # skip keep-alive chunks
                    tmp.write(chunk)
    except BaseException:
        # Do not leave a partial download behind.
        os.remove(tmp.name)
        raise
    return tmp.name
31
+
32
def extract_audio(video_path):
    """Write the audio track of a video to a temporary 16-bit PCM WAV file.

    Args:
        video_path: Path of the video file to read.

    Returns:
        Path of the created ``.wav`` file. The caller is responsible for
        deleting it.

    Raises:
        ValueError: If the video contains no audio track.
    """
    # Reserve a unique path, then close our handle so the writer can open it.
    with tempfile.NamedTemporaryFile(suffix=".wav", delete=False) as tmp:
        audio_path = tmp.name
    try:
        # The context manager closes the clip and releases its reader
        # resources even when writing fails.
        with VideoFileClip(video_path) as clip:
            if clip.audio is None:
                raise ValueError("The video has no audio track.")
            clip.audio.write_audiofile(audio_path, codec="pcm_s16le")
    except Exception:
        # Do not leave an empty/partial WAV file behind.
        os.remove(audio_path)
        raise
    return audio_path
37
+
38
def transcribe(audio_path):
    """Run the module-level Whisper pipeline on ``audio_path`` and return the text."""
    return whisper_pipe(audio_path)["text"]
41
+
42
def analyze_accent(url_or_file):
    """Analyze the accent spoken in a video and return a Markdown report.

    Args:
        url_or_file: Either a URL string (the video is downloaded first) or an
            uploaded-file object exposing the local path as ``.name``.

    Returns:
        A Markdown string with the accent, confidence, explanation and the
        first 200 characters of the transcript, or a human-readable message
        when the input is missing, speech cannot be understood, or any step
        fails. Exceptions are reported in the returned text, not raised.
    """
    if url_or_file is None or (
        isinstance(url_or_file, str) and not url_or_file.strip()
    ):
        return "Please provide a video URL or upload an MP4 file."

    downloaded_path = None  # set only for a file this call downloaded
    audio_path = None
    try:
        if isinstance(url_or_file, str):
            # Download exactly once; the path is remembered for cleanup.
            downloaded_path = download_video(url_or_file)
            video_path = downloaded_path
        else:
            video_path = url_or_file.name  # local file upload

        audio_path = extract_audio(video_path)

        # Load audio with torchaudio
        waveform, sample_rate = torchaudio.load(audio_path)

        # Transcription (to verify English)
        transcript = transcribe(audio_path)
        if len(transcript.strip()) < 3:
            return "Could not understand speech. Please try another video."

        # Accent classification
        result = classify_accent(waveform, sample_rate)

        return (
            f"**Accent**: {result['accent']}\n\n"
            f"**Confidence**: {result['confidence']}%\n\n"
            f"**Explanation**: {result['summary']}\n\n"
            f"**Transcript** (first 200 chars): {transcript[:200]}..."
        )
    except Exception as e:
        return f" Error: {str(e)}"
    finally:
        # Clean up temp files on every exit path (success, early return,
        # error). An uploaded file is never deleted: it was not created here.
        for temp_path in (downloaded_path, audio_path):
            if temp_path is None:
                continue
            try:
                os.remove(temp_path)
            except OSError:
                # Best-effort cleanup; a failure here must not mask the result.
                pass
91
+
92
+
93
+ # gr.Interface(
94
+ # fn=analyze_accent,
95
+ # inputs=gr.Textbox(label="Public Video URL (e.g. MP4)", placeholder="https://..."),
96
+ # outputs=gr.Markdown(label="Accent Analysis Result"),
97
+ # title="English Accent Classifier",
98
+ # description="Paste a video URL (MP4) to extract audio, transcribe speech, and classify the English accent (e.g., American, British, etc.).",
99
+
100
+ # examples=[
101
+ # ["https://example.com/sample.mp4"], # example URL
102
+ # [open("cleo-abram.mp4", "rb")] # local file example
103
+ # ],
104
+ # live=True
105
+ # ).launch()
106
+
107
+
108
+
109
# Two-tab UI: analyze a video fetched from a URL, or one uploaded from disk.
with gr.Blocks() as demo:
    gr.Markdown("# English Accent Classifier")

    with gr.Tab("From URL"):
        url_input = gr.Textbox(label="Video URL (MP4)")
        url_output = gr.Markdown()
        gr.Button("Analyze").click(fn=analyze_accent, inputs=url_input, outputs=url_output)

    with gr.Tab("From File"):
        file_input = gr.File(label="Upload MP4 Video", file_types=[".mp4"])
        file_output = gr.Markdown()
        gr.Button("Analyze").click(fn=analyze_accent, inputs=file_input, outputs=file_output)

    # Examples for a File input are given as file paths. Passing an open file
    # object would leak the handle and fail at import time if the file is
    # missing.
    gr.Examples(
        examples=[
            ["cleo-abram.mp4"],
        ],
        inputs=file_input,
        outputs=file_output,
        fn=analyze_accent,
        label="Example MP4 Videos",
    )

demo.launch()