fahadqazi committed on
Commit
f8a135a
·
verified ·
1 Parent(s): 2845ac7

Upload 4 files

Browse files
Files changed (5) hide show
  1. .gitattributes +1 -0
  2. README.md +14 -14
  3. app.py +106 -77
  4. cleo-abram.mp4 +3 -0
  5. requirements.txt +7 -7
.gitattributes CHANGED
@@ -33,3 +33,4 @@ saved_model/**/* filter=lfs diff=lfs merge=lfs -text
33
  *.zip filter=lfs diff=lfs merge=lfs -text
34
  *.zst filter=lfs diff=lfs merge=lfs -text
35
  *tfevents* filter=lfs diff=lfs merge=lfs -text
 
 
33
  *.zip filter=lfs diff=lfs merge=lfs -text
34
  *.zst filter=lfs diff=lfs merge=lfs -text
35
  *tfevents* filter=lfs diff=lfs merge=lfs -text
36
+ cleo-abram.mp4 filter=lfs diff=lfs merge=lfs -text
README.md CHANGED
@@ -1,14 +1,14 @@
1
- ---
2
- title: Accent Classifier
3
- emoji: 📈
4
- colorFrom: red
5
- colorTo: pink
6
- sdk: gradio
7
- sdk_version: 5.30.0
8
- app_file: app.py
9
- pinned: false
10
- license: other
11
- short_description: Detects & classifies accents of English speakers.
12
- ---
13
-
14
- Check out the configuration reference at https://huggingface.co/docs/hub/spaces-config-reference
 
1
+ ---
2
+ title: Accent Classifier
3
+ emoji: 📈
4
+ colorFrom: red
5
+ colorTo: pink
6
+ sdk: gradio
7
+ sdk_version: 5.30.0
8
+ app_file: app.py
9
+ pinned: false
10
+ license: other
11
+ short_description: Detects & classifies accents of English speakers.
12
+ ---
13
+
14
+ Check out the configuration reference at https://huggingface.co/docs/hub/spaces-config-reference
app.py CHANGED
@@ -1,77 +1,106 @@
1
- import gradio as gr
2
- import torch
3
- import tempfile
4
- import os
5
- import requests
6
- from moviepy import VideoFileClip
7
- from transformers import pipeline, WhisperProcessor, WhisperForConditionalGeneration, Wav2Vec2Processor, Wav2Vec2Model
8
- import torchaudio
9
-
10
- # Load Whisper model to confirm English
11
- whisper_pipe = pipeline("automatic-speech-recognition", model="openai/whisper-tiny", device="cpu")
12
-
13
- # Placeholder accent classifier (replace with real one or your own logic)
14
- def classify_accent(audio_tensor, sample_rate):
15
- # In a real case, you'd use a fine-tuned model or wav2vec2 embeddings
16
- # We'll fake a classification here for demonstration
17
- return {
18
- "accent": "American",
19
- "confidence": 87.2,
20
- "summary": "The speaker uses rhotic pronunciation and North American intonation."
21
- }
22
-
23
- def download_video(url):
24
- video_path = tempfile.NamedTemporaryFile(suffix=".mp4", delete=False).name
25
- response = requests.get(url, stream=True)
26
- with open(video_path, "wb") as f:
27
- for chunk in response.iter_content(chunk_size=1024*1024):
28
- if chunk:
29
- f.write(chunk)
30
- return video_path
31
-
32
- def extract_audio(video_path):
33
- audio_path = tempfile.NamedTemporaryFile(suffix=".wav", delete=False).name
34
- clip = VideoFileClip(video_path)
35
- clip.audio.write_audiofile(audio_path, codec='pcm_s16le')
36
- return audio_path
37
-
38
- def transcribe(audio_path):
39
- result = whisper_pipe(audio_path)
40
- return result['text']
41
-
42
- def analyze_accent(url):
43
- try:
44
- video_path = download_video(url)
45
- audio_path = extract_audio(video_path)
46
-
47
- # Load audio with torchaudio
48
- waveform, sample_rate = torchaudio.load(audio_path)
49
-
50
- # Transcription (to verify English)
51
- transcript = transcribe(audio_path)
52
- if len(transcript.strip()) < 3:
53
- return "Could not understand speech. Please try another video."
54
-
55
- # Accent classification
56
- result = classify_accent(waveform, sample_rate)
57
-
58
- output = f"**Accent**: {result['accent']}\n\n"
59
- output += f"**Confidence**: {result['confidence']}%\n\n"
60
- output += f"**Explanation**: {result['summary']}\n\n"
61
- output += f"**Transcript** (first 200 chars): {transcript[:200]}..."
62
-
63
- # Clean up temp files
64
- os.remove(video_path)
65
- os.remove(audio_path)
66
-
67
- return output
68
- except Exception as e:
69
- return f"❌ Error: {str(e)}"
70
-
71
- gr.Interface(
72
- fn=analyze_accent,
73
- inputs=gr.Textbox(label="Public Video URL (e.g. MP4)", placeholder="https://..."),
74
- outputs=gr.Markdown(label="Accent Analysis Result"),
75
- title="English Accent Classifier",
76
- description="Paste a video URL (MP4) to extract audio, transcribe speech, and classify the English accent (e.g., American, British, etc.)."
77
- ).launch()
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ import gradio as gr
2
+ import torch
3
+ import tempfile
4
+ import os
5
+ import requests
6
+ from moviepy import VideoFileClip
7
+ from transformers import pipeline, WhisperProcessor, WhisperForConditionalGeneration, Wav2Vec2Processor, Wav2Vec2Model
8
+ import torchaudio
9
+
10
+ # Load Whisper model to confirm English
11
+ whisper_pipe = pipeline("automatic-speech-recognition", model="openai/whisper-tiny", device="cpu")
12
+
13
+ # Placeholder accent classifier (replace with real one or your own logic)
14
+ def classify_accent(audio_tensor, sample_rate):
15
+ # In a real case, you'd use a fine-tuned model or wav2vec2 embeddings
16
+ # We'll fake a classification here for demonstration
17
+ return {
18
+ "accent": "American",
19
+ "confidence": 87.2,
20
+ "summary": "The speaker uses rhotic pronunciation and North American intonation."
21
+ }
22
+
23
+ def download_video(url):
24
+ video_path = tempfile.NamedTemporaryFile(suffix=".mp4", delete=False).name
25
+ response = requests.get(url, stream=True)
26
+ with open(video_path, "wb") as f:
27
+ for chunk in response.iter_content(chunk_size=1024*1024):
28
+ if chunk:
29
+ f.write(chunk)
30
+ return video_path
31
+
32
+ def extract_audio(video_path):
33
+ audio_path = tempfile.NamedTemporaryFile(suffix=".wav", delete=False).name
34
+ clip = VideoFileClip(video_path)
35
+ clip.audio.write_audiofile(audio_path, codec='pcm_s16le')
36
+ return audio_path
37
+
38
+ def transcribe(audio_path):
39
+ result = whisper_pipe(audio_path)
40
+ return result['text']
41
+
42
+ def analyze_accent(url_or_file):
43
+ try:
44
+ if isinstance(url_or_file, str):
45
+ video_path = download_video(url_or_file)
46
+ else:
47
+ video_path = url_or_file.name # local file upload
48
+
49
+ audio_path = extract_audio(video_path)
50
+
51
+ # Load audio with torchaudio
52
+ waveform, sample_rate = torchaudio.load(audio_path)
53
+
54
+ # Transcription (to verify English)
55
+ transcript = transcribe(audio_path)
56
+ if len(transcript.strip()) < 3:
57
+ return "Could not understand speech. Please try another video."
58
+
59
+ # Accent classification
60
+ result = classify_accent(waveform, sample_rate)
61
+
62
+ output = f"**Accent**: {result['accent']}\n\n"
63
+ output += f"**Confidence**: {result['confidence']}%\n\n"
64
+ output += f"**Explanation**: {result['summary']}\n\n"
65
+ output += f"**Transcript** (first 200 chars): {transcript[:200]}..."
66
+
67
+ # Clean up temp files
68
+ if isinstance(url_or_file, str):
69
+ os.remove(video_path)
70
+ os.remove(audio_path)
71
+
72
+ return output
73
+ except Exception as e:
74
+ return f"❌ Error: {str(e)}"
75
+
76
+
77
+ # gr.Interface(
78
+ # fn=analyze_accent,
79
+ # inputs=gr.Textbox(label="Public Video URL (e.g. MP4)", placeholder="https://..."),
80
+ # outputs=gr.Markdown(label="Accent Analysis Result"),
81
+ # title="English Accent Classifier",
82
+ # description="Paste a video URL (MP4) to extract audio, transcribe speech, and classify the English accent (e.g., American, British, etc.).",
83
+
84
+ # examples=[
85
+ # ["https://example.com/sample.mp4"], # example URL
86
+ # [open("cleo-abram.mp4", "rb")] # local file example
87
+ # ],
88
+ # live=True
89
+ # ).launch()
90
+
91
+
92
+
93
+ with gr.Blocks() as demo:
94
+ gr.Markdown("# English Accent Classifier")
95
+
96
+ with gr.Tab("From URL"):
97
+ url_input = gr.Textbox(label="Video URL (MP4)")
98
+ url_output = gr.Markdown()
99
+ gr.Button("Analyze").click(fn=analyze_accent, inputs=url_input, outputs=url_output)
100
+
101
+ with gr.Tab("From File"):
102
+ file_input = gr.File(label="Upload MP4 Video", file_types=[".mp4"])
103
+ file_output = gr.Markdown()
104
+ gr.Button("Analyze").click(fn=analyze_accent, inputs=file_input, outputs=file_output)
105
+
106
+ demo.launch()
cleo-abram.mp4 ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:a75606c3d58f1020cc2d07a7f4ade9898bb1ca2388c06d117480e529cc726c1e
3
+ size 4035126
requirements.txt CHANGED
@@ -1,8 +1,8 @@
1
- gradio
2
- torch
3
- transformers
4
- torchaudio
5
- moviepy
6
- ffmpeg-python
7
- requests
8
  yt_dlp
 
1
+ gradio
2
+ torch
3
+ transformers
4
+ torchaudio
5
+ moviepy
6
+ ffmpeg-python
7
+ requests
8
  yt_dlp