Spaces:

fahadqazi
/

accent-classifier

Running

App Files Community

fahadqazi committed 20 days ago

Commit

f8a135a

verified ·

1 Parent(s): 2845ac7

Upload 4 files

Browse files

Files changed (5) hide show

.gitattributes +1 -0
README.md +14 -14
app.py +106 -77
cleo-abram.mp4 +3 -0
requirements.txt +7 -7

.gitattributes CHANGED Viewed

@@ -33,3 +33,4 @@ saved_model/**/* filter=lfs diff=lfs merge=lfs -text
 *.zip filter=lfs diff=lfs merge=lfs -text
 *.zst filter=lfs diff=lfs merge=lfs -text
 *tfevents* filter=lfs diff=lfs merge=lfs -text

 *.zip filter=lfs diff=lfs merge=lfs -text
 *.zst filter=lfs diff=lfs merge=lfs -text
 *tfevents* filter=lfs diff=lfs merge=lfs -text
+cleo-abram.mp4 filter=lfs diff=lfs merge=lfs -text

README.md CHANGED Viewed

@@ -1,14 +1,14 @@
----
-title: Accent Classifier
-emoji: 📈
-colorFrom: red
-colorTo: pink
-sdk: gradio
-sdk_version: 5.30.0
-app_file: app.py
-pinned: false
-license: other
-short_description: Detects & classifies  accents of English speakers.
----
-Check out the configuration reference at https://huggingface.co/docs/hub/spaces-config-reference

+---
+title: Accent Classifier
+emoji: 📈
+colorFrom: red
+colorTo: pink
+sdk: gradio
+sdk_version: 5.30.0
+app_file: app.py
+pinned: false
+license: other
+short_description: Detects & classifies  accents of English speakers.
+---
+Check out the configuration reference at https://huggingface.co/docs/hub/spaces-config-reference

app.py CHANGED Viewed

@@ -1,77 +1,106 @@
-import gradio as gr
-import torch
-import tempfile
-import os
-import requests
-from moviepy import VideoFileClip
-from transformers import pipeline, WhisperProcessor, WhisperForConditionalGeneration, Wav2Vec2Processor, Wav2Vec2Model
-import torchaudio
-# Load Whisper model to confirm English
-whisper_pipe = pipeline("automatic-speech-recognition", model="openai/whisper-tiny", device="cpu")
-# Placeholder accent classifier (replace with real one or your own logic)
-def classify_accent(audio_tensor, sample_rate):
-    # In a real case, you'd use a fine-tuned model or wav2vec2 embeddings
-    # We'll fake a classification here for demonstration
-    return {
-        "accent": "American",
-        "confidence": 87.2,
-        "summary": "The speaker uses rhotic pronunciation and North American intonation."
-    }
-def download_video(url):
-    video_path = tempfile.NamedTemporaryFile(suffix=".mp4", delete=False).name
-    response = requests.get(url, stream=True)
-    with open(video_path, "wb") as f:
-        for chunk in response.iter_content(chunk_size=1024*1024):
-            if chunk:
-                f.write(chunk)
-    return video_path
-def extract_audio(video_path):
-    audio_path = tempfile.NamedTemporaryFile(suffix=".wav", delete=False).name
-    clip = VideoFileClip(video_path)
-    clip.audio.write_audiofile(audio_path, codec='pcm_s16le')
-    return audio_path
-def transcribe(audio_path):
-    result = whisper_pipe(audio_path)
-    return result['text']
-def analyze_accent(url):
-    try:
-        video_path = download_video(url)
-        audio_path = extract_audio(video_path)
-        # Load audio with torchaudio
-        waveform, sample_rate = torchaudio.load(audio_path)
-        # Transcription (to verify English)
-        transcript = transcribe(audio_path)
-        if len(transcript.strip()) < 3:
-            return "Could not understand speech. Please try another video."
-        # Accent classification
-        result = classify_accent(waveform, sample_rate)
-        output = f"**Accent**: {result['accent']}\n\n"
-        output += f"**Confidence**: {result['confidence']}%\n\n"
-        output += f"**Explanation**: {result['summary']}\n\n"
-        output += f"**Transcript** (first 200 chars): {transcript[:200]}..."
-        # Clean up temp files
-        os.remove(video_path)
-        os.remove(audio_path)
-        return output
-    except Exception as e:
-        return f"❌ Error: {str(e)}"
-gr.Interface(
-    fn=analyze_accent,
-    inputs=gr.Textbox(label="Public Video URL (e.g. MP4)", placeholder="https://..."),
-    outputs=gr.Markdown(label="Accent Analysis Result"),
-    title="English Accent Classifier",
-    description="Paste a video URL (MP4) to extract audio, transcribe speech, and classify the English accent (e.g., American, British, etc.)."
-).launch()

+import gradio as gr
+import torch
+import tempfile
+import os
+import requests
+from moviepy import VideoFileClip
+from transformers import pipeline, WhisperProcessor, WhisperForConditionalGeneration, Wav2Vec2Processor, Wav2Vec2Model
+import torchaudio
+# Load Whisper model to confirm English
+whisper_pipe = pipeline("automatic-speech-recognition", model="openai/whisper-tiny", device="cpu")
+# Placeholder accent classifier (replace with real one or your own logic)
+def classify_accent(audio_tensor, sample_rate):
+    # In a real case, you'd use a fine-tuned model or wav2vec2 embeddings
+    # We'll fake a classification here for demonstration
+    return {
+        "accent": "American",
+        "confidence": 87.2,
+        "summary": "The speaker uses rhotic pronunciation and North American intonation."
+    }
+def download_video(url):
+    video_path = tempfile.NamedTemporaryFile(suffix=".mp4", delete=False).name
+    response = requests.get(url, stream=True)
+    with open(video_path, "wb") as f:
+        for chunk in response.iter_content(chunk_size=1024*1024):
+            if chunk:
+                f.write(chunk)
+    return video_path
+def extract_audio(video_path):
+    audio_path = tempfile.NamedTemporaryFile(suffix=".wav", delete=False).name
+    clip = VideoFileClip(video_path)
+    clip.audio.write_audiofile(audio_path, codec='pcm_s16le')
+    return audio_path
+def transcribe(audio_path):
+    result = whisper_pipe(audio_path)
+    return result['text']
+def analyze_accent(url_or_file):
+    try:
+        if isinstance(url_or_file, str):
+            video_path = download_video(url_or_file)
+        else:
+            video_path = url_or_file.name  # local file upload
+        audio_path = extract_audio(video_path)
+        # Load audio with torchaudio
+        waveform, sample_rate = torchaudio.load(audio_path)
+        # Transcription (to verify English)
+        transcript = transcribe(audio_path)
+        if len(transcript.strip()) < 3:
+            return "Could not understand speech. Please try another video."
+        # Accent classification
+        result = classify_accent(waveform, sample_rate)
+        output = f"**Accent**: {result['accent']}\n\n"
+        output += f"**Confidence**: {result['confidence']}%\n\n"
+        output += f"**Explanation**: {result['summary']}\n\n"
+        output += f"**Transcript** (first 200 chars): {transcript[:200]}..."
+        # Clean up temp files
+        if isinstance(url_or_file, str):
+            os.remove(video_path)
+        os.remove(audio_path)
+        return output
+    except Exception as e:
+        return f"❌ Error: {str(e)}"
+# gr.Interface(
+#     fn=analyze_accent,
+#     inputs=gr.Textbox(label="Public Video URL (e.g. MP4)", placeholder="https://..."),
+#     outputs=gr.Markdown(label="Accent Analysis Result"),
+#     title="English Accent Classifier",
+#     description="Paste a video URL (MP4) to extract audio, transcribe speech, and classify the English accent (e.g., American, British, etc.).",
+#     examples=[
+#         ["https://example.com/sample.mp4"],  # example URL
+#         [open("cleo-abram.mp4", "rb")]  # local file example
+#     ],
+#     live=True
+# ).launch()
+with gr.Blocks() as demo:
+    gr.Markdown("# English Accent Classifier")
+    with gr.Tab("From URL"):
+        url_input = gr.Textbox(label="Video URL (MP4)")
+        url_output = gr.Markdown()
+        gr.Button("Analyze").click(fn=analyze_accent, inputs=url_input, outputs=url_output)
+    with gr.Tab("From File"):
+        file_input = gr.File(label="Upload MP4 Video", file_types=[".mp4"])
+        file_output = gr.Markdown()
+        gr.Button("Analyze").click(fn=analyze_accent, inputs=file_input, outputs=file_output)
+demo.launch()

cleo-abram.mp4 ADDED Viewed

	@@ -0,0 +1,3 @@

+version https://git-lfs.github.com/spec/v1
+oid sha256:a75606c3d58f1020cc2d07a7f4ade9898bb1ca2388c06d117480e529cc726c1e
+size 4035126

requirements.txt CHANGED Viewed

@@ -1,8 +1,8 @@
-gradio
-torch
-transformers
-torchaudio
-moviepy
-ffmpeg-python
-requests
 yt_dlp

+gradio
+torch
+transformers
+torchaudio
+moviepy
+ffmpeg-python
+requests
 yt_dlp