fahadqazi committed on
Commit
9a3e0af
·
verified ·
1 Parent(s): 083846e

Upload app.py

Browse files
Files changed (1) hide show
  1. app.py +129 -122
app.py CHANGED
@@ -1,123 +1,130 @@
1
- import gradio as gr
2
- import torch
3
- import tempfile
4
- import os
5
- import requests
6
- from moviepy import VideoFileClip
7
- from transformers import pipeline, WhisperProcessor, WhisperForConditionalGeneration, Wav2Vec2Processor, Wav2Vec2Model
8
- import torchaudio
9
-
10
- # Load Whisper model to confirm English
11
- whisper_pipe = pipeline("automatic-speech-recognition", model="openai/whisper-tiny", device="cpu")
12
-
13
- # Placeholder accent classifier (replace with real one or your own logic)
14
- def classify_accent(audio_tensor, sample_rate):
15
- # In a real case, you'd use a fine-tuned model or wav2vec2 embeddings
16
- # We'll fake a classification here for demonstration
17
- return {
18
- "accent": "American",
19
- "confidence": 87.2,
20
- "summary": "The speaker uses rhotic pronunciation and North American intonation."
21
- }
22
-
23
- def download_video(url):
24
- video_path = tempfile.NamedTemporaryFile(suffix=".mp4", delete=False).name
25
- response = requests.get(url, stream=True)
26
- with open(video_path, "wb") as f:
27
- for chunk in response.iter_content(chunk_size=1024*1024):
28
- if chunk:
29
- f.write(chunk)
30
- return video_path
31
-
32
- def extract_audio(video_path):
33
- audio_path = tempfile.NamedTemporaryFile(suffix=".wav", delete=False).name
34
- clip = VideoFileClip(video_path)
35
- clip.audio.write_audiofile(audio_path, codec='pcm_s16le')
36
- return audio_path
37
-
38
- def transcribe(audio_path):
39
- result = whisper_pipe(audio_path)
40
- return result['text']
41
-
42
- def analyze_accent(url_or_file):
43
- try:
44
- if url_or_file.startswith("http"):
45
- video_path = download_video(url_or_file)
46
- else:
47
- video_path = url_or_file
48
-
49
- audio_path = extract_audio(video_path)
50
-
51
-
52
-
53
- # Load audio with torchaudio
54
- waveform, sample_rate = torchaudio.load(audio_path)
55
-
56
- # Transcription (to verify English)
57
- transcript = transcribe(audio_path)
58
- if len(transcript.strip()) < 3:
59
- return "Could not understand speech. Please try another video."
60
-
61
- # Accent classification
62
- result = classify_accent(waveform, sample_rate)
63
-
64
- output = f"**Accent**: {result['accent']}\n\n"
65
- output += f"**Confidence**: {result['confidence']}%\n\n"
66
- output += f"**Explanation**: {result['summary']}\n\n"
67
- output += f"**Transcript** (first 200 chars): {transcript[:200]}..."
68
-
69
- # Clean up temp files
70
- if isinstance(url_or_file, str):
71
- os.remove(video_path)
72
- if isinstance(url_or_file, str):
73
- os.remove(video_path)
74
- if isinstance(url_or_file, str):
75
- os.remove(video_path)
76
- os.remove(audio_path)
77
-
78
- return output
79
- except Exception as e:
80
- return f"❌ Error: {str(e)}"
81
-
82
-
83
- # gr.Interface(
84
- # fn=analyze_accent,
85
- # inputs=gr.Textbox(label="Public Video URL (e.g. MP4)", placeholder="https://..."),
86
- # outputs=gr.Markdown(label="Accent Analysis Result"),
87
- # title="English Accent Classifier",
88
- # description="Paste a video URL (MP4) to extract audio, transcribe speech, and classify the English accent (e.g., American, British, etc.).",
89
-
90
- # examples=[
91
- # ["https://example.com/sample.mp4"], # example URL
92
- # [open("cleo-abram.mp4", "rb")] # local file example
93
- # ],
94
- # live=True
95
- # ).launch()
96
-
97
-
98
-
99
- with gr.Blocks() as demo:
100
- gr.Markdown("# English Accent Classifier")
101
-
102
- with gr.Tab("From URL"):
103
- url_input = gr.Textbox(label="Video URL (MP4)")
104
- url_output = gr.Markdown()
105
- gr.Button("Analyze").click(fn=analyze_accent, inputs=url_input, outputs=url_output)
106
-
107
- with gr.Tab("From File"):
108
- file_input = gr.File(label="Upload MP4 Video", file_types=[".mp4"])
109
- file_output = gr.Markdown()
110
- gr.Button("Analyze").click(fn=analyze_accent, inputs=file_input, outputs=file_output)
111
-
112
-
113
- gr.Examples(
114
- examples=[
115
- ["cleo-abram.mp4"],
116
- ],
117
- inputs=file_input,
118
- outputs=file_output,
119
- fn=analyze_accent,
120
- label="Example MP4 Videos"
121
- )
122
-
 
 
 
 
 
 
 
123
  demo.launch()
 
1
+ import gradio as gr
2
+ import torch
3
+ import tempfile
4
+ import os
5
+ import requests
6
+ from moviepy import VideoFileClip
7
+ from transformers import pipeline, WhisperProcessor, WhisperForConditionalGeneration, Wav2Vec2Processor, Wav2Vec2Model
8
+ import torchaudio
9
+
10
# Whisper-tiny ASR pipeline on CPU. Used below to transcribe the extracted
# audio and confirm the clip contains intelligible speech before the accent
# classification step.
whisper_pipe = pipeline("automatic-speech-recognition", model="openai/whisper-tiny", device="cpu")
12
+
13
# Placeholder accent classifier (replace with real one or your own logic)
def classify_accent(audio_tensor, sample_rate):
    """Return a stubbed accent classification for the given audio.

    This is demo scaffolding: the audio is ignored and a fixed result is
    returned. Swap in a fine-tuned model (e.g. wav2vec2 embeddings) here.
    """
    return dict(
        accent="American",
        confidence=87.2,
        summary="The speaker uses rhotic pronunciation and North American intonation.",
    )
22
+
23
def download_video(url):
    """Download *url* into a temporary .mp4 file and return its path.

    Raises requests.HTTPError on a non-2xx response and
    requests.Timeout if the server stalls.
    """
    # Create the temp file via a context manager so the open handle is
    # released immediately (the original leaked the NamedTemporaryFile handle).
    with tempfile.NamedTemporaryFile(suffix=".mp4", delete=False) as tmp:
        video_path = tmp.name
    # stream=True avoids buffering the whole video in memory; timeout
    # prevents an unresponsive host from hanging the app forever.
    with requests.get(url, stream=True, timeout=30) as response:
        # Fail fast on 4xx/5xx instead of silently saving an error page as "video".
        response.raise_for_status()
        with open(video_path, "wb") as f:
            for chunk in response.iter_content(chunk_size=1024 * 1024):
                if chunk:  # skip keep-alive chunks
                    f.write(chunk)
    return video_path
31
+
32
def extract_audio(video_path):
    """Extract the audio track of *video_path* into a temporary 16-bit PCM .wav.

    Returns the path of the .wav file.
    Raises ValueError if the video has no audio track.
    """
    with tempfile.NamedTemporaryFile(suffix=".wav", delete=False) as tmp:
        audio_path = tmp.name
    clip = VideoFileClip(video_path)
    try:
        # Silent videos have clip.audio == None; the original crashed with
        # AttributeError here. Raise something actionable instead.
        if clip.audio is None:
            raise ValueError("Video has no audio track.")
        clip.audio.write_audiofile(audio_path, codec='pcm_s16le')
    finally:
        clip.close()  # release ffmpeg readers/file handles (original leaked them)
    return audio_path
37
+
38
def transcribe(audio_path):
    """Run the module-level Whisper pipeline on *audio_path* and return the text."""
    return whisper_pipe(audio_path)["text"]
41
+
42
def analyze_accent(url_or_file):
    """End-to-end pipeline: fetch video, extract audio, transcribe, classify accent.

    Accepts a public video URL (str), a local file path (str), or a gradio
    File object (anything exposing a ``.name`` path). Returns a
    markdown-formatted result string, or a human-readable error message —
    this function never raises, so it is safe to wire directly to the UI.
    """
    video_path = None
    audio_path = None
    downloaded = False
    try:
        # gr.File may hand us a file-like object rather than a str path.
        source = getattr(url_or_file, "name", url_or_file)

        if source.startswith("http"):
            video_path = download_video(source)
            downloaded = True  # only our downloaded copy may be deleted
        else:
            video_path = source

        print("Video path:", video_path)

        audio_path = extract_audio(video_path)

        print("Audio path:", audio_path)

        # Load audio with torchaudio
        waveform, sample_rate = torchaudio.load(audio_path)

        # Transcription (to verify English / intelligible speech)
        transcript = transcribe(audio_path)
        if len(transcript.strip()) < 3:
            return "Could not understand speech. Please try another video."

        # Accent classification
        result = classify_accent(waveform, sample_rate)

        output = f"**Accent**: {result['accent']}\n\n"
        output += f"**Confidence**: {result['confidence']}%\n\n"
        output += f"**Explanation**: {result['summary']}\n\n"
        output += f"**Transcript** (first 200 chars): {transcript[:200]}..."
        return output
    except Exception as e:
        return f"❌ Error: {str(e)}"
    finally:
        # Clean up exactly once, on success AND failure. The original called
        # os.remove(video_path) four times, so the second call raised
        # FileNotFoundError and the broad except discarded a successful
        # result; it also deleted user-supplied local files.
        if downloaded and video_path and os.path.exists(video_path):
            os.remove(video_path)
        if audio_path and os.path.exists(audio_path):
            os.remove(audio_path)
88
+
89
+
90
+ # gr.Interface(
91
+ # fn=analyze_accent,
92
+ # inputs=gr.Textbox(label="Public Video URL (e.g. MP4)", placeholder="https://..."),
93
+ # outputs=gr.Markdown(label="Accent Analysis Result"),
94
+ # title="English Accent Classifier",
95
+ # description="Paste a video URL (MP4) to extract audio, transcribe speech, and classify the English accent (e.g., American, British, etc.).",
96
+
97
+ # examples=[
98
+ # ["https://example.com/sample.mp4"], # example URL
99
+ # [open("cleo-abram.mp4", "rb")] # local file example
100
+ # ],
101
+ # live=True
102
+ # ).launch()
103
+
104
+
105
+
106
# Two-tab UI: analyze a video either from a public URL or an uploaded file.
with gr.Blocks() as demo:
    gr.Markdown("# English Accent Classifier")

    with gr.Tab("From URL"):
        url_input = gr.Textbox(label="Video URL (MP4)")
        url_output = gr.Markdown()
        url_button = gr.Button("Analyze")
        url_button.click(fn=analyze_accent, inputs=url_input, outputs=url_output)

    with gr.Tab("From File"):
        file_input = gr.File(label="Upload MP4 Video", file_types=[".mp4"])
        file_output = gr.Markdown()
        file_button = gr.Button("Analyze")
        file_button.click(fn=analyze_accent, inputs=file_input, outputs=file_output)

    # Bundled sample clip wired to the file tab.
    gr.Examples(
        examples=[["examples/cleo-abram.mp4"]],
        inputs=file_input,
        outputs=file_output,
        fn=analyze_accent,
        label="Example MP4 Videos",
    )

demo.launch()