Spaces:

usamaijaz-ai
/

accent-classifier

Sleeping

App Files Files Community

usamaijaz-ai committed on May 9

Commit

028bb6f

1 Parent(s): 14704a2

full working app with mp3 uploads.

Browse files

Files changed (5) hide show

.gitignore +95 -0
.gradio/flagged/dataset1.csv +2 -0
app.py +132 -4
requirements.txt +9 -0
templates/index.html +14 -0

.gitignore ADDED Viewed

	@@ -0,0 +1,95 @@

+# Byte-compiled / optimized / DLL files
+__pycache__/
+*.py[cod]
+*$py.class
+# C extensions
+*.so
+# Distribution / packaging
+.Python
+env/
+venv/
+ENV/
+build/
+develop-eggs/
+dist/
+downloads/
+eggs/
+.eggs/
+lib/
+lib64/
+parts/
+sdist/
+var/
+*.egg-info/
+.installed.cfg
+*.egg
+# Virtual environments
+.venv/
+venv/
+ENV/
+# PyInstaller
+*.manifest
+*.spec
+# Installer logs
+pip-log.txt
+pip-delete-this-directory.txt
+# Unit test / coverage reports
+htmlcov/
+.tox/
+.nox/
+.coverage
+.coverage.*
+.cache
+nosetests.xml
+coverage.xml
+*.cover
+.hypothesis/
+.pytest_cache/
+# Translations
+*.mo
+*.pot
+# Django stuff
+*.log
+# Flask stuff
+instance/
+.webassets-cache
+# Scrapy stuff
+.scrapy
+# Sphinx documentation
+docs/_build/
+# PyBuilder
+target/
+# IPython
+profile_default/
+ipython_config.py
+# mypy
+.mypy_cache/
+.dmypy.json
+dmypy.json
+# Pyre
+.pyre/
+# IDEs
+.vscode/
+.idea/
+# Heavy files
+*.h5
+*.pt
+*.pkl
+*.ckpt

.gradio/flagged/dataset1.csv ADDED Viewed

	@@ -0,0 +1,2 @@


1	+ Upload video or paste URL,output,timestamp
2	+ ,,2025-05-10 01:38:07.612284

app.py CHANGED Viewed

@@ -1,7 +1,135 @@
 import gradio as gr
-def greet(name):
-    return "Hello " + name + "!!"
-demo = gr.Interface(fn=greet, inputs="text", outputs="text")
-demo.launch()

+import os
 import gradio as gr
+import torch
+import shutil
+import requests
+import subprocess
+import soundfile as sf
+from scipy.signal import resample
+from moviepy.editor import VideoFileClip, AudioFileClip
+from transformers import Wav2Vec2ForSequenceClassification, Wav2Vec2FeatureExtractor
+# === Constants ===
+# Fixed working-file names (relative paths) shared by the helper functions
+# below as default arguments and by the cleanup step in process_input.
+TEMP_VIDEO = "temp_video.mp4"
+RAW_AUDIO = "raw_audio_input"
+CONVERTED_AUDIO = "converted_audio.wav"
+# NOTE(review): MODEL_DIR is not referenced anywhere in the visible code —
+# confirm whether it is still needed.
+MODEL_DIR = "model"
+# === Load model and feature extractor ===
+# Both objects are created once, at import time, and reused by every call to
+# classify_accent.
+MODEL_REPO = "ylacombe/accent-classifier"
+# NOTE(review): only the model is given cache_dir; the feature extractor is
+# loaded without it — confirm that the differing cache locations are intended.
+model = Wav2Vec2ForSequenceClassification.from_pretrained(MODEL_REPO, cache_dir="hf_model_cache")
+feature_extractor = Wav2Vec2FeatureExtractor.from_pretrained(MODEL_REPO)
+# Put the model in evaluation mode; it is only used for inference here.
+model.eval()
+# === Dynamic label list from model config ===
+# Builds the label list in index order. Looking up id2label[i] for
+# i in 0..len-1 relies on the mapping's keys being consecutive integers
+# starting at 0.
+LABELS = [model.config.id2label[i] for i in range(len(model.config.id2label))]
+# === Download video from URL ===
+def download_video(url, filename=TEMP_VIDEO):
+    r = requests.get(url, stream=True)
+    r.raise_for_status()
+    with open(filename, 'wb') as f:
+        for chunk in r.iter_content(chunk_size=8192):
+            f.write(chunk)
+    return filename
+# === Extract audio from video ===
+def extract_audio_from_video(video_path, output_path=RAW_AUDIO + ".mp4"):
+    clip = VideoFileClip(video_path)
+    if clip.audio is None:
+        raise ValueError("No audio stream found in video.")
+    clip.audio.write_audiofile(output_path)
+    return output_path
+# === Convert any input audio to WAV using ffmpeg ===
+def convert_to_wav(input_path, output_path=CONVERTED_AUDIO):
+    command = ["ffmpeg", "-y", "-i", input_path, output_path]
+    subprocess.run(command, stdout=subprocess.PIPE, stderr=subprocess.PIPE)
+    return output_path
+# === Run accent classification ===
+def classify_accent(audio_path):
+    waveform, sr = sf.read(audio_path)
+    if len(waveform.shape) > 1:
+        waveform = waveform.mean(axis=1)  # Convert stereo to mono
+    target_sr = 16000
+    if sr != target_sr:
+        num_samples = int(len(waveform) * target_sr / sr)
+        waveform = resample(waveform, num_samples)
+        sr = target_sr
+    inputs = feature_extractor(
+        waveform,
+        sampling_rate=sr,
+        return_tensors="pt",
+        padding=True
+    )
+    with torch.no_grad():
+        outputs = model(**inputs)
+        logits = outputs.logits[0]
+        probs = torch.nn.functional.softmax(logits, dim=-1)
+    top_idx = torch.argmax(probs).item()
+    top_label = LABELS[top_idx]
+    top_conf = round(probs[top_idx].item(), 4)
+    top5 = torch.topk(probs, k=5)
+    top5_labels = [LABELS[i] for i in top5.indices.tolist()]
+    top5_scores = [round(p, 4) for p in top5.values.tolist()]
+    top5_text = "\n".join([f"{label}: {score}" for label, score in zip(top5_labels, top5_scores)])
+    return f"Top prediction: {top_label}", top_conf, top_label, audio_path, top5_text
+# === Main Gradio handler ===
+def process_input(video_url, uploaded_audio):
+    try:
+        audio_path = None
+        if uploaded_audio:
+            shutil.copy(uploaded_audio, RAW_AUDIO)
+            audio_path = convert_to_wav(RAW_AUDIO)
+        elif video_url and video_url.strip():
+            download_video(video_url)
+            extracted = extract_audio_from_video(TEMP_VIDEO)
+            audio_path = convert_to_wav(extracted)
+        else:
+            return "Please provide a video URL or upload an audio file.", None, None, None, None
+        return classify_accent(audio_path)
+    except Exception as e:
+        return f"Error: {str(e)}", None, None, None, None
+    finally:
+        for f in [TEMP_VIDEO, RAW_AUDIO, CONVERTED_AUDIO, RAW_AUDIO + ".mp4"]:
+            if os.path.exists(f):
+                os.remove(f)
+# === Gradio UI ===
+interface = gr.Interface(
+    fn=process_input,
+    inputs=[
+        gr.Textbox(label="Enter Loom or MP4 Video URL (optional)"),
+        gr.Audio(label="Upload MP3 or WAV (optional)", type="filepath")
+    ],
+    outputs=[
+        gr.Text(label="Prediction"),
+        gr.Number(label="Confidence Score"),
+        gr.Text(label="Accent"),
+        gr.Audio(label="Processed Audio", type="filepath"),
+        gr.Text(label="Top 5 Predictions")
+    ],
+    title="Accent Classifier",
+    description="Upload an audio file or Loom/MP4 link to detect speaker's accent with top-5 prediction breakdown."
+)
+if __name__ == "__main__":
+    interface.launch()

requirements.txt ADDED Viewed

	@@ -0,0 +1,9 @@

+transformers
+torch
+torchaudio
+gradio
+moviepy==1.0.3
+requests
+safetensors
+soundfile
+scipy

templates/index.html ADDED Viewed

	@@ -0,0 +1,14 @@

+<!doctype html>
+<html lang="en">
+  <head>
+    <title>Video Audio Extractor</title>
+  </head>
+  <body>
+    <h1>Extract Audio from Loom or MP4 URL</h1>
+    <form method="POST">
+      <label for="video_url">Video URL:</label><br>
+      <input type="url" name="video_url" id="video_url" required style="width: 400px;"><br><br>
+      <button type="submit">Extract Audio</button>
+    </form>
+  </body>
+</html>