# Spaces:
# Running
# Running
import os
import subprocess
import uuid
from urllib.parse import urlparse

import gradio as gr
import requests
import torch
import yt_dlp
from speechbrain.inference import EncoderClassifier
model = None  # Lazy-loaded model


def get_model():
    """Return the shared accent classifier, creating it on first use.

    The classifier is stored in the module-level ``model`` variable so that
    later calls reuse the instance built by the first call.
    """
    global model
    if model is not None:
        return model
    model = EncoderClassifier.from_hparams("Jzuluaga/accent-id-commonaccent_ecapa")
    return model
def extract_id_from_url(url):
    """Return the last path segment of *url* (the video id of a share link).

    Only the path component is inspected, so a query string or fragment is
    ignored even when it contains ``/``, and a trailing slash does not
    produce an empty id.

    Args:
        url: The share link, e.g. ``https://www.loom.com/share/<id>?sid=...``.

    Returns:
        The final non-empty path segment, or ``""`` when the URL has no path.
    """
    path = urlparse(url).path
    # rstrip first so ".../share/<id>/" still yields "<id>".
    return path.rstrip("/").rsplit("/", 1)[-1]
def fetch_loom_download_url(id):
    """Ask the Loom API for the direct (transcoded) download URL of a video.

    Args:
        id: Loom video identifier. The parameter name shadows the ``id``
            builtin but is kept so existing keyword callers keep working.

    Returns:
        The value of the ``"url"`` field of the JSON response, or ``None``
        when the request fails, the status code is not 200, or the response
        body does not contain a usable ``"url"`` entry.
    """
    endpoint = f"https://www.loom.com/api/campaigns/sessions/{id}/transcoded-url"
    try:
        # A timeout keeps a stalled connection from blocking the caller forever.
        response = requests.post(url=endpoint, timeout=30)
    except requests.exceptions.RequestException as e:
        print(f"Error while contacting Loom: {e}")
        return None

    if response.status_code != 200:
        print("Error while retrieving response: ", response.status_code)
        return None

    try:
        return response.json()["url"]
    except (ValueError, KeyError, TypeError) as e:
        # ValueError: body is not JSON; KeyError/TypeError: unexpected shape.
        print(f"Unexpected response from Loom: {e}")
        return None
def download_loom_video(url, filename):
    """Stream the video at *url* into *filename*.

    The request is sent with a browser-like ``User-Agent`` header and the
    body is written in 8192-byte chunks.

    Returns:
        *filename* on success, or ``None`` if ``requests`` raised a
        ``RequestException`` (the error is printed).
    """
    request_headers = {"User-Agent": "Mozilla/5.0"}
    try:
        with requests.get(url, headers=request_headers, stream=True) as resp:
            resp.raise_for_status()
            with open(filename, "wb") as out_file:
                # Empty chunks are skipped rather than written.
                out_file.writelines(
                    piece for piece in resp.iter_content(chunk_size=8192) if piece
                )
    except requests.exceptions.RequestException as err:
        print(f"Failed to download Loom video: {err}")
        return None
    print(f"Downloaded video to {filename}")
    return filename
def download_direct_mp4(url, filename):
    """Stream the file at *url* into *filename*.

    The response is used as a context manager so the streamed connection is
    always released, including when writing fails part-way through.

    Args:
        url: Direct link to the media file.
        filename: Local path to write the downloaded bytes to.

    Returns:
        *filename* on success, or ``None`` if the request or the file write
        failed (the error is printed).
    """
    try:
        with requests.get(url, stream=True, timeout=30) as response:
            response.raise_for_status()
            with open(filename, "wb") as f:
                for chunk in response.iter_content(chunk_size=8192):
                    if chunk:
                        f.write(chunk)
    except (requests.exceptions.RequestException, OSError) as e:
        print(f"Error downloading direct mp4: {e}")
        return None
    return filename
def download_video_from_url(url):
    """Download the media behind *url* to a local file.

    Three strategies are tried in order:

    1. URLs containing ``loom.com`` are resolved to a direct download link
       through the Loom API and saved as ``LoomVideo_<id>.mp4``.
    2. URLs whose path ends in ``.mp4`` (case-insensitive, query string and
       fragment ignored) are downloaded directly.
    3. Everything else is handed to ``yt_dlp``.

    Returns:
        The path of the downloaded file, or ``None`` when a Loom or direct
        download fails. Exceptions raised by ``yt_dlp`` are not caught here.
    """
    if "loom.com" in url:
        video_id = extract_id_from_url(url)
        print(video_id)
        direct_url = fetch_loom_download_url(video_id)
        print(direct_url)
        if not direct_url:
            # The lookup failed; there is nothing to download.
            return None
        filename = f"LoomVideo_{video_id}.mp4"
        success = download_loom_video(direct_url, filename)
        print(success)
        return filename if success else None

    # Look at the path only so "clip.mp4?token=..." is still recognised.
    if urlparse(url).path.lower().endswith(".mp4"):
        return download_direct_mp4(url, f"video_{uuid.uuid4()}.mp4")

    # fallback to yt_dlp for youtube, vimeo, etc.
    out_path = f"video_{uuid.uuid4()}.mp4"
    ydl_opts = {
        'format': 'bestaudio/best',
        'outtmpl': out_path,
        'quiet': True,
    }
    with yt_dlp.YoutubeDL(ydl_opts) as ydl:
        ydl.download([url])
    return out_path
def extract_audio(video_file):
    """Convert *video_file* to a mono, 16 kHz, 16-bit PCM WAV file with ffmpeg.

    Args:
        video_file: Path of the input media file.

    Returns:
        Path of the newly created ``audio_<uuid>.wav`` file.

    Raises:
        ValueError: If *video_file* is empty or ``None``.
        RuntimeError: If ffmpeg exits with a non-zero status; the end of its
            stderr output is included in the message.
    """
    if not video_file:
        raise ValueError("No input file given for audio extraction")

    audio_path = f"audio_{uuid.uuid4()}.wav"
    cmd = [
        "ffmpeg", "-y", "-i", video_file, "-vn",
        "-acodec", "pcm_s16le", "-ac", "1", "-ar", "16000",
        audio_path,
    ]
    result = subprocess.run(cmd, stdout=subprocess.DEVNULL, stderr=subprocess.PIPE)
    if result.returncode != 0:
        # Fail here instead of returning a path to a file that was never written.
        stderr_tail = result.stderr.decode("utf-8", errors="replace")[-500:]
        raise RuntimeError(
            f"ffmpeg failed with exit code {result.returncode}: {stderr_tail}"
        )
    return audio_path
def classify_accent(input_file_or_url):
    """Classify the accent in an uploaded file or a remote video.

    Args:
        input_file_or_url: An ``http...`` URL string, a local file path, or
            an object with a ``name`` attribute holding a file path.

    Returns:
        A 3-tuple of strings: the top label, the top score formatted as a
        percentage, and one ``label: score%`` line for each of the three
        highest-scoring labels. If a URL could not be downloaded the tuple
        is ``("Could not download video", "", "")``.
    """
    classifier = get_model()

    # Check if it's a URL
    is_url = isinstance(input_file_or_url, str) and input_file_or_url.startswith("http")
    if is_url:
        video_path = download_video_from_url(input_file_or_url)
        if not video_path:
            # The download helpers signal failure with None.
            return "Could not download video", "", ""
    else:
        video_path = getattr(input_file_or_url, "name", input_file_or_url)

    audio_path = None
    try:
        audio_path = extract_audio(video_path)
        out_probs, top_prob, top_idx, label = classifier.classify_file(audio_path)
        top3 = torch.topk(out_probs, 3)
        top_labels = classifier.hparams.label_encoder.decode_ndim(top3.indices.squeeze())
        confidences = top3.values.squeeze().tolist()
        result = "\n".join(f"{l}: {p*100:.2f}%" for l, p in zip(top_labels, confidences))
        return label[0], f"{top_prob.item()*100:.2f}%", result
    finally:
        # Remove the files this call created; a user-supplied file is left alone.
        temp_files = [audio_path]
        if is_url:
            temp_files.append(video_path)
        for path in temp_files:
            if path:
                try:
                    os.remove(path)
                except OSError:
                    pass  # best-effort cleanup
# Gradio UI
with gr.Blocks() as demo:
    gr.Markdown("# Accent Identifier")
    gr.Markdown(
        "Upload a video or audio file, or paste a link (e.g. direct .mp4 URL or Loom video) to identify the speaker's accent."
    )

    with gr.Row():
        # Inputs and the submit button.
        with gr.Column():
            input_file = gr.File(
                label="Upload video/audio file",
                file_types=[".mp4", ".wav", ".mp3"],
            )
            url_input = gr.Textbox(label="...or paste a direct mp4 URL/loom link")
            submit_btn = gr.Button("Classify Accent")
        # The three result fields.
        with gr.Column():
            label_output = gr.Textbox(label="Top Prediction")
            confidence_output = gr.Textbox(label="Confidence")
            top3_output = gr.Textbox(label="Top 3 Predictions")

    def handle_inputs(file, url):
        """Classify the URL if one was given, otherwise the uploaded file."""
        source = url or file  # a pasted URL takes precedence over an upload
        if not source:
            return "No input", "", ""
        return classify_accent(source)

    submit_btn.click(
        handle_inputs,
        inputs=[input_file, url_input],
        outputs=[label_output, confidence_output, top3_output],
    )

# Start the app only when the file is run as a script.
if __name__ == "__main__":
    demo.launch(share=True)