accent-detector / app.py
Pheire's picture
Update app.py
0467024 verified
import gradio as gr
from speechbrain.inference import EncoderClassifier
import torch
import requests
import subprocess
import os
import uuid
import yt_dlp
# Module-level cache for the accent classifier; remains None until first use.
model = None


def get_model():
    """Return the shared accent classifier, creating it on the first call.

    The classifier is built with ``EncoderClassifier.from_hparams`` and stored
    in the module-level ``model`` variable so that later calls reuse it.
    """
    global model
    if model is not None:
        return model
    model = EncoderClassifier.from_hparams("Jzuluaga/accent-id-commonaccent_ecapa")
    return model
def extract_id_from_url(url):
    """Return the last path segment of ``url``, which is used as the video id.

    The query string and fragment are removed *before* the path is split, so a
    ``/`` inside a query value, a ``#fragment``, or a trailing slash after the
    id does not change the result.

    Args:
        url: Share URL (or bare path) whose final path segment is the id.

    Returns:
        The text after the last ``/`` of the path portion.
    """
    # Keep only what precedes the first "?" or "#" (i.e. the path portion).
    path = url.partition("?")[0].partition("#")[0]
    # Drop trailing slashes, then take whatever follows the last "/".
    return path.rstrip("/").rsplit("/", 1)[-1]
def fetch_loom_download_url(id):
    """Ask the Loom API for the direct download URL of a video.

    Sends a POST to the ``transcoded-url`` endpoint for the given session id
    and reads the ``url`` field from the JSON reply.

    Args:
        id: Loom video/session id. (The name shadows the builtin but is kept
            so existing callers are unaffected.)

    Returns:
        The download URL, or ``None`` when the request fails, the status is
        not 200, or the reply does not contain a ``url`` field.
    """
    endpoint = f"https://www.loom.com/api/campaigns/sessions/{id}/transcoded-url"
    try:
        # A timeout keeps a stalled connection from hanging the request forever.
        response = requests.post(url=endpoint, timeout=30)
    except requests.exceptions.RequestException as e:
        print(f"Error while retrieving response: {e}")
        return None

    if response.status_code != 200:
        print("Error while retrieving response: ", response.status_code)
        # The original had a bare `exit` here, which is a no-op expression;
        # returning None explicitly is what actually happened.
        return None

    try:
        return response.json()["url"]
    except (ValueError, KeyError, TypeError) as e:
        # Body was not JSON, or did not have the expected {"url": ...} shape.
        print(f"Error while parsing response: {e!r}")
        return None
def download_loom_video(url, filename):
    """Stream the video at ``url`` into ``filename``.

    Args:
        url: Direct download URL. A falsy value (e.g. ``None`` from a failed
            URL lookup) is treated as a download failure.
        filename: Destination path, opened in binary write mode.

    Returns:
        ``filename`` on success, ``None`` if the download failed.
    """
    if not url:
        # Report the missing URL directly instead of relying on requests to
        # reject it with an exception.
        print("Failed to download Loom video: no download URL")
        return None

    headers = {"User-Agent": "Mozilla/5.0"}
    file_opened = False
    try:
        # timeout bounds both the connect phase and each read of the stream.
        with requests.get(url, headers=headers, stream=True, timeout=30) as response:
            response.raise_for_status()
            file_opened = True
            with open(filename, "wb") as f:
                for chunk in response.iter_content(chunk_size=8192):
                    if chunk:
                        f.write(chunk)
    except requests.exceptions.RequestException as e:
        print(f"Failed to download Loom video: {e}")
        # Only remove the file if this call started writing it; it would be
        # truncated and unusable.
        if file_opened and os.path.exists(filename):
            os.remove(filename)
        return None

    print(f"Downloaded video to {filename}")
    return filename
def download_direct_mp4(url, filename):
    """Stream a directly linked file at ``url`` into ``filename``.

    Args:
        url: URL of the file to download.
        filename: Destination path, opened in binary write mode.

    Returns:
        ``filename`` on success, ``None`` if anything went wrong.
    """
    try:
        # The context manager releases the streamed connection even when
        # raise_for_status() or a write fails; timeout bounds connect/read.
        with requests.get(url, stream=True, timeout=30) as response:
            response.raise_for_status()
            with open(filename, "wb") as f:
                for chunk in response.iter_content(chunk_size=8192):
                    if chunk:
                        f.write(chunk)
    except Exception as e:
        # Deliberately broad: this helper is best-effort and signals any
        # failure to the caller by returning None.
        print(f"Error downloading direct mp4: {e}")
        # Do not leave a truncated file behind.
        if os.path.exists(filename):
            os.remove(filename)
        return None
    return filename
def download_video_from_url(url):
    """Download the media behind ``url`` to a local file and return its path.

    Dispatch order:
      1. Loom links (host is ``loom.com`` or a subdomain): the id is taken
         from the URL, resolved via ``fetch_loom_download_url`` and fetched
         with ``download_loom_video``.
      2. URLs that end in ``.mp4`` (either the whole URL or just its path,
         so a trailing query string is tolerated): streamed with
         ``download_direct_mp4``.
      3. Anything else: handed to ``yt_dlp``.

    Args:
        url: The http(s) URL to download.

    Returns:
        Path of the downloaded file, or ``None`` when a Loom or direct
        download fails. Errors raised by ``yt_dlp`` propagate to the caller.
    """
    from urllib.parse import urlsplit

    try:
        parts = urlsplit(url)
        host = (parts.hostname or "").lower()
        path = parts.path
    except ValueError:
        # Malformed URL (e.g. broken IPv6 brackets): fall through to yt_dlp.
        host, path = "", url

    # Match on the host rather than a substring of the whole URL, so that
    # "loom.com" appearing in a path or query does not trigger this branch.
    if host == "loom.com" or host.endswith(".loom.com"):
        video_id = extract_id_from_url(url)
        direct_url = fetch_loom_download_url(video_id)
        if not direct_url:
            # The lookup failed; there is nothing to download.
            return None
        filename = f"LoomVideo_{video_id}.mp4"
        return download_loom_video(direct_url, filename)

    if url.endswith(".mp4") or path.lower().endswith(".mp4"):
        filename = f"video_{uuid.uuid4()}.mp4"
        return download_direct_mp4(url, filename)

    # fallback to yt_dlp for youtube, vimeo, etc.
    out_path = f"video_{uuid.uuid4()}.mp4"
    ydl_opts = {
        'format': 'bestaudio/best',
        'outtmpl': out_path,
        'quiet': True,
    }
    with yt_dlp.YoutubeDL(ydl_opts) as ydl:
        ydl.download([url])
    return out_path
def extract_audio(video_file):
    """Convert ``video_file`` to a mono, 16 kHz, 16-bit PCM WAV using ffmpeg.

    Args:
        video_file: Path of the input media file.

    Returns:
        Path of the newly written ``audio_<uuid>.wav`` file.

    Raises:
        ValueError: If ``video_file`` is empty or ``None``.
        RuntimeError: If ffmpeg exits with a non-zero status; the message
            carries the tail of ffmpeg's stderr.
    """
    if not video_file:
        raise ValueError("extract_audio: no input file was provided")

    audio_path = f"audio_{uuid.uuid4()}.wav"
    cmd = [
        "ffmpeg", "-y",  # overwrite the output without prompting
        "-i", video_file, "-vn",
        "-acodec", "pcm_s16le", "-ac", "1", "-ar", "16000",
        audio_path,
    ]
    completed = subprocess.run(cmd, stdout=subprocess.PIPE, stderr=subprocess.PIPE)
    if completed.returncode != 0:
        # Fail here with ffmpeg's own diagnostics instead of returning a path
        # to a file that was never written.
        detail = completed.stderr.decode("utf-8", errors="replace").strip()
        raise RuntimeError(
            f"ffmpeg failed with exit code {completed.returncode}: {detail[-500:]}"
        )
    return audio_path
def classify_accent(input_file_or_url):
    """Classify the accent of the speaker in a media file or at a URL.

    Args:
        input_file_or_url: Either a string starting with ``http`` (downloaded
            via ``download_video_from_url``), a local path, or an object with
            a ``name`` attribute holding a local path.

    Returns:
        A 3-tuple ``(label, confidence, top3)``: the top label, its
        probability formatted as a percentage string, and a newline-separated
        listing of the three most probable labels with their percentages.

    Raises:
        ValueError: If the URL could not be downloaded.
    """
    classifier = get_model()

    # Check if it's a URL
    is_url = isinstance(input_file_or_url, str) and input_file_or_url.startswith("http")
    if is_url:
        video_path = download_video_from_url(input_file_or_url)
        if not video_path:
            raise ValueError("Could not download media from the given URL")
    else:
        video_path = input_file_or_url.name if hasattr(input_file_or_url, "name") else input_file_or_url

    audio_path = None
    try:
        audio_path = extract_audio(video_path)
        out_probs, top_prob, top_idx, label = classifier.classify_file(audio_path)
        # Compute the top-3 once and reuse it for both labels and scores.
        top3 = torch.topk(out_probs, 3)
        top_labels = classifier.hparams.label_encoder.decode_ndim(top3.indices.squeeze())
        confidences = top3.values.squeeze().tolist()
    finally:
        # Remove the temporary files this call created: the extracted WAV,
        # and the video only when we downloaded it ourselves (never a
        # caller-supplied file).
        # NOTE(review): assumes classify_file has finished reading the audio
        # by the time it returns — confirm against the SpeechBrain version used.
        for tmp_path in (audio_path, video_path if is_url else None):
            if tmp_path and os.path.exists(tmp_path):
                try:
                    os.remove(tmp_path)
                except OSError as e:
                    print(f"Could not remove temporary file {tmp_path}: {e}")

    result = "\n".join([f"{l}: {p*100:.2f}%" for l, p in zip(top_labels, confidences)])
    return label[0], f"{top_prob.item()*100:.2f}%", result
# Gradio UI
# Left column: inputs (file upload, URL box, submit button).
# Right column: the three result text boxes.
# NOTE(review): the nesting of components inside Row/Column is reconstructed;
# confirm it matches the intended layout.
with gr.Blocks() as demo:
    gr.Markdown("# Accent Identifier")
    gr.Markdown(
        "Upload a video or audio file, or paste a link (e.g. direct .mp4 URL or Loom video) to identify the speaker's accent."
    )
    with gr.Row():
        with gr.Column():
            input_file = gr.File(label="Upload video/audio file", file_types=[".mp4", ".wav", ".mp3"])
            url_input = gr.Textbox(label="...or paste a direct mp4 URL/loom link")
            submit_btn = gr.Button("Classify Accent")
        with gr.Column():
            label_output = gr.Textbox(label="Top Prediction")
            confidence_output = gr.Textbox(label="Confidence")
            top3_output = gr.Textbox(label="Top 3 Predictions")

    def handle_inputs(file, url):
        """Route the form inputs to ``classify_accent``.

        A non-blank URL takes precedence over an uploaded file. The URL must
        start with ``http://`` or ``https://``; any other text is rejected
        here so it is never interpreted as a local file path downstream.

        Returns:
            The ``(label, confidence, top3)`` tuple from ``classify_accent``,
            or a message tuple when the input is missing or invalid.
        """
        url = (url or "").strip()
        if url:
            if not url.startswith(("http://", "https://")):
                return "Invalid URL (must start with http:// or https://)", "", ""
            return classify_accent(url)
        if file:
            return classify_accent(file)
        return "No input", "", ""

    submit_btn.click(handle_inputs, inputs=[input_file, url_input], outputs=[label_output, confidence_output, top3_output])

# Start the app only when this file is run as a script.
if __name__ == "__main__":
    # NOTE(review): share=True presumably requests a public share link from
    # Gradio — confirm against the Gradio launch documentation.
    demo.launch(share=True)