Spaces:
Running
Running
File size: 6,060 Bytes
f19db96 b8a2d43 f19db96 6b0d633 b8a2d43 6b0d633 b8a2d43 |
1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32 33 34 35 36 37 38 39 40 41 42 43 44 45 46 47 48 49 50 51 52 53 54 55 56 57 58 59 60 61 62 63 64 65 66 67 68 69 70 71 72 73 74 75 76 77 78 79 80 81 82 83 84 85 86 87 88 89 90 91 92 93 94 95 96 97 98 99 100 101 102 103 104 105 106 107 108 109 110 111 112 113 114 115 116 117 118 119 120 121 122 123 124 125 126 127 128 129 130 131 132 133 134 135 136 137 138 139 140 141 142 143 144 145 146 147 148 149 150 151 152 153 154 155 156 157 158 159 160 161 162 163 164 165 166 167 168 169 170 171 172 173 174 175 |
import streamlit as st
import os
import tempfile
import torch
import json
import urllib.request
from urllib.parse import urlparse
from moviepy import VideoFileClip, AudioFileClip
from speechbrain.pretrained.interfaces import foreign_class
import yt_dlp
from pydub import AudioSegment
from pydub.silence import detect_nonsilent
# Directory handed to ``foreign_class`` as ``savedir`` for the pretrained model.
model_dir = "/tmp/pretrained_models"
os.makedirs(model_dir, exist_ok=True)


@st.cache_resource
def _load_classifier():
    """Build the accent classifier.

    Wrapped in ``st.cache_resource`` so that Streamlit script reruns reuse the
    same instance instead of constructing the model again.
    """
    return foreign_class(
        source="Jzuluaga/accent-id-commonaccent_xlsr-en-english",
        pymodule_file="custom_interface.py",
        classname="CustomEncoderWav2vec2Classifier",
        savedir=model_dir,
    )


# Load model once: Streamlit re-executes this file on every interaction, so
# the cached loader is what actually guarantees a single construction.
classifier = _load_classifier()
def extract_loom_id(url):
    """Return the last non-empty path segment of *url*, used as the Loom video ID.

    The query string and fragment are ignored because only the parsed path is
    inspected. Trailing slashes are stripped first so that
    ``https://www.loom.com/share/<id>/`` yields ``<id>`` rather than an empty
    string. Returns ``""`` when the URL has no path.
    """
    path = urlparse(url).path
    return path.rstrip("/").split("/")[-1]
def download_loom_video(url, filename):
    """Download the Loom video referenced by *url* into *filename*.

    The video ID is taken from the URL, a POST request is sent to Loom's
    ``transcoded-url`` endpoint, and the ``"url"`` field of the JSON reply is
    then retrieved into *filename*.

    Args:
        url: Loom share URL.
        filename: Destination path for the downloaded video.

    Returns:
        *filename*, for convenience.

    Raises:
        RuntimeError: If any step fails; the original exception is chained.
    """
    try:
        video_id = extract_loom_id(url)
        request = urllib.request.Request(
            url=f"https://www.loom.com/api/campaigns/sessions/{video_id}/transcoded-url",
            headers={},
            method="POST",
        )
        # Close the HTTP response deterministically instead of leaking it.
        with urllib.request.urlopen(request) as response:
            body = response.read()
        content = json.loads(body.decode("utf-8"))
        video_url = content["url"]
        urllib.request.urlretrieve(video_url, filename)
        return filename
    except Exception as e:
        raise RuntimeError(f"Failed to download video from Loom: {e}") from e
def download_youtube_audio(url):
    """Download the audio track of a YouTube video and convert it to WAV.

    yt-dlp fetches the best audio stream and its FFmpeg post-processor turns
    it into an mp3, which is then re-encoded to WAV with ``AudioFileClip``.

    The intermediate mp3 lives in a private temporary directory and the WAV
    gets a unique temporary name, so concurrent sessions cannot overwrite
    each other's files. The caller owns the returned WAV and should delete it.

    Args:
        url: YouTube video URL.

    Returns:
        Path to the generated WAV file.

    Raises:
        RuntimeError: If download or conversion fails; the original exception
            is chained.
    """
    wav_path = None
    try:
        # The directory (and the mp3 inside it) is removed on exit, even on error.
        with tempfile.TemporaryDirectory() as work_dir:
            ydl_opts = {
                'format': 'bestaudio/best',
                'outtmpl': os.path.join(work_dir, 'yt_audio.%(ext)s'),
                'quiet': True,
                'postprocessors': [{
                    'key': 'FFmpegExtractAudio',
                    'preferredcodec': 'mp3',
                    'preferredquality': '64',
                }],
            }
            with yt_dlp.YoutubeDL(ydl_opts) as ydl:
                ydl.download([url])

            # Reserve a unique output name; the handle is closed right away so
            # the writer below can reopen the path.
            with tempfile.NamedTemporaryFile(delete=False, suffix=".wav") as wav_file:
                wav_path = wav_file.name

            audioclip = AudioFileClip(os.path.join(work_dir, "yt_audio.mp3"))
            try:
                audioclip.write_audiofile(wav_path, logger=None)
            finally:
                # Release the reader before the temporary directory is removed.
                audioclip.close()
        return wav_path
    except Exception as e:
        # Do not leave a partial WAV behind when the conversion failed.
        if wav_path is not None:
            try:
                os.remove(wav_path)
            except OSError:
                pass
        raise RuntimeError(f"Failed to download from YouTube: {e}") from e
def download_direct_video(url):
    """Download the video at *url* into a temporary ``.mp4`` file.

    The body is streamed to disk in chunks rather than read into memory in
    one piece, and the response is closed when done. The caller owns the
    returned file and should delete it.

    Args:
        url: Direct URL of the video.

    Returns:
        Path of the temporary file holding the download.

    Raises:
        RuntimeError: If the request fails, the response status is not 200,
            or writing fails; the original exception is chained.
    """
    chunk_size = 1024 * 1024
    temp_path = None
    try:
        with urllib.request.urlopen(url) as response:
            # Only a response reporting status 200 is accepted.
            if response.status != 200:
                raise RuntimeError("Failed to download video.")
            with tempfile.NamedTemporaryFile(delete=False, suffix=".mp4") as temp_file:
                temp_path = temp_file.name
                for chunk in iter(lambda: response.read(chunk_size), b""):
                    temp_file.write(chunk)
        return temp_path
    except Exception as e:
        # Remove a partially written file so failed downloads do not pile up.
        if temp_path is not None:
            try:
                os.remove(temp_path)
            except OSError:
                pass
        raise RuntimeError(f"Failed to download video : {e}") from e
def extract_audio(video_path):
    """Write the audio track of *video_path* to a sibling ``.wav`` file.

    The output name is the input path with its extension replaced by
    ``.wav``. Using ``os.path.splitext`` means only the real extension is
    touched, and an input without ``.mp4`` no longer maps onto itself.

    Args:
        video_path: Path to the source video.

    Returns:
        Path to the written WAV file.

    Raises:
        RuntimeError: If the video cannot be opened, has no audio track, or
            the audio cannot be written; the original exception is chained.
    """
    try:
        wav_path = os.path.splitext(video_path)[0] + ".wav"
        clip = VideoFileClip(video_path)
        try:
            if clip.audio is None:
                raise ValueError("video has no audio track")
            clip.audio.write_audiofile(wav_path)
        finally:
            # Always release the clip, including when writing fails.
            clip.close()
        return wav_path
    except Exception as e:
        raise RuntimeError(f"Fail to extract the video : {e}") from e
def get_speech_segments(audio_path, min_silence_len=700, silence_thresh=-40,
                        duration=10000, output_path="temp_first_segment.wav"):
    """Export the first stretch of non-silent audio from a WAV file.

    Non-silent ranges are located with ``detect_nonsilent``. The exported
    clip starts where the first such range starts and lasts at most
    *duration*, clamped to the end of the audio. Despite the plural name,
    only this single clip is produced.

    Args:
        audio_path: Path to the source WAV file.
        min_silence_len: Forwarded to ``detect_nonsilent``.
        silence_thresh: Forwarded to ``detect_nonsilent``.
        duration: Maximum clip length, in the same units as the detected
            range boundaries (milliseconds, per the ``*_ms`` naming).
        output_path: Where the clip is written. The default is a fixed name
            in the working directory, so concurrent callers share it.

    Returns:
        *output_path*.

    Raises:
        RuntimeError: If no non-silent range is found.
    """
    audio = AudioSegment.from_wav(audio_path)
    total_duration = len(audio)
    nonsilent_ranges = detect_nonsilent(
        audio,
        min_silence_len=min_silence_len,
        silence_thresh=silence_thresh,
    )
    # Entirely silent input yields no ranges; fail with a clear message
    # instead of an IndexError.
    if not nonsilent_ranges:
        raise RuntimeError("No speech detected in the audio.")

    start_ms = nonsilent_ranges[0][0]
    end_ms = min(start_ms + duration, total_duration)
    audio[start_ms:end_ms].export(output_path, format="wav")
    return output_path
def classify_audio(wav_path):
    """Classify the accent heard in *wav_path*.

    The opening speech clip is cut out with ``get_speech_segments`` and
    passed to the module-level ``classifier``.

    Returns:
        A ``(label, confidence)`` pair: ``label`` exactly as returned by the
        classifier, and ``confidence`` as the first score element scaled to a
        percentage.
    """
    # NOTE(review): ``label`` is forwarded untouched; if the classifier
    # returns a list of labels, callers will render the list - confirm.
    speech_clip = get_speech_segments(wav_path)
    _probs, top_score, _idx, label = classifier.classify_file(speech_clip)
    percent = float(top_score[0]) * 100  # convert tensor to float
    return label, percent
def delete_file(path):
    """Remove *path* on a best-effort basis.

    A falsy *path* (``None`` or ``""``) is ignored. So is any ``OSError``
    from the removal, such as a file that is already gone. Other exceptions
    are no longer silently swallowed.
    """
    if not path:
        return
    try:
        os.remove(path)
    except OSError:
        pass
# Streamlit UI
st.title("Accent Classifier for English Speakers")

with st.form("Input your video (it can be video link or upload)"):
    video_url = st.text_input(
        "Enter video URL (YouTube, Loom, or .mp4)"
    )
    # The label lists the same extensions that ``type`` actually accepts.
    uploaded_file = st.file_uploader(
        "Or upload a video file (mp4, mov, or avi)",
        type=["mp4", "mov", "avi"]
    )

    if st.form_submit_button("Process"):
        video_path = None
        wav_path = None
        try:
            with st.spinner('Processing video... Please wait'):
                if video_url:
                    # Also accept direct links whose ".mp4" is followed by a
                    # query string or fragment.
                    is_mp4_link = (
                        video_url.endswith(".mp4")
                        or urlparse(video_url).path.lower().endswith(".mp4")
                    )
                    if "youtube.com" in video_url or "youtu.be" in video_url:
                        wav_path = download_youtube_audio(video_url)
                    elif "loom.com" in video_url:
                        # Reserve a temp name and close the handle before the
                        # downloader writes to that path.
                        with tempfile.NamedTemporaryFile(delete=False, suffix=".mp4") as tmp:
                            video_path = tmp.name
                        download_loom_video(video_url, video_path)
                        wav_path = extract_audio(video_path)
                    elif is_mp4_link:
                        video_path = download_direct_video(video_url)
                        wav_path = extract_audio(video_path)
                    else:
                        st.error("URL Format unrecognized.")
                elif uploaded_file is not None:
                    with tempfile.NamedTemporaryFile(delete=False, suffix=".mp4") as tmp:
                        # Record the path first so it is cleaned up even if
                        # the write fails.
                        video_path = tmp.name
                        tmp.write(uploaded_file.read())
                    wav_path = extract_audio(video_path)
                else:
                    st.error("Please upload a file or link")

                if wav_path:
                    label, confidence = classify_audio(wav_path)
                    st.success(f"Video Accent: **{label}**")
                    st.info(f"Confidence Score: **{confidence:.2f}%**")
                else:
                    st.error("Error processing video")
        except Exception as e:
            # Top-level boundary: show any failure to the user.
            st.error(str(e))
        finally:
            # Either path may still be None here; delete_file tolerates that.
            delete_file(wav_path)
            delete_file(video_path)