Spaces:
Sleeping
Sleeping
import os
import shutil
import tempfile

import requests
import whisper
from pydub import AudioSegment
from speechbrain.pretrained.interfaces import foreign_class
class AccentAnalyzerTool:
    """Classify the English accent of a speaker in a video and transcribe it.

    The tool downloads a video from a URL, converts it to WAV audio, runs an
    accent classifier on the audio, transcribes it with Whisper, and returns
    a Markdown-formatted summary of the result.
    """

    # Seconds passed to ``requests.get`` as ``timeout`` so that a stalled
    # server cannot block the caller forever.
    DOWNLOAD_TIMEOUT = 30
    # Size in bytes of each chunk written while streaming the download.
    DOWNLOAD_CHUNK_SIZE = 1024 * 1024

    def __init__(self):
        """Load the Whisper "medium" model and the accent classifier."""
        self.whisper_model = whisper.load_model("medium")
        self.accent_model = foreign_class(
            source="Jzuluaga/accent-id-commonaccent_xlsr-en-english",
            pymodule_file="custom_interface.py",
            classname="CustomEncoderWav2vec2Classifier",
        )
        # Transcript text of the most recent successful analysis, or None.
        self.last_transcript = None

    def log(self, msg):
        """Print ``msg`` to stdout prefixed with the tool name."""
        print(f"[AccentAnalyzerTool] {msg}")

    def analyze(self, url: str) -> str:
        """Download the video at ``url`` and describe the speaker's accent.

        Args:
            url: Location of a video (or audio) file to download.

        Returns:
            A Markdown summary containing the accent label, the confidence
            percentage and the transcript. On any failure the exception is
            not propagated; a string starting with
            ``"Error analyzing accent: "`` is returned instead.

        Side effects:
            ``self.last_transcript`` is set to the raw transcript once
            transcription succeeds.
        """
        try:
            # A private, uniquely named directory: a fixed "tmp" path would
            # collide between concurrent calls, and removing it afterwards
            # could delete an unrelated directory of the same name.
            work_dir = tempfile.mkdtemp(prefix="accent_analyzer_")
            try:
                video_path = self._download_video(url, work_dir)
                audio_path = self._extract_audio(video_path, work_dir)
                accent, confidence = self._classify_accent(audio_path)
                transcript = self._transcribe(audio_path)
            finally:
                # Clean up on failure as well as on success.
                shutil.rmtree(work_dir, ignore_errors=True)

            return (
                f"The speaker has a **{accent} English accent** "
                f"with **{confidence}% confidence**.\n\n"
                f"**Transcript of the audio:**\n\n *{transcript.strip(' ')}*"
            )
        except Exception as e:
            return f"Error analyzing accent: {e}"

    def _download_video(self, url: str, work_dir: str) -> str:
        """Stream ``url`` into ``work_dir`` and return the saved file path."""
        self.log("Downloading video...")
        video_path = os.path.join(work_dir, "video.mp4")
        with requests.get(
            url, stream=True, timeout=self.DOWNLOAD_TIMEOUT
        ) as response:
            # Fail here on 4xx/5xx rather than saving an error page as video.
            response.raise_for_status()
            with open(video_path, "wb") as f:
                # Write chunk by chunk so the whole file is never in memory.
                for chunk in response.iter_content(
                    chunk_size=self.DOWNLOAD_CHUNK_SIZE
                ):
                    f.write(chunk)
        return video_path

    def _extract_audio(self, video_path: str, work_dir: str) -> str:
        """Export the audio of ``video_path`` as WAV and return its path."""
        self.log("Extracting audio...")
        audio_path = os.path.join(work_dir, "audio.wav")
        AudioSegment.from_file(video_path).export(audio_path, format="wav")
        return audio_path

    def _classify_accent(self, audio_path: str):
        """Return ``(accent_name, confidence_percent)`` for ``audio_path``."""
        self.log("Classifying accent...")
        # NOTE(review): classify_file is assumed to return a 4-tuple whose
        # second item is the score and whose last item is a sequence of text
        # labels -- confirm against the model's custom interface.
        _, score, _, label = self.accent_model.classify_file(audio_path)
        top_label = label[0]
        # "us" is shown in upper case; every other label is capitalized.
        accent = top_label.upper() if top_label == "us" else top_label.capitalize()
        confidence = round(float(score) * 100, 2)
        return accent, confidence

    def _transcribe(self, audio_path: str) -> str:
        """Transcribe ``audio_path`` with Whisper and remember the result."""
        self.log("Transcribing...")
        transcript = self.whisper_model.transcribe(audio_path)["text"]
        self.last_transcript = transcript
        return transcript