Spaces:

ash-171
/

accent-detection

Sleeping

File size: 1,961 Bytes

5a8c370

import os
import shutil
import tempfile

import requests
import whisper
from pydub import AudioSegment
from speechbrain.pretrained.interfaces import foreign_class

class AccentAnalyzerTool:
    """Classify the English accent of the speaker in a downloadable media file.

    The constructor loads a Whisper "medium" model and a SpeechBrain accent
    classifier; `analyze` then downloads a file, extracts its audio, classifies
    the accent, transcribes the speech and returns a Markdown summary.
    """

    # Value passed to ``requests.get(timeout=...)`` so a stalled server cannot
    # block the caller forever.
    DOWNLOAD_TIMEOUT = 60
    # Bytes written to disk per chunk while streaming the download.
    DOWNLOAD_CHUNK_SIZE = 1024 * 1024

    def __init__(self):
        """Load the transcription and accent-classification models."""
        self.whisper_model = whisper.load_model("medium")
        self.accent_model = foreign_class(
            source="Jzuluaga/accent-id-commonaccent_xlsr-en-english",
            pymodule_file="custom_interface.py",
            classname="CustomEncoderWav2vec2Classifier"
        )
        # Transcript produced by the most recent analyze() call that got as
        # far as transcription; None until then.
        self.last_transcript = None

    def log(self, msg):
        """Print *msg* to stdout prefixed with ``[AccentAnalyzerTool]``."""
        print(f"[AccentAnalyzerTool] {msg}")

    def analyze(self, url: str) -> str:
        """Download the media at *url* and describe the speaker's accent.

        Args:
            url: Location of a media file that ``requests`` can fetch and
                pydub can decode.

        Returns:
            A Markdown summary containing the accent label, the confidence as
            a percentage and the transcript. If any step fails, the string
            ``"Error analyzing accent: <message>"`` is returned instead of
            raising.

        Side effects:
            Sets ``self.last_transcript`` once transcription succeeds. All
            intermediate files live in a per-call temporary directory that is
            removed whether or not the analysis succeeds.
        """
        try:
            # A unique directory per call keeps concurrent analyses from
            # overwriting each other's video/audio files.
            tmp_dir = tempfile.mkdtemp(prefix="accent_")
            try:
                return self._analyze_in(tmp_dir, url)
            finally:
                # Best-effort cleanup on success *and* failure.
                shutil.rmtree(tmp_dir, ignore_errors=True)
        except Exception as e:
            self.log(f"Analysis failed: {e}")
            return f"Error analyzing accent: {str(e)}"

    def _download(self, url, dest_path):
        """Stream *url* to *dest_path*, raising on timeout or HTTP error status."""
        with requests.get(
            url, stream=True, timeout=self.DOWNLOAD_TIMEOUT
        ) as response:
            # Fail here rather than handing an HTML error page to the decoder.
            response.raise_for_status()
            with open(dest_path, "wb") as f:
                for chunk in response.iter_content(
                    chunk_size=self.DOWNLOAD_CHUNK_SIZE
                ):
                    f.write(chunk)

    def _analyze_in(self, tmp_dir, url):
        """Run download, audio extraction, classification and transcription in *tmp_dir*."""
        self.log("Downloading video...")
        video_path = os.path.join(tmp_dir, "video.mp4")
        self._download(url, video_path)

        self.log("Extracting audio...")
        audio_path = os.path.join(tmp_dir, "audio.wav")
        AudioSegment.from_file(video_path).export(audio_path, format="wav")

        self.log("Classifying accent...")
        _, score, _, label = self.accent_model.classify_file(audio_path)
        # The 'us' label is rendered as the acronym "US"; every other label
        # is simply capitalized.
        accent = label[0].upper() if label[0] == 'us' else label[0].capitalize()
        confidence = round(float(score) * 100, 2)

        self.log("Transcribing...")
        transcript = self.whisper_model.transcribe(audio_path)["text"]
        self.last_transcript = transcript

        return (
            f"The speaker has a **{accent} English accent** "
            f"with **{confidence}% confidence**.\n\n"
            f"**Transcript of the audio:**\n\n *{transcript.strip(' ')}*"
        )