Spaces:

ash-171
/

accent-detection

Sleeping

accent-detection/src/tools/accent_tool.py

Upload 5 files

5a8c370 verified 12 days ago

1.96 kB

	import os, requests, shutil
	from pydub import AudioSegment
	import whisper
	from speechbrain.pretrained.interfaces import foreign_class

	class AccentAnalyzerTool:
	    """Classify the English accent heard in a remote media file and transcribe it.

	    Constructing the tool loads two models up front: the Whisper ``"medium"``
	    model and the ``Jzuluaga/accent-id-commonaccent_xlsr-en-english``
	    classifier (loaded through SpeechBrain's ``foreign_class``).
	    """

	    # (connect, read) timeouts in seconds passed to ``requests.get`` so that an
	    # unresponsive host cannot block ``analyze`` forever.
	    DOWNLOAD_TIMEOUT = (10, 60)
	    # Number of bytes written to disk per streamed chunk.
	    DOWNLOAD_CHUNK_SIZE = 1 << 20

	    def __init__(self):
	        """Load the transcription model and the accent classifier."""
	        self.whisper_model = whisper.load_model("medium")
	        self.accent_model = foreign_class(
	            source="Jzuluaga/accent-id-commonaccent_xlsr-en-english",
	            pymodule_file="custom_interface.py",
	            classname="CustomEncoderWav2vec2Classifier",
	        )
	        # Raw transcript text from the most recent successful analyze() call;
	        # stays None until one succeeds.
	        self.last_transcript = None

	    def log(self, msg):
	        """Print ``msg`` to stdout, prefixed with the tool name."""
	        print(f"[AccentAnalyzerTool] {msg}")

	    def analyze(self, url: str) -> str:
	        """Download the media at ``url`` and describe the speaker's accent.

	        The file is downloaded, converted to WAV, passed to the accent
	        classifier and then transcribed. On success the raw transcript is also
	        stored in ``self.last_transcript``.

	        Args:
	            url: Location of the media file; it is fetched with an HTTP GET.

	        Returns:
	            A summary containing the accent label, the confidence percentage
	            and the transcript. If any step raises an ``Exception``, the
	            string ``"Error analyzing accent: <message>"`` is returned instead
	            of propagating the error.
	        """
	        # Imported here so this class does not depend on an edit to the
	        # module-level import list.
	        import tempfile

	        try:
	            # A unique per-call directory keeps concurrent calls from
	            # overwriting each other's files, and the context manager removes
	            # it on both the success and the failure path.
	            with tempfile.TemporaryDirectory(prefix="accent_tool_") as tmp_dir:
	                video_path = os.path.join(tmp_dir, "video.mp4")
	                audio_path = os.path.join(tmp_dir, "audio.wav")

	                self.log("Downloading video...")
	                self._download(url, video_path)

	                self.log("Extracting audio...")
	                AudioSegment.from_file(video_path).export(audio_path, format="wav")

	                self.log("Classifying accent...")
	                # Only the second and fourth items of the classifier result
	                # are used; presumably a confidence score and a list of label
	                # strings -- confirm against the model's custom_interface.py.
	                _, score, _, label = self.accent_model.classify_file(audio_path)
	                accent = self._format_accent(label[0])
	                confidence = round(float(score) * 100, 2)

	                self.log("Transcribing...")
	                transcript = self.whisper_model.transcribe(audio_path)["text"]
	                self.last_transcript = transcript

	                return (
	                    f"The speaker has a {accent} English accent "
	                    f"with {confidence}% confidence.\n\n"
	                    f"Transcript of the audio:\n\n {transcript.strip(' ')}"
	                )
	        except Exception as e:
	            # Boundary handler: callers receive a readable message rather than
	            # a traceback, matching the method's string-only return contract.
	            return f"Error analyzing accent: {e}"

	    def _download(self, url, dest_path):
	        """Stream the response body of ``url`` into ``dest_path``.

	        Raises the ``requests`` exception for timeouts, connection failures
	        and non-2xx responses, so an HTTP error page is never saved as media.
	        """
	        with requests.get(url, stream=True, timeout=self.DOWNLOAD_TIMEOUT) as response:
	            response.raise_for_status()
	            with open(dest_path, "wb") as fh:
	                # Write chunk by chunk so the whole file is never held in memory.
	                for chunk in response.iter_content(chunk_size=self.DOWNLOAD_CHUNK_SIZE):
	                    fh.write(chunk)

	    @staticmethod
	    def _format_accent(label):
	        """Return ``label`` for display: ``'us'`` becomes ``'US'``, others are capitalized."""
	        return label.upper() if label == "us" else label.capitalize()