"""Spoken-language identification using the Facebook MMS LID model.

Loading this module has side effects: it loads the pretrained feature
extractor and classifier for ``MODEL_ID`` and reads the ISO-code -> language
name table from ``LANG_FILE``.
"""

import os

import librosa
import numpy as np
import torch
from transformers import AutoFeatureExtractor, Wav2Vec2ForSequenceClassification

# Load Facebook MMS Language Identification Model
MODEL_ID = "facebook/mms-lid-1024"
processor = AutoFeatureExtractor.from_pretrained(MODEL_ID)
model = Wav2Vec2ForSequenceClassification.from_pretrained(MODEL_ID)

# Constants
LID_SAMPLING_RATE = 16_000
LID_THRESHOLD = 0.33  # Confidence threshold
LID_TOP_K = 5  # Number of candidate languages reported per prediction
LID_LANGUAGES = {}

# Load Language Labels
LANG_FILE = "data/lid/all_langs.tsv"
if not os.path.exists(LANG_FILE):
    raise FileNotFoundError(f"Language file '{LANG_FILE}' not found!")

with open(LANG_FILE, encoding="utf-8") as f:
    for line in f:
        line = line.strip()
        if not line:
            # Blank / trailing empty lines carry no entry; skipping them avoids
            # a ValueError from the two-way unpack below.
            continue
        # NOTE(review): despite the ".tsv" extension the separator used here is
        # a single space (ISO code, then the rest of the line as the name) --
        # confirm against the data file.
        iso, name = line.split(" ", 1)
        LID_LANGUAGES[iso] = name


# Identify Audio Language
def identify(audio_data=None):
    """Predict the spoken language of an audio clip.

    Args:
        audio_data: Either a ``(sample_rate, samples)`` tuple (microphone
            input) or a ``str`` path to an audio file (file upload).

    Returns:
        * ``"<>"`` when ``audio_data`` is missing/empty, is of an unsupported
          type, or is a path that does not exist.
        * A low-confidence message string when the best candidate's
          probability is below ``LID_THRESHOLD``.
        * Otherwise a dict mapping language name (or the raw model label when
          it is not in ``LID_LANGUAGES``) to probability, for the top
          ``LID_TOP_K`` candidates.
    """
    if not audio_data:
        return "<>"

    if isinstance(audio_data, tuple):
        # Microphone Input
        sr, audio_samples = audio_data
        # Scaling by 32768 treats the samples as 16-bit PCM.
        # NOTE(review): presumably the caller always supplies int16 -- confirm.
        audio_samples = (np.asarray(audio_samples) / 32768.0).astype(np.float32)
        if audio_samples.ndim > 1:
            # Down-mix multi-channel audio to mono before resampling.
            # Assumes a (samples, channels) layout -- TODO confirm with caller.
            audio_samples = audio_samples.mean(axis=1)
        if sr != LID_SAMPLING_RATE:
            audio_samples = librosa.resample(
                audio_samples, orig_sr=sr, target_sr=LID_SAMPLING_RATE
            )
    elif isinstance(audio_data, str):
        # File Upload
        if not os.path.exists(audio_data):
            return "<>"
        audio_samples, _ = librosa.load(
            audio_data, sr=LID_SAMPLING_RATE, mono=True
        )
    else:
        return "<>"

    # Process Input
    inputs = processor(
        audio_samples, sampling_rate=LID_SAMPLING_RATE, return_tensors="pt"
    )

    # Select Device
    device = torch.device("cuda" if torch.cuda.is_available() else "cpu")
    model.to(device)
    inputs = inputs.to(device)

    # Predict Language
    with torch.no_grad():
        logits = model(**inputs).logits

    # Compute Probabilities
    probabilities = torch.softmax(logits.squeeze(), dim=-1)
    # Never ask topk for more entries than there are labels.
    top_k = min(LID_TOP_K, probabilities.shape[-1])
    top_scores, top_indices = torch.topk(probabilities, top_k, dim=-1)
    scores = top_scores.cpu().tolist()
    indices = top_indices.cpu().tolist()

    # Map to Language Labels
    iso2score = {
        model.config.id2label[int(idx)]: score
        for score, idx in zip(scores, indices)
    }

    # Confidence Check
    if max(iso2score.values()) < LID_THRESHOLD:
        return "Low confidence in language detection. No output shown."

    return {LID_LANGUAGES.get(iso, iso): score for iso, score in iso2score.items()}


# Example Usage
LID_EXAMPLES = [
    ["upload/english.mp3"],
    ["upload/tamil.mp3"],
    ["upload/burmese.mp3"],
]