File size: 853 Bytes
a50c417 |
1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 |
# deep_model.py
import torch
import librosa
from transformers import AutoFeatureExtractor, AutoModelForAudioClassification
# Identifier of the pretrained accent classifier; the same ID is passed to
# both loaders below so the feature extractor and the model stay matched.
MODEL_ID = "ylacombe/accent-classifier"
# Both objects are created once, when this module is imported, and are then
# shared by every call to predict_accent().
# NOTE(review): from_pretrained presumably downloads the files on first use
# and reads them from the local cache afterwards -- confirm for deployment.
feature_extractor = AutoFeatureExtractor.from_pretrained(MODEL_ID)
model = AutoModelForAudioClassification.from_pretrained(MODEL_ID)
# Note: these indices depend on the ordering of the model's own class labels.
# Only the classes listed here are given a name; any other predicted index
# has no entry in this table.
label_map: dict[int, str] = {
    4: "england",
    14: "us",
}
def predict_accent(audio_path: str) -> str:
    """Classify the accent of the speech in an audio file.

    The file is loaded with librosa using a 16000 Hz target rate, passed
    through the module-level feature extractor and classifier, and the
    highest-scoring class index is translated through ``label_map``.

    Args:
        audio_path: Path of the audio file to classify.

    Returns:
        The accent name stored in ``label_map`` for the predicted class
        index, or ``"Unknown (ID: <index>)"`` when that index has no entry
        in ``label_map``.
    """
    # librosa also returns the sample rate; it is fixed by ``sr`` and unused.
    waveform, _ = librosa.load(audio_path, sr=16000)
    model_inputs = feature_extractor(
        waveform, sampling_rate=16000, return_tensors="pt"
    )

    # Inference only, so gradient tracking is switched off.
    with torch.no_grad():
        outputs = model(**model_inputs)

    # .item() assumes a single prediction, i.e. one clip per call.
    class_index = outputs.logits.argmax(dim=-1).item()
    if class_index in label_map:
        return label_map[class_index]
    return f"Unknown (ID: {class_index})"
|