Futuresony committed on
Commit
4468a8d
·
verified ·
1 Parent(s): d44c023

Delete lid.py(auto/audio)

Browse files
Files changed (1) hide show
  1. lid.py(auto/audio) +0 -69
lid.py(auto/audio) DELETED
@@ -1,69 +0,0 @@
1
- from transformers import Wav2Vec2ForSequenceClassification, AutoFeatureExtractor
2
- import torch
3
- import librosa
4
- import numpy as np
5
-
6
# Identifier of the pretrained language-identification checkpoint loaded below.
model_id = "facebook/mms-lid-1024"

# Both objects are created once at import time and reused by every call to
# detect_language().
processor = AutoFeatureExtractor.from_pretrained(model_id)
model = Wav2Vec2ForSequenceClassification.from_pretrained(model_id)

# Sampling rate (Hz) that all audio is converted to before feature extraction.
LID_SAMPLING_RATE = 16_000
# NOTE(review): not referenced by the visible code (detect_language asks for a
# fixed top 5) — confirm whether this is still needed.
LID_TOPK = 10
# Minimum top score; below it detect_language() withholds its predictions.
LID_THRESHOLD = 0.33

# Maps a language code (first field of each line) to its display name
# (remainder of the line).
LID_LANGUAGES = {}
# Explicit encoding so the table loads identically regardless of the
# platform's default locale.
# NOTE(review): the file is named .tsv but fields are split on a single
# space, as in the original code — confirm the real delimiter.
with open("data/lid/all_langs.tsv", encoding="utf-8") as f:
    for line in f:
        line = line.rstrip("\r\n")
        if not line.strip():
            # Tolerate blank / trailing empty lines instead of crashing on
            # the tuple unpack below.
            continue
        iso, name = line.split(" ", 1)
        # Strip so names do not carry stray whitespace (previously every
        # name kept its trailing newline).
        LID_LANGUAGES[iso] = name.strip()
20
-
21
def _select_device():
    """Return the preferred torch device: CUDA first, then MPS, then CPU."""
    if torch.cuda.is_available():
        return torch.device("cuda")
    # Older torch builds have no `torch.backends.mps` attribute at all.
    mps = getattr(torch.backends, "mps", None)
    if mps is not None and mps.is_available() and mps.is_built():
        return torch.device("mps")
    return torch.device("cpu")


def detect_language(audio_data=None):
    """Identify the spoken language of an audio clip.

    Args:
        audio_data: Either a ``(sample_rate, samples)`` tuple (microphone
            capture) or a value accepted by ``librosa.load`` such as a file
            path (file upload).

    Returns:
        A dict mapping language name to probability for the five most likely
        languages, or a plain string message when the input is empty or the
        best score is below ``LID_THRESHOLD``.
    """
    if not audio_data:
        return "<<ERROR: Empty Audio Input>>"

    if isinstance(audio_data, tuple):
        # Microphone capture.
        sr, audio_samples = audio_data
        # Scale to [-1, 1); assumes 16-bit PCM samples — TODO confirm with
        # the caller.
        audio_samples = (audio_samples / 32768.0).astype(np.float32)
        if audio_samples.ndim > 1:
            # Downmix multi-channel input to mono, matching the mono=True
            # used for file uploads. Assumes a (samples, channels) layout —
            # verify against the component that produces the tuple.
            audio_samples = audio_samples.mean(axis=1)
        if sr != LID_SAMPLING_RATE:
            audio_samples = librosa.resample(
                audio_samples, orig_sr=sr, target_sr=LID_SAMPLING_RATE
            )
    else:
        # File upload: librosa decodes, downmixes and resamples in one step.
        audio_samples = librosa.load(audio_data, sr=LID_SAMPLING_RATE, mono=True)[0]

    inputs = processor(
        audio_samples, sampling_rate=LID_SAMPLING_RATE, return_tensors="pt"
    )

    device = _select_device()
    model.to(device)
    inputs = inputs.to(device)

    with torch.no_grad():
        logit = model(**inputs).logits

    # log-softmax followed by exp yields ordinary softmax probabilities.
    logit_lsm = torch.log_softmax(logit.squeeze(), dim=-1)
    # NOTE(review): the count is fixed at 5 here although the module also
    # defines LID_TOPK; kept as-is so the output shape does not change.
    scores, indices = torch.topk(logit_lsm, 5, dim=-1)
    scores, indices = torch.exp(scores).to("cpu").tolist(), indices.to("cpu").tolist()
    iso2score = {model.config.id2label[int(i)]: s for s, i in zip(scores, indices)}

    if max(iso2score.values()) < LID_THRESHOLD:
        return "Low confidence in the language identification predictions. Output is not shown!"

    # Fall back to the raw label when the language table has no entry for
    # it, rather than failing with KeyError.
    return {LID_LANGUAGES.get(iso, iso): score for iso, score in iso2score.items()}
69
-