JCrimson1 committed on
Commit
8c6d58c
·
verified ·
1 Parent(s): 288e7a2

Update app.py

Browse files
Files changed (1) hide show
  1. app.py +22 -14
app.py CHANGED
@@ -1,25 +1,33 @@
1
- import gradio as gr
2
  import torch
3
  import librosa
4
  from speechbrain.inference.classifiers import EncoderClassifier
 
 
 
5
 
 
6
  classifier = EncoderClassifier.from_hparams(
7
  source="Jzuluaga/accent-id-commonaccent_ecapa",
8
  savedir="pretrained_models/accent-id-commonaccent_ecapa"
9
  )
10
 
11
- def classify_audio(audio_path):
12
- # librosa can load audio from video files too
13
- waveform, sr = librosa.load(audio_path, sr=16000, mono=True)
 
 
 
14
  waveform_tensor = torch.tensor(waveform).unsqueeze(0)
15
- _, score, _, label = classifier.classify_batch(waveform_tensor)
16
- return f"Predicted Accent: {label[0]}\nConfidence: {score.item():.4f}"
17
-
18
- iface = gr.Interface(
19
- fn=classify_audio,
20
- inputs=gr.Audio(type="filepath"), # <-- allows upload of audio or video files
21
- outputs="text",
22
- title="Accent Identifier"
23
- )
24
 
25
- iface.launch()
 
 
 
 
 
 
 
 
 
 
 
 
 
1
  import torch
2
  import librosa
3
  from speechbrain.inference.classifiers import EncoderClassifier
4
+ from pydub import AudioSegment
5
+ import gradio as gr
6
+ import os
7
 
8
# Where the pretrained accent-ID model lives on the Hub, and where its
# downloaded artifacts are cached locally.
_MODEL_SOURCE = "Jzuluaga/accent-id-commonaccent_ecapa"
_MODEL_CACHE_DIR = "pretrained_models/accent-id-commonaccent_ecapa"

# Instantiate the classifier a single time at import so every request
# reuses the same loaded model instead of reloading it per call.
classifier = EncoderClassifier.from_hparams(
    source=_MODEL_SOURCE,
    savedir=_MODEL_CACHE_DIR,
)
13
 
14
def classify_accent(video):
    """Identify the speaker's accent from an uploaded video file.

    Parameters
    ----------
    video : str
        Filesystem path to the uploaded file (Gradio's ``gr.Video``
        component passes a path, not raw bytes).

    Returns
    -------
    str
        Human-readable result, e.g. ``"Accent: us (Confidence: 0.97)"``.
    """
    import os
    import tempfile

    # Infer the container format from the file extension so non-mp4
    # uploads (e.g. .webm browser recordings) still decode; fall back to
    # "mp4" — the original hard-coded behavior — when there is no
    # usable extension.
    ext = os.path.splitext(video)[1].lstrip(".").lower() or "mp4"
    audio = AudioSegment.from_file(video, format=ext)

    # Export the audio track to a unique temp file instead of a fixed
    # "output.wav" in the CWD: concurrent requests no longer clobber each
    # other's file, and the file is always removed afterwards.
    tmp = tempfile.NamedTemporaryFile(suffix=".wav", delete=False)
    tmp.close()
    try:
        audio.export(tmp.name, format="wav")
        # 16 kHz mono matches what the upstream model config expects;
        # the sample rate return value is not needed.
        waveform, _sr = librosa.load(tmp.name, sr=16000, mono=True)
    finally:
        os.remove(tmp.name)

    # classify_batch expects a (batch, time) tensor.
    waveform_tensor = torch.tensor(waveform).unsqueeze(0)
    _, score, _, text_lab = classifier.classify_batch(waveform_tensor)

    return f"Accent: {text_lab[0]} (Confidence: {score.item():.2f})"
26
+
27
+
28
# Gradio UI: one video input wired to a plain-text accent prediction.
iface = gr.Interface(
    fn=classify_accent,
    inputs=gr.Video(),
    outputs="text",
)

# Only start the web server when run as a script, not when imported.
if __name__ == "__main__":
    iface.launch()