fahadqazi committed on
Commit
5bd7940
·
1 Parent(s): 9ff2c00

added examples

Browse files
Files changed (1) hide show
  1. app.py +21 -1
app.py CHANGED
@@ -7,12 +7,16 @@ from moviepy import VideoFileClip
7
  from transformers import pipeline
8
  import torchaudio
9
  from speechbrain.pretrained.interfaces import foreign_class
 
10
 
11
  # Load Whisper model to confirm English
12
  whisper_pipe = pipeline("automatic-speech-recognition", model="openai/whisper-tiny", device="cpu")
13
 
14
  classifier = foreign_class(source="Jzuluaga/accent-id-commonaccent_xlsr-en-english", pymodule_file="custom_interface.py", classname="CustomEncoderWav2vec2Classifier")
15
 
 
 
 
16
 
17
  ACCENT_LABELS = {
18
  "us": "American Accent",
@@ -69,10 +73,26 @@ def extract_audio(video_path):
69
  clip.audio.write_audiofile(audio_path, codec='pcm_s16le')
70
  return audio_path
71
 
 
 
 
 
 
 
 
 
 
 
72
  def transcribe(audio_path):
73
  result = whisper_pipe(audio_path, return_language=True)
74
  print(result)
75
- return result['text'], result['chunks'][0]['language']
 
 
 
 
 
 
76
 
77
  def analyze_accent(url_or_file):
78
  try:
 
7
  from transformers import pipeline
8
  import torchaudio
9
  from speechbrain.pretrained.interfaces import foreign_class
10
+ from transformers import WhisperForConditionalGeneration, WhisperProcessor
11
 
12
  # Load Whisper model to confirm English
13
  whisper_pipe = pipeline("automatic-speech-recognition", model="openai/whisper-tiny", device="cpu")
14
 
15
  classifier = foreign_class(source="Jzuluaga/accent-id-commonaccent_xlsr-en-english", pymodule_file="custom_interface.py", classname="CustomEncoderWav2vec2Classifier")
16
 
17
+ model = WhisperForConditionalGeneration.from_pretrained("openai/whisper-tiny")
18
+ processor = WhisperProcessor.from_pretrained("openai/whisper-tiny")
19
+
20
 
21
  ACCENT_LABELS = {
22
  "us": "American Accent",
 
73
  clip.audio.write_audiofile(audio_path, codec='pcm_s16le')
74
  return audio_path
75
 
76
+
77
def detect_language(audio_path):
    """Detect the spoken-language code of an audio file with Whisper.

    Loads the audio, resamples it to the 16 kHz input Whisper expects,
    then runs a single decoder step: the first token Whisper predicts
    after the start-of-transcript token is its language token
    (e.g. "<|en|>").

    Parameters
    ----------
    audio_path : str
        Path to an audio file readable by torchaudio.

    Returns
    -------
    str
        Language code such as "en".
    """
    import torch  # local import: the module top only imports torchaudio

    audio, sr = torchaudio.load(audio_path)
    if sr != 16000:
        # Whisper is trained on 16 kHz audio; other rates silently
        # degrade (or break) the prediction.
        audio = torchaudio.functional.resample(audio, sr, 16000)
        sr = 16000

    inputs = processor(audio[0], sampling_rate=sr, return_tensors="pt")

    # A bare model.forward(**inputs) fails for an encoder-decoder model:
    # the decoder needs at least its start token. A single decoder step
    # is enough, because Whisper emits the language token first.
    decoder_input_ids = torch.tensor([[model.config.decoder_start_token_id]])
    with torch.no_grad():
        logits = model(inputs.input_features,
                       decoder_input_ids=decoder_input_ids).logits

    lang_token_id = int(logits[0, -1].argmax())
    lang_token = processor.tokenizer.decode([lang_token_id])
    # "<|en|>" -> "en"
    return lang_token.strip("<|>")
84
+
85
+
86
def transcribe(audio_path):
    """Transcribe an audio file and report its detected language.

    Parameters
    ----------
    audio_path : str
        Path to the audio file to transcribe.

    Returns
    -------
    tuple
        (transcribed text, language code). The language comes from the
        ASR pipeline when it reports one, otherwise from
        detect_language() as a fallback.
    """
    result = whisper_pipe(audio_path, return_language=True)
    print(result)  # debug trace of the raw pipeline output

    # The pipeline reports language per chunk; 'chunks' can be missing,
    # empty, or carry language=None — fall back to the manual detector
    # in all of those cases instead of raising KeyError/IndexError.
    chunks = result.get('chunks') or []
    lang = chunks[0].get('language') if chunks else None
    if lang is None:
        lang = detect_language(audio_path)

    return result['text'], lang
96
 
97
  def analyze_accent(url_or_file):
98
  try: