camparchimedes committed on
Commit
ca866cd
·
verified ·
1 Parent(s): 89f3c24

Update app.py

Browse files
Files changed (1) hide show
  1. app.py +12 -6
app.py CHANGED
@@ -21,7 +21,7 @@ def convert_to_wav(audio_file):
21
  return wav_file
22
 
23
  import torch
24
- from transformers import pipeline # AutoProcessor, AutoModelForSpeechSeq2Seq
25
 
26
  device = torch.device('cuda' if torch.cuda.is_available() else 'cpu')
27
  torch_dtype = torch.float32
@@ -29,17 +29,22 @@ torch_dtype = torch.float32
29
  pipe = pipeline("automatic-speech-recognition", model="NbAiLabBeta/nb-whisper-large", device=device, torch_dtype=torch_dtype)
30
 
31
  # @spaces.GPU(queue=True)
 
 
 
 
 
 
 
32
  def transcribe_audio(audio_file, forced_decoder_ids):
33
  if audio_file.endswith(".m4a"):
34
  audio_file = convert_to_wav(audio_file)
35
 
36
  start_time = time.time()
37
- forced_decoder_ids = processor.get_decoder_prompt_ids(language=language, task=task)
38
- # check if still the case...........??*********************************************
39
- # "You have passed task=transcribe, but also have set `forced_decoder_ids` to [[1, 50288], [2, 50360], [3, 50364]] which creates a conflict. `forced_decoder_ids` will be ignored in favor of task=transcribe."
40
 
41
  with torch.no_grad():
42
- output = pipe(audio_file, chunk_length_s=30, generate_kwargs={"forced_decoder_ids”: forced_decoder_ids"})
43
 
44
  text = output["text"]
45
  end_time = time.time()
@@ -50,6 +55,7 @@ def transcribe_audio(audio_file, forced_decoder_ids):
50
 
51
  return text, result
52
 
 
53
  # [VERSION 3: full-on w/ 3 styles for summarization]
54
  import nltk
55
  from nltk.tokenize import word_tokenize, sent_tokenize
@@ -199,7 +205,7 @@ def save_to_pdf(text, summary):
199
 
200
  banner_html = """
201
  <div style="text-align: center;">
202
- <img src="https://huggingface.co/spaces/camparchimedes/text_app/raw/main/Olas%20AudioSwitch%20Shop.png" alt="Banner" width="100%" height="auto">
203
  </div>
204
  """
205
 
 
21
  return wav_file
22
 
23
  import torch
24
+ from transformers import pipeline, AutoProcessor # AutoModelForSpeechSeq2Seq
25
 
26
  device = torch.device('cuda' if torch.cuda.is_available() else 'cpu')
27
  torch_dtype = torch.float32
 
29
  pipe = pipeline("automatic-speech-recognition", model="NbAiLabBeta/nb-whisper-large", device=device, torch_dtype=torch_dtype)
30
 
31
  # @spaces.GPU(queue=True)
32
+
33
+ # Initialize processor before using it in the function
34
+ processor = AutoProcessor.from_pretrained("NbAiLabBeta/nb-whisper-large")
35
+
36
+ language = "no"
37
+ task = "transcribe"
38
+
39
  def transcribe_audio(audio_file, forced_decoder_ids):
40
  if audio_file.endswith(".m4a"):
41
  audio_file = convert_to_wav(audio_file)
42
 
43
  start_time = time.time()
44
+ forced_decoder_ids = processor.get_decoder_prompt_ids(language="no", task="transcribe")
 
 
45
 
46
  with torch.no_grad():
47
+ output = pipe(audio_file, chunk_length_s=30, generate_kwargs={"forced_decoder_ids": forced_decoder_ids})
48
 
49
  text = output["text"]
50
  end_time = time.time()
 
55
 
56
  return text, result
57
 
58
+
59
  # [VERSION 3: full-on w/ 3 styles for summarization]
60
  import nltk
61
  from nltk.tokenize import word_tokenize, sent_tokenize
 
205
 
206
  banner_html = """
207
  <div style="text-align: center;">
208
+ <img src="https://huggingface.co/spaces/camparchimedes/transcription_app/blob/main/Olas%20AudioSwitch%20Shop.png" alt="Banner" width="100%" height="auto">
209
  </div>
210
  """
211