camparchimedes committed on
Commit
ca78d98
·
verified ·
1 Parent(s): 61d43b9

Update app.py

Browse files
Files changed (1) hide show
  1. app.py +20 -11
app.py CHANGED
@@ -22,7 +22,7 @@ def convert_to_wav(audio_file):
22
  return wav_file
23
 
24
  import torch
25
- from transformers import pipeline, AutoProcessor, AutoModelForSpeechSeq2Seq
26
 
27
 
28
  # Initialize processor and pipeline
@@ -30,25 +30,33 @@ processor = AutoProcessor.from_pretrained("NbAiLabBeta/nb-whisper-large")
30
  device = torch.device('cuda' if torch.cuda.is_available() else 'cpu')
31
  torch_dtype = torch.float32
32
 
 
 
 
 
 
 
33
  pipe = pipeline("automatic-speech-recognition", model="NbAiLabBeta/nb-whisper-large", torch_dtype=torch_dtype)
34
 
35
- language = "no"
36
  task = "transcribe"
37
 
38
- # @spaces.GPU(queue=True)
39
  def transcribe_audio(audio_file):
40
  if audio_file.endswith(".m4a"):
41
  audio_file = convert_to_wav(audio_file)
42
 
43
  start_time = time.time()
44
 
45
- # forced_decoder_ids in the correct context
46
- forced_decoder_ids = processor.get_decoder_prompt_ids(language=language, task=task)
47
-
48
  with torch.no_grad():
49
- # CUDA within the function
50
- # with torch.cuda.device(device) if torch.cuda.is_available() else contextlib.nullcontext():
51
- output = pipe(audio_file, chunk_length_s=30, generate_kwargs={"forced_decoder_ids": forced_decoder_ids})
 
 
 
 
 
 
52
 
53
  text = output["text"]
54
  end_time = time.time()
@@ -182,12 +190,13 @@ def text_rank_summary(text, num_paragraphs=3):
182
 
183
  summary = [ranked_sentences[i][1] for i in range(num_paragraphs)] # top sentences for summary
184
  return ' '.join(summary)
185
-
186
  banner_html = """
187
  <div style="text-align: center;">
188
- <img src="https://github.com/camparchimedes/sw-llm/blob/main/annex/cooltext462376124862020.png" alt="" width="100%" height="auto">
189
  </div>
190
  """
 
191
 
192
 
193
  import gradio as gr
 
22
  return wav_file
23
 
24
  import torch
25
+ from transformers import AutoProcessor, pipeline
26
 
27
 
28
  # Initialize processor and pipeline
 
30
  device = torch.device('cuda' if torch.cuda.is_available() else 'cpu')
31
  torch_dtype = torch.float32
32
 
33
+ # Set distinct pad and eos tokens
34
+ if processor.tokenizer.pad_token_id is None:
35
+ processor.tokenizer.pad_token_id = processor.tokenizer.convert_tokens_to_ids("[PAD]")
36
+ if processor.tokenizer.eos_token_id is None:
37
+ processor.tokenizer.eos_token_id = processor.tokenizer.convert_tokens_to_ids("[EOS]")
38
+
39
  pipe = pipeline("automatic-speech-recognition", model="NbAiLabBeta/nb-whisper-large", torch_dtype=torch_dtype)
40
 
41
+ #language = "no"
42
  task = "transcribe"
43
 
 
44
  def transcribe_audio(audio_file):
45
  if audio_file.endswith(".m4a"):
46
  audio_file = convert_to_wav(audio_file)
47
 
48
  start_time = time.time()
49
 
 
 
 
50
  with torch.no_grad():
51
+ output = pipe(
52
+ audio_file,
53
+ chunk_length_s=30,
54
+ generate_kwargs={
55
+ "task": task,
56
+ "pad_token_id": processor.tokenizer.pad_token_id,
57
+ "eos_token_id": processor.tokenizer.eos_token_id
58
+ }
59
+ )
60
 
61
  text = output["text"]
62
  end_time = time.time()
 
190
 
191
  summary = [ranked_sentences[i][1] for i in range(num_paragraphs)] # top sentences for summary
192
  return ' '.join(summary)
193
+
194
  banner_html = """
195
  <div style="text-align: center;">
196
+ <img src="https://raw.githubusercontent.com/camparchimedes/sw-llm/main/annex/cooltext462376124862020.png" alt="" width="100%" height="auto">
197
  </div>
198
  """
199
+ # https://raw.huggingface.co/spaces/camparchimedes/transcription_app/blob/main/banner_trans.png
200
 
201
 
202
  import gradio as gr