camparchimedes committed on
Commit
caafdf0
·
verified ·
1 Parent(s): 53454a4

Update app.py

Browse files
Files changed (1) hide show
  1. app.py +10 -2
app.py CHANGED
@@ -66,9 +66,14 @@ def convert_to_wav(audio_file):
66
 
67
 
68
  # @spaces.GPU(duration=300, queue=False)
 
 
 
 
 
 
69
  def transcribe_audio(audio_file, batch_size=4):
70
  start_time = time.time()
71
- # Convert .m4a to .wav
72
  if audio_file.endswith(".m4a"):
73
  audio_file = convert_to_wav(audio_file)
74
 
@@ -89,7 +94,9 @@ def transcribe_audio(audio_file, batch_size=4):
89
  num_beams=8,
90
  task="transcribe",
91
  attention_mask=attention_mask,
92
- language="no"
 
 
93
  )
94
  transcription += " ".join(processor.batch_decode(output, skip_special_tokens=True)) + " "
95
 
@@ -101,6 +108,7 @@ def transcribe_audio(audio_file, batch_size=4):
101
 
102
  return transcription.strip(), result
103
 
 
104
  # Graph-based summarization|TextRank
105
  def summarize_text(text):
106
  sentences = sent_tokenize(text)
 
66
 
67
 
68
  # @spaces.GPU(duration=300, queue=False)
69
+ # Set distinct pad and eos tokens
70
+ if processor.tokenizer.pad_token_id is None:
71
+ processor.tokenizer.pad_token_id = processor.tokenizer.convert_tokens_to_ids("[PAD]")
72
+ if processor.tokenizer.eos_token_id is None:
73
+ processor.tokenizer.eos_token_id = processor.tokenizer.convert_tokens_to_ids("[EOS]")
74
+
75
  def transcribe_audio(audio_file, batch_size=4):
76
  start_time = time.time()
 
77
  if audio_file.endswith(".m4a"):
78
  audio_file = convert_to_wav(audio_file)
79
 
 
94
  num_beams=8,
95
  task="transcribe",
96
  attention_mask=attention_mask,
97
+ language="no",
98
+ pad_token_id=processor.tokenizer.pad_token_id,
99
+ eos_token_id=processor.tokenizer.eos_token_id
100
  )
101
  transcription += " ".join(processor.batch_decode(output, skip_special_tokens=True)) + " "
102
 
 
108
 
109
  return transcription.strip(), result
110
 
111
+
112
  # Graph-based summarization|TextRank
113
  def summarize_text(text):
114
  sentences = sent_tokenize(text)