Spaces:
Build error
Update app.py
app.py CHANGED
@@ -66,9 +66,14 @@ def convert_to_wav(audio_file):
 
 
 # @spaces.GPU(duration=300, queue=False)
+# Set distinct pad and eos tokens
+if processor.tokenizer.pad_token_id is None:
+    processor.tokenizer.pad_token_id = processor.tokenizer.convert_tokens_to_ids("[PAD]")
+if processor.tokenizer.eos_token_id is None:
+    processor.tokenizer.eos_token_id = processor.tokenizer.convert_tokens_to_ids("[EOS]")
+
 def transcribe_audio(audio_file, batch_size=4):
     start_time = time.time()
-    # Convert .m4a to .wav
     if audio_file.endswith(".m4a"):
         audio_file = convert_to_wav(audio_file)
 
@@ -89,7 +94,9 @@ def transcribe_audio(audio_file, batch_size=4):
             num_beams=8,
             task="transcribe",
             attention_mask=attention_mask,
-            language="no"
+            language="no",
+            pad_token_id=processor.tokenizer.pad_token_id,
+            eos_token_id=processor.tokenizer.eos_token_id
         )
         transcription += " ".join(processor.batch_decode(output, skip_special_tokens=True)) + " "
 
@@ -101,6 +108,7 @@ def transcribe_audio(audio_file, batch_size=4):
 
     return transcription.strip(), result
 
+
 # Graph-based summarization|TextRank
 def summarize_text(text):
     sentences = sent_tokenize(text)
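
Why the new arguments matter: when pad_token_id is unset, transformers' generate() falls back to the eos id and logs a warning, and padded batches decode less predictably. The commit gives the tokenizer distinct pad/eos ids and passes them to generate() explicitly. Below is a minimal sketch of how the pieces fit together, assuming an openai/whisper-small checkpoint and a single pre-loaded audio array; the Space's actual model, chunking loop, and attention-mask construction are not visible in this diff.

from transformers import WhisperProcessor, WhisperForConditionalGeneration

model_id = "openai/whisper-small"  # assumed checkpoint; the Space's real model is not shown in the diff
processor = WhisperProcessor.from_pretrained(model_id)
model = WhisperForConditionalGeneration.from_pretrained(model_id)

# Mirror the commit: make sure pad and eos ids are set before generation
if processor.tokenizer.pad_token_id is None:
    processor.tokenizer.pad_token_id = processor.tokenizer.convert_tokens_to_ids("[PAD]")
if processor.tokenizer.eos_token_id is None:
    processor.tokenizer.eos_token_id = processor.tokenizer.convert_tokens_to_ids("[EOS]")

def transcribe_chunk(waveform, sampling_rate=16000):
    # Feature extraction step is assumed; the Space also passes an attention_mask, omitted here
    inputs = processor(waveform, sampling_rate=sampling_rate, return_tensors="pt")
    output = model.generate(
        inputs.input_features,
        num_beams=8,
        task="transcribe",
        language="no",
        pad_token_id=processor.tokenizer.pad_token_id,
        eos_token_id=processor.tokenizer.eos_token_id,
    )
    return " ".join(processor.batch_decode(output, skip_special_tokens=True))

One caveat: convert_tokens_to_ids("[PAD]") only yields a meaningful id if "[PAD]" is actually in the tokenizer's vocabulary; for an out-of-vocabulary token it typically returns the unknown-token id, so it is worth verifying the ids that come back.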
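
The last hunk only touches context around summarize_text, whose comment names graph-based TextRank summarization. For readers unfamiliar with the approach, here is a generic sketch; it is not the Space's implementation (only the sent_tokenize line is visible in the diff) and assumes scikit-learn and networkx for the similarity graph and PageRank scoring.

import networkx as nx
from nltk.tokenize import sent_tokenize
from sklearn.feature_extraction.text import TfidfVectorizer
from sklearn.metrics.pairwise import cosine_similarity

def summarize_text_sketch(text, num_sentences=3):
    # Split into sentences (requires the nltk punkt data)
    sentences = sent_tokenize(text)
    if len(sentences) <= num_sentences:
        return text
    # Build a sentence-similarity graph and rank nodes with PageRank
    tfidf = TfidfVectorizer().fit_transform(sentences)
    similarity = cosine_similarity(tfidf)
    graph = nx.from_numpy_array(similarity)
    scores = nx.pagerank(graph)
    # Keep the highest-scoring sentences, restored to their original order
    ranked = sorted(range(len(sentences)), key=lambda i: scores[i], reverse=True)[:num_sentences]
    return " ".join(sentences[i] for i in sorted(ranked))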