Spaces:
Build error
Build error
Update app.py
Browse files
app.py
CHANGED
@@ -22,7 +22,7 @@ def convert_to_wav(audio_file):
|
|
22 |
return wav_file
|
23 |
|
24 |
import torch
|
25 |
-
from transformers import
|
26 |
|
27 |
|
28 |
# Initialize processor and pipeline
|
@@ -30,25 +30,33 @@ processor = AutoProcessor.from_pretrained("NbAiLabBeta/nb-whisper-large")
|
|
30 |
device = torch.device('cuda' if torch.cuda.is_available() else 'cpu')
|
31 |
torch_dtype = torch.float32
|
32 |
|
|
|
|
|
|
|
|
|
|
|
|
|
33 |
pipe = pipeline("automatic-speech-recognition", model="NbAiLabBeta/nb-whisper-large", torch_dtype=torch_dtype)
|
34 |
|
35 |
-
language = "no"
|
36 |
task = "transcribe"
|
37 |
|
38 |
-
# @spaces.GPU(queue=True)
|
39 |
def transcribe_audio(audio_file):
|
40 |
if audio_file.endswith(".m4a"):
|
41 |
audio_file = convert_to_wav(audio_file)
|
42 |
|
43 |
start_time = time.time()
|
44 |
|
45 |
-
# forced_decoder_ids in the correct context
|
46 |
-
forced_decoder_ids = processor.get_decoder_prompt_ids(language=language, task=task)
|
47 |
-
|
48 |
with torch.no_grad():
|
49 |
-
|
50 |
-
|
51 |
-
|
|
|
|
|
|
|
|
|
|
|
|
|
52 |
|
53 |
text = output["text"]
|
54 |
end_time = time.time()
|
@@ -182,12 +190,13 @@ def text_rank_summary(text, num_paragraphs=3):
|
|
182 |
|
183 |
summary = [ranked_sentences[i][1] for i in range(num_paragraphs)] # top sentences for summary
|
184 |
return ' '.join(summary)
|
185 |
-
|
186 |
banner_html = """
|
187 |
<div style="text-align: center;">
|
188 |
-
<img src="https://
|
189 |
</div>
|
190 |
"""
|
|
|
191 |
|
192 |
|
193 |
import gradio as gr
|
|
|
22 |
return wav_file
|
23 |
|
24 |
import torch
|
25 |
+
from transformers import AutoProcessor, pipeline
|
26 |
|
27 |
|
28 |
# Initialize processor and pipeline
|
|
|
30 |
device = torch.device('cuda' if torch.cuda.is_available() else 'cpu')
|
31 |
torch_dtype = torch.float32
|
32 |
|
33 |
+
# Fall back to explicit pad/eos token ids only when the tokenizer leaves them unset (None)
|
34 |
+
if processor.tokenizer.pad_token_id is None:
|
35 |
+
processor.tokenizer.pad_token_id = processor.tokenizer.convert_tokens_to_ids("[PAD]")
|
36 |
+
if processor.tokenizer.eos_token_id is None:
|
37 |
+
processor.tokenizer.eos_token_id = processor.tokenizer.convert_tokens_to_ids("[EOS]")
|
38 |
+
|
39 |
pipe = pipeline("automatic-speech-recognition", model="NbAiLabBeta/nb-whisper-large", torch_dtype=torch_dtype)
|
40 |
|
41 |
+
#language = "no"
|
42 |
task = "transcribe"
|
43 |
|
|
|
44 |
def transcribe_audio(audio_file):
|
45 |
if audio_file.endswith(".m4a"):
|
46 |
audio_file = convert_to_wav(audio_file)
|
47 |
|
48 |
start_time = time.time()
|
49 |
|
|
|
|
|
|
|
50 |
with torch.no_grad():
|
51 |
+
output = pipe(
|
52 |
+
audio_file,
|
53 |
+
chunk_length_s=30,
|
54 |
+
generate_kwargs={
|
55 |
+
"task": task,
|
56 |
+
"pad_token_id": processor.tokenizer.pad_token_id,
|
57 |
+
"eos_token_id": processor.tokenizer.eos_token_id
|
58 |
+
}
|
59 |
+
)
|
60 |
|
61 |
text = output["text"]
|
62 |
end_time = time.time()
|
|
|
190 |
|
191 |
summary = [ranked_sentences[i][1] for i in range(num_paragraphs)] # top sentences for summary
|
192 |
return ' '.join(summary)
|
193 |
+
|
194 |
banner_html = """
|
195 |
<div style="text-align: center;">
|
196 |
+
<img src="https://raw.githubusercontent.com/camparchimedes/sw-llm/main/annex/cooltext462376124862020.png" alt="" width="100%" height="auto">
|
197 |
</div>
|
198 |
"""
|
199 |
+
# https://raw.huggingface.co/spaces/camparchimedes/transcription_app/blob/main/banner_trans.png
|
200 |
|
201 |
|
202 |
import gradio as gr
|