Spaces:
Build error
Build error
Update app.py
Browse files
app.py
CHANGED
@@ -1,5 +1,5 @@
|
|
1 |
"""
|
2 |
-
Version:
|
3 |
|
4 |
Description: webapp, transkribering (norsk), NbAiLab/nb-whisper-large, oppsummering, pdf-download.
|
5 |
"""
|
@@ -22,7 +22,7 @@ import warnings
|
|
22 |
from pydub import AudioSegment
|
23 |
import torch
|
24 |
import torchaudio
|
25 |
-
from transformers import pipeline
|
26 |
from huggingface_hub import model_info
|
27 |
import spacy
|
28 |
import networkx as nx
|
@@ -46,35 +46,29 @@ def convert_to_wav(audio_file):
|
|
46 |
audio.export(wav_file, format="wav")
|
47 |
return wav_file
|
48 |
|
49 |
-
|
50 |
-
# Define model
|
51 |
-
MODEL_NAME = "NbAiLab/nb-whisper-large"
|
52 |
-
lang = "no"
|
53 |
-
|
54 |
# Initialize device for torch
|
55 |
device = 0 if torch.cuda.is_available() else "cpu"
|
|
|
56 |
|
57 |
-
#
|
58 |
-
|
59 |
-
|
60 |
-
|
61 |
-
chunk_length_s=30,
|
62 |
-
device=device,
|
63 |
-
)
|
64 |
-
|
65 |
-
|
66 |
-
|
67 |
-
# # Set eos_token_id and pad_token_id to different values
|
68 |
-
#pipe.model.config.eos_token_id = 0
|
69 |
-
#pipe.model.config.pad_token_id = 1
|
70 |
|
71 |
-
# Simple
|
72 |
-
pipe.model.config.pad_token_id = pipe.tokenizer.get_decoder_prompt_ids(language=lang)
|
73 |
|
|
|
|
|
|
|
74 |
|
75 |
-
#
|
76 |
-
|
|
|
|
|
|
|
|
|
77 |
|
|
|
|
|
78 |
|
79 |
# Transcribe audio
|
80 |
def transcribe_audio(audio_file):
|
@@ -83,7 +77,9 @@ def transcribe_audio(audio_file):
|
|
83 |
|
84 |
start_time = time.time()
|
85 |
|
86 |
-
|
|
|
|
|
87 |
|
88 |
output_time = time.time() - start_time
|
89 |
|
@@ -112,7 +108,6 @@ def transcribe_audio(audio_file):
|
|
112 |
|
113 |
return text, result
|
114 |
|
115 |
-
|
116 |
# Clean and preprocess text for summarization
|
117 |
def clean_text(text):
|
118 |
text = re.sub(r'https?:\/\/.*[\r\n]*', '', text)
|
@@ -270,3 +265,6 @@ with iface:
|
|
270 |
pdf_both.click(fn=lambda text, summary: save_to_pdf(text, summary), inputs=[text_output, summary_output], outputs=[pdf_output])
|
271 |
|
272 |
iface.launch(share=True, debug=True)
|
|
|
|
|
|
|
|
1 |
"""
|
2 |
+
Version: 5th_pruned_optimized_transcription_app.py (alias HF_modded_nb-whisper_T4)
|
3 |
|
4 |
Description: webapp, transkribering (norsk), NbAiLab/nb-whisper-large, oppsummering, pdf-download.
|
5 |
"""
|
|
|
22 |
from pydub import AudioSegment
|
23 |
import torch
|
24 |
import torchaudio
|
25 |
+
from transformers import pipeline, WhisperTokenizer, WhisperForConditionalGeneration, WhisperProcessor
|
26 |
from huggingface_hub import model_info
|
27 |
import spacy
|
28 |
import networkx as nx
|
|
|
46 |
audio.export(wav_file, format="wav")
|
47 |
return wav_file
|
48 |
|
|
|
|
|
|
|
|
|
|
|
49 |
# Initialize device for torch
|
50 |
device = 0 if torch.cuda.is_available() else "cpu"
|
51 |
+
torch_dtype = torch.float32
|
52 |
|
53 |
+
# Load tokenizer and model
|
54 |
+
tokenizer = WhisperTokenizer.from_pretrained("NbAiLab/nb-whisper-large")
|
55 |
+
model = WhisperForConditionalGeneration.from_pretrained("NbAiLab/nb-whisper-large")
|
56 |
+
processor = WhisperProcessor.from_pretrained("NbAiLab/nb-whisper-large")
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
57 |
|
|
|
|
|
58 |
|
59 |
+
# The model cannot be compiled with torch.jit.script, so the JIT lines below stay commented out
|
60 |
+
#model = model.to(device)
|
61 |
+
#model = torch.jit.script(model)
|
62 |
|
63 |
+
# Generation kwargs
|
64 |
+
generate_kwargs = {
|
65 |
+
"num_beams": 5,
|
66 |
+
"task": "transcribe",
|
67 |
+
"language": "no"
|
68 |
+
}
|
69 |
|
70 |
+
# Initialize pipeline
|
71 |
+
asr = pipeline("automatic-speech-recognition", model=model, tokenizer=processor.tokenizer, feature_extractor=processor.feature_extractor, device=device, torch_dtype=torch_dtype)
|
72 |
|
73 |
# Transcribe audio
|
74 |
def transcribe_audio(audio_file):
|
|
|
77 |
|
78 |
start_time = time.time()
|
79 |
|
80 |
+
# ASR pipeline on audio
|
81 |
+
with torch.no_grad():
|
82 |
+
text = asr(audio_file, chunk_length_s=30, generate_kwargs=generate_kwargs)["text"]
|
83 |
|
84 |
output_time = time.time() - start_time
|
85 |
|
|
|
108 |
|
109 |
return text, result
|
110 |
|
|
|
111 |
# Clean and preprocess text for summarization
|
112 |
def clean_text(text):
|
113 |
text = re.sub(r'https?:\/\/.*[\r\n]*', '', text)
|
|
|
265 |
pdf_both.click(fn=lambda text, summary: save_to_pdf(text, summary), inputs=[text_output, summary_output], outputs=[pdf_output])
|
266 |
|
267 |
iface.launch(share=True, debug=True)
|
268 |
+
|
269 |
+
|
270 |
+
|