Update app.py
app.py CHANGED
@@ -1,6 +1,7 @@

import time
import os
+import spaces
import warnings
warnings.filterwarnings("ignore")
from pydub import AudioSegment
@@ -20,6 +21,7 @@ torch_dtype = torch.float32

asr = pipeline("automatic-speech-recognition", model="NbAiLabBeta/nb-whisper-large", device=device, torch_dtype=torch_dtype)

+@spaces.GPU(queue=True)
def transcribe_audio(audio_file):
    if audio_file.endswith(".m4a"):
        audio_file = convert_to_wav(audio_file)
@@ -59,7 +61,6 @@ def transcribe(audio_file):
    text = transcription
    return text, result

-
def clean_text(text):
    text = re.sub(r'https?:\/\/.*[\r\n]*', '', str(text), flags=re.MULTILINE)
    text = re.sub(r'\<a href', ' ', str(text))
@@ -91,10 +92,11 @@ def preprocess_text(text):

from transformers import AutoTokenizer, AutoModelForSeq2SeqLM

-summarization_model = AutoModelForSeq2SeqLM.from_pretrained("t5-base", return_dict=True, torch_dtype=torch.float16)
+summarization_model = AutoModelForSeq2SeqLM.from_pretrained("t5-base", device=device, return_dict=True, torch_dtype=torch.float16)
summarization_tokenizer = AutoTokenizer.from_pretrained("t5-base")
summarization_model.to(device)

+@spaces.GPU(queue=True)
def summarize_text(text):
    preprocessed_text = preprocess_text(text)
    if preprocessed_text is None:
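For context, the change follows the usual ZeroGPU recipe on Hugging Face Spaces: import the spaces package and decorate each function that needs the GPU with @spaces.GPU, so hardware is attached per call rather than assumed at startup. The listing below is a minimal, self-contained sketch of that recipe, not the full app.py; the model name, the device/torch_dtype arguments and the @spaces.GPU(queue=True) decorator mirror the diff, while the CUDA fallback and the result handling are assumptions added only to make the sketch runnable:

import spaces
import torch
from transformers import pipeline

# Assumption: fall back to CPU when no GPU is visible at import time; the
# original app.py pins torch_dtype = torch.float32 instead (see the second hunk).
device = "cuda" if torch.cuda.is_available() else "cpu"
torch_dtype = torch.float16 if device == "cuda" else torch.float32

# As in the diff, the pipeline is built at module level; on ZeroGPU the GPU
# is only guaranteed inside calls decorated with @spaces.GPU.
asr = pipeline(
    "automatic-speech-recognition",
    model="NbAiLabBeta/nb-whisper-large",
    device=device,
    torch_dtype=torch_dtype,
)

# queue=True is copied from the commit; the accepted decorator options vary
# by spaces version, so treat it as illustrative.
@spaces.GPU(queue=True)
def transcribe_audio(audio_file):
    # The GPU-bound work happens inside the decorated call.
    result = asr(audio_file)
    return result["text"]

Only transcribe_audio is sketched here; the diff applies the same decorator, unchanged, to summarize_text as well.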