Build error
Update app.py
app.py CHANGED
@@ -9,7 +9,7 @@ from nltk.tokenize import sent_tokenize
 import gradio as gr
 import warnings
 import torch
-from transformers import pipeline, AutoProcessor, AutoModelForSpeechSeq2Seq
+from transformers import pipeline, AutoTokenizer, AutoModelForSeq2SeqLM #AutoProcessor, AutoModelForSpeechSeq2Seq
 from pydub import AudioSegment
 import soundfile as sf
 import numpy as np
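The new import line drops the Whisper-specific classes in favor of the T5 summarization classes, keeping the old names only in a trailing comment. The second transcribe_audio() further down still calls model.generate() on Whisper features, though, so if that path is kept both sets of imports are needed; a minimal sketch:

from transformers import (
    pipeline,
    AutoTokenizer, AutoModelForSeq2SeqLM,      # t5-base summarizer
    AutoProcessor, AutoModelForSpeechSeq2Seq,  # still required by the model.generate() path below
)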
@@ -24,17 +24,20 @@ warnings.filterwarnings("ignore")
 HF_AUTH_TOKEN = os.getenv('HF_AUTH_TOKEN')
 
 
-model = AutoModelForSpeechSeq2Seq.from_pretrained("NbAiLabBeta/nb-whisper-large-semantic")
-processor = AutoProcessor.from_pretrained("NbAiLabBeta/nb-whisper-large-semantic")
+# model = AutoModelForSpeechSeq2Seq.from_pretrained("NbAiLabBeta/nb-whisper-large-semantic")
+# processor = AutoProcessor.from_pretrained("NbAiLabBeta/nb-whisper-large-semantic")
 
 device = torch.device('cuda' if torch.cuda.is_available() else 'cpu')
+torch_dtype = torch.float32
 model.to(device)
 
-asr = pipeline("automatic-speech-recognition", model=model, processor=processor, device=device, torch_dtype=torch.float32)
+#asr = pipeline("automatic-speech-recognition", model=model, processor=processor.tokenizer, device=device, torch_dtype=torch.float32)
+pipe = pipeline("automatic-speech-recognition", model="NbAiLabBeta/nb-whisper-large-semantic")
+
 
 def transcribe_audio(audio_file):
     with torch.no_grad():
-        output =
+        output = pipe(audio_file, chunk_length_s=28, generate_kwargs={"num_beams": 8, "task": "transcribe", "language": "no"})
     return output["text"]
 
 # Gradio interface
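This hunk is the likely cause of the Build error status above: model = ... is commented out, but the context line model.to(device) still executes, so the Space raises a NameError at startup; the new torch_dtype is also assigned without being passed to anything. A minimal sketch of a consistent pipeline-only setup (the model id, chunk length, and generate kwargs are from the diff; wiring device and torch_dtype into the pipeline is an assumption):

import torch
from transformers import pipeline

device = torch.device('cuda' if torch.cuda.is_available() else 'cpu')
torch_dtype = torch.float32

# let the pipeline own the model instead of calling model.to(device) on a
# name that no longer exists
pipe = pipeline(
    "automatic-speech-recognition",
    model="NbAiLabBeta/nb-whisper-large-semantic",
    device=device,
    torch_dtype=torch_dtype,
)

def transcribe_audio(audio_file):
    # the pipeline already runs generation without gradients, so the
    # original torch.no_grad() wrapper is not needed here
    output = pipe(
        audio_file,
        chunk_length_s=28,
        generate_kwargs={"num_beams": 8, "task": "transcribe", "language": "no"},
    )
    return output["text"]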
@@ -48,7 +51,7 @@ iface = gr.Interface(
     live=False
 )
 
-#
+# summarization model
 summarization_tokenizer = AutoTokenizer.from_pretrained("t5-base")
 summarization_model = AutoModelForSeq2SeqLM.from_pretrained("t5-base")
 
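Only the t5-base tokenizer and model are loaded in this hunk; the summarization call itself is outside the diff. A hypothetical summarize_text() helper, assuming standard T5 usage with the "summarize: " task prefix (the function name, length limits, and beam count are illustrative, not from the Space):

def summarize_text(text, max_summary_length=150):
    # T5 checkpoints are trained with task prefixes, hence "summarize: "
    inputs = summarization_tokenizer(
        "summarize: " + text,
        return_tensors="pt",
        max_length=512,
        truncation=True,
    )
    summary_ids = summarization_model.generate(
        inputs.input_ids,
        max_length=max_summary_length,
        num_beams=4,
        early_stopping=True,
    )
    return summarization_tokenizer.decode(summary_ids[0], skip_special_tokens=True)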
@@ -83,7 +86,7 @@ def transcribe_audio(audio_file, batch_size=4):
     output = model.generate(
         inputs.input_features,
         max_length=2048,
-        num_beams=
+        num_beams=8,
         task="transcribe",
         attention_mask=attention_mask,
         language="no"
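Note that this hunk sits inside a second def transcribe_audio(audio_file, batch_size=4):, which shadows the pipeline-based definition above, and its model.generate() call still depends on the model and processor that the earlier hunk commented out. If this low-level path is meant to survive, it presumably looks something like the sketch below (the feature-extraction and decode steps are assumptions; the diff also passes an attention_mask whose construction is not shown, so it is omitted here):

inputs = processor(audio_array, sampling_rate=16000, return_tensors="pt")
output = model.generate(
    inputs.input_features.to(device),
    max_length=2048,
    num_beams=8,
    task="transcribe",
    language="no",
)
text = processor.batch_decode(output, skip_special_tokens=True)[0]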