Spaces:
Build error
Build error
Update app.py
Browse files
app.py
CHANGED
@@ -9,7 +9,7 @@ from nltk.tokenize import sent_tokenize
|
|
9 |
import gradio as gr
|
10 |
import warnings
|
11 |
import torch
|
12 |
-
from transformers import pipeline,
|
13 |
from pydub import AudioSegment
|
14 |
import soundfile as sf
|
15 |
import numpy as np
|
@@ -24,15 +24,13 @@ warnings.filterwarnings("ignore")
|
|
24 |
HF_AUTH_TOKEN = os.getenv('HF_AUTH_TOKEN')
|
25 |
|
26 |
|
27 |
-
|
28 |
-
|
29 |
-
processor = WhisperProcessor.from_pretrained("NbAiLabBeta/nb-whisper-medium")
|
30 |
-
|
31 |
|
32 |
device = torch.device('cuda' if torch.cuda.is_available() else 'cpu')
|
33 |
model.to(device)
|
34 |
|
35 |
-
asr = pipeline("automatic-speech-recognition", model=model,
|
36 |
|
37 |
def transcribe_audio(audio_file):
|
38 |
with torch.no_grad():
|
@@ -82,9 +80,9 @@ def transcribe_audio(audio_file, batch_size=4):
|
|
82 |
inputs = inputs.to(device)
|
83 |
attention_mask = inputs.attention_mask.to(device) if 'attention_mask' in inputs else None
|
84 |
with torch.no_grad():
|
85 |
-
output =
|
86 |
inputs.input_features,
|
87 |
-
max_length=2048,
|
88 |
num_beams=7,
|
89 |
task="transcribe",
|
90 |
attention_mask=attention_mask,
|
@@ -123,10 +121,7 @@ def summarize_text(text):
|
|
123 |
# HTML syntax for imagery
|
124 |
image_html = """
|
125 |
<div style="text-align: center;">
|
126 |
-
<img src="https://huggingface.co/spaces/camparchimedes/ola_s-audioshop/raw/main/
|
127 |
-
</div>
|
128 |
-
<div style="text-align: center; margin-top: 20px;">
|
129 |
-
<img src="https://huggingface.co/spaces/camparchimedes/ola_s-audioshop/raw/main/picture.jpg" alt="Additional Image" width="68%" height="auto">
|
130 |
</div>
|
131 |
"""
|
132 |
|
@@ -135,11 +130,11 @@ iface = gr.Blocks()
|
|
135 |
|
136 |
with iface:
|
137 |
gr.HTML(image_html)
|
138 |
-
gr.Markdown("#
|
139 |
audio_input = gr.Audio(type="filepath")
|
140 |
batch_size_input = gr.Slider(minimum=1, maximum=16, step=1, label="Batch Size")
|
141 |
-
transcription_output = gr.Textbox()
|
142 |
-
summary_output = gr.Textbox()
|
143 |
transcribe_button = gr.Button("Transcribe and Summarize")
|
144 |
|
145 |
def transcribe_and_summarize(audio_file, batch_size):
|
|
|
9 |
import gradio as gr
|
10 |
import warnings
|
11 |
import torch
|
12 |
+
from transformers import pipeline, AutoProcessor, AutoModelForSpeechSeq2Seq, AutoTokenizer, AutoModelForSeq2SeqLM
|
13 |
from pydub import AudioSegment
|
14 |
import soundfile as sf
|
15 |
import numpy as np
|
|
|
24 |
HF_AUTH_TOKEN = os.getenv('HF_AUTH_TOKEN')
|
25 |
|
26 |
|
27 |
+
model = AutoModelForSpeechSeq2Seq.from_pretrained("NbAiLabBeta/nb-whisper-large-semantic")
|
28 |
+
processor = AutoProcessor.from_pretrained("NbAiLabBeta/nb-whisper-large-semantic")
|
|
|
|
|
29 |
|
30 |
device = torch.device('cuda' if torch.cuda.is_available() else 'cpu')
|
31 |
model.to(device)
|
32 |
|
33 |
+
asr = pipeline("automatic-speech-recognition", model=model, processor=processor, device=device, torch_dtype=torch.float32)
|
34 |
|
35 |
def transcribe_audio(audio_file):
|
36 |
with torch.no_grad():
|
|
|
80 |
inputs = inputs.to(device)
|
81 |
attention_mask = inputs.attention_mask.to(device) if 'attention_mask' in inputs else None
|
82 |
with torch.no_grad():
|
83 |
+
output = model.generate(
|
84 |
inputs.input_features,
|
85 |
+
max_length=2048,
|
86 |
num_beams=7,
|
87 |
task="transcribe",
|
88 |
attention_mask=attention_mask,
|
|
|
121 |
# HTML syntax for imagery
|
122 |
image_html = """
|
123 |
<div style="text-align: center;">
|
124 |
+
<img src="https://huggingface.co/spaces/camparchimedes/ola_s-audioshop/raw/main/picture.png" alt="Banner" width="87%" height="auto">
|
|
|
|
|
|
|
125 |
</div>
|
126 |
"""
|
127 |
|
|
|
130 |
|
131 |
with iface:
|
132 |
gr.HTML(image_html)
|
133 |
+
gr.Markdown("# Upload an audio file to get the transcription")
|
134 |
audio_input = gr.Audio(type="filepath")
|
135 |
batch_size_input = gr.Slider(minimum=1, maximum=16, step=1, label="Batch Size")
|
136 |
+
transcription_output = gr.Textbox("Transcription | nb-whisper-large-semantic")
|
137 |
+
summary_output = gr.Textbox("Summary | TextRank, graph-based")
|
138 |
transcribe_button = gr.Button("Transcribe and Summarize")
|
139 |
|
140 |
def transcribe_and_summarize(audio_file, batch_size):
|