camparchimedes committed on
Commit
052955a
·
verified ·
1 Parent(s): 89c78e7

Update app.py

Browse files
Files changed (1) hide show
  1. app.py +10 -15
app.py CHANGED
@@ -9,7 +9,7 @@ from nltk.tokenize import sent_tokenize
9
  import gradio as gr
10
  import warnings
11
  import torch
12
- from transformers import pipeline, WhisperTokenizer, WhisperForConditionalGeneration, WhisperProcessor, AutoTokenizer, AutoModelForSeq2SeqLM
13
  from pydub import AudioSegment
14
  import soundfile as sf
15
  import numpy as np
@@ -24,15 +24,13 @@ warnings.filterwarnings("ignore")
24
  HF_AUTH_TOKEN = os.getenv('HF_AUTH_TOKEN')
25
 
26
 
27
- tokenizer = WhisperTokenizer.from_pretrained("NbAiLabBeta/nb-whisper-medium")
28
- model = WhisperForConditionalGeneration.from_pretrained("NbAiLabBeta/nb-whisper-medium")
29
- processor = WhisperProcessor.from_pretrained("NbAiLabBeta/nb-whisper-medium")
30
-
31
 
32
  device = torch.device('cuda' if torch.cuda.is_available() else 'cpu')
33
  model.to(device)
34
 
35
- asr = pipeline("automatic-speech-recognition", model=model, tokenizer=processor.tokenizer, feature_extractor=processor.feature_extractor, device=device, torch_dtype=torch.float32)
36
 
37
  def transcribe_audio(audio_file):
38
  with torch.no_grad():
@@ -82,9 +80,9 @@ def transcribe_audio(audio_file, batch_size=4):
82
  inputs = inputs.to(device)
83
  attention_mask = inputs.attention_mask.to(device) if 'attention_mask' in inputs else None
84
  with torch.no_grad():
85
- output = transcription_model.generate(
86
  inputs.input_features,
87
- max_length=2048, # Increase max_length for longer outputs
88
  num_beams=7,
89
  task="transcribe",
90
  attention_mask=attention_mask,
@@ -123,10 +121,7 @@ def summarize_text(text):
123
  # HTML syntax for imagery
124
  image_html = """
125
  <div style="text-align: center;">
126
- <img src="https://huggingface.co/spaces/camparchimedes/ola_s-audioshop/raw/main/Olas%20AudioSwitch%20Shop.png" alt="Banner" width="87%" height="auto">
127
- </div>
128
- <div style="text-align: center; margin-top: 20px;">
129
- <img src="https://huggingface.co/spaces/camparchimedes/ola_s-audioshop/raw/main/picture.jpg" alt="Additional Image" width="68%" height="auto">
130
  </div>
131
  """
132
 
@@ -135,11 +130,11 @@ iface = gr.Blocks()
135
 
136
  with iface:
137
  gr.HTML(image_html)
138
- gr.Markdown("# Switch Work Audio Transcription App\nUpload an audio file to get the transcription")
139
  audio_input = gr.Audio(type="filepath")
140
  batch_size_input = gr.Slider(minimum=1, maximum=16, step=1, label="Batch Size")
141
- transcription_output = gr.Textbox()
142
- summary_output = gr.Textbox()
143
  transcribe_button = gr.Button("Transcribe and Summarize")
144
 
145
  def transcribe_and_summarize(audio_file, batch_size):
 
9
  import gradio as gr
10
  import warnings
11
  import torch
12
+ from transformers import pipeline, AutoProcessor, AutoModelForSpeechSeq2Seq, AutoTokenizer, AutoModelForSeq2SeqLM
13
  from pydub import AudioSegment
14
  import soundfile as sf
15
  import numpy as np
 
24
  HF_AUTH_TOKEN = os.getenv('HF_AUTH_TOKEN')
25
 
26
 
27
+ model = AutoModelForSpeechSeq2Seq.from_pretrained("NbAiLabBeta/nb-whisper-large-semantic")
28
+ processor = AutoProcessor.from_pretrained("NbAiLabBeta/nb-whisper-large-semantic")
 
 
29
 
30
  device = torch.device('cuda' if torch.cuda.is_available() else 'cpu')
31
  model.to(device)
32
 
33
+ asr = pipeline("automatic-speech-recognition", model=model, processor=processor, device=device, torch_dtype=torch.float32)
34
 
35
  def transcribe_audio(audio_file):
36
  with torch.no_grad():
 
80
  inputs = inputs.to(device)
81
  attention_mask = inputs.attention_mask.to(device) if 'attention_mask' in inputs else None
82
  with torch.no_grad():
83
+ output = model.generate(
84
  inputs.input_features,
85
+ max_length=2048,
86
  num_beams=7,
87
  task="transcribe",
88
  attention_mask=attention_mask,
 
121
  # HTML syntax for imagery
122
  image_html = """
123
  <div style="text-align: center;">
124
+ <img src="https://huggingface.co/spaces/camparchimedes/ola_s-audioshop/raw/main/picture.png" alt="Banner" width="87%" height="auto">
 
 
 
125
  </div>
126
  """
127
 
 
130
 
131
  with iface:
132
  gr.HTML(image_html)
133
+ gr.Markdown("# Upload an audio file to get the transcription")
134
  audio_input = gr.Audio(type="filepath")
135
  batch_size_input = gr.Slider(minimum=1, maximum=16, step=1, label="Batch Size")
136
+ transcription_output = gr.Textbox("Transcription | nb-whisper-large-semantic")
137
+ summary_output = gr.Textbox("Summary | TextRank, graph-based")
138
  transcribe_button = gr.Button("Transcribe and Summarize")
139
 
140
  def transcribe_and_summarize(audio_file, batch_size):