camparchimedes committed (verified)
Commit 0634e0f · 1 Parent(s): 052955a

Update app.py

Files changed (1): app.py (+10 -7)
app.py CHANGED
@@ -9,7 +9,7 @@ from nltk.tokenize import sent_tokenize
 import gradio as gr
 import warnings
 import torch
-from transformers import pipeline, AutoProcessor, AutoModelForSpeechSeq2Seq, AutoTokenizer, AutoModelForSeq2SeqLM
+from transformers import pipeline, AutoTokenizer, AutoModelForSeq2SeqLM #AutoProcessor, AutoModelForSpeechSeq2Seq
 from pydub import AudioSegment
 import soundfile as sf
 import numpy as np
@@ -24,17 +24,20 @@ warnings.filterwarnings("ignore")
 HF_AUTH_TOKEN = os.getenv('HF_AUTH_TOKEN')
 
 
-model = AutoModelForSpeechSeq2Seq.from_pretrained("NbAiLabBeta/nb-whisper-large-semantic")
-processor = AutoProcessor.from_pretrained("NbAiLabBeta/nb-whisper-large-semantic")
+# model = AutoModelForSpeechSeq2Seq.from_pretrained("NbAiLabBeta/nb-whisper-large-semantic")
+# processor = AutoProcessor.from_pretrained("NbAiLabBeta/nb-whisper-large-semantic")
 
 device = torch.device('cuda' if torch.cuda.is_available() else 'cpu')
+torch_dtype = torch.float32
 model.to(device)
 
-asr = pipeline("automatic-speech-recognition", model=model, processor=processor, device=device, torch_dtype=torch.float32)
+#asr = pipeline("automatic-speech-recognition", model=model, processor=processor.tokenizer, device=device, torch_dtype=torch.float32)
+pipe = pipeline("automatic-speech-recognition", model="NbAiLabBeta/nb-whisper-large-semantic")
+
 
 def transcribe_audio(audio_file):
     with torch.no_grad():
-        output = asr(audio_file, chunk_length_s=28, generate_kwargs={"num_beams": 5, "task": "transcribe", "language": "no"})
+        output = pipe(audio_file, chunk_length_s=28, generate_kwargs={"num_beams": 8, "task": "transcribe", "language": "no"})
     return output["text"]
 
 # Gradio interface
@@ -48,7 +51,7 @@ iface = gr.Interface(
     live=False
 )
 
-# Load summarization models with authentication token
+# summarization model
 summarization_tokenizer = AutoTokenizer.from_pretrained("t5-base")
 summarization_model = AutoModelForSeq2SeqLM.from_pretrained("t5-base")
 
@@ -83,7 +86,7 @@ def transcribe_audio(audio_file, batch_size=4):
         output = model.generate(
            inputs.input_features,
            max_length=2048,
-           num_beams=7,
+           num_beams=8,
            task="transcribe",
            attention_mask=attention_mask,
            language="no"
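For context, a minimal sketch of what the pipeline-based transcription path introduced in this commit amounts to is shown below. The device handling is an assumption (the diff keeps model.to(device) while the explicit model/processor loading is commented out), and ffmpeg is assumed to be available for the pipeline's audio decoding:

import torch
from transformers import pipeline

# Sketch only: passing a device index to pipeline() is an assumption, not taken from the diff.
device = 0 if torch.cuda.is_available() else -1

pipe = pipeline(
    "automatic-speech-recognition",
    model="NbAiLabBeta/nb-whisper-large-semantic",
    device=device,
    torch_dtype=torch.float32,
)

def transcribe_audio(audio_file):
    # ~28 s chunks and beam search, matching the generate_kwargs in the diff
    with torch.no_grad():
        output = pipe(
            audio_file,
            chunk_length_s=28,
            generate_kwargs={"num_beams": 8, "task": "transcribe", "language": "no"},
        )
    return output["text"]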
 
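The t5-base summarizer kept in this diff would typically be applied to the transcription along these lines; the summarize_text helper name, the "summarize: " prefix handling and the generation settings below are illustrative assumptions, not part of this commit:

from transformers import AutoTokenizer, AutoModelForSeq2SeqLM

summarization_tokenizer = AutoTokenizer.from_pretrained("t5-base")
summarization_model = AutoModelForSeq2SeqLM.from_pretrained("t5-base")

def summarize_text(text, max_input_length=512, max_summary_length=150):
    # T5 is a text-to-text model; summarization uses the "summarize: " task prefix
    inputs = summarization_tokenizer(
        "summarize: " + text,
        return_tensors="pt",
        truncation=True,
        max_length=max_input_length,
    )
    summary_ids = summarization_model.generate(
        inputs.input_ids,
        max_length=max_summary_length,
        num_beams=4,
        early_stopping=True,
    )
    return summarization_tokenizer.decode(summary_ids[0], skip_special_tokens=True)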