camparchimedes committed on
Commit
a337c3f
·
verified ·
1 Parent(s): f72e09a

Update app.py

Browse files
Files changed (1) hide show
  1. app.py +24 -26
app.py CHANGED
@@ -1,5 +1,5 @@
1
  """
2
- Version: 4th_pruned_optimized_transcription_app.py
3
 
4
  Description: webapp, transkribering (norsk), NbAiLab/nb-whisper-large, oppsummering, pdf-download.
5
  """
@@ -22,7 +22,7 @@ import warnings
22
  from pydub import AudioSegment
23
  import torch
24
  import torchaudio
25
- from transformers import pipeline
26
  from huggingface_hub import model_info
27
  import spacy
28
  import networkx as nx
@@ -46,35 +46,29 @@ def convert_to_wav(audio_file):
46
  audio.export(wav_file, format="wav")
47
  return wav_file
48
 
49
-
50
- # D3efine model
51
- MODEL_NAME = "NbAiLab/nb-whisper-large"
52
- lang = "no"
53
-
54
  # Initialize device for torch
55
  device = 0 if torch.cuda.is_available() else "cpu"
 
56
 
57
- # Define pipeline config
58
- pipe = pipeline(
59
- task="automatic-speech-recognition",
60
- model=MODEL_NAME,
61
- chunk_length_s=30,
62
- device=device,
63
- )
64
-
65
-
66
-
67
- # # Set eos_token_id and pad_token_id to different values
68
- #pipe.model.config.eos_token_id = 0
69
- #pipe.model.config.pad_token_id = 1
70
 
71
- # Simple
72
- pipe.model.config.pad_token_id = pipe.tokenizer.get_decoder_prompt_ids(language=lang)
73
 
 
 
 
74
 
75
- #assert pipe.model.config.eos_token_id != pipe.model.config.pad_token_id
76
- #"eos_token_id and pad_token_id must be different"
 
 
 
 
77
 
 
 
78
 
79
  # Transcribe audio
80
  def transcribe_audio(audio_file):
@@ -83,7 +77,9 @@ def transcribe_audio(audio_file):
83
 
84
  start_time = time.time()
85
 
86
- text = pipe(audio_file)["text"]
 
 
87
 
88
  output_time = time.time() - start_time
89
 
@@ -112,7 +108,6 @@ def transcribe_audio(audio_file):
112
 
113
  return text, result
114
 
115
-
116
  # Clean and preprocess text for summarization
117
  def clean_text(text):
118
  text = re.sub(r'https?:\/\/.*[\r\n]*', '', text)
@@ -270,3 +265,6 @@ with iface:
270
  pdf_both.click(fn=lambda text, summary: save_to_pdf(text, summary), inputs=[text_output, summary_output], outputs=[pdf_output])
271
 
272
  iface.launch(share=True, debug=True)
 
 
 
 
1
  """
2
+ Version: 5th_pruned_optimized_transcription_app.py (alias HF_modded_nb-whisper_T4)
3
 
4
  Description: webapp, transkribering (norsk), NbAiLab/nb-whisper-large, oppsummering, pdf-download.
5
  """
 
22
  from pydub import AudioSegment
23
  import torch
24
  import torchaudio
25
+ from transformers import pipeline, WhisperTokenizer, WhisperForConditionalGeneration, WhisperProcessor
26
  from huggingface_hub import model_info
27
  import spacy
28
  import networkx as nx
 
46
  audio.export(wav_file, format="wav")
47
  return wav_file
48
 
 
 
 
 
 
49
  # Initialize device for torch
50
  device = 0 if torch.cuda.is_available() else "cpu"
51
+ torch_dtype = torch.float32
52
 
53
+ # Load tokenizer and model
54
+ tokenizer = WhisperTokenizer.from_pretrained("NbAiLab/nb-whisper-large")
55
+ model = WhisperForConditionalGeneration.from_pretrained("NbAiLab/nb-whisper-large")
56
+ processor = WhisperProcessor.from_pretrained("NbAiLab/nb-whisper-large")
 
 
 
 
 
 
 
 
 
57
 
 
 
58
 
59
+ # The model does not support TorchScript (torch.jit.script) compilation, so the lines below stay disabled
60
+ #model = model.to(device)
61
+ #model = torch.jit.script(model)
62
 
63
+ # Generation kwargs
64
+ generate_kwargs = {
65
+ "num_beams": 5,
66
+ "task": "transcribe",
67
+ "language": "no"
68
+ }
69
 
70
+ # Initialize pipeline
71
+ asr = pipeline("automatic-speech-recognition", model=model, tokenizer=processor.tokenizer, feature_extractor=processor.feature_extractor, device=device, torch_dtype=torch_dtype)
72
 
73
  # Transcribe audio
74
  def transcribe_audio(audio_file):
 
77
 
78
  start_time = time.time()
79
 
80
+ # ASR pipeline on audio
81
+ with torch.no_grad():
82
+ text = asr(audio_file, chunk_length_s=30, generate_kwargs=generate_kwargs)["text"]
83
 
84
  output_time = time.time() - start_time
85
 
 
108
 
109
  return text, result
110
 
 
111
  # Clean and preprocess text for summarization
112
  def clean_text(text):
113
  text = re.sub(r'https?:\/\/.*[\r\n]*', '', text)
 
265
  pdf_both.click(fn=lambda text, summary: save_to_pdf(text, summary), inputs=[text_output, summary_output], outputs=[pdf_output])
266
 
267
  iface.launch(share=True, debug=True)
268
+
269
+
270
+