camparchimedes committed on
Commit
0ca8cef
·
verified ·
1 Parent(s): 9eaf04c

Update app.py

Browse files
Files changed (1) hide show
  1. app.py +29 -8
app.py CHANGED
@@ -1,3 +1,10 @@
 
 
 
 
 
 
 
1
  import time
2
  import os
3
  import warnings
@@ -39,9 +46,20 @@ def convert_to_wav(audio_file):
39
  # Initialize device for torch
40
  device = "cuda" if torch.cuda.is_available() else "cpu"
41
 
42
- # Load Whisper model and processor directly using the transformers library
43
- whisper_processor = WhisperProcessor.from_pretrained("NbAiLab/nb-whisper-large")
44
- whisper_model = WhisperForConditionalGeneration.from_pretrained("NbAiLab/nb-whisper-large").to(device)
 
 
 
 
 
 
 
 
 
 
 
45
 
46
  def transcribe_audio(audio_file):
47
  if audio_file.endswith(".m4a"):
@@ -49,23 +67,26 @@ def transcribe_audio(audio_file):
49
 
50
  start_time = time.time()
51
 
 
 
52
  # Load the audio file using torchaudio
53
- waveform, sample_rate = torchaudio.load(audio_file)
54
-
55
  # Process the waveform with Whisper's processor
56
- input_features = whisper_processor(waveform, sampling_rate=sample_rate, return_tensors="pt").input_features.to(device)
57
 
58
  # Generate the transcription
59
- output = whisper_model.generate(input_features=input_features)
60
 
61
  # Decode the output
62
- text = whisper_processor.batch_decode(output, skip_special_tokens=True)[0]
63
 
64
  output_time = time.time() - start_time
65
  result = f"Time taken: {output_time:.2f} seconds\nNumber of words: {len(text.split())}"
66
 
67
  return text, result
68
 
 
69
  # Clean and preprocess text for summarization
70
  def clean_text(text):
71
  text = re.sub(r'https?:\/\/.*[\r\n]*', '', text)
 
1
+ """
2
+
3
+ This application processes audio files, transcribes them using a pretrained model (Whisper), and provides multiple summarization options for the transcribed text. The application also includes a PDF generation feature and is built with Gradio for the user interface.
4
+
5
+ Webapp, transkribering (norsk), NbAiLab/nb-whisper-large, oppsummering, pdf-download.
6
+ """
7
+
8
  import time
9
  import os
10
  import warnings
 
46
  # Initialize device for torch
47
  device = "cuda" if torch.cuda.is_available() else "cpu"
48
 
49
+ # Load smoking-big-pipe
50
+ MODEL_NAME = "NbAiLab/nb-whisper-large"
51
+ lang = "no"
52
+
53
+ device = 0 if torch.cuda.is_available() else "cpu"
54
+ pipe = pipeline(
55
+ task="automatic-speech-recognition",
56
+ model=MODEL_NAME,
57
+ chunk_length_s=30,
58
+ device=device,
59
+ )
60
+
61
+ pipe.model.config.forced_decoder_ids = pipe.tokenizer.get_decoder_prompt_ids(language=lang, task="transcribe")
62
+
63
 
64
  def transcribe_audio(audio_file):
65
  if audio_file.endswith(".m4a"):
 
67
 
68
  start_time = time.time()
69
 
70
+ text = pipe(audio_file)["text"]
71
+
72
  # Load the audio file using torchaudio
73
+ #waveform, sample_rate = torchaudio.load(audio_file)
74
+
75
  # Process the waveform with Whisper's processor
76
+ #input_features = whisper_processor(waveform, sampling_rate=sample_rate, return_tensors="pt").input_features.to(device)
77
 
78
  # Generate the transcription
79
+ #output = whisper_model.generate(input_features=input_features)
80
 
81
  # Decode the output
82
+ #text = whisper_processor.batch_decode(output, skip_special_tokens=True)[0]
83
 
84
  output_time = time.time() - start_time
85
  result = f"Time taken: {output_time:.2f} seconds\nNumber of words: {len(text.split())}"
86
 
87
  return text, result
88
 
89
+
90
  # Clean and preprocess text for summarization
91
  def clean_text(text):
92
  text = re.sub(r'https?:\/\/.*[\r\n]*', '', text)