Update app.py
app.py CHANGED
@@ -1,3 +1,10 @@
+"""
+
+This application processes audio files, transcribes them using a pretrained model (Whisper), and provides multiple summarization options for the transcribed text. The application also includes a PDF generation feature and is built with Gradio for the user interface
+
+Webapp, transkribering (norsk), NbAiLab/nb-whisper-large, oppsummering, pdf-download.
+
+"""
 import time
 import os
 import warnings
@@ -39,9 +46,20 @@ def convert_to_wav(audio_file):
 # Initialize device for torch
 device = "cuda" if torch.cuda.is_available() else "cpu"
 
-# Load
-
-
+# Load smoking-big-pipe
+MODEL_NAME = "NbAiLab/nb-whisper-large"
+lang = "no"
+
+device = 0 if torch.cuda.is_available() else "cpu"
+pipe = pipeline(
+    task="automatic-speech-recognition",
+    model=MODEL_NAME,
+    chunk_length_s=30,
+    device=device,
+)
+
+pipe.model.config.forced_decoder_ids = pipe.tokenizer.get_decoder_prompt_ids(language=lang, task="transcribe")
+
 
 def transcribe_audio(audio_file):
     if audio_file.endswith(".m4a"):
@@ -49,23 +67,26 @@ def transcribe_audio(audio_file):
 
     start_time = time.time()
 
+    text = pipe(audio_file)["text"]
+
     # Load the audio file using torchaudio
-    waveform, sample_rate = torchaudio.load(audio_file)
-
+    #waveform, sample_rate = torchaudio.load(audio_file)
+
     # Process the waveform with Whisper's processor
-    input_features = whisper_processor(waveform, sampling_rate=sample_rate, return_tensors="pt").input_features.to(device)
+    #input_features = whisper_processor(waveform, sampling_rate=sample_rate, return_tensors="pt").input_features.to(device)
 
     # Generate the transcription
-    output = whisper_model.generate(input_features=input_features)
+    #output = whisper_model.generate(input_features=input_features)
 
     # Decode the output
-    text = whisper_processor.batch_decode(output, skip_special_tokens=True)[0]
+    #text = whisper_processor.batch_decode(output, skip_special_tokens=True)[0]
 
     output_time = time.time() - start_time
     result = f"Time taken: {output_time:.2f} seconds\nNumber of words: {len(text.split())}"
 
     return text, result
 
+
 # Clean and preprocess text for summarization
 def clean_text(text):
     text = re.sub(r'https?:\/\/.*[\r\n]*', '', text)
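
Taken together, the change swaps the manual torchaudio + whisper_processor/whisper_model path for a single transformers ASR pipeline. The sketch below shows the new flow as a standalone script; it assumes the transformers and torch packages are installed, that the NbAiLab/nb-whisper-large checkpoint can be downloaded, and that ffmpeg is available for audio decoding. The input file name is a placeholder, and error handling is omitted.

import torch
from transformers import pipeline

MODEL_NAME = "NbAiLab/nb-whisper-large"

# pipeline() accepts a GPU index (0) or the string "cpu"
device = 0 if torch.cuda.is_available() else "cpu"

pipe = pipeline(
    task="automatic-speech-recognition",
    model=MODEL_NAME,
    chunk_length_s=30,  # transcribe long recordings in 30-second chunks
    device=device,
)

# Force Norwegian transcription, as in the commit
pipe.model.config.forced_decoder_ids = pipe.tokenizer.get_decoder_prompt_ids(
    language="no", task="transcribe"
)

if __name__ == "__main__":
    text = pipe("example.wav")["text"]  # "example.wav" is a placeholder path
    print(text)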
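
The second hunk header references a convert_to_wav(audio_file) helper whose body is not part of this diff, and transcribe_audio still special-cases .m4a input. A hypothetical sketch of what such a helper might look like, assuming pydub (which requires ffmpeg on the PATH); the actual implementation in app.py may differ.

from pydub import AudioSegment  # assumption: pydub is used; the diff does not show the real helper

def convert_to_wav(audio_file: str) -> str:
    """Hypothetical: convert an .m4a (or other ffmpeg-readable) file to .wav and return the new path."""
    wav_path = audio_file.rsplit(".", 1)[0] + ".wav"
    AudioSegment.from_file(audio_file).export(wav_path, format="wav")
    return wav_path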