Spaces:

camparchimedes
/

nb

Build error

App Files Files

camparchimedes committed on Aug 19, 2024

Commit

b3d3679

verified ·

1 Parent(s): 7ec9f42

Update app.py

Browse files

Files changed (1) hide show

app.py +11 -16

app.py CHANGED Viewed

@@ -38,30 +38,25 @@ def convert_to_wav(audio_file):
 device = "cuda" if torch.cuda.is_available() else "cpu"
 # Load Whisper model and tokenizer
-whisper_pipeline = pipeline("automatic-speech-recognition", model="NbAiLab/nb-whisper-large", device=device)
-summarization_model = AutoModelForSeq2SeqLM.from_pretrained("t5-base", torch_dtype=torch.float16).to(device)
-summarization_tokenizer = AutoTokenizer.from_pretrained("t5-base")
-# Transcribe audio to text
 def transcribe_audio(audio_file):
     if audio_file.endswith(".m4a"):
         audio_file = convert_to_wav(audio_file)
     start_time = time.time()
-    # Prepare input and attention mask
-    inputs = whisper_pipeline.tokenizer(audio_file, return_tensors="pt", padding=True)
-    inputs = {k: v.to(device) for k, v in inputs.items()}
-    # Generate the transcription with attention_mask
-    output = whisper_pipeline.model.generate(
-        inputs['input_ids'],
-        attention_mask=inputs['attention_mask']
-    )
     # Decode the output
-    text = whisper_pipeline.tokenizer.decode(output[0], skip_special_tokens=True)
     output_time = time.time() - start_time
     result = f"Time taken: {output_time:.2f} seconds\nNumber of words: {len(text.split())}"
@@ -171,7 +166,7 @@ iface = gr.Blocks()
 with iface:
     gr.HTML("""
     <div style="text-align: center;">
-        <img src="https://huggingface.co/spaces/camparchimedes/transcription_app/blob/main/lol.webp" alt="" width="100%" height="auto">
     </div>
     """)
     gr.Markdown("# Vi har nå muligheten til å oversette lydfiler til norsk skrift.")

 device = "cuda" if torch.cuda.is_available() else "cpu"
 # Load Whisper model and tokenizer
+whisper_processor = WhisperProcessor.from_pretrained("NbAiLab/nb-whisper-large")
+whisper_model = torch.hub.load('huggingface/pytorch-transformers', 'model', "NbAiLab/nb-whisper-large").to(device)
 def transcribe_audio(audio_file):
     if audio_file.endswith(".m4a"):
         audio_file = convert_to_wav(audio_file)
     start_time = time.time()
+    # Load the audio file and process it with Whisper's processor
+    audio, sample_rate = whisper_processor.audio_to_array(audio_file)
+    input_features = whisper_processor(audio, sampling_rate=sample_rate, return_tensors="pt").input_features.to(device)
+    # Generate the transcription
+    output = whisper_model.generate(input_features=input_features)
     # Decode the output
+    text = whisper_processor.batch_decode(output, skip_special_tokens=True)[0]
     output_time = time.time() - start_time
     result = f"Time taken: {output_time:.2f} seconds\nNumber of words: {len(text.split())}"
 with iface:
     gr.HTML("""
     <div style="text-align: center;">
+        <img src="https://huggingface.co/spaces/camparchimedes/transcription_app/raw/main/banner_trans.png" alt="" width="100%" height="auto">
     </div>
     """)
     gr.Markdown("# Vi har nå muligheten til å oversette lydfiler til norsk skrift.")