Spaces:

camparchimedes
/

nb

Build error

App Files

camparchimedes committed on Aug 11, 2024

Commit

ca866cd

verified ·

1 Parent(s): 89f3c24

Update app.py

Browse files

Files changed (1) hide show

app.py +12 -6

app.py CHANGED Viewed

@@ -21,7 +21,7 @@ def convert_to_wav(audio_file):
     return wav_file
 import torch
-from transformers import pipeline # AutoProcessor, AutoModelForSpeechSeq2Seq
 device = torch.device('cuda' if torch.cuda.is_available() else 'cpu')
 torch_dtype = torch.float32
@@ -29,17 +29,22 @@ torch_dtype = torch.float32
 pipe = pipeline("automatic-speech-recognition", model="NbAiLabBeta/nb-whisper-large", device=device, torch_dtype=torch_dtype)
 # @spaces.GPU(queue=True)
 def transcribe_audio(audio_file, forced_decoder_ids):
     if audio_file.endswith(".m4a"):
         audio_file = convert_to_wav(audio_file)
     start_time = time.time()
-    forced_decoder_ids = processor.get_decoder_prompt_ids(language=language, task=task)
-    # check if still the case...........??*********************************************
-    # "You have passed task=transcribe, but also have set `forced_decoder_ids` to [[1, 50288], [2, 50360], [3, 50364]] which creates a conflict. `forced_decoder_ids` will be ignored in favor of task=transcribe."
     with torch.no_grad():
-        output = pipe(audio_file, chunk_length_s=30, generate_kwargs={"forced_decoder_ids”: forced_decoder_ids"})
     text = output["text"]
     end_time = time.time()
@@ -50,6 +55,7 @@ def transcribe_audio(audio_file, forced_decoder_ids):
     return text, result
 # [VERSION 3: full-on w/ 3 styles for summarization]
 import nltk
 from nltk.tokenize import word_tokenize, sent_tokenize
@@ -199,7 +205,7 @@ def save_to_pdf(text, summary):
 banner_html = """
 <div style="text-align: center;">
-    <img src="https://huggingface.co/spaces/camparchimedes/text_app/raw/main/Olas%20AudioSwitch%20Shop.png" alt="Banner" width="100%" height="auto">
 </div>
 """

     return wav_file
 import torch
+from transformers import pipeline, AutoProcessor # AutoModelForSpeechSeq2Seq
 device = torch.device('cuda' if torch.cuda.is_available() else 'cpu')
 torch_dtype = torch.float32
 pipe = pipeline("automatic-speech-recognition", model="NbAiLabBeta/nb-whisper-large", device=device, torch_dtype=torch_dtype)
 # @spaces.GPU(queue=True)
+# Initialize processor before using it in the function
+processor = AutoProcessor.from_pretrained("NbAiLabBeta/nb-whisper-large")
+language = "no"
+task = "transcribe"
 def transcribe_audio(audio_file, forced_decoder_ids):
     if audio_file.endswith(".m4a"):
         audio_file = convert_to_wav(audio_file)
     start_time = time.time()
+    forced_decoder_ids = processor.get_decoder_prompt_ids(language="no", task="transcribe")
     with torch.no_grad():
+        output = pipe(audio_file, chunk_length_s=30, generate_kwargs={"forced_decoder_ids": forced_decoder_ids})
     text = output["text"]
     end_time = time.time()
     return text, result
 # [VERSION 3: full-on w/ 3 styles for summarization]
 import nltk
 from nltk.tokenize import word_tokenize, sent_tokenize
 banner_html = """
 <div style="text-align: center;">
+    <img src="https://huggingface.co/spaces/camparchimedes/transcription_app/blob/main/Olas%20AudioSwitch%20Shop.png" alt="Banner" width="100%" height="auto">
 </div>
 """