Spaces:

camparchimedes
/

nb

Build error

camparchimedes committed on Aug 26, 2024

Commit

1bc35a1

verified ·

1 Parent(s): dd49603

Update app.py

Files changed (1) hide show

app.py CHANGED Viewed

@@ -16,7 +16,7 @@
 import spaces
 import gradio as gr
 from PIL import Image
-#from pydub import AudioSegment
 #from scipy.io import wavfile
 import os
@@ -61,20 +61,26 @@ SIDEBAR_INFO = f"""
 # ------------transcribe section------------
 @spaces.GPU()
-def convert_to_wav(filepath):
-    _, file_ending = os.path.splitext(f'{filepath}')
-    audio_file = filepath.replace(file_ending, ".wav")
-    os.system(f'ffmpeg -i "{filepath}" -ar 16000 -ac 1 -c:a pcm_s16le "{audio_file}"')
-    return audio_file
 pipe = pipeline("automatic-speech-recognition", model="NbAiLab/nb-whisper-large", chunk_length_s=30, generate_kwargs={'task': 'transcribe', 'language': 'no'})
 @spaces.GPU()
-def transcribe_audio(audio, batch_size=16):
     with tempfile.NamedTemporaryFile(suffix=".wav") as temp_audio_file:
         # Copy the contents of the uploaded audio file to the temporary file
-        temp_audio_file.write(open(audio, "rb").read())
         temp_audio_file.flush()
         # Load the audio file using torchaudio
         waveform, sample_rate = torchaudio.load(temp_audio_file.name)

 import spaces
 import gradio as gr
 from PIL import Image
+from pydub import AudioSegment
 #from scipy.io import wavfile
 import os
 # ------------transcribe section------------
 @spaces.GPU()
+# ============ORIGINAL============[convert m4a audio to wav]
+def convert_to_wav(audio_file):
+    audio = AudioSegment.from_file(audio_file, format="m4a")
+    wav_file = "temp.wav"
+    audio.export(wav_file, format="wav")
+    return wav_file
+# ================================[------------------------]
 pipe = pipeline("automatic-speech-recognition", model="NbAiLab/nb-whisper-large", chunk_length_s=30, generate_kwargs={'task': 'transcribe', 'language': 'no'})
 @spaces.GPU()
+def transcribe_audio(audio_file, batch_size=16):
+    if audio_file.endswith(".m4a"):
+        audio_file = convert_to_wav(audio_file)
     with tempfile.NamedTemporaryFile(suffix=".wav") as temp_audio_file:
         # Copy the contents of the uploaded audio file to the temporary file
+        temp_audio_file.write(open(audio_file, "rb").read())
         temp_audio_file.flush()
         # Load the audio file using torchaudio
         waveform, sample_rate = torchaudio.load(temp_audio_file.name)