camparchimedes committed on
Commit
1bc35a1
·
verified ·
1 Parent(s): dd49603

Update app.py

Browse files
Files changed (1) hide show
  1. app.py +14 -8
app.py CHANGED
@@ -16,7 +16,7 @@
16
  import spaces
17
  import gradio as gr
18
  from PIL import Image
19
- #from pydub import AudioSegment
20
  #from scipy.io import wavfile
21
 
22
  import os
@@ -61,20 +61,26 @@ SIDEBAR_INFO = f"""
61
  # ------------transcribe section------------
62
 
63
  @spaces.GPU()
64
- def convert_to_wav(filepath):
65
- _, file_ending = os.path.splitext(f'{filepath}')
66
- audio_file = filepath.replace(file_ending, ".wav")
67
- os.system(f'ffmpeg -i "{filepath}" -ar 16000 -ac 1 -c:a pcm_s16le "{audio_file}"')
68
- return audio_file
 
 
 
 
69
 
70
  pipe = pipeline("automatic-speech-recognition", model="NbAiLab/nb-whisper-large", chunk_length_s=30, generate_kwargs={'task': 'transcribe', 'language': 'no'})
71
 
72
  @spaces.GPU()
73
- def transcribe_audio(audio, batch_size=16):
 
 
74
 
75
  with tempfile.NamedTemporaryFile(suffix=".wav") as temp_audio_file:
76
  # Copy the contents of the uploaded audio file to the temporary file
77
- temp_audio_file.write(open(audio, "rb").read())
78
  temp_audio_file.flush()
79
  # Load the audio file using torchaudio
80
  waveform, sample_rate = torchaudio.load(temp_audio_file.name)
 
16
  import spaces
17
  import gradio as gr
18
  from PIL import Image
19
+ from pydub import AudioSegment
20
  #from scipy.io import wavfile
21
 
22
  import os
 
61
  # ------------transcribe section------------
62
 
63
  @spaces.GPU()
64
+
65
+ # ============ORIGINAL============[convert m4a audio to wav]
66
+ def convert_to_wav(audio_file):
67
+ audio = AudioSegment.from_file(audio_file, format="m4a")
68
+ wav_file = "temp.wav"
69
+ audio.export(wav_file, format="wav")
70
+ return wav_file
71
+ # ================================[------------------------]
72
+
73
 
74
  pipe = pipeline("automatic-speech-recognition", model="NbAiLab/nb-whisper-large", chunk_length_s=30, generate_kwargs={'task': 'transcribe', 'language': 'no'})
75
 
76
  @spaces.GPU()
77
+ def transcribe_audio(audio_file, batch_size=16):
78
+ if audio_file.endswith(".m4a"):
79
+ audio_file = convert_to_wav(audio_file)
80
 
81
  with tempfile.NamedTemporaryFile(suffix=".wav") as temp_audio_file:
82
  # Copy the contents of the uploaded audio file to the temporary file
83
+ temp_audio_file.write(open(audio_file, "rb").read())
84
  temp_audio_file.flush()
85
  # Load the audio file using torchaudio
86
  waveform, sample_rate = torchaudio.load(temp_audio_file.name)