camparchimedes committed on
Commit
6834d8b
·
verified ·
1 Parent(s): 9dcee67

Update app.py

Browse files
Files changed (1) hide show
  1. app.py +11 -10
app.py CHANGED
@@ -70,21 +70,22 @@ def convert_to_wav(filepath):
70
  pipe = pipeline("automatic-speech-recognition", model="NbAiLab/nb-whisper-large", chunk_length_s=30, generate_kwargs={'task': 'transcribe', 'language': 'no'})
71
 
72
  @spaces.GPU()
73
- def transcribe_audio(filepath, batch_size=16, sample_rate=16000):
74
 
75
- if not isinstance(filepath, str):
76
- raise ValueError("Expected a file path string, but received something else.")
77
-
78
- # Ensure the file exists before trying to load it
79
- if not Path(filepath).is_file():
80
- raise FileNotFoundError(f"The file {filepath} does not exist.")
81
-
82
- waveform, sample_rate = torchaudio.load("{filepath}")
 
83
 
84
  # --convert to mono
85
  if waveform.ndim > 1:
86
  waveform = waveform[0, :]
87
-
88
  waveform = waveform.numpy()
89
 
90
  start_time = time.time()
 
70
  pipe = pipeline("automatic-speech-recognition", model="NbAiLab/nb-whisper-large", chunk_length_s=30, generate_kwargs={'task': 'transcribe', 'language': 'no'})
71
 
72
  @spaces.GPU()
73
+ def transcribe_audio(filepath, batch_size=16):
74
 
75
+ with tempfile.NamedTemporaryFile(suffix=".wav") as temp_audio_file:
76
+ # Copy the contents of the uploaded audio file to the temporary file
77
+ temp_audio_file.write(open(audio, "rb").read())
78
+ temp_audio_file.flush()
79
+ # Load the audio file using torchaudio
80
+ waveform, sample_rate = torchaudio.load(temp_audio_file.name)
81
+ # Resample the audio to 16kHz
82
+ resampler = torchaudio.transforms.Resample(sample_rate, 16000)
83
+ waveform = resampler(waveform)
84
 
85
  # --convert to mono
86
  if waveform.ndim > 1:
87
  waveform = waveform[0, :]
88
+ # Convert PyTorch tensor to NumPy ndarray
89
  waveform = waveform.numpy()
90
 
91
  start_time = time.time()