Update app.py
app.py CHANGED
@@ -8,7 +8,10 @@ import torch
 import torchaudio
 
 pipe = pipeline(model="thak123/gom-stt-v3", #"thak123/whisper-small-LDC-V1", #"thak123/whisper-small-gom",
-    task="automatic-speech-recognition",
+    task="automatic-speech-recognition",
+    tokenizer= tokenizer,
+    sampling_rate=16000,
+    ) # change to "your-username/the-name-you-picked"
 
 # pipe.model.config.forced_decoder_ids = (
 #     pipe.tokenizer.get_decoder_prompt_ids(
@@ -17,14 +20,14 @@ pipe = pipeline(model="thak123/gom-stt-v3", #"thak123/whisper-small-LDC-V1", #"t
 # )
 
 def transcribe_speech(filepath):
-    waveform, sample_rate = torchaudio.load(filepath)
+    # waveform, sample_rate = torchaudio.load(filepath)
 
     # Resample the audio signal to 16k sampling rate
-    resampler = torchaudio.transforms.Resample(sample_rate, 16000)
-    waveform_16k = resampler(waveform)
+    # resampler = torchaudio.transforms.Resample(sample_rate, 16000)
+    # waveform_16k = resampler(waveform)
 
     # Save the resampled audio signal to a new file
-    torchaudio.save(filepath, waveform_16k, 16000)
+    # torchaudio.save(filepath, waveform_16k, 16000)
     output = pipe(
         filepath,
         max_new_tokens=256,
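For context, below is a minimal sketch of what this section of app.py amounts to after the commit: the pipeline is now constructed with an explicit task and tokenizer, and the manual torchaudio resampling in transcribe_speech is commented out because the ASR pipeline decodes and resamples file input to the feature extractor's rate itself. Several parts are assumptions, since they are not shown in the diff: how the `tokenizer` object is created (WhisperTokenizer.from_pretrained here), the return statement of transcribe_speech, and the Gradio wiring; the `sampling_rate=16000` keyword from the diff is also dropped in the sketch, as it is not a standard pipeline() argument.

import gradio as gr
from transformers import WhisperTokenizer, pipeline

MODEL_ID = "thak123/gom-stt-v3"

# Assumption: the `tokenizer` referenced in the diff is loaded from the same checkpoint.
tokenizer = WhisperTokenizer.from_pretrained(MODEL_ID)

pipe = pipeline(
    task="automatic-speech-recognition",
    model=MODEL_ID,
    tokenizer=tokenizer,
)

def transcribe_speech(filepath):
    # The pipeline reads the file and resamples it to the feature extractor's
    # rate (16 kHz for Whisper), so the old torchaudio resampling step is not needed.
    output = pipe(filepath, max_new_tokens=256)
    # Assumption: the function returns the transcribed text (not shown in the hunk).
    return output["text"]

# Assumption: a simple Gradio Audio -> Textbox interface, typical for a Space's app.py.
demo = gr.Interface(
    fn=transcribe_speech,
    inputs=gr.Audio(type="filepath"),
    outputs=gr.Textbox(),
)

if __name__ == "__main__":
    demo.launch()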