# Hugging Face Space: Whisper-based Konkani speech-recognition demo.
"""Gradio demo for Konkani speech recognition with a fine-tuned Whisper model."""

# Imports grouped per PEP 8: stdlib first, then third-party.
import os

import gradio as gr
import torch
import torchaudio
from transformers import WhisperTokenizer, pipeline

# Tokenizer taken from the base Whisper checkpoint the model was fine-tuned
# from. Language is deliberately left unset so the pipeline auto-detects it.
tokenizer = WhisperTokenizer.from_pretrained(
    "openai/whisper-small", task="transcribe"
)

# ASR pipeline backed by the fine-tuned Konkani model.
# (Earlier candidates: "thak123/whisper-small-LDC-V1", "thak123/whisper-small-gom".)
pipe = pipeline(
    model="thak123/gom-stt-v3",
    task="automatic-speech-recognition",
    tokenizer=tokenizer,
)

# NOTE(review): forcing decoder prompt ids was disabled by the author;
# re-enable if the model starts emitting the wrong language.
# pipe.model.config.forced_decoder_ids = (
#     pipe.tokenizer.get_decoder_prompt_ids(language="marathi", task="transcribe")
# )
def transcribe_speech(filepath):
    """Transcribe one audio file and return the recognized text.

    Parameters
    ----------
    filepath : str
        Path to an audio file in a format the pipeline can decode
        (e.g. wav/mp3/flac).

    Returns
    -------
    str
        The transcript produced by the ASR pipeline.
    """
    # Update "language" here if you fine-tune on a different language.
    generation_options = {
        "task": "transcribe",
        # "language": "konkani",
    }
    # NOTE(review): max_new_tokens=3 caps the transcript at ~3 tokens —
    # presumably intended for the short digit clips; confirm before reuse.
    result = pipe(
        filepath,
        max_new_tokens=3,
        generate_kwargs=generation_options,
        chunk_length_s=30,
        batch_size=8,
    )
    print(result)
    return result["text"]
# Bundled example clips, relative to the app's working directory.
# (The original wrapped these in os.path.join(os.path.dirname("."), ...);
# os.path.dirname(".") is "", so the join was a no-op — simplified here.)
_EXAMPLE_FILES = [
    ["audio/chalyaami.mp3"],
    ["audio/ekdonteen.flac"],
    ["audio/heyatachadjaale.mp3"],
    ["audio/panaji1920-9.mp3"],
]

demo = gr.Blocks()

# Tab 1: record from the microphone, pass the temp file path to the pipeline.
mic_transcribe = gr.Interface(
    fn=transcribe_speech,
    inputs=gr.Audio(sources="microphone", type="filepath"),
    outputs=gr.components.Textbox(),
)

# Tab 2: upload an audio file, with the bundled examples above.
file_transcribe = gr.Interface(
    fn=transcribe_speech,
    inputs=gr.Audio(sources="upload", type="filepath"),
    outputs=gr.components.Textbox(),
    examples=_EXAMPLE_FILES,
)

with demo:
    gr.TabbedInterface(
        [mic_transcribe, file_transcribe],
        ["Transcribe Microphone", "Transcribe Audio File"],
    )

demo.launch(debug=True)
# # def transcribe(audio):
# # # text = pipe(audio)["text"]
# # # pipe(audio)
# # text = pipe(audio)
# # print("op",text)
# # return text#pipe(audio) #text
# # iface = gr.Interface(
# # fn=transcribe,
# # inputs=[gr.Audio(sources=["microphone", "upload"])],
# # outputs="text",
# # examples=[
# # [os.path.join(os.path.dirname("."),"audio/chalyaami.mp3")],
# # [os.path.join(os.path.dirname("."),"audio/ekdonteen.flac")],
# # [os.path.join(os.path.dirname("."),"audio/heyatachadjaale.mp3")],
# # ],
# # title="Whisper Konkani",
# # description="Realtime demo for Konkani speech recognition using a fine-tuned Whisper small model.",
# # )
# # iface.launch()
# from transformers import WhisperTokenizer, pipeline
# import gradio as gr
# import os
# tokenizer = WhisperTokenizer.from_pretrained("openai/whisper-small", language="marathi", task="transcribe")
# pipe = pipeline(model="thak123/gom-stt-v3", task="automatic-speech-recognition", tokenizer=tokenizer)
# def transcribe(audio):
# result = pipe(audio)
# text = result[0]['text']
# print("op", text)
# return text
# iface = gr.Interface(
# fn=transcribe,
# inputs=[gr.Audio(sources=["microphone", "upload"])],
# outputs="text",
# examples=[
# [os.path.join(os.path.dirname("."), "audio/chalyaami.mp3")],
# [os.path.join(os.path.dirname("."), "audio/ekdonteen.flac")],
# [os.path.join(os.path.dirname("."), "audio/heyatachadjaale.mp3")],
# ],
# title="Whisper Konkani",
# description="Realtime demo for Konkani speech recognition using a fine-tuned Whisper small model.",
# )
# iface.launch() |