"""Gradio demo: Haitian Creole speech-to-text.

Loads the ``jsbeaudry/creole-speech-to-text`` model through a Hugging Face
``pipeline`` and serves it behind a small Gradio interface that takes an
audio recording/upload and displays the transcription.
"""

from transformers import pipeline
import gradio as gr

MODEL_ID = "jsbeaudry/creole-speech-to-text"

# The task is named explicitly so the pipeline type does not depend on the
# task metadata published with the model on the Hub.
pipe = pipeline("automatic-speech-recognition", model=MODEL_ID)


def transcribe(audio):
    """Transcribe an audio file to text.

    Args:
        audio: Path of the audio file to transcribe, as handed over by the
            ``gr.Audio(type="filepath")`` input below. ``None`` when the
            form is submitted without any recording or upload.

    Returns:
        The text produced by the speech-recognition pipeline, or an empty
        string when no audio was provided.
    """
    # Without this guard the pipeline is called with None and raises,
    # which surfaces in the UI as a generic error.
    if audio is None:
        return ""
    return pipe(audio)["text"]


# NOTE(review): the description says "medium small model" while the title
# says "medium" -- confirm the actual model size and fix the wording.
iface = gr.Interface(
    fn=transcribe,
    inputs=gr.Audio(type="filepath"),
    outputs="text",
    title="Whisper medium Creole",
    description="Realtime demo for Haitian Creole speech recognition using a fine-tuned medium small model.",
)

iface.launch()


# ---------------------------------------------------------------------------
# Alternative setup, kept for reference (not executed): loads the model and
# processor explicitly so that device and dtype can be chosen by hand.
# ---------------------------------------------------------------------------
# from transformers import pipeline
# import gradio as gr
# import torch
# from transformers import AutoModelForSpeechSeq2Seq, AutoProcessor, pipeline
# from datasets import load_dataset
#
# device = "cuda:0" if torch.cuda.is_available() else "cpu"
# torch_dtype = torch.float16 if torch.cuda.is_available() else torch.float32
#
# model_id = "jsbeaudry/creole-speech-to-text"
#
# model = AutoModelForSpeechSeq2Seq.from_pretrained(
#     model_id, torch_dtype=torch_dtype, low_cpu_mem_usage=True, use_safetensors=True
# )
# model.to(device)
#
# processor = AutoProcessor.from_pretrained(model_id)
#
# pipe = pipeline(
#     "automatic-speech-recognition",
#     model=model,
#     tokenizer=processor.tokenizer,
#     feature_extractor=processor.feature_extractor,
#     torch_dtype=torch_dtype,
#     device=device,
# )
#
# def transcribe(audio):
#     # Use the 'whisper' pipeline defined in the previous cell
#     text = pipe(audio)["text"]
#     return text
#
# iface = gr.Interface(
#     fn=transcribe,
#     inputs=gr.Audio(type="filepath"),
#     outputs="text",
#     title="Whisper medium Creole",
#     description="Realtime demo for Haitian Creole speech recognition using a fine-tuned medium small model.",
# )
#
# iface.launch()