from transformers import pipeline
import gradio as gr

# Load the fine-tuned Haitian Creole speech-to-text model as an ASR pipeline.
pipe = pipeline("automatic-speech-recognition", model="jsbeaudry/creole-speech-to-text")

def transcribe(audio):
    # Gradio passes the recording as a file path; the pipeline returns a dict with the transcript.
    text = pipe(audio)["text"]
    return text

iface = gr.Interface(
    fn=transcribe,
    inputs=gr.Audio(type="filepath"),
    outputs="text",
    title="Whisper medium Creole",
    description="Realtime demo for Haitian Creole speech recognition using a fine-tuned Whisper medium model.",
)

iface.launch()
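# Quick local sanity check (kept commented out so it does not run on Space startup):
# the transcribe() function can be called directly on an audio file without the Gradio UI.
# The file name below is only a placeholder, not an asset shipped with this Space.
#
# print(transcribe("sample_creole_clip.wav"))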
# Alternative: load the model and processor explicitly for finer control over dtype and device.
#
# import torch
# import gradio as gr
# from transformers import AutoModelForSpeechSeq2Seq, AutoProcessor, pipeline
#
# device = "cuda:0" if torch.cuda.is_available() else "cpu"
# torch_dtype = torch.float16 if torch.cuda.is_available() else torch.float32
#
# model_id = "jsbeaudry/creole-speech-to-text"
#
# model = AutoModelForSpeechSeq2Seq.from_pretrained(
#     model_id, torch_dtype=torch_dtype, low_cpu_mem_usage=True, use_safetensors=True
# )
# model.to(device)
#
# processor = AutoProcessor.from_pretrained(model_id)
#
# pipe = pipeline(
#     "automatic-speech-recognition",
#     model=model,
#     tokenizer=processor.tokenizer,
#     feature_extractor=processor.feature_extractor,
#     torch_dtype=torch_dtype,
#     device=device,
# )
#
# def transcribe(audio):
#     # Use the ASR pipeline defined above.
#     text = pipe(audio)["text"]
#     return text
#
# iface = gr.Interface(
#     fn=transcribe,
#     inputs=gr.Audio(type="filepath"),
#     outputs="text",
#     title="Whisper medium Creole",
#     description="Realtime demo for Haitian Creole speech recognition using a fine-tuned Whisper medium model.",
# )
#
# iface.launch()