# Hugging Face Space (status when this page was captured: Sleeping)
from transformers import pipeline
import gradio as gr
from unsloth import FastModel
from transformers import WhisperForConditionalGeneration
import torch
# Load the fine-tuned Whisper checkpoint through Unsloth. Despite its name,
# `tokenizer` is used below as a processor-style object: the pipeline reads
# its `.tokenizer` and `.feature_extractor` attributes.
model, tokenizer = FastModel.from_pretrained(
    model_name="jsbeaudry/creole-speech-to-text",
    dtype=None,  # Leave as None for auto detection
    load_in_4bit=False,  # Set to True to do 4bit quantization which reduces memory
    auto_model=WhisperForConditionalGeneration,
    whisper_language="Haitian",
    whisper_task="transcribe",
    # token="hf_...",  # use one if using gated models like meta-llama/Llama-2-7b-hf
)

# Build the speech-recognition pipeline around the already-loaded model;
# `transcribe` calls this module-level `pipe` object.
pipe = pipeline(
    "automatic-speech-recognition",
    model=model,
    tokenizer=tokenizer.tokenizer,
    feature_extractor=tokenizer.feature_extractor,
    processor=tokenizer,
    return_language=True,
    # The model dtype is auto-detected above (dtype=None), so follow whatever
    # was actually loaded instead of hard-coding float16: a pipeline dtype
    # that differs from the model's can make the input features and the
    # model weights disagree at inference time.
    torch_dtype=model.dtype,
)
def transcribe(audio):
    """Transcribe an audio clip with the module-level ``pipe`` pipeline.

    Args:
        audio: The value supplied by the Gradio audio input. The interface
            in this file configures that input with ``type="filepath"``, so
            this is a path to the audio file, or ``None`` when the user
            submits without recording or uploading anything.

    Returns:
        The ``"text"`` field of the pipeline result, or an empty string when
        no audio was provided.
    """
    # Submitting the form with no audio yields None; handing that to the
    # pipeline raises, so answer with an empty transcript instead.
    if audio is None:
        return ""

    result = pipe(audio)
    return result["text"]
# Audio input component: hands `transcribe` a path to the audio file.
audio_input = gr.Audio(type="filepath")

# Web UI wiring: audio in, transcribed text out.
# NOTE(review): the description says "medium small model" while the title
# says "medium" -- confirm which model size is meant before rewording.
iface = gr.Interface(
    transcribe,
    audio_input,
    "text",
    title="Whisper medium Creole",
    description="Realtime demo for Haitian Creole speech recognition using a fine-tuned medium small model.",
)

# Start the Gradio server for the interface.
iface.launch()