File size: 1,339 Bytes
53e911a
 
aed667b
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
bfbc50c
 
53e911a
aed667b
53e911a
 
bfbc50c
 
53e911a
 
aed667b
53e911a
 
 
 
bfbc50c
aed667b
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
from transformers import pipeline
import gradio as gr
from unsloth import FastModel
from transformers import WhisperForConditionalGeneration
import torch


# Load the fine-tuned Creole Whisper checkpoint via unsloth's FastModel.
# `tokenizer` here is a Whisper processor-style object: it exposes both
# `.tokenizer` and `.feature_extractor` (used below when building the pipeline).
model, tokenizer = FastModel.from_pretrained(
    model_name = "jsbeaudry/creole-speech-to-text",
    dtype = None, # Leave as None for auto detection
    load_in_4bit = False, # Set to True to do 4bit quantization which reduces memory
    auto_model = WhisperForConditionalGeneration,  # force the Whisper seq2seq class
    whisper_language = "Haitian",
    whisper_task = "transcribe",
    # token = "hf_...", # use one if using gated models like meta-llama/Llama-2-7b-hf
)
# Build the ASR pipeline from the already-loaded model/processor pieces
# (passing the bare `model` object to `pipeline(model)` previously errored,
# because `pipeline` expects the task string first).
pipe = pipeline(
    "automatic-speech-recognition",
    model=model,
    tokenizer=tokenizer.tokenizer,                    # inner text tokenizer
    feature_extractor=tokenizer.feature_extractor,    # audio -> log-mel features
    processor=tokenizer,  # NOTE(review): `processor` is not a documented pipeline kwarg in older transformers versions — confirm it is accepted and not silently ignored
    return_language=True,
    torch_dtype=torch.float16  # NOTE(review): fp16 assumes a GPU/accelerator; may raise or be slow on CPU-only hosts — verify deployment target
)


def transcribe(audio):
    """Transcribe an audio file with the module-level ASR pipeline.

    Parameters
    ----------
    audio : str
        Filesystem path to the recording (Gradio passes a filepath).

    Returns
    -------
    str
        The recognized text.
    """
    result = pipe(audio)
    return result["text"]


# Wire the transcription function into a minimal Gradio UI.
# Fix: the description previously read "medium small model" — a
# self-contradictory phrase; it now matches the title ("Whisper medium").
iface = gr.Interface(
    fn=transcribe,
    inputs=gr.Audio(type="filepath"),  # hand the uploaded/recorded audio to `transcribe` as a path
    outputs="text",
    title="Whisper medium Creole",
    description="Realtime demo for Haitian Creole speech recognition using a fine-tuned Whisper medium model.",
)

# Start the web server (blocks until the app is stopped).
iface.launch()