# Hugging Face Space: jsbeaudry/oswald-large-v3-turbo-m1

from transformers import pipeline
import gradio as gr

# Hub identifier of the checkpoint used for transcription.
MODEL_ID = "jsbeaudry/creole-speech-to-text"

# Built once at import time and shared by every transcribe() call.
# No task is passed explicitly; pipeline() is left to work it out from
# the model id.
pipe = pipeline(model=MODEL_ID)

def transcribe(audio):
    """Transcribe an audio clip with the module-level ``pipe``.

    Args:
        audio: Path to the audio file to transcribe. The UI wires this to
            a ``gr.Audio(type="filepath")`` input, which supplies ``None``
            when the form is submitted without a recording or upload.

    Returns:
        The ``"text"`` entry of the pipeline result, or an empty string
        when no audio was provided.
    """
    # Submitting without audio hands us None; forwarding that to the
    # pipeline would fail and surface a raw error in the UI.
    if audio is None:
        return ""
    return pipe(audio)["text"]


# Audio input component; type="filepath" means transcribe() receives a
# path to the audio file rather than raw sample data.
audio_input = gr.Audio(type="filepath")

# One-function UI: audio in, plain-text transcript out.
# NOTE(review): the title says "medium" while the description says
# "medium small" -- confirm which model size is actually served.
iface = gr.Interface(
    fn=transcribe,
    inputs=audio_input,
    outputs="text",
    title="Whisper medium Creole",
    description="Realtime demo for Haitian Creole speech recognition using a fine-tuned medium small model.",
)

# Start the app as soon as the script is executed.
iface.launch()



# from transformers import pipeline
# import gradio as gr

# import torch
# from transformers import AutoModelForSpeechSeq2Seq, AutoProcessor, pipeline
# from datasets import load_dataset


# device = "cuda:0" if torch.cuda.is_available() else "cpu"
# torch_dtype = torch.float16 if torch.cuda.is_available() else torch.float32

# model_id = "jsbeaudry/creole-speech-to-text"

# model = AutoModelForSpeechSeq2Seq.from_pretrained(
#     model_id, torch_dtype=torch_dtype, low_cpu_mem_usage=True, use_safetensors=True
# )
# model.to(device)

# processor = AutoProcessor.from_pretrained(model_id)

# pipe = pipeline(
#     "automatic-speech-recognition",
#     model=model,
#     tokenizer=processor.tokenizer,
#     feature_extractor=processor.feature_extractor,
#     torch_dtype=torch_dtype,
#     device=device,
# )
# def transcribe(audio):
#     # Use the module-level `pipe` built above
#     text = pipe(audio)["text"]
#     return text

# iface = gr.Interface(
#     fn=transcribe,
#     inputs=gr.Audio(type="filepath"),
#     outputs="text",
#     title="Whisper medium Creole",
#     description="Realtime demo for Haitian Creole speech recognition using a fine-tuned medium small model.",
# )

# iface.launch()