File size: 1,339 Bytes
53e911a
 
aed667b
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
bfbc50c
 
53e911a
aed667b
53e911a
 
bfbc50c
 
53e911a
 
aed667b
53e911a
 
 
 
bfbc50c
aed667b
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
from transformers import pipeline
import gradio as gr
from unsloth import FastModel
from transformers import WhisperForConditionalGeneration
import torch


# Load the fine-tuned Creole Whisper checkpoint via unsloth's FastModel.
# `tokenizer` here is a Whisper processor-style object: it exposes both
# `.tokenizer` and `.feature_extractor` (used below when building the pipeline).
model, tokenizer = FastModel.from_pretrained(
    model_name = "jsbeaudry/creole-speech-to-text",
    dtype = None, # Leave as None for auto detection
    load_in_4bit = False, # Set to True to do 4bit quantization which reduces memory
    auto_model = WhisperForConditionalGeneration,  # force the Whisper seq2seq class
    whisper_language = "Haitian",
    whisper_task = "transcribe",
    # token = "hf_...", # use one if using gated models like meta-llama/Llama-2-7b-hf
)
# Build the ASR pipeline from the already-loaded model/processor pieces
# (passing the bare `model` object to `pipeline(model)` previously errored,
# because `pipeline` expects the task string first).
pipe = pipeline(
    "automatic-speech-recognition",
    model=model,
    tokenizer=tokenizer.tokenizer,                    # inner text tokenizer
    feature_extractor=tokenizer.feature_extractor,    # audio -> log-mel features
    processor=tokenizer,  # NOTE(review): `processor` is not a documented pipeline kwarg in older transformers versions — confirm it is accepted and not silently ignored
    return_language=True,
    torch_dtype=torch.float16  # NOTE(review): fp16 assumes a GPU/accelerator; may raise or be slow on CPU-only hosts — verify deployment target
)


def transcribe(audio):
    """Transcribe an audio file with the module-level ASR pipeline.

    Parameters
    ----------
    audio : str
        Filesystem path to the recording (Gradio passes a filepath).

    Returns
    -------
    str
        The recognized text.
    """
    result = pipe(audio)
    return result["text"]


# Wire the transcription function into a minimal Gradio UI.
# Fix: the description previously read "medium small model" — a
# self-contradictory phrase; it now matches the title ("Whisper medium").
iface = gr.Interface(
    fn=transcribe,
    inputs=gr.Audio(type="filepath"),  # hand the uploaded/recorded audio to `transcribe` as a path
    outputs="text",
    title="Whisper medium Creole",
    description="Realtime demo for Haitian Creole speech recognition using a fine-tuned Whisper medium model.",
)

# Start the web server (blocks until the app is stopped).
iface.launch()