from transformers import pipeline
import gradio as gr

# Load the fine-tuned Haitian Creole speech-to-text model as an ASR pipeline.
pipe = pipeline(model="jsbeaudry/creole-speech-to-text")


def transcribe(audio):
    """Transcribe an audio file (path supplied by Gradio) to text."""
    text = pipe(audio)["text"]
    return text


iface = gr.Interface(
    fn=transcribe,
    inputs=gr.Audio(type="filepath"),
    outputs="text",
    title="Whisper medium Creole",
    description="Realtime demo for Haitian Creole speech recognition using a fine-tuned Whisper medium model.",
)

iface.launch()
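# Optional quick check without the web UI (assumption: the path below is a
# placeholder for any local audio file):
# print(transcribe("sample_creole.wav"))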
# Alternative setup (commented out): load the model and processor explicitly,
# with device and dtype selection, instead of the one-line pipeline above.
#
# from transformers import AutoModelForSpeechSeq2Seq, AutoProcessor, pipeline
# import gradio as gr
# import torch
# from datasets import load_dataset
#
# device = "cuda:0" if torch.cuda.is_available() else "cpu"
# torch_dtype = torch.float16 if torch.cuda.is_available() else torch.float32
#
# model_id = "jsbeaudry/creole-speech-to-text"
#
# model = AutoModelForSpeechSeq2Seq.from_pretrained(
#     model_id, torch_dtype=torch_dtype, low_cpu_mem_usage=True, use_safetensors=True
# )
# model.to(device)
#
# processor = AutoProcessor.from_pretrained(model_id)
#
# pipe = pipeline(
#     "automatic-speech-recognition",
#     model=model,
#     tokenizer=processor.tokenizer,
#     feature_extractor=processor.feature_extractor,
#     torch_dtype=torch_dtype,
#     device=device,
# )
#
#
# def transcribe(audio):
#     # Use the ASR pipeline defined above.
#     text = pipe(audio)["text"]
#     return text
#
#
# iface = gr.Interface(
#     fn=transcribe,
#     inputs=gr.Audio(type="filepath"),
#     outputs="text",
#     title="Whisper medium Creole",
#     description="Realtime demo for Haitian Creole speech recognition using a fine-tuned Whisper medium model.",
# )
#
# iface.launch()
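# Note (assumption): for longer recordings, either pipeline can be called with
# chunking, e.g. pipe(audio, chunk_length_s=30), to transcribe the audio in
# 30-second windows rather than in a single pass.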