Spaces:

micknikolic
/

speech-to-text

Runtime error

App Files Files Community

speech-to-text / app.py

micknikolic

Update app.py

d5f86c3 about 2 years ago

raw

history blame

2.34 kB

	import io
	import logging
	import time

	import gradio as gr
	import librosa
	import soundfile as sf
	import torch
	from transformers import AutoModelForSpeechSeq2Seq, AutoProcessor, pipeline

	# Resolve the checkpoint, device and precision once so that the model and
	# the pipeline always agree: float16 on a CUDA GPU, float32 on CPU.

	MODEL_ID = "distil-whisper/distil-large-v2"
	DEVICE = "cuda" if torch.cuda.is_available() else "cpu"
	TORCH_DTYPE = torch.float16 if DEVICE == "cuda" else torch.float32

	# Instantiating the model object.

	model = AutoModelForSpeechSeq2Seq.from_pretrained(
	    pretrained_model_name_or_path=MODEL_ID,
	    torch_dtype=TORCH_DTYPE,
	    use_safetensors=True,
	)

	# Move to the detected device; a hard-coded "cuda" fails on CPU-only hosts.
	model = model.to(DEVICE)

	# Instantiating the processor object.

	processor = AutoProcessor.from_pretrained(pretrained_model_name_or_path=MODEL_ID)

	# Instantiating the transformers pipeline object.
	# The already-loaded model object is passed in (not the checkpoint name),
	# so the checkpoint is not loaded a second time.

	pipe = pipeline(
	    task="automatic-speech-recognition",
	    model=model,
	    tokenizer=processor.tokenizer,
	    feature_extractor=processor.feature_extractor,
	    max_new_tokens=128,
	    chunk_length_s=30,
	    batch_size=16,
	    return_timestamps=True,
	    torch_dtype=TORCH_DTYPE,
	    device=DEVICE,
	)

	# Defining speech-to-text function.

	def convert(audio, state=""):
	    """
	    Transcribe one recording and append the text to the running transcript.

	    This function is used as the callback of the Gradio Interface.

	    Parameters:
	    - audio: the recording to transcribe. The Interface's Audio input is
	      configured with type="filepath", so this is a path to an audio file;
	      it is expected to be None when nothing has been recorded yet.
	    - state: a string with the accumulated text from previous conversions.

	    Returns:
	    - a (textbox_value, new_state) tuple. On success both items are the
	      updated transcript. On failure the first item is an error message and
	      the transcript is returned unchanged.
	    """
	    error_message = "Error processing audio: Please start recording!"

	    # Treat a missing state as an empty transcript so the concatenation
	    # at the end cannot fail with a TypeError.
	    if state is None:
	        state = ""

	    # Nothing recorded: answer immediately instead of waiting and then
	    # failing inside the pipeline.
	    if audio is None:
	        return error_message, state

	    # NOTE(review): fixed 3 s delay before every transcription; its purpose
	    # is not evident from this file -- confirm it is still needed.
	    time.sleep(3)

	    try:
	        transcribed_text = pipe(audio)["text"]
	    except Exception:
	        # Keep the friendly message for the UI, but log the real cause so
	        # pipeline failures are not silently hidden.
	        logging.getLogger(__name__).exception("Speech-to-text conversion failed")
	        return error_message, state

	    state += transcribed_text + " "
	    return state, state

	# Building the Gradio Interface: a microphone recording and the session
	# state go into convert; its two results fill a textbox and the state.

	gr_interface = gr.Interface(
	    title="Automatic Speech-to-Text",
	    description="### Record your speech and watch it get converted to text!",
	    fn=convert,
	    inputs=[
	        gr.Audio(
	            type="filepath",
	            sources="microphone",
	            label="Please Record Your Speech Here!",
	        ),
	        "state",
	    ],
	    outputs=["textbox", "state"],
	    live=True,
	)

	# Launching the app with Gradio's default launch options.

	gr_interface.launch()