# awacke1's picture
# Create app.py
# 8e38110
import os
# HACK: installs a pinned Gradio at runtime via pip; prefer declaring this in
# requirements.txt so the environment is reproducible and startup is faster.
os.system("pip install gradio==2.7.5.2")
import torch
import zipfile
import torchaudio
from glob import glob
import gradio as gr
def predict(audio):
    """Transcribe an audio file to text with the Silero English STT model.

    Parameters
    ----------
    audio : str
        Filesystem path (or glob pattern) to the input audio file, as
        supplied by the Gradio Audio component.

    Returns
    -------
    str
        Decoded transcription of the first example in the first batch, or
        an empty string when no file matches or the model yields no output.
    """
    device = torch.device('cpu')  # gpu also works, but our models are fast enough for CPU
    # NOTE(review): the model is fetched/loaded on every call; acceptable for a
    # demo, but hoisting this to module level would speed up repeated requests.
    model, decoder, utils = torch.hub.load(repo_or_dir='snakers4/silero-models',
                                           model='silero_stt',
                                           language='en',  # also available 'de', 'es'
                                           device=device)
    (read_batch, split_into_batches,
     read_audio, prepare_model_input) = utils  # see function signature for details
    test_files = glob(audio)
    if not test_files:
        # Guard: an unmatched glob previously raised IndexError on batches[0].
        return ""
    batches = split_into_batches(test_files, batch_size=10)
    # Renamed from `input` to avoid shadowing the builtin.
    model_input = prepare_model_input(read_batch(batches[0]),
                                      device=device)
    output = model(model_input)
    for example in output:
        # Decode and return only the first example, as the original did.
        return decoder(example.cpu())
    return ""  # explicit empty result instead of an implicit None
# UI copy shown by the Gradio interface (title bar, short description,
# and the longer markdown article rendered below the demo).
title = "Speech-To-Text State of the Art"
description = "Gradio demo for speech-to-text models using Silero, a set of compact enterprise-grade pre-trained STT Models for multiple languages. To use, upload an MP3 or Wav file."
article = """
Speech-To-Text in a compact form-factor for several commonly spoken languages. Robust to a variety of dialects, codecs, domains, noises, lower sampling rates this model optimizes for speed and size. If results are not optimal audio should be resampled to 16 kHz. The models consume a normalized audio in the form of samples without pre-processing except for normalization and output frames with token probabilities.
### Supported Languages
As of this update, the following languages are supported:
- English
- German
- Spanish
Model repository of this instance is at [repo](https://github.com/AaronCWacker/silero-models).
### Additional Examples and Benchmarks
For performance benchmarks please see [wiki](https://github.com/snakers4/silero-models/wiki).
"""
# Download a single sample file so the demo ships with a working example; any
# format compatible with TorchAudio (soundfile backend) works.
torch.hub.download_url_to_file('https://opus-codec.org/static/examples/samples/speech_orig.wav',
                               dst='speech_orig.wav', progress=True)
examples = [['speech_orig.wav']]
# Renamed from `input` to avoid shadowing the builtin of the same name.
audio_input = gr.inputs.Audio(type="filepath")
gr.Interface(predict, audio_input, "text", title=title, description=description,
             article=article, analytics_enabled=False, show_tips=False,
             examples=examples).launch()  # removed stray trailing semicolon