# awacke1's picture
# Create app.py
# 8e38110
import os
# HACK: installs a pinned Gradio at runtime via pip; prefer declaring this in
# requirements.txt so the environment is reproducible and startup is faster.
os.system("pip install gradio==2.7.5.2")
import torch
import zipfile
import torchaudio
from glob import glob
import gradio as gr
def predict(audio):
    """Transcribe an audio file to text with the Silero English STT model.

    Parameters
    ----------
    audio : str
        Filesystem path (or glob pattern) to the input audio file, as
        supplied by the Gradio Audio component.

    Returns
    -------
    str
        Decoded transcription of the first example in the first batch, or
        an empty string when no file matches or the model yields no output.
    """
    device = torch.device('cpu')  # gpu also works, but our models are fast enough for CPU
    # NOTE(review): the model is fetched/loaded on every call; acceptable for a
    # demo, but hoisting this to module level would speed up repeated requests.
    model, decoder, utils = torch.hub.load(repo_or_dir='snakers4/silero-models',
                                           model='silero_stt',
                                           language='en',  # also available 'de', 'es'
                                           device=device)
    (read_batch, split_into_batches,
     read_audio, prepare_model_input) = utils  # see function signature for details
    test_files = glob(audio)
    if not test_files:
        # Guard: an unmatched glob previously raised IndexError on batches[0].
        return ""
    batches = split_into_batches(test_files, batch_size=10)
    # Renamed from `input` to avoid shadowing the builtin.
    model_input = prepare_model_input(read_batch(batches[0]),
                                      device=device)
    output = model(model_input)
    for example in output:
        # Decode and return only the first example, as the original did.
        return decoder(example.cpu())
    return ""  # explicit empty result instead of an implicit None
# UI copy shown by the Gradio interface (title bar, short description,
# and the longer markdown article rendered below the demo).
title = "Speech-To-Text State of the Art"
description = "Gradio demo for speech-to-text models using Silero, a set of compact enterprise-grade pre-trained STT Models for multiple languages. To use, upload an MP3 or Wav file."
article = """
Speech-To-Text in a compact form-factor for several commonly spoken languages. Robust to a variety of dialects, codecs, domains, noises, lower sampling rates this model optimizes for speed and size. If results are not optimal audio should be resampled to 16 kHz. The models consume a normalized audio in the form of samples without pre-processing except for normalization and output frames with token probabilities.
### Supported Languages
As of this update, the following languages are supported:
- English
- German
- Spanish
Model repository of this instance is at [repo](https://github.com/AaronCWacker/silero-models).
### Additional Examples and Benchmarks
For performance benchmarks please see [wiki](https://github.com/snakers4/silero-models/wiki).
"""
# Download a single sample file so the demo ships with a working example; any
# format compatible with TorchAudio (soundfile backend) works.
torch.hub.download_url_to_file('https://opus-codec.org/static/examples/samples/speech_orig.wav',
                               dst='speech_orig.wav', progress=True)
examples = [['speech_orig.wav']]
# Renamed from `input` to avoid shadowing the builtin of the same name.
audio_input = gr.inputs.Audio(type="filepath")
gr.Interface(predict, audio_input, "text", title=title, description=description,
             article=article, analytics_enabled=False, show_tips=False,
             examples=examples).launch()  # removed stray trailing semicolon