Spaces:
Runtime error
Runtime error
Create app.py
Browse files
app.py
ADDED
@@ -0,0 +1,49 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
1 |
+
import os
|
2 |
+
# Install the pinned gradio release through the shell at startup; this must
# run before the `import gradio` statement further down the import list.
# NOTE(review): the exit status returned by os.system is discarded, so a
# failed install is not detected here.
os.system("pip install gradio==2.7.5.2")
|
3 |
+
import torch
|
4 |
+
import zipfile
|
5 |
+
import torchaudio
|
6 |
+
from glob import glob
|
7 |
+
import gradio as gr
|
8 |
+
|
9 |
+
# Loaded Silero STT bundles keyed by device string, so torch.hub.load runs
# once per process instead of once per request.
_STT_CACHE = {}


def _load_stt(device):
    """Return the cached (model, decoder, utils) triple for *device*."""
    key = str(device)
    if key not in _STT_CACHE:
        _STT_CACHE[key] = torch.hub.load(repo_or_dir='snakers4/silero-models',
                                         model='silero_stt',
                                         language='en',  # also available 'de', 'es'
                                         device=device)
    return _STT_CACHE[key]


def predict(audio):
    """Transcribe one audio file with the English Silero STT model.

    Args:
        audio: Path of the audio file to transcribe.

    Returns:
        The decoded text for the file, or an empty string when no path was
        supplied (``None`` or ``""``).
    """
    if not audio:
        # Nothing was submitted; return early instead of failing deep inside
        # the file-reading helpers.
        return ""

    device = torch.device('cpu')  # gpu also works, but our models are fast enough for CPU
    model, decoder, utils = _load_stt(device)
    # utils is a 4-tuple; only the batch reader and the input preparer are
    # needed for a single file.
    read_batch, _split_into_batches, _read_audio, prepare_model_input = utils

    # Use the path literally rather than as a glob pattern, so file names that
    # contain '*', '?' or '[' still resolve to the submitted file.
    model_input = prepare_model_input(read_batch([audio]), device=device)

    output = model(model_input)
    # A single file was submitted, so the first entry is the whole result.
    return decoder(output[0].cpu())
|
28 |
+
|
29 |
+
# Text shown by the Gradio page: heading, short blurb, and a markdown article.
title = "Speech-To-Text State of the Art"
description = "Gradio demo for speech-to-text models using Silero, a set of compact enterprise-grade pre-trained STT Models for multiple languages. To use, upload an MP3 or Wav file."
article = """
Speech-To-Text in a compact form-factor for several commonly spoken languages. Robust to a variety of dialects, codecs, domains, noises, lower sampling rates this model optimizes for speed and size. If results are not optimal audio should be resampled to 16 kHz. The models consume a normalized audio in the form of samples without pre-processing except for normalization and output frames with token probabilities.
### Supported Languages
As of this update, the following languages are supported:
- English
- German
- Spanish
Model repository of this instance is at [repo](https://github.com/AaronCWacker/silero-models).
### Additional Examples and Benchmarks
For performance benchmarks please see [wiki](https://github.com/snakers4/silero-models/wiki).
"""

# download a single file, any format compatible with TorchAudio (soundfile backend)
torch.hub.download_url_to_file('https://opus-codec.org/static/examples/samples/speech_orig.wav',
                               dst='speech_orig.wav', progress=True)
# The downloaded sample is offered as the one clickable example input.
examples = [['speech_orig.wav']]

# Named audio_input (not `input`) so the builtin input() is not shadowed at
# module level. type="filepath" hands predict() a path string.
audio_input = gr.inputs.Audio(type="filepath")
gr.Interface(
    predict,
    audio_input,
    "text",
    title=title,
    description=description,
    article=article,
    analytics_enabled=False,
    show_tips=False,
    examples=examples,
).launch()
|