awacke1 committed on
Commit
8e38110
·
1 Parent(s): 01261ba

Create app.py

Browse files
Files changed (1) hide show
  1. app.py +49 -0
app.py ADDED
@@ -0,0 +1,49 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ import os
2
+ os.system("pip install gradio==2.7.5.2")
3
+ import torch
4
+ import zipfile
5
+ import torchaudio
6
+ from glob import glob
7
+ import gradio as gr
8
+
9
# Lazily populated (model, decoder, utils) triple. Loading through
# torch.hub is slow, so it is done once per process rather than once
# per request.
_SILERO_BUNDLE = None


def _load_silero(device):
    """Return the cached Silero STT ``(model, decoder, utils)`` triple.

    The first call loads the English ``silero_stt`` model from the
    ``snakers4/silero-models`` hub repository onto *device*; later calls
    reuse the already-loaded objects.
    """
    global _SILERO_BUNDLE
    if _SILERO_BUNDLE is None:
        _SILERO_BUNDLE = torch.hub.load(repo_or_dir='snakers4/silero-models',
                                        model='silero_stt',
                                        language='en',  # also available 'de', 'es'
                                        device=device)
    return _SILERO_BUNDLE


def predict(audio):
    """Transcribe an audio file to text with the Silero STT model.

    Args:
        audio: Path to the audio file. Glob patterns are still expanded,
            as before; a path that matches nothing as a pattern is used
            literally.

    Returns:
        The decoded transcription of the first file in the first batch.

    Raises:
        ValueError: If *audio* is ``None`` or empty.
    """
    if not audio:
        raise ValueError("No audio file was provided.")

    device = torch.device('cpu')  # gpu also works, but our models are fast enough for CPU

    model, decoder, utils = _load_silero(device)
    (read_batch, split_into_batches,
     _read_audio, prepare_model_input) = utils  # see function signature for details

    # A literal path containing glob metacharacters ('[', '*', '?') can
    # match nothing; fall back to the path itself instead of failing on
    # an empty batch list below.
    test_files = glob(audio) or [audio]
    batches = split_into_batches(test_files, batch_size=10)
    model_input = prepare_model_input(read_batch(batches[0]),
                                      device=device)

    output = model(model_input)
    # Only the first example is returned, matching the original
    # early-return loop.
    return decoder(output[0].cpu())
28
+
29
# Text passed to gr.Interface as its `title` argument.
title = "Speech-To-Text State of the Art"
# Text passed to gr.Interface as its `description` argument.
description = "Gradio demo for speech-to-text models using Silero, a set of compact enterprise-grade pre-trained STT Models for multiple languages. To use, upload an MP3 or Wav file."
# Markdown text passed to gr.Interface as its `article` argument.
article = """
Speech-To-Text in a compact form-factor for several commonly spoken languages. Robust to a variety of dialects, codecs, domains, noises, lower sampling rates this model optimizes for speed and size. If results are not optimal audio should be resampled to 16 kHz. The models consume a normalized audio in the form of samples without pre-processing except for normalization and output frames with token probabilities.
### Supported Languages
As of this update, the following languages are supported:
- English
- German
- Spanish
Model repository of this instance is at [repo](https://github.com/AaronCWacker/silero-models).
### Additional Examples and Benchmarks
For performance benchmarks please see [wiki](https://github.com/snakers4/silero-models/wiki).
"""
42
+
43
# Download a single sample file so the demo ships with a ready-made example;
# any format compatible with TorchAudio (soundfile backend) works.
torch.hub.download_url_to_file('https://opus-codec.org/static/examples/samples/speech_orig.wav',
                               dst='speech_orig.wav', progress=True)
examples = [['speech_orig.wav']]

# Named `audio_input` rather than `input` so the builtin `input()` is not
# shadowed for the rest of the module. type="filepath" hands `predict` a path
# on disk rather than raw samples.
audio_input = gr.inputs.Audio(type="filepath")

# Build the UI around `predict` (audio file in, text out) and start serving.
gr.Interface(predict, audio_input, "text", title=title, description=description,
             article=article, analytics_enabled=False, show_tips=False,
             examples=examples).launch()