Spaces:

awacke1
/

SOTA-AFEST-Speech

Runtime error

App Files Files Community

awacke1 committed on Mar 25, 2022

Commit

8e38110

1 Parent(s): 01261ba

Create app.py

Browse files

Files changed (1) hide show

app.py +49 -0

app.py ADDED Viewed

	@@ -0,0 +1,49 @@

+import os
+os.system("pip install gradio==2.7.5.2")
+import torch
+import zipfile
+import torchaudio
+from glob import glob
+import gradio as gr
+def predict(audio):
+  """Transcribe one audio file with the Silero English speech-to-text model.
+
+  Args:
+    audio: Path to the audio file on disk (the Gradio input further down is
+      created with type="filepath").
+
+  Returns:
+    The decoder's output for the first example the model produces, or None
+    if the model produces no examples.
+  """
+  device = torch.device('cpu')  # gpu also works, but our models are fast enough for CPU
+  # NOTE(review): the model is loaded again on every call; caching it across
+  # calls would probably make requests faster -- confirm before changing.
+  model, decoder, utils = torch.hub.load(repo_or_dir='snakers4/silero-models',
+                                        model='silero_stt',
+                                        language='en', # also available 'de', 'es'
+                                        device=device)
+  # utils unpacks into four helpers; read_audio is not needed here.
+  read_batch, split_into_batches, _read_audio, prepare_model_input = utils
+  # `audio` is a single literal path, so it is passed through as-is. Running it
+  # through glob() would treat [, ], * and ? in the file name as wildcards and
+  # could yield no match, making batches[0] raise IndexError.
+  batches = split_into_batches([audio], batch_size=10)
+  # Named model_input so the builtin input() is not shadowed.
+  model_input = prepare_model_input(read_batch(batches[0]), device=device)
+  output = model(model_input)
+  # Only one file is submitted, so only the first result is returned.
+  for example in output:
+    return decoder(example.cpu())
+# Text passed to gr.Interface further down: the page title, a short
+# description, and a longer article written in Markdown (headings, a bullet
+# list and links).
+# NOTE(review): the article lists German and Spanish as supported, but
+# predict() loads only the 'en' model -- confirm the wording is intended.
+title = "Speech-To-Text State of the Art"
+description = "Gradio demo for speech-to-text models using Silero, a set of compact enterprise-grade pre-trained STT Models for multiple languages. To use, upload an MP3 or Wav file."
+article = """
+Speech-To-Text in a compact form-factor for several commonly spoken languages. Robust to a variety of dialects, codecs, domains, noises, lower sampling rates this model optimizes for speed and size.  If results are not optimal audio should be resampled to 16 kHz. The models consume a normalized audio in the form of samples without pre-processing except for normalization and output frames with token probabilities.
+### Supported Languages
+As of this update, the following languages are supported:
+- English
+- German
+- Spanish
+Model repository of this instance is at [repo](https://github.com/AaronCWacker/silero-models).
+### Additional Examples and Benchmarks
+For performance benchmarks please see [wiki](https://github.com/snakers4/silero-models/wiki).
+"""
+# Download a single sample recording at startup so it can be offered as an
+# example in the UI; any format compatible with TorchAudio (soundfile backend)
+# would do.
+torch.hub.download_url_to_file('https://opus-codec.org/static/examples/samples/speech_orig.wav',
+                               dst='speech_orig.wav', progress=True)
+examples = [['speech_orig.wav']]
+# Named audio_input rather than `input` so the builtin input() is not shadowed
+# at module level. type="filepath" is what lets predict() treat its argument as
+# a path.
+audio_input = gr.inputs.Audio(type="filepath")
+# Build the interface (audio in, text out) and start serving it.
+gr.Interface(predict, audio_input, "text", title=title, description=description, article=article,
+             analytics_enabled=False, show_tips=False, examples=examples).launch()