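# Hugging Face Space: live German speech-to-text. Streams microphone audio,
# gates it with Silero VAD, and transcribes buffered speech with a wav2vec2
# ASR pipeline.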
from transformers import pipeline
import torch
import gradio as gr
import librosa
import numpy as np
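# German ASR model served through the transformers pipeline API.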
p = pipeline("automatic-speech-recognition", model="aware-ai/wav2vec2-base-german")
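# Silero VAD from torch.hub; the utils tuple bundles several helpers, of
# which only get_speech_timestamps is used below (read_audio is unpacked
# but unused).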
model, utils = torch.hub.load(repo_or_dir='snakers4/silero-vad', model='silero_vad')
(get_speech_timestamps, _, read_audio, *_) = utils
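# True if the VAD finds at least one speech segment in the audio chunk.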
def is_speech(wav, sr):
    speech_timestamps = get_speech_timestamps(wav, model, sampling_rate=sr)
    return len(speech_timestamps) > 0
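# Called once per streamed microphone chunk. `state` persists across calls:
#   text      - transcript committed after a pause,
#   temp_text - provisional transcript of the utterance in progress,
#   audio     - raw samples buffered since the last pause.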
def transcribe(audio, state=None):
    # Avoid a mutable default argument: initialise per-session state here.
    if state is None:
        state = {"text": "", "temp_text": "", "audio": None}
    wav_data, sr = librosa.load(audio, sr=16000)
    if is_speech(wav_data, sr):
        # Speech detected: append the chunk and re-transcribe the whole buffer.
        if state["audio"] is None:
            state["audio"] = wav_data
        else:
            state["audio"] = np.concatenate((state["audio"], wav_data))
        state["temp_text"] = p(state["audio"])["text"] + "\n"
    else:
        # Silence: commit the provisional text and reset the audio buffer.
        state["text"] += state["temp_text"]
        state["temp_text"] = ""
        state["audio"] = None
    return f'{state["text"]} ( {state["temp_text"]} )', state
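# Live interface: the streaming Audio input re-invokes transcribe on every
# chunk, and "state" round-trips the session dict between calls.
# (Gradio 3.x API: `source=` became `sources=[...]` in Gradio 4.)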
gr.Interface(
    transcribe,
    [gr.Audio(source="microphone", type="filepath", streaming=True), "state"],
    [gr.Textbox(), "state"],
    live=True,
).launch(server_name="0.0.0.0")