# Hugging Face Space: live German speech-to-text demo
# (wav2vec2 ASR + Silero voice-activity detection, Gradio streaming UI).
from transformers import pipeline
import torch
import gradio as gr
import librosa
import numpy as np

# German ASR model: wav2vec2 base fine-tuned for German.
p = pipeline("automatic-speech-recognition", model="aware-ai/wav2vec2-base-german")

# Silero voice-activity-detection model plus its helper utilities,
# fetched via torch.hub.
model, utils = torch.hub.load(repo_or_dir='snakers4/silero-vad',
                              model='silero_vad')
# Only get_speech_timestamps (and read_audio, unused below) are needed.
(get_speech_timestamps,
 _, read_audio,
 *_) = utils
def is_speech(wav, sr):
    """Return True if the Silero VAD finds any speech segment in `wav`.

    wav: 1-D audio samples; sr: sampling rate in Hz.
    """
    speech_timestamps = get_speech_timestamps(wav, model,
                                              sampling_rate=sr)
    # Any detected segment at all counts as speech.
    return len(speech_timestamps) > 0
def transcribe(audio, state=None):
    """One streaming-transcription step for the Gradio app.

    audio: filepath of the latest microphone chunk (Gradio streaming mode).
    state: per-session dict with keys "text" (committed transcript),
           "temp_text" (in-progress hypothesis) and "audio" (buffered
           samples); a fresh one is created when None.

    Returns (display_text, state): the committed transcript with the
    current hypothesis shown in parentheses, plus the updated state.
    """
    # BUGFIX: the default was a mutable dict shared across every call and
    # every session; use the None sentinel (the guard below already existed).
    if state is None:
        state = {"text": "", "temp_text": "", "audio": None}
    # Load/resample the incoming chunk to 16 kHz as the models expect.
    wav_data, _sr = librosa.load(audio, sr=16000)
    if is_speech(wav_data, _sr):
        # Speech continues: grow the buffer and re-transcribe the whole
        # utterance so the partial hypothesis can be revised.
        if state["audio"] is None:
            state["audio"] = wav_data
        else:
            state["audio"] = np.concatenate((state["audio"], wav_data))
        text = p(state["audio"])["text"] + "\n"
        state["temp_text"] = text
    else:
        # Silence: commit the hypothesis and reset the audio buffer.
        state["text"] += state["temp_text"]
        state["temp_text"] = ""
        state["audio"] = None
    return f'{state["text"]} ( {state["temp_text"]} )', state
# Live streaming app: each microphone chunk arrives as a file path and is
# fed to `transcribe` together with the per-session state; output is the
# running transcript plus the carried-over state.
gr.Interface(
    transcribe,
    [gr.Audio(source="microphone", type="filepath", streaming=True), "state"],
    [gr.Textbox(), "state"],
    live=True,
).launch(server_name="0.0.0.0")  # bind all interfaces for the hosted Space