# Spaces: Sleeping
import time
from tempfile import NamedTemporaryFile

import numpy as np
import solara
from ipywebrtc import AudioRecorder, AudioStream, CameraStream
from pywhispercpp.model import Model
# Model identifiers offered in the "Select model:" dropdown; each one is
# passed unchanged to pywhispercpp's ``Model`` constructor.
whisper_models = ["tiny.en-q8_0", "base.en-q5_1", "small.en-q5_1"]

# Currently selected model; defaults to the first entry of the list.
whisper_model = solara.reactive(whisper_models[0])

# Text of the most recent transcription ("" until one has been produced).
transcription = solara.reactive("")

# Elapsed transcription time in seconds, or "" while no timing is available.
generation_time = solara.reactive("")
def Page():
    """Render the Whisper speech-to-text page.

    The page shows an audio recorder, a "Send voice message" button, a
    model selector bound to ``whisper_model``, the latest transcription
    and, once available, the time the transcription took.

    Reads the module-level reactives ``whisper_model``, ``transcription``
    and ``generation_time``; the button handler writes the latter two.
    """
    # NOTE(review): the indentation of this function was lost in the source
    # this was recovered from; the nesting below is a reconstruction.
    # NOTE(review): no ``@solara.component`` decorator is present in the
    # original for Page or MyButton - confirm that this is intentional.
    with solara.Sidebar():
        title = "Whisper STT"
    with solara.Head():
        solara.Title(title)
    with solara.Column(style={"width": "100%", "padding": "50px"}):
        solara.Markdown(f"#{title}")
        solara.Markdown("## Send a voice message")
        solara.Markdown("### Recorder")

        # NOTE(review): a Model is constructed every time Page runs, which
        # presumably reloads the weights - consider caching per model name.
        w = Model(whisper_model.value)
        camera = CameraStream(constraints={"audio": True, "video": False})
        recorder = AudioRecorder(stream=camera)
        # Call solara.display explicitly: the bare name ``display`` is not
        # imported in this file and only resolves if the runtime injects it.
        solara.display(recorder)

        def MyButton():
            """Render the send button and the model selector in one row."""

            def transcribe_voice():
                """Transcribe the recorded audio and publish text and timing."""
                transcription.value = ""
                generation_time.value = ""

                audio_bytes = recorder.audio.value
                if not audio_bytes:
                    # Presumably nothing has been recorded yet (TODO confirm);
                    # there is nothing to hand to the transcriber.
                    return

                with NamedTemporaryFile(suffix=".webm") as temp:
                    # Write through the existing handle rather than opening
                    # the same path a second time, and flush so the
                    # transcriber sees the full payload when it reads by name.
                    temp.write(audio_bytes)
                    temp.flush()

                    # perf_counter is monotonic, so the measured interval
                    # cannot be skewed by wall-clock adjustments.
                    start_time = time.perf_counter()
                    segments = w.transcribe(temp.name)
                    # Each segment's text is followed by one space; build the
                    # string once so the reactive is assigned a single time
                    # instead of once per segment.
                    text = "".join(f"{segment.text} " for segment in segments)
                    elapsed = time.perf_counter() - start_time

                transcription.value = text
                generation_time.value = np.round(elapsed, 2)

            with solara.Row():
                solara.Button("Send voice message", on_click=transcribe_voice)
                solara.Select(
                    label="Select model:",
                    value=whisper_model,
                    values=whisper_models,
                    style="width: 10%",
                )

        MyButton()
        solara.Markdown("### Transcription:")
        solara.Text(
            transcription.value,
            style="color: blue; font-size: 1.5rem",
        )
        # generation_time stays "" until a transcription has been timed.
        if generation_time.value != "":
            solara.Text(
                f"Generation time: {generation_time.value} seconds",
                style="color: blue; position: fixed; bottom: 8rem",
            )