whispercpp / app.py
alonsosilva's picture
Add app
2e9aa1a
raw
history blame
2.24 kB
import time
import solara
import numpy as np
from ipywebrtc import AudioRecorder, CameraStream, AudioStream
from tempfile import NamedTemporaryFile
from pywhispercpp.model import Model
# Selectable whisper.cpp model names (quantized, English-only variants).
whisper_models = ['tiny.en-q8_0', "base.en-q5_1", "small.en-q5_1"]
# Currently selected model name; reactive so the UI re-renders on change.
whisper_model = solara.reactive("tiny.en-q8_0")
# Accumulated transcription text displayed on the page.
transcription = solara.reactive("")
# Duration of the last transcription in seconds ("" until the first run).
generation_time = solara.reactive("")
@solara.component
def Page():
    """Render the Whisper speech-to-text page.

    Shows a browser audio recorder; when the user clicks "Send voice
    message", the recorded clip is transcribed with the selected
    pywhispercpp model and the text plus elapsed time are displayed
    via the module-level reactives.
    """
    with solara.Sidebar():
        title = "Whisper STT"
        with solara.Head():
            solara.Title(title)
    with solara.Column(style={"width": "100%", "padding": "50px"}):
        # CommonMark requires a space after '#'; without it the title
        # renders as literal text instead of an <h1> heading.
        solara.Markdown(f"# {title}")
        solara.Markdown("## Send a voice message")
        solara.Markdown("### Recorder")
        # Memoize so the whisper model is not reloaded from disk on every
        # re-render; it is rebuilt only when a different model is selected.
        w = solara.use_memo(
            lambda: Model(whisper_model.value), [whisper_model.value]
        )
        camera = CameraStream(constraints={'audio': True, 'video': False})
        recorder = AudioRecorder(stream=camera)
        display(recorder)

        def MyButton():
            def transcribe_voice():
                # Nothing recorded yet -> nothing to transcribe (avoids
                # writing None/empty bytes to the temp file).
                if not recorder.audio.value:
                    return
                transcription.value = ""
                generation_time.value = ""
                # Write through the temp file's own handle and flush:
                # re-opening a NamedTemporaryFile by name while it is
                # still open fails on Windows.
                with NamedTemporaryFile(suffix=".webm") as temp:
                    temp.write(recorder.audio.value)
                    temp.flush()
                    start_time = time.time()
                    segments = w.transcribe(temp.name)
                    for segment in segments:
                        transcription.value += segment.text
                    end_time = time.time()
                    generation_time.value = np.round(end_time - start_time, 2)
                transcription.value += " "

            with solara.Row():
                solara.Button("Send voice message", on_click=transcribe_voice)
                solara.Select(label="Select model:", value=whisper_model,
                              values=whisper_models, style="width: 10%")

        MyButton()
        solara.Markdown("### Transcription:")
        solara.Text(f"{transcription.value}",
                    style="color: blue; font-size: 1.5rem")
        if generation_time.value != "":
            solara.Text(f"Generation time: {generation_time.value} seconds",
                        style="color: blue; position: fixed; bottom: 8rem")