whispercpp / app.py
alonsosilva's picture
Add task
03485f2
raw
history blame
2.91 kB
import time
import solara
import numpy as np
from ipywebrtc import AudioRecorder, CameraStream, AudioStream
from tempfile import NamedTemporaryFile
from pywhispercpp.model import Model
from solara.lab import use_task, Task
# Model names offered in the "Select model:" dropdown.
whisper_models = [
    "tiny.en-q5_1",
    "tiny.en-q8_0",
    "tiny.en",
    "base.en-q5_1",
    "base.en",
    "small.en-q5_1",
    "small.en",
]

# Reactive state shared between the Page component and its callbacks.
whisper_model = solara.reactive("tiny.en-q8_0")  # model picked in the dropdown
current_whisper_model = solara.reactive("tiny.en-q8_0")  # name shown as loaded
transcription = solara.reactive("")  # text rendered under "Transcription:"
generation_time = solara.reactive("")  # stays "" until a run has been timed

# Model instance used for transcription; built once when the module is loaded.
w = Model("tiny.en-q8_0")
@solara.component
def Page():
    """Render the Whisper speech-to-text page.

    The page has a model selector, an audio recorder, a button that
    transcribes the current recording, and an output area showing the
    transcription and how long it took to generate.
    """
    with solara.Sidebar():
        title = "Whisper STT"
        with solara.Head():
            solara.Title(f"{title}")

    with solara.Column(style={"width": "100%", "padding": "50px"}):
        solara.Markdown(f"#{title}")
        solara.Markdown("## Send a voice message")
        solara.Markdown("### Recorder")

        with solara.Row():

            def load_model():
                """Load the selected model and make it the one used to transcribe."""
                # Rebind the module-level model. Without `global`, the
                # assignment would only create a local variable and
                # transcribe_voice would keep using the initial model.
                global w
                # Read the selection once so the reported name always matches
                # the model that was actually loaded.
                selected = whisper_model.value
                w = Model(selected)
                current_whisper_model.value = selected
                return 1

            solara.Select(
                label="Select model:",
                value=whisper_model,
                values=whisper_models,
                style="width: 10%",
            )
            # The task is keyed on the selected model name via `dependencies`.
            result: Task[int] = use_task(
                load_model, dependencies=[whisper_model.value]
            )
            if result.finished:
                solara.Success(f"Current model: {current_whisper_model.value}")
            else:
                solara.ProgressLinear(result.pending)

        # NOTE(review): these widgets are constructed on every render of Page;
        # confirm whether they should be created once and reused.
        camera = CameraStream(constraints={'audio': True, 'video': False})
        recorder = AudioRecorder(stream=camera)
        recorder.playing = False
        # NOTE(review): `display` is not imported in this file; presumably it
        # is provided by the runtime that executes the app - confirm.
        display(recorder)

        def transcribe_voice():
            """Transcribe the current recording and publish text and timing."""
            transcription.value = ""
            generation_time.value = ""

            audio_bytes = recorder.audio.value
            if not audio_bytes:
                # Nothing recorded yet: leave the outputs cleared instead of
                # handing an empty file to the model.
                return

            with NamedTemporaryFile(suffix=".webm") as temp:
                # Write through the handle we already hold and flush so the
                # data is visible to whoever opens the file by name.
                temp.write(audio_bytes)
                temp.flush()
                start_time = time.time()
                segments = w.transcribe(temp.name)
                # Build the text in one pass so the reactive value is updated
                # once rather than once per segment.
                text = "".join(segment.text for segment in segments)
                end_time = time.time()

            generation_time.value = np.round(end_time - start_time, 2)
            transcription.value = text + " "

        with solara.Row():
            solara.Button("Send voice message", on_click=transcribe_voice)

    with solara.Column(style="padding: 50px"):
        solara.Markdown(f"### Transcription:")
        solara.Text(f"{transcription.value}", style="color: blue; font-size: 1.5rem")
        # generation_time is "" until a transcription has completed.
        if generation_time.value != "":
            solara.Text(
                f"Generation time: {generation_time.value} seconds",
                style="color: blue; position: fixed; bottom: 8rem",
            )