File size: 2,241 Bytes
2e9aa1a
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
import time
import solara
import numpy as np
from ipywebrtc import AudioRecorder, CameraStream, AudioStream
from tempfile import NamedTemporaryFile
from pywhispercpp.model import Model

# Model names offered in the "Select model:" dropdown.
whisper_models = ["tiny.en-q8_0", "base.en-q5_1", "small.en-q5_1"]

# Module-level reactive state read and written by the Page component.
# The selected model starts out as the first entry of the list above.
whisper_model = solara.reactive(whisper_models[0])
# Text shown under "Transcription:"; empty until a recording is transcribed.
transcription = solara.reactive("")
# Elapsed transcription time; the empty string means "nothing to show yet".
generation_time = solara.reactive("")
@solara.component
def Page():
    """Render the Whisper speech-to-text page.

    The sidebar holds the page title, an audio recorder widget, a button that
    transcribes the current recording and a model selector. The main area
    shows the transcription and, once one has been produced, the time the
    transcription took.

    State is kept in the module-level reactives ``whisper_model``,
    ``transcription`` and ``generation_time``.
    """
    # Hooks are called unconditionally at the top of the component.
    model_name = whisper_model.value

    # Building the model on every render is wasteful: this component re-renders
    # whenever the transcription or timing changes. Rebuild only when the
    # selected model name changes.
    w = solara.use_memo(lambda: Model(model_name), dependencies=[model_name])

    def make_recorder():
        # Audio-only media stream feeding the recorder widget.
        stream = CameraStream(constraints={'audio': True, 'video': False})
        return AudioRecorder(stream=stream)

    # Keep one recorder per component instance so that a re-render does not
    # replace the widget holding the recorded audio.
    recorder = solara.use_memo(make_recorder, dependencies=[])

    def transcribe_voice():
        """Transcribe the current recording and publish text and timing."""
        transcription.value = ""
        generation_time.value = ""

        audio_bytes = recorder.audio.value
        if not audio_bytes:
            # Nothing has been recorded yet; leave the outputs cleared.
            return

        with NamedTemporaryFile(suffix=".webm") as temp:
            # Write through the handle we already hold and flush, so the
            # data is on disk before the model opens the file by name.
            temp.write(audio_bytes)
            temp.flush()

            start_time = time.perf_counter()
            segments = w.transcribe(temp.name)
            text = "".join(segment.text for segment in segments)
            end_time = time.perf_counter()

        # Publish once instead of once per segment to avoid needless re-renders.
        transcription.value = text + " "
        generation_time.value = np.round(end_time - start_time, 2)

    with solara.Sidebar():
        title = "Whisper STT"
        with solara.Head():
            solara.Title(f"{title}")
        with solara.Column(style={"width": "100%", "padding": "50px"}):
            # Markdown headings need a space after the leading '#'.
            solara.Markdown(f"# {title}")
            solara.Markdown("## Send a voice message")
            solara.Markdown("### Recorder")
            solara.display(recorder)
            with solara.Row():
                solara.Button("Send voice message", on_click=transcribe_voice)
                solara.Select(label="Select model:", value=whisper_model, values=whisper_models, style="width: 10%")
    solara.Markdown("### Transcription:")
    solara.Text(f"{transcription.value}", style="color: blue; font-size: 1.5rem")
    if generation_time.value != "":
        solara.Text(f"Generation time: {generation_time.value} seconds", style="color: blue; position: fixed; bottom: 8rem")