Spaces:
Running
Running
1. Add Traditional Chinese conversion 2. Improve UI layout
Browse files- app/asr_worker.py +7 -2
- app/static/index.html +74 -6
app/asr_worker.py
CHANGED
@@ -2,6 +2,10 @@ import numpy as np
|
|
2 |
import sherpa_onnx
|
3 |
from pathlib import Path
|
4 |
import scipy.signal
|
|
|
|
|
|
|
|
|
5 |
|
6 |
def resample_audio(audio, orig_sr, target_sr):
|
7 |
return scipy.signal.resample_poly(audio, target_sr, orig_sr)
|
@@ -33,7 +37,7 @@ def stream_audio(raw_pcm_bytes, stream, recognizer):
|
|
33 |
if recognizer.is_ready(stream):
|
34 |
recognizer.decode_streams([stream])
|
35 |
result = recognizer.get_result(stream)
|
36 |
-
return result, rms
|
37 |
|
38 |
def finalize_stream(stream, recognizer):
|
39 |
tail = np.zeros(int(0.66 * 16000), dtype=np.float32)
|
@@ -41,4 +45,5 @@ def finalize_stream(stream, recognizer):
|
|
41 |
stream.input_finished()
|
42 |
while recognizer.is_ready(stream):
|
43 |
recognizer.decode_streams([stream])
|
44 |
-
|
|
|
|
2 |
import sherpa_onnx
|
3 |
from pathlib import Path
|
4 |
import scipy.signal
|
5 |
+
from opencc import OpenCC
|
6 |
+
|
7 |
+
converter = OpenCC('s2t')
|
8 |
+
|
9 |
|
10 |
def resample_audio(audio, orig_sr, target_sr):
|
11 |
return scipy.signal.resample_poly(audio, target_sr, orig_sr)
|
|
|
37 |
if recognizer.is_ready(stream):
|
38 |
recognizer.decode_streams([stream])
|
39 |
result = recognizer.get_result(stream)
|
40 |
+
return converter.convert(result), rms
|
41 |
|
42 |
def finalize_stream(stream, recognizer):
|
43 |
tail = np.zeros(int(0.66 * 16000), dtype=np.float32)
|
|
|
45 |
stream.input_finished()
|
46 |
while recognizer.is_ready(stream):
|
47 |
recognizer.decode_streams([stream])
|
48 |
+
result = recognizer.get_result(stream)
|
49 |
+
return converter.convert(result)
|
app/static/index.html
CHANGED
@@ -1,13 +1,81 @@
|
|
1 |
<!DOCTYPE html>
|
2 |
-
<html>
|
3 |
<head>
|
4 |
-
<
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
5 |
</head>
|
6 |
<body>
|
7 |
-
<h1>🎤 Speak into your
|
8 |
-
<
|
9 |
-
|
10 |
-
<
|
|
|
|
|
|
|
|
|
11 |
<script>
|
12 |
const ws = new WebSocket("wss://" + location.host + "/ws");
|
13 |
const vol = document.getElementById("vol");
|
|
|
1 |
<!DOCTYPE html>
|
2 |
+
<html lang="en">
|
3 |
<head>
|
4 |
+
<meta charset="UTF-8" />
|
5 |
+
<title>🎤 Real-Time ASR Demo</title>
|
6 |
+
<style>
|
7 |
+
body {
|
8 |
+
font-family: "Segoe UI", sans-serif;
|
9 |
+
background-color: #f5f6fa;
|
10 |
+
display: flex;
|
11 |
+
flex-direction: column;
|
12 |
+
align-items: center;
|
13 |
+
justify-content: center;
|
14 |
+
min-height: 100vh;
|
15 |
+
margin: 0;
|
16 |
+
padding: 2rem;
|
17 |
+
color: #2f3640;
|
18 |
+
}
|
19 |
+
|
20 |
+
h1 {
|
21 |
+
margin-bottom: 1rem;
|
22 |
+
font-size: 2rem;
|
23 |
+
}
|
24 |
+
|
25 |
+
#vol {
|
26 |
+
width: 300px;
|
27 |
+
height: 20px;
|
28 |
+
margin-bottom: 1rem;
|
29 |
+
appearance: none;
|
30 |
+
}
|
31 |
+
|
32 |
+
#vol::-webkit-progress-bar {
|
33 |
+
background-color: #dcdde1;
|
34 |
+
border-radius: 8px;
|
35 |
+
}
|
36 |
+
|
37 |
+
#vol::-webkit-progress-value {
|
38 |
+
background-color: #44bd32;
|
39 |
+
border-radius: 8px;
|
40 |
+
transition: width 0.2s;
|
41 |
+
}
|
42 |
+
|
43 |
+
.output {
|
44 |
+
width: 90%;
|
45 |
+
max-width: 800px;
|
46 |
+
text-align: left;
|
47 |
+
margin-top: 2rem;
|
48 |
+
background: white;
|
49 |
+
padding: 1rem 1.5rem;
|
50 |
+
border-radius: 10px;
|
51 |
+
box-shadow: 0 0 10px rgba(0,0,0,0.1);
|
52 |
+
}
|
53 |
+
|
54 |
+
.label {
|
55 |
+
font-weight: bold;
|
56 |
+
color: #718093;
|
57 |
+
}
|
58 |
+
|
59 |
+
#partial {
|
60 |
+
font-size: 1.25rem;
|
61 |
+
color: #353b48;
|
62 |
+
}
|
63 |
+
|
64 |
+
#final {
|
65 |
+
font-size: 1.4rem;
|
66 |
+
color: #e84118;
|
67 |
+
}
|
68 |
+
</style>
|
69 |
</head>
|
70 |
<body>
|
71 |
+
<h1>🎤 Speak into your microphone</h1>
|
72 |
+
<progress id="vol" max="1" value="0"></progress>
|
73 |
+
|
74 |
+
<div class="output">
|
75 |
+
<div><span class="label">Partial:</span> <span id="partial">...</span></div>
|
76 |
+
<div><span class="label">Final:</span> <b id="final">...</b></div>
|
77 |
+
</div>
|
78 |
+
|
79 |
<script>
|
80 |
const ws = new WebSocket("wss://" + location.host + "/ws");
|
81 |
const vol = document.getElementById("vol");
|