Spaces:
cuio
/
No application file

cuio committed on
Commit
2742885
·
verified ·
1 Parent(s): fb50115

Upload 4 files

Browse files
Files changed (4) hide show
  1. app.js +99 -0
  2. audio_process.js +45 -0
  3. index.html +179 -0
  4. voice.png +0 -0
app.js ADDED
@@ -0,0 +1,99 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
/**
 * Alpine.js component state for the VoiceAPI demo page.
 *
 * Drives two features over WebSockets:
 *  - text-to-speech (`dotts`): sends `text` to `/tts` and plays the binary
 *    frames that come back through the 'play-audio-processor' worklet;
 *  - speech recognition (`doasr` / `stopasr`): streams microphone audio from
 *    the 'record-audio-processor' worklet to `/asr` and shows the returned
 *    transcripts in `logs`.
 */
const demoapp = {
    text: '讲个冷笑话吧,要很好笑的那种。',
    recording: false,
    asrWS: null,
    currentText: null,
    disabled: false,
    elapsedTime: null,
    logs: [{ idx: 0, text: 'Happily here at ruzhila.cn.' }],

    // Resources owned by the running sessions, kept so they can be released.
    asrStream: null,    // MediaStream returned by getUserMedia
    asrContext: null,   // AudioContext feeding the recorder worklet
    asrStarting: false, // true while doasr() is still setting up
    ttsContext: null,   // AudioContext of the most recent playback

    /** Lifecycle hook; intentionally empty. */
    async init() {
    },

    /**
     * Synthesize `this.text` and play the result.
     *
     * The Speak button stays disabled until the server sends its closing
     * text message (JSON carrying `elapsed`) or the socket errors/closes.
     * The playback context of the previous request is closed first so that
     * repeated clicks do not accumulate AudioContext instances.
     *
     * @returns {Promise<void>}
     * @throws rethrows any setup failure after re-enabling the button.
     */
    async dotts() {
        if (this.disabled) {
            return;
        }
        this.disabled = true;

        try {
            const previous = this.ttsContext;
            this.ttsContext = null;
            if (previous && previous.state !== 'closed') {
                await previous.close();
            }

            const audioContext = new AudioContext({ sampleRate: 16000 });
            this.ttsContext = audioContext;
            await audioContext.audioWorklet.addModule('./audio_process.js');

            const playNode = new AudioWorkletNode(audioContext, 'play-audio-processor');
            playNode.connect(audioContext.destination);

            // Only the latest request is allowed to re-enable the button.
            const finish = () => {
                if (this.ttsContext === audioContext) {
                    this.disabled = false;
                }
            };

            const ws = new WebSocket('/tts');
            // ArrayBuffer frames are delivered synchronously, which keeps the
            // audio chunks in arrival order (Blob decoding is asynchronous).
            ws.binaryType = 'arraybuffer';
            ws.onopen = () => {
                ws.send(this.text);
            };
            ws.onerror = finish;
            ws.onclose = finish;
            ws.onmessage = (e) => {
                if (e.data instanceof ArrayBuffer) {
                    playNode.port.postMessage({
                        message: 'audioData',
                        audioData: pcm16ToFloat32(e.data),
                    });
                    return;
                }
                try {
                    this.elapsedTime = JSON.parse(e.data)?.elapsed;
                } finally {
                    finish();
                }
            };
        } catch (err) {
            this.disabled = false;
            throw err;
        }
    },

    /**
     * Stop the recognition session and release everything it owns: the
     * WebSocket, the microphone tracks and the recording AudioContext.
     * Any partial transcript still pending is appended to `logs`.
     * Safe to call when nothing is running.
     *
     * @returns {Promise<void>}
     */
    async stopasr() {
        const ws = this.asrWS;
        const stream = this.asrStream;
        const context = this.asrContext;
        if (!ws && !stream && !context) {
            return;
        }

        // Clear the state first so that the socket's close handler, which
        // calls back into stopasr(), finds nothing left to do.
        this.asrWS = null;
        this.asrStream = null;
        this.asrContext = null;
        this.recording = false;

        if (ws) {
            ws.close();
        }
        if (stream) {
            // Stopping the tracks is what actually releases the microphone.
            stream.getTracks().forEach((track) => track.stop());
        }

        if (this.currentText) {
            this.logs.push({ idx: this.logs.length + 1, text: this.currentText });
        }
        this.currentText = null;

        if (context && context.state !== 'closed') {
            try {
                await context.close();
            } catch (err) {
                console.error('Failed to close recording AudioContext:', err);
            }
        }
    },

    /**
     * Start a recognition session: open the microphone, connect to `/asr`
     * and forward every PCM frame produced by the recorder worklet.
     *
     * Server messages are JSON `{ text, finished, idx }`; `text` is shown as
     * the live transcript and moved into `logs` once `finished` is truthy.
     *
     * @returns {Promise<void>}
     * @throws rethrows any setup failure (e.g. microphone permission denied)
     *         after releasing whatever had already been acquired.
     */
    async doasr() {
        if (this.recording || this.asrStarting) {
            return;
        }
        this.asrStarting = true;

        try {
            const mediaStream = await navigator.mediaDevices.getUserMedia({
                video: false,
                audio: true,
            });
            this.asrStream = mediaStream;

            const ws = new WebSocket('/asr');
            this.asrWS = ws;

            ws.onopen = () => {
                this.logs = [];
            };
            ws.onmessage = (e) => {
                const { text, finished, idx } = JSON.parse(e.data);
                this.currentText = text;
                if (finished) {
                    this.logs.push({ text, idx });
                    this.currentText = null;
                }
            };
            ws.onclose = () => {
                // Connection dropped by the server: reset the UI and free
                // the microphone instead of "recording" into a dead socket.
                if (this.asrWS === ws) {
                    void this.stopasr();
                }
            };

            const audioContext = new AudioContext({ sampleRate: 16000 });
            this.asrContext = audioContext;
            await audioContext.audioWorklet.addModule('./audio_process.js');

            if (this.asrWS !== ws) {
                // The session was torn down while the module was loading;
                // stopasr() has already released the resources.
                return;
            }

            const recordNode = new AudioWorkletNode(audioContext, 'record-audio-processor');
            recordNode.connect(audioContext.destination);
            recordNode.port.onmessage = (event) => {
                if (ws.readyState === WebSocket.OPEN) {
                    ws.send(event.data.data.buffer);
                }
            };
            audioContext.createMediaStreamSource(mediaStream).connect(recordNode);

            this.recording = true;
        } catch (err) {
            await this.stopasr();
            throw err;
        } finally {
            this.asrStarting = false;
        }
    },
};

/**
 * Convert a buffer of 16-bit signed PCM samples (platform byte order) to
 * floats in [-1, 1). A trailing odd byte, if any, is ignored.
 *
 * @param {ArrayBuffer} arrayBuffer raw PCM bytes
 * @returns {Float32Array} one float per 16-bit sample
 */
function pcm16ToFloat32(arrayBuffer) {
    const sampleCount = Math.floor(arrayBuffer.byteLength / 2);
    const samples = new Int16Array(arrayBuffer, 0, sampleCount);
    return Float32Array.from(samples, (sample) => sample / 32768);
}
audio_process.js ADDED
@@ -0,0 +1,45 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
/**
 * Worklet processor that plays audio pushed to it through its message port.
 *
 * Each message is expected to carry `audioData` (samples for one channel).
 * Pending chunks are queued and consumed with a read offset, so neither
 * receiving a message nor rendering a quantum copies the whole backlog.
 * When the queue runs dry the output is filled with silence.
 */
class PlayerAudioProcessor extends AudioWorkletProcessor {
    constructor() {
        super();
        // FIFO of sample chunks that have not been fully played yet.
        this.chunks = [];
        // Read position inside this.chunks[0].
        this.offset = 0;
        this.port.onmessage = (event) => {
            const samples = event.data?.audioData;
            if (!samples || samples.length === 0) {
                return;
            }
            this.chunks.push(
                samples instanceof Float32Array ? samples : Float32Array.from(samples),
            );
        };
    }

    /**
     * Fill the first channel of the first output with queued samples.
     *
     * @param {Float32Array[][]} inputs unused
     * @param {Float32Array[][]} outputs output buffers to write into
     * @param {Object} parameters unused
     * @returns {boolean} always true, to keep the processor running
     */
    process(inputs, outputs, parameters) {
        const channel = outputs[0]?.[0];
        if (!channel) {
            return true;
        }

        let written = 0;
        while (written < channel.length && this.chunks.length > 0) {
            const head = this.chunks[0];
            const count = Math.min(head.length - this.offset, channel.length - written);
            channel.set(head.subarray(this.offset, this.offset + count), written);
            written += count;
            this.offset += count;
            if (this.offset >= head.length) {
                this.chunks.shift();
                this.offset = 0;
            }
        }

        // Pad the remainder of the quantum with silence.
        channel.fill(0, written);
        return true;
    }
}
24
+
25
/**
 * Worklet processor that converts the first channel of its first input to
 * 16-bit signed PCM and posts each quantum to the main thread as
 * `{ data: Int16Array }`.
 */
class RecordAudioProcessor extends AudioWorkletProcessor {
    /**
     * @param {Float32Array[][]} inputs input buffers; only inputs[0][0] is read
     * @param {Float32Array[][]} outputs unused
     * @param {Object} parameters unused
     * @returns {boolean} always true, to keep the processor running
     */
    process(inputs, outputs, parameters) {
        const channel = inputs[0]?.[0];
        if (!channel || channel.length === 0) {
            return true;
        }

        const pcm = new Int16Array(channel.length);
        for (let i = 0; i < channel.length; i++) {
            // Clamp first: typed-array stores wrap out-of-range values, which
            // would turn a slightly clipped peak into a full-scale click.
            const sample = Math.max(-1, Math.min(1, channel[i]));
            pcm[i] = sample * 32767;
        }

        // A fresh buffer is created every call, so it can be transferred
        // instead of copied.
        this.port.postMessage({ data: pcm }, [pcm.buffer]);
        return true;
    }
}
43
+
44
// Register both processors under the names that app.js passes to `new AudioWorkletNode(...)`.
+ registerProcessor('play-audio-processor', PlayerAudioProcessor);
45
+ registerProcessor('record-audio-processor', RecordAudioProcessor);
index.html ADDED
@@ -0,0 +1,179 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ <!DOCTYPE html>
2
+ <html lang="en">
3
+
4
+ <head>
5
+ <meta charset="UTF-8">
6
+ <meta name="viewport" content="width=device-width, initial-scale=1.0">
7
+ <link rel="icon" type="image/png" href="./voice.png" />
8
+ <script src="//cdn.tailwindcss.com?plugins=forms"></script>
9
+ <link href="https://cdn.jsdelivr.net/npm/tailwindcss@latest/dist/tailwind.min.css" rel="stylesheet">
10
+ <script src="//cdn.jsdelivr.net/npm/[email protected]/dist/cdn.min.js" defer></script>
11
+ <script src="./app.js"></script>
12
+ <title>voiceapi demo </title>
13
+ <style>
14
+ * {
15
+ margin: 0;
16
+ padding: 0;
17
+ }
18
+ </style>
19
+
20
+ <style type="text/tailwindcss">
21
+ .label { @apply text-gray-900 w-[50px] lg:w-20 }
22
+ .title{
23
+ @apply text-[16px] text-zinc-500 mx-2;
24
+ }
25
+
26
+ .select { @apply w-full rounded-md h-10 }
27
+
28
+ .round { @apply rounded border px-3 p-2 border-slate-300 placeholder-gray-400 placeholder:text-sm
29
+ focus:bg-white focus:text-gray-900 focus:placeholder-gray-500 focus:outline-none
30
+ focus:border-zinc-950 focus:border ring-0 focus:ring-0 text-gray-900 }
31
+
32
+ .checkbox { @apply ml-2 lg:ml-4 border focus:outline-none ring-0 focus:ring-gray-800 text-gray-900 }
33
+ .dash{ @apply border border-dashed border-zinc-200 flex flex-grow }
34
+
35
+ .button { @apply hover:bg-opacity-90 text-white font-bold py-1.5 px-6 rounded-full cursor-pointer }
36
+ .card { @apply bg-white shadow-sm rounded-xl border p-4 }
37
+
38
+
39
+ .animate-ping {
40
+ animation: ping 2s cubic-bezier(0.5, 0.4, 0.2, 1) infinite;
41
+ }
42
+
43
+ @keyframes ping {
44
+ 0% {
45
+ transform: scale(1);
46
+ opacity: 1;
47
+ }
48
+ 50% {
49
+ transform: scale(1.2);
50
+ opacity: 0.7;
51
+ }
52
+ 100% {
53
+ transform: scale(1);
54
+ opacity: 1;
55
+ }
56
+ }
57
+ </style>
58
+ </head>
59
+
60
+ <body>
61
+ <script>
62
/**
 * Page-load smoke test for Audio Worklet support.
 *
 * Checks that the API exists and that './audio_process.js' can be loaded as
 * a worklet module, logging the outcome to the console. The AudioContext
 * used for the check is temporary and is closed before returning, so the
 * probe does not hold on to an audio device.
 *
 * @returns {Promise<void>} never rejects; failures are logged.
 */
async function initAudioWorklet() {
    let probeContext = null;
    try {
        // Check for browser support
        if (!('AudioContext' in window) || !('audioWorklet' in AudioContext.prototype)) {
            console.error('Audio Worklet API is not supported in this browser.');
            return;
        }

        probeContext = new AudioContext();
        await probeContext.audioWorklet.addModule('./audio_process.js');

        console.log('Audio Worklet module added successfully.');
    } catch (error) {
        console.error('Error initializing Audio Worklet:', error);
    } finally {
        if (probeContext && probeContext.state !== 'closed') {
            try {
                await probeContext.close();
            } catch (closeError) {
                console.error('Error closing probe AudioContext:', closeError);
            }
        }
    }
}
83
+
84
+ // Initialize Audio Worklet when the page is loaded
85
+ window.addEventListener('load', initAudioWorklet);
86
+ </script>
87
+ <div x-data="demoapp">
88
+ <header class="bg-gray-900 py-4 px-5 lg:p-4 lg:px-10 text-white sticky top-0 z-20">
89
+ <div class="flex w-full justify-between items-center">
90
+ <p class="gap-x-3">
91
+ <span>VoiceAPI Demo</span> /
92
+ <a href="https://ruzhila.cn/?from=voiceapi_demo">ruzhila.cn</a>
93
+ </p>
94
+ <a target="_blank" href="https://github.com/ruzhila/voiceapi" class="hover:cursor-pointer">
95
+ <svg t="1724996252746" class="icon" viewBox="0 0 1024 1024" version="1.1"
96
+ xmlns="http://www.w3.org/2000/svg" p-id="" width="25" height="25">
97
+ <path
98
+ d="M512 12.64c-282.752 0-512 229.216-512 512 0 226.208 146.72 418.144 350.144 485.824 25.6 4.736 35.008-11.104 35.008-24.64 0-12.192-0.48-52.544-0.704-95.328-142.464 30.976-172.512-60.416-172.512-60.416-23.296-59.168-56.832-74.912-56.832-74.912-46.464-31.776 3.52-31.136 3.52-31.136 51.392 3.616 78.464 52.768 78.464 52.768 45.664 78.272 119.776 55.648 148.992 42.56 4.576-33.088 17.856-55.68 32.512-68.48-113.728-12.928-233.28-56.864-233.28-253.024 0-55.904 20-101.568 52.768-137.44-5.312-12.896-22.848-64.96 4.96-135.488 0 0 43.008-13.76 140.832 52.48 40.832-11.36 84.64-17.024 128.16-17.248 43.488 0.192 87.328 5.888 128.256 17.248 97.728-66.24 140.64-52.48 140.64-52.48 27.872 70.528 10.336 122.592 5.024 135.488 32.832 35.84 52.704 81.536 52.704 137.44 0 196.64-119.776 239.936-233.792 252.64 18.368 15.904 34.72 47.04 34.72 94.816 0 68.512-0.608 123.648-0.608 140.512 0 13.632 9.216 29.6 35.168 24.576 203.328-67.776 349.856-259.616 349.856-485.76 0-282.784-229.248-512-512-512z"
99
+ fill="#ffffff"></path>
100
+ </svg>
101
+ </a>
102
+ </div>
103
+ </header>
104
+
105
+ <div class="flex px-6 gap-x-10 w-full max-w-7xl mx-auto">
106
+ <div class="relative flex flex-col items-center w-1/3 py-10">
107
+ <div class="w-full">
108
+ <textarea x-model="text" class="round p-4 w-full h-[36rem] text-sm"
109
+ placeholder="Enter text here"></textarea>
110
+ </div>
111
+
112
+ <div>
113
+ <button @click="dotts" :disabled="disabled"
114
+ class="button bg-gray-900 flex items-center gap-x-2 mt-6">
115
+ <span>Speak</span>
116
+ <svg t="1726215464577" class="icon" viewBox="0 0 1024 1024" version="1.1"
117
+ xmlns="http://www.w3.org/2000/svg" p-id="4263" width="20" height="20">
118
+ <path
119
+ d="M830.450526 853.759999q-11.722105 8.791579-27.351579 8.791579-19.536842 0-33.701053-14.164211t-14.164211-33.701053q0-21.490526
120
+ 16.606316-36.143158 0.976842-0.976842 1.953684-1.465263t1.953684-1.465263l0.976842-0.976842q27.351579-18.56 50.795789-43.957895t41.027368-55.191579 27.351579-63.494737 9.768421-69.84421q0-73.263158-37.12-133.827368t-92.8-99.637895q-20.513684-14.652632-20.513684-39.073684 0-19.536842 14.164211-33.701053t33.701053-14.164211q16.606316 0 29.305263 10.745263 36.143158 25.397895 67.402105 59.098947t53.726316 73.263158 35.166316 84.496842 12.698947 92.8q0 48.842105-12.698947 93.776842t-35.654737 84.985263-54.214737 73.751579-68.378947 59.098947zM775.747368 415.157894q20.513684 28.328421 32.72421 57.145263t12.210526 69.84421q0 39.073684-12.698947 70.332632t-32.235789 56.656842q-7.814737 10.745263-16.606316 19.048421t-22.467368 8.303158q-17.583158 0-29.793684-12.698947t-12.210526-30.282105q0-7.814737 2.930526-15.629474l-0.976842 0q4.884211-10.745263 11.722105-20.513684t13.187368-20.025263 10.745263-23.444211 4.395789-31.747368q0-17.583158-4.395789-30.770526t-10.745263-23.932632-13.187368-20.513684-10.745263-20.513684q-2.930526-6.837895-2.930526-15.629474 0-17.583158 12.210526-30.282105t29.793684-12.698947q13.675789 0 22.467368 8.303158t16.606316 19.048421zM460.227368 995.402104q-49.818947-44.934737-105.498947-93.776842t-103.545263-89.869474q-55.68-46.888421-111.36-92.8-10.745263 0.976842-21.490526 0.976842-8.791579 0.976842-18.56 0.976842l-16.606316 0q-26.374737 0-42.981053-16.117895t-16.606316-38.585263l0-246.16421 0.976842 0-0.976842-0.976842q0-27.351579 17.094737-44.934737t42.492632-17.583158l55.68 0q89.869474-76.193684 163.132631-136.757895 31.258947-26.374737 61.541053-51.28421t54.703158-45.423158 41.027368-34.189474 20.513684-16.606316q29.305263-21.490526 47.376842-19.536842t28.328421 17.583158 14.164211 38.096842 3.907368 41.027368l0 788.311578 0 2.930526q0 18.56-6.837895 39.562105t-21.002105 33.212632-35.654737 10.256842-49.818947-28.328421z"
121
+ p-id="4264" fill="#ffffff"></path>
122
+ </svg>
123
+ </button>
124
+ </div>
125
+ <template x-if="elapsedTime">
126
+ <p x-text="`elapsedTime: ${elapsedTime}`" class="mt-4 text-sm text-gray-600 "></p>
127
+ </template>
128
+ </div>
129
+
130
+ <!-- recording -->
131
+ <div class="w-full flex-grow h-[calc(100vh-10rem)] xl:pl-10 py-10">
132
+
133
+ <div
134
+ class="rounded border border-gray-500 p-3 w-full flex flex-col items-end h-[36rem] overflow-y-auto">
135
+ <template x-for="item in logs">
136
+ <div class="mt-3 mb-2">
137
+ <span
138
+ class="text-white px-4 py-1.5 text-[13px] display-inline-block border border-gray-900 rounded-t-full rounded-l-full bg-gray-900 justify-end w-auto"
139
+ x-text="item?.text">
140
+ </span>
141
+ </div>
142
+ </template>
143
+ </div>
144
+
145
+
146
+ <template x-if="currentText">
147
+ <p x-text="`${currentText} …`" class="text-gray-800 mt-4 text-sm text-center"></p>
148
+ </template>
149
+
150
+ <template x-if="!recording">
151
+ <div class="flex flex-col gap-y-4 items-center justify-center mt-4">
152
+ <p @click="doasr"
153
+ class="mt-2 border border-gray-100 rounded-full duration-300 hover:scale-105 hover:border-gray-400">
154
+ <img src="./images/record.svg" alt="" class="w-14 h-14 mx-auto cursor-pointer">
155
+ </p>
156
+ <p class="text-gray-600">Click to record !</p>
157
+ </div>
158
+ </template>
159
+
160
+ <template x-if="recording">
161
+ <div class="flex flex-col items-center justify-center gap-y-4 mt-4">
162
+
163
+ <p @click="stopasr"
164
+ class="mt-2 border border-red-100 rounded-full duration-300 hover:scale-105 hover:border-red-400">
165
+ <img src="./images/speaking.svg" alt=""
166
+ class="w-14 h-14 mx-auto cursor-pointer animate-ping">
167
+ </p>
168
+ <div class="flex items-center text-gray-600 gap-x-4">
169
+ <p>Click to stop recording !</p>
170
+ </div>
171
+ </div>
172
+ </template>
173
+ </div>
174
+ </div>
175
+ </div>
176
+ </div>
177
+ </body>
178
+
179
+ </html>
voice.png ADDED