Upload 4 files
Browse files- app.js +99 -0
- audio_process.js +45 -0
- index.html +179 -0
- voice.png +0 -0
app.js
ADDED
@@ -0,0 +1,99 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
1 |
+
/**
 * Alpine.js component state for the demo page (bound via `x-data="demoapp"`).
 *
 * Two independent features live here:
 *  - text-to-speech: `dotts()` sends `text` over the `/tts` WebSocket and plays
 *    the binary frames that come back;
 *  - speech recognition: `doasr()` streams microphone audio to the `/asr`
 *    WebSocket and collects the recognised text in `logs`; `stopasr()` ends it.
 */
const demoapp = {
    text: '讲个冷笑话吧,要很好笑的那种。',
    recording: false,
    asrWS: null,
    // Microphone stream and capture context of the running ASR session; kept so
    // stopasr() can release them.
    asrStream: null,
    asrAudioContext: null,
    // Playback context and worklet node, created on first use and then reused so
    // repeated clicks do not pile up AudioContext instances.
    ttsAudioContext: null,
    ttsPlayNode: null,
    currentText: null,
    disabled: false,
    elapsedTime: null,
    logs: [{ idx: 0, text: 'Happily here at ruzhila.cn.' }],

    async init() {
    },

    /**
     * Sends `this.text` to the `/tts` WebSocket and plays the reply.
     *
     * Binary frames are interpreted as 16-bit signed PCM, scaled to floats and
     * queued on the 'play-audio-processor' worklet. A text frame is parsed as
     * JSON; its `elapsed` field is stored in `elapsedTime` and the Speak button
     * is re-enabled. `disabled` is true while a request is outstanding.
     *
     * @returns {Promise<void>} resolves once the socket has been created.
     * @throws whatever AudioContext / audioWorklet / WebSocket setup throws;
     *         `disabled` is reset before the error propagates.
     */
    async dotts() {
        if (this.disabled) {
            return;
        }
        this.disabled = true;

        // Re-enable the button exactly once per request, whichever event
        // (status frame, error, close) arrives first.
        let settled = false;
        const finish = () => {
            if (!settled) {
                settled = true;
                this.disabled = false;
            }
        };

        try {
            if (!this.ttsPlayNode) {
                const audioContext = new AudioContext({ sampleRate: 16000 });
                try {
                    await audioContext.audioWorklet.addModule('./audio_process.js');
                    const node = new AudioWorkletNode(audioContext, 'play-audio-processor');
                    node.connect(audioContext.destination);
                    this.ttsAudioContext = audioContext;
                    this.ttsPlayNode = node;
                } catch (err) {
                    // Best-effort release of the half-built context; the setup
                    // error below is the one worth reporting.
                    audioContext.close().catch(() => {});
                    throw err;
                }
            } else if (this.ttsAudioContext.state === 'suspended') {
                await this.ttsAudioContext.resume();
            }
            const playNode = this.ttsPlayNode;

            const ws = new WebSocket('/tts');
            // Deliver binary frames as ArrayBuffer so they can be decoded
            // synchronously, in arrival order, instead of through per-frame
            // Blob.arrayBuffer() promises.
            ws.binaryType = 'arraybuffer';

            ws.onopen = () => {
                ws.send(this.text);
            };

            ws.onmessage = (e) => {
                if (typeof e.data === 'string') {
                    try {
                        this.elapsedTime = JSON.parse(e.data)?.elapsed;
                    } catch (err) {
                        console.error('Malformed TTS status message:', err);
                    } finally {
                        finish();
                    }
                    return;
                }

                // Ignore a dangling odd byte rather than letting the Int16Array
                // constructor throw on a buffer that is not a multiple of 2.
                const pcm = new Int16Array(e.data, 0, e.data.byteLength >> 1);
                const samples = new Float32Array(pcm.length);
                for (let i = 0; i < pcm.length; i++) {
                    samples[i] = pcm[i] / 32768;
                }
                playNode.port.postMessage({ message: 'audioData', audioData: samples });
            };

            // Without these the button would stay disabled forever when the
            // server is unreachable or drops the connection early.
            ws.onerror = finish;
            ws.onclose = finish;
            // NOTE(review): the socket is not closed from this side after the
            // status frame; confirm whether the server closes it.
        } catch (err) {
            finish();
            throw err;
        }
    },

    /**
     * Ends the running ASR session, if any: closes the socket, moves a pending
     * partial result into `logs`, stops the microphone tracks and closes the
     * capture AudioContext.
     *
     * @returns {Promise<void>}
     */
    async stopasr() {
        if (!this.asrWS) {
            return;
        }
        this.asrWS.close();
        this.asrWS = null;
        this.recording = false;
        if (this.currentText) {
            this.logs.push({ idx: this.logs.length + 1, text: this.currentText });
        }
        this.currentText = null;

        // Detach the resources from the component before releasing them so a
        // re-entrant call cannot release them twice.
        const stream = this.asrStream;
        const audioContext = this.asrAudioContext;
        this.asrStream = null;
        this.asrAudioContext = null;

        stream?.getTracks().forEach((track) => track.stop());
        if (audioContext && audioContext.state !== 'closed') {
            await audioContext.close();
        }
    },

    /**
     * Starts an ASR session: asks for the microphone, opens the `/asr`
     * WebSocket and forwards every Int16 frame posted by the
     * 'record-audio-processor' worklet to it.
     *
     * Incoming messages are JSON objects with `text`, `finished` and `idx`;
     * unfinished text is shown via `currentText`, finished text is appended to
     * `logs`. `logs` is cleared when the socket opens.
     *
     * @returns {Promise<void>} resolves once capture is wired up.
     * @throws whatever getUserMedia / AudioContext / audioWorklet setup throws;
     *         everything acquired so far is released first.
     */
    async doasr() {
        const mediaStream = await navigator.mediaDevices.getUserMedia({
            video: false,
            audio: true,
        });

        let ws = null;
        let audioContext = null;
        try {
            ws = new WebSocket('/asr');

            ws.onopen = () => {
                this.logs = [];
            };

            ws.onmessage = (e) => {
                const { text, finished, idx } = JSON.parse(e.data);
                this.currentText = text;
                if (finished) {
                    this.logs.push({ text, idx });
                    this.currentText = null;
                }
            };

            // If the server ends the session, leave the "recording" state and
            // release the microphone instead of capturing into a dead socket.
            const socket = ws;
            ws.onclose = () => {
                if (this.asrWS === socket) {
                    this.stopasr();
                }
            };

            audioContext = new AudioContext({ sampleRate: 16000 });
            await audioContext.audioWorklet.addModule('./audio_process.js');

            const recordNode = new AudioWorkletNode(audioContext, 'record-audio-processor');
            recordNode.connect(audioContext.destination);
            recordNode.port.onmessage = (event) => {
                if (socket.readyState === WebSocket.OPEN) {
                    socket.send(event.data.data.buffer);
                }
            };

            audioContext.createMediaStreamSource(mediaStream).connect(recordNode);
        } catch (err) {
            // Setup failed part-way: do not leave the socket, the context or
            // the microphone behind.
            ws?.close();
            if (audioContext) {
                audioContext.close().catch(() => {});
            }
            mediaStream.getTracks().forEach((track) => track.stop());
            throw err;
        }

        this.asrStream = mediaStream;
        this.asrAudioContext = audioContext;
        this.asrWS = ws;
        this.recording = true;
    },
};
|
audio_process.js
ADDED
@@ -0,0 +1,45 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
1 |
+
/**
 * Worklet processor that plays float samples pushed from the main thread.
 *
 * The main thread posts `{ audioData: Float32Array }` messages; each array is
 * queued and drained one render quantum at a time in `process()`. When the
 * queue runs dry the output is filled with silence.
 */
class PlayerAudioProcessor extends AudioWorkletProcessor {
    constructor() {
        super();
        // Pending chunks in arrival order, plus the read position inside the
        // first one. Queuing the arrays as-is avoids re-copying the whole
        // backlog on every incoming message and on every render quantum.
        this.chunks = [];
        this.offset = 0;
        this.port.onmessage = (event) => {
            const samples = event.data?.audioData;
            // Messages without sample data are ignored instead of throwing.
            if (samples && samples.length > 0) {
                this.chunks.push(samples);
            }
        };
    }

    /**
     * Fills the first output with queued samples, padding with zeros on underrun.
     *
     * @param {Float32Array[][]} inputs - unused.
     * @param {Float32Array[][]} outputs - `outputs[0]` receives the audio.
     * @param {Object} parameters - unused.
     * @returns {boolean} always true, so the processor stays alive while idle.
     */
    process(inputs, outputs, parameters) {
        const output = outputs[0];
        const channel = output?.[0];
        if (!channel) {
            return true;
        }

        let written = 0;
        while (written < channel.length && this.chunks.length > 0) {
            const head = this.chunks[0];
            const count = Math.min(head.length - this.offset, channel.length - written);
            channel.set(head.subarray(this.offset, this.offset + count), written);
            written += count;
            this.offset += count;
            if (this.offset >= head.length) {
                this.chunks.shift();
                this.offset = 0;
            }
        }
        channel.fill(0, written);

        // The stream is mono; mirror it so a multi-channel output is not
        // silent on every channel but the first.
        for (let c = 1; c < output.length; c++) {
            output[c].set(channel);
        }
        return true;
    }
}
|
24 |
+
|
25 |
+
/**
 * Worklet processor that converts the first channel of its first input to
 * 16-bit signed integers and posts each render quantum to the main thread as
 * `{ data: Int16Array }`.
 */
class RecordAudioProcessor extends AudioWorkletProcessor {
    /**
     * @param {Float32Array[][]} inputs - `inputs[0][0]` is the channel captured.
     * @param {Float32Array[][]} outputs - unused; nothing is written.
     * @param {Object} parameters - unused.
     * @returns {boolean} always true, so the processor stays alive.
     */
    process(inputs, outputs, parameters) {
        const channel = inputs[0]?.[0];
        if (!channel || channel.length === 0) {
            return true;
        }

        const pcm = new Int16Array(channel.length);
        for (let i = 0; i < channel.length; i++) {
            // Clamp first: storing a value outside the int16 range into an
            // Int16Array wraps around (1.5 * 32767 becomes a large negative
            // number), which turns clipping into loud crackle.
            const sample = Math.max(-1, Math.min(1, channel[i]));
            pcm[i] = sample * 32767;
        }

        // The array is freshly allocated each call, so its buffer can be
        // transferred rather than copied.
        this.port.postMessage({ data: pcm }, [pcm.buffer]);
        return true;
    }
}
|
43 |
+
|
44 |
+
// Register both processors under the names that app.js passes to
// `new AudioWorkletNode(...)`.
[
    ['play-audio-processor', PlayerAudioProcessor],
    ['record-audio-processor', RecordAudioProcessor],
].forEach(([processorName, processorClass]) => {
    registerProcessor(processorName, processorClass);
});
|
index.html
ADDED
@@ -0,0 +1,179 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
1 |
+
<!DOCTYPE html>
|
2 |
+
<html lang="en">
|
3 |
+
|
4 |
+
<head>
|
5 |
+
<meta charset="UTF-8">
|
6 |
+
<meta name="viewport" content="width=device-width, initial-scale=1.0">
|
7 |
+
<link rel="icon" type="image/png" href="./voice.png" />
|
8 |
+
<script src="//cdn.tailwindcss.com?plugins=forms"></script>
|
9 |
+
<link href="https://cdn.jsdelivr.net/npm/tailwindcss@latest/dist/tailwind.min.css" rel="stylesheet">
|
10 |
+
<script src="//cdn.jsdelivr.net/npm/alpinejs@3.x.x/dist/cdn.min.js" defer></script>
|
11 |
+
<script src="./app.js"></script>
|
12 |
+
<title>voiceapi demo </title>
|
13 |
+
<style>
|
14 |
+
* {
|
15 |
+
margin: 0;
|
16 |
+
padding: 0;
|
17 |
+
}
|
18 |
+
</style>
|
19 |
+
|
20 |
+
<style type="text/tailwindcss">
|
21 |
+
.label { @apply text-gray-900 w-[50px] lg:w-20 }
|
22 |
+
.title{
|
23 |
+
@apply text-[16px] text-zinc-500 mx-2;
|
24 |
+
}
|
25 |
+
|
26 |
+
.select { @apply w-full rounded-md h-10 }
|
27 |
+
|
28 |
+
.round { @apply rounded border px-3 p-2 border-slate-300 placeholder-gray-400 placeholder:text-sm
|
29 |
+
focus:bg-white focus:text-gray-900 focus:placeholder-gray-500 focus:outline-none
|
30 |
+
focus:border-zinc-950 focus:border ring-0 focus:ring-0 text-gray-900 }
|
31 |
+
|
32 |
+
.checkbox { @apply ml-2 lg:ml-4 border focus:outline-none ring-0 focus:ring-gray-800 text-gray-900 }
|
33 |
+
.dash{ @apply border border-dashed border-zinc-200 flex flex-grow }
|
34 |
+
|
35 |
+
.button { @apply hover:bg-opacity-90 text-white font-bold py-1.5 px-6 rounded-full cursor-pointer }
|
36 |
+
.card { @apply bg-white shadow-sm rounded-xl border p-4 }
|
37 |
+
|
38 |
+
|
39 |
+
.animate-ping {
|
40 |
+
animation: ping 2s cubic-bezier(0.5, 0.4, 0.2, 1) infinite;
|
41 |
+
}
|
42 |
+
|
43 |
+
@keyframes ping {
|
44 |
+
0% {
|
45 |
+
transform: scale(1);
|
46 |
+
opacity: 1;
|
47 |
+
}
|
48 |
+
50% {
|
49 |
+
transform: scale(1.2);
|
50 |
+
opacity: 0.7;
|
51 |
+
}
|
52 |
+
100% {
|
53 |
+
transform: scale(1);
|
54 |
+
opacity: 1;
|
55 |
+
}
|
56 |
+
}
|
57 |
+
</style>
|
58 |
+
</head>
|
59 |
+
|
60 |
+
<body>
|
61 |
+
<script>
|
62 |
+
/**
 * Page-load probe: checks that the browser supports AudioWorklet and that
 * './audio_process.js' can be loaded as a worklet module, logging the outcome
 * to the console. It never throws.
 *
 * The AudioContext created here is only used for the probe and is closed
 * before returning, so it does not linger for the lifetime of the page.
 *
 * @returns {Promise<void>}
 */
async function initAudioWorklet() {
    // Check for browser support
    if (!('AudioContext' in window) || !('audioWorklet' in AudioContext.prototype)) {
        console.error('Audio Worklet API is not supported in this browser.');
        return;
    }

    let audioContext = null;
    try {
        audioContext = new AudioContext();
        await audioContext.audioWorklet.addModule('./audio_process.js');
        console.log('Audio Worklet module added successfully.');
    } catch (error) {
        console.error('Error initializing Audio Worklet:', error);
    } finally {
        if (audioContext) {
            try {
                await audioContext.close();
            } catch (closeError) {
                console.error('Error closing probe AudioContext:', closeError);
            }
        }
    }
}
|
83 |
+
|
84 |
+
// Initialize Audio Worklet when the page is loaded
|
85 |
+
window.addEventListener('load', initAudioWorklet);
|
86 |
+
</script>
|
87 |
+
<div x-data="demoapp">
|
88 |
+
<header class="bg-gray-900 py-4 px-5 lg:p-4 lg:px-10 text-white sticky top-0 z-20">
|
89 |
+
<div class="flex w-full justify-between items-center">
|
90 |
+
<p class="gap-x-3">
|
91 |
+
<span>VoiceAPI Demo</span> /
|
92 |
+
<a href="https://ruzhila.cn/?from=voiceapi_demo">ruzhila.cn</a>
|
93 |
+
</p>
|
94 |
+
<a target="_blank" href="https://github.com/ruzhila/voiceapi" class="hover:cursor-pointer">
|
95 |
+
<svg t="1724996252746" class="icon" viewBox="0 0 1024 1024" version="1.1"
|
96 |
+
xmlns="http://www.w3.org/2000/svg" p-id="" width="25" height="25">
|
97 |
+
<path
|
98 |
+
d="M512 12.64c-282.752 0-512 229.216-512 512 0 226.208 146.72 418.144 350.144 485.824 25.6 4.736 35.008-11.104 35.008-24.64 0-12.192-0.48-52.544-0.704-95.328-142.464 30.976-172.512-60.416-172.512-60.416-23.296-59.168-56.832-74.912-56.832-74.912-46.464-31.776 3.52-31.136 3.52-31.136 51.392 3.616 78.464 52.768 78.464 52.768 45.664 78.272 119.776 55.648 148.992 42.56 4.576-33.088 17.856-55.68 32.512-68.48-113.728-12.928-233.28-56.864-233.28-253.024 0-55.904 20-101.568 52.768-137.44-5.312-12.896-22.848-64.96 4.96-135.488 0 0 43.008-13.76 140.832 52.48 40.832-11.36 84.64-17.024 128.16-17.248 43.488 0.192 87.328 5.888 128.256 17.248 97.728-66.24 140.64-52.48 140.64-52.48 27.872 70.528 10.336 122.592 5.024 135.488 32.832 35.84 52.704 81.536 52.704 137.44 0 196.64-119.776 239.936-233.792 252.64 18.368 15.904 34.72 47.04 34.72 94.816 0 68.512-0.608 123.648-0.608 140.512 0 13.632 9.216 29.6 35.168 24.576 203.328-67.776 349.856-259.616 349.856-485.76 0-282.784-229.248-512-512-512z"
|
99 |
+
fill="#ffffff"></path>
|
100 |
+
</svg>
|
101 |
+
</a>
|
102 |
+
</div>
|
103 |
+
</header>
|
104 |
+
|
105 |
+
<div class="flex px-6 gap-x-10 w-full max-w-7xl mx-auto">
|
106 |
+
<div class="relative flex flex-col items-center w-1/3 py-10">
|
107 |
+
<div class="w-full">
|
108 |
+
<textarea x-model="text" class="round p-4 w-full h-[36rem] text-sm"
|
109 |
+
placeholder="Enter text here"></textarea>
|
110 |
+
</div>
|
111 |
+
|
112 |
+
<div>
|
113 |
+
<button @click="dotts" :disabled="disabled"
|
114 |
+
class="button bg-gray-900 flex items-center gap-x-2 mt-6">
|
115 |
+
<span>Speak</span>
|
116 |
+
<svg t="1726215464577" class="icon" viewBox="0 0 1024 1024" version="1.1"
|
117 |
+
xmlns="http://www.w3.org/2000/svg" p-id="4263" width="20" height="20">
|
118 |
+
<path
|
119 |
+
d="M830.450526 853.759999q-11.722105 8.791579-27.351579 8.791579-19.536842 0-33.701053-14.164211t-14.164211-33.701053q0-21.490526
|
120 |
+
16.606316-36.143158 0.976842-0.976842 1.953684-1.465263t1.953684-1.465263l0.976842-0.976842q27.351579-18.56 50.795789-43.957895t41.027368-55.191579 27.351579-63.494737 9.768421-69.84421q0-73.263158-37.12-133.827368t-92.8-99.637895q-20.513684-14.652632-20.513684-39.073684 0-19.536842 14.164211-33.701053t33.701053-14.164211q16.606316 0 29.305263 10.745263 36.143158 25.397895 67.402105 59.098947t53.726316 73.263158 35.166316 84.496842 12.698947 92.8q0 48.842105-12.698947 93.776842t-35.654737 84.985263-54.214737 73.751579-68.378947 59.098947zM775.747368 415.157894q20.513684 28.328421 32.72421 57.145263t12.210526 69.84421q0 39.073684-12.698947 70.332632t-32.235789 56.656842q-7.814737 10.745263-16.606316 19.048421t-22.467368 8.303158q-17.583158 0-29.793684-12.698947t-12.210526-30.282105q0-7.814737 2.930526-15.629474l-0.976842 0q4.884211-10.745263 11.722105-20.513684t13.187368-20.025263 10.745263-23.444211 4.395789-31.747368q0-17.583158-4.395789-30.770526t-10.745263-23.932632-13.187368-20.513684-10.745263-20.513684q-2.930526-6.837895-2.930526-15.629474 0-17.583158 12.210526-30.282105t29.793684-12.698947q13.675789 0 22.467368 8.303158t16.606316 19.048421zM460.227368 995.402104q-49.818947-44.934737-105.498947-93.776842t-103.545263-89.869474q-55.68-46.888421-111.36-92.8-10.745263 0.976842-21.490526 0.976842-8.791579 0.976842-18.56 0.976842l-16.606316 0q-26.374737 0-42.981053-16.117895t-16.606316-38.585263l0-246.16421 0.976842 0-0.976842-0.976842q0-27.351579 17.094737-44.934737t42.492632-17.583158l55.68 0q89.869474-76.193684 163.132631-136.757895 31.258947-26.374737 61.541053-51.28421t54.703158-45.423158 41.027368-34.189474 20.513684-16.606316q29.305263-21.490526 47.376842-19.536842t28.328421 17.583158 14.164211 38.096842 3.907368 41.027368l0 788.311578 0 2.930526q0 18.56-6.837895 39.562105t-21.002105 33.212632-35.654737 10.256842-49.818947-28.328421z"
|
121 |
+
p-id="4264" fill="#ffffff"></path>
|
122 |
+
</svg>
|
123 |
+
</button>
|
124 |
+
</div>
|
125 |
+
<template x-if="elapsedTime">
|
126 |
+
<p x-text="`elapsedTime: ${elapsedTime}`" class="mt-4 text-sm text-gray-600 "></p>
|
127 |
+
</template>
|
128 |
+
</div>
|
129 |
+
|
130 |
+
<!-- recording -->
|
131 |
+
<div class="w-full flex-grow h-[calc(100vh-10rem)] xl:pl-10 py-10">
|
132 |
+
|
133 |
+
<div
|
134 |
+
class="rounded border border-gray-500 p-3 w-full flex flex-col items-end h-[36rem] overflow-y-auto">
|
135 |
+
<template x-for="item in logs">
|
136 |
+
<div class="mt-3 mb-2">
|
137 |
+
<span
|
138 |
+
class="text-white px-4 py-1.5 text-[13px] display-inline-block border border-gray-900 rounded-t-full rounded-l-full bg-gray-900 justify-end w-auto"
|
139 |
+
x-text="item?.text">
|
140 |
+
</span>
|
141 |
+
</div>
|
142 |
+
</template>
|
143 |
+
</div>
|
144 |
+
|
145 |
+
|
146 |
+
<template x-if="currentText">
|
147 |
+
<p x-text="`${currentText} …`" class="text-gray-800 mt-4 text-sm text-center"></p>
|
148 |
+
</template>
|
149 |
+
|
150 |
+
<template x-if="!recording">
|
151 |
+
<div class="flex flex-col gap-y-4 items-center justify-center mt-4">
|
152 |
+
<p @click="doasr"
|
153 |
+
class="mt-2 border border-gray-100 rounded-full duration-300 hover:scale-105 hover:border-gray-400">
|
154 |
+
<img src="./images/record.svg" alt="" class="w-14 h-14 mx-auto cursor-pointer">
|
155 |
+
</p>
|
156 |
+
<p class="text-gray-600">Click to record !</p>
|
157 |
+
</div>
|
158 |
+
</template>
|
159 |
+
|
160 |
+
<template x-if="recording">
|
161 |
+
<div class="flex flex-col items-center justify-center gap-y-4 mt-4">
|
162 |
+
|
163 |
+
<p @click="stopasr"
|
164 |
+
class="mt-2 border border-red-100 rounded-full duration-300 hover:scale-105 hover:border-red-400">
|
165 |
+
<img src="./images/speaking.svg" alt=""
|
166 |
+
class="w-14 h-14 mx-auto cursor-pointer animate-ping">
|
167 |
+
</p>
|
168 |
+
<div class="flex items-center text-gray-600 gap-x-4">
|
169 |
+
<p>Click to stop recording !</p>
|
170 |
+
</div>
|
171 |
+
</div>
|
172 |
+
</template>
|
173 |
+
</div>
|
174 |
+
</div>
|
175 |
+
</div>
|
176 |
+
</div>
|
177 |
+
</body>
|
178 |
+
|
179 |
+
</html>
|
voice.png
ADDED
![]() |