mistpe committed on
Commit
1c5d7a2
·
verified ·
1 Parent(s): 3f21651

Update main.ts

Browse files
Files changed (1) hide show
  1. main.ts +1120 -78
main.ts CHANGED
@@ -1,21 +1,220 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
  import { serve } from "https://deno.land/std/http/server.ts";
2
- import { EdgeSpeechTTS } from "https://esm.sh/@lobehub/tts@1";
3
 
4
  const AUTH_TOKEN = Deno.env.get("AUTH_TOKEN");
5
  const VOICES_URL = "https://speech.platform.bing.com/consumer/speech/synthesize/readaloud/voices/list?trustedclienttoken=6A5AA1D4EAFF4E9FB37E23D68491D6F4";
6
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
7
  async function fetchVoiceList() {
8
- const response = await fetch(VOICES_URL);
9
- const voices = await response.json();
10
- return voices.reduce((acc: Record<string, { model: string, name: string, friendlyName: string, locale: string }[]>, voice: any) => {
11
- const { ShortName: model, ShortName: name, FriendlyName: friendlyName, Locale: locale } = voice;
12
- if (!acc[locale]) acc[locale] = [];
13
- acc[locale].push({ model, name, friendlyName, locale });
14
- return acc;
15
- }, {});
 
 
 
 
 
16
  }
17
 
18
- async function synthesizeSpeech(model: string, voice: string, text: string) {
19
  let voiceName;
20
  let rate = 0;
21
  let pitch = 0;
@@ -23,119 +222,962 @@ async function synthesizeSpeech(model: string, voice: string, text: string) {
23
  if (model.includes("tts")) {
24
  rate = 0.1;
25
  pitch = 0.2;
26
-
27
- switch (voice) {
28
- case "alloy":
29
- voiceName = "zh-CN-YunjianNeural";
30
- break;
31
- case "echo":
32
- voiceName = "zh-CN-YunyangNeural";
33
- break;
34
- case "fable":
35
- voiceName = "zh-CN-XiaoxiaoNeural";
36
- break;
37
- case "onyx":
38
- voiceName = "zh-TW-HsiaoChenNeural";
39
- break;
40
- default:
41
- voiceName = "zh-CN-YunxiNeural";
42
- break;
43
- }
44
  } else {
45
  voiceName = model;
46
  const params = Object.fromEntries(
47
- voice.split("|").map((p) => p.split(":") as [string, string])
48
  );
49
  rate = Number(params["rate"] || 0);
50
  pitch = Number(params["pitch"] || 0);
51
  }
52
 
53
- const tts = new EdgeSpeechTTS();
54
 
55
- const payload = {
56
- input: text,
57
- options: {
58
- rate: rate,
59
- pitch: pitch,
60
- voice: voiceName
61
- },
62
- };
63
- const response = await tts.create(payload);
64
- const mp3Buffer = new Uint8Array(await response.arrayBuffer());
65
-
66
- console.log(`Successfully synthesized speech, returning audio/mpeg response`);
67
- return new Response(mp3Buffer, {
68
- headers: { "Content-Type": "audio/mpeg" },
69
- });
 
 
 
 
70
  }
71
 
72
- function unauthorized(req: Request) {
73
  const authHeader = req.headers.get("Authorization");
74
  return AUTH_TOKEN && authHeader !== `Bearer ${AUTH_TOKEN}`;
75
  }
76
 
77
- function validateContentType(req: Request, expected: string) {
78
- const contentType = req.headers.get("Content-Type");
79
- if (contentType !== expected) {
80
- console.log(`Invalid Content-Type ${contentType}, expected ${expected}`);
81
- return new Response("Bad Request", { status: 400 });
82
- }
83
- }
84
-
85
- async function handleDebugRequest(req: Request) {
86
  const url = new URL(req.url);
87
  const voice = url.searchParams.get("voice") || "";
88
  const model = url.searchParams.get("model") || "";
89
  const text = url.searchParams.get("text") || "";
90
 
91
- console.log(`Debug request with model=${model}, voice=${voice}, text=${text}`);
92
-
93
  if (!voice || !model || !text) {
94
- console.log("Missing required parameters");
95
  return new Response("Bad Request", { status: 400 });
96
  }
97
 
98
  return synthesizeSpeech(model, voice, text);
99
  }
100
 
101
- async function handleSynthesisRequest(req: Request) {
102
  if (unauthorized(req)) {
103
- console.log("Unauthorized request");
104
  return new Response("Unauthorized", { status: 401 });
105
  }
106
 
107
  if (req.method !== "POST") {
108
- console.log(`Invalid method ${req.method}, expected POST`);
109
  return new Response("Method Not Allowed", { status: 405 });
110
  }
111
 
112
- const invalidContentType = validateContentType(req, "application/json");
113
- if (invalidContentType) return invalidContentType;
114
-
115
- const { model, input, voice } = await req.json();
116
- console.log(`Synthesis request with model=${model}, input=${input}, voice=${voice}`);
117
 
118
- return synthesizeSpeech(model, voice, input);
 
 
 
 
 
119
  }
120
 
 
 
121
 
122
- async function handleDemoRequest(req: Request) {
123
- const groupedVoiceList = await fetchVoiceList();
124
-
125
- const html = `<!DOCTYPE html><html lang="en"><head><meta charset="UTF-8"><meta name="viewport" content="width=device-width, initial-scale=1.0"><title>语音合成演示</title><link href="https://fonts.googleapis.com/css2?family=Noto+Sans+SC:wght@400;700&display=swap" rel="stylesheet"><style>:root{--primary-color:#6c8bd6;--primary-light:#a2b3e3;--primary-dark:#3d5b8f;--secondary-color:#f08080;--text-color:#333;--text-secondary:#777;--bg-color:#fff}body{font-family:'Noto Sans SC','Arial',sans-serif;color:var(--text-color);margin:0;padding:0;display:flex;justify-content:center;background-color:#fafafa;background-image:linear-gradient(135deg,#f5f7fa 0%,#c3cfe2 100%);position:relative;overflow:hidden}body::before{content:"";position:absolute;top:0;left:0;right:0;bottom:0;background:repeating-radial-gradient(circle at 50% 50%,rgba(255,255,255,0.8) 0%,rgba(255,255,255,0.8) 2%,transparent 2%,transparent 4%,rgba(255,255,255,0.8) 4%,rgba(255,255,255,0.8) 6%,transparent 6%,transparent 8%,rgba(255,255,255,0.8) 8%,rgba(255,255,255,0.8) 10%,transparent 10%),repeating-linear-gradient(45deg,#D4F4FF 0%,#D4F4FF 5%,#E6F9FF 5%,#E6F9FF 10%,#F0FAFF 10%,#F0FAFF 15%,#E6F9FF 15%,#E6F9FF 20%,#D4F4FF 20%,#D4F4FF 25%);background-blend-mode:multiply;opacity:0.8;z-index:-1;animation:glitch 15s infinite}.container{display:flex;max-width:1200px;width:100%;margin:40px;background:#fff;border-radius:12px;position:relative;background-color:rgba(255,255,255,0.8);z-index:1}@keyframes glitch{0%{background-position:0 0,0 0;filter:hue-rotate(0deg)}50%{background-position:10px 10px,-10px 10px;filter:hue-rotate(360deg)}100%{background-position:0 0,0 0;filter:hue-rotate(0deg)}}.input-area,.output-area{padding:30px;width:50%}.input-area{border-right:1px solid #E0E0E0}h1{font-size:36px;color:var(--primary-color);margin-bottom:30px}.filter-section{margin-bottom:30px}.filter-section label{display:block;font-size:16px;color:var(--text-secondary);margin-bottom:10px}.filter-section input{font-size:16px;padding:10px 
15px;border:2px solid var(--primary-light);border-radius:8px;outline:none;transition:border-color .3s,box-shadow .3s;width:100%;box-sizing:border-box}.filter-section input:focus{border-color:var(--primary-color);box-shadow:0 0 0 2px var(--primary-light)}.slider-container{margin-bottom:30px}.slider-container label{display:block;font-size:16px;color:var(--text-secondary);margin-bottom:10px}.slider{-webkit-appearance:none;width:100%;height:10px;border-radius:5px;background:linear-gradient(to right,var(--secondary-color) 0%,var(--primary-color) 50%,var(--primary-light) 100%);box-shadow:inset 0 1px 2px rgba(0,0,0,0.1),0 1px rgba(255,255,255,0.1);outline:none;opacity:0.7;-webkit-transition:.2s;transition:opacity .2s;margin-bottom:10px}.slider:hover{opacity:1}.slider::-webkit-slider-thumb{-webkit-appearance:none;appearance:none;width:20px;height:20px;border-radius:50%;background:#fff;border:2px solid var(--primary-color);cursor:pointer}.slider::-moz-range-thumb{width:20px;height:20px;border-radius:50%;background:#fff;border:2px solid var(--primary-color);cursor:pointer}.slider-value{font-size:14px;color:var(--text-secondary)}.textarea-container{margin-bottom:30px}.textarea-container label{display:block;font-size:18px;margin-bottom:10px}.textarea-container textarea{width:100%;padding:10px;font-size:16px;border:2px solid var(--primary-light);border-radius:8px;outline:none;resize:vertical;transition:border-color .3s,box-shadow .3s;box-sizing:border-box;height:200px}.textarea-container textarea:focus{border-color:var(--primary-color);box-shadow:0 0 0 2px var(--primary-light)}.voice-group{margin-bottom:20px;border:2px solid var(--primary-light);border-radius:12px;overflow:hidden;cursor:move;background:#fff}.voice-header{padding:15px 
20px;font-size:18px;background:var(--primary-light);color:#fff;cursor:pointer;display:flex;justify-content:space-between;align-items:center}.voice-header:hover{background:var(--primary-color)}.voice-buttons{padding:20px;display:none;gap:12px;flex-wrap:wrap}.voice-button{background:var(--secondary-color);color:#fff;border:none;padding:10px 20px;border-radius:50px;cursor:pointer;transition:filter .3s}.voice-button:hover{filter:brightness(0.9)}.chevron{transition:transform .3s}.voice-group.open .voice-buttons{display:flex}.voice-group.open .chevron{transform:rotate(180deg)}.dragging{opacity:0.5}</style></head><body><div class="container"><div class="input-area"><h1>输入文本</h1><div class="filter-section"><label for="keywords">Speaker筛选:</label><input type="text" id="keywords" value="multilingual,-TW,-CN"></div><div class="slider-container"><label for="rate">语速:</label><input type="range" min="-1" max="1" step="0.1" value="-0.1" class="slider" id="rate"><div class="slider-value" id="rateValue">-0.1</div><label for="pitch">音调:</label><input type="range" min="-1" max="1" step="0.1" value="0.1" class="slider" id="pitch"><div class="slider-value" id="pitchValue">0.1</div></div><div class="textarea-container"><label for="inputText">输入文本:</label><textarea id="inputText">Hello world</textarea></div></div><div class="output-area"><h1>选择语音</h1><div id="voices"></div></div></div><script>const voiceList = ${JSON.stringify(groupedVoiceList)};let audio=null;function filterVoices(){const keywords=document.getElementById('keywords').value.split(',').map(k=>k.trim().toLowerCase());const voicesDiv=document.getElementById('voices');voicesDiv.innerHTML='';const filteredVoices={};for(const[locale,voices]of Object.entries(voiceList)){const filtered=voices.filter(({name,friendlyName})=>keywords.some(keyword=>name.toLowerCase().includes(keyword)||friendlyName.toLowerCase().includes(keyword)));if(filtered.length>0){filteredVoices[locale]=filtered}}for(const[locale,voices]of 
Object.entries(filteredVoices)){const group=document.createElement('div');group.className='voice-group';group.draggable=true;const header=document.createElement('div');header.className='voice-header';header.textContent=locale.toUpperCase();const chevron=document.createElement('span');chevron.className='chevron';chevron.innerHTML='&#9660;';header.appendChild(chevron);const buttonsContainer=document.createElement('div');buttonsContainer.className='voice-buttons';voices.forEach(({model,name})=>{const button=document.createElement('button');button.className='voice-button';button.textContent=name;button.onclick=()=>synthesize(model);buttonsContainer.appendChild(button)});header.onclick=()=>{group.classList.toggle('open')};group.appendChild(header);group.appendChild(buttonsContainer);voicesDiv.appendChild(group)}addDragDropListeners()}function synthesize(model){const text=document.getElementById('inputText').value||'Hello world';const rate=document.getElementById('rate').value||'-0.1';const pitch=document.getElementById('pitch').value||'0.1';const voice=\`rate:\${rate}|pitch:\${pitch}\`;if(audio){audio.pause();audio.currentTime=0}fetch('/v1/audio/speech',{method:'POST',headers:{'Content-Type':'application/json'},body:JSON.stringify({model,input:text,voice})}).then(response=>response.blob()).then(blob=>{const audioUrl=URL.createObjectURL(blob);audio=new Audio(audioUrl);audio.play()})}function addDragDropListeners(){const voicesDiv=document.getElementById('voices');let draggedItem=null;voicesDiv.addEventListener('dragstart',e=>{draggedItem=e.target;e.target.classList.add('dragging')});voicesDiv.addEventListener('dragend',e=>{e.target.classList.remove('dragging');draggedItem=null});voicesDiv.addEventListener('dragover',e=>{e.preventDefault();const afterElement=getDragAfterElement(voicesDiv,e.clientY);if(afterElement==null){voicesDiv.appendChild(draggedItem)}else{voicesDiv.insertBefore(draggedItem,afterElement)}})}function getDragAfterElement(container,y){const 
draggableElements=[...container.querySelectorAll('.voice-group:not(.dragging)')];return draggableElements.reduce((closest,child)=>{const box=child.getBoundingClientRect();const offset=y-box.top-box.height/2;if(offset<0&&offset>closest.offset){return{offset:offset,element:child}}else{return closest}},{offset:Number.NEGATIVE_INFINITY}).element}filterVoices();document.getElementById('keywords').addEventListener('input',filterVoices);const rateSlider=document.getElementById('rate');const rateValue=document.getElementById('rateValue');rateSlider.oninput=function(){rateValue.innerHTML=this.value};const pitchSlider=document.getElementById('pitch');const pitchValue=document.getElementById('pitchValue');pitchSlider.oninput=function(){pitchValue.innerHTML=this.value}</script></body></html>`;
126
-
127
- return new Response(html, {
128
- headers: { "Content-Type": "text/html" },
129
- });
130
- }
131
-
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
132
 
133
  serve(async (req) => {
134
  try {
135
  const url = new URL(req.url);
136
 
137
  if (url.pathname === "/") {
138
- return handleDemoRequest(req);
139
  }
140
 
141
  if (url.pathname === "/tts") {
 
1
+ // import { serve } from "https://deno.land/std/http/server.ts";
2
+ // import { EdgeSpeechTTS } from "https://esm.sh/@lobehub/tts@1";
3
+
4
+ // const AUTH_TOKEN = Deno.env.get("AUTH_TOKEN");
5
+ // const VOICES_URL = "https://speech.platform.bing.com/consumer/speech/synthesize/readaloud/voices/list?trustedclienttoken=6A5AA1D4EAFF4E9FB37E23D68491D6F4";
6
+
7
+ // async function fetchVoiceList() {
8
+ // const response = await fetch(VOICES_URL);
9
+ // const voices = await response.json();
10
+ // return voices.reduce((acc: Record<string, { model: string, name: string, friendlyName: string, locale: string }[]>, voice: any) => {
11
+ // const { ShortName: model, ShortName: name, FriendlyName: friendlyName, Locale: locale } = voice;
12
+ // if (!acc[locale]) acc[locale] = [];
13
+ // acc[locale].push({ model, name, friendlyName, locale });
14
+ // return acc;
15
+ // }, {});
16
+ // }
17
+
18
+ // async function synthesizeSpeech(model: string, voice: string, text: string) {
19
+ // let voiceName;
20
+ // let rate = 0;
21
+ // let pitch = 0;
22
+
23
+ // if (model.includes("tts")) {
24
+ // rate = 0.1;
25
+ // pitch = 0.2;
26
+
27
+ // switch (voice) {
28
+ // case "alloy":
29
+ // voiceName = "zh-CN-YunjianNeural";
30
+ // break;
31
+ // case "echo":
32
+ // voiceName = "zh-CN-YunyangNeural";
33
+ // break;
34
+ // case "fable":
35
+ // voiceName = "zh-CN-XiaoxiaoNeural";
36
+ // break;
37
+ // case "onyx":
38
+ // voiceName = "zh-TW-HsiaoChenNeural";
39
+ // break;
40
+ // default:
41
+ // voiceName = "zh-CN-YunxiNeural";
42
+ // break;
43
+ // }
44
+ // } else {
45
+ // voiceName = model;
46
+ // const params = Object.fromEntries(
47
+ // voice.split("|").map((p) => p.split(":") as [string, string])
48
+ // );
49
+ // rate = Number(params["rate"] || 0);
50
+ // pitch = Number(params["pitch"] || 0);
51
+ // }
52
+
53
+ // const tts = new EdgeSpeechTTS();
54
+
55
+ // const payload = {
56
+ // input: text,
57
+ // options: {
58
+ // rate: rate,
59
+ // pitch: pitch,
60
+ // voice: voiceName
61
+ // },
62
+ // };
63
+ // const response = await tts.create(payload);
64
+ // const mp3Buffer = new Uint8Array(await response.arrayBuffer());
65
+
66
+ // console.log(`Successfully synthesized speech, returning audio/mpeg response`);
67
+ // return new Response(mp3Buffer, {
68
+ // headers: { "Content-Type": "audio/mpeg" },
69
+ // });
70
+ // }
71
+
72
+ // function unauthorized(req: Request) {
73
+ // const authHeader = req.headers.get("Authorization");
74
+ // return AUTH_TOKEN && authHeader !== `Bearer ${AUTH_TOKEN}`;
75
+ // }
76
+
77
+ // function validateContentType(req: Request, expected: string) {
78
+ // const contentType = req.headers.get("Content-Type");
79
+ // if (contentType !== expected) {
80
+ // console.log(`Invalid Content-Type ${contentType}, expected ${expected}`);
81
+ // return new Response("Bad Request", { status: 400 });
82
+ // }
83
+ // }
84
+
85
+ // async function handleDebugRequest(req: Request) {
86
+ // const url = new URL(req.url);
87
+ // const voice = url.searchParams.get("voice") || "";
88
+ // const model = url.searchParams.get("model") || "";
89
+ // const text = url.searchParams.get("text") || "";
90
+
91
+ // console.log(`Debug request with model=${model}, voice=${voice}, text=${text}`);
92
+
93
+ // if (!voice || !model || !text) {
94
+ // console.log("Missing required parameters");
95
+ // return new Response("Bad Request", { status: 400 });
96
+ // }
97
+
98
+ // return synthesizeSpeech(model, voice, text);
99
+ // }
100
+
101
+ // async function handleSynthesisRequest(req: Request) {
102
+ // if (unauthorized(req)) {
103
+ // console.log("Unauthorized request");
104
+ // return new Response("Unauthorized", { status: 401 });
105
+ // }
106
+
107
+ // if (req.method !== "POST") {
108
+ // console.log(`Invalid method ${req.method}, expected POST`);
109
+ // return new Response("Method Not Allowed", { status: 405 });
110
+ // }
111
+
112
+ // const invalidContentType = validateContentType(req, "application/json");
113
+ // if (invalidContentType) return invalidContentType;
114
+
115
+ // const { model, input, voice } = await req.json();
116
+ // console.log(`Synthesis request with model=${model}, input=${input}, voice=${voice}`);
117
+
118
+ // return synthesizeSpeech(model, voice, input);
119
+ // }
120
+
121
+
122
+ // async function handleDemoRequest(req: Request) {
123
+ // const groupedVoiceList = await fetchVoiceList();
124
+
125
+ // const html = `<!DOCTYPE html><html lang="en"><head><meta charset="UTF-8"><meta name="viewport" content="width=device-width, initial-scale=1.0"><title>语音合成演示</title><link href="https://fonts.googleapis.com/css2?family=Noto+Sans+SC:wght@400;700&display=swap" rel="stylesheet"><style>:root{--primary-color:#6c8bd6;--primary-light:#a2b3e3;--primary-dark:#3d5b8f;--secondary-color:#f08080;--text-color:#333;--text-secondary:#777;--bg-color:#fff}body{font-family:'Noto Sans SC','Arial',sans-serif;color:var(--text-color);margin:0;padding:0;display:flex;justify-content:center;background-color:#fafafa;background-image:linear-gradient(135deg,#f5f7fa 0%,#c3cfe2 100%);position:relative;overflow:hidden}body::before{content:"";position:absolute;top:0;left:0;right:0;bottom:0;background:repeating-radial-gradient(circle at 50% 50%,rgba(255,255,255,0.8) 0%,rgba(255,255,255,0.8) 2%,transparent 2%,transparent 4%,rgba(255,255,255,0.8) 4%,rgba(255,255,255,0.8) 6%,transparent 6%,transparent 8%,rgba(255,255,255,0.8) 8%,rgba(255,255,255,0.8) 10%,transparent 10%),repeating-linear-gradient(45deg,#D4F4FF 0%,#D4F4FF 5%,#E6F9FF 5%,#E6F9FF 10%,#F0FAFF 10%,#F0FAFF 15%,#E6F9FF 15%,#E6F9FF 20%,#D4F4FF 20%,#D4F4FF 25%);background-blend-mode:multiply;opacity:0.8;z-index:-1;animation:glitch 15s infinite}.container{display:flex;max-width:1200px;width:100%;margin:40px;background:#fff;border-radius:12px;position:relative;background-color:rgba(255,255,255,0.8);z-index:1}@keyframes glitch{0%{background-position:0 0,0 0;filter:hue-rotate(0deg)}50%{background-position:10px 10px,-10px 10px;filter:hue-rotate(360deg)}100%{background-position:0 0,0 0;filter:hue-rotate(0deg)}}.input-area,.output-area{padding:30px;width:50%}.input-area{border-right:1px solid #E0E0E0}h1{font-size:36px;color:var(--primary-color);margin-bottom:30px}.filter-section{margin-bottom:30px}.filter-section label{display:block;font-size:16px;color:var(--text-secondary);margin-bottom:10px}.filter-section input{font-size:16px;padding:10px 
15px;border:2px solid var(--primary-light);border-radius:8px;outline:none;transition:border-color .3s,box-shadow .3s;width:100%;box-sizing:border-box}.filter-section input:focus{border-color:var(--primary-color);box-shadow:0 0 0 2px var(--primary-light)}.slider-container{margin-bottom:30px}.slider-container label{display:block;font-size:16px;color:var(--text-secondary);margin-bottom:10px}.slider{-webkit-appearance:none;width:100%;height:10px;border-radius:5px;background:linear-gradient(to right,var(--secondary-color) 0%,var(--primary-color) 50%,var(--primary-light) 100%);box-shadow:inset 0 1px 2px rgba(0,0,0,0.1),0 1px rgba(255,255,255,0.1);outline:none;opacity:0.7;-webkit-transition:.2s;transition:opacity .2s;margin-bottom:10px}.slider:hover{opacity:1}.slider::-webkit-slider-thumb{-webkit-appearance:none;appearance:none;width:20px;height:20px;border-radius:50%;background:#fff;border:2px solid var(--primary-color);cursor:pointer}.slider::-moz-range-thumb{width:20px;height:20px;border-radius:50%;background:#fff;border:2px solid var(--primary-color);cursor:pointer}.slider-value{font-size:14px;color:var(--text-secondary)}.textarea-container{margin-bottom:30px}.textarea-container label{display:block;font-size:18px;margin-bottom:10px}.textarea-container textarea{width:100%;padding:10px;font-size:16px;border:2px solid var(--primary-light);border-radius:8px;outline:none;resize:vertical;transition:border-color .3s,box-shadow .3s;box-sizing:border-box;height:200px}.textarea-container textarea:focus{border-color:var(--primary-color);box-shadow:0 0 0 2px var(--primary-light)}.voice-group{margin-bottom:20px;border:2px solid var(--primary-light);border-radius:12px;overflow:hidden;cursor:move;background:#fff}.voice-header{padding:15px 
20px;font-size:18px;background:var(--primary-light);color:#fff;cursor:pointer;display:flex;justify-content:space-between;align-items:center}.voice-header:hover{background:var(--primary-color)}.voice-buttons{padding:20px;display:none;gap:12px;flex-wrap:wrap}.voice-button{background:var(--secondary-color);color:#fff;border:none;padding:10px 20px;border-radius:50px;cursor:pointer;transition:filter .3s}.voice-button:hover{filter:brightness(0.9)}.chevron{transition:transform .3s}.voice-group.open .voice-buttons{display:flex}.voice-group.open .chevron{transform:rotate(180deg)}.dragging{opacity:0.5}</style></head><body><div class="container"><div class="input-area"><h1>输入文本</h1><div class="filter-section"><label for="keywords">Speaker筛选:</label><input type="text" id="keywords" value="multilingual,-TW,-CN"></div><div class="slider-container"><label for="rate">语速:</label><input type="range" min="-1" max="1" step="0.1" value="-0.1" class="slider" id="rate"><div class="slider-value" id="rateValue">-0.1</div><label for="pitch">音调:</label><input type="range" min="-1" max="1" step="0.1" value="0.1" class="slider" id="pitch"><div class="slider-value" id="pitchValue">0.1</div></div><div class="textarea-container"><label for="inputText">输入文本:</label><textarea id="inputText">Hello world</textarea></div></div><div class="output-area"><h1>选择语音</h1><div id="voices"></div></div></div><script>const voiceList = ${JSON.stringify(groupedVoiceList)};let audio=null;function filterVoices(){const keywords=document.getElementById('keywords').value.split(',').map(k=>k.trim().toLowerCase());const voicesDiv=document.getElementById('voices');voicesDiv.innerHTML='';const filteredVoices={};for(const[locale,voices]of Object.entries(voiceList)){const filtered=voices.filter(({name,friendlyName})=>keywords.some(keyword=>name.toLowerCase().includes(keyword)||friendlyName.toLowerCase().includes(keyword)));if(filtered.length>0){filteredVoices[locale]=filtered}}for(const[locale,voices]of 
Object.entries(filteredVoices)){const group=document.createElement('div');group.className='voice-group';group.draggable=true;const header=document.createElement('div');header.className='voice-header';header.textContent=locale.toUpperCase();const chevron=document.createElement('span');chevron.className='chevron';chevron.innerHTML='&#9660;';header.appendChild(chevron);const buttonsContainer=document.createElement('div');buttonsContainer.className='voice-buttons';voices.forEach(({model,name})=>{const button=document.createElement('button');button.className='voice-button';button.textContent=name;button.onclick=()=>synthesize(model);buttonsContainer.appendChild(button)});header.onclick=()=>{group.classList.toggle('open')};group.appendChild(header);group.appendChild(buttonsContainer);voicesDiv.appendChild(group)}addDragDropListeners()}function synthesize(model){const text=document.getElementById('inputText').value||'Hello world';const rate=document.getElementById('rate').value||'-0.1';const pitch=document.getElementById('pitch').value||'0.1';const voice=\`rate:\${rate}|pitch:\${pitch}\`;if(audio){audio.pause();audio.currentTime=0}fetch('/v1/audio/speech',{method:'POST',headers:{'Content-Type':'application/json'},body:JSON.stringify({model,input:text,voice})}).then(response=>response.blob()).then(blob=>{const audioUrl=URL.createObjectURL(blob);audio=new Audio(audioUrl);audio.play()})}function addDragDropListeners(){const voicesDiv=document.getElementById('voices');let draggedItem=null;voicesDiv.addEventListener('dragstart',e=>{draggedItem=e.target;e.target.classList.add('dragging')});voicesDiv.addEventListener('dragend',e=>{e.target.classList.remove('dragging');draggedItem=null});voicesDiv.addEventListener('dragover',e=>{e.preventDefault();const afterElement=getDragAfterElement(voicesDiv,e.clientY);if(afterElement==null){voicesDiv.appendChild(draggedItem)}else{voicesDiv.insertBefore(draggedItem,afterElement)}})}function getDragAfterElement(container,y){const 
draggableElements=[...container.querySelectorAll('.voice-group:not(.dragging)')];return draggableElements.reduce((closest,child)=>{const box=child.getBoundingClientRect();const offset=y-box.top-box.height/2;if(offset<0&&offset>closest.offset){return{offset:offset,element:child}}else{return closest}},{offset:Number.NEGATIVE_INFINITY}).element}filterVoices();document.getElementById('keywords').addEventListener('input',filterVoices);const rateSlider=document.getElementById('rate');const rateValue=document.getElementById('rateValue');rateSlider.oninput=function(){rateValue.innerHTML=this.value};const pitchSlider=document.getElementById('pitch');const pitchValue=document.getElementById('pitchValue');pitchSlider.oninput=function(){pitchValue.innerHTML=this.value}</script></body></html>`;
126
+
127
+ // return new Response(html, {
128
+ // headers: { "Content-Type": "text/html" },
129
+ // });
130
+ // }
131
+
132
+
133
+ // serve(async (req) => {
134
+ // try {
135
+ // const url = new URL(req.url);
136
+
137
+ // if (url.pathname === "/") {
138
+ // return handleDemoRequest(req);
139
+ // }
140
+
141
+ // if (url.pathname === "/tts") {
142
+ // return handleDebugRequest(req);
143
+ // }
144
+
145
+ // if (url.pathname !== "/v1/audio/speech") {
146
+ // console.log(`Unhandled path ${url.pathname}`);
147
+ // return new Response("Not Found", { status: 404 });
148
+ // }
149
+
150
+ // return handleSynthesisRequest(req);
151
+ // } catch (err) {
152
+ // console.error(`Error processing request: ${err.message}`);
153
+ // return new Response(`Internal Server Error\n${err.message}`, {
154
+ // status: 500,
155
+ // });
156
+ // }
157
+ // });
158
  import { serve } from "https://deno.land/std/http/server.ts";
 
159
 
160
  const AUTH_TOKEN = Deno.env.get("AUTH_TOKEN");
161
  const VOICES_URL = "https://speech.platform.bing.com/consumer/speech/synthesize/readaloud/voices/list?trustedclienttoken=6A5AA1D4EAFF4E9FB37E23D68491D6F4";
162
 
163
+ // 简化的Edge TTS实现
164
+ class SimpleEdgeTTS {
165
+ async create(options) {
166
+ const { input, options: voiceOptions } = options;
167
+ const { voice, rate = 0, pitch = 0 } = voiceOptions;
168
+
169
+ const ssml = this.generateSSML(input, voice, rate, pitch);
170
+
171
+ const response = await fetch("https://speech.platform.bing.com/consumer/speech/synthesize/readaloud/edge/v1", {
172
+ method: "POST",
173
+ headers: {
174
+ "Content-Type": "application/ssml+xml",
175
+ "X-Microsoft-OutputFormat": "audio-24khz-48kbitrate-mono-mp3",
176
+ "User-Agent": "Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36"
177
+ },
178
+ body: ssml
179
+ });
180
+
181
+ return response;
182
+ }
183
+
184
+ generateSSML(text, voice, rate, pitch) {
185
+ const rateValue = rate >= 0 ? `+${Math.round(rate * 100)}%` : `${Math.round(rate * 100)}%`;
186
+ const pitchValue = pitch >= 0 ? `+${Math.round(pitch * 50)}Hz` : `${Math.round(pitch * 50)}Hz`;
187
+
188
+ return `<speak version="1.0" xmlns="http://www.w3.org/2001/10/synthesis" xml:lang="zh-CN">
189
+ <voice name="${voice}">
190
+ <prosody rate="${rateValue}" pitch="${pitchValue}">
191
+ ${text.replace(/[<>&'"]/g, (char) => {
192
+ const entities = { '<': '&lt;', '>': '&gt;', '&': '&amp;', '"': '&quot;', "'": '&apos;' };
193
+ return entities[char];
194
+ })}
195
+ </prosody>
196
+ </voice>
197
+ </speak>`;
198
+ }
199
+ }
200
+
201
  async function fetchVoiceList() {
202
+ try {
203
+ const response = await fetch(VOICES_URL);
204
+ const voices = await response.json();
205
+ return voices.reduce((acc, voice) => {
206
+ const { ShortName: model, ShortName: name, FriendlyName: friendlyName, Locale: locale } = voice;
207
+ if (!acc[locale]) acc[locale] = [];
208
+ acc[locale].push({ model, name, friendlyName, locale });
209
+ return acc;
210
+ }, {});
211
+ } catch (error) {
212
+ console.error("Failed to fetch voice list:", error);
213
+ return {};
214
+ }
215
  }
216
 
217
/**
 * Synthesizes `text` with SimpleEdgeTTS and returns it as an MP3 response.
 *
 * Two addressing modes are supported:
 * - `model` contains "tts": `voice` is an alias ("alloy", "echo", "fable",
 *   "onyx") mapped to a fixed neural voice, with rate 0.1 and pitch 0.2.
 *   Unknown aliases fall back to "zh-CN-YunxiNeural".
 * - otherwise: `model` is used as the voice name and `voice` carries tuning
 *   parameters in the form "rate:<n>|pitch:<n>". Missing or non-numeric values
 *   are treated as 0.
 *
 * @param model Model identifier or a concrete voice name.
 * @param voice Voice alias or tuning parameter string; nullish is treated as "".
 * @param text  Text to speak.
 * @returns 200 audio/mpeg on success, 400 for unusable arguments, 502 when the
 *          upstream service reports an error status, 500 on unexpected failures.
 */
async function synthesizeSpeech(model: string, voice: string, text: string): Promise<Response> {
  const voiceSpec: unknown = voice ?? "";
  if (
    typeof model !== "string" || model === "" ||
    typeof text !== "string" || text === "" ||
    typeof voiceSpec !== "string"
  ) {
    return new Response("Bad Request", { status: 400 });
  }

  // A Map avoids accidental hits on Object.prototype keys such as "constructor".
  const aliasToVoice = new Map<string, string>([
    ["alloy", "zh-CN-YunjianNeural"],
    ["echo", "zh-CN-YunyangNeural"],
    ["fable", "zh-CN-XiaoxiaoNeural"],
    ["onyx", "zh-TW-HsiaoChenNeural"],
  ]);

  const toFiniteNumber = (raw: string | undefined): number => {
    const parsed = Number(raw || 0);
    return Number.isFinite(parsed) ? parsed : 0;
  };

  let voiceName: string;
  let rate: number;
  let pitch: number;

  if (model.includes("tts")) {
    rate = 0.1;
    pitch = 0.2;
    voiceName = aliasToVoice.get(voiceSpec) ?? "zh-CN-YunxiNeural";
  } else {
    voiceName = model;
    const params = new Map<string, string>();
    for (const part of voiceSpec.split("|")) {
      const [key, value] = part.split(":");
      params.set(key, value ?? "");
    }
    rate = toFiniteNumber(params.get("rate"));
    pitch = toFiniteNumber(params.get("pitch"));
  }

  try {
    const tts = new SimpleEdgeTTS();
    const response = await tts.create({
      input: text,
      options: { rate, pitch, voice: voiceName },
    });

    // NOTE(review): create() appears to return the upstream fetch Response;
    // the strict `=== false` check keeps this safe if it ever returns another shape.
    if (response.ok === false) {
      const detail = await response.text().catch(() => "");
      console.error(
        `Upstream synthesis failed with HTTP ${response.status}: ${detail.slice(0, 200)}`,
      );
      return new Response("Synthesis failed", { status: 502 });
    }

    const mp3Buffer = new Uint8Array(await response.arrayBuffer());
    console.log(`Successfully synthesized speech, returning audio/mpeg response`);
    return new Response(mp3Buffer, {
      headers: { "Content-Type": "audio/mpeg" },
    });
  } catch (error) {
    console.error("Synthesis error:", error);
    return new Response("Synthesis failed", { status: 500 });
  }
}
264
 
265
/**
 * Reports whether a request must be rejected for lacking valid credentials.
 *
 * When AUTH_TOKEN is not configured every request is allowed. Otherwise the
 * `Authorization` header must be `Bearer <AUTH_TOKEN>`; the scheme name is
 * matched case-insensitively and surrounding whitespace is ignored, while the
 * token itself must match exactly.
 *
 * @param req Incoming request.
 * @returns `true` when the request is NOT authorized, `false` otherwise.
 */
function unauthorized(req: Request): boolean {
  const authHeader = req.headers.get("Authorization") ?? "";
  if (!AUTH_TOKEN) return false;

  const match = /^Bearer\s+(.+)$/i.exec(authHeader.trim());
  return match === null || match[1] !== AUTH_TOKEN;
}
269
 
270
/**
 * Handles a synthesis request whose arguments arrive as query parameters.
 *
 * Reads `voice`, `model` and `text` from the request URL. When any of them is
 * missing or empty the answer is 400 "Bad Request"; otherwise the call is
 * delegated to synthesizeSpeech.
 *
 * NOTE(review): unlike handleSynthesisRequest, this handler does not call
 * unauthorized() — confirm that is intentional.
 */
async function handleDebugRequest(req) {
  const { searchParams } = new URL(req.url);
  const [voice, model, text] = ["voice", "model", "text"].map(
    (key) => searchParams.get(key) || "",
  );

  const hasAllParams = Boolean(voice && model && text);
  if (hasAllParams) {
    return synthesizeSpeech(model, voice, text);
  }

  return new Response("Bad Request", { status: 400 });
}
282
 
283
/**
 * Handles the JSON synthesis endpoint.
 *
 * Checks, in order: authorization (401), HTTP method must be POST (405), and a
 * JSON media type (400). Media-type parameters such as `; charset=utf-8` are
 * accepted. The body must be a JSON object with non-empty string `model` and
 * `input`; `voice` is optional and defaults to an empty string.
 *
 * @param req Incoming request.
 * @returns The synthesizeSpeech response, or an error response as described above.
 */
async function handleSynthesisRequest(req: Request): Promise<Response> {
  if (unauthorized(req)) {
    return new Response("Unauthorized", { status: 401 });
  }

  if (req.method !== "POST") {
    return new Response("Method Not Allowed", { status: 405 });
  }

  // Compare only the media type; clients commonly append "; charset=utf-8".
  const mediaType = (req.headers.get("Content-Type") ?? "").split(";")[0].trim().toLowerCase();
  if (mediaType !== "application/json") {
    return new Response("Bad Request", { status: 400 });
  }

  let payload: unknown;
  try {
    payload = await req.json();
  } catch (_error) {
    return new Response("Invalid JSON", { status: 400 });
  }

  if (typeof payload !== "object" || payload === null || Array.isArray(payload)) {
    return new Response("Bad Request", { status: 400 });
  }

  const { model, input, voice = "" } = payload as Record<string, unknown>;
  if (
    typeof model !== "string" || model === "" ||
    typeof input !== "string" || input === "" ||
    typeof voice !== "string"
  ) {
    return new Response("Bad Request", { status: 400 });
  }

  return synthesizeSpeech(model, voice, input);
}
 
305
/**
 * Renders the interactive demo page.
 *
 * Fetches the grouped voice list, embeds it as JSON in an inline script and
 * returns a self-contained HTML document (styles, markup and client logic).
 * The client posts to `/v1/audio/speech` and plays the returned audio.
 *
 * @returns An HTML response declared as UTF-8.
 */
async function handleDemoRequest(): Promise<Response> {
  const groupedVoiceList = await fetchVoiceList();

  // Escape "<" so voice data containing "</script>" cannot terminate the inline
  // script block; "\u003c" is decoded back to "<" by the JavaScript parser.
  const voiceListJson = JSON.stringify(groupedVoiceList).replace(/</g, "\\u003c");

  const html = `<!DOCTYPE html>
<html lang="zh-CN">
<head>
<meta charset="UTF-8">
<meta name="viewport" content="width=device-width, initial-scale=1.0, user-scalable=no">
<title>Edge TTS 语音合成</title>
<link href="https://fonts.googleapis.com/css2?family=Inter:wght@300;400;500;600;700&family=Noto+Sans+SC:wght@300;400;500;600;700&display=swap" rel="stylesheet">
<style>
:root {
  --primary: #2563eb;
  --primary-hover: #1d4ed8;
  --secondary: #64748b;
  --accent: #0ea5e9;
  --success: #10b981;
  --warning: #f59e0b;
  --error: #ef4444;
  --background: #ffffff;
  --surface: #f8fafc;
  --surface-alt: #f1f5f9;
  --border: #e2e8f0;
  --border-light: #f1f5f9;
  --text-primary: #0f172a;
  --text-secondary: #475569;
  --text-muted: #94a3b8;
  --shadow-sm: 0 1px 2px 0 rgb(0 0 0 / 0.05);
  --shadow: 0 1px 3px 0 rgb(0 0 0 / 0.1), 0 1px 2px -1px rgb(0 0 0 / 0.1);
  --shadow-md: 0 4px 6px -1px rgb(0 0 0 / 0.1), 0 2px 4px -2px rgb(0 0 0 / 0.1);
  --shadow-lg: 0 10px 15px -3px rgb(0 0 0 / 0.1), 0 4px 6px -4px rgb(0 0 0 / 0.1);
  --radius: 8px;
  --radius-lg: 12px;
  --spacing: 1rem;
}

@media (prefers-color-scheme: dark) {
  :root {
    --primary: #3b82f6;
    --primary-hover: #2563eb;
    --background: #0f172a;
    --surface: #1e293b;
    --surface-alt: #334155;
    --border: #334155;
    --border-light: #475569;
    --text-primary: #f1f5f9;
    --text-secondary: #cbd5e1;
    --text-muted: #64748b;
  }
}

* {
  box-sizing: border-box;
  margin: 0;
  padding: 0;
}

body {
  font-family: 'Inter', 'Noto Sans SC', -apple-system, BlinkMacSystemFont, system-ui, sans-serif;
  background: var(--background);
  color: var(--text-primary);
  line-height: 1.6;
  font-size: 14px;
  overflow-x: hidden;
}

.container {
  max-width: 1400px;
  margin: 0 auto;
  padding: 2rem;
  min-height: 100vh;
}

.header {
  text-align: center;
  margin-bottom: 3rem;
}

.header h1 {
  font-size: clamp(2rem, 5vw, 3rem);
  font-weight: 700;
  color: var(--text-primary);
  margin-bottom: 0.5rem;
  letter-spacing: -0.025em;
}

.header p {
  font-size: 1.125rem;
  color: var(--text-secondary);
  font-weight: 400;
}

.main-grid {
  display: grid;
  grid-template-columns: 1fr;
  gap: 2rem;
}

@media (min-width: 1024px) {
  .main-grid {
    grid-template-columns: 400px 1fr;
  }
}

.panel {
  background: var(--surface);
  border: 1px solid var(--border);
  border-radius: var(--radius-lg);
  padding: 2rem;
  box-shadow: var(--shadow);
}

.panel-title {
  font-size: 1.25rem;
  font-weight: 600;
  color: var(--text-primary);
  margin-bottom: 1.5rem;
  display: flex;
  align-items: center;
  gap: 0.5rem;
}

.form-group {
  margin-bottom: 1.5rem;
}

.form-label {
  display: block;
  font-weight: 500;
  color: var(--text-primary);
  margin-bottom: 0.5rem;
  font-size: 0.875rem;
}

.form-control {
  width: 100%;
  padding: 0.75rem 1rem;
  border: 1px solid var(--border);
  border-radius: var(--radius);
  background: var(--background);
  color: var(--text-primary);
  font-family: inherit;
  font-size: 0.875rem;
  transition: all 0.2s ease;
}

.form-control:focus {
  outline: none;
  border-color: var(--primary);
  box-shadow: 0 0 0 3px rgb(37 99 235 / 0.1);
}

.form-control::placeholder {
  color: var(--text-muted);
}

.textarea {
  min-height: 120px;
  resize: vertical;
  font-family: inherit;
}

.filter-tabs {
  display: flex;
  gap: 0.5rem;
  margin-bottom: 1rem;
  flex-wrap: wrap;
}

.filter-tab {
  padding: 0.5rem 1rem;
  border: 1px solid var(--border);
  border-radius: 20px;
  background: var(--background);
  color: var(--text-secondary);
  cursor: pointer;
  font-size: 0.8rem;
  font-weight: 500;
  transition: all 0.2s ease;
  white-space: nowrap;
}

.filter-tab:hover {
  border-color: var(--primary);
  color: var(--primary);
}

.filter-tab.active {
  background: var(--primary);
  border-color: var(--primary);
  color: white;
}

.slider-group {
  margin-bottom: 1.5rem;
}

.slider-container {
  display: flex;
  align-items: center;
  gap: 1rem;
  margin-bottom: 0.75rem;
}

.slider {
  flex: 1;
  height: 6px;
  border-radius: 3px;
  background: var(--border);
  outline: none;
  -webkit-appearance: none;
  appearance: none;
  cursor: pointer;
}

.slider::-webkit-slider-thumb {
  -webkit-appearance: none;
  appearance: none;
  width: 20px;
  height: 20px;
  border-radius: 50%;
  background: var(--primary);
  cursor: pointer;
  border: 2px solid white;
  box-shadow: var(--shadow);
  transition: all 0.2s ease;
}

.slider::-webkit-slider-thumb:hover {
  transform: scale(1.1);
  box-shadow: var(--shadow-md);
}

.slider::-moz-range-thumb {
  width: 20px;
  height: 20px;
  border-radius: 50%;
  background: var(--primary);
  cursor: pointer;
  border: 2px solid white;
  box-shadow: var(--shadow);
}

.slider-value {
  font-size: 0.8rem;
  color: var(--text-secondary);
  font-weight: 500;
  min-width: 3rem;
  text-align: center;
  background: var(--surface-alt);
  padding: 0.25rem 0.5rem;
  border-radius: 4px;
}

.voice-container {
  max-height: 70vh;
  overflow-y: auto;
  border: 1px solid var(--border);
  border-radius: var(--radius);
  background: var(--background);
}

.voice-container::-webkit-scrollbar {
  width: 6px;
}

.voice-container::-webkit-scrollbar-track {
  background: var(--surface);
}

.voice-container::-webkit-scrollbar-thumb {
  background: var(--border);
  border-radius: 3px;
}

.voice-container::-webkit-scrollbar-thumb:hover {
  background: var(--text-muted);
}

.voice-group {
  border-bottom: 1px solid var(--border-light);
}

.voice-group:last-child {
  border-bottom: none;
}

.voice-header {
  padding: 1rem 1.25rem;
  cursor: pointer;
  display: flex;
  justify-content: space-between;
  align-items: center;
  background: var(--surface);
  transition: all 0.2s ease;
  border-left: 3px solid transparent;
}

.voice-header:hover {
  background: var(--surface-alt);
  border-left-color: var(--primary);
}

.voice-header.active {
  background: var(--primary);
  color: white;
  border-left-color: var(--primary-hover);
}

.voice-header-info {
  display: flex;
  align-items: center;
  gap: 0.75rem;
}

.voice-header-title {
  font-weight: 500;
  font-size: 0.9rem;
}

.voice-header-count {
  font-size: 0.75rem;
  opacity: 0.8;
  background: rgba(255, 255, 255, 0.2);
  padding: 0.125rem 0.5rem;
  border-radius: 10px;
}

.chevron {
  transition: transform 0.2s ease;
  font-size: 0.75rem;
}

.voice-group.open .chevron {
  transform: rotate(180deg);
}

.voice-buttons {
  padding: 1rem 1.25rem;
  display: none;
  gap: 0.5rem;
  flex-wrap: wrap;
  background: var(--surface-alt);
}

.voice-group.open .voice-buttons {
  display: flex;
}

.voice-button {
  background: white;
  color: var(--text-primary);
  border: 1px solid var(--border);
  padding: 0.5rem 1rem;
  border-radius: 20px;
  cursor: pointer;
  transition: all 0.2s ease;
  font-size: 0.8rem;
  font-weight: 500;
  white-space: nowrap;
}

.voice-button:hover {
  border-color: var(--primary);
  color: var(--primary);
  transform: translateY(-1px);
  box-shadow: var(--shadow);
}

.voice-button.playing {
  background: var(--success);
  color: white;
  border-color: var(--success);
  animation: pulse 1.5s infinite;
}

@keyframes pulse {
  0%, 100% { transform: scale(1); }
  50% { transform: scale(1.02); }
}

.chinese-voices {
  background: linear-gradient(135deg, #fef3c7 0%, #fde68a 100%);
}

@media (prefers-color-scheme: dark) {
  .chinese-voices {
    background: linear-gradient(135deg, #451a03 0%, #78350f 100%);
  }
}

.chinese-voices .voice-header {
  background: rgba(255, 255, 255, 0.9);
}

@media (prefers-color-scheme: dark) {
  .chinese-voices .voice-header {
    background: rgba(0, 0, 0, 0.3);
  }
}

.status-message {
  margin-top: 1rem;
  padding: 1rem;
  border-radius: var(--radius);
  font-size: 0.875rem;
  text-align: center;
  display: none;
  font-weight: 500;
}

.status-message.show {
  display: block;
}

.status-message.success {
  background: #f0fdf4;
  color: var(--success);
  border: 1px solid #bbf7d0;
}

.status-message.error {
  background: #fef2f2;
  color: var(--error);
  border: 1px solid #fecaca;
}

@media (prefers-color-scheme: dark) {
  .status-message.success {
    background: #064e3b;
    border-color: #047857;
  }

  .status-message.error {
    background: #7f1d1d;
    border-color: #dc2626;
  }
}

.mobile-controls {
  position: fixed;
  bottom: 0;
  left: 0;
  right: 0;
  background: var(--surface);
  padding: 1rem;
  box-shadow: var(--shadow-lg);
  border-top: 1px solid var(--border);
  display: none;
  z-index: 1000;
  backdrop-filter: blur(10px);
}

@media (max-width: 1023px) {
  .mobile-controls {
    display: block;
  }

  .voice-container {
    margin-bottom: 100px;
  }
}

.current-audio-info {
  font-size: 0.8rem;
  color: var(--text-secondary);
  margin-bottom: 0.75rem;
  text-align: center;
}

.audio-controls {
  display: flex;
  gap: 0.75rem;
  align-items: center;
}

.control-button {
  background: var(--primary);
  color: white;
  border: none;
  padding: 0.75rem 1.5rem;
  border-radius: var(--radius);
  cursor: pointer;
  font-size: 0.875rem;
  font-weight: 500;
  transition: all 0.2s ease;
  flex: 1;
}

.control-button:hover {
  background: var(--primary-hover);
  transform: translateY(-1px);
}

.control-button:disabled {
  background: var(--text-muted);
  color: var(--background);
  cursor: not-allowed;
  transform: none;
}

.loading {
  display: inline-block;
  width: 14px;
  height: 14px;
  border: 2px solid rgba(255, 255, 255, 0.3);
  border-radius: 50%;
  border-top-color: white;
  animation: spin 1s linear infinite;
  margin-right: 0.5rem;
}

@keyframes spin {
  to { transform: rotate(360deg); }
}

@media (max-width: 640px) {
  .container {
    padding: 1rem;
  }

  .panel {
    padding: 1.5rem;
  }

  .header {
    margin-bottom: 2rem;
  }

  .filter-tabs {
    gap: 0.25rem;
  }

  .filter-tab {
    font-size: 0.75rem;
    padding: 0.375rem 0.75rem;
  }
}
</style>
</head>

<body>
<div class="container">
  <div class="header">
    <h1>Edge TTS 语音合成</h1>
    <p>高质量的多语言语音合成服务</p>
  </div>

  <div class="main-grid">
    <div class="panel">
      <h2 class="panel-title">
        ⚙️ 设置
      </h2>

      <div class="form-group">
        <label class="form-label">语音筛选</label>
        <div class="filter-tabs">
          <div class="filter-tab active" data-filter="chinese">中文</div>
          <div class="filter-tab" data-filter="english">英文</div>
          <div class="filter-tab" data-filter="multilingual">多语言</div>
          <div class="filter-tab" data-filter="all">全部</div>
        </div>
        <input type="text" id="customFilter" class="form-control" placeholder="自定义筛选...">
      </div>

      <div class="slider-group">
        <label class="form-label">语速</label>
        <div class="slider-container">
          <input type="range" min="-1" max="1" step="0.1" value="-0.1" class="slider" id="rate">
          <div class="slider-value" id="rateValue">-0.1</div>
        </div>
      </div>

      <div class="slider-group">
        <label class="form-label">音调</label>
        <div class="slider-container">
          <input type="range" min="-1" max="1" step="0.1" value="0.1" class="slider" id="pitch">
          <div class="slider-value" id="pitchValue">0.1</div>
        </div>
      </div>

      <div class="form-group">
        <label class="form-label" for="inputText">输入文本</label>
        <textarea id="inputText" class="form-control textarea" placeholder="请输入要转换的文本...">你好,欢迎使用Edge TTS语音合成服务!这里支持多种中文语音选择。</textarea>
      </div>

      <div class="status-message" id="statusMessage"></div>
    </div>

    <div class="panel">
      <h2 class="panel-title">
        🎤 语音选择
      </h2>

      <div class="voice-container" id="voices"></div>
    </div>
  </div>

  <div class="mobile-controls">
    <div class="current-audio-info" id="currentAudioInfo">选择语音后开始合成</div>
    <div class="audio-controls">
      <button class="control-button" id="pauseBtn" disabled>暂停</button>
      <button class="control-button" id="stopBtn" disabled>停止</button>
    </div>
  </div>
</div>

<script>
const voiceList = ${voiceListJson};
let audio = null;
let audioUrl = null;
let statusTimer = null;
let currentVoiceButton = null;
let currentFilter = 'chinese';

const chineseVoiceMapping = {
  'zh-CN': '中文 (普通话)',
  'zh-HK': '中文 (粤语)',
  'zh-TW': '中文 (台湾话)',
  'zh-CN-liaoning': '中文 (辽宁话)',
  'zh-CN-shaanxi': '中文 (陕西话)'
};

const filterPresets = {
  chinese: ['zh-CN', 'zh-HK', 'zh-TW', 'zh-CN-liaoning', 'zh-CN-shaanxi'],
  english: ['en-US', 'en-GB', 'en-AU', 'en-CA', 'en-IN'],
  multilingual: Object.keys(voiceList).filter(locale =>
    !locale.startsWith('zh-') && !locale.startsWith('en-')
  ).slice(0, 10)
};

function showStatusMessage(message, type = 'success') {
  const statusEl = document.getElementById('statusMessage');
  statusEl.textContent = message;
  statusEl.className = \`status-message show \${type}\`;
  // Cancel the previous hide timer so it cannot hide this newer message early.
  clearTimeout(statusTimer);
  statusTimer = setTimeout(() => {
    statusEl.classList.remove('show');
  }, 3000);
}

function updateMobileControls(voiceName = '') {
  const infoEl = document.getElementById('currentAudioInfo');
  const pauseBtn = document.getElementById('pauseBtn');
  const stopBtn = document.getElementById('stopBtn');

  if (voiceName) {
    infoEl.textContent = \`当前语音: \${voiceName}\`;
    pauseBtn.disabled = false;
    stopBtn.disabled = false;
  } else {
    infoEl.textContent = '选择语音后开始合成';
    pauseBtn.disabled = true;
    stopBtn.disabled = true;
  }
}

function filterVoices(filterType = 'chinese', customKeyword = '') {
  const voicesDiv = document.getElementById('voices');
  voicesDiv.innerHTML = '';

  let filteredVoices = {};

  if (filterType === 'all') {
    filteredVoices = voiceList;
  } else if (filterPresets[filterType]) {
    for (const locale of filterPresets[filterType]) {
      if (voiceList[locale]) {
        filteredVoices[locale] = voiceList[locale];
      }
    }
  }

  if (customKeyword.trim()) {
    const keyword = customKeyword.trim().toLowerCase();
    const tempFiltered = {};

    for (const [locale, voices] of Object.entries(filteredVoices)) {
      const matchingVoices = voices.filter(voice =>
        voice.name.toLowerCase().includes(keyword) ||
        voice.friendlyName.toLowerCase().includes(keyword) ||
        locale.toLowerCase().includes(keyword)
      );

      if (matchingVoices.length > 0) {
        tempFiltered[locale] = matchingVoices;
      }
    }

    filteredVoices = tempFiltered;
  }

  for (const [locale, voices] of Object.entries(filteredVoices)) {
    const group = document.createElement('div');
    group.className = \`voice-group \${filterType === 'chinese' ? 'chinese-voices' : ''}\`;

    const header = document.createElement('div');
    header.className = 'voice-header';

    const displayName = chineseVoiceMapping[locale] || locale.toUpperCase();
    const headerInfo = document.createElement('div');
    headerInfo.className = 'voice-header-info';

    // Locale names come from remote data, so insert them as text, not markup.
    const titleEl = document.createElement('span');
    titleEl.className = 'voice-header-title';
    titleEl.textContent = displayName;
    const countEl = document.createElement('span');
    countEl.className = 'voice-header-count';
    countEl.textContent = String(voices.length);
    headerInfo.appendChild(titleEl);
    headerInfo.appendChild(countEl);

    const chevron = document.createElement('span');
    chevron.className = 'chevron';
    chevron.innerHTML = '▼';

    header.appendChild(headerInfo);
    header.appendChild(chevron);

    const buttonsContainer = document.createElement('div');
    buttonsContainer.className = 'voice-buttons';

    voices.forEach(({model, name, friendlyName}) => {
      const button = document.createElement('button');
      button.className = 'voice-button';

      const displayName = name.replace(/Neural$/, '').split('-').pop() || name;
      button.textContent = displayName;
      button.title = friendlyName;

      button.onclick = () => synthesize(model, button, displayName);
      buttonsContainer.appendChild(button);
    });

    header.onclick = () => {
      group.classList.toggle('open');
      header.classList.toggle('active');
    };

    group.appendChild(header);
    group.appendChild(buttonsContainer);
    voicesDiv.appendChild(group);

    if (filterType === 'chinese') {
      group.classList.add('open');
      header.classList.add('active');
    }
  }
}

function synthesize(model, buttonElement, voiceName) {
  const text = document.getElementById('inputText').value || '你好,欢迎使用Edge TTS语音合成服务!';
  const rate = document.getElementById('rate').value || '-0.1';
  const pitch = document.getElementById('pitch').value || '0.1';
  const voice = \`rate:\${rate}|pitch:\${pitch}\`;

  if (currentVoiceButton) {
    currentVoiceButton.classList.remove('playing');
    currentVoiceButton.innerHTML = currentVoiceButton.textContent;
  }

  currentVoiceButton = buttonElement;
  buttonElement.classList.add('playing');
  buttonElement.innerHTML = '<span class="loading"></span>' + buttonElement.textContent;

  if (audio) {
    audio.pause();
    audio.currentTime = 0;
  }

  updateMobileControls(voiceName);
  showStatusMessage('正在合成语音...', 'success');

  fetch('/v1/audio/speech', {
    method: 'POST',
    headers: {'Content-Type': 'application/json'},
    body: JSON.stringify({model, input: text, voice})
  })
  .then(response => {
    if (!response.ok) {
      throw new Error('合成失败');
    }
    return response.blob();
  })
  .then(blob => {
    // Release the previous clip's blob URL so repeated playback does not leak memory.
    if (audioUrl) {
      URL.revokeObjectURL(audioUrl);
    }
    audioUrl = URL.createObjectURL(blob);
    audio = new Audio(audioUrl);

    audio.onplay = () => {
      showStatusMessage(\`正在播放: \${voiceName}\`, 'success');
    };

    audio.onended = () => {
      buttonElement.classList.remove('playing');
      buttonElement.innerHTML = buttonElement.textContent;
      updateMobileControls();
      showStatusMessage('播放完成', 'success');
    };

    audio.onerror = () => {
      buttonElement.classList.remove('playing');
      buttonElement.innerHTML = buttonElement.textContent;
      updateMobileControls();
      showStatusMessage('播放失败', 'error');
    };

    audio.play();
  })
  .catch(error => {
    buttonElement.classList.remove('playing');
    buttonElement.innerHTML = buttonElement.textContent;
    updateMobileControls();
    showStatusMessage('合成失败: ' + error.message, 'error');
  });
}

document.addEventListener('DOMContentLoaded', function() {
  document.querySelectorAll('.filter-tab').forEach(tab => {
    tab.addEventListener('click', function() {
      document.querySelectorAll('.filter-tab').forEach(t => t.classList.remove('active'));
      this.classList.add('active');
      currentFilter = this.dataset.filter;
      filterVoices(currentFilter, document.getElementById('customFilter').value);
    });
  });

  document.getElementById('customFilter').addEventListener('input', function() {
    filterVoices(currentFilter, this.value);
  });

  const rateSlider = document.getElementById('rate');
  const rateValue = document.getElementById('rateValue');
  rateSlider.oninput = function() {
    rateValue.textContent = this.value;
  };

  const pitchSlider = document.getElementById('pitch');
  const pitchValue = document.getElementById('pitchValue');
  pitchSlider.oninput = function() {
    pitchValue.textContent = this.value;
  };

  document.getElementById('pauseBtn').addEventListener('click', function() {
    if (audio) {
      if (audio.paused) {
        audio.play();
        this.textContent = '暂停';
      } else {
        audio.pause();
        this.textContent = '继续';
      }
    }
  });

  document.getElementById('stopBtn').addEventListener('click', function() {
    if (audio) {
      audio.pause();
      audio.currentTime = 0;
      if (currentVoiceButton) {
        currentVoiceButton.classList.remove('playing');
        currentVoiceButton.innerHTML = currentVoiceButton.textContent;
      }
      updateMobileControls();
      document.getElementById('pauseBtn').textContent = '暂停';
    }
  });

  filterVoices('chinese');
});
</script>
</body>
</html>`;

  return new Response(html, {
    // Declare the encoding explicitly; the page contains non-ASCII text.
    headers: { "Content-Type": "text/html; charset=utf-8" },
  });
}
1174
 
1175
  serve(async (req) => {
1176
  try {
1177
  const url = new URL(req.url);
1178
 
1179
  if (url.pathname === "/") {
1180
+ return handleDemoRequest();
1181
  }
1182
 
1183
  if (url.pathname === "/tts") {