Spaces:
Running
Running
| import { setVoice, textToWavAudio } from "./piper.js"; | |
// Root URL of the Piper voice repository. The "//" after the scheme is
// required: without it, a page served over https resolves the string as a
// path relative to the page rather than as an absolute URL.
const voiceRoot = "https://huggingface.co/rhasspy/piper-voices/resolve/main";

// Language / voice / quality requested through the URL hash. Each one is
// reset to null once it has been applied to its dropdown.
let languageToSelect = null;
let voiceToSelect = null;
let qualityToSelect = null;

// Voice catalogue as downloaded from voices.json (object keyed by voice key).
let voices = {};

// Rank of each quality level; used to order the quality dropdown.
const qualitySort = {
  x_low: 0,
  low: 1,
  medium: 2,
  high: 3,
};

// Identifier (`<language>-<voice>-<quality>`) and base URL of the voice that
// is currently selected in the dropdowns.
let voiceId = "";
let voiceUrl = "";

// Identifier of the voice most recently handed to setVoice().
let loadedVoiceId = "";
/**
 * Initializes the page: starts loading the voice catalogue, wires the speak
 * button to the synthesizer, enables the text controls, and exposes the
 * dropdown change handlers on `window`.
 *
 * @returns {Promise<void>}
 */
async function main() {
  loadVoices();

  const buttonSpeak = document.getElementById("buttonSpeak");
  const audioTTS = document.getElementById("audioTTS");
  const textInput = document.getElementById("textInput");
  const status = document.getElementById("status");
  const speakerSelect = document.getElementById("speaker");
  const inputLengthScale = document.getElementById("lengthScale");
  const inputNoiseScale = document.getElementById("noiseScale");
  const inputNoiseWScale = document.getElementById("noiseWScale");

  // Object URL of the clip currently assigned to the audio element. It is
  // remembered so it can be released once the next clip replaces it.
  let currentAudioUrl = null;

  // Reads a numeric input; yields null when it is empty or not a number.
  const readScale = (input) => {
    const value = Number.parseFloat(input.value);
    return Number.isNaN(value) ? null : value;
  };

  buttonSpeak.addEventListener("click", async () => {
    const text = textInput.value;
    if (!text) {
      return;
    }
    if (!voiceId) {
      console.log("ERROR: No voice id");
      return;
    }

    // Snapshot the selection: the dropdowns may change while the model is
    // downloading, and the id recorded as loaded must be the one requested.
    const requestedVoiceId = voiceId;
    const requestedVoiceUrl = voiceUrl;

    try {
      if (requestedVoiceId !== loadedVoiceId) {
        if (!requestedVoiceUrl) {
          console.log("ERROR: No voice URL");
          return;
        }
        status.textContent = "Loading voice...";
        await setVoice(
          `${requestedVoiceUrl}/${requestedVoiceId}.onnx?download=true`,
          `${requestedVoiceUrl}/${requestedVoiceId}.onnx.json?download=true`,
        );
        loadedVoiceId = requestedVoiceId;
      }

      // Index 0 of the speaker dropdown is treated as "nothing selected".
      let speakerId = null;
      if (speakerSelect.selectedIndex > 0) {
        speakerId = Number.parseInt(speakerSelect.value, 10);
        if (Number.isNaN(speakerId)) {
          speakerId = null;
        }
      }
      const lengthScale = readScale(inputLengthScale);
      const noiseScale = readScale(inputNoiseScale);
      const noiseWScale = readScale(inputNoiseWScale);

      status.textContent = "Synthesizing audio...";
      const wavAudio = await textToWavAudio(
        text,
        speakerId,
        lengthScale,
        noiseScale,
        noiseWScale,
      );

      const previousAudioUrl = currentAudioUrl;
      currentAudioUrl = URL.createObjectURL(wavAudio);
      audioTTS.src = currentAudioUrl;
      if (previousAudioUrl) {
        // The element no longer references the old clip; free its memory.
        URL.revokeObjectURL(previousAudioUrl);
      }

      // play() may return a promise that rejects (e.g. playback blocked);
      // log that instead of leaving the rejection unhandled.
      Promise.resolve(audioTTS.play()).catch((err) => {
        console.error("ERROR: Audio playback failed", err);
      });
      status.textContent = "Ready";
    } catch (err) {
      // Surface the failure instead of leaving a stale progress message.
      console.error("ERROR: Speech synthesis failed", err);
      const reason = err && err.message ? err.message : String(err);
      status.textContent = `Error: ${reason}`;
    }
  });

  textInput.disabled = false;
  buttonSpeak.disabled = false;

  // Published on window so they are reachable from outside this module.
  window.setLanguage = setLanguage;
  window.setVoiceName = setVoiceName;
  window.setQuality = setQuality;
  window.setSpeaker = setSpeaker;
}
// Start the app once the DOM has been parsed. main() is async, so a
// rejection handler is attached here; without it a start-up failure would
// only surface as an unhandled promise rejection.
document.addEventListener("DOMContentLoaded", () => {
  main().catch((err) => {
    console.error("ERROR: Failed to initialize page", err);
  });
});
// ----------------------------------------------------------------------------
/**
 * Change handler for the language dropdown.
 *
 * Rebuilds the voice dropdown with the sorted, de-duplicated voice names
 * available for the selected language, then selects a voice and continues
 * with setVoiceName(). The voice requested through the URL hash
 * (`voiceToSelect`) is used when it exists for this language; otherwise the
 * first voice is selected. Does nothing when no language is selected.
 */
export function setLanguage() {
  const language = document.getElementById("languages").value;
  if (language.length === 0) {
    return;
  }

  // Drop every option except the first one, which is kept in place
  // (presumably a placeholder entry -- confirm against the page markup).
  const voiceSelect = document.getElementById("voice");
  while (voiceSelect.options.length > 1) {
    voiceSelect.remove(voiceSelect.options.length - 1);
  }

  const names = Array.from(
    new Set(
      Object.values(voices)
        .filter((voice) => voice.language.code === language)
        .map((voice) => voice.name),
    ),
  ).sort();

  for (const name of names) {
    const option = document.createElement("option");
    option.text = name;
    option.value = name;
    voiceSelect.add(option);
  }

  // The preselection is one-shot: consume it whether or not it is usable.
  const requestedVoice = voiceToSelect;
  voiceToSelect = null;

  if (requestedVoice && names.includes(requestedVoice)) {
    voiceSelect.value = requestedVoice;
    setVoiceName();
  } else if (voiceSelect.options.length > 1) {
    // No (valid) request: fall back to the first voice so the dropdown
    // cascade is never left without a selection.
    voiceSelect.selectedIndex = 1;
    setVoiceName();
  }
}
/**
 * Change handler for the voice dropdown.
 *
 * Rebuilds the quality dropdown with the qualities available for the
 * selected language and voice, ordered by `qualitySort`, then selects a
 * quality and continues with setQuality(). The quality requested through the
 * URL hash (`qualityToSelect`) is used when it is available; otherwise the
 * highest quality is selected. Does nothing when no voice is selected.
 */
function setVoiceName() {
  const language = document.getElementById("languages").value;
  const voiceName = document.getElementById("voice").value;
  if (voiceName.length === 0) {
    return;
  }

  // Drop every option except the first one, which is kept in place.
  const qualitySelect = document.getElementById("quality");
  while (qualitySelect.options.length > 1) {
    qualitySelect.remove(qualitySelect.options.length - 1);
  }

  // Quality names missing from qualitySort rank lowest; this keeps the
  // comparator from returning NaN.
  const rankOf = (quality) =>
    Object.prototype.hasOwnProperty.call(qualitySort, quality)
      ? qualitySort[quality]
      : -1;

  const qualities = Array.from(
    new Set(
      Object.values(voices)
        .filter(
          (voice) =>
            voice.language.code === language && voice.name === voiceName,
        )
        .map((voice) => voice.quality),
    ),
  ).sort((a, b) => rankOf(a) - rankOf(b));

  for (const quality of qualities) {
    const option = document.createElement("option");
    option.text = quality;
    option.value = quality;
    qualitySelect.add(option);
  }

  // The preselection is one-shot: consume it whether or not it is usable.
  const requestedQuality = qualityToSelect;
  qualityToSelect = null;

  if (requestedQuality && qualities.includes(requestedQuality)) {
    qualitySelect.value = requestedQuality;
    setQuality();
  } else if (qualitySelect.options.length > 1) {
    // No (valid) request: select the highest quality, i.e. the last option.
    qualitySelect.selectedIndex = qualitySelect.options.length - 1;
    setQuality();
  }
}
/**
 * Change handler for the quality dropdown.
 *
 * Rebuilds the speaker dropdown for the voice identified by the selected
 * language, voice name and quality, selects the first speaker (which calls
 * setSpeaker()), then records the voice id in `voiceId` and in the URL hash.
 * Does nothing when no quality is selected.
 */
function setQuality() {
  const language = document.getElementById("languages").value;
  const voiceName = document.getElementById("voice").value;
  const quality = document.getElementById("quality").value;
  if (quality.length === 0) {
    return;
  }

  // Drop every option except the first one, which is kept in place.
  const speakerSelect = document.getElementById("speaker");
  while (speakerSelect.options.length > 1) {
    speakerSelect.remove(speakerSelect.options.length - 1);
  }

  const addSpeakerOption = (text, value) => {
    const option = document.createElement("option");
    option.text = text;
    option.value = value;
    speakerSelect.add(option);
  };

  const selectedVoice = Object.values(voices).find(
    (voice) =>
      voice.language.code === language &&
      voice.name === voiceName &&
      voice.quality === quality,
  );
  const numSpeakers = selectedVoice ? selectedVoice.num_speakers : 1;
  // Tolerate catalogue entries without a speaker map.
  const speakerIdMap = (selectedVoice && selectedVoice.speaker_id_map) || {};
  const speakerNames = Object.keys(speakerIdMap).sort(
    (a, b) => speakerIdMap[a] - speakerIdMap[b],
  );

  if (numSpeakers > 1 && speakerNames.length > 0) {
    // Multi-speaker model: the option value is the id from the speaker map
    // itself, not the position in the sorted list, so it stays correct even
    // if the ids are not a contiguous 0..n-1 range.
    for (const speaker of speakerNames) {
      const id = String(speakerIdMap[speaker]);
      addSpeakerOption(`${speaker} (${id})`, id);
    }
  } else {
    // Single speaker model (or no usable speaker map)
    addSpeakerOption("default", "0");
  }

  if (speakerSelect.options.length > 1) {
    // Select first speaker
    speakerSelect.selectedIndex = 1;
    setSpeaker();
  }

  voiceId = `${language}-${voiceName}-${quality}`;
  window.location.hash = voiceId;
}
/**
 * Change handler for the speaker dropdown.
 *
 * For the voice identified by the selected language, voice name and quality
 * this computes the download base URL (`voiceUrl`), points the "key" link at
 * it, and fetches `txt/<language family>.txt` to fill the text input.
 * Does nothing when no speaker is selected or no catalogue entry matches.
 */
function setSpeaker() {
  const language = document.getElementById("languages").value;
  // "en_US" -> "en"
  const languageFamily = language.split("_")[0];
  const voiceName = document.getElementById("voice").value;
  const quality = document.getElementById("quality").value;
  const speaker = document.getElementById("speaker").value;
  if (speaker.length === 0) {
    return;
  }

  const key = Object.keys(voices).find((candidate) => {
    const voice = voices[candidate];
    return (
      voice.language.code === language &&
      voice.name === voiceName &&
      voice.quality === quality
    );
  });
  if (key === undefined) {
    return;
  }

  voiceUrl = `${voiceRoot}/${languageFamily}/${language}/${voiceName}/${quality}`;

  // The key comes from downloaded JSON: insert it as text, not as markup.
  const aKey = document.getElementById("key");
  aKey.textContent = key;
  aKey.href = voiceUrl;

  fetch(`txt/${languageFamily}.txt`)
    .then((response) => {
      // Without this check the body of a 404 page would become the text.
      if (!response.ok) {
        throw new Error(`HTTP ${response.status}`);
      }
      return response.text();
    })
    .then((text) => {
      // Assign as text so "<" and "&" in the sample are not parsed as HTML.
      document.getElementById("textInput").textContent = text;
    })
    .catch((err) => {
      console.error(
        `ERROR: Could not load sample text for "${languageFamily}"`,
        err,
      );
    });
}
/**
 * Downloads the voice catalogue and fills the language dropdown.
 *
 * A URL hash of the form `#<language>-<voice>-<quality>` is parsed first and
 * stored as the pending preselection. Once voices.json has been downloaded
 * it is stored in `voices`, one option per language code is added to the
 * language dropdown (sorted by code), and a preselected language, if it
 * exists in the catalogue, is selected and passed on to setLanguage().
 * Download failures are logged to the console.
 */
function loadVoices() {
  const match = /^#([^-]+)-([^-]+)-([^-]+)$/.exec(window.location.hash);
  if (match) {
    languageToSelect = match[1];
    voiceToSelect = match[2];
    qualityToSelect = match[3];
  }

  fetch(`${voiceRoot}/voices.json?download=true`)
    .then((response) => {
      if (!response.ok) {
        throw new Error(`HTTP ${response.status}`);
      }
      return response.json();
    })
    .then((catalogue) => {
      voices = catalogue;

      // language code -> label; when several voices share a code the label
      // of the last one wins.
      const languageNames = new Map();
      for (const voice of Object.values(voices)) {
        const info = voice.language;
        languageNames.set(
          info.code,
          `${info.name_native} (${info.name_english}, ${info.country_english})`,
        );
      }

      const languagesSelect = document.getElementById("languages");
      for (const code of Array.from(languageNames.keys()).sort()) {
        const option = document.createElement("option");
        option.text = languageNames.get(code);
        option.value = code;
        languagesSelect.add(option);
      }

      // The preselection is one-shot: consume it whether or not it is usable.
      const requestedLanguage = languageToSelect;
      languageToSelect = null;
      if (!requestedLanguage) {
        return;
      }
      if (languageNames.has(requestedLanguage)) {
        languagesSelect.value = requestedLanguage;
        setLanguage();
      } else {
        // Unknown language in the hash: drop the rest of the preselection so
        // it cannot be applied to a language the user picks later.
        voiceToSelect = null;
        qualityToSelect = null;
      }
    })
    .catch((err) => {
      console.error("ERROR: Could not load voice list", err);
    });
}