import { saveTtsProviderSettings } from './index.js';

export { ChatterboxTtsProvider };

/** jQuery event namespace used so handlers can be re-bound without stacking duplicates. */
const EVENT_NAMESPACE = '.chatterboxTts';

/** Prefix that marks a voice id as a reference (clone) audio file rather than a predefined voice. */
const REFERENCE_PREFIX = 'ref_';

/** Exclusive upper bound for randomly generated seeds. */
const RANDOM_SEED_LIMIT = 2147483648;

/**
 * Slider inputs and the settings key each one edits.
 * The element showing the current value has the slider's selector plus `-value`.
 */
const SLIDER_BINDINGS = [
    ['#chatterbox-temperature', 'temperature'],
    ['#chatterbox-exaggeration', 'exaggeration'],
    ['#chatterbox-cfg-weight', 'cfg_weight'],
    ['#chatterbox-speed', 'speed_factor'],
];

/**
 * Parse a base-10 integer, returning `fallback` when the input is not numeric.
 * @param {*} rawValue - Value to parse (typically an input element's string value).
 * @param {number} fallback - Value returned when parsing yields NaN.
 * @returns {number}
 */
function parseIntegerOr(rawValue, fallback) {
    const parsed = Number.parseInt(rawValue, 10);
    return Number.isNaN(parsed) ? fallback : parsed;
}

/**
 * Parse a float, returning `fallback` when the input is not numeric.
 * @param {*} rawValue - Value to parse (typically an input element's string value).
 * @param {number} fallback - Value returned when parsing yields NaN.
 * @returns {number}
 */
function parseFloatOr(rawValue, fallback) {
    const parsed = Number.parseFloat(rawValue);
    return Number.isNaN(parsed) ? fallback : parsed;
}

/**
 * Split a voice id into its kind and the identifier sent to the server.
 * @param {*} voiceId - Voice id; reference voices carry the `ref_` prefix.
 * @returns {{isReference: boolean, id: *}} `id` has the prefix removed for reference voices.
 */
function resolveVoiceTarget(voiceId) {
    if (typeof voiceId === 'string' && voiceId.startsWith(REFERENCE_PREFIX)) {
        return { isReference: true, id: voiceId.substring(REFERENCE_PREFIX.length) };
    }
    return { isReference: false, id: voiceId };
}

/**
 * TTS provider that talks to a Chatterbox TTS server over HTTP.
 *
 * Uses the browser globals `document`, `fetch`, `Audio`, `URL` and `$`
 * (`$` is not imported here — presumably the page-global jQuery; confirm).
 */
class ChatterboxTtsProvider {
    //########//
    // Config //
    //########//

    /** Current provider settings, initialised to the defaults. */
    settings = {
        provider_endpoint: 'http://localhost:8004',
        voice_mode: 'predefined',
        predefined_voice: 'S1',
        reference_voice: '',
        temperature: 0.8,
        exaggeration: 0.5,
        cfg_weight: 0.5,
        seed: -1, // -1 means "pick a random seed for every request"
        speed_factor: 1.0,
        language: 'en',
        split_text: true,
        chunk_size: 120,
        output_format: 'wav',
        voiceMap: {},
    };

    ready = false;
    voices = [];
    separator = '. ';
    audioElement = document.createElement('audio');

    languageLabels = {
        'English': 'en',
        'Spanish': 'es',
        'French': 'fr',
        'German': 'de',
        'Italian': 'it',
        'Portuguese': 'pt',
        'Polish': 'pl',
        'Turkish': 'tr',
        'Russian': 'ru',
        'Dutch': 'nl',
        'Czech': 'cs',
        'Arabic': 'ar',
        'Chinese': 'zh-cn',
        'Japanese': 'ja',
        'Korean': 'ko',
        'Hindi': 'hi',
    };

    /**
     * Settings panel content, assembled section by section.
     *
     * NOTE(review): the strings below contain no element markup, yet
     * updateUIFromSettings(), setupEventListeners() and updateStatus() look up
     * elements such as #chatterbox-endpoint, #chunk-size-row and
     * #chatterbox-status. Confirm the markup for these sections has not been
     * lost before relying on this getter.
     *
     * @returns {string}
     */
    get settingsHtml() {
        let html = `

Chatterbox TTS Settings

Status: Offline
`;

        // Server endpoint
        html += `
`;

        // Language selection
        html += `
`;

        // Generation parameters
        html += `

Generation Parameters

`;

        // Temperature
        html += `
`;

        // Exaggeration
        html += `
`;

        // CFG Weight
        html += `
`;

        // Speed Factor
        html += `
`;

        // Seed
        html += `
`;

        // Text chunking
        html += `
`;

        // Chunk size
        html += `
`;

        // Output format
        html += `
`;

        html += '\n'; // End params section

        // Footer with links
        html += ``;
        html += '\n'; // End container

        // Add CSS styles
        html += ``;

        return html;
    }

    //######################//
    // Startup & Initialize //
    //######################//

    /**
     * Apply saved settings, refresh the UI, bind the UI handlers and probe the server.
     * Never rejects: failures are logged and the status is set to 'Offline'.
     * @param {Object} [settings] - Saved settings; only keys that already exist in the defaults are applied.
     * @returns {Promise<void>}
     */
    async loadSettings(settings) {
        this.updateStatus('Offline');

        const incoming = settings ?? {};
        if (Object.keys(incoming).length === 0) {
            console.info('Using default Chatterbox TTS Provider settings');
        } else {
            for (const [key, value] of Object.entries(incoming)) {
                // Own-property check so inherited names (e.g. "constructor") are never accepted.
                if (Object.hasOwn(this.settings, key)) {
                    this.settings[key] = value;
                }
            }
        }

        this.updateUIFromSettings();
        console.debug('ChatterboxTTS: Settings loaded');

        try {
            // Bind first so the controls work even while the server is unreachable.
            this.setupEventListeners();

            await this.checkReady();
            if (this.ready) {
                // Fetch all voice types for the voice map
                await this.fetchTtsVoiceObjects();
                this.updateStatus('Ready');
            }
        } catch (error) {
            console.error('Error loading Chatterbox settings:', error);
            this.updateStatus('Offline');
        }
    }

    /** Copy the current settings into the settings-panel controls. */
    updateUIFromSettings() {
        $('#chatterbox-endpoint').val(this.settings.provider_endpoint);
        $('#chatterbox-language').val(this.settings.language);

        for (const [selector, key] of SLIDER_BINDINGS) {
            $(selector).val(this.settings[key]);
            $(`${selector}-value`).text(this.settings[key]);
        }

        $('#chatterbox-seed').val(this.settings.seed);
        $('#chatterbox-split-text').prop('checked', this.settings.split_text);
        $('#chatterbox-chunk-size').val(this.settings.chunk_size);
        $('#chatterbox-format').val(this.settings.output_format);

        this.#syncChunkRowVisibility();
    }

    /** Show the chunk-size row only while text splitting is enabled. */
    #syncChunkRowVisibility() {
        if (this.settings.split_text) {
            $('#chunk-size-row').show();
        } else {
            $('#chunk-size-row').hide();
        }
    }

    /**
     * Build an absolute URL on the configured server.
     * Surrounding whitespace and trailing slashes on the endpoint are dropped so
     * that "http://host:8004/" does not produce "http://host:8004//tts".
     * @param {string} path - Server path starting with "/".
     * @returns {string}
     */
    #endpointUrl(path) {
        let base = String(this.settings.provider_endpoint ?? '').trim();
        while (base.endsWith('/')) {
            base = base.slice(0, -1);
        }
        return `${base}${path}`;
    }

    //##############################//
    // Check Server is Available    //
    //##############################//

    /**
     * Probe the server and record the outcome in `this.ready`.
     * Never rejects: any failure is logged and leaves `ready` false.
     * @returns {Promise<void>}
     */
    async checkReady() {
        try {
            const response = await fetch(this.#endpointUrl('/api/ui/initial-data'));
            if (!response.ok) {
                throw new Error(`HTTP Error Response: ${response.status} ${response.statusText}`);
            }

            const data = await response.json();
            this.ready = Boolean(data);
            if (this.ready) {
                console.log('Chatterbox TTS service is ready.');
            } else {
                console.log('Chatterbox TTS service returned invalid data.');
            }
        } catch (error) {
            console.error('Error checking Chatterbox TTS service readiness:', error);
            this.ready = false;
        }
    }

    //######################//
    // Get Available Voices //
    //######################//

    /**
     * Load predefined and reference voices and cache them in `this.voices`.
     * Never rejects: if the predefined list cannot be loaded the cache is
     * cleared and an empty array is returned.
     * @returns {Promise<Array<{name: string, voice_id: string, preview_url: null, lang: string}>>}
     */
    async fetchTtsVoiceObjects() {
        try {
            // The two lists are independent, so request them in parallel.
            const [predefinedVoices, referenceVoices] = await Promise.all([
                this.#fetchPredefinedVoices(),
                this.#fetchReferenceVoices(),
            ]);

            this.voices = [...predefinedVoices, ...referenceVoices];
            console.log(`Loaded ${this.voices.length} voices (${predefinedVoices.length} predefined, ${referenceVoices.length} reference)`);
            return this.voices;
        } catch (error) {
            console.error('Error fetching Chatterbox voices:', error);
            this.voices = [];
            return [];
        }
    }

    /** Fetch the predefined voices; rejects on an HTTP error or malformed payload. */
    async #fetchPredefinedVoices() {
        const response = await fetch(this.#endpointUrl('/get_predefined_voices'));
        if (!response.ok) {
            throw new Error(`HTTP ${response.status}: ${response.statusText}`);
        }

        const payload = await response.json();
        return payload.map((voice) => ({
            // Fall back to the file name so an entry is never left nameless.
            name: voice.display_name || voice.filename,
            voice_id: voice.voice_id || voice.filename,
            preview_url: null,
            lang: voice.language || 'en',
        }));
    }

    /** Fetch the reference (clone) voices; best effort, resolves to [] on any failure. */
    async #fetchReferenceVoices() {
        try {
            const response = await fetch(this.#endpointUrl('/get_reference_files'));
            if (!response.ok) {
                return [];
            }

            const filenames = await response.json();
            return filenames.map((filename) => ({
                name: `[Clone] ${filename}`,
                voice_id: `${REFERENCE_PREFIX}${filename}`,
                preview_url: null,
                lang: 'en',
            }));
        } catch (error) {
            console.warn('Failed to fetch reference voices:', error);
            return [];
        }
    }

    /** Alias for internal use. */
    async fetchVoices() {
        return this.fetchTtsVoiceObjects();
    }

    //###########################//
    // Setup Event Listeners     //
    //###########################//

    /**
     * Bind a handler under this provider's event namespace, replacing any
     * handler bound by an earlier call so repeated setup does not stack them.
     * @param {string} selector - jQuery selector of the control.
     * @param {string} eventName - DOM event name, e.g. 'input' or 'change'.
     * @param {Function} handler - Event handler.
     */
    #bind(selector, eventName, handler) {
        const namespacedEvent = `${eventName}${EVENT_NAMESPACE}`;
        $(selector).off(namespacedEvent).on(namespacedEvent, handler);
    }

    /** Wire the settings-panel controls to `this.settings`. Safe to call more than once. */
    setupEventListeners() {
        // Server endpoint change
        this.#bind('#chatterbox-endpoint', 'input', (e) => {
            this.settings.provider_endpoint = e.target.value;
            this.onSettingsChange();
        });

        // Language
        this.#bind('#chatterbox-language', 'change', (e) => {
            this.settings.language = e.target.value;
            this.onSettingsChange();
        });

        // Parameter sliders
        for (const [selector, key] of SLIDER_BINDINGS) {
            this.#bind(selector, 'input', (e) => {
                // Keep the previous value if the control reports something non-numeric.
                this.settings[key] = parseFloatOr(e.target.value, this.settings[key]);
                $(`${selector}-value`).text(this.settings[key]);
                this.onSettingsChange();
            });
        }

        // Seed
        this.#bind('#chatterbox-seed', 'change', (e) => {
            // An empty or invalid field means "random seed" (-1) instead of storing NaN.
            this.settings.seed = parseIntegerOr(e.target.value, -1);
            this.onSettingsChange();
        });

        // Text splitting
        this.#bind('#chatterbox-split-text', 'change', (e) => {
            this.settings.split_text = e.target.checked;
            this.#syncChunkRowVisibility();
            this.onSettingsChange();
        });

        this.#bind('#chatterbox-chunk-size', 'change', (e) => {
            const parsed = parseIntegerOr(e.target.value, this.settings.chunk_size);
            if (parsed > 0) {
                this.settings.chunk_size = parsed;
            } else {
                // Reject non-positive sizes and put the stored value back in the field.
                $(e.target).val(this.settings.chunk_size);
            }
            this.onSettingsChange();
        });

        // Output format
        this.#bind('#chatterbox-format', 'change', (e) => {
            this.settings.output_format = e.target.value;
            this.onSettingsChange();
        });
    }

    //#############################//
    // Store ST interface settings //
    //#############################//

    /** Persist the updated settings. */
    onSettingsChange() {
        saveTtsProviderSettings();
    }

    //#########################//
    // Handle Reload button    //
    //#########################//

    /**
     * Re-probe the server and reload the voice list, updating the status display.
     * @returns {Promise<void>}
     */
    async onRefreshClick() {
        try {
            this.updateStatus('Processing');
            await this.checkReady();

            if (this.ready) {
                await this.fetchTtsVoiceObjects();
                this.updateStatus('Ready');
            } else {
                this.updateStatus('Offline');
            }
        } catch (error) {
            console.error('Error during refresh:', error);
            this.updateStatus('Offline');
        }
    }

    //#####################//
    // Request helpers     //
    //#####################//

    /**
     * Build the JSON body for the `/tts` endpoint from the current settings.
     * @param {string} text - Text to synthesise.
     * @param {*} voiceId - Voice id; a `ref_` prefix selects clone mode.
     * @param {Object} chunking - Text-splitting fields to include (`split_text`, optionally `chunk_size`).
     * @returns {Object}
     */
    #buildRequestBody(text, voiceId, chunking) {
        const { isReference, id } = resolveVoiceTarget(voiceId);
        const configuredSeed = parseIntegerOr(this.settings.seed, -1);

        const body = {
            text,
            voice_mode: isReference ? 'clone' : 'predefined',
            temperature: this.settings.temperature,
            exaggeration: this.settings.exaggeration,
            cfg_weight: this.settings.cfg_weight,
            // Use random seed if the configured one is negative (or unusable)
            seed: configuredSeed >= 0 ? configuredSeed : Math.floor(Math.random() * RANDOM_SEED_LIMIT),
            speed_factor: this.settings.speed_factor,
            language: this.settings.language,
            ...chunking,
            output_format: this.settings.output_format,
        };

        // Add voice-specific parameters
        if (isReference) {
            body.reference_audio_filename = id;
        } else {
            body.predefined_voice_id = id || this.settings.predefined_voice;
        }
        return body;
    }

    //##################//
    // Preview Voice    //
    //##################//

    /**
     * Synthesise a short sample with the given voice and play it.
     * The status returns to 'Ready' when playback ends or fails.
     * @param {string} voiceId - Voice id; a `ref_` prefix selects a reference voice.
     * @returns {Promise<void>}
     * @throws {Error} When the request fails or playback cannot start.
     */
    async previewTtsVoice(voiceId) {
        let audioUrl = null;
        // Release the blob URL exactly once, whichever path finishes first.
        const releaseAudio = () => {
            if (audioUrl !== null) {
                URL.revokeObjectURL(audioUrl);
                audioUrl = null;
            }
        };

        try {
            this.updateStatus('Processing');

            const previewText = 'Hello! This is a preview of the selected voice.';
            // Don't split for preview
            const requestBody = this.#buildRequestBody(previewText, voiceId, { split_text: false });

            const response = await fetch(this.#endpointUrl('/tts'), {
                method: 'POST',
                headers: {
                    'Content-Type': 'application/json',
                },
                body: JSON.stringify(requestBody),
            });

            if (!response.ok) {
                throw new Error(`HTTP ${response.status}: ${response.statusText}`);
            }

            // Get the audio blob and play it
            const audioBlob = await response.blob();
            audioUrl = URL.createObjectURL(audioBlob);
            const audio = new Audio(audioUrl);

            const finishPlayback = () => {
                releaseAudio();
                this.updateStatus('Ready');
            };
            audio.addEventListener('ended', finishPlayback, { once: true });
            audio.addEventListener('error', finishPlayback, { once: true });

            await audio.play();
        } catch (error) {
            // play() can reject after the URL was created, so release it here too.
            releaseAudio();
            console.error('Error previewing voice:', error);
            this.updateStatus('Ready');
            throw error;
        }
    }

    //#####################//
    // Get Voice Object    //
    //#####################//

    /**
     * Look up a voice by display name or voice id, loading the list first if it is empty.
     * Unknown names still yield a usable voice object rather than an error.
     * @param {string} voiceName - Voice name or id.
     * @returns {Promise<{name: string, voice_id: string, preview_url: null, lang: string}>}
     */
    async getVoice(voiceName) {
        // Ensure voices are loaded
        if (this.voices.length === 0) {
            await this.fetchTtsVoiceObjects();
        }

        const match = this.voices.find(
            (voice) => voice.name === voiceName || voice.voice_id === voiceName,
        );
        if (match) {
            return match;
        }

        console.warn(`Voice not found: ${voiceName}`);

        // Check if it's a reference voice that wasn't in the list
        const { isReference, id } = resolveVoiceTarget(voiceName);
        if (isReference) {
            return {
                name: `[Clone] ${id}`,
                voice_id: voiceName,
                preview_url: null,
                lang: 'en',
            };
        }

        // Return a default voice object
        return {
            name: voiceName || 'Default',
            voice_id: voiceName || this.settings.predefined_voice || 'S1',
            preview_url: null,
            lang: 'en',
        };
    }

    //##################//
    // Generate TTS     //
    //##################//

    /**
     * Request synthesised audio for `inputText`.
     * @param {string} inputText - Text to synthesise.
     * @param {string} voiceId - Voice id; a `ref_` prefix selects a reference voice.
     * @returns {Promise<Response>} The raw fetch response containing the audio.
     * @throws {Error} When the request fails or the server answers with a non-OK status.
     */
    async generateTts(inputText, voiceId) {
        try {
            this.updateStatus('Processing');

            const requestBody = this.#buildRequestBody(inputText, voiceId, {
                split_text: this.settings.split_text,
                chunk_size: this.settings.chunk_size,
            });

            console.log('Generating TTS with params:', requestBody);

            const response = await fetch(this.#endpointUrl('/tts'), {
                method: 'POST',
                headers: {
                    'Content-Type': 'application/json',
                    'Cache-Control': 'no-cache',
                },
                body: JSON.stringify(requestBody),
            });

            if (!response.ok) {
                const errorText = await response.text();
                console.error('TTS generation error:', errorText);
                throw new Error(`HTTP ${response.status}: ${errorText}`);
            }

            this.updateStatus('Ready');

            // Return the response directly - SillyTavern expects a Response object
            return response;
        } catch (error) {
            console.error('Error in generateTts:', error);
            this.updateStatus('Ready');
            throw error;
        }
    }

    //######################//
    // Update Status        //
    //######################//

    /**
     * Show `status` in the #chatterbox-status element, if it exists, and set the
     * element's class to the lower-cased status.
     * @param {string} status - Status label, e.g. 'Ready', 'Offline', 'Processing'.
     */
    updateStatus(status) {
        const statusElement = document.getElementById('chatterbox-status');
        if (statusElement) {
            statusElement.textContent = status;
            statusElement.className = status.toLowerCase();
        }
    }
}