// Haay's picture
// Upload 926 files
// 519a20c verified
import { saveTtsProviderSettings } from './index.js';
export { ChatterboxTtsProvider };
class ChatterboxTtsProvider {
//########//
// Config //
//########//
settings = {};
constructor() {
// Initialize with default settings
this.settings = {
provider_endpoint: this.settings.provider_endpoint || 'http://localhost:8004',
voice_mode: this.settings.voice_mode || 'predefined',
predefined_voice: this.settings.predefined_voice || 'S1',
reference_voice: this.settings.reference_voice || '',
temperature: this.settings.temperature || 0.8,
exaggeration: this.settings.exaggeration || 0.5,
cfg_weight: this.settings.cfg_weight || 0.5,
seed: this.settings.seed || -1,
speed_factor: this.settings.speed_factor || 1.0,
language: this.settings.language || 'en',
split_text: this.settings.split_text || true,
chunk_size: this.settings.chunk_size || 120,
output_format: this.settings.output_format || 'wav',
voiceMap: this.settings.voiceMap || {},
};
}
ready = false;
voices = [];
separator = '. ';
audioElement = document.createElement('audio');
languageLabels = {
'English': 'en',
'Spanish': 'es',
'French': 'fr',
'German': 'de',
'Italian': 'it',
'Portuguese': 'pt',
'Polish': 'pl',
'Turkish': 'tr',
'Russian': 'ru',
'Dutch': 'nl',
'Czech': 'cs',
'Arabic': 'ar',
'Chinese': 'zh-cn',
'Japanese': 'ja',
'Korean': 'ko',
'Hindi': 'hi',
};
get settingsHtml() {
let html = `<div class="chatterbox-settings-container">
<div class="chatterbox-settings-header">
<h3>Chatterbox TTS Settings</h3>
<div class="status-indicator">
Status: <span id="chatterbox-status" class="offline">Offline</span>
</div>
</div>`;
// Server endpoint
html += `<div class="chatterbox-setting-row">
<label for="chatterbox-endpoint">Server Endpoint:</label>
<input id="chatterbox-endpoint" type="text" class="text_pole" value="${this.settings.provider_endpoint}" />
</div>`;
// Language selection
html += `<div class="chatterbox-setting-row">
<label for="chatterbox-language">Language:</label>
<select id="chatterbox-language">`;
for (let language in this.languageLabels) {
html += `<option value="${this.languageLabels[language]}" ${this.languageLabels[language] === this.settings.language ? 'selected' : ''}>${language}</option>`;
}
html += `</select>
</div>`;
// Generation parameters
html += `<div class="chatterbox-params-section">
<h4>Generation Parameters</h4>`;
// Temperature
html += `<div class="chatterbox-setting-row">
<label for="chatterbox-temperature">Temperature: <span id="chatterbox-temperature-value">${this.settings.temperature}</span></label>
<input id="chatterbox-temperature" type="range" min="0" max="1" step="0.1" value="${this.settings.temperature}" />
</div>`;
// Exaggeration
html += `<div class="chatterbox-setting-row">
<label for="chatterbox-exaggeration">Exaggeration: <span id="chatterbox-exaggeration-value">${this.settings.exaggeration}</span></label>
<input id="chatterbox-exaggeration" type="range" min="0" max="2" step="0.1" value="${this.settings.exaggeration}" />
</div>`;
// CFG Weight
html += `<div class="chatterbox-setting-row">
<label for="chatterbox-cfg-weight">CFG Weight: <span id="chatterbox-cfg-weight-value">${this.settings.cfg_weight}</span></label>
<input id="chatterbox-cfg-weight" type="range" min="0" max="1" step="0.1" value="${this.settings.cfg_weight}" />
</div>`;
// Speed Factor
html += `<div class="chatterbox-setting-row">
<label for="chatterbox-speed">Speed Factor: <span id="chatterbox-speed-value">${this.settings.speed_factor}</span></label>
<input id="chatterbox-speed" type="range" min="0.5" max="2" step="0.1" value="${this.settings.speed_factor}" />
</div>`;
// Seed
html += `<div class="chatterbox-setting-row">
<label for="chatterbox-seed">Seed (-1 for random):</label>
<input id="chatterbox-seed" class="text_pole" type="number" min="-1" value="${this.settings.seed}" />
</div>`;
// Text chunking
html += `<div class="chatterbox-setting-row">
<label class="checkbox_label">
<input type="checkbox" id="chatterbox-split-text" ${this.settings.split_text ? 'checked' : ''} />
Split long texts into chunks
</label>
</div>`;
// Chunk size
html += `<div class="chatterbox-setting-row" id="chunk-size-row" ${!this.settings.split_text ? 'style="display: none;"' : ''}>
<label for="chatterbox-chunk-size">Chunk Size:</label>
<input id="chatterbox-chunk-size" class="text_pole" type="number" min="50" max="500" value="${this.settings.chunk_size}" />
</div>`;
// Output format
html += `<div class="chatterbox-setting-row">
<label for="chatterbox-format">Output Format:</label>
<select id="chatterbox-format">
<option value="wav" ${this.settings.output_format === 'wav' ? 'selected' : ''}>WAV</option>
<option value="opus" ${this.settings.output_format === 'opus' ? 'selected' : ''}>Opus</option>
</select>
</div>`;
html += '</div>'; // End params section
// Footer with links
html += `<div class="chatterbox-footer">
<a href="${this.settings.provider_endpoint}" target="_blank">Chatterbox Web UI</a> |
<a href="https://github.com/devnen/Chatterbox-TTS-Server" target="_blank">Documentation</a>
</div>`;
html += '</div>'; // End container
// Add CSS styles
html += `<style>
.chatterbox-settings-container {
padding: 10px;
}
.chatterbox-settings-header {
display: flex;
justify-content: space-between;
align-items: center;
margin-bottom: 15px;
}
.chatterbox-settings-header h3 {
margin: 0;
}
.chatterbox-settings-container .status-indicator {
font-weight: bold;
}
#chatterbox-status.ready { color: #4CAF50; }
#chatterbox-status.offline { color: #f44336; }
#chatterbox-status.processing { color: #2196F3; }
.chatterbox-setting-row {
margin-bottom: 10px;
display: flex;
align-items: center;
gap: 10px;
}
.chatterbox-setting-row label {
flex: 0 0 150px;
}
.chatterbox-setting-row label.checkbox_label {
flex-basis: auto;
}
.chatterbox-setting-row input[type="text"],
.chatterbox-setting-row input[type="number"],
.chatterbox-setting-row select {
flex: 1;
}
.chatterbox-setting-row input[type="range"] {
flex: 1;
}
.chatterbox-params-section {
margin-top: 15px;
padding-top: 15px;
border-top: 1px solid #ccc;
}
.chatterbox-params-section h4 {
margin-top: 0;
margin-bottom: 10px;
}
.chatterbox-footer {
margin-top: 15px;
padding-top: 15px;
border-top: 1px solid #ccc;
text-align: center;
font-size: 0.9em;
}
</style>`;
return html;
}
//######################//
// Startup & Initialize //
//######################//
async loadSettings(settings) {
this.updateStatus('Offline');
if (Object.keys(settings).length === 0) {
console.info('Using default Chatterbox TTS Provider settings');
} else {
// Populate settings with provided values
for (const key in settings) {
if (key in this.settings) {
this.settings[key] = settings[key];
}
}
}
// Update UI elements
this.updateUIFromSettings();
console.debug('ChatterboxTTS: Settings loaded');
try {
// Check if TTS provider is ready
await this.checkReady();
if (this.ready) {
// Fetch all voice types for the voice map
await this.fetchTtsVoiceObjects();
this.updateStatus('Ready');
}
this.setupEventListeners();
} catch (error) {
console.error('Error loading Chatterbox settings:', error);
this.updateStatus('Offline');
}
}
updateUIFromSettings() {
$('#chatterbox-endpoint').val(this.settings.provider_endpoint);
$('#chatterbox-language').val(this.settings.language);
$('#chatterbox-temperature').val(this.settings.temperature);
$('#chatterbox-temperature-value').text(this.settings.temperature);
$('#chatterbox-exaggeration').val(this.settings.exaggeration);
$('#chatterbox-exaggeration-value').text(this.settings.exaggeration);
$('#chatterbox-cfg-weight').val(this.settings.cfg_weight);
$('#chatterbox-cfg-weight-value').text(this.settings.cfg_weight);
$('#chatterbox-speed').val(this.settings.speed_factor);
$('#chatterbox-speed-value').text(this.settings.speed_factor);
$('#chatterbox-seed').val(this.settings.seed);
$('#chatterbox-split-text').prop('checked', this.settings.split_text);
$('#chatterbox-chunk-size').val(this.settings.chunk_size);
$('#chatterbox-format').val(this.settings.output_format);
// Show/hide chunk size based on split text
if (this.settings.split_text) {
$('#chunk-size-row').show();
} else {
$('#chunk-size-row').hide();
}
}
//##############################//
// Check Server is Available //
//##############################//
async checkReady() {
try {
const response = await fetch(`${this.settings.provider_endpoint}/api/ui/initial-data`);
if (!response.ok) {
throw new Error(`HTTP Error Response: ${response.status} ${response.statusText}`);
}
const data = await response.json();
// Check if we got valid data
if (data) {
this.ready = true;
console.log('Chatterbox TTS service is ready.');
} else {
this.ready = false;
console.log('Chatterbox TTS service returned invalid data.');
}
} catch (error) {
console.error('Error checking Chatterbox TTS service readiness:', error);
this.ready = false;
}
}
//######################//
// Get Available Voices //
//######################//
async fetchTtsVoiceObjects() {
try {
// Always fetch predefined voices
const predefinedResponse = await fetch(`${this.settings.provider_endpoint}/get_predefined_voices`);
if (!predefinedResponse.ok) {
throw new Error(`HTTP ${predefinedResponse.status}: ${predefinedResponse.statusText}`);
}
const predefinedData = await predefinedResponse.json();
// Transform predefined voices
const predefinedVoices = predefinedData.map(voice => ({
name: voice.display_name,
voice_id: voice.voice_id || voice.filename,
preview_url: null,
lang: voice.language || 'en',
}));
// Always try to fetch reference voices
let referenceVoices = [];
try {
const refResponse = await fetch(`${this.settings.provider_endpoint}/get_reference_files`);
if (refResponse.ok) {
const refData = await refResponse.json();
referenceVoices = refData.map(filename => ({
name: `[Clone] ${filename}`,
voice_id: `ref_${filename}`,
preview_url: null,
lang: 'en',
}));
}
} catch (error) {
console.warn('Failed to fetch reference voices:', error);
}
// Combine all voices
this.voices = [...predefinedVoices, ...referenceVoices];
console.log(`Loaded ${this.voices.length} voices (${predefinedVoices.length} predefined, ${referenceVoices.length} reference)`);
return this.voices;
} catch (error) {
console.error('Error fetching Chatterbox voices:', error);
this.voices = [];
return [];
}
}
// Alias for internal use
async fetchVoices() {
return this.fetchTtsVoiceObjects();
}
//###########################//
// Setup Event Listeners //
//###########################//
setupEventListeners() {
// Server endpoint change
$('#chatterbox-endpoint').on('input', () => {
this.settings.provider_endpoint = $('#chatterbox-endpoint').val();
this.onSettingsChange();
});
// Language
$('#chatterbox-language').on('change', (e) => {
this.settings.language = e.target.value;
this.onSettingsChange();
});
// Parameter sliders
$('#chatterbox-temperature').on('input', (e) => {
this.settings.temperature = parseFloat(e.target.value);
$('#chatterbox-temperature-value').text(this.settings.temperature);
this.onSettingsChange();
});
$('#chatterbox-exaggeration').on('input', (e) => {
this.settings.exaggeration = parseFloat(e.target.value);
$('#chatterbox-exaggeration-value').text(this.settings.exaggeration);
this.onSettingsChange();
});
$('#chatterbox-cfg-weight').on('input', (e) => {
this.settings.cfg_weight = parseFloat(e.target.value);
$('#chatterbox-cfg-weight-value').text(this.settings.cfg_weight);
this.onSettingsChange();
});
$('#chatterbox-speed').on('input', (e) => {
this.settings.speed_factor = parseFloat(e.target.value);
$('#chatterbox-speed-value').text(this.settings.speed_factor);
this.onSettingsChange();
});
// Seed
$('#chatterbox-seed').on('change', (e) => {
this.settings.seed = parseInt(e.target.value);
this.onSettingsChange();
});
// Text splitting
$('#chatterbox-split-text').on('change', (e) => {
this.settings.split_text = e.target.checked;
if (e.target.checked) {
$('#chunk-size-row').show();
} else {
$('#chunk-size-row').hide();
}
this.onSettingsChange();
});
$('#chatterbox-chunk-size').on('change', (e) => {
this.settings.chunk_size = parseInt(e.target.value);
this.onSettingsChange();
});
// Output format
$('#chatterbox-format').on('change', (e) => {
this.settings.output_format = e.target.value;
this.onSettingsChange();
});
}
//#############################//
// Store ST interface settings //
//#############################//
onSettingsChange() {
// Save the updated settings
saveTtsProviderSettings();
}
//#########################//
// Handle Reload button //
//#########################//
async onRefreshClick() {
try {
this.updateStatus('Processing');
await this.checkReady();
if (this.ready) {
await this.fetchTtsVoiceObjects();
this.updateStatus('Ready');
} else {
this.updateStatus('Offline');
}
} catch (error) {
console.error('Error during refresh:', error);
this.updateStatus('Offline');
}
}
//##################//
// Preview Voice //
//##################//
async previewTtsVoice(voiceId) {
try {
this.updateStatus('Processing');
const previewText = 'Hello! This is a preview of the selected voice.';
// Determine if this is a reference voice
let isReferenceVoice = false;
let actualVoiceId = voiceId;
if (voiceId && voiceId.startsWith('ref_')) {
isReferenceVoice = true;
actualVoiceId = voiceId.substring(4); // Remove 'ref_' prefix
}
// Generate preview using the main TTS endpoint
const requestBody = {
text: previewText,
voice_mode: isReferenceVoice ? 'clone' : 'predefined',
temperature: this.settings.temperature,
exaggeration: this.settings.exaggeration,
cfg_weight: this.settings.cfg_weight,
seed: this.settings.seed >= 0 ? this.settings.seed : Math.floor(Math.random() * 2147483648), // Use random seed if -1
speed_factor: this.settings.speed_factor,
language: this.settings.language,
split_text: false, // Don't split for preview
output_format: this.settings.output_format,
};
// Add voice-specific parameters
if (isReferenceVoice) {
requestBody.reference_audio_filename = actualVoiceId;
} else {
requestBody.predefined_voice_id = actualVoiceId;
}
const response = await fetch(`${this.settings.provider_endpoint}/tts`, {
method: 'POST',
headers: {
'Content-Type': 'application/json',
},
body: JSON.stringify(requestBody),
});
if (!response.ok) {
throw new Error(`HTTP ${response.status}: ${response.statusText}`);
}
// Get the audio blob and play it
const audioBlob = await response.blob();
const audioUrl = URL.createObjectURL(audioBlob);
const audio = new Audio(audioUrl);
audio.addEventListener('ended', () => {
URL.revokeObjectURL(audioUrl);
this.updateStatus('Ready');
});
await audio.play();
} catch (error) {
console.error('Error previewing voice:', error);
this.updateStatus('Ready');
throw error;
}
}
//#####################//
// Get Voice Object //
//#####################//
async getVoice(voiceName) {
// Ensure voices are loaded
if (this.voices.length === 0) {
await this.fetchTtsVoiceObjects();
}
// Find the voice object by name or voice_id
let match = this.voices.find(voice =>
voice.name === voiceName ||
voice.voice_id === voiceName ||
voice.display_name === voiceName,
);
if (!match) {
console.warn(`Voice not found: ${voiceName}`);
// Check if it's a reference voice that wasn't in the list
if (voiceName && voiceName.startsWith('ref_')) {
const filename = voiceName.substring(4);
return {
name: `[Clone] ${filename}`,
voice_id: voiceName,
preview_url: null,
lang: 'en',
};
}
// Return a default voice object
return {
name: voiceName || 'Default',
voice_id: voiceName || this.settings.predefined_voice || 'S1',
preview_url: null,
lang: 'en',
};
}
return match;
}
//##################//
// Generate TTS //
//##################//
async generateTts(inputText, voiceId) {
try {
this.updateStatus('Processing');
// Determine if this is a reference voice
let isReferenceVoice = false;
let actualVoiceId = voiceId;
if (voiceId && voiceId.startsWith('ref_')) {
isReferenceVoice = true;
actualVoiceId = voiceId.substring(4); // Remove 'ref_' prefix
}
// Prepare the request body
const requestBody = {
text: inputText,
voice_mode: isReferenceVoice ? 'clone' : 'predefined',
temperature: this.settings.temperature,
exaggeration: this.settings.exaggeration,
cfg_weight: this.settings.cfg_weight,
seed: this.settings.seed >= 0 ? this.settings.seed : Math.floor(Math.random() * 2147483648), // Use random seed if -1
speed_factor: this.settings.speed_factor,
language: this.settings.language,
split_text: this.settings.split_text,
chunk_size: this.settings.chunk_size,
output_format: this.settings.output_format,
};
// Add voice-specific parameters
if (isReferenceVoice) {
requestBody.reference_audio_filename = actualVoiceId;
} else {
requestBody.predefined_voice_id = actualVoiceId || this.settings.predefined_voice;
}
console.log('Generating TTS with params:', requestBody);
const response = await fetch(`${this.settings.provider_endpoint}/tts`, {
method: 'POST',
headers: {
'Content-Type': 'application/json',
'Cache-Control': 'no-cache',
},
body: JSON.stringify(requestBody),
});
if (!response.ok) {
const errorText = await response.text();
console.error('TTS generation error:', errorText);
throw new Error(`HTTP ${response.status}: ${errorText}`);
}
this.updateStatus('Ready');
// Return the response directly - SillyTavern expects a Response object
return response;
} catch (error) {
console.error('Error in generateTts:', error);
this.updateStatus('Ready');
throw error;
}
}
//######################//
// Update Status //
//######################//
updateStatus(status) {
const statusElement = document.getElementById('chatterbox-status');
if (statusElement) {
statusElement.textContent = status;
statusElement.className = status.toLowerCase();
}
}
}