<!doctype html>
<html lang="en" class="dark">
<!-- The "dark" class above selects dark mode on first paint (Tailwind is configured with darkMode: 'class'). -->
<head>
  <meta charset="UTF-8">
  <meta name="viewport" content="width=device-width, initial-scale=1.0">
  <title>Dia TTS Server | Text-to-Dialogue</title>
  <link rel="icon" href="/static/favicon.ico" type="image/x-icon">
  <!-- Tailwind CSS via CDN (kept for simplicity); it processes the <style type="text/tailwindcss"> block below. -->
  <script src="https://cdn.tailwindcss.com"></script>
<script> | |
// Configure Tailwind CSS | |
tailwind.config = { | |
darkMode: 'class', // Enable class-based dark mode | |
theme: { | |
extend: { | |
colors: { | |
// Define color palettes used in style.css | |
// Light Mode Colors (Examples - Adjust as needed) | |
gray: { 50: '#f9fafb', 100: '#f3f4f6', 200: '#e5e7eb', 300: '#d1d5db', 400: '#9ca3af', 500: '#6b7280', 600: '#4b5563', 700: '#374151', 800: '#1f2937', 900: '#111827' }, | |
sky: { 50: '#f0f9ff', 100: '#e0f2fe', 200: '#bae6fd', 300: '#7dd3fc', 400: '#38bdf8', 500: '#0ea5e9', 600: '#0284c7', 700: '#0369a1', 800: '#075985', 900: '#0c4a6e' }, | |
indigo: { 50: '#eef2ff', 100: '#e0e7ff', 200: '#c7d2fe', 300: '#a5b4fc', 400: '#818cf8', 500: '#6366f1', 600: '#4f46e5', 700: '#4338ca', 800: '#3730a3', 900: '#312e81' }, | |
red: { 100: '#fee2e2', 300: '#fca5a5', 500: '#ef4444', 600: '#dc2626', 800: '#991b1b', 900: '#7f1d1d' }, | |
green: { 100: '#dcfce7', 300: '#86efac', 500: '#22c55e', 800: '#166534', 900: '#14532d' }, | |
yellow: { 100: '#fef9c3', 300: '#fcd34d', 500: '#eab308', 700: '#b45309', 900: '#78350f' }, | |
// Dark Mode Colors (Copied from previous inline config) | |
primary: { 50: '#f0f9ff', 100: '#e0f2fe', 200: '#bae6fd', 300: '#7dd3fc', 400: '#38bdf8', 500: '#0ea5e9', 600: '#0284c7', 700: '#0369a1', 800: '#075985', 900: '#0c4a6e' }, | |
purple: { 50: '#faf5ff', 100: '#f3e8ff', 200: '#e9d5ff', 300: '#d8b4fe', 400: '#c084fc', 500: '#a855f7', 600: '#9333ea', 700: '#7e22ce', 800: '#6b21a8', 900: '#581c87' }, | |
dark: { 50: '#f9fafb', 100: '#f3f4f6', 200: '#e5e7eb', 300: '#d1d5db', 400: '#9ca3af', 500: '#6b7280', 600: '#4b5563', 700: '#374151', 800: '#1f2937', 900: '#111827', 950: '#030712', 1000: '#0f1729' } | |
} | |
} | |
} | |
} | |
</script> | |
<!-- Removed External Stylesheet Link: <link rel="stylesheet" href="/ui/style.css"> --> | |
<!-- Wavesurfer for audio visualization --> | |
<script src="https://unpkg.com/wavesurfer.js@7"></script> | |
<style type="text/tailwindcss"> | |
/* ui/style.css */ | |
/* Import Tailwind base, components, and utilities */ | |
@tailwind base; | |
@tailwind components; | |
@tailwind utilities; | |
/* Define custom components/utilities */ | |
@layer components { | |
/* Base styles (light mode). Dark-mode counterparts are the ".dark ..." rules further down. */
.body-base {
  @apply h-full bg-gray-100 text-gray-900;
}
.nav-base {
  @apply bg-gradient-to-r from-white to-sky-100 border-b border-sky-200 shadow-md;
}
.nav-link {
  @apply text-sky-700 hover:text-sky-900 px-3 py-2 rounded-md text-sm font-medium;
}
.title-link {
  @apply text-gray-900 text-xl font-bold;
}
.card-base {
  @apply bg-white shadow-lg rounded-lg overflow-hidden border border-gray-200;
}
.card-header {
  @apply text-lg font-medium text-gray-900 mb-4;
}
.card-footer {
  @apply bg-gray-50 px-6 py-4 flex items-center justify-between border-t border-gray-200;
}
.label-base {
  @apply block text-sm font-medium text-gray-700 mb-1;
}
.input-base {
  /*
    "border" and "focus:ring-1" supply the widths that the colour utilities need:
    Tailwind's Preflight resets every element to border-width 0 and the forms
    plugin is not loaded, so border-gray-300 / focus:ring-sky-500 alone draw
    nothing. The default outline is dropped because the ring replaces it.
  */
  @apply block w-full rounded-md border border-gray-300 shadow-sm focus:outline-none focus:border-sky-500 focus:ring-1 focus:ring-sky-500 sm:text-sm px-3 py-2 bg-white text-gray-900 placeholder-gray-400;
}
.textarea-base {
  /* Same look as a single-line input */
  @apply input-base;
}
.select-base {
  /* Extra right padding leaves room for a dropdown arrow */
  /* Consider adding a background SVG for the dropdown arrow */
  @apply input-base appearance-none pr-8;
}
.button-base {
  /* whitespace-nowrap and flex-shrink-0 keep the button text on one line */
  @apply inline-flex items-center justify-center px-4 py-2 border border-transparent rounded-md shadow-sm text-sm font-medium focus:outline-none focus:ring-2 focus:ring-offset-2 transition-colors disabled:opacity-50 disabled:cursor-not-allowed whitespace-nowrap flex-shrink-0;
}
.btn-primary {
  @apply button-base bg-sky-600 text-white hover:bg-sky-700 focus:ring-sky-500;
}
.btn-secondary {
  /* Example secondary */
  @apply button-base bg-gray-200 text-gray-700 border-gray-300 hover:bg-gray-300 focus:ring-indigo-500;
}
.btn-danger {
  @apply button-base bg-red-600 text-white hover:bg-red-700 focus:ring-red-500;
}
.btn-purple {
  @apply button-base bg-purple-600 text-white hover:bg-purple-700 focus:ring-purple-500;
}
.slider-base {
  /* The thumb is styled separately through browser-specific pseudo-elements */
  @apply w-full h-2 bg-gray-200 rounded-lg appearance-none cursor-pointer;
}
.slider-thumb {
  /* Basic thumb styling */
  @apply appearance-none w-5 h-5 bg-sky-600 rounded-full cursor-pointer;
}
.radio-label {
  @apply flex items-center space-x-2 cursor-pointer border border-gray-300 bg-white hover:border-sky-400 p-3 rounded-md transition-colors;
}
.radio-label-text {
  @apply text-gray-700;
}
/* Checked styles are applied with Tailwind utilities in the HTML instead of the custom classes below */
/* .radio-label input:checked+span {
  @apply text-sky-600 font-semibold;
}
.radio-label-checked {
  @apply border-sky-500 ring-2 ring-sky-500;
} */
/* Replaced these custom classes with Tailwind utilities in the HTML */
.preset-button {
  @apply button-base bg-indigo-100 text-indigo-700 border-indigo-200 hover:bg-indigo-200 focus:ring-indigo-500 text-xs px-3 py-1;
}
.notification-base {
  /* Reduced margin bottom */
  @apply px-4 py-3 rounded relative shadow-md flex items-center mb-3;
}
.notification-success {
  @apply notification-base bg-green-100 border border-green-300 text-green-800;
}
.notification-error {
  @apply notification-base bg-red-100 border border-red-300 text-red-800;
}
.notification-warning {
  @apply notification-base bg-yellow-100 border border-yellow-300 text-yellow-800;
}
.notification-info {
  /* Added info style */
  @apply notification-base bg-sky-100 border border-sky-300 text-sky-800;
}
.code-inline {
  @apply bg-gray-200 px-1 rounded text-sm font-mono text-gray-800;
}
.tooltip {
  /* Basic tooltip styling; shown while a "group" ancestor is hovered */
  @apply absolute hidden group-hover:block bg-gray-700 text-white text-xs rounded py-1 px-2 z-10 -mt-8;
}
.loading-overlay-base {
  @apply fixed inset-0 bg-gray-600 bg-opacity-75 flex items-center justify-center z-50 transition-opacity duration-300;
}
.loading-box-base {
  @apply bg-white p-6 rounded-lg shadow-xl flex flex-col items-center border border-gray-300;
}
.loading-spinner {
  @apply animate-spin h-10 w-10 text-sky-600 mb-4;
}
.loading-text {
  @apply text-gray-900 text-lg mb-2;
}
.loading-status {
  /* max-w-xs limits the width */
  @apply text-gray-600 text-sm mb-4 text-center max-w-xs;
}
.waveform-container {
  @apply w-full h-24 bg-gray-100 rounded;
}
.audio-player-card {
  /* Margin top for spacing */
  @apply card-base mt-8;
}
.audio-player-controls {
  @apply flex flex-wrap items-center justify-between gap-4;
}
.audio-player-buttons {
  /* Tighter spacing below the sm breakpoint */
  @apply flex items-center space-x-2 sm:space-x-4;
}
.audio-player-info {
  @apply text-sm text-gray-600 text-right;
}
.theme-switch {
  @apply p-2 rounded-md text-gray-600 hover:bg-gray-200 hover:text-gray-800 focus:outline-none focus:ring-2 focus:ring-sky-500 focus:ring-offset-2;
}
/* Dark mode overrides: each rule is scoped under an ancestor carrying the "dark" class */
.dark .body-base {
  /* Original dark background */
  @apply bg-[#0f1729] text-white;
}
.dark .nav-base {
  @apply bg-gradient-to-r from-dark-900 to-purple-900 border-b border-purple-800 shadow-lg;
}
.dark .nav-link {
  @apply text-primary-300 hover:text-white;
}
.dark .title-link {
  @apply text-white;
}
.dark .card-base {
  @apply bg-dark-800 border border-dark-700;
}
.dark .card-header {
  @apply text-white;
}
.dark .card-footer {
  @apply bg-dark-900 border-t border-dark-700;
}
.dark .label-base {
  /* Lighter gray for dark backgrounds */
  @apply text-gray-300;
}
.dark .input-base {
  @apply border-dark-600 bg-dark-700 text-white placeholder-gray-500 focus:ring-offset-dark-800;
}
.dark .select-base {
  /* Placeholder for dark mode arrow styling, if it becomes necessary */
}
.dark .btn-primary {
  @apply bg-primary-600 text-white hover:bg-primary-700 focus:ring-primary-500 focus:ring-offset-dark-800;
}
.dark .btn-secondary {
  @apply bg-dark-700 text-white border-dark-600 hover:bg-dark-600 focus:ring-purple-500 focus:ring-offset-dark-800;
}
.dark .btn-danger {
  @apply bg-red-600 text-white hover:bg-red-700 focus:ring-red-500 focus:ring-offset-dark-800;
}
.dark .btn-purple {
  @apply bg-purple-600 text-white hover:bg-purple-700 focus:ring-purple-500 focus:ring-offset-dark-800;
}
.dark .slider-base {
  @apply bg-dark-600;
}
.dark .slider-thumb {
  @apply bg-primary-500;
}
.dark .radio-label {
  @apply border-dark-600 bg-dark-800 hover:border-primary-400;
}
.dark .radio-label-text {
  @apply text-gray-300;
}
/* Checked styles are applied with Tailwind utilities in the HTML instead of the custom classes below */
/* .dark .radio-label input:checked+span {
  @apply text-primary-400;
}
.dark .radio-label-checked {
  @apply border-primary-500 ring-primary-500;
} */
/* Replaced these custom classes with Tailwind peer utilities in the HTML */
.dark .preset-button {
  @apply bg-indigo-900 text-indigo-200 border-indigo-700 hover:bg-indigo-800 focus:ring-indigo-500 focus:ring-offset-dark-800;
}
.dark .notification-success {
  @apply notification-base bg-green-900 border border-green-700 text-green-100;
}
.dark .notification-error {
  @apply notification-base bg-red-900 border border-red-700 text-red-100;
}
.dark .notification-warning {
  @apply notification-base bg-yellow-900 border border-yellow-700 text-yellow-100;
}
.dark .notification-info {
  /* Added info style */
  @apply notification-base bg-sky-900 border border-sky-700 text-sky-100;
}
.dark .code-inline {
  @apply bg-dark-900 text-purple-300;
}
.dark .tooltip {
  @apply bg-dark-950;
}
.dark .loading-overlay-base {
  @apply bg-dark-900 bg-opacity-75;
}
.dark .loading-box-base {
  @apply bg-dark-800 border border-dark-700;
}
.dark .loading-spinner {
  @apply text-primary-500;
}
.dark .loading-text {
  @apply text-white;
}
.dark .loading-status {
  @apply text-gray-400;
}
.dark .waveform-container {
  @apply bg-dark-900;
}
.dark .audio-player-info {
  @apply text-purple-300;
}
.dark .theme-switch {
  @apply text-gray-400 hover:bg-dark-700 hover:text-white focus:ring-offset-dark-900;
}
}
/* Range-input thumbs are pseudo-elements, so each browser engine needs its own selector */
input[type="range"].slider-base::-webkit-slider-thumb {
  @apply slider-thumb;
}
input[type="range"].slider-base::-moz-range-thumb {
  @apply slider-thumb;
}

/* Dark mode thumbs: colour is set directly and the base thumb shape is re-applied */
.dark input[type="range"].slider-base::-webkit-slider-thumb {
  /* Used in place of "@apply dark:slider-thumb" */
  background-color: theme('colors.primary.500');
  /* Size, radius and cursor repeated from .slider-thumb */
  @apply appearance-none w-5 h-5 rounded-full cursor-pointer;
}
.dark input[type="range"].slider-base::-moz-range-thumb {
  /* Used in place of "@apply dark:slider-thumb" */
  background-color: theme('colors.primary.500');
  /* Size, radius and cursor repeated from .slider-thumb */
  @apply appearance-none w-5 h-5 rounded-full cursor-pointer;
}
  </style>
</head>
<body class="body-base">
  <div class="min-h-full">
    <!-- Top navigation bar -->
    <nav class="nav-base">
      <div class="mx-auto max-w-7xl px-4 sm:px-6 lg:px-8">
        <div class="flex h-16 items-center justify-between">
          <div class="flex items-center">
            <div class="flex-shrink-0">
              <!-- The title is a link back to the root page -->
              <a class="title-link" href="/">Dia TTS Server</a>
            </div>
          </div>
          <div class="flex items-center space-x-2 sm:space-x-4">
            <a class="nav-link" href="/docs" target="_blank">API Docs</a>
            <!-- Theme toggle: pill-shaped switch whose thumb slides right under the "dark" class -->
            <button
              class="relative inline-flex items-center p-1 rounded-full bg-gray-200 dark:bg-dark-700 h-8 w-16 transition-colors"
              id="theme-toggle-btn" type="button" title="Toggle light/dark mode">
              <span class="sr-only">Toggle theme</span>
              <span class="absolute inset-0 rounded-full transition-colors"></span>
              <!-- Thumb; holds both icons, only one of which is opaque at a time -->
              <span
                class="relative rounded-full w-6 h-6 bg-white dark:bg-purple-600 transform transition-transform duration-200 ease-in-out translate-x-0 dark:translate-x-8 flex items-center justify-center shadow-md">
                <!-- Sun icon, visible in light mode -->
                <svg class="w-4 h-4 text-yellow-500 dark:opacity-0 transition-opacity"
                  xmlns="http://www.w3.org/2000/svg" viewBox="0 0 20 20" fill="currentColor">
                  <path
                    d="M10 2a.75.75 0 0 1 .75.75v1.5a.75.75 0 0 1-1.5 0v-1.5A.75.75 0 0 1 10 2ZM10 15a.75.75 0 0 1 .75.75v1.5a.75.75 0 0 1-1.5 0v-1.5A.75.75 0 0 1 10 15ZM10 7a3 3 0 1 0 0 6 3 3 0 0 0 0-6Z" />
                </svg>
                <!-- Moon icon, visible in dark mode -->
                <svg class="w-4 h-4 text-white opacity-0 dark:opacity-100 transition-opacity"
                  xmlns="http://www.w3.org/2000/svg" viewBox="0 0 20 20" fill="currentColor">
                  <path
                    d="M7.455 1.75A8.5 8.5 0 0 1 18.25 12.55a8.5 8.5 0 0 1-8.46 8.46A8.5 8.5 0 0 1 1.75 12.55a8.5 8.5 0 0 1 5.705-10.8Z" />
                </svg>
              </span>
            </button>
          </div>
        </div>
      </div>
    </nav>
<!-- Main content -->
<main>
  <div class="mx-auto max-w-7xl px-4 py-8 sm:px-6 lg:px-8">
    <!-- Notification area: banners rendered from the "error" / "success" template variables -->
    <div id="notification-area" class="mb-6 space-y-3">
      {% if error %}
      <div class="notification-error" role="alert">
        <svg class="h-5 w-5 text-red-500 mr-2 flex-shrink-0" viewBox="0 0 20 20" fill="currentColor">
          <path fill-rule="evenodd" clip-rule="evenodd"
            d="M10 18a8 8 0 100-16 8 8 0 000 16zM8.707 7.293a1 1 0 00-1.414 1.414L8.586 10l-1.293 1.293a1 1 0 101.414 1.414L10 11.414l1.293 1.293a1 1 0 001.414-1.414L11.414 10l1.293-1.293a1 1 0 00-1.414-1.414L10 8.586 8.707 7.293z" />
        </svg>
        <span class="block sm:inline">{{ error }}</span>
      </div>
      {% endif %}
      {% if success %}
      <div class="notification-success" role="alert">
        <svg class="h-5 w-5 text-green-500 mr-2 flex-shrink-0" viewBox="0 0 20 20" fill="currentColor">
          <path fill-rule="evenodd" clip-rule="evenodd"
            d="M10 18a8 8 0 100-16 8 8 0 000 16zm3.707-9.293a1 1 0 00-1.414-1.414L9 10.586 7.707 9.293a1 1 0 00-1.414 1.414l2 2a1 1 0 001.414 0l4-4z" />
        </svg>
        <span class="block sm:inline">{{ success }}</span>
      </div>
      {% endif %}
    </div>
<!-- TTS form card; posts to /web/generate -->
<div class="card-base">
  <form class="flex flex-col" id="tts-form" action="/web/generate" method="post">
    <div class="p-6">
      <h2 class="card-header">Generate Speech with Dia</h2>
      <!-- Text to synthesize, with a short hint about the speaker tag syntax -->
      <div class="mb-6">
        <label class="label-base" for="text">Text to speak</label>
        <p class="text-xs text-purple-500 dark:text-purple-300 mb-2">
          Use <code class="code-inline">[S1]</code> and <code class="code-inline">[S2]</code>
          tags for speaker turns. Add non-verbals like <code class="code-inline">(laughs)</code>.
        </p>
        <div class="relative">
          <!-- Pre-filled with the previously submitted text when the template receives it -->
          <textarea class="textarea-base" id="text" name="text" rows="5" maxlength="8192"
            placeholder="Example: [S1] Hello there! [S2] Hi! How are you? [S1] I'm doing well, thanks. (laughs)"
            required>{{ submitted_text if submitted_text else "" }}</textarea>
          <!-- Character counter pinned to the bottom-right corner of the textarea -->
          <div class="absolute bottom-2 right-2 text-xs text-gray-500 dark:text-purple-300">
            <span id="char-count">0</span> / 8192
          </div>
        </div>
      </div>
<!-- Voice Mode Selection -->
<!--
  fieldset/legend gives the radio group an accessible name.
  min-w-0 cancels the fieldset's default min-content minimum width so the grid can still shrink.
-->
<fieldset class="mb-6 min-w-0">
  <legend class="label-base mb-2">Voice Mode</legend>
  <div class="grid grid-cols-1 md:grid-cols-2 gap-4">
    <!--
      Card highlighting sits on the <label> through has-[...] variants: Tailwind's
      peer-* variants only reach later siblings of the peer element, so they can
      never style the label that wraps the radio. The text span IS a later sibling
      of the input, so it keeps peer-checked. The radios use sr-only instead of
      display:none so they stay reachable by keyboard; the label is "relative" to
      contain the absolutely positioned input.
    -->
    <!-- Combined Dialogue / Single Speaker Mode (selected when no mode was submitted) -->
    <label
      class="radio-label relative has-[:checked]:border-sky-500 has-[:checked]:ring-2 has-[:checked]:ring-sky-500 has-[:focus-visible]:ring-2 has-[:focus-visible]:ring-sky-500 dark:has-[:checked]:border-primary-500 dark:has-[:checked]:ring-primary-500 dark:has-[:focus-visible]:ring-primary-500">
      <input type="radio" name="voice_mode" value="dialogue" class="sr-only peer"
        {% if submitted_voice_mode=='dialogue' or not submitted_voice_mode %}checked{% endif %}
        onchange="toggleCloneOptions()">
      <span
        class="radio-label-text peer-checked:text-sky-600 dark:peer-checked:text-primary-400 peer-checked:font-semibold">
        Single / Dialogue (Use [S1]/[S2])
      </span>
    </label>
    <!-- Clone Mode -->
    <label
      class="radio-label relative has-[:checked]:border-sky-500 has-[:checked]:ring-2 has-[:checked]:ring-sky-500 has-[:focus-visible]:ring-2 has-[:focus-visible]:ring-sky-500 dark:has-[:checked]:border-primary-500 dark:has-[:checked]:ring-primary-500 dark:has-[:focus-visible]:ring-primary-500">
      <input type="radio" name="voice_mode" value="clone" class="sr-only peer"
        {% if submitted_voice_mode=='clone' %}checked{% endif %}
        onchange="toggleCloneOptions()">
      <span
        class="radio-label-text peer-checked:text-sky-600 dark:peer-checked:text-primary-400 peer-checked:font-semibold">
        Voice Clone (from Reference)
      </span>
    </label>
  </div>
</fieldset>
<!-- Presets Section -->
<div class="mb-6">
  <!--
    The caption is a paragraph rather than a <label>: it is not tied to a single
    form control. It names the button group through aria-labelledby instead.
  -->
  <p id="presets-label" class="label-base mb-2">Load Example Preset</p>
  <div id="presets-container" class="flex flex-wrap gap-2" role="group" aria-labelledby="presets-label">
    {% if presets %}
    {% for preset in presets %}
    <!-- One button per preset; the id carries the preset's zero-based index -->
    <button type="button" id="preset-btn-{{ loop.index0 }}" class="preset-button"
      title="Load '{{ preset.name }}' text and settings">
      {{ preset.name }}
    </button>
    {% endfor %}
    {% else %}
    <p class="text-sm text-gray-500 dark:text-gray-400">No presets loaded. Check
      presets.yaml.</p>
    {% endif %}
  </div>
</div>
<!-- Clone options; starts hidden via the "hidden" class -->
<div id="clone-options" class="mb-6 hidden">
  <label class="label-base" for="clone_reference_select">Reference Audio File</label>
  <p class="text-xs text-purple-500 dark:text-purple-300 mb-2">
    Select a <code class="code-inline">.wav</code> or <code class="code-inline">.mp3</code> file
    from the <code class="code-inline">reference_audio</code> folder.
    <strong class="dark:text-yellow-300 text-yellow-600">Important:</strong> Prepend the
    exact transcript of this audio to your text input above for best results.
  </p>
  <div class="flex items-center gap-2">
    <!-- The placeholder option is preselected unless a clone file was submitted -->
    <select class="select-base flex-grow" id="clone_reference_select" name="clone_reference_select">
      <option value="none" {% if not submitted_clone_file %}selected{% endif %}>-- Select Reference File --</option>
      {% for filename in reference_files %}
      <option value="{{ filename }}" {% if submitted_clone_file==filename %}selected{% endif %}>{{ filename }}</option>
      {% endfor %}
    </select>
    <!-- File picker kept out of view; meant to be opened through the button below -->
    <input class="hidden" id="clone-file-input" type="file" multiple accept=".wav,.mp3"
      aria-label="Upload reference audio file">
    <!-- Upload button (also carries the "hidden" class in the markup) -->
    <button class="btn-secondary hidden" id="clone-load-button" type="button"
      title="Upload new reference files">
      <svg class="w-5 h-5 mr-1" xmlns="http://www.w3.org/2000/svg" viewBox="0 0 20 20"
        fill="currentColor">
        <path
          d="M9.25 13.25a.75.75 0 0 0 1.5 0V4.636l2.955 3.129a.75.75 0 0 0 1.09-1.03l-4.25-4.5a.75.75 0 0 0-1.09 0l-4.25 4.5a.75.75 0 1 0 1.09 1.03L9.25 4.636v8.614Z" />
        <path
          d="M3.5 12.75a.75.75 0 0 0-1.5 0v2.5A2.75 2.75 0 0 0 4.75 18h10.5A2.75 2.75 0 0 0 18 15.25v-2.5a.75.75 0 0 0-1.5 0v2.5c0 .69-.56 1.25-1.25 1.25H4.75c-.69 0-1.25-.56-1.25-1.25v-2.5Z" />
      </svg>
      Load
    </button>
  </div>
</div>
<!-- Generation parameters (collapsible via <details>) -->
<div class="mb-6">
  <details class="group">
    <summary class="list-none flex cursor-pointer items-center">
      <span class="text-sm font-medium label-base">Generation Parameters</span>
      <!-- Chevron flips while the <details> element is open -->
      <span class="ml-2 text-purple-500 dark:text-purple-300">
        <svg class="group-open:rotate-180 h-5 w-5 transition-transform" viewBox="0 0 20 20"
          fill="currentColor">
          <path fill-rule="evenodd" clip-rule="evenodd"
            d="M5.293 7.293a1 1 0 011.414 0L10 10.586l3.293-3.293a1 1 0 111.414 1.414l-4 4a1 1 0 01-1.414 0l-4-4a1 1 0 010-1.414z" />
        </svg>
      </span>
    </summary>
    <div class="mt-4 grid grid-cols-1 md:grid-cols-2 gap-x-6 gap-y-4">
      <!-- Slider values come from the submitted parameters when present, otherwise from the server defaults -->
      {% set current_gen_params = submitted_gen_params if submitted_gen_params else default_gen_params %}
      <!-- Speed Factor -->
      <div>
        <label class="label-base" for="speed_factor">Speed Factor (<span id="speed_factor_value">{{ current_gen_params.speed_factor }}</span>)</label>
        <input class="slider-base" id="speed_factor" name="speed_factor" type="range"
          min="0.5" max="2.0" step="0.01" value="{{ current_gen_params.speed_factor }}">
      </div>
      <!-- CFG Scale -->
      <div>
        <label class="label-base" for="cfg_scale">CFG Scale (<span id="cfg_scale_value">{{ current_gen_params.cfg_scale }}</span>)</label>
        <input class="slider-base" id="cfg_scale" name="cfg_scale" type="range"
          min="1.0" max="5.0" step="0.1" value="{{ current_gen_params.cfg_scale }}">
      </div>
      <!-- Temperature -->
      <div>
        <label class="label-base" for="temperature">Temperature (<span id="temperature_value">{{ current_gen_params.temperature }}</span>)</label>
        <input class="slider-base" id="temperature" name="temperature" type="range"
          min="1.0" max="1.5" step="0.05" value="{{ current_gen_params.temperature }}">
      </div>
      <!-- Top P -->
      <div>
        <label class="label-base" for="top_p">Top P (<span id="top_p_value">{{ current_gen_params.top_p }}</span>)</label>
        <input class="slider-base" id="top_p" name="top_p" type="range"
          min="0.8" max="1.0" step="0.01" value="{{ current_gen_params.top_p }}">
      </div>
      <!-- CFG Filter Top K -->
      <div>
        <label class="label-base" for="cfg_filter_top_k">CFG Filter Top K (<span id="cfg_filter_top_k_value">{{ current_gen_params.cfg_filter_top_k }}</span>)</label>
        <input class="slider-base" id="cfg_filter_top_k" name="cfg_filter_top_k" type="range"
          min="15" max="50" step="1" value="{{ current_gen_params.cfg_filter_top_k }}">
      </div>
      <!-- Save-defaults button with its (initially hidden) status text -->
      <div class="col-span-1 md:col-span-2 mt-4 flex items-center gap-4">
        <button class="btn-secondary" id="save-gen-defaults-btn" type="button">
          Save Generation Defaults
        </button>
        <span id="gen-defaults-status" class="text-xs hidden"></span>
      </div>
    </div>
  </details>
</div>
<!-- Server configuration (collapsible via <details>) -->
<div class="mb-6">
  <details class="group">
    <summary class="list-none flex cursor-pointer items-center">
      <span class="text-sm font-medium label-base">Server Configuration</span>
      <!-- Chevron flips while the <details> element is open -->
      <span class="ml-2 text-purple-500 dark:text-purple-300">
        <svg class="group-open:rotate-180 h-5 w-5 transition-transform" viewBox="0 0 20 20"
          fill="currentColor">
          <path fill-rule="evenodd" clip-rule="evenodd"
            d="M5.293 7.293a1 1 0 011.414 0L10 10.586l3.293-3.293a1 1 0 111.414 1.414l-4 4a1 1 0 01-1.414 0l-4-4a1 1 0 010-1.414z" />
        </svg>
      </span>
    </summary>
    <div class="mt-4 border-t border-gray-200 dark:border-dark-700 pt-4" id="server-config-form">
      <p class="text-xs text-purple-500 dark:text-purple-300 mb-3">
        These settings are saved to the <code class="code-inline">.env</code> file.
        Restart the server to apply changes.
      </p>
      <!-- Each field is pre-filled from the matching attribute of the "config" template variable -->
      <div class="grid grid-cols-1 md:grid-cols-2 gap-4">
        <!-- Dia Model Repo ID -->
        <div>
          <label class="label-base text-xs" for="config_model_repo">Model Repo ID</label>
          <input class="input-base text-sm" id="config_model_repo" name="DIA_MODEL_REPO_ID"
            type="text" value="{{ config.DIA_MODEL_REPO_ID }}"
            placeholder="ttj/dia-1.6b-safetensors">
        </div>
        <!-- Model Config Filename -->
        <div>
          <label class="label-base text-xs" for="config_model_config">Model Config Filename</label>
          <input class="input-base text-sm" id="config_model_config"
            name="DIA_MODEL_CONFIG_FILENAME" type="text"
            value="{{ config.DIA_MODEL_CONFIG_FILENAME }}" placeholder="config.json">
        </div>
        <!-- Model Weights Filename -->
        <div>
          <label class="label-base text-xs" for="config_model_weights">Model Weights Filename</label>
          <input class="input-base text-sm" id="config_model_weights"
            name="DIA_MODEL_WEIGHTS_FILENAME" type="text"
            value="{{ config.DIA_MODEL_WEIGHTS_FILENAME }}"
            placeholder="dia-v0_1_bf16.safetensors">
        </div>
        <!-- Model Cache Path -->
        <div>
          <label class="label-base text-xs" for="config_model_cache">Model Cache Path</label>
          <input class="input-base text-sm" id="config_model_cache" name="DIA_MODEL_CACHE_PATH"
            type="text" value="{{ config.DIA_MODEL_CACHE_PATH }}" placeholder="./model_cache">
        </div>
        <!-- Reference Audio Path -->
        <div>
          <label class="label-base text-xs" for="config_ref_audio">Reference Audio Path</label>
          <input class="input-base text-sm" id="config_ref_audio" name="REFERENCE_AUDIO_PATH"
            type="text" value="{{ config.REFERENCE_AUDIO_PATH }}"
            placeholder="./reference_audio">
        </div>
        <!-- Output Path -->
        <div>
          <label class="label-base text-xs" for="config_output_path">Output Path</label>
          <input class="input-base text-sm" id="config_output_path" name="OUTPUT_PATH"
            type="text" value="{{ config.OUTPUT_PATH }}" placeholder="./outputs">
        </div>
        <!-- Server Host -->
        <div>
          <label class="label-base text-xs" for="config_host">Server Host</label>
          <input class="input-base text-sm" id="config_host" name="HOST" type="text"
            value="{{ config.HOST }}" placeholder="0.0.0.0">
        </div>
        <!-- Server Port -->
        <div>
          <label class="label-base text-xs" for="config_port">Server Port</label>
          <input class="input-base text-sm" id="config_port" name="PORT" type="number"
            value="{{ config.PORT }}" min="1024" max="65535" step="1">
        </div>
        <!-- Save / restart buttons; the restart button and status text carry the "hidden" class -->
        <div class="col-span-1 md:col-span-2 mt-4 flex flex-col md:flex-row gap-4 items-center">
          <button class="btn-purple w-full md:w-auto" id="save-config-btn" type="button">
            Save Server Configuration
          </button>
          <button class="btn-danger w-full md:w-auto hidden" id="restart-server-btn"
            type="button">
            <svg class="w-5 h-5 mr-1 inline-block" xmlns="http://www.w3.org/2000/svg" fill="none"
              viewBox="0 0 24 24" stroke-width="1.5" stroke="currentColor">
              <path stroke-linecap="round" stroke-linejoin="round"
                d="M16.023 9.348h4.992v-.001M2.985 19.644v-4.992m0 0h4.992m-4.993 0 3.181 3.183a8.25 8.25 0 0 0 13.803-3.7M4.031 9.865a8.25 8.25 0 0 1 13.803-3.7l3.181 3.182m0-4.991v4.99" />
            </svg>
            Restart Server
          </button>
          <span id="config-status" class="text-xs ml-2 hidden"></span>
        </div>
      </div>
    </div>
  </details>
</div>
</div> <!-- closes the p-6 padding wrapper -->
<!-- Card footer: short syntax reminder on the left, submit button on the right -->
<div class="card-footer">
  <div class="text-sm text-gray-600 dark:text-purple-300">
    <p>Use <code class="code-inline">[S1]</code>/<code class="code-inline">[S2]</code> for
      dialogue. Add <code class="code-inline">(laughs)</code> etc.</p>
  </div>
  <button class="btn-primary" id="generate-btn" type="submit">
    <svg class="w-5 h-5 mr-1 inline-block" xmlns="http://www.w3.org/2000/svg" fill="none"
      viewBox="0 0 24 24" stroke-width="1.5" stroke="currentColor">
      <path stroke-linecap="round" stroke-linejoin="round"
        d="M19.114 5.636a9 9 0 0 1 0 12.728M16.463 8.288a5.25 5.25 0 0 1 0 7.424M6.75 8.25l4.72-4.72a.75.75 0 0 1 1.28.53v15.88a.75.75 0 0 1-1.28.53l-4.72-4.72H4.51c-.88 0-1.704-.507-1.938-1.354A9.009 9.009 0 0 1 2.25 12c0-.83.112-1.633.322-2.396C2.806 8.756 3.63 8.25 4.51 8.25H6.75Z" />
    </svg>
    Generate Speech
  </button>
</div>
</form>
</div> <!-- closes the TTS form card -->
<!-- Audio player container. Per the original note it is populated by JavaScript after a successful generation; the markup below covers a result passed in by the server at render time. -->
<div id="audio-player-container" class="mt-8">
  {% if output_file_url %}
  <!-- Hidden marker element: exposes the initial audio URL to scripts through a data attribute -->
  <div id="output-file-url-data" data-initial-audio-url="{{ output_file_url }}" class="hidden"></div>
  <div class="audio-player-card">
    <div class="p-6">
      <h2 class="card-header">Generated Audio</h2>
      <div class="mb-4">
        <div id="waveform" class="waveform-container"></div>
      </div>
      <div class="audio-player-controls">
        <div class="audio-player-buttons">
          <!-- Explicit type="button": a bare <button> defaults to type="submit" -->
          <button id="play-btn" type="button" class="btn-primary" disabled>
            <svg xmlns="http://www.w3.org/2000/svg" viewBox="0 0 20 20" fill="currentColor"
              class="w-5 h-5 mr-1">
              <path fill-rule="evenodd"
                d="M2 10a8 8 0 1 1 16 0 8 8 0 0 1-16 0Zm6.39-2.908a.75.75 0 0 1 .766.027l3.5 2.25a.75.75 0 0 1 0 1.262l-3.5 2.25A.75.75 0 0 1 8 12.25v-4.5a.75.75 0 0 1 .39-.658Z"
                clip-rule="evenodd" />
            </svg>
            Play
          </button>
          <!-- Suggested download filename is the last path segment of the output URL -->
          <a id="download-link" href="{{ output_file_url }}"
            download="{{ output_file_url.split('/')[-1] }}" class="btn-secondary">
            <svg xmlns="http://www.w3.org/2000/svg" viewBox="0 0 20 20" fill="currentColor"
              class="w-5 h-5 mr-1">
              <path
                d="M10.75 2.75a.75.75 0 0 0-1.5 0v8.614L6.295 8.235a.75.75 0 1 0-1.09 1.03l4.25 4.5a.75.75 0 0 0 1.09 0l4.25-4.5a.75.75 0 0 0-1.09-1.03l-2.955 3.129V2.75Z" />
              <path
                d="M3.5 12.75a.75.75 0 0 0-1.5 0v2.5A2.75 2.75 0 0 0 4.75 18h10.5A2.75 2.75 0 0 0 18 15.25v-2.5a.75.75 0 0 0-1.5 0v2.5c0 .69-.56 1.25-1.25 1.25H4.75c-.69 0-1.25-.56-1.25-1.25v-2.5Z" />
            </svg>
            Download WAV
          </a>
        </div>
        <!-- Summary line: voice mode, optional clone file, generation time, and a duration placeholder -->
        <div class="audio-player-info">
          Mode: <span class="font-medium">{{ submitted_voice_mode }}</span>
          {% if submitted_voice_mode == 'clone' and submitted_clone_file %}
          (<span class="font-medium">{{ submitted_clone_file }}</span>)
          {% endif %}
          • Gen Time: <span class="font-medium">{{ generation_time }}s</span>
          • Duration: <span id="audio-duration" class="font-medium">--:--</span>
        </div>
      </div>
    </div>
  </div>
  {% endif %}
</div>
<!-- Tips Section -->
<div class="mt-8">
  <h2 class="card-header mb-4">Tips &amp; Tricks for Dia</h2>
  <div class="card-base">
    <div class="p-6">
      <!-- Emphasis uses <strong>; Markdown-style **asterisks** are not interpreted in HTML and would show up literally -->
      <ul class="list-disc pl-5 text-sm text-gray-700 dark:text-purple-300 space-y-2">
        <li>For <strong>Dialogue</strong> mode, clearly mark speaker turns using <code
            class="code-inline">[S1]</code> and <code class="code-inline">[S2]</code>.</li>
        <li>Add non-verbal sounds like <code class="code-inline">(laughs)</code>, <code
            class="code-inline">(sighs)</code>, <code
            class="code-inline">(clears throat)</code> within the text where desired.</li>
        <li>For <strong>Voice Clone</strong> mode, upload a clean reference audio file (<code
            class="code-inline">.wav</code>/<code class="code-inline">.mp3</code>) using the
          "Load" button. <strong class="dark:text-yellow-300 text-yellow-600">Crucially,
            include the exact transcript of the reference audio at the beginning of your
            text input</strong> (e.g., <code
            class="code-inline">[S1] Reference transcript. [S1] Target text...</code>).</li>
        <li>Experiment with <strong>CFG Scale</strong> (higher = more adherence to text, potentially less
          natural) and <strong>Temperature</strong> (higher = more random/varied).</li>
        <li>The <strong>Speed Factor</strong> adjusts playback speed (0.8 = slower, 1.0 = original).</li>
        <li>Use the <code class="code-inline">/v1/audio/speech</code> endpoint for OpenAI
          compatibility. Use the <code class="code-inline">voice</code> parameter to specify
          mode ('S1', 'S2', 'dialogue', 'reference_file.wav').</li>
      </ul>
    </div>
  </div>
</div>
</div>
</main>
<!-- Page footer; reuses the navigation bar's background styling -->
<footer class="nav-base py-6 mt-12">
  <div class="mx-auto max-w-7xl px-4 sm:px-6 lg:px-8">
    <div class="flex justify-center">
      <a class="flex items-center gap-2 text-gray-600 dark:text-purple-300 text-sm hover:text-sky-600 dark:hover:text-primary-400 transition-colors"
        href="https://github.com/devnen/Dia-TTS-Server">
        <!-- GitHub mark -->
        <svg class="flex-shrink-0" xmlns="http://www.w3.org/2000/svg" width="20" height="20"
          fill="currentColor" viewBox="0 0 16 16">
          <path
            d="M8 0C3.58 0 0 3.58 0 8c0 3.54 2.29 6.53 5.47 7.59.4.07.55-.17.55-.38 0-.19-.01-.82-.01-1.49-2.01.37-2.53-.49-2.69-.94-.09-.23-.48-.94-.82-1.13-.28-.15-.68-.52-.01-.53.63-.01 1.08.58 1.23.82.72 1.21 1.87.87 2.33.66.07-.52.28-.87.51-1.07-1.78-.2-3.64-.89-3.64-3.95 0-.87.31-1.59.82-2.15-.08-.2-.36-1.02.08-2.12 0 0 .67-.21 2.2.82.64-.18 1.32-.27 2-.27.68 0 1.36.09 2 .27 1.53-1.04 2.2-.82 2.2-.82.44 1.1.16 1.92.08 2.12.51.56.82 1.27.82 2.15 0 3.07-1.87 3.75-3.65 3.95.29.25.54.73.54 1.48 0 1.07-.01 1.93-.01 2.2 0 .21.15.46.55.38A8.012 8.012 0 0 0 16 8c0-4.42-3.58-8-8-8z" />
        </svg>
        <span>Dia TTS Server | Powered by FastAPI</span>
      </a>
    </div>
  </div>
</footer>
</div>
<!-- Full-screen loading overlay; carries the "hidden" class until something reveals it -->
<div class="loading-overlay-base hidden" id="loading-overlay">
  <div class="loading-box-base">
    <!-- Spinner graphic (rotation comes from the loading-spinner class) -->
    <svg class="loading-spinner" xmlns="http://www.w3.org/2000/svg" fill="none" viewBox="0 0 24 24">
      <circle class="opacity-25" cx="12" cy="12" r="10" stroke="currentColor" stroke-width="4"></circle>
      <path class="opacity-75" fill="currentColor"
        d="M4 12a8 8 0 018-8V0C5.373 0 0 5.373 0 12h4zm2 5.291A7.962 7.962 0 014 12H0c0 3.042 1.135 5.824 3 7.938l3-2.647z">
      </path>
    </svg>
    <p class="loading-text" id="loading-message">Generating audio...</p>
    <p class="loading-status" id="loading-status">Please wait.</p>
    <button class="btn-secondary mt-4" id="loading-cancel-btn" type="button">Cancel</button>
  </div>
</div>
<!-- Pass data from server to JavaScript -->
<script>
  // Expose the presets to script.js as a global.
  // default([], true) substitutes an empty array when "presets" is undefined,
  // None or otherwise falsy, so the template still renders and appPresets is
  // always an array rather than null.
  window.appPresets = {{ presets | default([], true) | tojson | safe }};
</script>
<!-- External JavaScript; "defer" postpones execution until the document has been parsed -->
<script src="/ui/script.js" defer></script>
</body>
</html>