Spaces:
Running
Running
File size: 53,946 Bytes
ac5de5b |
1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32 33 34 35 36 37 38 39 40 41 42 43 44 45 46 47 48 49 50 51 52 53 54 55 56 57 58 59 60 61 62 63 64 65 66 67 68 69 70 71 72 73 74 75 76 77 78 79 80 81 82 83 84 85 86 87 88 89 90 91 92 93 94 95 96 97 98 99 100 101 102 103 104 105 106 107 108 109 110 111 112 113 114 115 116 117 118 119 120 121 122 123 124 125 126 127 128 129 130 131 132 133 134 135 136 137 138 139 140 141 142 143 144 145 146 147 148 149 150 151 152 153 154 155 156 157 158 159 160 161 162 163 164 165 166 167 168 169 170 171 172 173 174 175 176 177 178 179 180 181 182 183 184 185 186 187 188 189 190 191 192 193 194 195 196 197 198 199 200 201 202 203 204 205 206 207 208 209 210 211 212 213 214 215 216 217 218 219 220 221 222 223 224 225 226 227 228 229 230 231 232 233 234 235 236 237 238 239 240 241 242 243 244 245 246 247 248 249 250 251 252 253 254 255 256 257 258 259 260 261 262 263 264 265 266 267 268 269 270 271 272 273 274 275 276 277 278 279 280 281 282 283 284 285 286 287 288 289 290 291 292 293 294 295 296 297 298 299 300 301 302 303 304 305 306 307 308 309 310 311 312 313 314 315 316 317 318 319 320 321 322 323 324 325 326 327 328 329 330 331 332 333 334 335 336 337 338 339 340 341 342 343 344 345 346 347 348 349 350 351 352 353 354 355 356 357 358 359 360 361 362 363 364 365 366 367 368 369 370 371 372 373 374 375 376 377 378 379 380 381 382 383 384 385 386 387 388 389 390 391 392 393 394 395 396 397 398 399 400 401 402 403 404 405 406 407 408 409 410 411 412 413 414 415 416 417 418 419 420 421 422 423 424 425 426 427 428 429 430 431 432 433 434 435 436 437 438 439 440 441 442 443 444 445 446 447 448 449 450 451 452 453 454 455 456 457 458 459 460 461 462 463 464 465 466 467 468 469 470 471 472 473 474 475 476 477 478 479 480 481 482 483 484 485 486 487 488 489 490 491 492 493 494 495 496 497 498 499 500 501 502 503 504 505 506 507 508 509 510 511 512 513 514 515 516 517 518 519 520 521 522 523 524 525 526 527 
528 529 530 531 532 533 534 535 536 537 538 539 540 541 542 543 544 545 546 547 548 549 550 551 552 553 554 555 556 557 558 559 560 561 562 563 564 565 566 567 568 569 570 571 572 573 574 575 576 577 578 579 580 581 582 583 584 585 586 587 588 589 590 591 592 593 594 595 596 597 598 599 600 601 602 603 604 605 606 607 608 609 610 611 612 613 614 615 616 617 618 619 620 621 622 623 624 625 626 627 628 629 630 631 632 633 634 635 636 637 638 639 640 641 642 643 644 645 646 647 648 649 650 651 652 653 654 655 656 657 658 659 660 661 662 663 664 665 666 667 668 669 670 671 672 673 674 675 676 677 678 679 680 681 682 683 684 685 686 687 688 689 690 691 692 693 694 695 696 697 698 699 700 701 702 703 704 705 706 707 708 709 710 711 712 713 714 715 716 717 718 719 720 721 722 723 724 725 726 727 728 729 730 731 732 733 734 735 736 737 738 739 740 741 742 743 744 745 746 747 748 749 750 751 752 753 754 755 756 757 758 759 760 761 762 763 764 765 766 767 768 769 770 771 772 773 774 775 776 777 778 779 780 781 782 783 784 785 786 787 788 789 790 791 792 793 794 795 796 797 798 799 800 801 802 803 804 805 806 807 808 809 810 811 812 813 814 815 816 817 818 819 820 821 822 823 824 825 826 827 828 829 830 831 832 833 834 835 836 837 838 839 840 841 842 843 844 845 846 847 848 849 850 851 852 853 854 855 856 857 858 859 860 861 862 863 864 865 866 867 868 869 870 871 872 873 874 875 876 877 878 879 880 881 882 883 884 885 886 887 888 889 890 891 892 893 894 895 896 897 898 899 900 901 902 903 904 905 906 907 908 909 910 911 912 913 914 915 916 |
<!DOCTYPE html>
<html lang="en" class="dark"> <!-- Default to dark mode class -->
<head>
<!-- Document metadata: encoding, responsive viewport, tab title and favicon -->
<meta charset="UTF-8">
<meta content="width=device-width, initial-scale=1.0" name="viewport">
<title>Dia TTS Server | Text-to-Dialogue</title>
<link type="image/x-icon" rel="icon" href="/static/favicon.ico">
<!-- Tailwind Play CDN: builds the utility classes in the browser and also compiles the <style type="text/tailwindcss"> block further down -->
<script src="https://cdn.tailwindcss.com"></script>
<script>
  // Runtime configuration for the Tailwind Play CDN script loaded just above.
  tailwind.config = {
    // Dark styling is switched by a `dark` class on an ancestor element.
    darkMode: 'class',
    theme: {
      extend: {
        colors: {
          // Palettes referenced by the light-mode component classes.
          gray: { 50: '#f9fafb', 100: '#f3f4f6', 200: '#e5e7eb', 300: '#d1d5db', 400: '#9ca3af', 500: '#6b7280', 600: '#4b5563', 700: '#374151', 800: '#1f2937', 900: '#111827' },
          sky: { 50: '#f0f9ff', 100: '#e0f2fe', 200: '#bae6fd', 300: '#7dd3fc', 400: '#38bdf8', 500: '#0ea5e9', 600: '#0284c7', 700: '#0369a1', 800: '#075985', 900: '#0c4a6e' },
          indigo: { 50: '#eef2ff', 100: '#e0e7ff', 200: '#c7d2fe', 300: '#a5b4fc', 400: '#818cf8', 500: '#6366f1', 600: '#4f46e5', 700: '#4338ca', 800: '#3730a3', 900: '#312e81' },
          red: { 100: '#fee2e2', 300: '#fca5a5', 500: '#ef4444', 600: '#dc2626', 800: '#991b1b', 900: '#7f1d1d' },
          green: { 100: '#dcfce7', 300: '#86efac', 500: '#22c55e', 800: '#166534', 900: '#14532d' },
          yellow: { 100: '#fef9c3', 300: '#fcd34d', 500: '#eab308', 700: '#b45309', 900: '#78350f' },
          // Custom palettes: `primary` carries the same values as `sky`;
          // `dark` repeats the `gray` shades and adds 950 and 1000.
          primary: { 50: '#f0f9ff', 100: '#e0f2fe', 200: '#bae6fd', 300: '#7dd3fc', 400: '#38bdf8', 500: '#0ea5e9', 600: '#0284c7', 700: '#0369a1', 800: '#075985', 900: '#0c4a6e' },
          purple: { 50: '#faf5ff', 100: '#f3e8ff', 200: '#e9d5ff', 300: '#d8b4fe', 400: '#c084fc', 500: '#a855f7', 600: '#9333ea', 700: '#7e22ce', 800: '#6b21a8', 900: '#581c87' },
          dark: { 50: '#f9fafb', 100: '#f3f4f6', 200: '#e5e7eb', 300: '#d1d5db', 400: '#9ca3af', 500: '#6b7280', 600: '#4b5563', 700: '#374151', 800: '#1f2937', 900: '#111827', 950: '#030712', 1000: '#0f1729' }
        }
      }
    }
  }
</script>
<!-- The former external stylesheet (/ui/style.css) is inlined in the <style> block below. -->
<!-- Wavesurfer for audio visualization.
     Loaded with `defer` so it no longer blocks HTML parsing. Deferred scripts run in
     document order, so it still executes before the deferred /ui/script.js at the end
     of <body>. -->
<script src="https://unpkg.com/wavesurfer.js@7" defer></script>
<style type="text/tailwindcss">
/* Component styles formerly kept in ui/style.css; compiled in the browser by the Tailwind CDN script. */
/* Emit Tailwind's base, components, and utilities layers */
@tailwind base;
@tailwind components;
@tailwind utilities;
/* Custom component classes follow, registered in the components layer */
@layer components {
/* Light-mode defaults; `.dark`-scoped rules later in this layer override them. */
/* Page body */
.body-base {
  @apply h-full bg-gray-100 text-gray-900;
}
/* Gradient bar used by both the top navigation and the page footer */
.nav-base {
  @apply bg-gradient-to-r from-white to-sky-100 border-b border-sky-200 shadow-md;
}
/* Link inside the navigation bar */
.nav-link {
  @apply px-3 py-2 rounded-md text-sm font-medium text-sky-700 hover:text-sky-900;
}
/* Site title link */
.title-link {
  @apply text-xl font-bold text-gray-900;
}
/* Card container, its heading, and its footer strip */
.card-base {
  @apply overflow-hidden rounded-lg border border-gray-200 bg-white shadow-lg;
}
.card-header {
  @apply mb-4 text-lg font-medium text-gray-900;
}
.card-footer {
  @apply flex items-center justify-between px-6 py-4 bg-gray-50 border-t border-gray-200;
}
/* Caption placed above a form control */
.label-base {
  @apply block mb-1 text-sm font-medium text-gray-700;
}
/* Text-like form controls (text/number inputs, textarea, select).
   Tailwind's Preflight resets border-width to 0 on every element, so `border` is
   required for border-gray-300 / focus:border-sky-500 to be visible. Likewise
   focus:ring-sky-500 only sets a colour; `focus:ring-1` supplies the ring width,
   and `focus:outline-none` avoids drawing the browser outline on top of it. */
.input-base {
  @apply block w-full rounded-md border border-gray-300 shadow-sm focus:outline-none focus:border-sky-500 focus:ring-1 focus:ring-sky-500 sm:text-sm px-3 py-2 bg-white text-gray-900 placeholder-gray-400;
}
/* Textareas reuse the input styling unchanged. */
.textarea-base {
  @apply input-base;
}
/* Selects drop the native arrow (appearance-none) and reserve right padding for a
   replacement; no replacement arrow image is defined yet. */
.select-base {
  @apply input-base appearance-none pr-8;
}
/* Shared button skeleton: layout, focus ring, disabled state.
   whitespace-nowrap and flex-shrink-0 keep the label on one line at full width. */
.button-base {
  @apply inline-flex items-center justify-center px-4 py-2 border border-transparent rounded-md shadow-sm text-sm font-medium focus:outline-none focus:ring-2 focus:ring-offset-2 transition-colors disabled:opacity-50 disabled:cursor-not-allowed whitespace-nowrap flex-shrink-0;
}
/* Colour variants built on .button-base */
.btn-primary {
  @apply button-base bg-sky-600 text-white hover:bg-sky-700 focus:ring-sky-500;
}
.btn-secondary {
  @apply button-base bg-gray-200 text-gray-700 border-gray-300 hover:bg-gray-300 focus:ring-indigo-500;
}
.btn-danger {
  @apply button-base bg-red-600 text-white hover:bg-red-700 focus:ring-red-500;
}
.btn-purple {
  @apply button-base bg-purple-600 text-white hover:bg-purple-700 focus:ring-purple-500;
}
/* Range-input track. The thumb itself is styled through vendor pseudo-elements
   declared after this layer. */
.slider-base {
  @apply w-full h-2 bg-gray-200 rounded-lg appearance-none cursor-pointer;
}
/* Thumb appearance, applied to the vendor thumb pseudo-elements */
.slider-thumb {
  @apply appearance-none w-5 h-5 bg-sky-600 rounded-full cursor-pointer;
}
/* Clickable card that wraps one radio option */
.radio-label {
  @apply flex items-center space-x-2 cursor-pointer border border-gray-300 bg-white hover:border-sky-400 p-3 rounded-md transition-colors;
}
/* Caption inside a radio card */
.radio-label-text {
  @apply text-gray-700;
}
/* Checked-state styling for the radio cards is expressed with Tailwind state
   variants in the markup; the earlier custom rules
   (.radio-label input:checked+span, .radio-label-checked) are no longer defined. */
/* Compact pill button used for the example presets */
.preset-button {
  @apply button-base bg-indigo-100 text-indigo-700 border-indigo-200 hover:bg-indigo-200 focus:ring-indigo-500 text-xs px-3 py-1;
}
/* Notification banner skeleton and its four severity variants */
.notification-base {
  @apply px-4 py-3 rounded relative shadow-md flex items-center mb-3;
}
.notification-success {
  @apply notification-base bg-green-100 border border-green-300 text-green-800;
}
.notification-error {
  @apply notification-base bg-red-100 border border-red-300 text-red-800;
}
.notification-warning {
  @apply notification-base bg-yellow-100 border border-yellow-300 text-yellow-800;
}
.notification-info {
  @apply notification-base bg-sky-100 border border-sky-300 text-sky-800;
}
/* Inline code chip */
.code-inline {
  @apply bg-gray-200 px-1 rounded text-sm font-mono text-gray-800;
}
/* Tooltip shown while a `group` ancestor is hovered */
.tooltip {
  @apply absolute hidden group-hover:block bg-gray-700 text-white text-xs rounded py-1 px-2 z-10 -mt-8;
}
/* Full-screen dimmed overlay and the box centred inside it */
.loading-overlay-base {
  @apply fixed inset-0 bg-gray-600 bg-opacity-75 flex items-center justify-center z-50 transition-opacity duration-300;
}
.loading-box-base {
  @apply bg-white p-6 rounded-lg shadow-xl flex flex-col items-center border border-gray-300;
}
.loading-spinner {
  @apply animate-spin h-10 w-10 text-sky-600 mb-4;
}
.loading-text {
  @apply text-gray-900 text-lg mb-2;
}
/* Status line; max-w-xs caps its width */
.loading-status {
  @apply text-gray-600 text-sm mb-4 text-center max-w-xs;
}
/* Waveform drawing area */
.waveform-container {
  @apply w-full h-24 bg-gray-100 rounded;
}
/* Audio player: a card with top margin, a wrapping control row, and an info line */
.audio-player-card {
  @apply card-base mt-8;
}
.audio-player-controls {
  @apply flex flex-wrap items-center justify-between gap-4;
}
.audio-player-buttons {
  @apply flex items-center space-x-2 sm:space-x-4;
}
.audio-player-info {
  @apply text-sm text-gray-600 text-right;
}
/* Icon-button styling for a theme switch */
.theme-switch {
  @apply p-2 rounded-md text-gray-600 hover:bg-gray-200 hover:text-gray-800 focus:outline-none focus:ring-2 focus:ring-sky-500 focus:ring-offset-2;
}
/* Dark-mode overrides: each rule is scoped under a `.dark` ancestor class
   (descendant selectors, not the `dark:` utility prefix). */
.dark .body-base {
  /* Same hex as the dark-1000 palette entry */
  @apply bg-[#0f1729] text-white;
}
.dark .nav-base {
  @apply bg-gradient-to-r from-dark-900 to-purple-900 border-b border-purple-800 shadow-lg;
}
.dark .nav-link {
  @apply text-primary-300 hover:text-white;
}
.dark .title-link {
  @apply text-white;
}
.dark .card-base {
  @apply bg-dark-800 border border-dark-700;
}
.dark .card-header {
  @apply text-white;
}
.dark .card-footer {
  @apply bg-dark-900 border-t border-dark-700;
}
.dark .label-base {
  @apply text-gray-300;
}
.dark .input-base {
  @apply border-dark-600 bg-dark-700 text-white placeholder-gray-500 focus:ring-offset-dark-800;
}
.dark .select-base {
  /* Placeholder: no dark-specific select arrow styling yet */
}
/* Buttons */
.dark .btn-primary {
  @apply bg-primary-600 text-white hover:bg-primary-700 focus:ring-primary-500 focus:ring-offset-dark-800;
}
.dark .btn-secondary {
  @apply bg-dark-700 text-white border-dark-600 hover:bg-dark-600 focus:ring-purple-500 focus:ring-offset-dark-800;
}
.dark .btn-danger {
  @apply bg-red-600 text-white hover:bg-red-700 focus:ring-red-500 focus:ring-offset-dark-800;
}
.dark .btn-purple {
  @apply bg-purple-600 text-white hover:bg-purple-700 focus:ring-purple-500 focus:ring-offset-dark-800;
}
/* Slider and radio cards */
.dark .slider-base {
  @apply bg-dark-600;
}
.dark .slider-thumb {
  @apply bg-primary-500;
}
.dark .radio-label {
  @apply border-dark-600 bg-dark-800 hover:border-primary-400;
}
.dark .radio-label-text {
  @apply text-gray-300;
}
/* As in light mode, the checked state of the radio cards is styled with Tailwind
   state variants in the markup; the former `.dark .radio-label input:checked+span`
   and `.dark .radio-label-checked` rules are no longer defined. */
.dark .preset-button {
  @apply bg-indigo-900 text-indigo-200 border-indigo-700 hover:bg-indigo-800 focus:ring-indigo-500 focus:ring-offset-dark-800;
}
/* Notification banners under `.dark` (each re-applies notification-base) */
.dark .notification-success {
  @apply notification-base bg-green-900 border border-green-700 text-green-100;
}
.dark .notification-error {
  @apply notification-base bg-red-900 border border-red-700 text-red-100;
}
.dark .notification-warning {
  @apply notification-base bg-yellow-900 border border-yellow-700 text-yellow-100;
}
.dark .notification-info {
  @apply notification-base bg-sky-900 border border-sky-700 text-sky-100;
}
/* Inline code and tooltip */
.dark .code-inline {
  @apply bg-dark-900 text-purple-300;
}
.dark .tooltip {
  @apply bg-dark-950;
}
/* Loading overlay */
.dark .loading-overlay-base {
  @apply bg-dark-900 bg-opacity-75;
}
.dark .loading-box-base {
  @apply bg-dark-800 border border-dark-700;
}
.dark .loading-spinner {
  @apply text-primary-500;
}
.dark .loading-text {
  @apply text-white;
}
.dark .loading-status {
  @apply text-gray-400;
}
/* Audio player and theme switch */
.dark .waveform-container {
  @apply bg-dark-900;
}
.dark .audio-player-info {
  @apply text-purple-300;
}
.dark .theme-switch {
  @apply text-gray-400 hover:bg-dark-700 hover:text-white focus:ring-offset-dark-900;
}
}
/* Range-input thumbs are vendor pseudo-elements, so they are targeted here with
   explicit selectors rather than through a class on the element. */
input[type="range"].slider-base::-webkit-slider-thumb {
  @apply slider-thumb;
}
input[type="range"].slider-base::-moz-range-thumb {
  @apply slider-thumb;
}
/* Dark-mode thumbs: the colour is set directly from the theme and the size and
   shape utilities are repeated alongside it. */
.dark input[type="range"].slider-base::-webkit-slider-thumb {
  background-color: theme('colors.primary.500');
  @apply appearance-none w-5 h-5 rounded-full cursor-pointer;
}
.dark input[type="range"].slider-base::-moz-range-thumb {
  background-color: theme('colors.primary.500');
  @apply appearance-none w-5 h-5 rounded-full cursor-pointer;
}
</style>
</head>
<body class="body-base">
<div class="min-h-full">
<!-- Navigation: site title on the left, API docs link and theme toggle on the right -->
<nav class="nav-base">
  <div class="mx-auto max-w-7xl px-4 sm:px-6 lg:px-8">
    <div class="flex h-16 items-center justify-between">
      <div class="flex items-center">
        <div class="flex-shrink-0">
          <!-- Title links back to the root page -->
          <a href="/" class="title-link">Dia TTS Server</a>
        </div>
      </div>
      <div class="flex items-center space-x-2 sm:space-x-4">
        <!-- Opens in a new tab; rel="noopener" stops the new page from reaching window.opener -->
        <a href="/docs" target="_blank" rel="noopener" class="nav-link">API Docs</a>
        <!-- Theme toggle: a pill track with a sliding thumb, positioned and coloured by dark: variants.
             The click handler is not defined in this template (presumably in /ui/script.js). -->
        <button id="theme-toggle-btn" type="button"
          class="relative inline-flex items-center p-1 rounded-full bg-gray-200 dark:bg-dark-700 h-8 w-16 transition-colors"
          title="Toggle light/dark mode">
          <!-- Accessible name for the button; the icons below are decorative -->
          <span class="sr-only">Toggle theme</span>
          <span class="absolute inset-0 rounded-full transition-colors"></span>
          <!-- Toggle thumb with icons -->
          <span
            class="relative rounded-full w-6 h-6 bg-white dark:bg-purple-600 transform transition-transform duration-200 ease-in-out translate-x-0 dark:translate-x-8 flex items-center justify-center shadow-md">
            <!-- Sun icon (visible in light mode) -->
            <svg xmlns="http://www.w3.org/2000/svg" viewBox="0 0 20 20" fill="currentColor" aria-hidden="true"
              class="w-4 h-4 text-yellow-500 dark:opacity-0 transition-opacity">
              <path
                d="M10 2a.75.75 0 0 1 .75.75v1.5a.75.75 0 0 1-1.5 0v-1.5A.75.75 0 0 1 10 2ZM10 15a.75.75 0 0 1 .75.75v1.5a.75.75 0 0 1-1.5 0v-1.5A.75.75 0 0 1 10 15ZM10 7a3 3 0 1 0 0 6 3 3 0 0 0 0-6Z" />
            </svg>
            <!-- Moon icon (visible in dark mode) -->
            <svg xmlns="http://www.w3.org/2000/svg" viewBox="0 0 20 20" fill="currentColor" aria-hidden="true"
              class="w-4 h-4 text-white opacity-0 dark:opacity-100 transition-opacity">
              <path
                d="M7.455 1.75A8.5 8.5 0 0 1 18.25 12.55a8.5 8.5 0 0 1-8.46 8.46A8.5 8.5 0 0 1 1.75 12.55a8.5 8.5 0 0 1 5.705-10.8Z" />
            </svg>
          </span>
        </button>
      </div>
    </div>
  </div>
</nav>
<!-- Main content -->
<main>
<div class="mx-auto max-w-7xl px-4 py-8 sm:px-6 lg:px-8">
<!-- Notification area: server-rendered error/success banners, shown only when the
     template context provides `error` or `success` -->
<div class="mb-6 space-y-3" id="notification-area">
  {% if error %}
  <div role="alert" class="notification-error">
    <svg fill="currentColor" viewBox="0 0 20 20" class="h-5 w-5 text-red-500 mr-2 flex-shrink-0">
      <path clip-rule="evenodd" fill-rule="evenodd"
        d="M10 18a8 8 0 100-16 8 8 0 000 16zM8.707 7.293a1 1 0 00-1.414 1.414L8.586 10l-1.293 1.293a1 1 0 101.414 1.414L10 11.414l1.293 1.293a1 1 0 001.414-1.414L11.414 10l1.293-1.293a1 1 0 00-1.414-1.414L10 8.586 8.707 7.293z" />
    </svg>
    <span class="block sm:inline">{{ error }}</span>
  </div>
  {% endif %}
  {% if success %}
  <div role="alert" class="notification-success">
    <svg fill="currentColor" viewBox="0 0 20 20" class="h-5 w-5 text-green-500 mr-2 flex-shrink-0">
      <path clip-rule="evenodd" fill-rule="evenodd"
        d="M10 18a8 8 0 100-16 8 8 0 000 16zm3.707-9.293a1 1 0 00-1.414-1.414L9 10.586 7.707 9.293a1 1 0 00-1.414 1.414l2 2a1 1 0 001.414 0l4-4z" />
    </svg>
    <span class="block sm:inline">{{ success }}</span>
  </div>
  {% endif %}
</div>
<!-- TTS form -->
<div class="card-base">
<form id="tts-form" action="/web/generate" method="post" class="flex flex-col">
<div class="p-6">
<h2 class="card-header">Generate Speech with Dia</h2>
<!-- Script text: required, capped at 8192 characters, pre-filled with the previously
     submitted text when the server passes it back -->
<div class="mb-6">
  <label class="label-base" for="text">Text to speak</label>
  <p class="text-xs text-purple-500 dark:text-purple-300 mb-2">
    Use <code class="code-inline">[S1]</code> and <code class="code-inline">[S2]</code>
    tags for speaker turns. Add non-verbals like
    <code class="code-inline">(laughs)</code>.
  </p>
  <div class="relative">
    <textarea class="textarea-base" id="text" name="text" rows="5" maxlength="8192"
      placeholder="Example: [S1] Hello there! [S2] Hi! How are you? [S1] I'm doing well, thanks. (laughs)"
      required>{{ submitted_text if submitted_text else "" }}</textarea>
    <!-- Character counter overlaid on the bottom-right corner of the textarea -->
    <div class="absolute bottom-2 right-2 text-xs text-gray-500 dark:text-purple-300">
      <span id="char-count">0</span> / 8192
    </div>
  </div>
</div>
<!-- Voice Mode Selection -->
<div class="mb-6">
  <label class="label-base mb-2">Voice Mode</label>
  <div class="grid grid-cols-1 md:grid-cols-2 gap-4">
    <!--
      Each option is a <label> card wrapping its radio input.
      The card highlight uses has-[:checked]: because the input is a child of the
      label; peer-* variants only reach later siblings of the peer (the text span),
      never its parent, so they cannot style the card itself.
      The input is visually hidden with sr-only instead of display:none so it stays
      reachable by keyboard; has-[:focus-visible]: draws the focus outline on the card,
      and `relative` keeps the absolutely positioned input inside the card.
    -->
    <!-- Combined Dialogue / Single Speaker Mode (default when nothing was submitted) -->
    <label
      class="radio-label relative has-[:checked]:border-sky-500 dark:has-[:checked]:border-primary-500 has-[:checked]:ring-2 has-[:checked]:ring-sky-500 dark:has-[:checked]:ring-primary-500 has-[:focus-visible]:outline has-[:focus-visible]:outline-2 has-[:focus-visible]:outline-offset-2 has-[:focus-visible]:outline-sky-500">
      <input type="radio" name="voice_mode" value="dialogue" class="sr-only peer" {% if
        submitted_voice_mode=='dialogue' or not submitted_voice_mode %}checked{%
        endif %} onchange="toggleCloneOptions()">
      <span
        class="radio-label-text peer-checked:text-sky-600 dark:peer-checked:text-primary-400 peer-checked:font-semibold">
        Single / Dialogue (Use [S1]/[S2])
      </span>
    </label>
    <!-- Clone Mode -->
    <label
      class="radio-label relative has-[:checked]:border-sky-500 dark:has-[:checked]:border-primary-500 has-[:checked]:ring-2 has-[:checked]:ring-sky-500 dark:has-[:checked]:ring-primary-500 has-[:focus-visible]:outline has-[:focus-visible]:outline-2 has-[:focus-visible]:outline-offset-2 has-[:focus-visible]:outline-sky-500">
      <input type="radio" name="voice_mode" value="clone" class="sr-only peer" {% if
        submitted_voice_mode=='clone' %}checked{% endif %}
        onchange="toggleCloneOptions()">
      <span
        class="radio-label-text peer-checked:text-sky-600 dark:peer-checked:text-primary-400 peer-checked:font-semibold">
        Voice Clone (from Reference)
      </span>
    </label>
  </div>
</div>
<!-- Example presets: one button per entry in `presets`, with ids numbered from 0
     (preset-btn-0, preset-btn-1, ...); a hint is shown when the list is empty -->
<div class="mb-6">
  <label class="label-base mb-2">Load Example Preset</label>
  <div class="flex flex-wrap gap-2" id="presets-container">
    {% if presets %}
    {% for preset in presets %}
    <button class="preset-button" id="preset-btn-{{ loop.index0 }}" type="button"
      title="Load '{{ preset.name }}' text and settings">
      {{ preset.name }}
    </button>
    {% endfor %}
    {% else %}
    <p class="text-sm text-gray-500 dark:text-gray-400">No presets loaded. Check
      presets.yaml.</p>
    {% endif %}
  </div>
</div>
<!-- Clone options: rendered with the `hidden` class; the voice-mode radios call
     toggleCloneOptions() on change, which is defined outside this template -->
<div class="mb-6 hidden" id="clone-options">
  <label class="label-base" for="clone_reference_select">Reference Audio File</label>
  <p class="text-xs text-purple-500 dark:text-purple-300 mb-2">
    Select a <code class="code-inline">.wav</code> or
    <code class="code-inline">.mp3</code> file from the
    <code class="code-inline">reference_audio</code> folder.
    <strong class="dark:text-yellow-300 text-yellow-600">Important:</strong> Prepend the
    exact transcript of this audio to your text input above for best results.
  </p>
  <div class="flex items-center gap-2">
    <!-- One option per server-side reference file; the submitted choice is re-selected -->
    <select class="select-base flex-grow" id="clone_reference_select" name="clone_reference_select">
      <option value="none" {% if not submitted_clone_file %}selected{% endif %}>-- Select Reference File --</option>
      {% for filename in reference_files %}
      <option value="{{ filename }}" {% if submitted_clone_file==filename %}selected{%
        endif %}>{{ filename }}</option>
      {% endfor %}
    </select>
    <!-- File picker kept out of view; it has no name, so it is not part of the form data -->
    <input class="hidden" id="clone-file-input" type="file" multiple accept=".wav,.mp3"
      aria-label="Upload reference audio file">
    <!-- Upload button, also rendered with the `hidden` class -->
    <button class="btn-secondary hidden" id="clone-load-button" type="button"
      title="Upload new reference files">
      <svg class="w-5 h-5 mr-1" xmlns="http://www.w3.org/2000/svg" viewBox="0 0 20 20" fill="currentColor">
        <path
          d="M9.25 13.25a.75.75 0 0 0 1.5 0V4.636l2.955 3.129a.75.75 0 0 0 1.09-1.03l-4.25-4.5a.75.75 0 0 0-1.09 0l-4.25 4.5a.75.75 0 1 0 1.09 1.03L9.25 4.636v8.614Z" />
        <path
          d="M3.5 12.75a.75.75 0 0 0-1.5 0v2.5A2.75 2.75 0 0 0 4.75 18h10.5A2.75 2.75 0 0 0 18 15.25v-2.5a.75.75 0 0 0-1.5 0v2.5c0 .69-.56 1.25-1.25 1.25H4.75c-.69 0-1.25-.56-1.25-1.25v-2.5Z" />
      </svg>
      Load
    </button>
  </div>
</div>
<!-- Generation parameters: a collapsible group of range sliders. Each label embeds a
     <span id="<name>_value"> that is rendered with the slider's initial value. -->
<div class="mb-6">
  <details class="group">
    <summary class="list-none flex cursor-pointer items-center">
      <span class="text-sm font-medium label-base">Generation Parameters</span>
      <!-- Chevron flips while the <details> is open -->
      <span class="ml-2 text-purple-500 dark:text-purple-300">
        <svg class="group-open:rotate-180 h-5 w-5 transition-transform" fill="currentColor"
          viewBox="0 0 20 20">
          <path clip-rule="evenodd" fill-rule="evenodd"
            d="M5.293 7.293a1 1 0 011.414 0L10 10.586l3.293-3.293a1 1 0 111.414 1.414l-4 4a1 1 0 01-1.414 0l-4-4a1 1 0 010-1.414z" />
        </svg>
      </span>
    </summary>
    <div class="mt-4 grid grid-cols-1 md:grid-cols-2 gap-x-6 gap-y-4">
      <!-- Initial values: the submitted parameters when present, otherwise the server defaults -->
      {% set current_gen_params = submitted_gen_params if submitted_gen_params else
      default_gen_params %}
      <!-- Speed Factor: 0.5 to 2.0 -->
      <div>
        <label class="label-base" for="speed_factor">Speed Factor (<span
            id="speed_factor_value">{{ current_gen_params.speed_factor
            }}</span>)</label>
        <input class="slider-base" id="speed_factor" name="speed_factor" type="range"
          min="0.5" max="2.0" step="0.01" value="{{ current_gen_params.speed_factor }}">
      </div>
      <!-- CFG Scale: 1.0 to 5.0 -->
      <div>
        <label class="label-base" for="cfg_scale">CFG Scale (<span
            id="cfg_scale_value">{{ current_gen_params.cfg_scale
            }}</span>)</label>
        <input class="slider-base" id="cfg_scale" name="cfg_scale" type="range"
          min="1.0" max="5.0" step="0.1" value="{{ current_gen_params.cfg_scale }}">
      </div>
      <!-- Temperature: 1.0 to 1.5 -->
      <div>
        <label class="label-base" for="temperature">Temperature (<span
            id="temperature_value">{{ current_gen_params.temperature
            }}</span>)</label>
        <input class="slider-base" id="temperature" name="temperature" type="range"
          min="1.0" max="1.5" step="0.05" value="{{ current_gen_params.temperature }}">
      </div>
      <!-- Top P: 0.8 to 1.0 -->
      <div>
        <label class="label-base" for="top_p">Top P (<span id="top_p_value">{{
            current_gen_params.top_p }}</span>)</label>
        <input class="slider-base" id="top_p" name="top_p" type="range"
          min="0.8" max="1.0" step="0.01" value="{{ current_gen_params.top_p }}">
      </div>
      <!-- CFG Filter Top K: 15 to 50 -->
      <div>
        <label class="label-base" for="cfg_filter_top_k">CFG Filter Top K (<span
            id="cfg_filter_top_k_value">{{ current_gen_params.cfg_filter_top_k
            }}</span>)</label>
        <input class="slider-base" id="cfg_filter_top_k" name="cfg_filter_top_k" type="range"
          min="15" max="50" step="1" value="{{ current_gen_params.cfg_filter_top_k }}">
      </div>
      <!-- Save-defaults button plus a status line rendered hidden -->
      <div class="col-span-1 md:col-span-2 mt-4 flex items-center gap-4">
        <button class="btn-secondary" id="save-gen-defaults-btn" type="button">
          Save Generation Defaults
        </button>
        <span class="text-xs hidden" id="gen-defaults-status"></span>
      </div>
    </div>
  </details>
</div>
<!-- Server configuration: a collapsible group of fields pre-filled from `config`.
     The fields carry name attributes and sit inside the surrounding TTS form. -->
<div class="mb-6">
  <details class="group">
    <summary class="list-none flex cursor-pointer items-center">
      <span class="text-sm font-medium label-base">Server Configuration</span>
      <!-- Chevron flips while the <details> is open -->
      <span class="ml-2 text-purple-500 dark:text-purple-300">
        <svg class="group-open:rotate-180 h-5 w-5 transition-transform" fill="currentColor"
          viewBox="0 0 20 20">
          <path clip-rule="evenodd" fill-rule="evenodd"
            d="M5.293 7.293a1 1 0 011.414 0L10 10.586l3.293-3.293a1 1 0 111.414 1.414l-4 4a1 1 0 01-1.414 0l-4-4a1 1 0 010-1.414z" />
        </svg>
      </span>
    </summary>
    <div class="mt-4 border-t border-gray-200 dark:border-dark-700 pt-4" id="server-config-form">
      <p class="text-xs text-purple-500 dark:text-purple-300 mb-3">
        These settings are saved to the <code class="code-inline">.env</code> file.
        Restart the server to apply changes.
      </p>
      <div class="grid grid-cols-1 md:grid-cols-2 gap-4">
        <!-- Model repository id -->
        <div>
          <label class="label-base text-xs" for="config_model_repo">Model Repo
            ID</label>
          <input class="input-base text-sm" id="config_model_repo" name="DIA_MODEL_REPO_ID"
            type="text" value="{{ config.DIA_MODEL_REPO_ID }}"
            placeholder="ttj/dia-1.6b-safetensors">
        </div>
        <!-- Model config filename -->
        <div>
          <label class="label-base text-xs" for="config_model_config">Model Config
            Filename</label>
          <input class="input-base text-sm" id="config_model_config"
            name="DIA_MODEL_CONFIG_FILENAME" type="text"
            value="{{ config.DIA_MODEL_CONFIG_FILENAME }}"
            placeholder="config.json">
        </div>
        <!-- Model weights filename -->
        <div>
          <label class="label-base text-xs" for="config_model_weights">Model
            Weights Filename</label>
          <input class="input-base text-sm" id="config_model_weights"
            name="DIA_MODEL_WEIGHTS_FILENAME" type="text"
            value="{{ config.DIA_MODEL_WEIGHTS_FILENAME }}"
            placeholder="dia-v0_1_bf16.safetensors">
        </div>
        <!-- Model cache directory -->
        <div>
          <label class="label-base text-xs" for="config_model_cache">Model Cache
            Path</label>
          <input class="input-base text-sm" id="config_model_cache" name="DIA_MODEL_CACHE_PATH"
            type="text" value="{{ config.DIA_MODEL_CACHE_PATH }}"
            placeholder="./model_cache">
        </div>
        <!-- Reference audio directory -->
        <div>
          <label class="label-base text-xs" for="config_ref_audio">Reference Audio
            Path</label>
          <input class="input-base text-sm" id="config_ref_audio" name="REFERENCE_AUDIO_PATH"
            type="text" value="{{ config.REFERENCE_AUDIO_PATH }}"
            placeholder="./reference_audio">
        </div>
        <!-- Output directory -->
        <div>
          <label class="label-base text-xs" for="config_output_path">Output
            Path</label>
          <input class="input-base text-sm" id="config_output_path" name="OUTPUT_PATH"
            type="text" value="{{ config.OUTPUT_PATH }}" placeholder="./outputs">
        </div>
        <!-- Bind host -->
        <div>
          <label class="label-base text-xs" for="config_host">Server Host</label>
          <input class="input-base text-sm" id="config_host" name="HOST"
            type="text" value="{{ config.HOST }}" placeholder="0.0.0.0">
        </div>
        <!-- Bind port, constrained to 1024-65535 by the number input -->
        <div>
          <label class="label-base text-xs" for="config_port">Server Port</label>
          <input class="input-base text-sm" id="config_port" name="PORT"
            type="number" value="{{ config.PORT }}" min="1024" max="65535" step="1">
        </div>
        <!-- Save button, a restart button rendered hidden, and a status line rendered hidden -->
        <div
          class="col-span-1 md:col-span-2 mt-4 flex flex-col md:flex-row gap-4 items-center">
          <button class="btn-purple w-full md:w-auto" id="save-config-btn" type="button">
            Save Server Configuration
          </button>
          <button class="btn-danger w-full md:w-auto hidden" id="restart-server-btn" type="button">
            <svg class="w-5 h-5 mr-1 inline-block" xmlns="http://www.w3.org/2000/svg" fill="none"
              viewBox="0 0 24 24" stroke-width="1.5" stroke="currentColor">
              <path stroke-linecap="round" stroke-linejoin="round"
                d="M16.023 9.348h4.992v-.001M2.985 19.644v-4.992m0 0h4.992m-4.993 0 3.181 3.183a8.25 8.25 0 0 0 13.803-3.7M4.031 9.865a8.25 8.25 0 0 1 13.803-3.7l3.181 3.182m0-4.991v4.99" />
            </svg>
            Restart Server
          </button>
          <span class="text-xs ml-2 hidden" id="config-status"></span>
        </div>
      </div>
    </div>
  </details>
</div>
</div> <!-- End p-6 -->
<!-- Card footer: a short syntax reminder on the left, the submit button on the right -->
<div class="card-footer">
  <div class="text-sm text-gray-600 dark:text-purple-300">
    <p>Use <code class="code-inline">[S1]</code>/<code class="code-inline">[S2]</code> for
      dialogue. Add <code class="code-inline">(laughs)</code> etc.</p>
  </div>
  <button class="btn-primary" id="generate-btn" type="submit">
    <svg class="w-5 h-5 mr-1 inline-block" xmlns="http://www.w3.org/2000/svg" fill="none"
      viewBox="0 0 24 24" stroke-width="1.5" stroke="currentColor">
      <path stroke-linecap="round" stroke-linejoin="round"
        d="M19.114 5.636a9 9 0 0 1 0 12.728M16.463 8.288a5.25 5.25 0 0 1 0 7.424M6.75 8.25l4.72-4.72a.75.75 0 0 1 1.28.53v15.88a.75.75 0 0 1-1.28.53l-4.72-4.72H4.51c-.88 0-1.704-.507-1.938-1.354A9.009 9.009 0 0 1 2.25 12c0-.83.112-1.633.322-2.396C2.806 8.756 3.63 8.25 4.51 8.25H6.75Z" />
    </svg>
    Generate Speech
  </button>
</div>
</form>
</div> <!-- End TTS Form Card -->
<!-- Audio player container - Populated by JavaScript if generation is successful -->
<div id="audio-player-container" class="mt-8">
  {% if output_file_url %}
  <!-- Server-rendered player, emitted when the response already carries a result -->
  <!-- Data attribute exposing the initial audio URL to the page script -->
  <div id="output-file-url-data" data-initial-audio-url="{{ output_file_url }}" class="hidden"></div>
  <div class="audio-player-card">
    <div class="p-6">
      <h2 class="card-header">Generated Audio</h2>
      <div class="mb-4">
        <div id="waveform" class="waveform-container"></div>
      </div>
      <div class="audio-player-controls">
        <div class="audio-player-buttons">
          <!-- Explicit type="button": this is an in-page action, not a form submit.
               Rendered disabled; nothing in this template enables it. -->
          <button id="play-btn" type="button" class="btn-primary" disabled>
            <svg xmlns="http://www.w3.org/2000/svg" viewBox="0 0 20 20" fill="currentColor" aria-hidden="true"
              class="w-5 h-5 mr-1">
              <path fill-rule="evenodd"
                d="M2 10a8 8 0 1 1 16 0 8 8 0 0 1-16 0Zm6.39-2.908a.75.75 0 0 1 .766.027l3.5 2.25a.75.75 0 0 1 0 1.262l-3.5 2.25A.75.75 0 0 1 8 12.25v-4.5a.75.75 0 0 1 .39-.658Z"
                clip-rule="evenodd" />
            </svg>
            Play
          </button>
          <!-- Download link; the suggested filename is the last path segment of the URL -->
          <a id="download-link" href="{{ output_file_url }}"
            download="{{ output_file_url.split('/')[-1] }}" class="btn-secondary">
            <svg xmlns="http://www.w3.org/2000/svg" viewBox="0 0 20 20" fill="currentColor" aria-hidden="true"
              class="w-5 h-5 mr-1">
              <path
                d="M10.75 2.75a.75.75 0 0 0-1.5 0v8.614L6.295 8.235a.75.75 0 1 0-1.09 1.03l4.25 4.5a.75.75 0 0 0 1.09 0l4.25-4.5a.75.75 0 0 0-1.09-1.03l-2.955 3.129V2.75Z" />
              <path
                d="M3.5 12.75a.75.75 0 0 0-1.5 0v2.5A2.75 2.75 0 0 0 4.75 18h10.5A2.75 2.75 0 0 0 18 15.25v-2.5a.75.75 0 0 0-1.5 0v2.5c0 .69-.56 1.25-1.25 1.25H4.75c-.69 0-1.25-.56-1.25-1.25v-2.5Z" />
            </svg>
            Download WAV
          </a>
        </div>
        <!-- Summary line: voice mode (and clone file, if any), generation time, duration placeholder -->
        <div class="audio-player-info">
          Mode: <span class="font-medium">{{ submitted_voice_mode }}</span>
          {% if submitted_voice_mode == 'clone' and submitted_clone_file %}
          (<span class="font-medium">{{ submitted_clone_file }}</span>)
          {% endif %}
          • Gen Time: <span class="font-medium">{{ generation_time }}s</span>
          • Duration: <span id="audio-duration" class="font-medium">--:--</span>
        </div>
      </div>
    </div>
  </div>
  {% endif %}
</div>
<!-- Tips Section. Emphasised terms use <strong>: Markdown-style asterisks are not
     interpreted in HTML and would be shown literally. -->
<div class="mt-8">
  <h2 class="card-header mb-4">Tips &amp; Tricks for Dia</h2>
  <div class="card-base">
    <div class="p-6">
      <ul class="list-disc pl-5 text-sm text-gray-700 dark:text-purple-300 space-y-2">
        <li>For <strong>Dialogue</strong> mode, clearly mark speaker turns using <code
            class="code-inline">[S1]</code> and <code class="code-inline">[S2]</code>.</li>
        <li>Add non-verbal sounds like <code class="code-inline">(laughs)</code>, <code
            class="code-inline">(sighs)</code>, <code
            class="code-inline">(clears throat)</code> within the text where desired.</li>
        <li>For <strong>Voice Clone</strong> mode, upload a clean reference audio file (<code
            class="code-inline">.wav</code>/<code class="code-inline">.mp3</code>) using the
          "Load" button. <strong class="dark:text-yellow-300 text-yellow-600">Crucially,
            include the exact transcript of the reference audio at the beginning of your
            text input</strong> (e.g., <code
            class="code-inline">[S1] Reference transcript. [S1] Target text...</code>).</li>
        <li>Experiment with <strong>CFG Scale</strong> (higher = more adherence to text, potentially less
          natural) and <strong>Temperature</strong> (higher = more random/varied).</li>
        <li>The <strong>Speed Factor</strong> adjusts playback speed (0.8 = slower, 1.0 = original).</li>
        <li>Use the <code class="code-inline">/v1/audio/speech</code> endpoint for OpenAI
          compatibility. Use the <code class="code-inline">voice</code> parameter to specify
          mode ('S1', 'S2', 'dialogue', 'reference_file.wav').</li>
      </ul>
    </div>
  </div>
</div>
</div>
</main>
<!-- Page footer: reuses the navigation bar styling around a centred GitHub link -->
<footer class="nav-base py-6 mt-12">
  <div class="mx-auto max-w-7xl px-4 sm:px-6 lg:px-8">
    <div class="flex justify-center">
      <a class="flex items-center gap-2 text-gray-600 dark:text-purple-300 text-sm hover:text-sky-600 dark:hover:text-primary-400 transition-colors"
        href="https://github.com/devnen/Dia-TTS-Server">
        <!-- GitHub mark -->
        <svg class="flex-shrink-0" xmlns="http://www.w3.org/2000/svg" width="20" height="20"
          fill="currentColor" viewBox="0 0 16 16">
          <path
            d="M8 0C3.58 0 0 3.58 0 8c0 3.54 2.29 6.53 5.47 7.59.4.07.55-.17.55-.38 0-.19-.01-.82-.01-1.49-2.01.37-2.53-.49-2.69-.94-.09-.23-.48-.94-.82-1.13-.28-.15-.68-.52-.01-.53.63-.01 1.08.58 1.23.82.72 1.21 1.87.87 2.33.66.07-.52.28-.87.51-1.07-1.78-.2-3.64-.89-3.64-3.95 0-.87.31-1.59.82-2.15-.08-.2-.36-1.02.08-2.12 0 0 .67-.21 2.2.82.64-.18 1.32-.27 2-.27.68 0 1.36.09 2 .27 1.53-1.04 2.2-.82 2.2-.82.44 1.1.16 1.92.08 2.12.51.56.82 1.27.82 2.15 0 3.07-1.87 3.75-3.65 3.95.29.25.54.73.54 1.48 0 1.07-.01 1.93-.01 2.2 0 .21.15.46.55.38A8.012 8.012 0 0 0 16 8c0-4.42-3.58-8-8-8z" />
        </svg>
        <span>Dia TTS Server | Powered by FastAPI</span>
      </a>
    </div>
  </div>
</footer>
</div>
<!-- Loading overlay, rendered with the `hidden` class: spinner, message, status line
     and a cancel button -->
<div class="loading-overlay-base hidden" id="loading-overlay">
  <div class="loading-box-base">
    <svg class="loading-spinner" xmlns="http://www.w3.org/2000/svg" viewBox="0 0 24 24" fill="none">
      <circle class="opacity-25" cx="12" cy="12" r="10" stroke-width="4" stroke="currentColor"></circle>
      <path class="opacity-75" fill="currentColor"
        d="M4 12a8 8 0 018-8V0C5.373 0 0 5.373 0 12h4zm2 5.291A7.962 7.962 0 014 12H0c0 3.042 1.135 5.824 3 7.938l3-2.647z">
      </path>
    </svg>
    <p class="loading-text" id="loading-message">Generating audio...</p>
    <p class="loading-status" id="loading-status">Please wait.</p>
    <button class="btn-secondary mt-4" id="loading-cancel-btn" type="button">Cancel</button>
  </div>
</div>
<!-- Pass data from server to JavaScript -->
<script>
  // Expose the presets from the Jinja2 context as a global for the page script.
  // `default([], true)` substitutes an empty list when `presets` is undefined, None,
  // or otherwise falsy, so the global is always an array rather than `null`, and an
  // undefined value is never handed to `tojson`.
  window.appPresets = {{ presets | default([], true) | tojson | safe }};
</script>
<!-- External page script; `defer` runs it after the document has been parsed -->
<script src="/ui/script.js" defer></script>
</body>
</html> |