feat: update tech report
Browse files- README.md +3 -1
- index.html +1022 -19
- style.css +830 -17
README.md
CHANGED
@@ -7,4 +7,6 @@ sdk: static
|
|
7 |
pinned: false
|
8 |
---
|
9 |
|
10 |
-
|
|
|
|
|
|
7 |
pinned: false
|
8 |
---
|
9 |
|
10 |
+
Here are our latest tech reports:
|
11 |
+
|
12 |
+
- [MiniMax Speech Tech Report](https://minimax-ai.github.io/tts_tech_report/)
|
index.html
CHANGED
@@ -1,19 +1,1022 @@
|
|
1 |
-
<!
|
2 |
-
<html>
|
3 |
-
|
4 |
-
|
5 |
-
|
6 |
-
|
7 |
-
|
8 |
-
|
9 |
-
|
10 |
-
|
11 |
-
|
12 |
-
|
13 |
-
|
14 |
-
|
15 |
-
|
16 |
-
|
17 |
-
|
18 |
-
|
19 |
-
</
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
1 |
+
<!DOCTYPE html>
|
2 |
+
<html lang="en">
|
3 |
+
|
4 |
+
<head>
|
5 |
+
<meta charset="UTF-8" />
|
6 |
+
<meta name="viewport" content="width=device-width, initial-scale=1.0" />
|
7 |
+
<title>MiniMax-Speech Tech Report | Intrinsic Zero-Shot Text-to-Speech with a Learnable Speaker Encoder</title>
|
8 |
+
<meta name="description"
|
9 |
+
content="MiniMax-Speech is an autoregressive Transformer-based Text-to-Speech (TTS) model that generates high-quality speech." />
|
10 |
+
<meta name="keywords" content="MiniMax-Speech,text-to-speech,TTS,zero-shot voice cloning,speaker encoder,Flow-VAE" />
|
11 |
+
<meta property="og:title"
|
12 |
+
content="MiniMax-Speech Tech Report | Intrinsic Zero-Shot Text-to-Speech with a Learnable Speaker Encoder" />
|
13 |
+
<meta property="og:url" content="https://huggingface.co/spaces/MiniMaxAI/MiniMax-Speech-Tech-Report" />
|
14 |
+
<meta property="og:description"
|
15 |
+
content="MiniMax-Speech is an autoregressive Transformer-based Text-to-Speech (TTS) model that generates high-quality speech." />
|
16 |
+
<meta property="og:type" content="website" />
|
17 |
+
|
18 |
+
<link rel="stylesheet" href="style.css" />
|
19 |
+
</head>
|
20 |
+
|
21 |
+
<body id="top" class="text-justify">
|
22 |
+
<header
|
23 |
+
style="background-image: url('assets/images/header-bg.jpeg'); background-size: cover; background-position: center; padding: 1rem 0; border-radius: 1rem;">
|
24 |
+
<h1>MiniMax-Speech</h1>
|
25 |
+
<h4 style="font-size: 1.3rem; line-height: 1; text-align: center;">Intrinsic Zero-Shot Text-to-Speech
|
26 |
+
with a
|
27 |
+
Learnable Speaker
|
28 |
+
Encoder</h4>
|
29 |
+
<p class="author">
|
30 |
+
MiniMax Team <span class="date">May 2025</span><br />
|
31 |
+
<a style="font-size: 1.1rem;"
|
32 |
+
href="https://huggingface.co/spaces/MiniMaxAI/MiniMax-Speech-Tech-Report/blob/main/MiniMax_Speech.pdf">[Tech
|
33 |
+
Report]</a>
|
34 |
+
</p>
|
35 |
+
</header>
|
36 |
+
|
37 |
+
<div class="abstract">
|
38 |
+
<h2>Abstract</h2>
|
39 |
+
<p style="text-align: left;">
|
40 |
+
We introduce MiniMax-Speech, an autoregressive Transformer-based Text-to-Speech (TTS) model that generates
|
41 |
+
high-quality
|
42 |
+
speech. A key innovation is our learnable speaker encoder, which extracts timbre features from a reference audio
|
43 |
+
without
|
44 |
+
requiring its transcription. This enables MiniMax-Speech to produce highly expressive speech with timbre
|
45 |
+
consistent with
|
46 |
+
the reference in a zero-shot manner, while also supporting one-shot voice cloning with exceptionally high
|
47 |
+
similarity to
|
48 |
+
the reference voice. In addition, the overall quality of the synthesized audio is enhanced through the proposed
|
49 |
+
Flow-VAE. Our model supports 32 languages and demonstrates excellent performance across multiple objective and
|
50 |
+
subjective evaluation metrics. Notably, it achieves state-of-the-art (SOTA) results on objective voice cloning
|
51 |
+
metrics
|
52 |
+
(Word Error Rate and Speaker Similarity) and has secured the top position on the public TTS Arena leaderboard.
|
53 |
+
Another
|
54 |
+
key strength of MiniMax-Speech, granted by the robust and disentangled representations from the speaker encoder,
|
55 |
+
is its
|
56 |
+
extensibility without modifying the base model, enabling various applications such as: arbitrary voice emotion
|
57 |
+
control
|
58 |
+
via LoRA; text to voice (T2V) by synthesizing timbre features directly from text description; and professional
|
59 |
+
voice
|
60 |
+
cloning (PVC) by fine-tuning timbre features with additional data. We encourage readers to visit
|
61 |
+
<a href="https://minimax-ai.github.io/tts_tech_report/">https://minimax-ai.github.io/tts_tech_report</a> for more examples.
|
62 |
+
</p>
|
63 |
+
</div>
|
64 |
+
|
65 |
+
<nav role="navigation" class="toc">
|
66 |
+
<h2>Contents</h2>
|
67 |
+
<ol>
|
68 |
+
<li>
|
69 |
+
<a href="#architecture-overview">Architecture Overview</a>
|
70 |
+
</li>
|
71 |
+
<li>
|
72 |
+
<a href="#expressiveness-demonstrations">Expressiveness Demonstrations</a>
|
73 |
+
<ol>
|
74 |
+
<li><a href="#showcase-with-high-versatility">Showcase with High Versatility</a></li>
|
75 |
+
<li><a href="#showcase-with-multiple-generation-attempts">Showcase with Multiple Generation Attempts</a></li>
|
76 |
+
<li><a href="#examples-with-more-possibilities">Examples with More Possibilities</a></li>
|
77 |
+
</ol>
|
78 |
+
</li>
|
79 |
+
<li><a href="#zero-shot-vs-one-shot-demonstrations">Zero-Shot vs. One-Shot Demonstrations</a></li>
|
80 |
+
<li><a href="#multilingual-and-cross-lingual-capabilities-demonstrations">Multilingual and Cross-Lingual
|
81 |
+
Capabilities Demonstrations</a></li>
|
82 |
+
<li><a href="#flow-vae-vs-vae-comparisons">Flow-VAE vs. VAE Comparisons</a></li>
|
83 |
+
<li><a href="#professional-voice-clone-pvc-demonstrations">Professional Voice Clone (PVC) Demonstrations</a></li>
|
84 |
+
<li><a href="#emotion-control-demonstrations">Emotion Control Demonstrations</a></li>
|
85 |
+
<li><a href="#text-prompted-voice-generation-demonstrations">Text-Prompted Voice Generation Demonstrations</a>
|
86 |
+
</li>
|
87 |
+
<li><a href="#comparison-of-voice-naturalness">Comparison of voice
|
88 |
+
naturalness with the previous generation products</a></li>
|
89 |
+
</ol>
|
90 |
+
</nav>
|
91 |
+
|
92 |
+
<main>
|
93 |
+
<article>
|
94 |
+
<div class="article-block">
|
95 |
+
<h2 id="architecture-overview">Architecture Overview</h2>
|
96 |
+
<figure>
|
97 |
+
<img src="assets/images/system-overview.jpg" loading="lazy" alt="System Architecture" width="100%"
|
98 |
+
height="auto" />
|
99 |
+
<figcaption>
|
100 |
+
An overview of the architecture of MiniMax-Speech.
|
101 |
+
</figcaption>
|
102 |
+
</figure>
|
103 |
+
</div>
|
104 |
+
|
105 |
+
<div class="article-block">
|
106 |
+
<h2 id="expressiveness-demonstrations">Expressiveness Demonstrations</h2>
|
107 |
+
<h3 id="showcase-with-high-versatility">Showcase with High Versatility</h3>
|
108 |
+
<div class="scroll-wrapper">
|
109 |
+
<table style="width: 100%;">
|
110 |
+
<tbody>
|
111 |
+
<tr class="border-bottom-thin">
|
112 |
+
<th scope="col" style="width: 40%;">Description</th>
|
113 |
+
<th scope="col" style="width: 30%; text-align: center;">Source Audio</th>
|
114 |
+
<th scope="col" style="width: 30%; text-align: center;">Generated Audio</th>
|
115 |
+
</tr>
|
116 |
+
<tr class="border-bottom-thin">
|
117 |
+
<td>
|
118 |
+
A Compelling and Persuasive Speaker Voice
|
119 |
+
</td>
|
120 |
+
<td>
|
121 |
+
<audio class="audio-md" src="assets/audios/Marketing_Voice_Sourse.wav" controls></audio>
|
122 |
+
</td>
|
123 |
+
<td>
|
124 |
+
<audio class="audio-md" src="assets/audios/Compelling%20and%20Persuasive.wav" controls></audio>
|
125 |
+
</td>
|
126 |
+
</tr>
|
127 |
+
<tr class="border-bottom-thin">
|
128 |
+
<td>
|
129 |
+
A Clear and Explanatory Voice with Broad Emotional Dynamics Across Different Texts
|
130 |
+
</td>
|
131 |
+
<td>
|
132 |
+
<audio class="audio-md" src="assets/audios/Science_Voice_Sourse.wav" controls></audio>
|
133 |
+
</td>
|
134 |
+
<td>
|
135 |
+
<audio class="audio-md" src="assets/audios/Explanatory%20Broad%20Emotional.wav" controls></audio>
|
136 |
+
</td>
|
137 |
+
</tr>
|
138 |
+
<tr class="border-bottom-thin">
|
139 |
+
<td>
|
140 |
+
Another Explanatory Voice with Supernatural Prosody, <br>
|
141 |
+
Featuring Distinct Ethnic and Age Characteristics
|
142 |
+
</td>
|
143 |
+
<td>
|
144 |
+
<audio class="audio-md" src="assets/audios/Sociology_Sourse.mp3" controls></audio>
|
145 |
+
</td>
|
146 |
+
<td>
|
147 |
+
<audio class="audio-md" src="assets/audios/Explanatory%20Supernatural%20Prosody.MP3" controls></audio>
|
148 |
+
</td>
|
149 |
+
</tr>
|
150 |
+
<tr class="border-bottom-thin">
|
151 |
+
<td>
|
152 |
+
A Warm and Magnetic Voice that Brings Comfort
|
153 |
+
</td>
|
154 |
+
<td>
|
155 |
+
<audio class="audio-md" src="assets/audios/Warm%20and%20Magnetic_Sourse.mp3" controls></audio>
|
156 |
+
</td>
|
157 |
+
<td>
|
158 |
+
<audio class="audio-md" src="assets/audios/Warm%20and%20Magnetic.mp3" controls></audio>
|
159 |
+
</td>
|
160 |
+
</tr>
|
161 |
+
</tbody>
|
162 |
+
</table>
|
163 |
+
</div>
|
164 |
+
|
165 |
+
<h3 id="showcase-with-multiple-generation-attempts">Showcase with Multiple Generation Attempts, Post-Processing
|
166 |
+
Audio Effects and Added Sound Effects</h3>
|
167 |
+
<div class="scroll-wrapper">
|
168 |
+
<table style="width: 100%;">
|
169 |
+
<tbody>
|
170 |
+
<tr class="border-bottom-thin">
|
171 |
+
<th scope="col" style="width: 50%;">Description</th>
|
172 |
+
<th scope="col" style="width: 50%; text-align: center;">Generated Audio</th>
|
173 |
+
</tr>
|
174 |
+
<tr class="border-bottom-thin">
|
175 |
+
<td>
|
176 |
+
A Husky Male Voice: From Soft Murmur to Excitement to Anger, then to Whispers
|
177 |
+
</td>
|
178 |
+
<td>
|
179 |
+
<audio class="audio-lg" src="assets/audios/Murmur-Excitement-Anger-%20Whispers.MP3" controls></audio>
|
180 |
+
</td>
|
181 |
+
</tr>
|
182 |
+
<tr class="border-bottom-thin">
|
183 |
+
<td>
|
184 |
+
An Angry Female Voice: From Soft Murmur to Rage to Reminiscence, then to Weeping
|
185 |
+
</td>
|
186 |
+
<td>
|
187 |
+
<audio class="audio-lg" src="assets/audios/Neutral-Rage-Reminiscence-Weeping.MP3" controls></audio>
|
188 |
+
</td>
|
189 |
+
</tr>
|
190 |
+
</tbody>
|
191 |
+
</table>
|
192 |
+
</div>
|
193 |
+
|
194 |
+
<h3 id="examples-with-more-possibilities">Examples with More Possibilities, Audio Effects and Sound Effects are
|
195 |
+
Generated</h3>
|
196 |
+
<div class="scroll-wrapper">
|
197 |
+
<table style="width: 100%;">
|
198 |
+
<tbody>
|
199 |
+
<tr class="border-bottom-thin">
|
200 |
+
<th scope="col" style="width: 50%;">Description</th>
|
201 |
+
<th scope="col" style="width: 50%; text-align: center;">Generated Audio</th>
|
202 |
+
</tr>
|
203 |
+
<tr class="border-bottom-thin">
|
204 |
+
<td>
|
205 |
+
An ASMR Whispering Voice with Generated Breathing and Sound Effects
|
206 |
+
</td>
|
207 |
+
<td>
|
208 |
+
<audio class="audio-lg" src="assets/audios/Breathy%20ASMR.MP3" controls></audio>
|
209 |
+
</td>
|
210 |
+
</tr>
|
211 |
+
<tr class="border-bottom-thin">
|
212 |
+
<td>
|
213 |
+
A Robotic Voice with Rich Bass Resonance and Spatial Presence
|
214 |
+
</td>
|
215 |
+
<td>
|
216 |
+
<audio class="audio-lg" src="assets/audios/Lucky%20Robot.mp3" controls></audio>
|
217 |
+
</td>
|
218 |
+
</tr>
|
219 |
+
<tr class="border-bottom-thin">
|
220 |
+
<td>
|
221 |
+
A Sardonic Mature Female Voice
|
222 |
+
</td>
|
223 |
+
<td>
|
224 |
+
<audio class="audio-lg" src="assets/audios/Onee-san.wav" controls></audio>
|
225 |
+
</td>
|
226 |
+
</tr>
|
227 |
+
</tbody>
|
228 |
+
</table>
|
229 |
+
</div>
|
230 |
+
</div>
|
231 |
+
|
232 |
+
<div class="article-block">
|
233 |
+
<h2 id="zero-shot-vs-one-shot-demonstrations">Zero-Shot vs. One-Shot Demonstrations</h2>
|
234 |
+
<p>
|
235 |
+
Zero-Shot maintains speaker identity while generating more natural emotions, pauses, and other expressive
|
236 |
+
features based
|
237 |
+
on the text content, whereas One-Shot adheres more strictly to the speaker characteristics (prosody, speech
|
238 |
+
rate,
|
239 |
+
emotions, etc.) demonstrated in the audio prompt.
|
240 |
+
</p>
|
241 |
+
<div class="scroll-wrapper" style="margin-top: 2rem;">
|
242 |
+
<table style="width: 100%;">
|
243 |
+
<tbody>
|
244 |
+
<tr class="border-bottom-thin">
|
245 |
+
<th scope="col">Source Audio</th>
|
246 |
+
<th scope="col">Prompt</th>
|
247 |
+
<th scope="col">Text</th>
|
248 |
+
<th scope="col">Zero-Shot Version</th>
|
249 |
+
<th scope="col">One-Shot Version</th>
|
250 |
+
<th scope="col">ElevenLabs Multilingual_v2</th>
|
251 |
+
</tr>
|
252 |
+
<tr class="border-bottom-thin">
|
253 |
+
<th>
|
254 |
+
<audio class="audio-sm" src="assets/audios/Lyrical%20Cantonese_Source.WAV" controls></audio>
|
255 |
+
</th>
|
256 |
+
<td>
|
257 |
+
<audio class="audio-sm" src="assets/audios/Lyrical%20Cantonese_Prompt.WAV" controls></audio>
|
258 |
+
</td>
|
259 |
+
<td>
|
260 |
+
命运就算颠沛流离,<br>
|
261 |
+
命运就算曲折离奇,<br>
|
262 |
+
命运就算恐吓着你,<br>
|
263 |
+
做人没趣味。<br>
|
264 |
+
别流泪,心酸,更不应舍弃。<br>
|
265 |
+
我愿能,一生永远陪伴你。
|
266 |
+
</td>
|
267 |
+
<td>
|
268 |
+
<audio class="audio-sm" src="assets/audios/Lyrical%20Cantonese_ZeroShot.mp3" controls></audio>
|
269 |
+
Preserving Distinctive Voice<br>
|
270 |
+
Timbre and Expressive <br>
|
271 |
+
Prosody with Regularized <br>
|
272 |
+
Pausing and Speech Rate
|
273 |
+
</td>
|
274 |
+
<td>
|
275 |
+
<audio class="audio-sm" src="assets/audios/Lyrical%20Cantonese_Oneshot.mp3" controls></audio>
|
276 |
+
Better Reproduction of<br>
|
277 |
+
Prompt's Exaggerated Speech<br>
|
278 |
+
Rate and Characteristic<br>
|
279 |
+
Phrase-Initial Pauses
|
280 |
+
</td>
|
281 |
+
<td>
|
282 |
+
Cantonese not supported
|
283 |
+
</td>
|
284 |
+
</tr>
|
285 |
+
<tr class="border-bottom-thin">
|
286 |
+
<th>
|
287 |
+
<audio class="audio-sm" src="assets/audios/Breaking%20Down%20Mandarin_Source.WAV" controls></audio>
|
288 |
+
</th>
|
289 |
+
<td>
|
290 |
+
<audio class="audio-sm" src="assets/audios/Breaking%20Down%20Mandarin_Prompt.WAV" controls></audio>
|
291 |
+
</td>
|
292 |
+
<td>
|
293 |
+
你们这些躲在道德高地的懦夫,<br>
|
294 |
+
敢承认自己对本我的恐惧吗?<br>
|
295 |
+
回答我!嗯?你回答我!<br>
|
296 |
+
Look in my eyes!<br>
|
297 |
+
老子写梦的解析时<br>
|
298 |
+
你们还在玩泥巴,<br>
|
299 |
+
我精神分析引论每个字母都能<br>
|
300 |
+
刺穿文明社会的虚伪面具,<br>
|
301 |
+
我解剖潜意识就像<br>
|
302 |
+
外科医生划开皮肤。<br>
|
303 |
+
是不是啊?说话!
|
304 |
+
</td>
|
305 |
+
<td>
|
306 |
+
<audio class="audio-sm" src="assets/audios/Breaking%20Down%20Mandarin_ZeroShot.mp3" controls></audio>
|
307 |
+
Capable of Generating<br>
|
308 |
+
Relatively Calmer Emotions<br>
|
309 |
+
while Preserving Voice<br>
|
310 |
+
Identity
|
311 |
+
</td>
|
312 |
+
<td>
|
313 |
+
<audio class="audio-sm" src="assets/audios/Breaking%20Down%20Mandarin_OneShot.mp3" controls></audio>
|
314 |
+
Consistently Reproducing the<br>
|
315 |
+
Angry Emotion from Prompt<br>
|
316 |
+
in Every Utterance
|
317 |
+
</td>
|
318 |
+
<td>
|
319 |
+
<audio class="audio-sm" src="assets/audios/ElevenLabs_Breaking%20Down%20Mandarin.mp3" controls></audio>
|
320 |
+
</td>
|
321 |
+
</tr>
|
322 |
+
<tr class="border-bottom-thin">
|
323 |
+
<th>
|
324 |
+
<audio class="audio-sm" src="assets/audios/Quirky%20Female%20English.MP3" controls></audio>
|
325 |
+
</th>
|
326 |
+
<td>
|
327 |
+
<audio class="audio-sm" src="assets/audios/Quirky%20Female%20English_Prompt.MP3" controls></audio>
|
328 |
+
</td>
|
329 |
+
<td>
|
330 |
+
Would you believe what happened at the<br>
|
331 |
+
grocery store today? My goodness! The<br>
|
332 |
+
avocados were on sale - half price! Half<br>
|
333 |
+
price! I bought twenty of them!
|
334 |
+
</td>
|
335 |
+
<td>
|
336 |
+
<audio class="audio-sm" src="assets/audios/Quirky%20Female%20English_ZeroShot.MP3" controls></audio>
|
337 |
+
Effectively follows textual cues<br>
|
338 |
+
for both longer and shorter<br>
|
339 |
+
inter-sentence pauses
|
340 |
+
</td>
|
341 |
+
<td>
|
342 |
+
<audio class="audio-sm" src="assets/audios/Quirky%20Female%20English_OneShot.MP3" controls></audio>
|
343 |
+
Better reproduces the<br>
|
344 |
+
exaggerated high pitch<br>
|
345 |
+
characteristic of anime voices<br>
|
346 |
+
</td>
|
347 |
+
<td>
|
348 |
+
<audio class="audio-sm" src="assets/audios/ElevenLabs_Quirky%20Female%20English.mp3" controls></audio>
|
349 |
+
</td>
|
350 |
+
</tr>
|
351 |
+
<tr>
|
352 |
+
<th>
|
353 |
+
<audio class="audio-sm" src="assets/audios/Neurotic%20Teenage%20English.MP3" controls></audio>
|
354 |
+
</th>
|
355 |
+
<td>
|
356 |
+
<audio class="audio-sm" src="assets/audios/Neurotic%20Teenage%20English_Prompt.MP3" controls></audio>
|
357 |
+
</td>
|
358 |
+
<td>
|
359 |
+
Oh my gosh, like, I literally can't believe<br>
|
360 |
+
what just happened! Um, so basically, I was,<br>
|
361 |
+
you know, just sitting there in class,<br>
|
362 |
+
right? And then, ugh, this totally weird<br>
|
363 |
+
thing happened - like, seriously weird! Wait,<br>
|
364 |
+
wait... Should I even be talking about this?<br>
|
365 |
+
Ugh, whatever.
|
366 |
+
</td>
|
367 |
+
<td>
|
368 |
+
<audio class="audio-sm" src="assets/audios/Neurotic%20Teenage%20English_ZeroShot.MP3"
|
369 |
+
controls></audio>
|
370 |
+
Effectively follows textual cues<br>
|
371 |
+
for both longer and shorter<br>
|
372 |
+
inter-sentence pauses
|
373 |
+
</td>
|
374 |
+
<td>
|
375 |
+
<audio class="audio-sm" src="assets/audios/Neurotic%20Teenage%20English_OneShot.MP3" controls></audio>
|
376 |
+
Better reproduces the<br>
|
377 |
+
exaggerated high pitch<br>
|
378 |
+
characteristic of anime voices<br>
|
379 |
+
</td>
|
380 |
+
<td>
|
381 |
+
<audio class="audio-sm" src="assets/audios/ElevenLabs_Neurotic%20Teenage%20English.mp3"
|
382 |
+
controls></audio>
|
383 |
+
</td>
|
384 |
+
</tr>
|
385 |
+
</tbody>
|
386 |
+
</table>
|
387 |
+
</div>
|
388 |
+
</div>
|
389 |
+
|
390 |
+
<div class="article-block">
|
391 |
+
<h2 id="multilingual-and-cross-lingual-capabilities-demonstrations">Multilingual and Cross-Lingual Capabilities
|
392 |
+
Demonstrations</h2>
|
393 |
+
<p>Speech-02-HD maintains high naturalness in less common languages while demonstrating significant advantages
|
394 |
+
in
|
395 |
+
Standard
|
396 |
+
Chinese pronunciation accuracy.</p>
|
397 |
+
<div class="scroll-wrapper" style="margin-top: 2rem;">
|
398 |
+
<table style="width: 100%;">
|
399 |
+
<tbody>
|
400 |
+
<tr class="border-bottom-thin">
|
401 |
+
<th scope="col">Languages</th>
|
402 |
+
<th scope="col">Source Audio</th>
|
403 |
+
<th scope="col">Text</th>
|
404 |
+
<th scope="col">MiniMax<br>Speech_02_HD</th>
|
405 |
+
<th scope="col">ElevenLabs<br>Multilingual_v2</th>
|
406 |
+
<th scope="col">OpenAI<br>TTS_1_HD<br>(*not cloned voice)</th>
|
407 |
+
</tr>
|
408 |
+
<!-- Thai -->
|
409 |
+
<tr class="border-bottom-thin">
|
410 |
+
<th>Thai</th>
|
411 |
+
<td>
|
412 |
+
<audio class="audio-sm" src="assets/audios/Thai_Male_Sourse.wav" controls></audio>
|
413 |
+
</td>
|
414 |
+
<td>
|
415 |
+
สวัสดีค่ะ วันนี้อากาศดีมากเลย<br>
|
416 |
+
คุณจะไปทานอาหารกลางวันที่ไหนคะ<br>
|
417 |
+
ฉันกำลังคิดว่าจะไปร้านอาหารไทยแถวนี้<br>
|
418 |
+
</td>
|
419 |
+
<td>
|
420 |
+
<audio class="audio-sm" src="assets/audios/Thai.mp3" controls></audio>
|
421 |
+
</td>
|
422 |
+
<td>
|
423 |
+
Thai not perfectly supported
|
424 |
+
<audio class="audio-sm" src="assets/audios/ElevenLabs_Thai.mp3" controls></audio>
|
425 |
+
</td>
|
426 |
+
<td>
|
427 |
+
<audio class="audio-sm" src="assets/audios/OpenAI_Thai.mp3" controls></audio>
|
428 |
+
</td>
|
429 |
+
</tr>
|
430 |
+
<!-- Vietnamese -->
|
431 |
+
<tr class="border-bottom-thin">
|
432 |
+
<th>Vietnamese</th>
|
433 |
+
<td>
|
434 |
+
<audio class="audio-sm" src="assets/audios/Vietnamese_Female_Sourse.wav" controls></audio>
|
435 |
+
</td>
|
436 |
+
<td>
|
437 |
+
Tôi đang đọc một cuốn sách rất hay về lịch sử Việt Nam.<br>
|
438 |
+
Những câu chuyện về văn hóa truyền<br>
|
439 |
+
thống thật sự rất thú vị.<br>
|
440 |
+
</td>
|
441 |
+
<td>
|
442 |
+
<audio class="audio-sm" src="assets/audios/Vietnamese.mp3" controls></audio>
|
443 |
+
</td>
|
444 |
+
<td>
|
445 |
+
Vietnamese not perfectly supported
|
446 |
+
<audio class="audio-sm" src="assets/audios/ElevenLabs_Vietnamese.mp3" controls></audio>
|
447 |
+
</td>
|
448 |
+
<td>
|
449 |
+
<audio class="audio-sm" src="assets/audios/OpenAI_Vietnamese.mp3" controls></audio>
|
450 |
+
</td>
|
451 |
+
</tr>
|
452 |
+
<!-- Czech -->
|
453 |
+
<tr class="border-bottom-thin">
|
454 |
+
<th>Czech</th>
|
455 |
+
<td>
|
456 |
+
<audio class="audio-sm" src="assets/audios/Czech_Female_Sourse.wav" controls></audio>
|
457 |
+
</td>
|
458 |
+
<td>
|
459 |
+
Ranní mlha se pomalu zvedá nad řekou,<br>
|
460 |
+
zatímco první paprsky slunce prosvítají mezi stromy.<br>
|
461 |
+
Ptáci začínají svůj ranní koncert.<br>
|
462 |
+
</td>
|
463 |
+
<td>
|
464 |
+
<audio class="audio-sm" src="assets/audios/Czech.mp3" controls></audio>
|
465 |
+
</td>
|
466 |
+
<td>
|
467 |
+
<audio class="audio-sm" src="assets/audios/ElevenLabs_Czech.mp3" controls></audio>
|
468 |
+
</td>
|
469 |
+
<td>
|
470 |
+
<audio class="audio-sm" src="assets/audios/OpenAI_Czech.mp3" controls></audio>
|
471 |
+
</td>
|
472 |
+
</tr>
|
473 |
+
<!-- Polish -->
|
474 |
+
<tr class="border-bottom-thin">
|
475 |
+
<th>Polish</th>
|
476 |
+
<td>
|
477 |
+
<audio class="audio-sm" src="assets/audios/Polish_Male_Sourse.wav" controls></audio>
|
478 |
+
</td>
|
479 |
+
<td>
|
480 |
+
Młoda sowa siedzi cicho na gałęzi sosny,<br>
|
481 |
+
obserwując leśną polanę w świetle księżyca.<br>
|
482 |
+
Wiatr delikatnie porusza liśćmi drzew.<br>
|
483 |
+
</td>
|
484 |
+
<td>
|
485 |
+
<audio class="audio-sm" src="assets/audios/Polish.mp3" controls></audio>
|
486 |
+
</td>
|
487 |
+
<td>
|
488 |
+
<audio class="audio-sm" src="assets/audios/ElevenLabs_Polish.mp3" controls></audio>
|
489 |
+
</td>
|
490 |
+
<td>
|
491 |
+
<audio class="audio-sm" src="assets/audios/OpenAI_Polish.mp3" controls></audio>
|
492 |
+
</td>
|
493 |
+
</tr>
|
494 |
+
<!-- Japanese -->
|
495 |
+
<tr class="border-bottom-thin">
|
496 |
+
<th>Japanese</th>
|
497 |
+
<td>
|
498 |
+
<audio class="audio-sm" src="assets/audios/Japanese_DominantMan_Sourse.mp3" controls></audio>
|
499 |
+
</td>
|
500 |
+
<td>
|
501 |
+
電車が遅延している影響で、渋谷駅がとても混雑<br>
|
502 |
+
しています。次の山手線は約10分後に到着<br>
|
503 |
+
予定です。お急ぎのお客様は、他の路線も<br>
|
504 |
+
ご利用ください。
|
505 |
+
</td>
|
506 |
+
<td>
|
507 |
+
<audio class="audio-sm" src="assets/audios/Japanese.mp3" controls></audio>
|
508 |
+
</td>
|
509 |
+
<td>
|
510 |
+
<audio class="audio-sm" src="assets/audios/ElevenLabs_Japanese_Dominant_Man.mp3" controls></audio>
|
511 |
+
</td>
|
512 |
+
<td>
|
513 |
+
<audio class="audio-sm" src="assets/audios/OpenAI_Japanese.mp3" controls></audio>
|
514 |
+
</td>
|
515 |
+
</tr>
|
516 |
+
</tbody>
|
517 |
+
</table>
|
518 |
+
</div>
|
519 |
+
<p style="margin-top: 4rem;">Speech-02-HD has superior performance in zero-shot cross-lingual scenarios.</p>
|
520 |
+
<div class="scroll-wrapper" style="margin-top: 2rem;">
|
521 |
+
<table style="width: 100%;">
|
522 |
+
<tbody>
|
523 |
+
<tr class="border-bottom-thin">
|
524 |
+
<th scope="col">Original Language</th>
|
525 |
+
<th scope="col">Mixed Language</th>
|
526 |
+
<th scope="col">Source Audio</th>
|
527 |
+
<th scope="col">Text</th>
|
528 |
+
<th scope="col">MiniMax<br>Speech_02_HD</th>
|
529 |
+
<th scope="col">ElevenLabs<br>Multilingual_v2</th>
|
530 |
+
<th scope="col">OpenAI<br>TTS_1_HD<br>(*not cloned voice)</th>
|
531 |
+
</tr>
|
532 |
+
<tr class="border-bottom-thin">
|
533 |
+
<td>English</td>
|
534 |
+
<td>English + Mandarin</td>
|
535 |
+
<td>
|
536 |
+
<audio class="audio-sm" src="assets/audios/Wong_Sourse.mp3" controls></audio>
|
537 |
+
</td>
|
538 |
+
<td>
|
539 |
+
Kiddo! Come come come, 学如逆水行舟,不进则退。<br>
|
540 |
+
I see you're using AI tools already - so smart!<br>
|
541 |
+
But eh, cannot just rely on tools only lah!<br>
|
542 |
+
The future belongs to those who can work alongside AI,<br>
|
543 |
+
not those scared of it.
|
544 |
+
</td>
|
545 |
+
<td>
|
546 |
+
<audio class="audio-md" src="assets/audios/English-Mandarin.wav" controls></audio>
|
547 |
+
</td>
|
548 |
+
<td>
|
549 |
+
<audio class="audio-md" src="assets/audios/ElevenLabs_English-Mandarin.mp3" controls></audio>
|
550 |
+
</td>
|
551 |
+
<td>
|
552 |
+
<audio class="audio-md" src="assets/audios/OpenAI_English-Mandarin.mp3" controls></audio>
|
553 |
+
</td>
|
554 |
+
</tr>
|
555 |
+
<tr class="border-bottom-thin">
|
556 |
+
<td>Mandarin</td>
|
557 |
+
<td>Mandarin + Cantonese</td>
|
558 |
+
<td>
|
559 |
+
<audio class="audio-sm" src="assets/audios/ShiBanYu_Sourse.mp3" controls></audio>
|
560 |
+
</td>
|
561 |
+
<td>
|
562 |
+
老铁啊,多谢晒你送我呢本,广州话正音字典,咁好嘢喎!<br>
|
563 |
+
我呢个大老爷们儿学广州话真系好难㗎!成日都分唔清声调啊。<br>
|
564 |
+
嗱,而家有咗呢本书,什么都好啦。
|
565 |
+
</td>
|
566 |
+
<td>
|
567 |
+
<audio class="audio-md" src="assets/audios/Mandarin-Cantonese.wav" controls></audio>
|
568 |
+
</td>
|
569 |
+
<td>
|
570 |
+
Cantonese not supported
|
571 |
+
</td>
|
572 |
+
<td>
|
573 |
+
Cantonese not supported
|
574 |
+
</td>
|
575 |
+
</tr>
|
576 |
+
<tr class="border-bottom-thin">
|
577 |
+
<td>Mandarin</td>
|
578 |
+
<td>Mandarin + English</td>
|
579 |
+
<td>
|
580 |
+
<audio class="audio-sm" src="assets/audios/ShuanQ_Sourse.mp3" controls></audio>
|
581 |
+
</td>
|
582 |
+
<td>
|
583 |
+
The people said, 桂林's scenery is the first under heaven.<br>
|
584 |
+
Yet in my opinion, 阳朔 scenery is better than 桂林。<br>
|
585 |
+
群峰倒影山浮水,无水无山不入神。
|
586 |
+
</td>
|
587 |
+
<td>
|
588 |
+
<audio class="audio-md" src="assets/audios/Mandarin-English.WAV" controls></audio>
|
589 |
+
</td>
|
590 |
+
<td>
|
591 |
+
<audio class="audio-md" src="assets/audios/ElevenLabs_Mandarin-English.mp3" controls></audio>
|
592 |
+
</td>
|
593 |
+
<td>
|
594 |
+
<audio class="audio-md" src="assets/audios/OpenAI_Mandarin-English.mp3" controls></audio>
|
595 |
+
</td>
|
596 |
+
</tr>
|
597 |
+
<tr class="border-bottom-thin">
|
598 |
+
<td>English</td>
|
599 |
+
<td>English + Spanish</td>
|
600 |
+
<td>
|
601 |
+
<audio class="audio-sm" src="assets/audios/CoCo_Sourse.mp3" controls></audio>
|
602 |
+
</td>
|
603 |
+
<td>
|
604 |
+
Mi abuelita always told me "el que persevera, alcanza".<br>
|
605 |
+
If you persevere, you'll achieve your dreams!<br>
|
606 |
+
Guess what! They choose me to play the lead role in our BIG show!
|
607 |
+
</td>
|
608 |
+
<td>
|
609 |
+
<audio class="audio-md" src="assets/audios/English-Spanish.wav" controls></audio>
|
610 |
+
</td>
|
611 |
+
<td>
|
612 |
+
<audio class="audio-md" src="assets/audios/ElevenLabs_English-Spanish.mp3" controls></audio>
|
613 |
+
</td>
|
614 |
+
<td>
|
615 |
+
<audio class="audio-md" src="assets/audios/OpenAI_English-Spanish.mp3" controls></audio>
|
616 |
+
</td>
|
617 |
+
</tr>
|
618 |
+
<tr class="border-bottom-thin">
|
619 |
+
<td>Japanese</td>
|
620 |
+
<td>Japanese + Korean</td>
|
621 |
+
<td>
|
622 |
+
<audio class="audio-sm" src="assets/audios/Powerful_Girl_Sourse.mp3" controls></audio>
|
623 |
+
</td>
|
624 |
+
<td>
|
625 |
+
最近の天気予報によりますと、今週末は桜の開花に最適<br>
|
626 |
+
な気温になる予定です。<br>
|
627 |
+
東京都内の各公園では花見客で賑わうことが予想されますが、<br>
|
628 |
+
서울에서도 벚꽃이 피기 시작했다고 하네요.<br>
|
629 |
+
이번 주말에는 여의도 공원에서 벚꽃 축제가 열린다고 하니<br>
|
630 |
+
많은 분들이 찾아오실 것 같습니다.
|
631 |
+
</td>
|
632 |
+
<td>
|
633 |
+
<audio class="audio-md" src="assets/audios/Japanese-Korean.mp3" controls></audio>
|
634 |
+
</td>
|
635 |
+
<td>
|
636 |
+
<audio class="audio-md" src="assets/audios/ElevenLabs_Japanese-Korean.mp3" controls></audio>
|
637 |
+
</td>
|
638 |
+
<td>
|
639 |
+
<audio class="audio-md" src="assets/audios/OpenAI_Japanese-Korean.mp3" controls></audio>
|
640 |
+
</td>
|
641 |
+
</tr>
|
642 |
+
</tbody>
|
643 |
+
</table>
|
644 |
+
</div>
|
645 |
+
<p>*Although OpenAI currently does not support voice cloning functionality, we still wish to conduct comparative
|
646 |
+
listening
|
647 |
+
tests with its excellent naturalness as a reference.</p>
|
648 |
+
</div>
|
649 |
+
|
650 |
+
<div class="article-block">
|
651 |
+
<h2 id="flow-vae-vs-vae-comparisons">Flow-VAE vs. VAE Comparisons</h2>
|
652 |
+
<p>Flow-VAE is less likely to produce the following instabilities.</p>
|
653 |
+
<div class="scroll-wrapper" style="margin-top: 2rem;">
|
654 |
+
<table style="width: 100%;">
|
655 |
+
<tbody>
|
656 |
+
<tr class="border-bottom-thin">
|
657 |
+
<th scope="col" style="text-align: center;">Source Audio</th>
|
658 |
+
<th scope="col" style="text-align: center;">Flow-VAE</th>
|
659 |
+
<th scope="col" style="text-align: center;">VAE</th>
|
660 |
+
<th scope="col" style="text-align: center;">Differences</th>
|
661 |
+
</tr>
|
662 |
+
<tr class="border-bottom-thin">
|
663 |
+
<td style="width: 25%">
|
664 |
+
<audio src="assets/audios/Condition1.wav" controls></audio>
|
665 |
+
</td>
|
666 |
+
<td style="width: 25%">
|
667 |
+
<audio src="assets/audios/FlowVAE1.wav" controls></audio>
|
668 |
+
</td>
|
669 |
+
<td style="width: 25%">
|
670 |
+
<audio src="assets/audios/VAE1.wav" controls></audio>
|
671 |
+
</td>
|
672 |
+
<td>
|
673 |
+
Flow-VAE reproduces more continuous<br>
|
674 |
+
and natural reverberation
|
675 |
+
</td>
|
676 |
+
</tr>
|
677 |
+
<tr class="border-bottom-thin">
|
678 |
+
<td>
|
679 |
+
<audio src="assets/audios/Condition2.wav" controls></audio>
|
680 |
+
</td>
|
681 |
+
<td>
|
682 |
+
<audio src="assets/audios/FlowVAE2.wav" controls></audio>
|
683 |
+
</td>
|
684 |
+
<td>
|
685 |
+
<audio src="assets/audios/VAE2.wav" controls></audio>
|
686 |
+
</td>
|
687 |
+
<td>
|
688 |
+
VAE introduces unwanted<br>
|
689 |
+
high-frequency components
|
690 |
+
</td>
|
691 |
+
</tr>
|
692 |
+
<tr>
|
693 |
+
<td>
|
694 |
+
<audio src="assets/audios/Conditon3.wav" controls></audio>
|
695 |
+
</td>
|
696 |
+
<td>
|
697 |
+
<audio src="assets/audios/FlowVAE3.wav" controls></audio>
|
698 |
+
</td>
|
699 |
+
<td>
|
700 |
+
<audio src="assets/audios/VAE3.wav" controls></audio>
|
701 |
+
</td>
|
702 |
+
<td>
|
703 |
+
VAE produces electronic-sounding<br>
|
704 |
+
artifacts at the beginning
|
705 |
+
</td>
|
706 |
+
</tr>
|
707 |
+
</tbody>
|
708 |
+
</table>
|
709 |
+
</div>
|
710 |
+
</div>
|
711 |
+
|
712 |
+
<div class="article-block">
|
713 |
+
<h2 id="professional-voice-clone-pvc-demonstrations">Professional Voice Clone (PVC) Demonstrations</h2>
|
714 |
+
<p>For more complex dialectal accents and tonal characteristics, PVC can reproduce these features while
|
715 |
+
maintaining high
|
716 |
+
naturalness based on the text content.</p>
|
717 |
+
<div class="scroll-wrapper" style="margin-top: 2rem;">
|
718 |
+
<table style="width: 100%;">
|
719 |
+
<tbody>
|
720 |
+
<tr class="border-bottom-thin">
|
721 |
+
<th scope="col" style="text-align: center;">Source Audio</th>
|
722 |
+
<th scope="col" style="text-align: center;">Zero-Shot</th>
|
723 |
+
<th scope="col" style="text-align: center;">PVC</th>
|
724 |
+
<th scope="col" style="text-align: center;">Differences</th>
|
725 |
+
</tr>
|
726 |
+
<tr class="border-bottom-thin">
|
727 |
+
<td style="width: 25%">
|
728 |
+
<audio src="assets/audios/JosephBrodsky_Source.wav" controls></audio>
|
729 |
+
</td>
|
730 |
+
<td style="width: 25%">
|
731 |
+
<audio src="assets/audios/JosephBrodsky_Fast.mp3" controls></audio>
|
732 |
+
</td>
|
733 |
+
<td style="width: 25%">
|
734 |
+
<audio src="assets/audios/JosephBrodsky_PVC.mp3" controls></audio>
|
735 |
+
</td>
|
736 |
+
<td>
|
737 |
+
Like the Zero-Shot version, the PVC<br>
|
738 |
+
version has rising sentence-final intonation,<br>
|
739 |
+
but distinctively sustains this<br>
|
740 |
+
elevated pitch instead of the typical<br>
|
741 |
+
pitch declination found in common<br>
|
742 |
+
declarative sentences
|
743 |
+
</td>
|
744 |
+
</tr>
|
745 |
+
<tr class="border-bottom-thin">
|
746 |
+
<td>
|
747 |
+
<audio src="assets/audios/TianJin_Source.wav" controls></audio>
|
748 |
+
</td>
|
749 |
+
<td>
|
750 |
+
<audio src="assets/audios/TianJin_Fast.mp3" controls></audio>
|
751 |
+
</td>
|
752 |
+
<td>
|
753 |
+
<audio src="assets/audios/TianJin_PVC.mp3" controls></audio>
|
754 |
+
</td>
|
755 |
+
<td>
|
756 |
+
With more materials, the model not only<br>
|
757 |
+
reproduces the speaker's voice characteristics<br>
|
758 |
+
but also accurately captures more<br>
|
759 |
+
dialectal features
|
760 |
+
</td>
|
761 |
+
</tr>
|
762 |
+
</tbody>
|
763 |
+
</table>
|
764 |
+
</div>
|
765 |
+
</div>
|
766 |
+
|
767 |
+
<div class="article-block">
|
768 |
+
<h2 id="emotion-control-demonstrations">Emotion Control Demonstrations</h2>
|
769 |
+
<h3>Source Audio for Refreshing Young Man</h3>
|
770 |
+
<audio src="assets/audios/Mandarin_Refreshing_Young_Man_Sourse.mp3" controls></audio>
|
771 |
+
<h3>DEMO</h3>
|
772 |
+
<div class="scroll-wrapper">
|
773 |
+
<table style="width: 100%;">
|
774 |
+
<tbody>
|
775 |
+
<tr class="border-bottom-thin">
|
776 |
+
<th scope="col">Neutral</th>
|
777 |
+
<th scope="col" style="min-width: 120px;">Emotion</th>
|
778 |
+
<th scope="col">Text</th>
|
779 |
+
<th scope="col">Emotion Control Audio</th>
|
780 |
+
</tr>
|
781 |
+
<tr class="border-bottom-thin">
|
782 |
+
<td>
|
783 |
+
<audio class="audio-md" src="assets/audios/Neutral1.mp3" controls></audio>
|
784 |
+
</td>
|
785 |
+
<td>
|
786 |
+
Surprised
|
787 |
+
</td>
|
788 |
+
<td>
|
789 |
+
天哪!我完全没想到会在这里遇见你,<br>
|
790 |
+
都过去这么多年了,你一点都没变!
|
791 |
+
</td>
|
792 |
+
<td>
|
793 |
+
<audio class="audio-md" src="assets/audios/Surprised.mp3" controls></audio>
|
794 |
+
</td>
|
795 |
+
</tr>
|
796 |
+
<tr class="border-bottom-thin">
|
797 |
+
<td>
|
798 |
+
<audio class="audio-md" src="assets/audios/Neutral2.mp3" controls></audio>
|
799 |
+
</td>
|
800 |
+
<td>
|
801 |
+
Disgusted
|
802 |
+
</td>
|
803 |
+
<td>
|
804 |
+
这个地方实在太脏乱了,到处都是垃圾和难闻的气味儿,<br>
|
805 |
+
我一秒钟都不想多待。
|
806 |
+
</td>
|
807 |
+
<td>
|
808 |
+
<audio class="audio-md" src="assets/audios/Disgusted.mp3" controls></audio>
|
809 |
+
</td>
|
810 |
+
</tr>
|
811 |
+
<tr class="border-bottom-thin">
|
812 |
+
<td>
|
813 |
+
<audio class="audio-md" src="assets/audios/Neutral3.mp3" controls></audio>
|
814 |
+
</td>
|
815 |
+
<td>
|
816 |
+
Fearful
|
817 |
+
</td>
|
818 |
+
<td>
|
819 |
+
深夜回家的路上,我清楚地听见身后有脚步声在跟着我,<br>
|
820 |
+
可是回头却什么都看不见。
|
821 |
+
</td>
|
822 |
+
<td>
|
823 |
+
<audio class="audio-md" src="assets/audios/Fearful.mp3" controls></audio>
|
824 |
+
</td>
|
825 |
+
</tr>
|
826 |
+
<tr class="border-bottom-thin">
|
827 |
+
<td>
|
828 |
+
<audio class="audio-md" src="assets/audios/Neutral4.mp3" controls></audio>
|
829 |
+
</td>
|
830 |
+
<td>
|
831 |
+
Angry
|
832 |
+
</td>
|
833 |
+
<td>
|
834 |
+
我付出了这么多,换来的却是这样的背叛!<br>
|
835 |
+
你怎么可以这样对待我的信任!
|
836 |
+
</td>
|
837 |
+
<td>
|
838 |
+
<audio class="audio-md" src="assets/audios/Angry.mp3" controls></audio>
|
839 |
+
</td>
|
840 |
+
</tr>
|
841 |
+
<tr class="border-bottom-thin">
|
842 |
+
<td>
|
843 |
+
<audio class="audio-md" src="assets/audios/Neutral5.mp3" controls></audio>
|
844 |
+
</td>
|
845 |
+
<td>
|
846 |
+
Sad
|
847 |
+
</td>
|
848 |
+
<td>
|
849 |
+
躺在床上翻来覆去,心里压着说不出的难过和沮丧,<br>
|
850 |
+
昨天晚上又失眠了。
|
851 |
+
</td>
|
852 |
+
<td>
|
853 |
+
<audio class="audio-md" src="assets/audios/Sad.mp3" controls></audio>
|
854 |
+
</td>
|
855 |
+
</tr>
|
856 |
+
<tr class="border-bottom-thin">
|
857 |
+
<td>
|
858 |
+
<audio class="audio-md" src="assets/audios/Neutral6.mp3" controls></audio>
|
859 |
+
</td>
|
860 |
+
<td>
|
861 |
+
Happy
|
862 |
+
</td>
|
863 |
+
<td>
|
864 |
+
和好朋友一起在院子里烧烤,聊着有趣的故事,<br>
|
865 |
+
享受着美食和欢乐的时光。
|
866 |
+
</td>
|
867 |
+
<td>
|
868 |
+
<audio class="audio-md" src="assets/audios/Happy.mp3" controls></audio>
|
869 |
+
</td>
|
870 |
+
</tr>
|
871 |
+
</tbody>
|
872 |
+
</table>
|
873 |
+
</div>
|
874 |
+
</div>
|
875 |
+
|
876 |
+
<div class="article-block">
|
877 |
+
<h2 id="text-prompted-voice-generation-demonstrations">Text-Prompted Voice Generation Demonstrations</h2>
|
878 |
+
<div class="scroll-wrapper">
|
879 |
+
<table style="width: 100%;">
|
880 |
+
<tbody>
|
881 |
+
<tr class="border-bottom-thin">
|
882 |
+
<th scope="col">Prompt</th>
|
883 |
+
<th scope="col">Text</th>
|
884 |
+
<th scope="col" style="text-align: center;">Audio</th>
|
885 |
+
</tr>
|
886 |
+
<tr class="border-bottom-thin">
|
887 |
+
<td>
|
888 |
+
男性中年声音,说中文,音色浑厚醇厚,带有自然的磁性,语速偏慢,<br>
|
889 |
+
音量适中,音调偏低沉。声音整体给人沉稳可靠的感觉,<br>
|
890 |
+
在深度访谈场景中展现出专业性和亲和力,音质清晰,吐字规整有力。
|
891 |
+
</td>
|
892 |
+
<td>
|
893 |
+
在这个安静的夜晚,让我们一起走进《人生笔记》这本书。<br>
|
894 |
+
作者用平实的文字记录下生活中的点点滴滴,<br>
|
895 |
+
让我们看到平凡中的真善美。<br>
|
896 |
+
今天,我们先来读第一章:'生活的痕迹'......
|
897 |
+
</td>
|
898 |
+
<td>
|
899 |
+
<audio class="audio-md" src="assets/audios/深度访谈男中年.wav" controls></audio>
|
900 |
+
</td>
|
901 |
+
</tr>
|
902 |
+
<tr class="border-bottom-thin">
|
903 |
+
<td>
|
904 |
+
说中文的女青年,音色偏甜美,语速比较快,说话时带着一种轻快的感觉,<br>
|
905 |
+
整体音调较高,像是在直播带货,整体氛围比较活跃,<br>
|
906 |
+
声音清晰,听起来很有亲和力。
|
907 |
+
</td>
|
908 |
+
<td>
|
909 |
+
亲爱的宝宝们,等了好久的神仙面霜终于到货啦!<br>
|
910 |
+
你们看这个包装是不是超级精致?<br>
|
911 |
+
我自己已经用了一个月了,效果真的绝绝子!<br>
|
912 |
+
而且这次活动价真的太划算了,错过真的会后悔的哦~
|
913 |
+
</td>
|
914 |
+
<td>
|
915 |
+
<audio class="audio-md" src="assets/audios/直播带货女青年.wav" controls></audio>
|
916 |
+
</td>
|
917 |
+
</tr>
|
918 |
+
<tr class="border-bottom-thin">
|
919 |
+
<td>
|
920 |
+
中国男性声音,听着像是青年,音色清亮,语速比较快,<br>
|
921 |
+
说话很有激情,像是在解说比赛,声音中带着紧张和兴奋的感觉。
|
922 |
+
</td>
|
923 |
+
<td>
|
924 |
+
漂亮!这个进攻太精彩了!张伟突破防线,<br>
|
925 |
+
一个漂亮的转身,球传到禁区,王超跟上,射门!<br>
|
926 |
+
球进了!难以置信的精彩配合,现场观众都沸腾了!
|
927 |
+
</td>
|
928 |
+
<td>
|
929 |
+
<audio class="audio-md" src="assets/audios/体育解说男青年.wav" controls></audio>
|
930 |
+
</td>
|
931 |
+
</tr>
|
932 |
+
<tr>
|
933 |
+
<td>
|
934 |
+
中国女青年的声音,音色清脆,说话速度偏快,语调活泼,<br>
|
935 |
+
像是在做游戏直播,声音中带着愉快的感觉,整体音调较高,<br>
|
936 |
+
整体氛围比较轻松。
|
937 |
+
</td>
|
938 |
+
<td>
|
939 |
+
啊!这里有个宝箱!让我们看看里面是什么~<br>
|
940 |
+
哇!是传说中的紫色装备!运气也太好了吧!<br>
|
941 |
+
谢谢小伙伴们的打赏,我们继续往前探索......
|
942 |
+
</td>
|
943 |
+
<td>
|
944 |
+
<audio class="audio-md" src="assets/audios/游戏主播女青年.wav" controls></audio>
|
945 |
+
</td>
|
946 |
+
</tr>
|
947 |
+
</tbody>
|
948 |
+
</table>
|
949 |
+
</div>
|
950 |
+
</div>
|
951 |
+
|
952 |
+
<div class="article-block">
|
953 |
+
<h2 id="comparison-of-voice-naturalness">Comparison of voice naturalness
|
954 |
+
with the previous generation products</h2>
|
955 |
+
<p>The new model demonstrates significant advantages in naturalness compared to the previous version.</p>
|
956 |
+
<h3 style="margin-top: 2rem;">Source Audio for Radiant_Girl</h3>
|
957 |
+
<audio src="assets/audios/English_Radiant_Girl_Sourse.wav" controls></audio>
|
958 |
+
<h3>DEMO</h3>
|
959 |
+
<div class="scroll-wrapper">
|
960 |
+
<table style="width: 100%;">
|
961 |
+
<tbody>
|
962 |
+
<tr class="border-bottom-thin">
|
963 |
+
<th scope="col">Text</th>
|
964 |
+
<th scope="col" style="text-align: center;">MiniMax<br>Speech_02_HD</th>
|
965 |
+
<th scope="col" style="text-align: center;">Microsoft<br>Azure TTS</th>
|
966 |
+
<th scope="col" style="text-align: center;">AWS<br>Polly</th>
|
967 |
+
</tr>
|
968 |
+
<tr class="border-bottom-thin">
|
969 |
+
<td>
|
970 |
+
I sat alone in the empty room, staring at the old photographs,<br>
|
971 |
+
wondering how everything could change so quickly,<br>
|
972 |
+
how a lifetime of memories could fade away just like that.
|
973 |
+
</td>
|
974 |
+
<td>
|
975 |
+
<audio class="audio-md" src="assets/audios/Radiant_Girl_1.mp3" controls></audio>
|
976 |
+
</td>
|
977 |
+
<td>
|
978 |
+
<audio class="audio-md" src="assets/audios/Emma_1.mp3" controls></audio>
|
979 |
+
</td>
|
980 |
+
<td>
|
981 |
+
<audio class="audio-md" src="assets/audios/Joanna_1.mp3" controls></audio>
|
982 |
+
</td>
|
983 |
+
</tr>
|
984 |
+
<tr class="border-bottom-thin">
|
985 |
+
<td>
|
986 |
+
The moment I held my acceptance letter, my heart burst with joy - <br>
|
987 |
+
all those sleepless nights finally paid off, and I couldn't stop<br>
|
988 |
+
dancing around the room, calling everyone I knew to share this amazing news!
|
989 |
+
</td>
|
990 |
+
<td>
|
991 |
+
<audio class="audio-md" src="assets/audios/Radiant_Girl_2.mp3" controls></audio>
|
992 |
+
</td>
|
993 |
+
<td>
|
994 |
+
<audio class="audio-md" src="assets/audios/Emma_2.mp3" controls></audio>
|
995 |
+
</td>
|
996 |
+
<td>
|
997 |
+
<audio class="audio-md" src="assets/audios/Joanna_2.mp3" controls></audio>
|
998 |
+
</td>
|
999 |
+
</tr>
|
1000 |
+
</tbody>
|
1001 |
+
</table>
|
1002 |
+
</div>
|
1003 |
+
</div>
|
1004 |
+
</article>
|
1005 |
+
</main>
|
1006 |
+
|
1007 |
+
<script>
|
1008 |
+
MathJax = {
|
1009 |
+
tex: {
|
1010 |
+
inlineMath: [['$', '$'],],
|
1011 |
+
},
|
1012 |
+
}
|
1013 |
+
|
1014 |
+
const darkModeToggle = document.getElementById('dark-mode-toggle')
|
1015 |
+
darkModeToggle.addEventListener('click', () => {
|
1016 |
+
document.body.classList.toggle('latex-dark')
|
1017 |
+
})
|
1018 |
+
</script>
|
1019 |
+
<script id="MathJax-script" async src="https://cdn.jsdelivr.net/npm/mathjax@3/es5/tex-mml-chtml.js"></script>
|
1020 |
+
</body>
|
1021 |
+
|
1022 |
+
</html>
|
style.css
CHANGED
@@ -1,28 +1,841 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
1 |
body {
|
2 |
-
|
3 |
-
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
4 |
}
|
5 |
|
6 |
-
|
7 |
-
|
8 |
-
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
9 |
}
|
10 |
|
11 |
p {
|
12 |
-
|
13 |
-
|
14 |
-
|
15 |
-
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
16 |
}
|
17 |
|
18 |
-
|
19 |
-
|
20 |
-
|
21 |
-
|
22 |
-
|
23 |
-
|
|
|
|
|
24 |
}
|
25 |
|
26 |
-
.
|
27 |
-
|
28 |
}
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
1 |
+
/*!
|
2 |
+
* LaTeX.css (https://latex.vercel.app/)
|
3 |
+
*
|
4 |
+
* Source: https://github.com/vincentdoerig/latex-css
|
5 |
+
* Licensed under MIT (https://github.com/vincentdoerig/latex-css/blob/master/LICENSE)
|
6 |
+
*/
|
7 |
+
|
8 |
+
@font-face {
|
9 |
+
font-family: 'Latin Modern';
|
10 |
+
font-style: normal;
|
11 |
+
font-weight: normal;
|
12 |
+
font-display: swap;
|
13 |
+
src: url('./fonts/LM-regular.woff2') format('woff2'),
|
14 |
+
url('./fonts/LM-regular.woff') format('woff'),
|
15 |
+
url('./fonts/LM-regular.ttf') format('truetype');
|
16 |
+
}
|
17 |
+
|
18 |
+
@font-face {
|
19 |
+
font-family: 'Latin Modern';
|
20 |
+
font-style: italic;
|
21 |
+
font-weight: normal;
|
22 |
+
font-display: swap;
|
23 |
+
src: url('./fonts/LM-italic.woff2') format('woff2'),
|
24 |
+
url('./fonts/LM-italic.woff') format('woff'),
|
25 |
+
url('./fonts/LM-italic.ttf') format('truetype');
|
26 |
+
}
|
27 |
+
|
28 |
+
@font-face {
|
29 |
+
font-family: 'Latin Modern';
|
30 |
+
font-style: normal;
|
31 |
+
font-weight: bold;
|
32 |
+
font-display: swap;
|
33 |
+
src: url('./fonts/LM-bold.woff2') format('woff2'),
|
34 |
+
url('./fonts/LM-bold.woff') format('woff'),
|
35 |
+
url('./fonts/LM-bold.ttf') format('truetype');
|
36 |
+
}
|
37 |
+
|
38 |
+
@font-face {
|
39 |
+
font-family: 'Latin Modern';
|
40 |
+
font-style: italic;
|
41 |
+
font-weight: bold;
|
42 |
+
font-display: swap;
|
43 |
+
src: url('./fonts/LM-bold-italic.woff2') format('woff2'),
|
44 |
+
url('./fonts/LM-bold-italic.woff') format('woff'),
|
45 |
+
url('./fonts/LM-bold-italic.ttf') format('truetype');
|
46 |
+
}
|
47 |
+
|
48 |
+
@font-face {
|
49 |
+
font-family: 'Libertinus';
|
50 |
+
font-style: normal;
|
51 |
+
font-weight: normal;
|
52 |
+
font-display: swap;
|
53 |
+
src: url('./fonts/Libertinus-regular.woff2') format('woff2');
|
54 |
+
}
|
55 |
+
|
56 |
+
@font-face {
|
57 |
+
font-family: 'Libertinus';
|
58 |
+
font-style: italic;
|
59 |
+
font-weight: normal;
|
60 |
+
font-display: swap;
|
61 |
+
src: url('./fonts/Libertinus-italic.woff2') format('woff2');
|
62 |
+
}
|
63 |
+
|
64 |
+
@font-face {
|
65 |
+
font-family: 'Libertinus';
|
66 |
+
font-style: normal;
|
67 |
+
font-weight: bold;
|
68 |
+
font-display: swap;
|
69 |
+
src: url('./fonts/Libertinus-bold.woff2') format('woff2');
|
70 |
+
}
|
71 |
+
|
72 |
+
@font-face {
|
73 |
+
font-family: 'Libertinus';
|
74 |
+
font-style: italic;
|
75 |
+
font-weight: bold;
|
76 |
+
font-display: swap;
|
77 |
+
src: url('./fonts/Libertinus-bold-italic.woff2') format('woff2');
|
78 |
+
}
|
79 |
+
|
80 |
+
@font-face {
|
81 |
+
font-family: 'Libertinus';
|
82 |
+
font-style: normal;
|
83 |
+
font-weight: 600;
|
84 |
+
font-display: swap;
|
85 |
+
src: url('./fonts/Libertinus-semibold.woff2') format('woff2');
|
86 |
+
}
|
87 |
+
|
88 |
+
@font-face {
|
89 |
+
font-family: 'Libertinus';
|
90 |
+
font-style: italic;
|
91 |
+
font-weight: 600;
|
92 |
+
font-display: swap;
|
93 |
+
src: url('./fonts/Libertinus-semibold-italic.woff2') format('woff2');
|
94 |
+
}
|
95 |
+
|
96 |
+
/* Box sizing rules */
|
97 |
+
*,
|
98 |
+
*::before,
|
99 |
+
*::after {
|
100 |
+
box-sizing: border-box;
|
101 |
+
}
|
102 |
+
|
103 |
+
:root {
|
104 |
+
--body-color: hsl(0, 5%, 10%);
|
105 |
+
--body-bg-color: hsl(210, 20%, 98%);
|
106 |
+
--link-visited: hsl(0, 100%, 33%);
|
107 |
+
--link-focus-outline: hsl(220, 90%, 52%);
|
108 |
+
--pre-bg-color: hsl(210, 28%, 93%);
|
109 |
+
--kbd-bg-color: hsl(210, 5%, 100%);
|
110 |
+
--kbd-border-color: hsl(210, 5%, 70%);
|
111 |
+
--table-border-color: black;
|
112 |
+
--border-width-thin: 1.36px;
|
113 |
+
--border-color-thin: rgba(0, 0, 0, 0.1);
|
114 |
+
--border-width-thick: 2.27px;
|
115 |
+
--sidenote-target-border-color: hsl(55, 55%, 70%);
|
116 |
+
--footnotes-border-color: hsl(0, 0%, 39%);
|
117 |
+
--text-indent-size: 1.463rem; /* In 12pt [Latin Modern font] LaTeX article
|
118 |
+
\parindent =~ 17.625pt; taking also into account the ratio
|
119 |
+
1pt[LaTeX] = (72 / 72.27) * 1pt[HTML], with default 12pt/1rem LaTeX.css font
|
120 |
+
size, the indentation value in rem CSS units is:
|
121 |
+
\parindent =~ 17.625 * (72 / 72.27) / 12 = 1.463rem. */
|
122 |
+
}
|
123 |
+
|
124 |
+
.latex-dark {
|
125 |
+
--body-color: hsl(0, 0%, 86%);
|
126 |
+
--body-bg-color: hsl(0, 0%, 16%);
|
127 |
+
--link-visited: hsl(196 80% 77%);
|
128 |
+
--link-focus-outline: hsl(215, 63%, 73%);
|
129 |
+
--pre-bg-color: hsl(0, 1%, 25%);
|
130 |
+
--kbd-bg-color: hsl(0, 0%, 16%);
|
131 |
+
--kbd-border-color: hsl(210, 5%, 70%);
|
132 |
+
--table-border-color: white;
|
133 |
+
--sidenote-target-border-color: hsl(0, 0%, 59%);
|
134 |
+
--footnotes-border-color: hsl(0, 0%, 59%);
|
135 |
+
--proof-symbol-filter: invert(80%);
|
136 |
+
}
|
137 |
+
|
138 |
+
@media (prefers-color-scheme: dark) {
|
139 |
+
.latex-dark-auto {
|
140 |
+
--body-color: hsl(0, 0%, 86%);
|
141 |
+
--body-bg-color: hsl(0, 0%, 16%);
|
142 |
+
--link-visited: hsl(196 80% 77%);
|
143 |
+
--link-focus-outline: hsl(215, 63%, 73%);
|
144 |
+
--pre-bg-color: hsl(0, 1%, 25%);
|
145 |
+
--kbd-bg-color: hsl(0, 0%, 16%);
|
146 |
+
--kbd-border-color: hsl(210, 5%, 70%);
|
147 |
+
--table-border-color: white;
|
148 |
+
--sidenote-target-border-color: hsl(0, 0%, 59%);
|
149 |
+
--footnotes-border-color: hsl(0, 0%, 59%);
|
150 |
+
--proof-symbol-filter: invert(80%);
|
151 |
+
}
|
152 |
+
}
|
153 |
+
|
154 |
+
/* Remove default margin */
|
155 |
+
body,
|
156 |
+
h1,
|
157 |
+
h2,
|
158 |
+
h3,
|
159 |
+
h4,
|
160 |
+
p,
|
161 |
+
ul[class],
|
162 |
+
ol[class],
|
163 |
+
li,
|
164 |
+
figure,
|
165 |
+
figcaption,
|
166 |
+
dl,
|
167 |
+
dd {
|
168 |
+
margin: 0;
|
169 |
+
}
|
170 |
+
|
171 |
+
/* Make default font-size 1rem and add smooth scrolling to anchors */
|
172 |
+
html {
|
173 |
+
font-size: 1rem;
|
174 |
+
}
|
175 |
+
@media (prefers-reduced-motion: no-preference) {
|
176 |
+
html {
|
177 |
+
scroll-behavior: smooth;
|
178 |
+
}
|
179 |
+
}
|
180 |
+
|
181 |
+
body.libertinus {
|
182 |
+
font-family: 'Libertinus', Georgia, Cambria, 'Times New Roman', Times, serif;
|
183 |
+
}
|
184 |
+
|
185 |
+
.text-justify {
|
186 |
+
text-align: justify;
|
187 |
+
}
|
188 |
+
|
189 |
body {
|
190 |
+
font-family: 'Latin Modern', Georgia, Cambria, 'Times New Roman', Times, serif;
|
191 |
+
line-height: 1.8;
|
192 |
+
|
193 |
+
max-width: 60vw;
|
194 |
+
min-height: 100vh;
|
195 |
+
overflow-x: hidden;
|
196 |
+
margin: 0 auto;
|
197 |
+
padding: 2rem 1.25rem;
|
198 |
+
|
199 |
+
counter-reset: theorem definition sidenote-counter;
|
200 |
+
|
201 |
+
color: var(--body-color);
|
202 |
+
background-color: var(--body-bg-color);
|
203 |
+
|
204 |
+
text-rendering: optimizeLegibility;
|
205 |
+
|
206 |
+
/* Allow automatic hyphenation for all text in the document */
|
207 |
+
hyphens: auto;
|
208 |
+
-webkit-hyphens: auto;
|
209 |
+
-moz-hyphens: auto;
|
210 |
}
|
211 |
|
212 |
+
@media (min-width: 1050px) {
|
213 |
+
body {
|
214 |
+
max-width: 80vw;
|
215 |
+
}
|
216 |
+
}
|
217 |
+
|
218 |
+
@media (max-width: 768px) {
|
219 |
+
body {
|
220 |
+
max-width: 90vw;
|
221 |
+
}
|
222 |
}
|
223 |
|
224 |
p {
|
225 |
+
margin-top: 1rem;
|
226 |
+
}
|
227 |
+
|
228 |
+
/* Indents paragraphs like in LaTeX documents*/
|
229 |
+
.indent-pars p {
|
230 |
+
text-indent: var(--text-indent-size);
|
231 |
+
}
|
232 |
+
|
233 |
+
.indent-pars p.no-indent,
|
234 |
+
p.no-indent {
|
235 |
+
text-indent: 0;
|
236 |
+
}
|
237 |
+
|
238 |
+
.indent-pars ol p,
|
239 |
+
.indent-pars ul p {
|
240 |
+
text-indent: 0;
|
241 |
+
}
|
242 |
+
|
243 |
+
.indent-pars h2 + p,
|
244 |
+
.indent-pars h3 + p,
|
245 |
+
.indent-pars h4 + p {
|
246 |
+
text-indent: 0;
|
247 |
+
}
|
248 |
+
|
249 |
+
/* A elements that don't have a class get default styles */
|
250 |
+
a:not([class]) {
|
251 |
+
text-decoration-skip-ink: auto;
|
252 |
+
}
|
253 |
+
|
254 |
+
/* Make links red */
|
255 |
+
a,
|
256 |
+
a:visited {
|
257 |
+
color: var(--link-visited);
|
258 |
+
}
|
259 |
+
|
260 |
+
a:focus {
|
261 |
+
outline-offset: 2px;
|
262 |
+
outline: 2px solid var(--link-focus-outline);
|
263 |
+
}
|
264 |
+
|
265 |
+
/* Allow line breaks between any two characters */
|
266 |
+
.break-all {
|
267 |
+
/*
|
268 |
+
NOTE: With `break-all` value no hyphens are shown, even if the word breaks
|
269 |
+
at a hyphenation point
|
270 |
+
*/
|
271 |
+
word-break: break-all;
|
272 |
+
}
|
273 |
+
|
274 |
+
/* Make images easier to work with */
|
275 |
+
img {
|
276 |
+
max-width: 100%;
|
277 |
+
height: auto;
|
278 |
+
display: block;
|
279 |
+
}
|
280 |
+
|
281 |
+
audio {
|
282 |
+
display: block;
|
283 |
+
width: 100%;
|
284 |
+
margin: 0.7rem auto;
|
285 |
+
}
|
286 |
+
|
287 |
+
.audio-sm {
|
288 |
+
min-width: 190px;
|
289 |
+
}
|
290 |
+
|
291 |
+
.audio-md {
|
292 |
+
min-width: 220px;
|
293 |
+
}
|
294 |
+
|
295 |
+
.audio-lg {
|
296 |
+
min-width: 300px;
|
297 |
+
}
|
298 |
+
|
299 |
+
|
300 |
+
|
301 |
+
/* Inherit fonts for inputs and buttons */
|
302 |
+
input,
|
303 |
+
button,
|
304 |
+
textarea,
|
305 |
+
select {
|
306 |
+
font: inherit;
|
307 |
+
}
|
308 |
+
|
309 |
+
/* Prevent textarea from overflowing */
|
310 |
+
textarea {
|
311 |
+
width: 100%;
|
312 |
+
}
|
313 |
+
|
314 |
+
/* Natural flow and rhythm in articles by default */
|
315 |
+
article > * + * {
|
316 |
+
margin-top: 1em;
|
317 |
+
}
|
318 |
+
|
319 |
+
.article-block {
|
320 |
+
border-radius: 8px;
|
321 |
+
box-shadow: 0 10px 30px rgba(0, 0, 0, 0.1);
|
322 |
+
padding: 2rem;
|
323 |
+
margin: 3rem 0;
|
324 |
+
box-sizing: border-box;
|
325 |
+
|
326 |
+
}
|
327 |
+
|
328 |
+
.article-block h2 {
|
329 |
+
margin-top: 0 !important;
|
330 |
+
}
|
331 |
+
|
332 |
+
.article-block p {
|
333 |
+
font-size: 1.1rem;
|
334 |
+
line-height: 1.6;
|
335 |
+
}
|
336 |
+
|
337 |
+
/* Styles for inline code or code snippets */
|
338 |
+
code,
|
339 |
+
pre,
|
340 |
+
kbd {
|
341 |
+
font-family: Menlo, Monaco, Consolas, 'Liberation Mono', 'Courier New',
|
342 |
+
monospace;
|
343 |
+
font-size: 85%;
|
344 |
+
hyphens: none;
|
345 |
+
}
|
346 |
+
pre {
|
347 |
+
padding: 1rem 1.4rem;
|
348 |
+
max-width: 100%;
|
349 |
+
overflow: auto;
|
350 |
+
border-radius: 4px;
|
351 |
+
background: var(--pre-bg-color);
|
352 |
+
}
|
353 |
+
pre code {
|
354 |
+
font-size: 95%;
|
355 |
+
position: relative;
|
356 |
+
}
|
357 |
+
kbd {
|
358 |
+
background: var(--kbd-bg-color);
|
359 |
+
border: 1px solid var(--kbd-border-color);
|
360 |
+
border-radius: 2px;
|
361 |
+
padding: 2px 4px;
|
362 |
+
font-size: 75%;
|
363 |
+
}
|
364 |
+
|
365 |
+
/* Better tables */
|
366 |
+
table:not(.borders-custom) {
|
367 |
+
border-collapse: collapse;
|
368 |
+
border-spacing: 0;
|
369 |
+
width: auto;
|
370 |
+
max-width: 100%;
|
371 |
+
border-top: var(--border-width-thick) solid var(--table-border-color);
|
372 |
+
border-bottom: var(--border-width-thick) solid var(--table-border-color);
|
373 |
+
/* display: block; */
|
374 |
+
overflow-x: auto; /* does not work because element is not block */
|
375 |
+
/* white-space: nowrap; */
|
376 |
+
counter-increment: caption;
|
377 |
+
}
|
378 |
+
/* add bottom border on column table headings */
|
379 |
+
table:not(.borders-custom) tr > th[scope='col'] {
|
380 |
+
border-bottom: var(--border-width-thin) solid var(--table-border-color);
|
381 |
+
}
|
382 |
+
/* add right border on row table headings */
|
383 |
+
table:not(.borders-custom) tr > th[scope='row'] {
|
384 |
+
border-right: var(--border-width-thin) solid var(--table-border-color);
|
385 |
+
}
|
386 |
+
table:not(.borders-custom) > tbody > tr:first-child > td,
|
387 |
+
table:not(.borders-custom) > tbody > tr:first-child > th {
|
388 |
+
border-top: var(--border-width-thin) solid var(--table-border-color);
|
389 |
+
}
|
390 |
+
table:not(.borders-custom) > tbody > tr:last-child > td,
|
391 |
+
table:not(.borders-custom) > tbody > tr:last-child > th {
|
392 |
+
border-bottom: var(--border-width-thin) solid var(--table-border-color);
|
393 |
+
}
|
394 |
+
|
395 |
+
th,
|
396 |
+
td {
|
397 |
+
text-align: left;
|
398 |
+
padding: 0.5rem;
|
399 |
+
line-height: 1.1;
|
400 |
+
}
|
401 |
+
|
402 |
+
/* Table caption */
|
403 |
+
caption {
|
404 |
+
text-align: left;
|
405 |
+
font-size: 0.923em;
|
406 |
+
/* border-bottom: 2pt solid #000; */
|
407 |
+
padding: 0 0.25em 0.25em;
|
408 |
+
width: 100%;
|
409 |
+
margin-left: 0;
|
410 |
+
}
|
411 |
+
|
412 |
+
caption::before {
|
413 |
+
content: 'Table ' counter(caption) '. ';
|
414 |
+
font-weight: bold;
|
415 |
+
}abstract
|
416 |
+
|
417 |
+
/* allow scroll on the x-axis */
|
418 |
+
.scroll-wrapper, .article-block .scroll-wrapper {
|
419 |
+
overflow-x: auto;
|
420 |
+
width: 100%;
|
421 |
+
}
|
422 |
+
|
423 |
+
/* if a table is wrapped in a scroll wrapper,
|
424 |
+
the table cells shouldn't wrap */
|
425 |
+
.scroll-wrapper > table td, .article-block .scroll-wrapper > table td {
|
426 |
+
white-space: nowrap;
|
427 |
}
|
428 |
|
429 |
+
/* Table custom borders */
|
430 |
+
table.borders-custom {
|
431 |
+
border-collapse: collapse;
|
432 |
+
border-spacing: 0;
|
433 |
+
width: auto;
|
434 |
+
max-width: 100%;
|
435 |
+
overflow-x: auto;
|
436 |
+
counter-increment: caption;
|
437 |
}
|
438 |
|
439 |
+
.border-top-thick {
|
440 |
+
border-top: var(--border-width-thick) solid var(--table-border-color);
|
441 |
}
|
442 |
+
.border-right-thick {
|
443 |
+
border-right: var(--border-width-thick) solid var(--table-border-color);
|
444 |
+
}
|
445 |
+
.border-bottom-thick {
|
446 |
+
border-bottom: var(--border-width-thick) solid var(--table-border-color);
|
447 |
+
}
|
448 |
+
.border-left-thick {
|
449 |
+
border-left: var(--border-width-thick) solid var(--table-border-color);
|
450 |
+
}
|
451 |
+
|
452 |
+
.border-top-thin {
|
453 |
+
border-top: var(--border-width-thin) solid var(--table-border-color);
|
454 |
+
}
|
455 |
+
.border-right-thin {
|
456 |
+
border-right: var(--border-width-thin) solid var(--table-border-color);
|
457 |
+
}
|
458 |
+
.border-bottom-thin {
|
459 |
+
border-bottom: var(--border-width-thin) solid var(--border-color-thin);
|
460 |
+
}
|
461 |
+
.border-left-thin {
|
462 |
+
border-left: var(--border-width-thin) solid var(--table-border-color);
|
463 |
+
}
|
464 |
+
|
465 |
+
/* Table column alignment */
|
466 |
+
.col-1-l tr > :nth-child(1),
|
467 |
+
.col-2-l tr > :nth-child(2),
|
468 |
+
.col-3-l tr > :nth-child(3),
|
469 |
+
.col-4-l tr > :nth-child(4),
|
470 |
+
.col-5-l tr > :nth-child(5),
|
471 |
+
.col-6-l tr > :nth-child(6),
|
472 |
+
.col-7-l tr > :nth-child(7),
|
473 |
+
.col-8-l tr > :nth-child(8),
|
474 |
+
.col-9-l tr > :nth-child(9),
|
475 |
+
.col-10-l tr > :nth-child(10),
|
476 |
+
.col-11-l tr > :nth-child(11),
|
477 |
+
.col-12-l tr > :nth-child(12) {
|
478 |
+
text-align: left;
|
479 |
+
}
|
480 |
+
.col-1-c tr > :nth-child(1),
|
481 |
+
.col-2-c tr > :nth-child(2),
|
482 |
+
.col-3-c tr > :nth-child(3),
|
483 |
+
.col-4-c tr > :nth-child(4),
|
484 |
+
.col-5-c tr > :nth-child(5),
|
485 |
+
.col-6-c tr > :nth-child(6),
|
486 |
+
.col-7-c tr > :nth-child(7),
|
487 |
+
.col-8-c tr > :nth-child(8),
|
488 |
+
.col-9-c tr > :nth-child(9),
|
489 |
+
.col-10-c tr > :nth-child(10),
|
490 |
+
.col-11-c tr > :nth-child(11),
|
491 |
+
.col-12-c tr > :nth-child(12) {
|
492 |
+
text-align: center;
|
493 |
+
}
|
494 |
+
.col-1-r tr > :nth-child(1),
|
495 |
+
.col-2-r tr > :nth-child(2),
|
496 |
+
.col-3-r tr > :nth-child(3),
|
497 |
+
.col-4-r tr > :nth-child(4),
|
498 |
+
.col-5-r tr > :nth-child(5),
|
499 |
+
.col-6-r tr > :nth-child(6),
|
500 |
+
.col-7-r tr > :nth-child(7),
|
501 |
+
.col-8-r tr > :nth-child(8),
|
502 |
+
.col-9-r tr > :nth-child(9),
|
503 |
+
.col-10-r tr > :nth-child(10),
|
504 |
+
.col-11-r tr > :nth-child(11),
|
505 |
+
.col-12-r tr > :nth-child(12) {
|
506 |
+
text-align: right;
|
507 |
+
}
|
508 |
+
|
509 |
+
/* Figure captions: every <figure> advances the `figcaption` counter, and each
   caption is prefixed with a bold "Figure N. " label built from that counter. */
figure {
  counter-increment: figcaption;
}

figcaption {
  width: 100%;
  margin-left: 0;
  padding: 0.25em 0.25em 0;
  font-size: 0.923em;
  text-align: left;
}

figcaption::before {
  font-weight: bold;
  content: 'Figure ' counter(figcaption) '. ';
}
|
524 |
+
|
525 |
+
/* An <h1> that is the first child of its parent is centered (document title). */
h1:first-child { text-align: center; }
|
529 |
+
|
530 |
+
/* Table of contents: ordered lists inside <nav> are numbered with a nested
   `item` counter, so the generated label joins all enclosing levels with '.'. */
nav ol {
  padding-left: 2rem;
  /* each list starts its own `item` counter scope */
  counter-reset: item;
}

/* display: block replaces the default list-item rendering of these entries */
nav ol > li {
  display: block;
}

nav ol > li::before {
  counter-increment: item;
  content: counters(item, '.') ' ';
  padding-right: 0.85rem;
}
|
543 |
+
|
544 |
+
/* Definition descriptions are centered; the main use is display equations. */
dl dd { text-align: center; }
|
548 |
+
|
549 |
+
/* Theorem: italic block that advances the `theorem` counter and is prefixed
   with an upright, bold "Theorem N. " label. */
.theorem {
  display: block;
  margin: 12px 0;
  font-style: italic;
  counter-increment: theorem;
}

.theorem::before {
  font-style: normal;
  font-weight: bold;
  content: 'Theorem ' counter(theorem) '. ';
}
|
561 |
+
|
562 |
+
/* Lemma: italic block prefixed with an upright, bold "Lemma N. " label.
   It increments and prints the `theorem` counter, not a counter of its own. */
.lemma {
  display: block;
  margin: 12px 0;
  font-style: italic;
  counter-increment: theorem;
}

.lemma::before {
  font-style: normal;
  font-weight: bold;
  content: 'Lemma ' counter(theorem) '. ';
}
|
574 |
+
|
575 |
+
/* Proof: upright block introduced by an italic "Proof. " label and closed by
   an end-of-proof mark. */
.proof {
  /* positioning context for the absolutely positioned ::after mark */
  position: relative;
  display: block;
  margin: 12px 0;
  font-style: normal;
}

/* The label is followed by the value of the element's `title` attribute. */
.proof::before {
  font-style: italic;
  content: 'Proof. ' attr(title);
}

/* End mark, offset from the bottom-right corner of the proof block. */
.proof::after {
  position: absolute;
  bottom: -2px;
  right: -12px;
  content: '◾️';
  filter: var(--proof-symbol-filter);
}
|
593 |
+
|
594 |
+
/* Definition: upright block that advances the `definition` counter and is
   prefixed with a bold "Definition N. " label. */
.definition {
  display: block;
  margin: 12px 0;
  font-style: normal;
  counter-increment: definition;
}

.definition::before {
  font-style: normal;
  font-weight: bold;
  content: 'Definition ' counter(definition) '. ';
}
|
606 |
+
|
607 |
+
/* Center align author name and add vertical spacing */
|
608 |
+
.author {
  /* centered, with vertical spacing above and below */
  text-align: center;
  margin: 0.85rem 0;
}
|
612 |
+
|
613 |
+
/* Horizontal gap to the left of the date. */
.date { margin-left: 1.5rem; }
|
616 |
+
|
617 |
+
/* Sidenotes */

/* Default sidenote: floated right and pulled outside the text column by a
   negative right margin that is wider than the note itself. */
.sidenote {
  float: right;
  clear: right;
  width: 18vw;
  margin-right: -20vw;
  margin-bottom: 1em;
  font-size: 0.8rem;
  text-indent: 0;
  /* right-hand notes read left-aligned on wide screens */
  text-align: left;
}
|
630 |
+
|
631 |
+
/* Left-hand variant: floated left and pulled out with a negative left margin. */
.sidenote.left {
  float: left;
  margin-left: -20vw;
  margin-bottom: 1em;
  text-indent: 0;
  /* left-hand notes read right-aligned on wide screens */
  text-align: right;
}
|
639 |
+
|
640 |
+
/* With `text-justify` on <body>, both right and left sidenotes are justified
   on wide screens. */
body.text-justify .sidenote,
body.text-justify .sidenote.left { text-align: justify; }
|
645 |
+
|
646 |
+
/* (WIP) A sidenote that is the current URL fragment target gets a border. */
.sidenote:target {
  padding: 0 .5rem;
  border: var(--sidenote-target-border-color) 1.5px solid;
  /* leaves space above the note when it is scrolled to */
  scroll-margin-block-start: 10rem;
}
|
652 |
+
|
653 |
+
/* Sidenote numbering: each `.sidenote-number` advances `sidenote-counter`. */
.sidenote-number {
  counter-increment: sidenote-counter;
}

/* Shared base for both generated numbers; `position: relative` lets the
   `top` / `left` offsets below take effect. */
.sidenote-number::after,
.sidenote::before {
  vertical-align: baseline;
  position: relative;
}

/* Number shown after the marker in the main text, raised and nudged right. */
.sidenote-number::after {
  top: -0.5rem;
  left: 0.1rem;
  font-size: 0.7rem;
  content: counter(sidenote-counter);
}

/* Same number, followed by a space, in front of a sidenote that is a later
   sibling of a `.sidenote-number`. */
.sidenote-number ~ .sidenote::before {
  top: -0.5rem;
  font-size: 0.7rem;
  content: counter(sidenote-counter) ' ';
}
|
678 |
+
|
679 |
+
/* Toggle labels without the `.sidenote-number` class are hidden by default. */
label.sidenote-toggle:not(.sidenote-number) { display: none; }
|
682 |
+
|
683 |
+
/* Sidenotes inside a blockquote use a larger negative right margin. */
blockquote .sidenote {
  width: 18vw;
  margin-right: -24vw;
}
|
688 |
+
|
689 |
+
|
690 |
+
/* Toggle labels are inline and show a pointer cursor. */
label.sidenote-toggle {
  cursor: pointer;
  display: inline;
}
|
694 |
+
|
695 |
+
/* The toggle <input> itself is never rendered. */
input.sidenote-toggle { display: none; }
|
698 |
+
|
699 |
+
/* Narrow viewports (up to 1050px wide): sidenotes are hidden and shown inline
   only while the toggle immediately before them is checked. */
@media (max-width: 1050px) {
  /* un-numbered toggle labels become visible */
  label.sidenote-toggle:not(.sidenote-number) {
    display: inline;
  }

  .sidenote {
    display: none;
  }

  /* expanded note: nearly full width, cleared on both sides */
  .sidenote-toggle:checked + .sidenote {
    display: block;
    float: left;
    clear: both;
    width: 95%;
    margin: 0.5rem 1.25rem 1rem 0.5rem;
    /* NOTE(review): no `position` is set on .sidenote in the rules visible
       here, so this offset may have no effect - confirm. */
    left: 1rem;
  }

  /* expanded left-hand notes are left-aligned like the others */
  .sidenote-toggle:checked + .sidenote.left {
    text-align: left;
  }

  /* ...unless `text-justify` is set on <body>, which justifies them */
  body.text-justify .sidenote-toggle:checked + .sidenote.left {
    text-align: justify;
  }

  /* sidenotes inside a blockquote: adjusted margin and width */
  blockquote .sidenote {
    width: 16vw;
    margin-right: -25vw;
  }
}
|
731 |
+
|
732 |
+
/* Footnotes: smaller, tighter paragraphs, with a rule above the section. */
.footnotes p {
  font-size: 85%;
  line-height: 1.5;
  margin-bottom: 0.4rem;
}

.footnotes { border-top: 1px solid var(--footnotes-border-color); }
|
741 |
+
|
742 |
+
/* Abstract: the container and its paragraphs are centered with no top margin. */
.abstract,
.abstract p {
  margin-top: 0;
  text-align: center;
}

/* Must stay after the rule above: this shorthand sets the container's own
   vertical margins, including margin-top. */
.abstract { margin: 2.25rem 0; }

/* Abstract heading at body size, pulled slightly towards the text below. */
.abstract > h2 {
  margin-bottom: -0.2rem;
  font-size: 1rem;
}
|
755 |
+
|
756 |
+
/* LaTeX logo: the first <span> (the "a") is shrunk and raised, the second
   (the "e") is lowered; negative margins pull both towards their neighbours. */
.latex span:nth-child(1) {
  font-size: 0.75em;
  line-height: 1ex;
  text-transform: uppercase;
  vertical-align: 0.28em;
  margin-right: -0.15em;
  margin-left: -0.48em;
}

.latex span:nth-child(2) {
  line-height: 1ex;
  text-transform: uppercase;
  vertical-align: -0.5ex;
  margin-right: -0.125em;
  margin-left: -0.1667em;
}
|
773 |
+
|
774 |
+
/* Utility: keep the element's text on a single line. */
.whitespace-nowrap { white-space: nowrap; }
|
778 |
+
|
779 |
+
/* Heading typography: per-level font size and spacing. */
h1 {
  margin-bottom: 1.625rem;
  font-size: 2.5rem;
  line-height: 3.25rem;
}

h2 {
  margin-top: 4rem;
  font-size: 1.8rem;
  line-height: 2.1rem;
}

h3 {
  margin-top: 3rem;
  font-size: 1.4rem;
}

h4 {
  margin-top: 2rem;
  font-size: 1.2rem;
}

h5 {
  margin-top: 1.8rem;
  font-size: 1rem;
}

/* h6 matches h5 in size but is set in italic at normal weight */
h6 {
  margin-top: 2.5rem;
  font-size: 1rem;
  font-style: italic;
  font-weight: normal;
}
|
813 |
+
|
814 |
+
/* h3 to h6 share one line height. */
h3, h4, h5, h6 { line-height: 1.625rem; }
|
820 |
+
|
821 |
+
/* A heading that directly follows the next-higher heading level gets a
   different top margin than the per-level default. */
h1 + h2 { margin-top: 1.625rem; }

h2 + h3,
h3 + h4,
h4 + h5 { margin-top: 1.4rem; }

/* negative margin: h6 is pulled up towards a preceding h5 */
h5 + h6 { margin-top: -0.8rem; }
|
834 |
+
|
835 |
+
/* h2 to h6 share one bottom margin. */
h2, h3, h4, h5, h6 { margin-bottom: 1rem; }
|