Spaces:
Runtime error
Runtime error
Update app.py
Browse files
app.py
CHANGED
@@ -153,85 +153,85 @@ with iface:
|
|
153 |
</h1>
|
154 |
"""
|
155 |
)
|
156 |
-
|
157 |
-
|
158 |
-
|
159 |
-
|
160 |
-
|
161 |
-
|
162 |
-
|
163 |
-
|
164 |
-
)
|
165 |
-
negative_textbox = gr.Textbox(
|
166 |
-
value="low quality, average quality",
|
167 |
-
max_lines=1,
|
168 |
-
label="Negative prompt",
|
169 |
-
info="Enter a negative prompt not to guide the audio generation. Selecting appropriate negative prompts can improve the audio quality significantly.",
|
170 |
-
elem_id="prompt-in",
|
171 |
-
)
|
172 |
-
|
173 |
-
with gr.Accordion("Click to modify detailed configurations", open=False):
|
174 |
-
seed = gr.Number(
|
175 |
-
value=45,
|
176 |
-
label="Seed",
|
177 |
-
info="Change this value (any integer number) will lead to a different generation result.",
|
178 |
)
|
179 |
-
|
180 |
-
|
181 |
-
|
182 |
-
|
183 |
-
|
184 |
-
|
185 |
-
label="Guidance scale",
|
186 |
-
info="Large => better quality and relevancy to text; Small => better diversity",
|
187 |
)
|
188 |
-
n_candidates = gr.Slider(
|
189 |
-
1,
|
190 |
-
3,
|
191 |
-
value=3,
|
192 |
-
step=1,
|
193 |
-
label="Number waveforms to generate",
|
194 |
-
info="Automatic quality control. This number control the number of candidates (e.g., generate three audios and choose the best to show you). A Larger value usually lead to better quality with heavier computation",
|
195 |
-
)
|
196 |
-
|
197 |
-
outputs = gr.Video(label="Output", elem_id="output-video")
|
198 |
-
btn = gr.Button("Submit").style(full_width=True)
|
199 |
-
|
200 |
-
with gr.Group(elem_id="share-btn-container", visible=False):
|
201 |
-
community_icon = gr.HTML(community_icon_html)
|
202 |
-
loading_icon = gr.HTML(loading_icon_html)
|
203 |
-
share_button = gr.Button("Share to community", elem_id="share-btn")
|
204 |
|
205 |
-
|
206 |
-
|
207 |
-
|
208 |
-
|
209 |
-
|
210 |
-
|
211 |
-
|
212 |
-
|
213 |
-
|
214 |
-
|
215 |
-
|
216 |
-
|
217 |
-
|
218 |
-
|
219 |
-
|
220 |
-
|
221 |
-
|
222 |
-
|
223 |
-
|
224 |
-
|
225 |
-
|
226 |
-
|
227 |
-
|
228 |
-
|
229 |
-
|
230 |
-
|
231 |
-
|
232 |
-
|
233 |
-
|
234 |
-
|
235 |
-
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
236 |
"""
|
237 |
-
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
153 |
</h1>
|
154 |
"""
|
155 |
)
|
156 |
+
with gr.Group():
|
157 |
+
with gr.Box():
|
158 |
+
textbox = gr.Textbox(
|
159 |
+
value="A hammer is hitting a wooden surface",
|
160 |
+
max_lines=1,
|
161 |
+
label="Input text",
|
162 |
+
info="Your text is important for the audio quality. Please ensure it is descriptive by using more adjectives.",
|
163 |
+
elem_id="prompt-in",
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
164 |
)
|
165 |
+
negative_textbox = gr.Textbox(
|
166 |
+
value="low quality, average quality",
|
167 |
+
max_lines=1,
|
168 |
+
label="Negative prompt",
|
169 |
+
info="Enter a negative prompt not to guide the audio generation. Selecting appropriate negative prompts can improve the audio quality significantly.",
|
170 |
+
elem_id="prompt-in",
|
|
|
|
|
171 |
)
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
172 |
|
173 |
+
with gr.Accordion("Click to modify detailed configurations", open=False):
|
174 |
+
seed = gr.Number(
|
175 |
+
value=45,
|
176 |
+
label="Seed",
|
177 |
+
info="Change this value (any integer number) will lead to a different generation result.",
|
178 |
+
)
|
179 |
+
duration = gr.Slider(2.5, 10, value=5, step=2.5, label="Duration (seconds)")
|
180 |
+
guidance_scale = gr.Slider(
|
181 |
+
0,
|
182 |
+
5,
|
183 |
+
value=3.5,
|
184 |
+
step=0.5,
|
185 |
+
label="Guidance scale",
|
186 |
+
info="Large => better quality and relevancy to text; Small => better diversity",
|
187 |
+
)
|
188 |
+
n_candidates = gr.Slider(
|
189 |
+
1,
|
190 |
+
3,
|
191 |
+
value=3,
|
192 |
+
step=1,
|
193 |
+
label="Number waveforms to generate",
|
194 |
+
info="Automatic quality control. This number control the number of candidates (e.g., generate three audios and choose the best to show you). A Larger value usually lead to better quality with heavier computation",
|
195 |
+
)
|
196 |
+
|
197 |
+
outputs = gr.Video(label="Output", elem_id="output-video")
|
198 |
+
btn = gr.Button("Submit").style(full_width=True)
|
199 |
+
|
200 |
+
with gr.Group(elem_id="share-btn-container", visible=False):
|
201 |
+
community_icon = gr.HTML(community_icon_html)
|
202 |
+
loading_icon = gr.HTML(loading_icon_html)
|
203 |
+
share_button = gr.Button("Share to community", elem_id="share-btn")
|
204 |
+
|
205 |
+
btn.click(
|
206 |
+
text2audio,
|
207 |
+
inputs=[textbox, negative_textbox, duration, guidance_scale, seed, n_candidates],
|
208 |
+
outputs=[outputs],
|
209 |
+
)
|
210 |
+
|
211 |
+
share_button.click(None, [], [], _js=share_js)
|
212 |
+
gr.HTML(
|
213 |
+
gr.Examples(
|
214 |
+
[
|
215 |
+
["A hammer is hitting a wooden surface", "low quality, average quality", 5, 2.5, 45, 3],
|
216 |
+
["Peaceful and calming ambient music with singing bowl and other instruments.", "low quality, average quality", 5, 2.5, 45, 3],
|
217 |
+
["A man is speaking in a small room.", "low quality, average quality", 5, 2.5, 45, 3],
|
218 |
+
["A female is speaking followed by footstep sound", "low quality, average quality", 5, 2.5, 45, 3],
|
219 |
+
["Wooden table tapping sound followed by water pouring sound.", "low quality, average quality", 5, 2.5, 45, 3],
|
220 |
+
],
|
221 |
+
fn=text2audio,
|
222 |
+
inputs=[textbox, negative_textbox, duration, guidance_scale, seed, n_candidates],
|
223 |
+
outputs=[outputs],
|
224 |
+
cache_examples=True,
|
225 |
+
)
|
226 |
+
gr.HTML(
|
227 |
"""
|
228 |
+
<div class="acknowledgements"> <p>Essential Tricks for Enhancing the Quality of Your Generated
|
229 |
+
Audio</p> <p>1. Try to use more adjectives to describe your sound. For example: "A man is speaking
|
230 |
+
clearly and slowly in a large room" is better than "A man is speaking". This can make sure AudioLDM
|
231 |
+
understands what you want.</p> <p>2. Try to use different random seeds, which can affect the generation
|
232 |
+
quality significantly sometimes.</p> <p>3. It's better to use general terms like 'man' or 'woman'
|
233 |
+
instead of specific names for individuals or abstract objects that humans may not be familiar with,
|
234 |
+
such as 'mummy'.</p> <p>4. Using a negative prompt to not guide the diffusion process can improve the
|
235 |
+
audio quality significantly. Try using negative prompts like 'low quality'.</p> </div>
|
236 |
+
"""
|
237 |
+
)
|