ak0601 committed on
Commit
da666f1
·
verified ·
1 Parent(s): e605e02

Update app.py

Browse files
Files changed (1) hide show
  1. app.py +78 -78
app.py CHANGED
@@ -153,85 +153,85 @@ with iface:
153
  </h1>
154
  """
155
  )
156
- with gr.Group():
157
- with gr.Box():
158
- textbox = gr.Textbox(
159
- value="A hammer is hitting a wooden surface",
160
- max_lines=1,
161
- label="Input text",
162
- info="Your text is important for the audio quality. Please ensure it is descriptive by using more adjectives.",
163
- elem_id="prompt-in",
164
- )
165
- negative_textbox = gr.Textbox(
166
- value="low quality, average quality",
167
- max_lines=1,
168
- label="Negative prompt",
169
- info="Enter a negative prompt not to guide the audio generation. Selecting appropriate negative prompts can improve the audio quality significantly.",
170
- elem_id="prompt-in",
171
- )
172
-
173
- with gr.Accordion("Click to modify detailed configurations", open=False):
174
- seed = gr.Number(
175
- value=45,
176
- label="Seed",
177
- info="Change this value (any integer number) will lead to a different generation result.",
178
  )
179
- duration = gr.Slider(2.5, 10, value=5, step=2.5, label="Duration (seconds)")
180
- guidance_scale = gr.Slider(
181
- 0,
182
- 5,
183
- value=3.5,
184
- step=0.5,
185
- label="Guidance scale",
186
- info="Large => better quality and relevancy to text; Small => better diversity",
187
  )
188
- n_candidates = gr.Slider(
189
- 1,
190
- 3,
191
- value=3,
192
- step=1,
193
- label="Number waveforms to generate",
194
- info="Automatic quality control. This number control the number of candidates (e.g., generate three audios and choose the best to show you). A Larger value usually lead to better quality with heavier computation",
195
- )
196
-
197
- outputs = gr.Video(label="Output", elem_id="output-video")
198
- btn = gr.Button("Submit").style(full_width=True)
199
-
200
- with gr.Group(elem_id="share-btn-container", visible=False):
201
- community_icon = gr.HTML(community_icon_html)
202
- loading_icon = gr.HTML(loading_icon_html)
203
- share_button = gr.Button("Share to community", elem_id="share-btn")
204
 
205
- btn.click(
206
- text2audio,
207
- inputs=[textbox, negative_textbox, duration, guidance_scale, seed, n_candidates],
208
- outputs=[outputs],
209
- )
210
-
211
- share_button.click(None, [], [], _js=share_js)
212
- gr.HTML(
213
- gr.Examples(
214
- [
215
- ["A hammer is hitting a wooden surface", "low quality, average quality", 5, 2.5, 45, 3],
216
- ["Peaceful and calming ambient music with singing bowl and other instruments.", "low quality, average quality", 5, 2.5, 45, 3],
217
- ["A man is speaking in a small room.", "low quality, average quality", 5, 2.5, 45, 3],
218
- ["A female is speaking followed by footstep sound", "low quality, average quality", 5, 2.5, 45, 3],
219
- ["Wooden table tapping sound followed by water pouring sound.", "low quality, average quality", 5, 2.5, 45, 3],
220
- ],
221
- fn=text2audio,
222
- inputs=[textbox, negative_textbox, duration, guidance_scale, seed, n_candidates],
223
- outputs=[outputs],
224
- cache_examples=True,
225
- )
226
- gr.HTML(
227
- """
228
- <div class="acknowledgements"> <p>Essential Tricks for Enhancing the Quality of Your Generated
229
- Audio</p> <p>1. Try to use more adjectives to describe your sound. For example: "A man is speaking
230
- clearly and slowly in a large room" is better than "A man is speaking". This can make sure AudioLDM
231
- understands what you want.</p> <p>2. Try to use different random seeds, which can affect the generation
232
- quality significantly sometimes.</p> <p>3. It's better to use general terms like 'man' or 'woman'
233
- instead of specific names for individuals or abstract objects that humans may not be familiar with,
234
- such as 'mummy'.</p> <p>4. Using a negative prompt to not guide the diffusion process can improve the
235
- audio quality significantly. Try using negative prompts like 'low quality'.</p> </div>
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
236
  """
237
- )
 
 
 
 
 
 
 
 
 
 
153
  </h1>
154
  """
155
  )
156
+ with gr.Group():
157
+ with gr.Box():
158
+ textbox = gr.Textbox(
159
+ value="A hammer is hitting a wooden surface",
160
+ max_lines=1,
161
+ label="Input text",
162
+ info="Your text is important for the audio quality. Please ensure it is descriptive by using more adjectives.",
163
+ elem_id="prompt-in",
 
 
 
 
 
 
 
 
 
 
 
 
 
 
164
  )
165
+ negative_textbox = gr.Textbox(
166
+ value="low quality, average quality",
167
+ max_lines=1,
168
+ label="Negative prompt",
169
+ info="Enter a negative prompt not to guide the audio generation. Selecting appropriate negative prompts can improve the audio quality significantly.",
170
+ elem_id="prompt-in",
 
 
171
  )
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
172
 
173
+ with gr.Accordion("Click to modify detailed configurations", open=False):
174
+ seed = gr.Number(
175
+ value=45,
176
+ label="Seed",
177
+ info="Change this value (any integer number) will lead to a different generation result.",
178
+ )
179
+ duration = gr.Slider(2.5, 10, value=5, step=2.5, label="Duration (seconds)")
180
+ guidance_scale = gr.Slider(
181
+ 0,
182
+ 5,
183
+ value=3.5,
184
+ step=0.5,
185
+ label="Guidance scale",
186
+ info="Large => better quality and relevancy to text; Small => better diversity",
187
+ )
188
+ n_candidates = gr.Slider(
189
+ 1,
190
+ 3,
191
+ value=3,
192
+ step=1,
193
+ label="Number waveforms to generate",
194
+ info="Automatic quality control. This number control the number of candidates (e.g., generate three audios and choose the best to show you). A Larger value usually lead to better quality with heavier computation",
195
+ )
196
+
197
+ outputs = gr.Video(label="Output", elem_id="output-video")
198
+ btn = gr.Button("Submit").style(full_width=True)
199
+
200
+ with gr.Group(elem_id="share-btn-container", visible=False):
201
+ community_icon = gr.HTML(community_icon_html)
202
+ loading_icon = gr.HTML(loading_icon_html)
203
+ share_button = gr.Button("Share to community", elem_id="share-btn")
204
+
205
+ btn.click(
206
+ text2audio,
207
+ inputs=[textbox, negative_textbox, duration, guidance_scale, seed, n_candidates],
208
+ outputs=[outputs],
209
+ )
210
+
211
+ share_button.click(None, [], [], _js=share_js)
212
+ gr.HTML(
213
+ gr.Examples(
214
+ [
215
+ ["A hammer is hitting a wooden surface", "low quality, average quality", 5, 2.5, 45, 3],
216
+ ["Peaceful and calming ambient music with singing bowl and other instruments.", "low quality, average quality", 5, 2.5, 45, 3],
217
+ ["A man is speaking in a small room.", "low quality, average quality", 5, 2.5, 45, 3],
218
+ ["A female is speaking followed by footstep sound", "low quality, average quality", 5, 2.5, 45, 3],
219
+ ["Wooden table tapping sound followed by water pouring sound.", "low quality, average quality", 5, 2.5, 45, 3],
220
+ ],
221
+ fn=text2audio,
222
+ inputs=[textbox, negative_textbox, duration, guidance_scale, seed, n_candidates],
223
+ outputs=[outputs],
224
+ cache_examples=True,
225
+ )
226
+ gr.HTML(
227
  """
228
+ <div class="acknowledgements"> <p>Essential Tricks for Enhancing the Quality of Your Generated
229
+ Audio</p> <p>1. Try to use more adjectives to describe your sound. For example: "A man is speaking
230
+ clearly and slowly in a large room" is better than "A man is speaking". This can make sure AudioLDM
231
+ understands what you want.</p> <p>2. Try to use different random seeds, which can affect the generation
232
+ quality significantly sometimes.</p> <p>3. It's better to use general terms like 'man' or 'woman'
233
+ instead of specific names for individuals or abstract objects that humans may not be familiar with,
234
+ such as 'mummy'.</p> <p>4. Using a negative prompt to not guide the diffusion process can improve the
235
+ audio quality significantly. Try using negative prompts like 'low quality'.</p> </div>
236
+ """
237
+ )