Bils committed on
Commit
b87869d
·
verified ·
1 Parent(s): bcd1e5d

Update app.py

Browse files
Files changed (1) hide show
  1. app.py +58 -47
app.py CHANGED
@@ -50,7 +50,6 @@ def get_llama_pipeline(model_id: str, token: str):
50
  """
51
  if model_id in LLAMA_PIPELINES:
52
  return LLAMA_PIPELINES[model_id]
53
-
54
  tokenizer = AutoTokenizer.from_pretrained(model_id, use_auth_token=token)
55
  model = AutoModelForCausalLM.from_pretrained(
56
  model_id,
@@ -63,7 +62,6 @@ def get_llama_pipeline(model_id: str, token: str):
63
  LLAMA_PIPELINES[model_id] = text_pipeline
64
  return text_pipeline
65
 
66
-
67
  def get_musicgen_model(model_key: str = "facebook/musicgen-large"):
68
  """
69
  Returns a cached MusicGen model if available; otherwise, loads it.
@@ -71,7 +69,6 @@ def get_musicgen_model(model_key: str = "facebook/musicgen-large"):
71
  """
72
  if model_key in MUSICGEN_MODELS:
73
  return MUSICGEN_MODELS[model_key]
74
-
75
  model = MusicgenForConditionalGeneration.from_pretrained(model_key)
76
  processor = AutoProcessor.from_pretrained(model_key)
77
  device = "cuda" if torch.cuda.is_available() else "cpu"
@@ -79,19 +76,16 @@ def get_musicgen_model(model_key: str = "facebook/musicgen-large"):
79
  MUSICGEN_MODELS[model_key] = (model, processor)
80
  return model, processor
81
 
82
-
83
  def get_tts_model(model_name: str = "tts_models/en/ljspeech/tacotron2-DDC"):
84
  """
85
  Returns a cached TTS model if available; otherwise, loads it.
86
  """
87
  if model_name in TTS_MODELS:
88
  return TTS_MODELS[model_name]
89
-
90
  tts_model = TTS(model_name)
91
  TTS_MODELS[model_name] = tts_model
92
  return tts_model
93
 
94
-
95
  # ---------------------------------------------------------------------
96
  # Script Generation Function
97
  # ---------------------------------------------------------------------
@@ -125,7 +119,7 @@ def generate_script(user_prompt: str, model_id: str, token: str, duration: int):
125
  voice_script = "No voice-over script found."
126
  sound_design = "No sound design suggestions found."
127
  music_suggestions = "No music suggestions found."
128
- # Voice-Over Script
129
  if "Voice-Over Script:" in generated_text:
130
  parts = generated_text.split("Voice-Over Script:")
131
  voice_script_part = parts[1]
@@ -133,7 +127,6 @@ def generate_script(user_prompt: str, model_id: str, token: str, duration: int):
133
  voice_script = voice_script_part.split("Sound Design Suggestions:")[0].strip()
134
  else:
135
  voice_script = voice_script_part.strip()
136
- # Sound Design
137
  if "Sound Design Suggestions:" in generated_text:
138
  parts = generated_text.split("Sound Design Suggestions:")
139
  sound_design_part = parts[1]
@@ -141,7 +134,6 @@ def generate_script(user_prompt: str, model_id: str, token: str, duration: int):
141
  sound_design = sound_design_part.split("Music Suggestions:")[0].strip()
142
  else:
143
  sound_design = sound_design_part.strip()
144
- # Music Suggestions
145
  if "Music Suggestions:" in generated_text:
146
  parts = generated_text.split("Music Suggestions:")
147
  music_suggestions = parts[1].strip()
@@ -149,7 +141,6 @@ def generate_script(user_prompt: str, model_id: str, token: str, duration: int):
149
  except Exception as e:
150
  return f"Error generating script: {e}", "", ""
151
 
152
-
153
  # ---------------------------------------------------------------------
154
  # Ad Promo Idea Generation Function
155
  # ---------------------------------------------------------------------
@@ -181,7 +172,6 @@ def generate_ad_promo_idea(user_prompt: str, model_id: str, token: str):
181
  except Exception as e:
182
  return f"Error generating ad promo idea: {e}"
183
 
184
-
185
  # ---------------------------------------------------------------------
186
  # Voice-Over Generation Function
187
  # ---------------------------------------------------------------------
@@ -202,7 +192,6 @@ def generate_voice(script: str, tts_model_name: str = "tts_models/en/ljspeech/ta
202
  except Exception as e:
203
  return f"Error generating voice: {e}"
204
 
205
-
206
  # ---------------------------------------------------------------------
207
  # Music Generation Function
208
  # ---------------------------------------------------------------------
@@ -229,7 +218,6 @@ def generate_music(prompt: str, audio_length: int):
229
  except Exception as e:
230
  return f"Error generating music: {e}"
231
 
232
-
233
  # ---------------------------------------------------------------------
234
  # Audio Blending with Duration Sync & Ducking
235
  # ---------------------------------------------------------------------
@@ -264,7 +252,6 @@ def blend_audio(voice_path: str, music_path: str, ducking: bool, duck_level: int
264
  except Exception as e:
265
  return f"Error blending audio: {e}"
266
 
267
-
268
  # ---------------------------------------------------------------------
269
  # Gradio Interface with Enhanced UI
270
  # ---------------------------------------------------------------------
@@ -284,19 +271,23 @@ with gr.Blocks(css="""
284
  }
285
  .header h1 {
286
  margin: 0;
287
- font-size: 2.5rem;
288
  }
289
  .header p {
290
  font-size: 1.2rem;
291
  }
 
 
 
 
 
 
 
292
  .gradio-container {
293
  background: #2e2e2e;
294
  border-radius: 10px;
295
  padding: 1rem;
296
- }
297
- .tab-title {
298
- font-size: 1.1rem;
299
- font-weight: bold;
300
  }
301
  .footer {
302
  text-align: center;
@@ -305,6 +296,11 @@ with gr.Blocks(css="""
305
  padding: 1rem;
306
  color: #cccccc;
307
  }
 
 
 
 
 
308
  """) as demo:
309
 
310
  # Custom Header
@@ -315,22 +311,23 @@ with gr.Blocks(css="""
315
  """)
316
 
317
  gr.Markdown("""
318
- Welcome to **AI Ads Promo (Demo MVP)**! This platform leverages state-of-the-art AI models to help you generate:
319
-
320
- - **Ad Promo Ideas**: Generate creative ad concepts.
321
- - **Script**: Produce a compelling voice-over script with LLaMA.
322
- - **Voice Synthesis**: Create natural-sounding voice-overs using Coqui TTS.
323
- - **Music Production**: Generate custom music tracks with MusicGen.
324
- - **Audio Blending**: Seamlessly combine voice and music with ducking options.
325
  """)
326
 
327
  with gr.Tabs():
328
- # New Tab: Generate Ad Promo Idea
329
  with gr.Tab("💡 Ad Promo Idea"):
 
330
  with gr.Row():
331
  ad_concept = gr.Textbox(
332
  label="Ad Concept",
333
- placeholder="Enter your ad concept or idea...",
334
  lines=2
335
  )
336
  with gr.Row():
@@ -339,20 +336,24 @@ with gr.Blocks(css="""
339
  value="meta-llama/Meta-Llama-3-8B-Instruct",
340
  placeholder="Enter a valid Hugging Face model ID"
341
  )
342
- generate_ad_idea_button = gr.Button("Generate Ad Promo Idea", variant="primary")
 
 
343
  ad_idea_output = gr.Textbox(label="Generated Ad Promo Idea", lines=5, interactive=False)
344
  generate_ad_idea_button.click(
345
  fn=lambda concept, model_id: generate_ad_promo_idea(concept, model_id, HF_TOKEN),
346
  inputs=[ad_concept, llama_model_id_idea],
347
  outputs=ad_idea_output
348
  )
 
349
 
350
- # Step 1: Generate Script
351
  with gr.Tab("📝 Script Generation"):
 
352
  with gr.Row():
353
  user_prompt = gr.Textbox(
354
  label="Promo Idea",
355
- placeholder="E.g., A 30-second promo for a morning show...",
356
  lines=2
357
  )
358
  with gr.Row():
@@ -368,19 +369,22 @@ with gr.Blocks(css="""
368
  step=15,
369
  value=30
370
  )
371
- generate_script_button = gr.Button("Generate Script", variant="primary")
372
- script_output = gr.Textbox(label="Generated Voice-Over Script", lines=5, interactive=False)
 
 
373
  sound_design_output = gr.Textbox(label="Sound Design Suggestions", lines=3, interactive=False)
374
  music_suggestion_output = gr.Textbox(label="Music Suggestions", lines=3, interactive=False)
375
  generate_script_button.click(
376
  fn=lambda user_prompt, model_id, dur: generate_script(user_prompt, model_id, HF_TOKEN, dur),
377
  inputs=[user_prompt, llama_model_id, duration],
378
- outputs=[script_output, sound_design_output, music_suggestion_output],
379
  )
 
380
 
381
- # Step 2: Generate Voice
382
  with gr.Tab("🎤 Voice Synthesis"):
383
- gr.Markdown("Generate a natural-sounding voice-over using Coqui TTS.")
384
  selected_tts_model = gr.Dropdown(
385
  label="TTS Model",
386
  choices=[
@@ -391,17 +395,19 @@ with gr.Blocks(css="""
391
  value="tts_models/en/ljspeech/tacotron2-DDC",
392
  multiselect=False
393
  )
394
- generate_voice_button = gr.Button("Generate Voice-Over", variant="primary")
 
 
395
  voice_audio_output = gr.Audio(label="Voice-Over (WAV)", type="filepath")
396
  generate_voice_button.click(
397
  fn=lambda script, tts_model: generate_voice(script, tts_model),
398
- inputs=[script_output, selected_tts_model],
399
- outputs=voice_audio_output,
400
  )
 
401
 
402
- # Step 3: Generate Music
403
  with gr.Tab("🎶 Music Production"):
404
- gr.Markdown("Generate a custom music track using the **MusicGen Large** model.")
405
  audio_length = gr.Slider(
406
  label="Music Length (tokens)",
407
  minimum=128,
@@ -410,17 +416,20 @@ with gr.Blocks(css="""
410
  value=512,
411
  info="Increase tokens for longer audio (inference time may vary)."
412
  )
413
- generate_music_button = gr.Button("Generate Music", variant="primary")
 
 
414
  music_output = gr.Audio(label="Generated Music (WAV)", type="filepath")
415
  generate_music_button.click(
416
  fn=lambda music_suggestion, length: generate_music(music_suggestion, length),
417
  inputs=[music_suggestion_output, audio_length],
418
- outputs=[music_output],
419
  )
 
420
 
421
- # Step 4: Blend Audio
422
  with gr.Tab("🎚️ Audio Blending"):
423
- gr.Markdown("Blend your voice-over and music track. Music will be looped/truncated to match the voice duration. Enable ducking to lower the music during voice segments.")
424
  ducking_checkbox = gr.Checkbox(label="Enable Ducking?", value=True)
425
  duck_level_slider = gr.Slider(
426
  label="Ducking Level (dB attenuation)",
@@ -429,13 +438,16 @@ with gr.Blocks(css="""
429
  step=1,
430
  value=10
431
  )
432
- blend_button = gr.Button("Blend Voice + Music", variant="primary")
 
 
433
  blended_output = gr.Audio(label="Final Blended Output (WAV)", type="filepath")
434
  blend_button.click(
435
  fn=blend_audio,
436
  inputs=[voice_audio_output, music_output, ducking_checkbox, duck_level_slider],
437
  outputs=blended_output
438
  )
 
439
 
440
  # Footer
441
  gr.Markdown("""
@@ -447,7 +459,6 @@ with gr.Blocks(css="""
447
  </div>
448
  """)
449
 
450
- # Visitor Badge
451
  gr.HTML("""
452
  <div style="text-align: center; margin-top: 1rem;">
453
  <a href="https://visitorbadge.io/status?path=https%3A%2F%2Fhuggingface.co%2Fspaces%2FBils%2Fradiogold">
 
50
  """
51
  if model_id in LLAMA_PIPELINES:
52
  return LLAMA_PIPELINES[model_id]
 
53
  tokenizer = AutoTokenizer.from_pretrained(model_id, use_auth_token=token)
54
  model = AutoModelForCausalLM.from_pretrained(
55
  model_id,
 
62
  LLAMA_PIPELINES[model_id] = text_pipeline
63
  return text_pipeline
64
 
 
65
  def get_musicgen_model(model_key: str = "facebook/musicgen-large"):
66
  """
67
  Returns a cached MusicGen model if available; otherwise, loads it.
 
69
  """
70
  if model_key in MUSICGEN_MODELS:
71
  return MUSICGEN_MODELS[model_key]
 
72
  model = MusicgenForConditionalGeneration.from_pretrained(model_key)
73
  processor = AutoProcessor.from_pretrained(model_key)
74
  device = "cuda" if torch.cuda.is_available() else "cpu"
 
76
  MUSICGEN_MODELS[model_key] = (model, processor)
77
  return model, processor
78
 
 
79
  def get_tts_model(model_name: str = "tts_models/en/ljspeech/tacotron2-DDC"):
80
  """
81
  Returns a cached TTS model if available; otherwise, loads it.
82
  """
83
  if model_name in TTS_MODELS:
84
  return TTS_MODELS[model_name]
 
85
  tts_model = TTS(model_name)
86
  TTS_MODELS[model_name] = tts_model
87
  return tts_model
88
 
 
89
  # ---------------------------------------------------------------------
90
  # Script Generation Function
91
  # ---------------------------------------------------------------------
 
119
  voice_script = "No voice-over script found."
120
  sound_design = "No sound design suggestions found."
121
  music_suggestions = "No music suggestions found."
122
+ # Extract sections if present
123
  if "Voice-Over Script:" in generated_text:
124
  parts = generated_text.split("Voice-Over Script:")
125
  voice_script_part = parts[1]
 
127
  voice_script = voice_script_part.split("Sound Design Suggestions:")[0].strip()
128
  else:
129
  voice_script = voice_script_part.strip()
 
130
  if "Sound Design Suggestions:" in generated_text:
131
  parts = generated_text.split("Sound Design Suggestions:")
132
  sound_design_part = parts[1]
 
134
  sound_design = sound_design_part.split("Music Suggestions:")[0].strip()
135
  else:
136
  sound_design = sound_design_part.strip()
 
137
  if "Music Suggestions:" in generated_text:
138
  parts = generated_text.split("Music Suggestions:")
139
  music_suggestions = parts[1].strip()
 
141
  except Exception as e:
142
  return f"Error generating script: {e}", "", ""
143
 
 
144
  # ---------------------------------------------------------------------
145
  # Ad Promo Idea Generation Function
146
  # ---------------------------------------------------------------------
 
172
  except Exception as e:
173
  return f"Error generating ad promo idea: {e}"
174
 
 
175
  # ---------------------------------------------------------------------
176
  # Voice-Over Generation Function
177
  # ---------------------------------------------------------------------
 
192
  except Exception as e:
193
  return f"Error generating voice: {e}"
194
 
 
195
  # ---------------------------------------------------------------------
196
  # Music Generation Function
197
  # ---------------------------------------------------------------------
 
218
  except Exception as e:
219
  return f"Error generating music: {e}"
220
 
 
221
  # ---------------------------------------------------------------------
222
  # Audio Blending with Duration Sync & Ducking
223
  # ---------------------------------------------------------------------
 
252
  except Exception as e:
253
  return f"Error blending audio: {e}"
254
 
 
255
  # ---------------------------------------------------------------------
256
  # Gradio Interface with Enhanced UI
257
  # ---------------------------------------------------------------------
 
271
  }
272
  .header h1 {
273
  margin: 0;
274
+ font-size: 2.8rem;
275
  }
276
  .header p {
277
  font-size: 1.2rem;
278
  }
279
+ .instructions {
280
+ background-color: #2e2e2e;
281
+ border-radius: 8px;
282
+ padding: 1rem;
283
+ margin-bottom: 1rem;
284
+ font-size: 0.95rem;
285
+ }
286
  .gradio-container {
287
  background: #2e2e2e;
288
  border-radius: 10px;
289
  padding: 1rem;
290
+ margin-bottom: 1rem;
 
 
 
291
  }
292
  .footer {
293
  text-align: center;
 
296
  padding: 1rem;
297
  color: #cccccc;
298
  }
299
+ .btn-clear {
300
+ margin-left: 1rem;
301
+ background: #ff5555;
302
+ color: #fff;
303
+ }
304
  """) as demo:
305
 
306
  # Custom Header
 
311
  """)
312
 
313
  gr.Markdown("""
314
+ Welcome to **AI Ads Promo (Demo MVP)**! This platform leverages state-of-the-art AI models to help you generate creative advertising content.
315
+ Use the tabs below to generate:
316
+ - **Ad Promo Ideas**
317
+ - **Voice-Over Scripts**
318
+ - **Natural-Sounding Voice-Overs**
319
+ - **Custom Music Tracks**
320
+ - **Blended Audio Ads**
321
  """)
322
 
323
  with gr.Tabs():
324
+ # Tab 1: Ad Promo Idea Generation
325
  with gr.Tab("💡 Ad Promo Idea"):
326
+ gr.Markdown("Enter a concept for your ad and let the system generate a creative ad promo idea with taglines and media suggestions.")
327
  with gr.Row():
328
  ad_concept = gr.Textbox(
329
  label="Ad Concept",
330
+ placeholder="E.g., A vibrant summer sale for a trendy clothing brand...",
331
  lines=2
332
  )
333
  with gr.Row():
 
336
  value="meta-llama/Meta-Llama-3-8B-Instruct",
337
  placeholder="Enter a valid Hugging Face model ID"
338
  )
339
+ with gr.Row():
340
+ generate_ad_idea_button = gr.Button("Generate Ad Promo Idea", variant="primary")
341
+ clear_ad_idea = gr.Button("Clear", variant="stop", elem_classes="btn-clear")
342
  ad_idea_output = gr.Textbox(label="Generated Ad Promo Idea", lines=5, interactive=False)
343
  generate_ad_idea_button.click(
344
  fn=lambda concept, model_id: generate_ad_promo_idea(concept, model_id, HF_TOKEN),
345
  inputs=[ad_concept, llama_model_id_idea],
346
  outputs=ad_idea_output
347
  )
348
+ clear_ad_idea.click(fn=lambda: "", inputs=None, outputs=ad_idea_output)
349
 
350
+ # Tab 2: Script Generation
351
  with gr.Tab("📝 Script Generation"):
352
+ gr.Markdown("Generate a voice-over script along with sound design and music suggestions based on your promo idea.")
353
  with gr.Row():
354
  user_prompt = gr.Textbox(
355
  label="Promo Idea",
356
+ placeholder="E.g., A 30-second energetic promo for a new product launch...",
357
  lines=2
358
  )
359
  with gr.Row():
 
369
  step=15,
370
  value=30
371
  )
372
+ with gr.Row():
373
+ generate_script_button = gr.Button("Generate Script", variant="primary")
374
+ clear_script = gr.Button("Clear", variant="stop", elem_classes="btn-clear")
375
+ script_output = gr.Textbox(label="Voice-Over Script", lines=5, interactive=False)
376
  sound_design_output = gr.Textbox(label="Sound Design Suggestions", lines=3, interactive=False)
377
  music_suggestion_output = gr.Textbox(label="Music Suggestions", lines=3, interactive=False)
378
  generate_script_button.click(
379
  fn=lambda user_prompt, model_id, dur: generate_script(user_prompt, model_id, HF_TOKEN, dur),
380
  inputs=[user_prompt, llama_model_id, duration],
381
+ outputs=[script_output, sound_design_output, music_suggestion_output]
382
  )
383
+ clear_script.click(fn=lambda: ["", "", ""], inputs=None, outputs=[script_output, sound_design_output, music_suggestion_output])
384
 
385
+ # Tab 3: Voice Synthesis
386
  with gr.Tab("🎤 Voice Synthesis"):
387
+ gr.Markdown("Convert your generated script into a natural-sounding voice-over using Coqui TTS.")
388
  selected_tts_model = gr.Dropdown(
389
  label="TTS Model",
390
  choices=[
 
395
  value="tts_models/en/ljspeech/tacotron2-DDC",
396
  multiselect=False
397
  )
398
+ with gr.Row():
399
+ generate_voice_button = gr.Button("Generate Voice-Over", variant="primary")
400
+ clear_voice = gr.Button("Clear", variant="stop", elem_classes="btn-clear")
401
  voice_audio_output = gr.Audio(label="Voice-Over (WAV)", type="filepath")
402
  generate_voice_button.click(
403
  fn=lambda script, tts_model: generate_voice(script, tts_model),
404
+ inputs=script_output, outputs=voice_audio_output
 
405
  )
406
+ clear_voice.click(fn=lambda: "", inputs=None, outputs=voice_audio_output)
407
 
408
+ # Tab 4: Music Production
409
  with gr.Tab("🎶 Music Production"):
410
+ gr.Markdown("Generate a custom music track based on the suggestions using the MusicGen model.")
411
  audio_length = gr.Slider(
412
  label="Music Length (tokens)",
413
  minimum=128,
 
416
  value=512,
417
  info="Increase tokens for longer audio (inference time may vary)."
418
  )
419
+ with gr.Row():
420
+ generate_music_button = gr.Button("Generate Music", variant="primary")
421
+ clear_music = gr.Button("Clear", variant="stop", elem_classes="btn-clear")
422
  music_output = gr.Audio(label="Generated Music (WAV)", type="filepath")
423
  generate_music_button.click(
424
  fn=lambda music_suggestion, length: generate_music(music_suggestion, length),
425
  inputs=[music_suggestion_output, audio_length],
426
+ outputs=[music_output]
427
  )
428
+ clear_music.click(fn=lambda: "", inputs=None, outputs=music_output)
429
 
430
+ # Tab 5: Audio Blending
431
  with gr.Tab("🎚️ Audio Blending"):
432
+ gr.Markdown("Blend your voice-over and music track. Music will be adjusted to match the voice duration with an option to enable ducking.")
433
  ducking_checkbox = gr.Checkbox(label="Enable Ducking?", value=True)
434
  duck_level_slider = gr.Slider(
435
  label="Ducking Level (dB attenuation)",
 
438
  step=1,
439
  value=10
440
  )
441
+ with gr.Row():
442
+ blend_button = gr.Button("Blend Voice + Music", variant="primary")
443
+ clear_blend = gr.Button("Clear", variant="stop", elem_classes="btn-clear")
444
  blended_output = gr.Audio(label="Final Blended Output (WAV)", type="filepath")
445
  blend_button.click(
446
  fn=blend_audio,
447
  inputs=[voice_audio_output, music_output, ducking_checkbox, duck_level_slider],
448
  outputs=blended_output
449
  )
450
+ clear_blend.click(fn=lambda: "", inputs=None, outputs=blended_output)
451
 
452
  # Footer
453
  gr.Markdown("""
 
459
  </div>
460
  """)
461
 
 
462
  gr.HTML("""
463
  <div style="text-align: center; margin-top: 1rem;">
464
  <a href="https://visitorbadge.io/status?path=https%3A%2F%2Fhuggingface.co%2Fspaces%2FBils%2Fradiogold">