ramimu committed on
Commit
3c59845
·
verified ·
1 Parent(s): 73ab9c0

Update app.py

Browse files
Files changed (1) hide show
  1. app.py +86 -59
app.py CHANGED
@@ -321,73 +321,100 @@ def clone_voice_api(text_to_speak, reference_audio_url, exaggeration=0.6, cfg_pa
321
 
322
  def main():
323
  print("Starting Advanced Gradio interface...")
324
- iface = gr.Interface(
325
- fn=clone_voice, # The UI and default Gradio API will use clone_voice directly
326
- inputs=[
327
- gr.Textbox(
328
- label="Text to Speak",
329
- placeholder="Enter the text you want the cloned voice to say...",
330
- lines=3
331
- ),
332
- gr.Audio(
333
- type="filepath", # Gradio handles file upload/mic and provides a filepath
334
- label="Reference Audio (Upload a short .wav or .mp3 clip)",
335
- sources=["upload", "microphone"]
336
- ),
337
- gr.Slider(
338
- minimum=0.25,
339
- maximum=1.0,
340
- value=0.6,
341
- step=0.05,
342
- label="Exaggeration",
343
- info="Controls voice characteristic emphasis (0.5 = neutral, higher = more exaggerated)"
344
- ),
345
- gr.Slider(
346
- minimum=0.2,
347
- maximum=1.0,
348
- value=0.3,
349
- step=0.05,
350
- label="CFG/Pace",
351
- info="Classifier-free guidance weight (affects generation quality and pace)"
352
- ),
353
- gr.Number(
354
- value=0,
355
- label="Random Seed",
356
- info="Set to 0 for random results, or use a specific number for reproducible outputs",
357
- precision=0
358
- ),
359
- gr.Slider(
360
- minimum=0.05,
361
- maximum=2.0,
362
- value=0.6,
363
- step=0.05,
364
- label="Temperature",
365
- info="Controls randomness in generation (lower = more consistent, higher = more varied)"
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
366
  )
367
- ],
368
- outputs=[
369
- gr.Audio(label="Generated Audio", type="numpy"),
370
- gr.Textbox(label="Status", lines=2)
371
- ],
372
- title="🎙️ Advanced Chatterbox Voice Cloning",
373
- description="Clone any voice using advanced AI technology with fine-tuned controls.",
374
- examples=[
375
- ["Hello, this is a test of the voice cloning system.", None, 0.5, 0.5, 0, 0.8],
376
- ["The quick brown fox jumps over the lazy dog.", None, 0.7, 0.3, 42, 0.6],
377
- ["Welcome to our AI voice cloning service. We hope you enjoy the experience!", None, 0.4, 0.7, 123, 1.0]
378
- ],
379
- api_name="clone_voice" # Add this line!
380
- )
381
  iface.launch(
382
  server_name="0.0.0.0",
383
  server_port=7860,
384
  show_error=True,
385
  quiet=False,
386
  favicon_path=None,
387
- share=False, # Set to True if you want a public link from your local machine
388
  auth=None
389
- # app_kwargs for FastAPI specific settings are not directly used by gr.Interface.launch
390
- # but if you were embedding in FastAPI, you'd pass them to FastAPI app.
391
  )
392
 
393
  if __name__ == "__main__":
 
321
 
322
  def main():
323
  print("Starting Advanced Gradio interface...")
324
+
325
+ with gr.Blocks(title="Advanced Chatterbox Voice Cloning", theme=gr.themes.Soft()) as iface:
326
+ gr.Markdown("# 🎙️ Advanced Chatterbox Voice Cloning")
327
+ gr.Markdown("Clone any voice using advanced AI technology with fine-tuned controls.")
328
+
329
+ with gr.Row():
330
+ with gr.Column(scale=2):
331
+ text_input = gr.Textbox(
332
+ label="Text to Speak",
333
+ placeholder="Enter the text you want the cloned voice to say...",
334
+ lines=3
335
+ )
336
+ audio_input = gr.Audio(
337
+ type="filepath",
338
+ label="Reference Audio (Upload a short .wav or .mp3 clip)",
339
+ sources=["upload", "microphone"]
340
+ )
341
+ with gr.Accordion("🔧 Advanced Settings", open=False):
342
+ with gr.Row():
343
+ exaggeration = gr.Slider(
344
+ minimum=0.25,
345
+ maximum=1.0,
346
+ value=0.6,
347
+ step=0.05,
348
+ label="Exaggeration",
349
+ info="Controls voice characteristic emphasis"
350
+ )
351
+ cfg_pace = gr.Slider(
352
+ minimum=0.2,
353
+ maximum=1.0,
354
+ value=0.3,
355
+ step=0.05,
356
+ label="CFG/Pace",
357
+ info="Classifier-free guidance weight"
358
+ )
359
+ with gr.Row():
360
+ random_seed = gr.Number(
361
+ value=0,
362
+ label="Random Seed",
363
+ info="Set to 0 for random results",
364
+ precision=0
365
+ )
366
+ temperature = gr.Slider(
367
+ minimum=0.05,
368
+ maximum=2.0,
369
+ value=0.6,
370
+ step=0.05,
371
+ label="Temperature",
372
+ info="Controls randomness in generation"
373
+ )
374
+ generate_btn = gr.Button("🎡 Generate Voice Clone", variant="primary", size="lg")
375
+
376
+ with gr.Column(scale=1):
377
+ audio_output = gr.Audio(
378
+ label="Generated Audio",
379
+ type="numpy",
380
+ interactive=False
381
+ )
382
+ status_output = gr.Textbox(
383
+ label="Status",
384
+ interactive=False,
385
+ lines=2
386
+ )
387
+
388
+ # This is the key part - create the API endpoint properly
389
+ generate_btn.click(
390
+ fn=clone_voice_api, # Use the API-ready function
391
+ inputs=[text_input, audio_input, exaggeration, cfg_pace, random_seed, temperature],
392
+ outputs=[audio_output, status_output],
393
+ api_name="predict" # This creates /api/predict endpoint
394
+ )
395
+
396
+ with gr.Accordion("📝 Examples", open=False):
397
+ gr.Examples(
398
+ examples=[
399
+ ["Hello, this is a test of the voice cloning system.", None, 0.5, 0.5, 0, 0.8],
400
+ ["The quick brown fox jumps over the lazy dog.", None, 0.7, 0.3, 42, 0.6],
401
+ ["Welcome to our AI voice cloning service. We hope you enjoy the experience!", None, 0.4, 0.7, 123, 1.0]
402
+ ],
403
+ inputs=[text_input, audio_input, exaggeration, cfg_pace, random_seed, temperature],
404
+ outputs=[audio_output, status_output],
405
+ fn=clone_voice_api,
406
+ cache_examples=False
407
  )
408
+
409
+ # Launch the interface
 
 
 
 
 
 
 
 
 
 
 
 
410
  iface.launch(
411
  server_name="0.0.0.0",
412
  server_port=7860,
413
  show_error=True,
414
  quiet=False,
415
  favicon_path=None,
416
+ share=False,
417
  auth=None
 
 
418
  )
419
 
420
  if __name__ == "__main__":