Spaces:

ramimu
/

voice_cloning

Running

App Files Files Community

ramimu committed on May 31

Commit

3c59845

verified ·

1 Parent(s): 73ab9c0

Update app.py

Browse files

Files changed (1) hide show

app.py +86 -59

app.py CHANGED Viewed

@@ -321,73 +321,100 @@ def clone_voice_api(text_to_speak, reference_audio_url, exaggeration=0.6, cfg_pa
 def main():
     print("Starting Advanced Gradio interface...")
-    iface = gr.Interface(
-        fn=clone_voice, # The UI and default Gradio API will use clone_voice directly
-        inputs=[
-            gr.Textbox(
-                label="Text to Speak",
-                placeholder="Enter the text you want the cloned voice to say...",
-                lines=3
-            ),
-            gr.Audio(
-                type="filepath", # Gradio handles file upload/mic and provides a filepath
-                label="Reference Audio (Upload a short .wav or .mp3 clip)",
-                sources=["upload", "microphone"]
-            ),
-            gr.Slider(
-                minimum=0.25,
-                maximum=1.0,
-                value=0.6,
-                step=0.05,
-                label="Exaggeration",
-                info="Controls voice characteristic emphasis (0.5 = neutral, higher = more exaggerated)"
-            ),
-            gr.Slider(
-                minimum=0.2,
-                maximum=1.0,
-                value=0.3,
-                step=0.05,
-                label="CFG/Pace",
-                info="Classifier-free guidance weight (affects generation quality and pace)"
-            ),
-            gr.Number(
-                value=0,
-                label="Random Seed",
-                info="Set to 0 for random results, or use a specific number for reproducible outputs",
-                precision=0
-            ),
-            gr.Slider(
-                minimum=0.05,
-                maximum=2.0,
-                value=0.6,
-                step=0.05,
-                label="Temperature",
-                info="Controls randomness in generation (lower = more consistent, higher = more varied)"
             )
-        ],
-        outputs=[
-            gr.Audio(label="Generated Audio", type="numpy"),
-            gr.Textbox(label="Status", lines=2)
-        ],
-        title="🎙️ Advanced Chatterbox Voice Cloning",
-        description="Clone any voice using advanced AI technology with fine-tuned controls.",
-        examples=[
-            ["Hello, this is a test of the voice cloning system.", None, 0.5, 0.5, 0, 0.8],
-            ["The quick brown fox jumps over the lazy dog.", None, 0.7, 0.3, 42, 0.6],
-            ["Welcome to our AI voice cloning service. We hope you enjoy the experience!", None, 0.4, 0.7, 123, 1.0]
-        ],
-        api_name="clone_voice"  # Add this line!
-    )
     iface.launch(
         server_name="0.0.0.0",
         server_port=7860,
         show_error=True,
         quiet=False,
         favicon_path=None,
-        share=False, # Set to True if you want a public link from your local machine
         auth=None
-        # app_kwargs for FastAPI specific settings are not directly used by gr.Interface.launch
-        # but if you were embedding in FastAPI, you'd pass them to FastAPI app.
     )
 if __name__ == "__main__":

 # Build the Gradio Blocks UI for voice cloning, wire the generate button to
 # clone_voice_api, and start the web server on port 7860.
 def main():
     print("Starting Advanced Gradio interface...")
     # The whole UI lives inside one gr.Blocks context bound to `iface`,
     # which is launched at the end of this function.
+    with gr.Blocks(title="Advanced Chatterbox Voice Cloning", theme=gr.themes.Soft()) as iface:
+        gr.Markdown("# 🎙️ Advanced Chatterbox Voice Cloning")
+        gr.Markdown("Clone any voice using advanced AI technology with fine-tuned controls.")
+        with gr.Row():
             # Left column (scale=2): text, reference audio, tuning controls
             # and the generate button.
+            with gr.Column(scale=2):
+                text_input = gr.Textbox(
+                    label="Text to Speak",
+                    placeholder="Enter the text you want the cloned voice to say...",
+                    lines=3
+                )
                 # NOTE(review): type="filepath" is requested here, while the
                 # handler's second parameter is named reference_audio_url -
                 # confirm clone_voice_api accepts a local file path.
+                audio_input = gr.Audio(
+                    type="filepath",
+                    label="Reference Audio (Upload a short .wav or .mp3 clip)",
+                    sources=["upload", "microphone"]
+                )
                 # Tuning controls sit in an accordion that starts collapsed
                 # (open=False); each control carries its own default value.
+                with gr.Accordion("🔧 Advanced Settings", open=False):
+                    with gr.Row():
+                        exaggeration = gr.Slider(
+                            minimum=0.25,
+                            maximum=1.0,
+                            value=0.6,
+                            step=0.05,
+                            label="Exaggeration",
+                            info="Controls voice characteristic emphasis"
+                        )
+                        cfg_pace = gr.Slider(
+                            minimum=0.2,
+                            maximum=1.0,
+                            value=0.3,
+                            step=0.05,
+                            label="CFG/Pace",
+                            info="Classifier-free guidance weight"
+                        )
+                    with gr.Row():
+                        random_seed = gr.Number(
+                            value=0,
+                            label="Random Seed",
+                            info="Set to 0 for random results",
+                            precision=0
+                        )
+                        temperature = gr.Slider(
+                            minimum=0.05,
+                            maximum=2.0,
+                            value=0.6,
+                            step=0.05,
+                            label="Temperature",
+                            info="Controls randomness in generation"
+                        )
+                generate_btn = gr.Button("🎵 Generate Voice Clone", variant="primary", size="lg")
             # Right column (scale=1): read-only result widgets.
+            with gr.Column(scale=1):
+                audio_output = gr.Audio(
+                    label="Generated Audio",
+                    type="numpy",
+                    interactive=False
+                )
+                status_output = gr.Textbox(
+                    label="Status",
+                    interactive=False,
+                    lines=2
+                )
+        # This is the key part - create the API endpoint properly
         # The six inputs are passed positionally in the order listed below;
         # the handler's two return values fill the audio and status widgets.
+        generate_btn.click(
+            fn=clone_voice_api,  # Use the API-ready function
+            inputs=[text_input, audio_input, exaggeration, cfg_pace, random_seed, temperature],
+            outputs=[audio_output, status_output],
+            api_name="predict"  # This creates /api/predict endpoint
+        )
         # Example rows follow the same order as `inputs`; every row sets the
         # reference audio to None.
         # NOTE(review): with cache_examples=False, confirm whether fn/outputs
         # have any effect here.
+        with gr.Accordion("📝 Examples", open=False):
+            gr.Examples(
+                examples=[
+                    ["Hello, this is a test of the voice cloning system.", None, 0.5, 0.5, 0, 0.8],
+                    ["The quick brown fox jumps over the lazy dog.", None, 0.7, 0.3, 42, 0.6],
+                    ["Welcome to our AI voice cloning service. We hope you enjoy the experience!", None, 0.4, 0.7, 123, 1.0]
+                ],
+                inputs=[text_input, audio_input, exaggeration, cfg_pace, random_seed, temperature],
+                outputs=[audio_output, status_output],
+                fn=clone_voice_api,
+                cache_examples=False
             )
+    # Launch the interface
     # Serve on 0.0.0.0:7860 with no auth and no share link; show_error=True
     # and quiet=False keep errors and startup output visible.
     iface.launch(
         server_name="0.0.0.0",
         server_port=7860,
         show_error=True,
         quiet=False,
         favicon_path=None,
+        share=False,
         auth=None
     )
 if __name__ == "__main__":