Spaces:

shukdevdatta123
/

VocalForge-AI

Running

App Files Community

shukdevdatta123 committed about 1 month ago

Commit

4999708

verified ·

1 Parent(s): f75668a

Update app.py

Browse files

Files changed (1) hide show

app.py +36 -89

app.py CHANGED Viewed

@@ -1,105 +1,52 @@
-# !pip install TTS gradio numpy librosa torch soundfile
-from TTS.api import TTS
 import gradio as gr
-import numpy as np
-import librosa
-import torch
 import tempfile
-import os
-import soundfile as sf  # Added for better audio handling
-# Check device availability
-device = "cuda" if torch.cuda.is_available() else "cpu"
-# Initialize TTS model with device parameter
-model_name = "tts_models/multilingual/multi-dataset/your_tts"
-tts = TTS(model_name=model_name).to(device)  # This line is the problem
-def process_audio(audio_path, max_duration=10):
-    """Load and trim audio to specified duration"""
-    y, sr = librosa.load(audio_path, sr=16000, mono=True)
-    max_samples = max_duration * sr
-    if len(y) > max_samples:
-        y = y[:int(max_samples)]
-    return y, sr
-def generate_speech(audio_file, text):
-    # Create temp files
-    with tempfile.NamedTemporaryFile(suffix=".wav", delete=False) as ref_file, \
-         tempfile.NamedTemporaryFile(suffix=".wav", delete=False) as out_file:
-        ref_path = ref_file.name
-        out_path = out_file.name
-    # Process reference audio
-    y, sr = process_audio(audio_file)
-    sf.write(ref_path, y, sr)  # Using soundfile instead of librosa for writing
-    # Generate speech
-    try:
-        tts.tts_to_file(
-            text=text,
-            speaker_wav=ref_path,
-            language="en",
-            file_path=out_path
-        )
-        # Clean up temporary files
-        os.unlink(ref_path)
-        return out_path
-    except Exception as e:
-        print(f"Error: {e}")
-        return None
-# Gradio interface
-with gr.Blocks(title="Voice Clone TTS") as demo:
-    gr.Markdown("""
-    # 🎤 Voice Clone Text-to-Speech
-    1. Upload a short English voice sample (5-10 seconds)
-    2. Enter text you want to speak
-    3. Generate audio in your voice!
-    """)
     with gr.Row():
-        with gr.Column():
-            audio_input = gr.Audio(
-                sources=["upload", "microphone"],
-                type="filepath",
-                label="Upload Voice Sample",
-                interactive=True
-            )
-            text_input = gr.Textbox(
-                label="Text to Speak",
-                placeholder="Enter English text here...",
-                lines=4
-            )
-            btn = gr.Button("Generate Speech", variant="primary")
-        with gr.Column():
-            audio_output = gr.Audio(
-                label="Generated Speech",
-                interactive=False
-            )
-            error_output = gr.Textbox(label="Processing Info", visible=False)
-    # Example inputs
-    gr.Examples(
-        examples=[
-            ["examples/sample_voice.wav", "Hello! Welcome to the future of voice cloning technology"],
-            ["examples/sample_voice2.wav", "This text is spoken in a completely cloned voice"]
-        ],
-        inputs=[audio_input, text_input],
-        outputs=audio_output,
-        fn=generate_speech,
-        cache_examples=False  # Disabled cache to avoid potential issues
-    )
-    btn.click(
         fn=generate_speech,
         inputs=[audio_input, text_input],
         outputs=audio_output
     )
-if __name__ == "__main__":
-    demo.launch(server_port=7860, share=True)

 import gradio as gr
+from bark import SAMPLE_RATE, generate_audio, preload_models
+from scipy.io.wavfile import write as write_wav
 import tempfile
+# Preload the models at startup
+preload_models()
+def generate_speech(reference_audio, text):
+    """
+    Generate speech audio mimicking the voice from the reference audio using Bark.
+    Parameters:
+    reference_audio (str): Filepath to the uploaded voice sample.
+    text (str): Text to convert to speech.
+    Returns:
+    str: Path to the generated audio file
+    """
+    # Generate speech using the reference audio and text
+    audio_array = generate_audio(text, history_prompt=reference_audio)
+    # Create a temporary file to save the audio
+    temp_file = tempfile.NamedTemporaryFile(suffix=".wav", delete=False)
+    temp_file_path = temp_file.name
+    # Save the audio to the temporary file
+    write_wav(temp_file_path, SAMPLE_RATE, audio_array)
+    temp_file.close()
+    return temp_file_path
+# Build the Gradio interface
+with gr.Blocks(title="Voice Cloning TTS with Bark") as app:
+    gr.Markdown("## Voice Cloning Text-to-Speech with Bark")
+    gr.Markdown("Upload a short voice sample in English, then enter text to hear it in your voice!")
     with gr.Row():
+        audio_input = gr.Audio(type="filepath", label="Upload Your Voice Sample (English)")
+        text_input = gr.Textbox(label="Enter Text to Convert to Speech", placeholder="e.g., I love chocolate")
+    generate_btn = gr.Button("Generate Speech")
+    audio_output = gr.Audio(label="Generated Speech", interactive=False)
+    # Connect the button to the generation function
+    generate_btn.click(
         fn=generate_speech,
         inputs=[audio_input, text_input],
         outputs=audio_output
     )
+# Launch the application
+app.launch()