Spaces:

shukdevdatta123
/

VocalForge-AI

Running

App Files Files Community

shukdevdatta123 committed on 16 days ago

Commit

4760b00

verified ·

1 Parent(s): af961e5

Update app.py

Browse files

Files changed (1) hide show

app.py +86 -39

app.py CHANGED Viewed

@@ -1,57 +1,104 @@
-import gradio as gr
 from TTS.api import TTS
 import numpy as np
-from scipy.io import wavfile
 import tempfile
 import os
-# Load the YourTTS model once at startup
-tts = TTS(model_name="tts_models/multilingual/multi-dataset/your_tts", progress_bar=False)
-sample_rate = tts.synthesizer.output_sample_rate
-def generate_speech(reference_audio, text):
-    """
-    Generate speech audio mimicking the voice from the reference audio.
-    Parameters:
-    reference_audio (str): Filepath to the uploaded voice sample.
-    text (str): Text to convert to speech.
-    Returns:
-    str: Path to the generated audio file
-    """
-    # Generate speech using the reference audio and text
-    wav = tts.tts(text=text, speaker_wav=reference_audio, language="en")
-    # Convert list to numpy array
-    wav_np = np.array(wav, dtype=np.float32)
-    # Create a temporary file to save the audio
-    temp_file = tempfile.NamedTemporaryFile(suffix=".wav", delete=False)
-    temp_file_path = temp_file.name
-    # Save the audio to the temporary file
-    wavfile.write(temp_file_path, sample_rate, wav_np)
-    temp_file.close()
-    return temp_file_path
-# Build the Gradio interface
-with gr.Blocks(title="Voice Cloning TTS") as app:
-    gr.Markdown("## Voice Cloning Text-to-Speech")
-    gr.Markdown("Upload a short voice sample in English, then enter text to hear it in your voice!")
     with gr.Row():
-        audio_input = gr.Audio(type="filepath", label="Upload Your Voice Sample (English)")
-        text_input = gr.Textbox(label="Enter Text to Convert to Speech", placeholder="e.g., I love chocolate")
-    generate_btn = gr.Button("Generate Speech")
-    audio_output = gr.Audio(label="Generated Speech", interactive=False)
-    # Connect the button to the generation function
-    generate_btn.click(
         fn=generate_speech,
         inputs=[audio_input, text_input],
         outputs=audio_output
     )
-# Launch the application
-app.launch()

+# !pip install TTS gradio numpy librosa torch
 from TTS.api import TTS
+import gradio as gr
 import numpy as np
+import librosa
+import torch
 import tempfile
 import os
+# Run on the GPU when torch can see one, otherwise fall back to the CPU
+if torch.cuda.is_available():
+    device = "cuda"
+else:
+    device = "cpu"
+# Load the multilingual YourTTS checkpoint once at import time and move it to that device
+model_name = "tts_models/multilingual/multi-dataset/your_tts"
+tts = TTS(model_name=model_name).to(device)
+def process_audio(audio_path, max_duration=10):
+    """Load a clip as 16 kHz mono audio and keep at most its first `max_duration` seconds.
+
+    Args:
+        audio_path: Path of the audio file to load.
+        max_duration: Maximum length to keep, in seconds.
+
+    Returns:
+        A `(samples, sample_rate)` tuple; `samples` is cut off at the limit
+        when the clip is longer, and returned unchanged otherwise.
+    """
+    samples, rate = librosa.load(audio_path, sr=16000, mono=True)
+    limit = max_duration * rate
+    # Only slice when the clip actually exceeds the limit.
+    clipped = samples[:int(limit)] if len(samples) > limit else samples
+    return clipped, rate
+def generate_speech(audio_file, text):
+    """Synthesize `text` in the voice of the speaker heard in `audio_file`.
+
+    The reference clip is trimmed by `process_audio`, written to a temporary
+    16-bit PCM WAV file, and passed to the module-level `tts` model, which
+    writes its result to a second temporary WAV file.
+
+    Args:
+        audio_file: Path to the uploaded or recorded reference clip.
+        text: Text to speak (synthesized with language="en").
+
+    Returns:
+        Path to the generated WAV file, or None when an input is missing or
+        any processing step fails (the error is printed).
+    """
+    import wave  # stdlib WAV writer; local import keeps this block self-contained
+
+    # The UI hands over None / "" when the user has not provided an input yet.
+    if not audio_file or not text or not text.strip():
+        print("Error: a voice sample and non-empty text are both required")
+        return None
+
+    # Reserve two file names; the handles are closed on leaving the `with`
+    # so the files can be reopened by name below.
+    with tempfile.NamedTemporaryFile(suffix=".wav", delete=False) as ref_file, \
+         tempfile.NamedTemporaryFile(suffix=".wav", delete=False) as out_file:
+        ref_path = ref_file.name
+        out_path = out_file.name
+
+    succeeded = False
+    try:
+        # Process reference audio
+        y, sr = process_audio(audio_file)
+        # librosa.output.write_wav no longer exists in librosa >= 0.8, so the
+        # mono float samples are converted to little-endian 16-bit PCM and
+        # written with the standard-library wave module instead.
+        pcm = (np.clip(y, -1.0, 1.0) * 32767.0).astype("<i2")
+        with wave.open(ref_path, "wb") as wav_out:
+            wav_out.setnchannels(1)
+            wav_out.setsampwidth(2)
+            wav_out.setframerate(int(sr))
+            wav_out.writeframes(pcm.tobytes())
+        # Generate speech
+        tts.tts_to_file(
+            text=text,
+            speaker_wav=ref_path,
+            language="en",
+            file_path=out_path
+        )
+        succeeded = True
+        return out_path
+    except Exception as e:
+        print(f"Error: {e}")
+        return None
+    finally:
+        # The reference copy is never needed after synthesis; the output file
+        # is only kept when it is being returned to the caller.
+        stale_paths = [ref_path] if succeeded else [ref_path, out_path]
+        for stale in stale_paths:
+            try:
+                os.unlink(stale)
+            except OSError:
+                pass  # best-effort cleanup; a missing temp file is harmless
+# Gradio interface: inputs in the first column, generated audio in the second
+with gr.Blocks(title="Voice Clone TTS") as demo:
+    gr.Markdown("""
+    # 🎤 Voice Clone Text-to-Speech
+    1. Upload a short English voice sample (5-10 seconds)
+    2. Enter text you want to speak
+    3. Generate audio in your voice!
+    """)
     with gr.Row():  # one row holding the two columns below
+        with gr.Column():  # input side: voice sample, text, trigger button
+            audio_input = gr.Audio(
+                sources=["upload", "microphone"],
+                type="filepath",  # generate_speech receives the clip as a path
+                label="Upload Voice Sample",
+                interactive=True
+            )
+            text_input = gr.Textbox(
+                label="Text to Speak",
+                placeholder="Enter English text here...",
+                lines=4
+            )
+            btn = gr.Button("Generate Speech", variant="primary")
+        with gr.Column():  # output side
+            audio_output = gr.Audio(
+                label="Generated Speech",
+                interactive=False
+            )
+            error_output = gr.Textbox(label="Processing Info", visible=False)  # hidden; not wired to any callback in the visible code
+    # Example inputs; NOTE(review): the WAV files under examples/ must ship with the app — confirm
+    gr.Examples(
+        examples=[
+            ["examples/sample_voice.wav", "Hello! Welcome to the future of voice cloning technology"],
+            ["examples/sample_voice2.wav", "This text is spoken in a completely cloned voice"]
+        ],
+        inputs=[audio_input, text_input],
+        outputs=audio_output,
+        fn=generate_speech,
+        cache_examples=True  # NOTE(review): presumably runs generate_speech on the examples ahead of time — confirm against Gradio docs
+    )
+    btn.click(  # the button runs the same function on the user's own inputs
         fn=generate_speech,
         inputs=[audio_input, text_input],
         outputs=audio_output
     )
+if __name__ == "__main__":  # start the server only when run as a script
+    demo.launch(server_port=7860, share=True)  # NOTE(review): share=True asks for a public link — confirm it is wanted on the hosting platform