# Source: Hugging Face Space "Hev832/gtts" (Space status at capture: Sleeping).
# File size: 2,497 bytes.
# Commits touching this file (from the blame gutter): 310a3ae, 7ab9f1b, be4dbd3.

import gradio as gr
from gtts import gTTS
import io
import os
import tempfile
import subprocess

def _remove_quietly(path):
    """Delete *path*, ignoring the error if it is already gone or locked."""
    try:
        os.unlink(path)
    except OSError:
        pass


def text_to_speech(text, language, pitch):
    """Synthesize *text* with gTTS and pitch-shift the result with ffmpeg.

    Args:
        text: The text to speak. Empty or whitespace-only text is rejected.
        language: Language code handed to gTTS; a falsy value means "en".
        pitch: Pitch control where 1.0 leaves the rate untouched. The sample
            rate is scaled by ``2 ** (pitch - 1)``.

    Returns:
        Path of a temporary ``.mp3`` file holding the processed audio. The
        caller owns this file; it is not deleted here.

    Raises:
        ValueError: If *text* is empty or contains only whitespace.
        RuntimeError: If ffmpeg exits with a non-zero status.
    """
    # Reject whitespace-only input up front so the user gets the same friendly
    # message as for an empty box instead of a failure further down.
    if not text or not text.strip():
        raise ValueError("Please enter some text to convert to speech.")

    language = language or "en"  # Default to English if no language is selected

    tts = gTTS(text=text, lang=language, slow=False)

    # mkstemp hands back an open descriptor; close it right away so no handle
    # is leaked and so other writers (gTTS, ffmpeg) can open the path freely.
    input_fd, input_file = tempfile.mkstemp(suffix=".mp3")
    os.close(input_fd)

    output_file = None
    succeeded = False
    try:
        tts.save(input_file)

        output_fd, output_file = tempfile.mkstemp(suffix=".mp3")
        os.close(output_fd)

        pitch_shift = pitch - 1  # Adjust pitch shift value

        # asetrate re-labels the sample rate (which changes speed as well as
        # pitch); aresample then converts back to a 44100 Hz output.
        # NOTE(review): this assumes the gTTS MP3 is 44100 Hz. If the source
        # rate differs, playback speed shifts even at pitch 1.0 -- confirm.
        ffmpeg_command = [
            "ffmpeg",
            "-y",  # Overwrite output file without asking
            "-i", input_file,
            "-af", f"asetrate=44100*{2**pitch_shift},aresample=44100",
            output_file,
        ]

        try:
            subprocess.run(ffmpeg_command, check=True, capture_output=True, text=True)
        except subprocess.CalledProcessError as e:
            print(f"FFmpeg error: {e.stderr}")
            raise RuntimeError(f"FFmpeg failed: {e.stderr}") from e

        succeeded = True
        return output_file
    finally:
        # The intermediate file is never needed after this point.
        _remove_quietly(input_file)
        # Do not leave a half-written or empty output file behind on failure.
        if not succeeded and output_file is not None:
            _remove_quietly(output_file)

def gradio_tts_interface(text, language, pitch):
    """Adapt ``text_to_speech`` to the two-output shape used by the UI.

    Returns a ``(audio_path, error_message)`` pair: on success the path and
    ``None``; on any exception ``None`` and the exception's string form.
    """
    try:
        outcome = (text_to_speech(text, language, pitch), None)
    except Exception as exc:
        # UI boundary: report the failure in the error box rather than raising.
        outcome = (None, str(exc))
    return outcome

# Build the two-column page: inputs on the left, results on the right.
with gr.Blocks() as iface:
    gr.Markdown("# Text-to-Speech Demo with Pitch Control")

    with gr.Row():
        # Input controls.
        with gr.Column():
            text_input = gr.Textbox(
                label="Enter text to convert to speech",
                lines=3,
            )
            language_input = gr.Dropdown(
                ["en", "fr", "es", "de", "it"],
                label="Select Language",
                value="en",
            )
            pitch_input = gr.Slider(
                minimum=0.5,
                maximum=2.0,
                value=1.0,
                step=0.1,
                label="Pitch (0.5 for lower/male, 1.0 for normal, 2.0 for higher/female)",
            )
            submit_button = gr.Button("Convert to Speech")

        # Output widgets: the generated audio and any error text.
        with gr.Column():
            audio_output = gr.Audio(label="Generated Speech")
            error_output = gr.Textbox(label="Error (if any)", visible=True)

    # Wire the button to the handler; outputs map to its (audio, error) pair.
    submit_button.click(
        fn=gradio_tts_interface,
        inputs=[text_input, language_input, pitch_input],
        outputs=[audio_output, error_output],
    )

# Start the Gradio server as soon as the module is executed.
iface.launch()