File size: 2,497 Bytes
310a3ae
 
 
 
 
7ab9f1b
310a3ae
 
be4dbd3
 
 
 
 
310a3ae
 
 
 
 
7ab9f1b
 
310a3ae
 
 
 
7ab9f1b
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
310a3ae
 
 
 
be4dbd3
 
 
 
 
310a3ae
be4dbd3
310a3ae
 
be4dbd3
310a3ae
 
 
 
be4dbd3
310a3ae
 
 
 
 
be4dbd3
310a3ae
 
 
 
be4dbd3
310a3ae
 
 
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
import gradio as gr
from gtts import gTTS
import io
import os
import tempfile
import subprocess

def text_to_speech(text, language, pitch):
    """Synthesize *text* with gTTS and pitch-shift the result with ffmpeg.

    Args:
        text: The text to speak. Must contain at least one non-whitespace
            character.
        language: Language code passed to gTTS as ``lang``. A falsy value
            falls back to ``"en"``.
        pitch: Numeric pitch setting; ``1`` leaves the rate factor at 1.
            The sample-rate factor handed to ffmpeg is ``2 ** (pitch - 1)``.

    Returns:
        Path of a temporary ``.mp3`` file holding the processed audio. The
        file is created with ``delete=False``, so the caller owns it and is
        responsible for removing it.

    Raises:
        ValueError: If *text* is empty or only whitespace.
        RuntimeError: If ffmpeg exits with a non-zero status; the message
            carries ffmpeg's stderr.
    """
    # Reject whitespace-only input here too, so the user gets the friendly
    # message instead of whatever error the synthesizer raises for it.
    if not text or not text.strip():
        raise ValueError("Please enter some text to convert to speech.")

    language = language or "en"  # Default to English if no language is selected

    # Compute the factor before touching the filesystem so that an invalid
    # pitch value cannot leave temporary files behind.
    rate_factor = 2 ** (pitch - 1)

    tts = gTTS(text=text, lang=language, slow=False)

    # Reserve both paths and close the handles immediately: gTTS and ffmpeg
    # reopen the files by name, and an open handle would leak a descriptor
    # (and block reopening on platforms that lock open files).
    with tempfile.NamedTemporaryFile(delete=False, suffix=".mp3") as fp:
        input_file = fp.name
    with tempfile.NamedTemporaryFile(delete=False, suffix=".mp3") as fp:
        output_file = fp.name

    # NOTE(review): asetrate reinterprets the stream's sample rate, so this
    # presumably changes playback speed together with pitch, and it assumes
    # the gTTS output is sampled at 44100 Hz -- confirm.
    ffmpeg_command = [
        "ffmpeg",
        "-y",  # Overwrite output file without asking
        "-i", input_file,
        "-af", f"asetrate=44100*{rate_factor},aresample=44100",
        output_file,
    ]

    succeeded = False
    try:
        tts.save(input_file)
        try:
            subprocess.run(ffmpeg_command, check=True, capture_output=True, text=True)
        except subprocess.CalledProcessError as e:
            print(f"FFmpeg error: {e.stderr}")
            raise RuntimeError(f"FFmpeg failed: {e.stderr}") from e
        succeeded = True
    finally:
        # The intermediate file is always removed; the output file is kept
        # only when it is actually being returned to the caller.
        stale_files = [input_file] if succeeded else [input_file, output_file]
        for path in stale_files:
            try:
                os.unlink(path)
            except OSError:
                # Best-effort cleanup: never mask the original error.
                pass

    return output_file

def gradio_tts_interface(text, language, pitch):
    """Run ``text_to_speech`` and report the outcome as a pair.

    Returns:
        ``(audio_path, None)`` when synthesis succeeds, or
        ``(None, error_message)`` when any exception is raised, where
        ``error_message`` is ``str()`` of that exception.
    """
    try:
        outcome = (text_to_speech(text, language, pitch), None)
    except Exception as err:
        # Nothing propagates to the caller: the failure is returned as text
        # in the second slot instead.
        outcome = (None, str(err))
    return outcome

# Assemble the demo page: inputs in the first column, results in the second,
# with the button wired to gradio_tts_interface.
with gr.Blocks() as iface:
    gr.Markdown("# Text-to-Speech Demo with Pitch Control")

    with gr.Row():
        # Input column: text, language, pitch, and the trigger button.
        with gr.Column():
            text_input = gr.Textbox(
                label="Enter text to convert to speech",
                lines=3,
            )
            language_input = gr.Dropdown(
                ["en", "fr", "es", "de", "it"],
                label="Select Language",
                value="en",
            )
            pitch_input = gr.Slider(
                minimum=0.5,
                maximum=2.0,
                value=1.0,
                step=0.1,
                label="Pitch (0.5 for lower/male, 1.0 for normal, 2.0 for higher/female)",
            )
            submit_button = gr.Button("Convert to Speech")

        # Output column: the generated audio and any error text.
        with gr.Column():
            audio_output = gr.Audio(label="Generated Speech")
            error_output = gr.Textbox(label="Error (if any)", visible=True)

    # The handler returns (audio, error), matching the two outputs in order.
    submit_button.click(
        fn=gradio_tts_interface,
        inputs=[text_input, language_input, pitch_input],
        outputs=[audio_output, error_output],
    )

iface.launch()