|
import gradio as gr |
|
from gtts import gTTS |
|
import io |
|
import os |
|
import tempfile |
|
import subprocess |
|
|
|
def _new_temp_mp3():
    """Create an empty ``.mp3`` temp file, close its handle, and return its path."""
    # Closing the handle right away lets other writers (gTTS, ffmpeg) open the
    # path on platforms that forbid opening an already-open temp file twice.
    with tempfile.NamedTemporaryFile(delete=False, suffix=".mp3") as fp:
        return fp.name


def _remove_if_exists(path):
    """Delete *path*; do nothing when it is ``None`` or already gone."""
    if path is None:
        return
    try:
        os.unlink(path)
    except FileNotFoundError:
        # Deliberate best-effort cleanup: a missing file is already "removed".
        pass


def text_to_speech(text, language, pitch):
    """Synthesize *text* with gTTS and re-pitch the result with ffmpeg.

    Args:
        text: Text to speak. Empty, ``None`` or whitespace-only input is
            rejected.
        language: Language code passed to gTTS; falls back to ``"en"`` when
            falsy.
        pitch: Numeric pitch control. The ffmpeg rate multiplier is
            ``2 ** (pitch - 1)``, so ``1.0`` yields a multiplier of 1.

    Returns:
        Path to a newly created ``.mp3`` temp file holding the processed
        audio. The file is created with ``delete=False``, so removing it is
        left to the caller.

    Raises:
        ValueError: If *text* is empty or contains only whitespace.
        RuntimeError: If ffmpeg exits with a non-zero status.
    """
    if not text or not text.strip():
        raise ValueError("Please enter some text to convert to speech.")

    language = language or "en"

    input_file = None
    output_file = None
    succeeded = False
    try:
        input_file = _new_temp_mp3()
        output_file = _new_temp_mp3()

        gTTS(text=text, lang=language, slow=False).save(input_file)

        rate_factor = 2 ** (pitch - 1)
        # asetrate re-labels the sample rate and aresample converts back to
        # 44100 Hz; no tempo-correcting filter is applied here.
        # NOTE(review): the 44100 base assumes the gTTS MP3 is 44.1 kHz. If the
        # source rate differs, even pitch == 1.0 will alter the audio -- confirm
        # against the actual input sample rate.
        ffmpeg_command = [
            "ffmpeg",
            "-y",  # overwrite the pre-created (empty) output file
            "-i", input_file,
            "-af", f"asetrate=44100*{rate_factor},aresample=44100",
            output_file,
        ]
        subprocess.run(ffmpeg_command, check=True, capture_output=True, text=True)
        succeeded = True
    except subprocess.CalledProcessError as e:
        print(f"FFmpeg error: {e.stderr}")
        raise RuntimeError(f"FFmpeg failed: {e.stderr}") from e
    finally:
        # The intermediate file is never needed after this point; the output
        # file is only kept when it actually contains a finished result.
        _remove_if_exists(input_file)
        if not succeeded:
            _remove_if_exists(output_file)

    return output_file
|
|
|
def gradio_tts_interface(text, language, pitch):
    """Adapt ``text_to_speech`` to the two-output shape used by the UI.

    Returns a ``(audio_path, error_message)`` pair: on success the path
    produced by ``text_to_speech`` and ``None``; on any ``Exception`` raised
    by it, ``None`` and the exception's string form.
    """
    try:
        result_path = text_to_speech(text, language, pitch)
    except Exception as exc:
        # UI boundary: report the failure as text instead of propagating it.
        return None, str(exc)
    else:
        return result_path, None
|
|
|
# Build the two-column demo page: inputs on the left, results on the right.
with gr.Blocks() as iface:
    gr.Markdown("# Text-to-Speech Demo with Pitch Control")

    with gr.Row():
        # Left column: everything the user supplies.
        with gr.Column():
            text_input = gr.Textbox(
                label="Enter text to convert to speech",
                lines=3,
            )
            language_input = gr.Dropdown(
                choices=["en", "fr", "es", "de", "it"],
                label="Select Language",
                value="en",
            )
            pitch_input = gr.Slider(
                minimum=0.5,
                maximum=2.0,
                value=1.0,
                step=0.1,
                label="Pitch (0.5 for lower/male, 1.0 for normal, 2.0 for higher/female)",
            )
            submit_button = gr.Button("Convert to Speech")

        # Right column: generated audio plus any error text.
        with gr.Column():
            audio_output = gr.Audio(label="Generated Speech")
            error_output = gr.Textbox(label="Error (if any)", visible=True)

    # The handler returns (audio_path, error_message), matching the outputs order.
    submit_button.click(
        fn=gradio_tts_interface,
        inputs=[text_input, language_input, pitch_input],
        outputs=[audio_output, error_output],
    )

# Start the web server as soon as the module is executed.
iface.launch()