File size: 2,491 Bytes
f5436ef
 
 
6bf1c66
f5436ef
6bf1c66
 
 
 
f5436ef
6bf1c66
 
 
 
 
 
 
 
 
 
 
f5436ef
6bf1c66
 
f5436ef
 
 
 
 
 
6bf1c66
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
f5436ef
 
 
 
 
 
 
6bf1c66
f5436ef
 
 
 
 
 
 
 
 
 
 
 
 
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
import os
import tempfile
import warnings

import gradio as gr
import pyttsx3

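# Silence warnings raised while importing pygame. The pygame startup banner is
# printed rather than warned, so it is only hidden by setting the
# PYGAME_HIDE_SUPPORT_PROMPT environment variable before this import.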
with warnings.catch_warnings():
    warnings.simplefilter("ignore")
    import pygame

def _language_label(languages):
    """Return a printable label for a voice's first language, or 'Unknown'."""
    if not languages:
        return "Unknown"
    language = languages[0]
    if isinstance(language, bytes):
        # Some pyttsx3 drivers may report the language as raw bytes (possibly
        # with a leading non-printable byte); formatting those directly would
        # put a b'...' repr into the dropdown label.
        language = language.decode("utf-8", errors="ignore")
        language = "".join(ch for ch in language if ch.isprintable()).strip()
    return str(language) if language else "Unknown"


def get_voices():
    """Return a mapping of display label -> pyttsx3 voice id.

    Labels have the form ``"<voice name> (<first language or 'Unknown'>)"``.

    Returns:
        A non-empty dict. If the engine cannot be initialised, or it reports
        no voices at all, the single entry ``{"Default Voice": None}`` is
        returned so callers always have at least one choice to offer; a
        ``None`` id means "leave the engine's default voice in place".
    """
    fallback = {"Default Voice": None}
    try:
        engine = pyttsx3.init()
        voices = engine.getProperty('voices')
        voice_ids = {
            f"{voice.name} ({_language_label(getattr(voice, 'languages', None))})": voice.id
            for voice in voices or ()
        }
    except Exception as e:
        # Top-level boundary: any driver failure degrades to the default voice
        # instead of preventing the app from starting.
        print(f"Error initializing TTS engine: {e}")
        return fallback
    # An engine that initialises but lists no voices would otherwise leave the
    # UI with an empty dropdown.
    return voice_ids or fallback

# Build the label -> voice id mapping once at import time; both the voice
# dropdown and text_to_speech() look voices up in this dict.
voice_map = get_voices()

def text_to_speech(text, voice_name, rate=200):
    """Convert text to speech with the selected voice and rate.

    Args:
        text: The text to speak. Empty or whitespace-only text is rejected.
        voice_name: A key of ``voice_map``; unknown names (or a ``None`` voice
            id) leave the engine's default voice in place.
        rate: Speech rate passed to the engine's ``rate`` property; coerced
            to ``int``.

    Returns:
        The path of a freshly written audio file, or ``None`` if the text is
        empty, synthesis fails, or the engine produces no audio data.
    """
    # Nothing to synthesise; skip engine start-up entirely.
    if not text or not str(text).strip():
        return None

    output_file = None
    try:
        # Initialize engine for each call
        engine = pyttsx3.init()

        # Set voice if available
        voice_id = voice_map.get(voice_name)
        if voice_id:
            engine.setProperty('voice', voice_id)

        # Set speech rate
        engine.setProperty('rate', int(rate))

        # Write to a unique file per call. A fixed name would let a leftover
        # file from an earlier call be returned when this synthesis fails, and
        # would let concurrent requests overwrite each other's audio.
        fd, output_file = tempfile.mkstemp(prefix="tts_", suffix=".wav")
        os.close(fd)
        engine.save_to_file(text, output_file)
        engine.runAndWait()

        # mkstemp already created the file, so check for actual audio data
        # rather than mere existence. The file is left on disk for the caller
        # to read; it is not deleted here on success.
        if os.path.getsize(output_file) > 0:
            return output_file
    except Exception as e:
        print(f"Error in text_to_speech: {e}")

    # Failure path: remove the empty/partial temp file if one was created.
    if output_file is not None:
        try:
            os.remove(output_file)
        except OSError:
            # Best-effort cleanup; the file may already be gone.
            pass
    return None

# Gradio interface: one text box, a voice picker and a rate slider feeding
# text_to_speech(), whose returned file path is played by the audio component.
with gr.Blocks(title="Text-to-Speech with Different Voices") as demo:
    # Page heading and a short usage hint.
    gr.Markdown("# Text-to-Speech Converter")
    gr.Markdown("Enter text and select a voice to convert it to speech with different voices and accents.")

    # Input controls, in the order they are passed to text_to_speech().
    text_input = gr.Textbox(
        label="Enter Text",
        placeholder="Type your text here...",
    )
    voice_dropdown = gr.Dropdown(
        choices=list(voice_map),
        label="Select Voice/Accent",
        # Preselect the first listed voice; None when the map is empty.
        value=next(iter(voice_map), None),
    )
    rate_slider = gr.Slider(
        minimum=100,
        maximum=300,
        value=200,
        step=10,
        label="Speech Rate",
    )

    # Trigger and result.
    convert_button = gr.Button("Convert to Speech")
    audio_output = gr.Audio(label="Generated Speech")

    # Clicking the button runs the conversion and loads the result for playback.
    convert_button.click(
        fn=text_to_speech,
        inputs=[text_input, voice_dropdown, rate_slider],
        outputs=audio_output,
    )

# Start the web app only when executed as a script, not on import.
if __name__ == "__main__":
    demo.launch()