File size: 2,856 Bytes
3b1f0f3
 
 
6ead22c
3b1f0f3
 
5bd7020
 
 
 
 
 
 
 
a7c380d
 
3b1f0f3
8a1ba2b
a368b88
8a1ba2b
770a448
8a1ba2b
8aa19f9
3b1f0f3
 
 
 
8aa19f9
65a5db5
 
 
3b1f0f3
1b1cf5a
86251d5
f524459
380e323
a922d9a
91e78d9
e98beac
 
 
dc05a19
e98beac
 
 
90e48a9
e98beac
 
 
 
 
 
 
69f4fe8
 
 
8f7f6f9
69f4fe8
 
 
3b1f0f3
f524459
 
6ead22c
 
f524459
2fd9e09
f524459
6c82621
f524459
 
 
 
 
 
 
 
 
 
 
 
 
 
3b1f0f3
2fd9e09
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
import random 
import gradio as gr 
import numpy as np 
import time
from elevenlabs import voices, generate, set_api_key, UnauthenticatedRateLimitError

def pad_buffer(audio):
    """Zero-pad ``audio`` so its length is a multiple of the int16 item size.

    ``np.frombuffer(..., dtype=np.int16)`` requires a buffer whose size is a
    whole number of 2-byte elements; this appends the missing NUL byte(s)
    when the length is not already aligned.

    Args:
        audio: Bytes-like audio payload supporting ``len`` and ``+`` with
            ``bytes``.

    Returns:
        ``audio`` itself when already aligned, otherwise a new padded buffer.
    """
    sample_width = np.dtype(np.int16).itemsize
    leftover = len(audio) % sample_width
    if leftover == 0:
        return audio
    return audio + b'\0' * (sample_width - leftover)

def generate_voice(text, voice_name, api_key):
    """Generate speech for ``text`` via ElevenLabs for a Gradio audio output.

    Args:
        text: Text to synthesize; only the first 4000 characters are sent.
        voice_name: Voice identifier forwarded to ``generate`` as ``voice``.
        api_key: ElevenLabs API key handed to ``set_api_key``.

    Returns:
        A ``(44100, samples)`` tuple where ``samples`` is the returned audio
        buffer (padded to an even byte count) viewed as an int16 NumPy array.

    Raises:
        gr.Error: With a free-tier message when the API reports an
            unauthenticated rate limit, or with the underlying error's text
            for any other failure during generation/decoding.
    """
    # NOTE(review): set_api_key is called per request; presumably it stores
    # the key process-wide, which would be shared between concurrent
    # requests -- confirm against the elevenlabs client before relying on it.
    set_api_key(api_key)
    try:
        audio = generate(
            text[:4000],  # Limit to 4000 characters
            voice=voice_name,
            model="eleven_multilingual_v2",
        )
        # NOTE(review): the buffer is reinterpreted as raw int16 samples at
        # 44100 Hz -- assumes `generate` returns audio in that layout; verify.
        return (44100, np.frombuffer(pad_buffer(audio), dtype=np.int16))
    except UnauthenticatedRateLimitError as e:
        raise gr.Error("Thanks for trying out ElevenLabs TTS! You've reached the free tier limit. Please provide an API key to continue.") from e
    except Exception as e:
        # gr.Error expects a message string, not an exception instance.
        raise gr.Error(str(e)) from e
    
    # description = """
    # Eleven Multilingual V2 is the world's best Text-to-Speech model. Features 38 voices and supports 28 languages. Sign up on [ElevenLabs](https://elevenlabs.io/?from=partnerpierce7156) to get an API Key.
    # """


# Gradio UI: API key and voice selection on one row, then the text input,
# a trigger button and the audio output wired to generate_voice.
with gr.Blocks(theme="syddharth/gray-minimal") as block:
    #gr.Markdown('[ ![ElevenLabs](https://user-images.githubusercontent.com/12028621/262629275-4f85c9cf-85b6-435e-ab50-5b8c7c4e9dd2.png) ](https://elevenlabs.io)')
    #gr.Markdown("# <center> ElevenLabs </center>")
    #gr.Markdown(description)

    with gr.Row(variant='panel'):
        input_api_key = gr.Textbox(
            type='password',
            label='ElevenLabs API Key',
            placeholder='Enter your API key',
            elem_id="input_api_key"
        )

        # The voice list is fetched once, while the UI is being built.
        all_voices = voices()
        voice_names = [voice.name for voice in all_voices]
        # Prefer "Rachel" as the default, but fall back to the first available
        # voice (or no preselection) if it is not in the fetched list.
        if "Rachel" in voice_names:
            default_voice = "Rachel"
        else:
            default_voice = voice_names[0] if voice_names else None
        input_voice = gr.Dropdown(
            voice_names,
            value=default_voice,
            label="Voice",
            elem_id="input_voice"
        )

    input_text = gr.Textbox(
        label="Input Text (4000 characters max)",
        lines=1,
        value="Hello! 你好! Hola! नमस्ते! Bonjour! こんにちは! مرحبا! 안녕하세요! Ciao! Cześć! Привіт! Γειά σας! Здравей! வணக்கம்!",
        elem_id="input_text"
    )

    # The button label is set through `value`; `text=` / `type=` are not
    # Button parameters, so the intended label was not being applied.
    run_button = gr.Button(
        value="Generate Voice",
        variant="primary"
    )

    out_audio = gr.Audio(
        label="Speech Output",
        type="numpy",
        elem_id="out_audio",
        format="mp3"
    )

    inputs = [input_text, input_voice, input_api_key]
    outputs = [out_audio]

    # Clicking the button sends (text, voice, api key) to generate_voice and
    # routes its return value to the audio component.
    run_button.click(
        fn=generate_voice,
        inputs=inputs,
        outputs=outputs,
        queue=True
    )

# Enable the request queue, then start the app in debug mode.
block.queue(concurrency_count=5).launch(debug=True)