File size: 1,355 Bytes
4300fed
610f79e
4300fed
61c12f6
7ffbb2d
 
4300fed
 
532dc11
610f79e
61c12f6
610f79e
 
1821dd9
61c12f6
1821dd9
61c12f6
3c963f8
610f79e
61c12f6
 
 
 
 
 
3c963f8
 
 
61c12f6
610f79e
4300fed
610f79e
4300fed
610f79e
 
4300fed
610f79e
70cbf96
881961f
532dc11
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
import gradio as gr
import spaces
import os, torch, io
import json
# The UniDic dictionary must be downloaded (python -m unidic download) for this
# WebUI to work, so fetch it here, before `melo.api` is imported below.
# Run it with the interpreter that is executing this app (sys.executable) and
# without a shell, so the download lands in the same environment instead of
# whichever `python` happens to be first on PATH.
import subprocess
import sys

# check=False keeps the original best-effort behaviour: a failed download does
# not abort start-up here.
subprocess.run([sys.executable, '-m', 'unidic', 'download'], check=False)
from melo.api import TTS
import tempfile

def _merge_wav_segments(segments):
    """Join complete WAV byte strings into a single WAV byte string.

    Appending WAV files byte-for-byte leaves one RIFF header per segment in
    the output, and the first header only accounts for the first segment.
    This helper decodes each segment and re-encodes all frames under one
    header instead.

    Args:
        segments: iterable of ``bytes`` objects, each a complete WAV file.
            Assumes PCM WAV readable by the stdlib ``wave`` module.

    Returns:
        The merged WAV file as ``bytes``, or ``None`` if ``segments`` is empty.

    Raises:
        ValueError: if the segments disagree on channel count, sample width
            or sample rate (their frames cannot share one header).
        wave.Error: if a segment is not a WAV file ``wave`` can parse.
    """
    import wave  # local import keeps this block self-contained

    merged = io.BytesIO()
    writer = None
    layout = None
    try:
        for segment in segments:
            with wave.open(io.BytesIO(segment), 'rb') as reader:
                params = reader.getparams()
                current = (params.nchannels, params.sampwidth, params.framerate)
                if writer is None:
                    # The first segment decides the output format.
                    writer = wave.open(merged, 'wb')
                    writer.setnchannels(params.nchannels)
                    writer.setsampwidth(params.sampwidth)
                    writer.setframerate(params.framerate)
                    layout = current
                elif current != layout:
                    raise ValueError(
                        f"WAV segment format {current} does not match {layout}"
                    )
                writer.writeframes(reader.readframes(params.nframes))
    finally:
        if writer is not None:
            # close() patches the frame count into the header; it does not
            # close `merged` because wave did not open that object itself.
            writer.close()

    if writer is None:
        return None
    return merged.getvalue()


@spaces.GPU
def synthesize(conversation_text, speed=1.0, progress=gr.Progress()):
    """Turn a JSON conversation into one WAV clip with alternating voices.

    Args:
        conversation_text: JSON string whose top-level object has a
            ``"conversation"`` list; each item must have a ``"text"`` entry.
        speed: speech speed passed to the TTS model. Anything that is not a
            positive number falls back to ``1.0``.
        progress: Gradio progress tracker; its ``tqdm`` is handed to the TTS
            call as the progress bar.

    Returns:
        The whole conversation as WAV ``bytes``, or ``None`` when the
        conversation has no turns.

    Raises:
        json.JSONDecodeError: if ``conversation_text`` is not valid JSON.
        KeyError: if ``"conversation"`` or a turn's ``"text"`` is missing.
    """
    # The UI wires in only the text box, so `speed` may arrive as the default
    # or as something non-numeric; bool is excluded because it is an int.
    if isinstance(speed, bool) or not isinstance(speed, (int, float)) or speed <= 0:
        speed = 1.0

    device = 'cuda' if torch.cuda.is_available() else 'cpu'
    model = TTS(language='EN', device=device)
    speakers = ['EN-US', 'EN-Default']
    spk2id = model.hps.data.spk2id

    conversation = json.loads(conversation_text)
    segments = []
    for i, turn in enumerate(conversation["conversation"]):
        bio = io.BytesIO()
        # Alternate voices turn by turn.
        speaker_id = spk2id[speakers[i % len(speakers)]]
        model.tts_to_file(turn["text"], speaker_id, bio, speed=speed, pbar=progress.tqdm, format='wav')
        segments.append(bio.getvalue())

    return _merge_wav_segments(segments)
  
# Gradio UI: one text box feeding `synthesize`, one read-only audio player.
with gr.Blocks() as demo:
    gr.Markdown('# Article to Podcast')
    with gr.Group():
        # NOTE(review): the label says "Article Link", but `synthesize` parses
        # this value with json.loads and reads its "conversation" list.
        # Confirm what callers are expected to paste here.
        text = gr.Textbox(label="Article Link")
    btn = gr.Button('Podcastify', variant='primary')
    aud = gr.Audio(interactive=False)
    # Only the text box is passed as input; nothing is supplied for the
    # `speed` parameter of `synthesize`.
    btn.click(synthesize, inputs=[text], outputs=[aud])

# Turn on the request queue with the API left open, then start the server
# with the API page shown.
demo.queue(api_open=True, default_concurrency_limit=10).launch(show_api=True)