rakhlin committed
Commit 98c21c3 · 1 Parent(s): bebed53

Delete app.bak.py

Files changed (1)
  1. app.bak.py +0 -160
app.bak.py DELETED
@@ -1,160 +0,0 @@
- import gradio as gr
- import numpy as np
- import torch
- import torch.nn.functional as F
- from pathlib import Path
-
- from TTS.api import TTS
- from TTS.utils.manage import ModelManager
-
-
- title = ""
- description = """"""
- article = """"""
-
- device = "cuda" if torch.cuda.is_available() else "cpu"
- GPU = device == "cuda"
- INT16MAX = np.iinfo(np.int16).max
-
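- # List the models known to Coqui TTS and keep only multilingual/universal and English entries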
- model_ids = ModelManager(verbose=False).list_models()
- model_tts_ids = [model for model in model_ids if 'tts_models' in model and ('/multilingual/' in model or '/en/' in model)]
- model_voc_ids = [model for model in model_ids if 'vocoder_models' in model and ('/universal/' in model or '/en/' in model)]
- model_vc_ids = [model for model in model_ids if 'voice_conversion_models' in model and ('/multilingual/' in model or '/en/' in model)]
- examples_pt = 'examples'
- allowed_extensions = ['.mp3', '.wav']
- examples = {f.name: f for f in Path(examples_pt).glob('*') if f.suffix in allowed_extensions}
- verse = """Mary had a little lamb,
- Its fleece was white as snow.
- Everywhere the child went,
- The little lamb was sure to go."""
-
-
-
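- # Reload the TTS model when the dropdown changes and repopulate the language/speaker choices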
- def on_model_tts_select(model_name, tts_var):
-     if tts_var is None or tts_var.model_name != model_name:
-         print(f'Loading TTS model from {model_name}')
-         tts_var = TTS(model_name=model_name, progress_bar=False, gpu=GPU)
-     else:
-         print(f'Passing through TTS model {tts_var.model_name}')
-     languages = tts_var.languages if tts_var.is_multi_lingual else ['']
-     speakers = [s.replace('\n', '-n') for s in tts_var.speakers] if tts_var.is_multi_speaker else ['']  # there's weird speaker formatting
-     language = languages[0]
-     speaker = speakers[0]
-     return tts_var, gr.update(choices=languages, value=language, interactive=tts_var.is_multi_lingual),\
-         gr.update(choices=speakers, value=speaker, interactive=tts_var.is_multi_speaker)
-
-
- def on_model_vc_select(model_name, vc_var):
-     if vc_var is None or vc_var.model_name != model_name:
-         print(f'Loading voice conversion model from {model_name}')
-         vc_var = TTS(model_name=model_name, progress_bar=False, gpu=GPU)
-     else:
-         print(f'Passing through voice conversion model {vc_var.model_name}')
-     return vc_var
-
-
- def on_voicedropdown(x):
-     return examples[x]
-
-
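- # Synthesize speech from text; clone the target voice with the TTS model itself when it
- # exposes a speaker_manager, otherwise fall back to tts_with_vc (voice conversion)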
- def text_to_speech(text, tts_model, language, speaker, target_wav, use_original_voice):
-     if len(text.strip()) == 0 or tts_model is None or (target_wav is None and not use_original_voice):
-         return (16000, np.zeros(0).astype(np.int16))
-
-     sample_rate = tts_model.synthesizer.output_sample_rate
-     if tts_model.is_multi_speaker:
-         speaker = {s.replace('\n', '-n'): s for s in tts_model.speakers}[speaker]  # there's weird speaker formatting
-     print(f'model: {tts_model.model_name}\nlanguage: {language}\nspeaker: {speaker}')
-
-     language = None if language == '' else language
-     speaker = None if speaker == '' else speaker
-     if use_original_voice:
-         print('Using original voice')
-         speech = tts_model.tts(text, language=language, speaker=speaker)
-     elif tts_model.synthesizer.tts_model.speaker_manager:
-         print('voice cloning with the tts')
-         speech = tts_model.tts(text, language=language, speaker_wav=target_wav)
-     else:
-         print('voice cloning with the voice conversion model')
-         speech = tts_model.tts_with_vc(text, language=language, speaker_wav=target_wav)
-
-     speech = (np.array(speech) * INT16MAX).astype(np.int16)
-     return (sample_rate, speech)
-
-
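- # Convert source_wav so that it sounds like target_wav using the selected voice conversion model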
- def voice_clone(vc_model, source_wav, target_wav):
-     if vc_model is None or source_wav is None or target_wav is None:
-         return (16000, np.zeros(0).astype(np.int16))
-
-     print(f'model: {vc_model.model_name}\nsource_wav: {source_wav}\ntarget_wav: {target_wav}')
-     sample_rate = vc_model.voice_converter.output_sample_rate
-     speech = vc_model.voice_conversion(source_wav=source_wav, target_wav=target_wav)
-     speech = (np.array(speech) * INT16MAX).astype(np.int16)
-     return (sample_rate, speech)
-
-
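- # Gradio UI: model pickers on top, a collapsible target-voice section, then inputs, action buttons, and the output audio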
- with gr.Blocks() as demo:
-     tts_model = gr.State(None)
-     vc_model = gr.State(None)
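-     # Enable/disable the two action buttons while models load or inference runs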
-     def activate(*args):
-         return gr.update(interactive=True) if len(args) == 1 else [gr.update(interactive=True)] * len(args)
-     def deactivate(*args):
-         return gr.update(interactive=False) if len(args) == 1 else [gr.update(interactive=False)] * len(args)
-
-     gr.Markdown(description)
-
-     with gr.Row(equal_height=True):
-         with gr.Column(scale=5, min_width=50):
-             model_tts_dropdown = gr.Dropdown(model_tts_ids, value=model_tts_ids[3], label='Text-to-speech model', interactive=True)
-         with gr.Column(scale=1, min_width=10):
-             language_dropdown = gr.Dropdown(None, value=None, label='Language', interactive=False, visible=True)
-         with gr.Column(scale=1, min_width=10):
-             speaker_dropdown = gr.Dropdown(None, value=None, label='Speaker', interactive=False, visible=True)
-         with gr.Column(scale=5, min_width=50):
-             with gr.Row(equal_height=True):
-                 # model_vocoder_dropdown = gr.Dropdown(model_voc_ids, label='Select vocoder model', interactive=True)
-                 model_vc_dropdown = gr.Dropdown(model_vc_ids, value=model_vc_ids[0], label='Voice conversion model', interactive=True)
-
-     with gr.Accordion("Target voice", open=False) as accordion:
-         gr.Markdown("Upload target voice...")
-         with gr.Row(equal_height=True):
-             voice_upload = gr.Audio(label='Upload target voice', source='upload', type='filepath')
-             voice_dropdown = gr.Dropdown(examples, label='Examples', interactive=True)
-
-     with gr.Row(equal_height=True):
-         with gr.Column(scale=2):
-             with gr.Row(equal_height=True):
-                 with gr.Column():
-                     text_to_convert = gr.Textbox(verse)
-                     orig_voice = gr.Checkbox(label='Use original voice')
-                 voice_to_convert = gr.Audio(label="Upload voice to convert", source='upload', type='filepath')
-             with gr.Row(equal_height=True):
-                 button_text = gr.Button('Text to speech', interactive=True)
-                 button_audio = gr.Button('Convert audio', interactive=True)
-         with gr.Row(equal_height=True):
-             speech = gr.Audio(label='Converted Speech', type='numpy', visible=True, interactive=False)
-
-     # actions
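-     # Every chain below follows the same pattern: deactivate both buttons, do the work, reactivate them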
-     model_tts_dropdown.change(deactivate, [button_text, button_audio], [button_text, button_audio]).\
-         then(fn=on_model_tts_select, inputs=[model_tts_dropdown, tts_model], outputs=[tts_model, language_dropdown, speaker_dropdown]).\
-         then(activate, [button_text, button_audio], [button_text, button_audio])
-     model_vc_dropdown.change(deactivate, [button_text, button_audio], [button_text, button_audio]).\
-         then(fn=on_model_vc_select, inputs=[model_vc_dropdown, vc_model], outputs=vc_model).\
-         then(activate, [button_text, button_audio], [button_text, button_audio])
-     voice_dropdown.change(deactivate, [button_text, button_audio], [button_text, button_audio]).\
-         then(fn=on_voicedropdown, inputs=voice_dropdown, outputs=voice_upload).\
-         then(activate, [button_text, button_audio], [button_text, button_audio])
-
-     button_text.click(deactivate, [button_text, button_audio], [button_text, button_audio]).\
-         then(fn=on_model_tts_select, inputs=[model_tts_dropdown, tts_model], outputs=[tts_model, language_dropdown, speaker_dropdown]).\
-         then(fn=text_to_speech, inputs=[text_to_convert, tts_model, language_dropdown, speaker_dropdown, voice_upload, orig_voice],
-              outputs=speech).\
-         then(activate, [button_text, button_audio], [button_text, button_audio])
-
-     button_audio.click(deactivate, [button_text, button_audio], [button_text, button_audio]).\
-         then(fn=on_model_vc_select, inputs=[model_vc_dropdown, vc_model], outputs=vc_model).\
-         then(fn=voice_clone, inputs=[vc_model, voice_to_convert, voice_upload], outputs=speech).\
-         then(activate, [button_text, button_audio], [button_text, button_audio])
-
-     gr.HTML(article)
- demo.launch(share=False)