Spaces:
Running
Running
Update main/app/app.py
Browse files- main/app/app.py +211 -211
main/app/app.py
CHANGED
@@ -830,6 +830,217 @@ with gr.Blocks(title=" Ultimate RVC Maker ⚡", theme=theme) as app:
|
|
830 |
api_name="upload_pretrain_d"
|
831 |
)
|
832 |
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
833 |
with gr.TabItem(translations["audio_editing"], visible=configs.get("audioldm2", True)):
|
834 |
gr.Markdown(translations["audio_editing_info"])
|
835 |
with gr.Row():
|
@@ -1151,217 +1362,6 @@ with gr.Blocks(title=" Ultimate RVC Maker ⚡", theme=theme) as app:
|
|
1151 |
api_name="create_dataset"
|
1152 |
)
|
1153 |
|
1154 |
-
with gr.TabItem(translations["training_model"], visible=configs.get("training_tab", True)):
|
1155 |
-
gr.Markdown(f"## {translations['training_model']}")
|
1156 |
-
with gr.Row():
|
1157 |
-
gr.Markdown(translations["training_markdown"])
|
1158 |
-
with gr.Row():
|
1159 |
-
with gr.Column():
|
1160 |
-
with gr.Row():
|
1161 |
-
with gr.Column():
|
1162 |
-
training_name = gr.Textbox(label=translations["modelname"], info=translations["training_model_name"], value="", placeholder=translations["modelname"], interactive=True)
|
1163 |
-
training_sr = gr.Radio(label=translations["sample_rate"], info=translations["sample_rate_info"], choices=["32k", "40k", "48k"], value="48k", interactive=True)
|
1164 |
-
training_ver = gr.Radio(label=translations["training_version"], info=translations["training_version_info"], choices=["v1", "v2"], value="v2", interactive=True)
|
1165 |
-
with gr.Row():
|
1166 |
-
clean_dataset = gr.Checkbox(label=translations["clear_dataset"], value=False, interactive=True)
|
1167 |
-
preprocess_cut = gr.Checkbox(label=translations["split_audio"], value=True, interactive=True)
|
1168 |
-
process_effects = gr.Checkbox(label=translations["preprocess_effect"], value=False, interactive=True)
|
1169 |
-
checkpointing1 = gr.Checkbox(label=translations["memory_efficient_training"], value=False, interactive=True)
|
1170 |
-
training_f0 = gr.Checkbox(label=translations["training_pitch"], value=True, interactive=True)
|
1171 |
-
upload = gr.Checkbox(label=translations["upload_dataset"], value=False, interactive=True)
|
1172 |
-
with gr.Row():
|
1173 |
-
clean_dataset_strength = gr.Slider(label=translations["clean_strength"], info=translations["clean_strength_info"], minimum=0, maximum=1, value=0.7, step=0.1, interactive=True, visible=clean_dataset.value)
|
1174 |
-
with gr.Column():
|
1175 |
-
preprocess_button = gr.Button(translations["preprocess_button"], scale=2)
|
1176 |
-
upload_dataset = gr.Files(label=translations["drop_audio"], file_types=[".wav", ".mp3", ".flac", ".ogg", ".opus", ".m4a", ".mp4", ".aac", ".alac", ".wma", ".aiff", ".webm", ".ac3"], visible=upload.value)
|
1177 |
-
preprocess_info = gr.Textbox(label=translations["preprocess_info"], value="", interactive=False)
|
1178 |
-
with gr.Column():
|
1179 |
-
with gr.Row():
|
1180 |
-
with gr.Column():
|
1181 |
-
with gr.Accordion(label=translations["f0_method"], open=False):
|
1182 |
-
with gr.Group():
|
1183 |
-
with gr.Row():
|
1184 |
-
onnx_f0_mode2 = gr.Checkbox(label=translations["f0_onnx_mode"], info=translations["f0_onnx_mode_info"], value=False, interactive=True)
|
1185 |
-
unlock_full_method4 = gr.Checkbox(label=translations["f0_unlock"], info=translations["f0_unlock_info"], value=False, interactive=True)
|
1186 |
-
extract_method = gr.Radio(label=translations["f0_method"], info=translations["f0_method_info"], choices=method_f0, value="rmvpe", interactive=True)
|
1187 |
-
extract_hop_length = gr.Slider(label="Hop length", info=translations["hop_length_info"], minimum=1, maximum=512, value=128, step=1, interactive=True, visible=False)
|
1188 |
-
with gr.Accordion(label=translations["hubert_model"], open=False):
|
1189 |
-
with gr.Group():
|
1190 |
-
embed_mode2 = gr.Radio(label=translations["embed_mode"], info=translations["embed_mode_info"], value="fairseq", choices=embedders_mode, interactive=True, visible=True)
|
1191 |
-
extract_embedders = gr.Radio(label=translations["hubert_model"], info=translations["hubert_info"], choices=embedders_model, value="hubert_base", interactive=True)
|
1192 |
-
with gr.Row():
|
1193 |
-
extract_embedders_custom = gr.Textbox(label=translations["modelname"], info=translations["modelname_info"], value="", placeholder="hubert_base", interactive=True, visible=extract_embedders.value == "custom")
|
1194 |
-
with gr.Column():
|
1195 |
-
extract_button = gr.Button(translations["extract_button"], scale=2)
|
1196 |
-
extract_info = gr.Textbox(label=translations["extract_info"], value="", interactive=False)
|
1197 |
-
with gr.Column():
|
1198 |
-
with gr.Row():
|
1199 |
-
with gr.Column():
|
1200 |
-
total_epochs = gr.Slider(label=translations["total_epoch"], info=translations["total_epoch_info"], minimum=1, maximum=10000, value=300, step=1, interactive=True)
|
1201 |
-
save_epochs = gr.Slider(label=translations["save_epoch"], info=translations["save_epoch_info"], minimum=1, maximum=10000, value=50, step=1, interactive=True)
|
1202 |
-
with gr.Column():
|
1203 |
-
with gr.Row():
|
1204 |
-
index_button = gr.Button(f"3. {translations['create_index']}", variant="primary", scale=2)
|
1205 |
-
training_button = gr.Button(f"4. {translations['training_model']}", variant="primary", scale=2)
|
1206 |
-
with gr.Row():
|
1207 |
-
with gr.Accordion(label=translations["setting"], open=False):
|
1208 |
-
with gr.Row():
|
1209 |
-
index_algorithm = gr.Radio(label=translations["index_algorithm"], info=translations["index_algorithm_info"], choices=["Auto", "Faiss", "KMeans"], value="Auto", interactive=True)
|
1210 |
-
with gr.Row():
|
1211 |
-
custom_dataset = gr.Checkbox(label=translations["custom_dataset"], info=translations["custom_dataset_info"], value=False, interactive=True)
|
1212 |
-
overtraining_detector = gr.Checkbox(label=translations["overtraining_detector"], info=translations["overtraining_detector_info"], value=False, interactive=True)
|
1213 |
-
clean_up = gr.Checkbox(label=translations["cleanup_training"], info=translations["cleanup_training_info"], value=False, interactive=True)
|
1214 |
-
cache_in_gpu = gr.Checkbox(label=translations["cache_in_gpu"], info=translations["cache_in_gpu_info"], value=False, interactive=True)
|
1215 |
-
with gr.Column():
|
1216 |
-
dataset_path = gr.Textbox(label=translations["dataset_folder"], value="dataset", interactive=True, visible=custom_dataset.value)
|
1217 |
-
with gr.Column():
|
1218 |
-
threshold = gr.Slider(minimum=1, maximum=100, value=50, step=1, label=translations["threshold"], interactive=True, visible=overtraining_detector.value)
|
1219 |
-
with gr.Accordion(translations["setting_cpu_gpu"], open=False):
|
1220 |
-
with gr.Column():
|
1221 |
-
gpu_number = gr.Textbox(label=translations["gpu_number"], value=str("-".join(map(str, range(torch.cuda.device_count()))) if torch.cuda.is_available() else "-"), info=translations["gpu_number_info"], interactive=True)
|
1222 |
-
gpu_info = gr.Textbox(label=translations["gpu_info"], value=get_gpu_info(), info=translations["gpu_info_2"], interactive=False)
|
1223 |
-
cpu_core = gr.Slider(label=translations["cpu_core"], info=translations["cpu_core_info"], minimum=0, maximum=cpu_count(), value=cpu_count(), step=1, interactive=True)
|
1224 |
-
train_batch_size = gr.Slider(label=translations["batch_size"], info=translations["batch_size_info"], minimum=1, maximum=64, value=8, step=1, interactive=True)
|
1225 |
-
with gr.Row():
|
1226 |
-
save_only_latest = gr.Checkbox(label=translations["save_only_latest"], info=translations["save_only_latest_info"], value=True, interactive=True)
|
1227 |
-
save_every_weights = gr.Checkbox(label=translations["save_every_weights"], info=translations["save_every_weights_info"], value=True, interactive=True)
|
1228 |
-
not_use_pretrain = gr.Checkbox(label=translations["not_use_pretrain_2"], info=translations["not_use_pretrain_info"], value=False, interactive=True)
|
1229 |
-
custom_pretrain = gr.Checkbox(label=translations["custom_pretrain"], info=translations["custom_pretrain_info"], value=False, interactive=True)
|
1230 |
-
with gr.Row():
|
1231 |
-
vocoders = gr.Radio(label=translations["vocoder"], info=translations["vocoder_info"], choices=["Default", "MRF-HiFi-GAN", "RefineGAN"], value="Default", interactive=True)
|
1232 |
-
with gr.Row():
|
1233 |
-
deterministic = gr.Checkbox(label=translations["deterministic"], info=translations["deterministic_info"], value=False, interactive=True)
|
1234 |
-
benchmark = gr.Checkbox(label=translations["benchmark"], info=translations["benchmark_info"], value=False, interactive=True)
|
1235 |
-
with gr.Row():
|
1236 |
-
model_author = gr.Textbox(label=translations["training_author"], info=translations["training_author_info"], value="", placeholder=translations["training_author"], interactive=True)
|
1237 |
-
with gr.Row():
|
1238 |
-
with gr.Column():
|
1239 |
-
with gr.Accordion(translations["custom_pretrain_info"], open=False, visible=custom_pretrain.value and not not_use_pretrain.value) as pretrain_setting:
|
1240 |
-
pretrained_D = gr.Dropdown(label=translations["pretrain_file"].format(dg="D"), choices=pretrainedD, value=pretrainedD[0] if len(pretrainedD) > 0 else '', interactive=True, allow_custom_value=True)
|
1241 |
-
pretrained_G = gr.Dropdown(label=translations["pretrain_file"].format(dg="G"), choices=pretrainedG, value=pretrainedG[0] if len(pretrainedG) > 0 else '', interactive=True, allow_custom_value=True)
|
1242 |
-
refesh_pretrain = gr.Button(translations["refesh"], scale=2)
|
1243 |
-
with gr.Row():
|
1244 |
-
training_info = gr.Textbox(label=translations["train_info"], value="", interactive=False)
|
1245 |
-
with gr.Row():
|
1246 |
-
with gr.Column():
|
1247 |
-
with gr.Accordion(translations["export_model"], open=False):
|
1248 |
-
with gr.Row():
|
1249 |
-
model_file= gr.Dropdown(label=translations["model_name"], choices=model_name, value=model_name[0] if len(model_name) >= 1 else "", interactive=True, allow_custom_value=True)
|
1250 |
-
index_file = gr.Dropdown(label=translations["index_path"], choices=index_path, value=index_path[0] if len(index_path) >= 1 else "", interactive=True, allow_custom_value=True)
|
1251 |
-
with gr.Row():
|
1252 |
-
refesh_file = gr.Button(f"1. {translations['refesh']}", scale=2)
|
1253 |
-
zip_model = gr.Button(translations["zip_model"], variant="primary", scale=2)
|
1254 |
-
with gr.Row():
|
1255 |
-
zip_output = gr.File(label=translations["output_zip"], file_types=[".zip"], interactive=False, visible=False)
|
1256 |
-
with gr.Row():
|
1257 |
-
vocoders.change(fn=pitch_guidance_lock, inputs=[vocoders], outputs=[training_f0])
|
1258 |
-
training_f0.change(fn=vocoders_lock, inputs=[training_f0, vocoders], outputs=[vocoders])
|
1259 |
-
unlock_full_method4.change(fn=unlock_f0, inputs=[unlock_full_method4], outputs=[extract_method])
|
1260 |
-
with gr.Row():
|
1261 |
-
refesh_file.click(fn=change_models_choices, inputs=[], outputs=[model_file, index_file])
|
1262 |
-
zip_model.click(fn=zip_file, inputs=[training_name, model_file, index_file], outputs=[zip_output])
|
1263 |
-
dataset_path.change(fn=lambda folder: os.makedirs(folder, exist_ok=True), inputs=[dataset_path], outputs=[])
|
1264 |
-
with gr.Row():
|
1265 |
-
upload.change(fn=visible, inputs=[upload], outputs=[upload_dataset])
|
1266 |
-
overtraining_detector.change(fn=visible, inputs=[overtraining_detector], outputs=[threshold])
|
1267 |
-
clean_dataset.change(fn=visible, inputs=[clean_dataset], outputs=[clean_dataset_strength])
|
1268 |
-
with gr.Row():
|
1269 |
-
custom_dataset.change(fn=lambda custom_dataset: [visible(custom_dataset), "dataset"],inputs=[custom_dataset], outputs=[dataset_path, dataset_path])
|
1270 |
-
training_ver.change(fn=unlock_vocoder, inputs=[training_ver, vocoders], outputs=[vocoders])
|
1271 |
-
vocoders.change(fn=unlock_ver, inputs=[training_ver, vocoders], outputs=[training_ver])
|
1272 |
-
upload_dataset.upload(
|
1273 |
-
fn=lambda files, folder: [shutil.move(f.name, os.path.join(folder, os.path.split(f.name)[1])) for f in files] if folder != "" else gr_warning(translations["dataset_folder1"]),
|
1274 |
-
inputs=[upload_dataset, dataset_path],
|
1275 |
-
outputs=[],
|
1276 |
-
api_name="upload_dataset"
|
1277 |
-
)
|
1278 |
-
with gr.Row():
|
1279 |
-
not_use_pretrain.change(fn=lambda a, b: visible(a and not b), inputs=[custom_pretrain, not_use_pretrain], outputs=[pretrain_setting])
|
1280 |
-
custom_pretrain.change(fn=lambda a, b: visible(a and not b), inputs=[custom_pretrain, not_use_pretrain], outputs=[pretrain_setting])
|
1281 |
-
refesh_pretrain.click(fn=change_pretrained_choices, inputs=[], outputs=[pretrained_D, pretrained_G])
|
1282 |
-
with gr.Row():
|
1283 |
-
preprocess_button.click(
|
1284 |
-
fn=preprocess,
|
1285 |
-
inputs=[
|
1286 |
-
training_name,
|
1287 |
-
training_sr,
|
1288 |
-
cpu_core,
|
1289 |
-
preprocess_cut,
|
1290 |
-
process_effects,
|
1291 |
-
dataset_path,
|
1292 |
-
clean_dataset,
|
1293 |
-
clean_dataset_strength
|
1294 |
-
],
|
1295 |
-
outputs=[preprocess_info],
|
1296 |
-
api_name="preprocess"
|
1297 |
-
)
|
1298 |
-
with gr.Row():
|
1299 |
-
embed_mode2.change(fn=visible_embedders, inputs=[embed_mode2], outputs=[extract_embedders])
|
1300 |
-
extract_method.change(fn=hoplength_show, inputs=[extract_method], outputs=[extract_hop_length])
|
1301 |
-
extract_embedders.change(fn=lambda extract_embedders: visible(extract_embedders == "custom"), inputs=[extract_embedders], outputs=[extract_embedders_custom])
|
1302 |
-
with gr.Row():
|
1303 |
-
extract_button.click(
|
1304 |
-
fn=extract,
|
1305 |
-
inputs=[
|
1306 |
-
training_name,
|
1307 |
-
training_ver,
|
1308 |
-
extract_method,
|
1309 |
-
training_f0,
|
1310 |
-
extract_hop_length,
|
1311 |
-
cpu_core,
|
1312 |
-
gpu_number,
|
1313 |
-
training_sr,
|
1314 |
-
extract_embedders,
|
1315 |
-
extract_embedders_custom,
|
1316 |
-
onnx_f0_mode2,
|
1317 |
-
embed_mode2
|
1318 |
-
],
|
1319 |
-
outputs=[extract_info],
|
1320 |
-
api_name="extract"
|
1321 |
-
)
|
1322 |
-
with gr.Row():
|
1323 |
-
index_button.click(
|
1324 |
-
fn=create_index,
|
1325 |
-
inputs=[
|
1326 |
-
training_name,
|
1327 |
-
training_ver,
|
1328 |
-
index_algorithm
|
1329 |
-
],
|
1330 |
-
outputs=[training_info],
|
1331 |
-
api_name="create_index"
|
1332 |
-
)
|
1333 |
-
with gr.Row():
|
1334 |
-
training_button.click(
|
1335 |
-
fn=training,
|
1336 |
-
inputs=[
|
1337 |
-
training_name,
|
1338 |
-
training_ver,
|
1339 |
-
save_epochs,
|
1340 |
-
save_only_latest,
|
1341 |
-
save_every_weights,
|
1342 |
-
total_epochs,
|
1343 |
-
training_sr,
|
1344 |
-
train_batch_size,
|
1345 |
-
gpu_number,
|
1346 |
-
training_f0,
|
1347 |
-
not_use_pretrain,
|
1348 |
-
custom_pretrain,
|
1349 |
-
pretrained_G,
|
1350 |
-
pretrained_D,
|
1351 |
-
overtraining_detector,
|
1352 |
-
threshold,
|
1353 |
-
clean_up,
|
1354 |
-
cache_in_gpu,
|
1355 |
-
model_author,
|
1356 |
-
vocoders,
|
1357 |
-
checkpointing1,
|
1358 |
-
deterministic,
|
1359 |
-
benchmark
|
1360 |
-
],
|
1361 |
-
outputs=[training_info],
|
1362 |
-
api_name="training_model"
|
1363 |
-
)
|
1364 |
-
|
1365 |
with gr.TabItem(translations["fushion"], visible=configs.get("fushion_tab", True)):
|
1366 |
gr.Markdown(translations["fushion_markdown"])
|
1367 |
with gr.Row():
|
|
|
830 |
api_name="upload_pretrain_d"
|
831 |
)
|
832 |
|
833 |
+
with gr.TabItem(translations["training_model"], visible=configs.get("training_tab", True)):
|
834 |
+
gr.Markdown(f"## {translations['training_model']}")
|
835 |
+
with gr.Row():
|
836 |
+
gr.Markdown(translations["training_markdown"])
|
837 |
+
with gr.Row():
|
838 |
+
with gr.Column():
|
839 |
+
with gr.Row():
|
840 |
+
with gr.Column():
|
841 |
+
training_name = gr.Textbox(label=translations["modelname"], info=translations["training_model_name"], value="", placeholder=translations["modelname"], interactive=True)
|
842 |
+
training_sr = gr.Radio(label=translations["sample_rate"], info=translations["sample_rate_info"], choices=["32k", "40k", "48k"], value="48k", interactive=True)
|
843 |
+
training_ver = gr.Radio(label=translations["training_version"], info=translations["training_version_info"], choices=["v1", "v2"], value="v2", interactive=True)
|
844 |
+
with gr.Row():
|
845 |
+
clean_dataset = gr.Checkbox(label=translations["clear_dataset"], value=False, interactive=True)
|
846 |
+
preprocess_cut = gr.Checkbox(label=translations["split_audio"], value=True, interactive=True)
|
847 |
+
process_effects = gr.Checkbox(label=translations["preprocess_effect"], value=False, interactive=True)
|
848 |
+
checkpointing1 = gr.Checkbox(label=translations["memory_efficient_training"], value=False, interactive=True)
|
849 |
+
training_f0 = gr.Checkbox(label=translations["training_pitch"], value=True, interactive=True)
|
850 |
+
upload = gr.Checkbox(label=translations["upload_dataset"], value=False, interactive=True)
|
851 |
+
with gr.Row():
|
852 |
+
clean_dataset_strength = gr.Slider(label=translations["clean_strength"], info=translations["clean_strength_info"], minimum=0, maximum=1, value=0.7, step=0.1, interactive=True, visible=clean_dataset.value)
|
853 |
+
with gr.Column():
|
854 |
+
preprocess_button = gr.Button(translations["preprocess_button"], scale=2)
|
855 |
+
upload_dataset = gr.Files(label=translations["drop_audio"], file_types=[".wav", ".mp3", ".flac", ".ogg", ".opus", ".m4a", ".mp4", ".aac", ".alac", ".wma", ".aiff", ".webm", ".ac3"], visible=upload.value)
|
856 |
+
preprocess_info = gr.Textbox(label=translations["preprocess_info"], value="", interactive=False)
|
857 |
+
with gr.Column():
|
858 |
+
with gr.Row():
|
859 |
+
with gr.Column():
|
860 |
+
with gr.Accordion(label=translations["f0_method"], open=False):
|
861 |
+
with gr.Group():
|
862 |
+
with gr.Row():
|
863 |
+
onnx_f0_mode2 = gr.Checkbox(label=translations["f0_onnx_mode"], info=translations["f0_onnx_mode_info"], value=False, interactive=True)
|
864 |
+
unlock_full_method4 = gr.Checkbox(label=translations["f0_unlock"], info=translations["f0_unlock_info"], value=False, interactive=True)
|
865 |
+
extract_method = gr.Radio(label=translations["f0_method"], info=translations["f0_method_info"], choices=method_f0, value="rmvpe", interactive=True)
|
866 |
+
extract_hop_length = gr.Slider(label="Hop length", info=translations["hop_length_info"], minimum=1, maximum=512, value=128, step=1, interactive=True, visible=False)
|
867 |
+
with gr.Accordion(label=translations["hubert_model"], open=False):
|
868 |
+
with gr.Group():
|
869 |
+
embed_mode2 = gr.Radio(label=translations["embed_mode"], info=translations["embed_mode_info"], value="fairseq", choices=embedders_mode, interactive=True, visible=True)
|
870 |
+
extract_embedders = gr.Radio(label=translations["hubert_model"], info=translations["hubert_info"], choices=embedders_model, value="hubert_base", interactive=True)
|
871 |
+
with gr.Row():
|
872 |
+
extract_embedders_custom = gr.Textbox(label=translations["modelname"], info=translations["modelname_info"], value="", placeholder="hubert_base", interactive=True, visible=extract_embedders.value == "custom")
|
873 |
+
with gr.Column():
|
874 |
+
extract_button = gr.Button(translations["extract_button"], scale=2)
|
875 |
+
extract_info = gr.Textbox(label=translations["extract_info"], value="", interactive=False)
|
876 |
+
with gr.Column():
|
877 |
+
with gr.Row():
|
878 |
+
with gr.Column():
|
879 |
+
total_epochs = gr.Slider(label=translations["total_epoch"], info=translations["total_epoch_info"], minimum=1, maximum=10000, value=300, step=1, interactive=True)
|
880 |
+
save_epochs = gr.Slider(label=translations["save_epoch"], info=translations["save_epoch_info"], minimum=1, maximum=10000, value=50, step=1, interactive=True)
|
881 |
+
with gr.Column():
|
882 |
+
with gr.Row():
|
883 |
+
index_button = gr.Button(f"3. {translations['create_index']}", variant="primary", scale=2)
|
884 |
+
training_button = gr.Button(f"4. {translations['training_model']}", variant="primary", scale=2)
|
885 |
+
with gr.Row():
|
886 |
+
with gr.Accordion(label=translations["setting"], open=False):
|
887 |
+
with gr.Row():
|
888 |
+
index_algorithm = gr.Radio(label=translations["index_algorithm"], info=translations["index_algorithm_info"], choices=["Auto", "Faiss", "KMeans"], value="Auto", interactive=True)
|
889 |
+
with gr.Row():
|
890 |
+
custom_dataset = gr.Checkbox(label=translations["custom_dataset"], info=translations["custom_dataset_info"], value=False, interactive=True)
|
891 |
+
overtraining_detector = gr.Checkbox(label=translations["overtraining_detector"], info=translations["overtraining_detector_info"], value=False, interactive=True)
|
892 |
+
clean_up = gr.Checkbox(label=translations["cleanup_training"], info=translations["cleanup_training_info"], value=False, interactive=True)
|
893 |
+
cache_in_gpu = gr.Checkbox(label=translations["cache_in_gpu"], info=translations["cache_in_gpu_info"], value=False, interactive=True)
|
894 |
+
with gr.Column():
|
895 |
+
dataset_path = gr.Textbox(label=translations["dataset_folder"], value="dataset", interactive=True, visible=custom_dataset.value)
|
896 |
+
with gr.Column():
|
897 |
+
threshold = gr.Slider(minimum=1, maximum=100, value=50, step=1, label=translations["threshold"], interactive=True, visible=overtraining_detector.value)
|
898 |
+
with gr.Accordion(translations["setting_cpu_gpu"], open=False):
|
899 |
+
with gr.Column():
|
900 |
+
gpu_number = gr.Textbox(label=translations["gpu_number"], value=str("-".join(map(str, range(torch.cuda.device_count()))) if torch.cuda.is_available() else "-"), info=translations["gpu_number_info"], interactive=True)
|
901 |
+
gpu_info = gr.Textbox(label=translations["gpu_info"], value=get_gpu_info(), info=translations["gpu_info_2"], interactive=False)
|
902 |
+
cpu_core = gr.Slider(label=translations["cpu_core"], info=translations["cpu_core_info"], minimum=0, maximum=cpu_count(), value=cpu_count(), step=1, interactive=True)
|
903 |
+
train_batch_size = gr.Slider(label=translations["batch_size"], info=translations["batch_size_info"], minimum=1, maximum=64, value=8, step=1, interactive=True)
|
904 |
+
with gr.Row():
|
905 |
+
save_only_latest = gr.Checkbox(label=translations["save_only_latest"], info=translations["save_only_latest_info"], value=True, interactive=True)
|
906 |
+
save_every_weights = gr.Checkbox(label=translations["save_every_weights"], info=translations["save_every_weights_info"], value=True, interactive=True)
|
907 |
+
not_use_pretrain = gr.Checkbox(label=translations["not_use_pretrain_2"], info=translations["not_use_pretrain_info"], value=False, interactive=True)
|
908 |
+
custom_pretrain = gr.Checkbox(label=translations["custom_pretrain"], info=translations["custom_pretrain_info"], value=False, interactive=True)
|
909 |
+
with gr.Row():
|
910 |
+
vocoders = gr.Radio(label=translations["vocoder"], info=translations["vocoder_info"], choices=["Default", "MRF-HiFi-GAN", "RefineGAN"], value="Default", interactive=True)
|
911 |
+
with gr.Row():
|
912 |
+
deterministic = gr.Checkbox(label=translations["deterministic"], info=translations["deterministic_info"], value=False, interactive=True)
|
913 |
+
benchmark = gr.Checkbox(label=translations["benchmark"], info=translations["benchmark_info"], value=False, interactive=True)
|
914 |
+
with gr.Row():
|
915 |
+
model_author = gr.Textbox(label=translations["training_author"], info=translations["training_author_info"], value="", placeholder=translations["training_author"], interactive=True)
|
916 |
+
with gr.Row():
|
917 |
+
with gr.Column():
|
918 |
+
with gr.Accordion(translations["custom_pretrain_info"], open=False, visible=custom_pretrain.value and not not_use_pretrain.value) as pretrain_setting:
|
919 |
+
pretrained_D = gr.Dropdown(label=translations["pretrain_file"].format(dg="D"), choices=pretrainedD, value=pretrainedD[0] if len(pretrainedD) > 0 else '', interactive=True, allow_custom_value=True)
|
920 |
+
pretrained_G = gr.Dropdown(label=translations["pretrain_file"].format(dg="G"), choices=pretrainedG, value=pretrainedG[0] if len(pretrainedG) > 0 else '', interactive=True, allow_custom_value=True)
|
921 |
+
refesh_pretrain = gr.Button(translations["refesh"], scale=2)
|
922 |
+
with gr.Row():
|
923 |
+
training_info = gr.Textbox(label=translations["train_info"], value="", interactive=False)
|
924 |
+
with gr.Row():
|
925 |
+
with gr.Column():
|
926 |
+
with gr.Accordion(translations["export_model"], open=False):
|
927 |
+
with gr.Row():
|
928 |
+
model_file= gr.Dropdown(label=translations["model_name"], choices=model_name, value=model_name[0] if len(model_name) >= 1 else "", interactive=True, allow_custom_value=True)
|
929 |
+
index_file = gr.Dropdown(label=translations["index_path"], choices=index_path, value=index_path[0] if len(index_path) >= 1 else "", interactive=True, allow_custom_value=True)
|
930 |
+
with gr.Row():
|
931 |
+
refesh_file = gr.Button(f"1. {translations['refesh']}", scale=2)
|
932 |
+
zip_model = gr.Button(translations["zip_model"], variant="primary", scale=2)
|
933 |
+
with gr.Row():
|
934 |
+
zip_output = gr.File(label=translations["output_zip"], file_types=[".zip"], interactive=False, visible=False)
|
935 |
+
with gr.Row():
|
936 |
+
vocoders.change(fn=pitch_guidance_lock, inputs=[vocoders], outputs=[training_f0])
|
937 |
+
training_f0.change(fn=vocoders_lock, inputs=[training_f0, vocoders], outputs=[vocoders])
|
938 |
+
unlock_full_method4.change(fn=unlock_f0, inputs=[unlock_full_method4], outputs=[extract_method])
|
939 |
+
with gr.Row():
|
940 |
+
refesh_file.click(fn=change_models_choices, inputs=[], outputs=[model_file, index_file])
|
941 |
+
zip_model.click(fn=zip_file, inputs=[training_name, model_file, index_file], outputs=[zip_output])
|
942 |
+
dataset_path.change(fn=lambda folder: os.makedirs(folder, exist_ok=True), inputs=[dataset_path], outputs=[])
|
943 |
+
with gr.Row():
|
944 |
+
upload.change(fn=visible, inputs=[upload], outputs=[upload_dataset])
|
945 |
+
overtraining_detector.change(fn=visible, inputs=[overtraining_detector], outputs=[threshold])
|
946 |
+
clean_dataset.change(fn=visible, inputs=[clean_dataset], outputs=[clean_dataset_strength])
|
947 |
+
with gr.Row():
|
948 |
+
custom_dataset.change(fn=lambda custom_dataset: [visible(custom_dataset), "dataset"],inputs=[custom_dataset], outputs=[dataset_path, dataset_path])
|
949 |
+
training_ver.change(fn=unlock_vocoder, inputs=[training_ver, vocoders], outputs=[vocoders])
|
950 |
+
vocoders.change(fn=unlock_ver, inputs=[training_ver, vocoders], outputs=[training_ver])
|
951 |
+
upload_dataset.upload(
|
952 |
+
fn=lambda files, folder: [shutil.move(f.name, os.path.join(folder, os.path.split(f.name)[1])) for f in files] if folder != "" else gr_warning(translations["dataset_folder1"]),
|
953 |
+
inputs=[upload_dataset, dataset_path],
|
954 |
+
outputs=[],
|
955 |
+
api_name="upload_dataset"
|
956 |
+
)
|
957 |
+
with gr.Row():
|
958 |
+
not_use_pretrain.change(fn=lambda a, b: visible(a and not b), inputs=[custom_pretrain, not_use_pretrain], outputs=[pretrain_setting])
|
959 |
+
custom_pretrain.change(fn=lambda a, b: visible(a and not b), inputs=[custom_pretrain, not_use_pretrain], outputs=[pretrain_setting])
|
960 |
+
refesh_pretrain.click(fn=change_pretrained_choices, inputs=[], outputs=[pretrained_D, pretrained_G])
|
961 |
+
with gr.Row():
|
962 |
+
preprocess_button.click(
|
963 |
+
fn=preprocess,
|
964 |
+
inputs=[
|
965 |
+
training_name,
|
966 |
+
training_sr,
|
967 |
+
cpu_core,
|
968 |
+
preprocess_cut,
|
969 |
+
process_effects,
|
970 |
+
dataset_path,
|
971 |
+
clean_dataset,
|
972 |
+
clean_dataset_strength
|
973 |
+
],
|
974 |
+
outputs=[preprocess_info],
|
975 |
+
api_name="preprocess"
|
976 |
+
)
|
977 |
+
with gr.Row():
|
978 |
+
embed_mode2.change(fn=visible_embedders, inputs=[embed_mode2], outputs=[extract_embedders])
|
979 |
+
extract_method.change(fn=hoplength_show, inputs=[extract_method], outputs=[extract_hop_length])
|
980 |
+
extract_embedders.change(fn=lambda extract_embedders: visible(extract_embedders == "custom"), inputs=[extract_embedders], outputs=[extract_embedders_custom])
|
981 |
+
with gr.Row():
|
982 |
+
extract_button.click(
|
983 |
+
fn=extract,
|
984 |
+
inputs=[
|
985 |
+
training_name,
|
986 |
+
training_ver,
|
987 |
+
extract_method,
|
988 |
+
training_f0,
|
989 |
+
extract_hop_length,
|
990 |
+
cpu_core,
|
991 |
+
gpu_number,
|
992 |
+
training_sr,
|
993 |
+
extract_embedders,
|
994 |
+
extract_embedders_custom,
|
995 |
+
onnx_f0_mode2,
|
996 |
+
embed_mode2
|
997 |
+
],
|
998 |
+
outputs=[extract_info],
|
999 |
+
api_name="extract"
|
1000 |
+
)
|
1001 |
+
with gr.Row():
|
1002 |
+
index_button.click(
|
1003 |
+
fn=create_index,
|
1004 |
+
inputs=[
|
1005 |
+
training_name,
|
1006 |
+
training_ver,
|
1007 |
+
index_algorithm
|
1008 |
+
],
|
1009 |
+
outputs=[training_info],
|
1010 |
+
api_name="create_index"
|
1011 |
+
)
|
1012 |
+
with gr.Row():
|
1013 |
+
training_button.click(
|
1014 |
+
fn=training,
|
1015 |
+
inputs=[
|
1016 |
+
training_name,
|
1017 |
+
training_ver,
|
1018 |
+
save_epochs,
|
1019 |
+
save_only_latest,
|
1020 |
+
save_every_weights,
|
1021 |
+
total_epochs,
|
1022 |
+
training_sr,
|
1023 |
+
train_batch_size,
|
1024 |
+
gpu_number,
|
1025 |
+
training_f0,
|
1026 |
+
not_use_pretrain,
|
1027 |
+
custom_pretrain,
|
1028 |
+
pretrained_G,
|
1029 |
+
pretrained_D,
|
1030 |
+
overtraining_detector,
|
1031 |
+
threshold,
|
1032 |
+
clean_up,
|
1033 |
+
cache_in_gpu,
|
1034 |
+
model_author,
|
1035 |
+
vocoders,
|
1036 |
+
checkpointing1,
|
1037 |
+
deterministic,
|
1038 |
+
benchmark
|
1039 |
+
],
|
1040 |
+
outputs=[training_info],
|
1041 |
+
api_name="training_model"
|
1042 |
+
)
|
1043 |
+
|
1044 |
with gr.TabItem(translations["audio_editing"], visible=configs.get("audioldm2", True)):
|
1045 |
gr.Markdown(translations["audio_editing_info"])
|
1046 |
with gr.Row():
|
|
|
1362 |
api_name="create_dataset"
|
1363 |
)
|
1364 |
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
1365 |
with gr.TabItem(translations["fushion"], visible=configs.get("fushion_tab", True)):
|
1366 |
gr.Markdown(translations["fushion_markdown"])
|
1367 |
with gr.Row():
|