Delete main
Browse filesThis view is limited to 50 files because it contains too many changes.
See raw diff
- main/app/app.py +0 -87
- main/app/core/downloads.py +0 -187
- main/app/core/editing.py +0 -96
- main/app/core/f0_extract.py +0 -54
- main/app/core/inference.py +0 -387
- main/app/core/model_utils.py +0 -162
- main/app/core/presets.py +0 -165
- main/app/core/process.py +0 -134
- main/app/core/restart.py +0 -48
- main/app/core/separate.py +0 -35
- main/app/core/training.py +0 -219
- main/app/core/tts.py +0 -99
- main/app/core/ui.py +0 -179
- main/app/core/utils.py +0 -97
- main/app/parser.py +0 -319
- main/app/run_tensorboard.py +0 -33
- main/app/tabs/downloads/downloads.py +0 -119
- main/app/tabs/editing/child/audio_effects.py +0 -393
- main/app/tabs/editing/child/quirk.py +0 -48
- main/app/tabs/editing/editing.py +0 -20
- main/app/tabs/extra/child/convert_model.py +0 -31
- main/app/tabs/extra/child/f0_extract.py +0 -51
- main/app/tabs/extra/child/fushion.py +0 -45
- main/app/tabs/extra/child/read_model.py +0 -29
- main/app/tabs/extra/child/report_bugs.py +0 -24
- main/app/tabs/extra/child/settings.py +0 -61
- main/app/tabs/extra/extra.py +0 -40
- main/app/tabs/inference/child/convert.py +0 -313
- main/app/tabs/inference/child/convert_tts.py +0 -171
- main/app/tabs/inference/child/convert_with_whisper.py +0 -160
- main/app/tabs/inference/child/separate.py +0 -108
- main/app/tabs/inference/inference.py +0 -30
- main/app/tabs/training/child/create_dataset.py +0 -71
- main/app/tabs/training/child/training.py +0 -237
- main/app/tabs/training/training.py +0 -20
- main/app/variables.py +0 -106
- main/configs/config.json +0 -584
- main/configs/config.py +0 -101
- main/configs/v1/32000.json +0 -46
- main/configs/v1/40000.json +0 -46
- main/configs/v1/48000.json +0 -46
- main/configs/v2/32000.json +0 -42
- main/configs/v2/40000.json +0 -42
- main/configs/v2/48000.json +0 -42
- main/inference/audio_effects.py +0 -185
- main/inference/conversion/convert.py +0 -300
- main/inference/conversion/pipeline.py +0 -251
- main/inference/conversion/utils.py +0 -66
- main/inference/create_dataset.py +0 -212
- main/inference/create_index.py +0 -73
main/app/app.py
DELETED
|
@@ -1,87 +0,0 @@
|
|
| 1 |
-
import os
|
| 2 |
-
import io
|
| 3 |
-
import ssl
|
| 4 |
-
import sys
|
| 5 |
-
import time
|
| 6 |
-
import codecs
|
| 7 |
-
import logging
|
| 8 |
-
import warnings
|
| 9 |
-
|
| 10 |
-
import gradio as gr
|
| 11 |
-
|
| 12 |
-
sys.path.append(os.getcwd())
|
| 13 |
-
start_time = time.time()
|
| 14 |
-
|
| 15 |
-
from main.app.tabs.extra.extra import extra_tab
|
| 16 |
-
from main.app.tabs.editing.editing import editing_tab
|
| 17 |
-
from main.app.tabs.training.training import training_tab
|
| 18 |
-
from main.app.tabs.downloads.downloads import download_tab
|
| 19 |
-
from main.app.tabs.inference.inference import inference_tab
|
| 20 |
-
from main.app.variables import logger, config, translations, theme, font, configs, language, allow_disk
|
| 21 |
-
|
| 22 |
-
ssl._create_default_https_context = ssl._create_unverified_context
|
| 23 |
-
|
| 24 |
-
warnings.filterwarnings("ignore")
|
| 25 |
-
for l in ["httpx", "gradio", "uvicorn", "httpcore", "urllib3"]:
|
| 26 |
-
logging.getLogger(l).setLevel(logging.ERROR)
|
| 27 |
-
|
| 28 |
-
with gr.Blocks(title="📱 Vietnamese-RVC GUI BY ANH", theme=theme, css="<style> @import url('{fonts}'); * {{font-family: 'Courgette', cursive !important;}} body, html {{font-family: 'Courgette', cursive !important;}} h1, h2, h3, h4, h5, h6, p, button, input, textarea, label, span, div, select {{font-family: 'Courgette', cursive !important;}} </style>".format(fonts=font or "https://fonts.googleapis.com/css2?family=Courgette&display=swap")) as app:
|
| 29 |
-
gr.HTML("<h1 style='text-align: center;'>🎵VIETNAMESE RVC BY ANH🎵</h1>")
|
| 30 |
-
gr.HTML(f"<h3 style='text-align: center;'>{translations['title']}</h3>")
|
| 31 |
-
|
| 32 |
-
with gr.Tabs():
|
| 33 |
-
inference_tab()
|
| 34 |
-
editing_tab()
|
| 35 |
-
training_tab()
|
| 36 |
-
download_tab()
|
| 37 |
-
extra_tab(app)
|
| 38 |
-
|
| 39 |
-
with gr.Row():
|
| 40 |
-
gr.Markdown(translations["rick_roll"].format(rickroll=codecs.decode('uggcf://jjj.lbhghor.pbz/jngpu?i=qDj4j9JtKpD', 'rot13')))
|
| 41 |
-
|
| 42 |
-
with gr.Row():
|
| 43 |
-
gr.Markdown(translations["terms_of_use"])
|
| 44 |
-
|
| 45 |
-
with gr.Row():
|
| 46 |
-
gr.Markdown(translations["exemption"])
|
| 47 |
-
|
| 48 |
-
logger.info(config.device)
|
| 49 |
-
logger.info(translations["start_app"])
|
| 50 |
-
logger.info(translations["set_lang"].format(lang=language))
|
| 51 |
-
|
| 52 |
-
port = configs.get("app_port", 7860)
|
| 53 |
-
server_name = configs.get("server_name", "0.0.0.0")
|
| 54 |
-
share = "--share" in sys.argv
|
| 55 |
-
|
| 56 |
-
original_stdout = sys.stdout
|
| 57 |
-
sys.stdout = io.StringIO()
|
| 58 |
-
|
| 59 |
-
for i in range(configs.get("num_of_restart", 5)):
|
| 60 |
-
try:
|
| 61 |
-
_, _, share_url = app.queue().launch(
|
| 62 |
-
favicon_path=configs["ico_path"],
|
| 63 |
-
server_name=server_name,
|
| 64 |
-
server_port=port,
|
| 65 |
-
show_error=configs.get("app_show_error", False),
|
| 66 |
-
inbrowser="--open" in sys.argv,
|
| 67 |
-
share=share,
|
| 68 |
-
allowed_paths=allow_disk,
|
| 69 |
-
prevent_thread_lock=True,
|
| 70 |
-
quiet=True
|
| 71 |
-
)
|
| 72 |
-
break
|
| 73 |
-
except OSError:
|
| 74 |
-
logger.debug(translations["port"].format(port=port))
|
| 75 |
-
port -= 1
|
| 76 |
-
except Exception as e:
|
| 77 |
-
logger.error(translations["error_occurred"].format(e=e))
|
| 78 |
-
sys.exit(1)
|
| 79 |
-
|
| 80 |
-
sys.stdout = original_stdout
|
| 81 |
-
logger.info(f"{translations['running_local_url']}: {server_name}:{port}")
|
| 82 |
-
|
| 83 |
-
if share: logger.info(f"{translations['running_share_url']}: {share_url}")
|
| 84 |
-
logger.info(f"{translations['gradio_start']}: {(time.time() - start_time):.2f}s")
|
| 85 |
-
|
| 86 |
-
while 1:
|
| 87 |
-
time.sleep(5)
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
main/app/core/downloads.py
DELETED
|
@@ -1,187 +0,0 @@
|
|
| 1 |
-
import os
|
| 2 |
-
import re
|
| 3 |
-
import sys
|
| 4 |
-
import json
|
| 5 |
-
import codecs
|
| 6 |
-
import shutil
|
| 7 |
-
import yt_dlp
|
| 8 |
-
import warnings
|
| 9 |
-
import requests
|
| 10 |
-
|
| 11 |
-
from bs4 import BeautifulSoup
|
| 12 |
-
|
| 13 |
-
sys.path.append(os.getcwd())
|
| 14 |
-
|
| 15 |
-
from main.tools import huggingface, gdown, meganz, mediafire, pixeldrain
|
| 16 |
-
from main.app.core.ui import gr_info, gr_warning, gr_error, process_output
|
| 17 |
-
from main.app.variables import logger, translations, model_options, configs
|
| 18 |
-
from main.app.core.process import move_files_from_directory, fetch_pretrained_data, extract_name_model
|
| 19 |
-
|
| 20 |
-
def download_url(url):
|
| 21 |
-
if not url: return gr_warning(translations["provide_url"])
|
| 22 |
-
if not os.path.exists(configs["audios_path"]): os.makedirs(configs["audios_path"], exist_ok=True)
|
| 23 |
-
|
| 24 |
-
with warnings.catch_warnings():
|
| 25 |
-
warnings.filterwarnings("ignore")
|
| 26 |
-
ydl_opts = {
|
| 27 |
-
"format": "bestaudio/best",
|
| 28 |
-
"postprocessors": [{
|
| 29 |
-
"key": "FFmpegExtractAudio",
|
| 30 |
-
"preferredcodec": "wav",
|
| 31 |
-
"preferredquality": "192"
|
| 32 |
-
}],
|
| 33 |
-
"quiet": True,
|
| 34 |
-
"no_warnings": True,
|
| 35 |
-
"noplaylist": True,
|
| 36 |
-
"verbose": False
|
| 37 |
-
}
|
| 38 |
-
|
| 39 |
-
gr_info(translations["start"].format(start=translations["download_music"]))
|
| 40 |
-
|
| 41 |
-
with yt_dlp.YoutubeDL(ydl_opts) as ydl:
|
| 42 |
-
audio_output = os.path.join(configs["audios_path"], re.sub(r'\s+', '-', re.sub(r'[^\w\s\u4e00-\u9fff\uac00-\ud7af\u0400-\u04FF\u1100-\u11FF]', '', ydl.extract_info(url, download=False).get('title', 'video')).strip()))
|
| 43 |
-
if os.path.exists(audio_output): shutil.rmtree(audio_output, ignore_errors=True)
|
| 44 |
-
|
| 45 |
-
ydl_opts['outtmpl'] = audio_output
|
| 46 |
-
|
| 47 |
-
with yt_dlp.YoutubeDL(ydl_opts) as ydl:
|
| 48 |
-
audio_output = process_output(audio_output + ".wav")
|
| 49 |
-
|
| 50 |
-
ydl.download([url])
|
| 51 |
-
|
| 52 |
-
gr_info(translations["success"])
|
| 53 |
-
return [audio_output, audio_output, translations["success"]]
|
| 54 |
-
|
| 55 |
-
def move_file(file, download_dir, model):
|
| 56 |
-
weights_dir = configs["weights_path"]
|
| 57 |
-
logs_dir = configs["logs_path"]
|
| 58 |
-
|
| 59 |
-
if not os.path.exists(weights_dir): os.makedirs(weights_dir, exist_ok=True)
|
| 60 |
-
if not os.path.exists(logs_dir): os.makedirs(logs_dir, exist_ok=True)
|
| 61 |
-
|
| 62 |
-
if file.endswith(".zip"): shutil.unpack_archive(file, download_dir)
|
| 63 |
-
move_files_from_directory(download_dir, weights_dir, logs_dir, model)
|
| 64 |
-
|
| 65 |
-
def download_model(url=None, model=None):
|
| 66 |
-
if not url: return gr_warning(translations["provide_url"])
|
| 67 |
-
|
| 68 |
-
url = url.replace("/blob/", "/resolve/").replace("?download=true", "").strip()
|
| 69 |
-
download_dir = "download_model"
|
| 70 |
-
|
| 71 |
-
os.makedirs(download_dir, exist_ok=True)
|
| 72 |
-
|
| 73 |
-
try:
|
| 74 |
-
gr_info(translations["start"].format(start=translations["download"]))
|
| 75 |
-
|
| 76 |
-
if "huggingface.co" in url: file = huggingface.HF_download_file(url, download_dir)
|
| 77 |
-
elif "google.com" in url: file = gdown.gdown_download(url, download_dir)
|
| 78 |
-
elif "mediafire.com" in url: file = mediafire.Mediafire_Download(url, download_dir)
|
| 79 |
-
elif "pixeldrain.com" in url: file = pixeldrain.pixeldrain(url, download_dir)
|
| 80 |
-
elif "mega.nz" in url: file = meganz.mega_download_url(url, download_dir)
|
| 81 |
-
else:
|
| 82 |
-
gr_warning(translations["not_support_url"])
|
| 83 |
-
return translations["not_support_url"]
|
| 84 |
-
|
| 85 |
-
if not model:
|
| 86 |
-
modelname = os.path.basename(file)
|
| 87 |
-
model = extract_name_model(modelname) if modelname.endswith(".index") else os.path.splitext(modelname)[0]
|
| 88 |
-
if model is None: model = os.path.splitext(modelname)[0]
|
| 89 |
-
|
| 90 |
-
model = model.replace(".onnx", "").replace(".pth", "").replace(".index", "").replace(".zip", "").replace(" ", "_").replace("(", "").replace(")", "").replace("[", "").replace("]", "").replace("{", "").replace("}", "").replace(",", "").replace('"', "").replace("'", "").replace("|", "").strip()
|
| 91 |
-
|
| 92 |
-
move_file(file, download_dir, model)
|
| 93 |
-
gr_info(translations["success"])
|
| 94 |
-
|
| 95 |
-
return translations["success"]
|
| 96 |
-
except Exception as e:
|
| 97 |
-
gr_error(message=translations["error_occurred"].format(e=e))
|
| 98 |
-
return translations["error_occurred"].format(e=e)
|
| 99 |
-
finally:
|
| 100 |
-
shutil.rmtree(download_dir, ignore_errors=True)
|
| 101 |
-
|
| 102 |
-
def download_pretrained_model(choices, model, sample_rate):
|
| 103 |
-
pretraineds_custom_path = configs["pretrained_custom_path"]
|
| 104 |
-
|
| 105 |
-
if choices == translations["list_model"]:
|
| 106 |
-
paths = fetch_pretrained_data()[model][sample_rate]
|
| 107 |
-
|
| 108 |
-
if not os.path.exists(pretraineds_custom_path): os.makedirs(pretraineds_custom_path, exist_ok=True)
|
| 109 |
-
url = codecs.decode("uggcf://uhttvatsnpr.pb/NauC/Ivrganzrfr-EIP-Cebwrpg/erfbyir/znva/cergenvarq_phfgbz/", "rot13") + paths
|
| 110 |
-
|
| 111 |
-
gr_info(translations["download_pretrain"])
|
| 112 |
-
file = huggingface.HF_download_file(url.replace("/blob/", "/resolve/").replace("?download=true", "").strip(), os.path.join(pretraineds_custom_path, paths))
|
| 113 |
-
|
| 114 |
-
if file.endswith(".zip"):
|
| 115 |
-
shutil.unpack_archive(file, pretraineds_custom_path)
|
| 116 |
-
os.remove(file)
|
| 117 |
-
|
| 118 |
-
gr_info(translations["success"])
|
| 119 |
-
return translations["success"], None
|
| 120 |
-
elif choices == translations["download_url"]:
|
| 121 |
-
if not model: return gr_warning(translations["provide_pretrain"].format(dg="D"))
|
| 122 |
-
if not sample_rate: return gr_warning(translations["provide_pretrain"].format(dg="G"))
|
| 123 |
-
|
| 124 |
-
gr_info(translations["download_pretrain"])
|
| 125 |
-
|
| 126 |
-
for url in [model, sample_rate]:
|
| 127 |
-
url = url.replace("/blob/", "/resolve/").replace("?download=true", "").strip()
|
| 128 |
-
|
| 129 |
-
if "huggingface.co" in url: huggingface.HF_download_file(url, pretraineds_custom_path)
|
| 130 |
-
elif "google.com" in url: gdown.gdown_download(url, pretraineds_custom_path)
|
| 131 |
-
elif "mediafire.com" in url: mediafire.Mediafire_Download(url, pretraineds_custom_path)
|
| 132 |
-
elif "pixeldrain.com" in url: pixeldrain.pixeldrain(url, pretraineds_custom_path)
|
| 133 |
-
elif "mega.nz" in url: meganz.mega_download_url(url, pretraineds_custom_path)
|
| 134 |
-
else:
|
| 135 |
-
gr_warning(translations["not_support_url"])
|
| 136 |
-
return translations["not_support_url"], translations["not_support_url"]
|
| 137 |
-
|
| 138 |
-
gr_info(translations["success"])
|
| 139 |
-
return translations["success"], translations["success"]
|
| 140 |
-
|
| 141 |
-
def fetch_models_data(search):
|
| 142 |
-
all_table_data = []
|
| 143 |
-
page = 1
|
| 144 |
-
|
| 145 |
-
while 1:
|
| 146 |
-
try:
|
| 147 |
-
response = requests.post(url=codecs.decode("uggcf://ibvpr-zbqryf.pbz/srgpu_qngn.cuc", "rot13"), data={"page": page, "search": search})
|
| 148 |
-
|
| 149 |
-
if response.status_code == 200:
|
| 150 |
-
table_data = response.json().get("table", "")
|
| 151 |
-
if not table_data.strip(): break
|
| 152 |
-
|
| 153 |
-
all_table_data.append(table_data)
|
| 154 |
-
page += 1
|
| 155 |
-
else:
|
| 156 |
-
logger.debug(f"{translations['code_error']} {response.status_code}")
|
| 157 |
-
break
|
| 158 |
-
except json.JSONDecodeError:
|
| 159 |
-
logger.debug(translations["json_error"])
|
| 160 |
-
break
|
| 161 |
-
except requests.RequestException as e:
|
| 162 |
-
logger.debug(translations["requests_error"].format(e=e))
|
| 163 |
-
break
|
| 164 |
-
|
| 165 |
-
return all_table_data
|
| 166 |
-
|
| 167 |
-
def search_models(name):
|
| 168 |
-
if not name: return gr_warning(translations["provide_name"])
|
| 169 |
-
gr_info(translations["start"].format(start=translations["search"]))
|
| 170 |
-
|
| 171 |
-
tables = fetch_models_data(name)
|
| 172 |
-
|
| 173 |
-
if len(tables) == 0:
|
| 174 |
-
gr_info(translations["not_found"].format(name=name))
|
| 175 |
-
return [None]*2
|
| 176 |
-
else:
|
| 177 |
-
model_options.clear()
|
| 178 |
-
|
| 179 |
-
for table in tables:
|
| 180 |
-
for row in BeautifulSoup(table, "html.parser").select("tr"):
|
| 181 |
-
name_tag, url_tag = row.find("a", {"class": "fs-5"}), row.find("a", {"class": "btn btn-sm fw-bold btn-light ms-0 p-1 ps-2 pe-2"})
|
| 182 |
-
url = url_tag["href"].replace("https://easyaivoice.com/run?url=", "")
|
| 183 |
-
if "huggingface" in url:
|
| 184 |
-
if name_tag and url_tag: model_options[name_tag.text.replace(".onnx", "").replace(".pth", "").replace(".index", "").replace(".zip", "").replace(" ", "_").replace("(", "").replace(")", "").replace("[", "").replace("]", "").replace(",", "").replace('"', "").replace("'", "").replace("|", "_").replace("-_-", "_").replace("_-_", "_").replace("-", "_").replace("---", "_").replace("___", "_").strip()] = url
|
| 185 |
-
|
| 186 |
-
gr_info(translations["found"].format(results=len(model_options)))
|
| 187 |
-
return [{"value": "", "choices": model_options, "interactive": True, "visible": True, "__type__": "update"}, {"value": translations["downloads"], "visible": True, "__type__": "update"}]
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
main/app/core/editing.py
DELETED
|
@@ -1,96 +0,0 @@
|
|
| 1 |
-
import os
|
| 2 |
-
import sys
|
| 3 |
-
import random
|
| 4 |
-
import librosa
|
| 5 |
-
import subprocess
|
| 6 |
-
|
| 7 |
-
import numpy as np
|
| 8 |
-
import soundfile as sf
|
| 9 |
-
|
| 10 |
-
sys.path.append(os.getcwd())
|
| 11 |
-
|
| 12 |
-
from main.app.core.ui import gr_info, gr_warning, process_output
|
| 13 |
-
from main.app.variables import python, translations, configs, config
|
| 14 |
-
|
| 15 |
-
def audio_effects(input_path, output_path, resample, resample_sr, chorus_depth, chorus_rate, chorus_mix, chorus_delay, chorus_feedback, distortion_drive, reverb_room_size, reverb_damping, reverb_wet_level, reverb_dry_level, reverb_width, reverb_freeze_mode, pitch_shift, delay_seconds, delay_feedback, delay_mix, compressor_threshold, compressor_ratio, compressor_attack_ms, compressor_release_ms, limiter_threshold, limiter_release, gain_db, bitcrush_bit_depth, clipping_threshold, phaser_rate_hz, phaser_depth, phaser_centre_frequency_hz, phaser_feedback, phaser_mix, bass_boost_db, bass_boost_frequency, treble_boost_db, treble_boost_frequency, fade_in_duration, fade_out_duration, export_format, chorus, distortion, reverb, delay, compressor, limiter, gain, bitcrush, clipping, phaser, treble_bass_boost, fade_in_out, audio_combination, audio_combination_input, main_vol, combine_vol):
|
| 16 |
-
if not input_path or not os.path.exists(input_path) or os.path.isdir(input_path):
|
| 17 |
-
gr_warning(translations["input_not_valid"])
|
| 18 |
-
return None
|
| 19 |
-
|
| 20 |
-
if not output_path:
|
| 21 |
-
gr_warning(translations["output_not_valid"])
|
| 22 |
-
return None
|
| 23 |
-
|
| 24 |
-
if os.path.isdir(output_path): output_path = os.path.join(output_path, f"audio_effects.{export_format}")
|
| 25 |
-
output_dir = os.path.dirname(output_path) or output_path
|
| 26 |
-
|
| 27 |
-
if not os.path.exists(output_dir): os.makedirs(output_dir, exist_ok=True)
|
| 28 |
-
output_path = process_output(output_path)
|
| 29 |
-
|
| 30 |
-
gr_info(translations["start"].format(start=translations["apply_effect"]))
|
| 31 |
-
|
| 32 |
-
if config.debug_mode: subprocess.run([python, configs["audio_effects_path"], "--input_path", input_path, "--output_path", output_path, "--resample", str(resample), "--resample_sr", str(resample_sr), "--chorus_depth", str(chorus_depth), "--chorus_rate", str(chorus_rate), "--chorus_mix", str(chorus_mix), "--chorus_delay", str(chorus_delay), "--chorus_feedback", str(chorus_feedback), "--drive_db", str(distortion_drive), "--reverb_room_size", str(reverb_room_size), "--reverb_damping", str(reverb_damping), "--reverb_wet_level", str(reverb_wet_level), "--reverb_dry_level", str(reverb_dry_level), "--reverb_width", str(reverb_width), "--reverb_freeze_mode", str(reverb_freeze_mode), "--pitch_shift", str(pitch_shift), "--delay_seconds", str(delay_seconds), "--delay_feedback", str(delay_feedback), "--delay_mix", str(delay_mix), "--compressor_threshold", str(compressor_threshold), "--compressor_ratio", str(compressor_ratio), "--compressor_attack_ms", str(compressor_attack_ms), "--compressor_release_ms", str(compressor_release_ms), "--limiter_threshold", str(limiter_threshold), "--limiter_release", str(limiter_release), "--gain_db", str(gain_db), "--bitcrush_bit_depth", str(bitcrush_bit_depth), "--clipping_threshold", str(clipping_threshold), "--phaser_rate_hz", str(phaser_rate_hz), "--phaser_depth", str(phaser_depth), "--phaser_centre_frequency_hz", str(phaser_centre_frequency_hz), "--phaser_feedback", str(phaser_feedback), "--phaser_mix", str(phaser_mix), "--bass_boost_db", str(bass_boost_db), "--bass_boost_frequency", str(bass_boost_frequency), "--treble_boost_db", str(treble_boost_db), "--treble_boost_frequency", str(treble_boost_frequency), "--fade_in_duration", str(fade_in_duration), "--fade_out_duration", str(fade_out_duration), "--export_format", export_format, "--chorus", str(chorus), "--distortion", str(distortion), "--reverb", str(reverb), "--pitchshift", str(pitch_shift != 0), "--delay", str(delay), "--compressor", str(compressor), "--limiter", str(limiter), 
"--gain", str(gain), "--bitcrush", str(bitcrush), "--clipping", str(clipping), "--phaser", str(phaser), "--treble_bass_boost", str(treble_bass_boost), "--fade_in_out", str(fade_in_out), "--audio_combination", str(audio_combination), "--audio_combination_input", audio_combination_input, "--main_volume", str(main_vol), "--combination_volume", str(combine_vol)])
|
| 33 |
-
else:
|
| 34 |
-
from main.inference.audio_effects import process_audio
|
| 35 |
-
|
| 36 |
-
process_audio(input_path, output_path, resample, resample_sr, chorus_depth, chorus_rate, chorus_mix, chorus_delay, chorus_feedback, distortion_drive, reverb_room_size, reverb_damping, reverb_wet_level, reverb_dry_level, reverb_width, reverb_freeze_mode, pitch_shift, delay_seconds, delay_feedback, delay_mix, compressor_threshold, compressor_ratio, compressor_attack_ms, compressor_release_ms, limiter_threshold, limiter_release, gain_db, bitcrush_bit_depth, clipping_threshold, phaser_rate_hz, phaser_depth, phaser_centre_frequency_hz, phaser_feedback, phaser_mix, bass_boost_db, bass_boost_frequency, treble_boost_db, treble_boost_frequency, fade_in_duration, fade_out_duration, export_format, chorus, distortion, reverb, pitch_shift != 0, delay, compressor, limiter, gain, bitcrush, clipping, phaser, treble_bass_boost, fade_in_out, audio_combination, audio_combination_input, main_vol, combine_vol)
|
| 37 |
-
|
| 38 |
-
gr_info(translations["success"])
|
| 39 |
-
return output_path.replace("wav", export_format)
|
| 40 |
-
|
| 41 |
-
def vibrato(y, sr, freq=5, depth=0.003):
|
| 42 |
-
return y[np.clip((np.arange(len(y)) + (depth * np.sin(2 * np.pi * freq * (np.arange(len(y)) / sr))) * sr).astype(int), 0, len(y) - 1)]
|
| 43 |
-
|
| 44 |
-
def apply_voice_quirk(audio_path, mode, output_path, export_format):
|
| 45 |
-
if not audio_path or not os.path.exists(audio_path) or os.path.isdir(audio_path):
|
| 46 |
-
gr_warning(translations["input_not_valid"])
|
| 47 |
-
return None
|
| 48 |
-
|
| 49 |
-
if not output_path:
|
| 50 |
-
gr_warning(translations["output_not_valid"])
|
| 51 |
-
return None
|
| 52 |
-
|
| 53 |
-
if os.path.isdir(output_path): output_path = os.path.join(output_path, f"audio_quirk.{export_format}")
|
| 54 |
-
output_dir = os.path.dirname(output_path) or output_path
|
| 55 |
-
|
| 56 |
-
if not os.path.exists(output_dir): os.makedirs(output_dir, exist_ok=True)
|
| 57 |
-
output_path = process_output(output_path)
|
| 58 |
-
|
| 59 |
-
gr_info(translations["start"].format(start=translations["apply_effect"]))
|
| 60 |
-
|
| 61 |
-
y, sr = librosa.load(audio_path, sr=None)
|
| 62 |
-
output_path = output_path.replace("wav", export_format)
|
| 63 |
-
|
| 64 |
-
mode = translations["quirk_choice"][mode]
|
| 65 |
-
if mode == 0: mode = random.randint(1, 16)
|
| 66 |
-
|
| 67 |
-
if mode == 1: y *= np.random.uniform(0.5, 0.8, size=len(y))
|
| 68 |
-
elif mode == 2: y = librosa.effects.pitch_shift(y=y + np.random.normal(0, 0.01, y.shape), sr=sr, n_steps=np.random.uniform(-1.5, -3.5))
|
| 69 |
-
elif mode == 3: y = librosa.effects.time_stretch(librosa.effects.pitch_shift(y=y, sr=sr, n_steps=3), rate=1.2)
|
| 70 |
-
elif mode == 4: y = librosa.effects.time_stretch(librosa.effects.pitch_shift(y=y, sr=sr, n_steps=8), rate=1.3)
|
| 71 |
-
elif mode == 5: y = librosa.effects.time_stretch(librosa.effects.pitch_shift(y=y, sr=sr, n_steps=-3), rate=0.75)
|
| 72 |
-
elif mode == 6: y *= np.sin(np.linspace(0, np.pi * 20, len(y))) * 0.5 + 0.5
|
| 73 |
-
elif mode == 7: y = librosa.effects.time_stretch(vibrato(librosa.effects.pitch_shift(y=y, sr=sr, n_steps=-4), sr, freq=3, depth=0.004), rate=0.85)
|
| 74 |
-
elif mode == 8: y *= 0.6 + np.pad(y, (sr // 2, 0), mode='constant')[:len(y)] * 0.4
|
| 75 |
-
elif mode == 9: y = librosa.effects.pitch_shift(y=y, sr=sr, n_steps=2) + np.sin(np.linspace(0, np.pi * 20, len(y))) * 0.02
|
| 76 |
-
elif mode == 10: y = vibrato(y, sr, freq=8, depth=0.005)
|
| 77 |
-
elif mode == 11: y = librosa.effects.time_stretch(librosa.effects.pitch_shift(y=y, sr=sr, n_steps=4), rate=1.25)
|
| 78 |
-
elif mode == 12: y = np.hstack([np.pad(f, (0, int(len(f)*0.3)), mode='edge') for f in librosa.util.frame(y, frame_length=2048, hop_length=512).T])
|
| 79 |
-
elif mode == 13: y = np.concatenate([y, np.sin(2 * np.pi * np.linspace(0, 1, int(0.05 * sr))) * 0.02])
|
| 80 |
-
elif mode == 14: y += np.random.normal(0, 0.005, len(y))
|
| 81 |
-
elif mode == 15:
|
| 82 |
-
frame = int(sr * 0.2)
|
| 83 |
-
chunks = [y[i:i + frame] for i in range(0, len(y), frame)]
|
| 84 |
-
|
| 85 |
-
np.random.shuffle(chunks)
|
| 86 |
-
y = np.concatenate(chunks)
|
| 87 |
-
elif mode == 16:
|
| 88 |
-
frame = int(sr * 0.3)
|
| 89 |
-
|
| 90 |
-
for i in range(0, len(y), frame * 2):
|
| 91 |
-
y[i:i+frame] = y[i:i+frame][::-1]
|
| 92 |
-
|
| 93 |
-
sf.write(output_path, y, sr, format=export_format)
|
| 94 |
-
gr_info(translations["success"])
|
| 95 |
-
|
| 96 |
-
return output_path
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
main/app/core/f0_extract.py
DELETED
|
@@ -1,54 +0,0 @@
|
|
| 1 |
-
import os
|
| 2 |
-
import sys
|
| 3 |
-
import librosa
|
| 4 |
-
|
| 5 |
-
import numpy as np
|
| 6 |
-
import matplotlib.pyplot as plt
|
| 7 |
-
|
| 8 |
-
sys.path.append(os.getcwd())
|
| 9 |
-
|
| 10 |
-
from main.library.utils import check_assets
|
| 11 |
-
from main.app.core.ui import gr_info, gr_warning
|
| 12 |
-
from main.library.predictors.Generator import Generator
|
| 13 |
-
from main.app.variables import config, translations, configs
|
| 14 |
-
|
| 15 |
-
def f0_extract(audio, f0_method, f0_onnx):
|
| 16 |
-
if not audio or not os.path.exists(audio) or os.path.isdir(audio):
|
| 17 |
-
gr_warning(translations["input_not_valid"])
|
| 18 |
-
return [None]*2
|
| 19 |
-
|
| 20 |
-
check_assets(f0_method, None, f0_onnx, None)
|
| 21 |
-
|
| 22 |
-
f0_path = os.path.join(configs["f0_path"], os.path.splitext(os.path.basename(audio))[0])
|
| 23 |
-
image_path = os.path.join(f0_path, "f0.png")
|
| 24 |
-
txt_path = os.path.join(f0_path, "f0.txt")
|
| 25 |
-
|
| 26 |
-
gr_info(translations["start_extract"])
|
| 27 |
-
|
| 28 |
-
if not os.path.exists(f0_path): os.makedirs(f0_path, exist_ok=True)
|
| 29 |
-
|
| 30 |
-
y, sr = librosa.load(audio, sr=None)
|
| 31 |
-
|
| 32 |
-
f0_generator = Generator(sr, 160, 50, 1600, is_half=config.is_half, device=config.device, f0_onnx_mode=f0_onnx, del_onnx_model=f0_onnx)
|
| 33 |
-
_, pitchf = f0_generator.calculator(config.x_pad, f0_method, y, 0, None, 3, False, 0, None, False)
|
| 34 |
-
|
| 35 |
-
F_temp = np.array(pitchf, dtype=np.float32)
|
| 36 |
-
F_temp[F_temp == 0] = np.nan
|
| 37 |
-
|
| 38 |
-
f0 = 1200 * np.log2(F_temp / librosa.midi_to_hz(0))
|
| 39 |
-
|
| 40 |
-
plt.figure(figsize=(10, 4))
|
| 41 |
-
plt.plot(f0)
|
| 42 |
-
plt.title(f0_method)
|
| 43 |
-
plt.xlabel(translations["time_frames"])
|
| 44 |
-
plt.ylabel(translations["Frequency"])
|
| 45 |
-
plt.savefig(image_path)
|
| 46 |
-
plt.close()
|
| 47 |
-
|
| 48 |
-
with open(txt_path, "w") as f:
|
| 49 |
-
for i, f0_value in enumerate(f0):
|
| 50 |
-
f.write(f"{i * sr / 160},{f0_value}\n")
|
| 51 |
-
|
| 52 |
-
gr_info(translations["extract_done"])
|
| 53 |
-
|
| 54 |
-
return [txt_path, image_path]
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
main/app/core/inference.py
DELETED
|
@@ -1,387 +0,0 @@
|
|
| 1 |
-
import os
|
| 2 |
-
import re
|
| 3 |
-
import sys
|
| 4 |
-
import shutil
|
| 5 |
-
import librosa
|
| 6 |
-
import datetime
|
| 7 |
-
import subprocess
|
| 8 |
-
|
| 9 |
-
import numpy as np
|
| 10 |
-
|
| 11 |
-
sys.path.append(os.getcwd())
|
| 12 |
-
|
| 13 |
-
from main.app.core.ui import gr_info, gr_warning, gr_error, process_output
|
| 14 |
-
from main.app.variables import logger, config, configs, translations, python
|
| 15 |
-
|
| 16 |
-
def convert(pitch, filter_radius, index_rate, rms_mix_rate, protect, hop_length, f0_method, input_path, output_path, pth_path, index_path, f0_autotune, clean_audio, clean_strength, export_format, embedder_model, resample_sr, split_audio, f0_autotune_strength, checkpointing, f0_onnx, embedders_mode, formant_shifting, formant_qfrency, formant_timbre, f0_file, proposal_pitch, proposal_pitch_threshold):
|
| 17 |
-
if config.debug_mode: subprocess.run([python, configs["convert_path"], "--pitch", str(pitch), "--filter_radius", str(filter_radius), "--index_rate", str(index_rate), "--rms_mix_rate", str(rms_mix_rate), "--protect", str(protect), "--hop_length", str(hop_length), "--f0_method", f0_method, "--input_path", input_path, "--output_path", output_path, "--pth_path", pth_path, "--index_path", index_path, "--f0_autotune", str(f0_autotune), "--clean_audio", str(clean_audio), "--clean_strength", str(clean_strength), "--export_format", export_format, "--embedder_model", embedder_model, "--resample_sr", str(resample_sr), "--split_audio", str(split_audio), "--f0_autotune_strength", str(f0_autotune_strength), "--checkpointing", str(checkpointing), "--f0_onnx", str(f0_onnx), "--embedders_mode", embedders_mode, "--formant_shifting", str(formant_shifting), "--formant_qfrency", str(formant_qfrency), "--formant_timbre", str(formant_timbre), "--f0_file", f0_file, "--proposal_pitch", str(proposal_pitch), "--proposal_pitch_threshold", str(proposal_pitch_threshold)])
|
| 18 |
-
else:
|
| 19 |
-
from main.inference.conversion.convert import run_convert_script
|
| 20 |
-
|
| 21 |
-
run_convert_script(pitch, filter_radius, index_rate, rms_mix_rate, protect, hop_length, f0_method, input_path, output_path, pth_path, index_path, f0_autotune, f0_autotune_strength, clean_audio, clean_strength, export_format, embedder_model, resample_sr, split_audio, checkpointing, f0_file, f0_onnx, embedders_mode, formant_shifting, formant_qfrency, formant_timbre, proposal_pitch, proposal_pitch_threshold)
|
| 22 |
-
|
| 23 |
-
def convert_audio(clean, autotune, use_audio, use_original, convert_backing, not_merge_backing, merge_instrument, pitch, clean_strength, model, index, index_rate, input, output, format, method, hybrid_method, hop_length, embedders, custom_embedders, resample_sr, filter_radius, rms_mix_rate, protect, split_audio, f0_autotune_strength, input_audio_name, checkpointing, onnx_f0_mode, formant_shifting, formant_qfrency, formant_timbre, f0_file, embedders_mode, proposal_pitch, proposal_pitch_threshold):
|
| 24 |
-
model_path = os.path.join(configs["weights_path"], model) if not os.path.exists(model) else model
|
| 25 |
-
|
| 26 |
-
return_none = [None]*6
|
| 27 |
-
return_none[5] = {"visible": True, "__type__": "update"}
|
| 28 |
-
|
| 29 |
-
if not use_audio:
|
| 30 |
-
if merge_instrument or not_merge_backing or convert_backing or use_original:
|
| 31 |
-
gr_warning(translations["turn_on_use_audio"])
|
| 32 |
-
return return_none
|
| 33 |
-
|
| 34 |
-
if use_original:
|
| 35 |
-
if convert_backing:
|
| 36 |
-
gr_warning(translations["turn_off_convert_backup"])
|
| 37 |
-
return return_none
|
| 38 |
-
elif not_merge_backing:
|
| 39 |
-
gr_warning(translations["turn_off_merge_backup"])
|
| 40 |
-
return return_none
|
| 41 |
-
|
| 42 |
-
if not model or not os.path.exists(model_path) or os.path.isdir(model_path) or not model.endswith((".pth", ".onnx")):
|
| 43 |
-
gr_warning(translations["provide_file"].format(filename=translations["model"]))
|
| 44 |
-
return return_none
|
| 45 |
-
|
| 46 |
-
f0method, embedder_model = (method if method != "hybrid" else hybrid_method), (embedders if embedders != "custom" else custom_embedders)
|
| 47 |
-
|
| 48 |
-
if use_audio:
|
| 49 |
-
output_audio = os.path.join(configs["audios_path"], input_audio_name)
|
| 50 |
-
|
| 51 |
-
from main.library.utils import pydub_load
|
| 52 |
-
|
| 53 |
-
def get_audio_file(label):
|
| 54 |
-
matching_files = [f for f in os.listdir(output_audio) if label in f]
|
| 55 |
-
|
| 56 |
-
if not matching_files: return translations["notfound"]
|
| 57 |
-
return os.path.join(output_audio, matching_files[0])
|
| 58 |
-
|
| 59 |
-
output_path = os.path.join(output_audio, f"Convert_Vocals.{format}")
|
| 60 |
-
output_backing = os.path.join(output_audio, f"Convert_Backing.{format}")
|
| 61 |
-
output_merge_backup = os.path.join(output_audio, f"Vocals+Backing.{format}")
|
| 62 |
-
output_merge_instrument = os.path.join(output_audio, f"Vocals+Instruments.{format}")
|
| 63 |
-
|
| 64 |
-
if os.path.exists(output_audio): os.makedirs(output_audio, exist_ok=True)
|
| 65 |
-
output_path = process_output(output_path)
|
| 66 |
-
|
| 67 |
-
if use_original:
|
| 68 |
-
original_vocal = get_audio_file('Original_Vocals_No_Reverb.')
|
| 69 |
-
|
| 70 |
-
if original_vocal == translations["notfound"]: original_vocal = get_audio_file('Original_Vocals.')
|
| 71 |
-
|
| 72 |
-
if original_vocal == translations["notfound"]:
|
| 73 |
-
gr_warning(translations["not_found_original_vocal"])
|
| 74 |
-
return return_none
|
| 75 |
-
|
| 76 |
-
input_path = original_vocal
|
| 77 |
-
else:
|
| 78 |
-
main_vocal = get_audio_file('Main_Vocals_No_Reverb.')
|
| 79 |
-
backing_vocal = get_audio_file('Backing_Vocals_No_Reverb.')
|
| 80 |
-
|
| 81 |
-
if main_vocal == translations["notfound"]: main_vocal = get_audio_file('Main_Vocals.')
|
| 82 |
-
if not not_merge_backing and backing_vocal == translations["notfound"]: backing_vocal = get_audio_file('Backing_Vocals.')
|
| 83 |
-
|
| 84 |
-
if main_vocal == translations["notfound"]:
|
| 85 |
-
gr_warning(translations["not_found_main_vocal"])
|
| 86 |
-
return return_none
|
| 87 |
-
|
| 88 |
-
if not not_merge_backing and backing_vocal == translations["notfound"]:
|
| 89 |
-
gr_warning(translations["not_found_backing_vocal"])
|
| 90 |
-
return return_none
|
| 91 |
-
|
| 92 |
-
input_path = main_vocal
|
| 93 |
-
backing_path = backing_vocal
|
| 94 |
-
|
| 95 |
-
gr_info(translations["convert_vocal"])
|
| 96 |
-
|
| 97 |
-
convert(pitch, filter_radius, index_rate, rms_mix_rate, protect, hop_length, f0method, input_path, output_path, model_path, index, autotune, clean, clean_strength, format, embedder_model, resample_sr, split_audio, f0_autotune_strength, checkpointing, onnx_f0_mode, embedders_mode, formant_shifting, formant_qfrency, formant_timbre, f0_file, proposal_pitch, proposal_pitch_threshold)
|
| 98 |
-
|
| 99 |
-
gr_info(translations["convert_success"])
|
| 100 |
-
|
| 101 |
-
if convert_backing:
|
| 102 |
-
output_backing = process_output(output_backing)
|
| 103 |
-
|
| 104 |
-
gr_info(translations["convert_backup"])
|
| 105 |
-
|
| 106 |
-
convert(pitch, filter_radius, index_rate, rms_mix_rate, protect, hop_length, f0method, backing_path, output_backing, model_path, index, autotune, clean, clean_strength, format, embedder_model, resample_sr, split_audio, f0_autotune_strength, checkpointing, onnx_f0_mode, embedders_mode, formant_shifting, formant_qfrency, formant_timbre, f0_file, proposal_pitch, proposal_pitch_threshold)
|
| 107 |
-
|
| 108 |
-
gr_info(translations["convert_backup_success"])
|
| 109 |
-
|
| 110 |
-
try:
|
| 111 |
-
if not not_merge_backing and not use_original:
|
| 112 |
-
backing_source = output_backing if convert_backing else backing_vocal
|
| 113 |
-
|
| 114 |
-
output_merge_backup = process_output(output_merge_backup)
|
| 115 |
-
|
| 116 |
-
gr_info(translations["merge_backup"])
|
| 117 |
-
|
| 118 |
-
pydub_load(output_path, volume=-4).overlay(pydub_load(backing_source, volume=-6)).export(output_merge_backup, format=format)
|
| 119 |
-
|
| 120 |
-
gr_info(translations["merge_success"])
|
| 121 |
-
|
| 122 |
-
if merge_instrument:
|
| 123 |
-
vocals = output_merge_backup if not not_merge_backing and not use_original else output_path
|
| 124 |
-
|
| 125 |
-
output_merge_instrument = process_output(output_merge_instrument)
|
| 126 |
-
|
| 127 |
-
gr_info(translations["merge_instruments_process"])
|
| 128 |
-
|
| 129 |
-
instruments = get_audio_file('Instruments.')
|
| 130 |
-
|
| 131 |
-
if instruments == translations["notfound"]:
|
| 132 |
-
gr_warning(translations["not_found_instruments"])
|
| 133 |
-
output_merge_instrument = None
|
| 134 |
-
else: pydub_load(instruments, volume=-7).overlay(pydub_load(vocals, volume=-4 if use_original else None)).export(output_merge_instrument, format=format)
|
| 135 |
-
|
| 136 |
-
gr_info(translations["merge_success"])
|
| 137 |
-
except:
|
| 138 |
-
return return_none
|
| 139 |
-
|
| 140 |
-
return [(None if use_original else output_path), output_backing, (None if not_merge_backing and use_original else output_merge_backup), (output_path if use_original else None), (output_merge_instrument if merge_instrument else None), {"visible": True, "__type__": "update"}]
|
| 141 |
-
else:
|
| 142 |
-
if not input or not os.path.exists(input) or os.path.isdir(input):
|
| 143 |
-
gr_warning(translations["input_not_valid"])
|
| 144 |
-
return return_none
|
| 145 |
-
|
| 146 |
-
if not output:
|
| 147 |
-
gr_warning(translations["output_not_valid"])
|
| 148 |
-
return return_none
|
| 149 |
-
|
| 150 |
-
output = output.replace("wav", format)
|
| 151 |
-
|
| 152 |
-
if os.path.isdir(input):
|
| 153 |
-
gr_info(translations["is_folder"])
|
| 154 |
-
|
| 155 |
-
if not [f for f in os.listdir(input) if f.lower().endswith(("wav", "mp3", "flac", "ogg", "opus", "m4a", "mp4", "aac", "alac", "wma", "aiff", "webm", "ac3"))]:
|
| 156 |
-
gr_warning(translations["not_found_in_folder"])
|
| 157 |
-
return return_none
|
| 158 |
-
|
| 159 |
-
gr_info(translations["batch_convert"])
|
| 160 |
-
|
| 161 |
-
output_dir = os.path.dirname(output) or output
|
| 162 |
-
convert(pitch, filter_radius, index_rate, rms_mix_rate, protect, hop_length, f0method, input, output_dir, model_path, index, autotune, clean, clean_strength, format, embedder_model, resample_sr, split_audio, f0_autotune_strength, checkpointing, onnx_f0_mode, embedders_mode, formant_shifting, formant_qfrency, formant_timbre, f0_file, proposal_pitch, proposal_pitch_threshold)
|
| 163 |
-
|
| 164 |
-
gr_info(translations["batch_convert_success"])
|
| 165 |
-
|
| 166 |
-
return return_none
|
| 167 |
-
else:
|
| 168 |
-
output_dir = os.path.dirname(output) or output
|
| 169 |
-
|
| 170 |
-
if not os.path.exists(output_dir): os.makedirs(output_dir, exist_ok=True)
|
| 171 |
-
output = process_output(output)
|
| 172 |
-
|
| 173 |
-
gr_info(translations["convert_vocal"])
|
| 174 |
-
|
| 175 |
-
convert(pitch, filter_radius, index_rate, rms_mix_rate, protect, hop_length, f0method, input, output, model_path, index, autotune, clean, clean_strength, format, embedder_model, resample_sr, split_audio, f0_autotune_strength, checkpointing, onnx_f0_mode, embedders_mode, formant_shifting, formant_qfrency, formant_timbre, f0_file, proposal_pitch, proposal_pitch_threshold)
|
| 176 |
-
|
| 177 |
-
gr_info(translations["convert_success"])
|
| 178 |
-
|
| 179 |
-
return_none[0] = output
|
| 180 |
-
return return_none
|
| 181 |
-
|
| 182 |
-
def convert_selection(clean, autotune, use_audio, use_original, convert_backing, not_merge_backing, merge_instrument, pitch, clean_strength, model, index, index_rate, input, output, format, method, hybrid_method, hop_length, embedders, custom_embedders, resample_sr, filter_radius, rms_mix_rate, protect, split_audio, f0_autotune_strength, checkpointing, onnx_f0_mode, formant_shifting, formant_qfrency, formant_timbre, f0_file, embedders_mode, proposal_pitch, proposal_pitch_threshold):
|
| 183 |
-
if use_audio:
|
| 184 |
-
gr_info(translations["search_separate"])
|
| 185 |
-
choice = [f for f in os.listdir(configs["audios_path"]) if os.path.isdir(os.path.join(configs["audios_path"], f))] if config.debug_mode else [f for f in os.listdir(configs["audios_path"]) if os.path.isdir(os.path.join(configs["audios_path"], f)) and any(file.lower().endswith((".wav", ".mp3", ".flac", ".ogg", ".opus", ".m4a", ".mp4", ".aac", ".alac", ".wma", ".aiff", ".webm", ".ac3")) for file in os.listdir(os.path.join(configs["audios_path"], f)))]
|
| 186 |
-
|
| 187 |
-
gr_info(translations["found_choice"].format(choice=len(choice)))
|
| 188 |
-
|
| 189 |
-
if len(choice) == 0:
|
| 190 |
-
gr_warning(translations["separator==0"])
|
| 191 |
-
|
| 192 |
-
return [{"choices": [], "value": "", "interactive": False, "visible": False, "__type__": "update"}, None, None, None, None, None, {"visible": True, "__type__": "update"}, {"visible": False, "__type__": "update"}]
|
| 193 |
-
elif len(choice) == 1:
|
| 194 |
-
convert_output = convert_audio(clean, autotune, use_audio, use_original, convert_backing, not_merge_backing, merge_instrument, pitch, clean_strength, model, index, index_rate, None, None, format, method, hybrid_method, hop_length, embedders, custom_embedders, resample_sr, filter_radius, rms_mix_rate, protect, split_audio, f0_autotune_strength, choice[0], checkpointing, onnx_f0_mode, formant_shifting, formant_qfrency, formant_timbre, f0_file, embedders_mode, proposal_pitch, proposal_pitch_threshold)
|
| 195 |
-
|
| 196 |
-
return [{"choices": [], "value": "", "interactive": False, "visible": False, "__type__": "update"}, convert_output[0], convert_output[1], convert_output[2], convert_output[3], convert_output[4], {"visible": True, "__type__": "update"}, {"visible": False, "__type__": "update"}]
|
| 197 |
-
else: return [{"choices": choice, "value": choice[0], "interactive": True, "visible": True, "__type__": "update"}, None, None, None, None, None, {"visible": False, "__type__": "update"}, {"visible": True, "__type__": "update"}]
|
| 198 |
-
else:
|
| 199 |
-
main_convert = convert_audio(clean, autotune, use_audio, use_original, convert_backing, not_merge_backing, merge_instrument, pitch, clean_strength, model, index, index_rate, input, output, format, method, hybrid_method, hop_length, embedders, custom_embedders, resample_sr, filter_radius, rms_mix_rate, protect, split_audio, f0_autotune_strength, None, checkpointing, onnx_f0_mode, formant_shifting, formant_qfrency, formant_timbre, f0_file, embedders_mode, proposal_pitch, proposal_pitch_threshold)
|
| 200 |
-
|
| 201 |
-
return [{"choices": [], "value": "", "interactive": False, "visible": False, "__type__": "update"}, main_convert[0], None, None, None, None, {"visible": True, "__type__": "update"}, {"visible": False, "__type__": "update"}]
|
| 202 |
-
|
| 203 |
-
def convert_with_whisper(num_spk, model_size, cleaner, clean_strength, autotune, f0_autotune_strength, checkpointing, model_1, model_2, model_index_1, model_index_2, pitch_1, pitch_2, index_strength_1, index_strength_2, export_format, input_audio, output_audio, onnx_f0_mode, method, hybrid_method, hop_length, embed_mode, embedders, custom_embedders, resample_sr, filter_radius, rms_mix_rate, protect, formant_shifting, formant_qfrency_1, formant_timbre_1, formant_qfrency_2, formant_timbre_2, proposal_pitch, proposal_pitch_threshold):
|
| 204 |
-
from pydub import AudioSegment
|
| 205 |
-
from sklearn.cluster import AgglomerativeClustering
|
| 206 |
-
|
| 207 |
-
from main.library.speaker_diarization.audio import Audio
|
| 208 |
-
from main.library.speaker_diarization.segment import Segment
|
| 209 |
-
from main.library.speaker_diarization.whisper import load_model
|
| 210 |
-
from main.library.utils import check_spk_diarization, pydub_load
|
| 211 |
-
from main.library.speaker_diarization.embedding import SpeechBrainPretrainedSpeakerEmbedding
|
| 212 |
-
|
| 213 |
-
check_spk_diarization(model_size)
|
| 214 |
-
model_pth_1, model_pth_2 = os.path.join(configs["weights_path"], model_1) if not os.path.exists(model_1) else model_1, os.path.join(configs["weights_path"], model_2) if not os.path.exists(model_2) else model_2
|
| 215 |
-
|
| 216 |
-
if (not model_1 or not os.path.exists(model_pth_1) or os.path.isdir(model_pth_1) or not model_pth_1.endswith((".pth", ".onnx"))) and (not model_2 or not os.path.exists(model_pth_2) or os.path.isdir(model_pth_2) or not model_pth_2.endswith((".pth", ".onnx"))):
|
| 217 |
-
gr_warning(translations["provide_file"].format(filename=translations["model"]))
|
| 218 |
-
return None
|
| 219 |
-
|
| 220 |
-
if not model_1: model_pth_1 = model_pth_2
|
| 221 |
-
if not model_2: model_pth_2 = model_pth_1
|
| 222 |
-
|
| 223 |
-
if not input_audio or not os.path.exists(input_audio) or os.path.isdir(input_audio):
|
| 224 |
-
gr_warning(translations["input_not_valid"])
|
| 225 |
-
return None
|
| 226 |
-
|
| 227 |
-
if not output_audio:
|
| 228 |
-
gr_warning(translations["output_not_valid"])
|
| 229 |
-
return None
|
| 230 |
-
|
| 231 |
-
output_audio = process_output(output_audio)
|
| 232 |
-
gr_info(translations["start_whisper"])
|
| 233 |
-
|
| 234 |
-
try:
|
| 235 |
-
audio = Audio()
|
| 236 |
-
|
| 237 |
-
embedding_model = SpeechBrainPretrainedSpeakerEmbedding(embedding=os.path.join(configs["speaker_diarization_path"], "models", "speechbrain"), device=config.device)
|
| 238 |
-
segments = load_model(model_size, device=config.device).transcribe(input_audio, fp16=configs.get("fp16", False), word_timestamps=True)["segments"]
|
| 239 |
-
|
| 240 |
-
y, sr = librosa.load(input_audio, sr=None)
|
| 241 |
-
duration = len(y) / sr
|
| 242 |
-
|
| 243 |
-
def segment_embedding(segment):
|
| 244 |
-
waveform, _ = audio.crop(input_audio, Segment(segment["start"], min(duration, segment["end"])))
|
| 245 |
-
return embedding_model(waveform.mean(dim=0, keepdim=True)[None] if waveform.shape[0] == 2 else waveform[None])
|
| 246 |
-
|
| 247 |
-
def time(secs):
|
| 248 |
-
return datetime.timedelta(seconds=round(secs))
|
| 249 |
-
|
| 250 |
-
def merge_audio(files_list, time_stamps, original_file_path, output_path, format):
|
| 251 |
-
def extract_number(filename):
|
| 252 |
-
match = re.search(r'_(\d+)', filename)
|
| 253 |
-
return int(match.group(1)) if match else 0
|
| 254 |
-
|
| 255 |
-
total_duration = len(pydub_load(original_file_path))
|
| 256 |
-
combined = AudioSegment.empty()
|
| 257 |
-
current_position = 0
|
| 258 |
-
|
| 259 |
-
for file, (start_i, end_i) in zip(sorted(files_list, key=extract_number), time_stamps):
|
| 260 |
-
if start_i > current_position: combined += AudioSegment.silent(duration=start_i - current_position)
|
| 261 |
-
|
| 262 |
-
combined += pydub_load(file)
|
| 263 |
-
current_position = end_i
|
| 264 |
-
|
| 265 |
-
if current_position < total_duration: combined += AudioSegment.silent(duration=total_duration - current_position)
|
| 266 |
-
combined.export(output_path, format=format)
|
| 267 |
-
|
| 268 |
-
return output_path
|
| 269 |
-
|
| 270 |
-
embeddings = np.zeros(shape=(len(segments), 192))
|
| 271 |
-
for i, segment in enumerate(segments):
|
| 272 |
-
embeddings[i] = segment_embedding(segment)
|
| 273 |
-
|
| 274 |
-
labels = AgglomerativeClustering(num_spk).fit(np.nan_to_num(embeddings)).labels_
|
| 275 |
-
for i in range(len(segments)):
|
| 276 |
-
segments[i]["speaker"] = 'SPEAKER ' + str(labels[i] + 1)
|
| 277 |
-
|
| 278 |
-
merged_segments, current_text = [], []
|
| 279 |
-
current_speaker, current_start = None, None
|
| 280 |
-
|
| 281 |
-
for i, segment in enumerate(segments):
|
| 282 |
-
speaker = segment["speaker"]
|
| 283 |
-
start_time = segment["start"]
|
| 284 |
-
text = segment["text"][1:]
|
| 285 |
-
|
| 286 |
-
if speaker == current_speaker:
|
| 287 |
-
current_text.append(text)
|
| 288 |
-
end_time = segment["end"]
|
| 289 |
-
else:
|
| 290 |
-
if current_speaker is not None: merged_segments.append({"speaker": current_speaker, "start": current_start, "end": end_time, "text": " ".join(current_text)})
|
| 291 |
-
|
| 292 |
-
current_speaker = speaker
|
| 293 |
-
current_start = start_time
|
| 294 |
-
current_text = [text]
|
| 295 |
-
end_time = segment["end"]
|
| 296 |
-
|
| 297 |
-
if current_speaker is not None: merged_segments.append({"speaker": current_speaker, "start": current_start, "end": end_time, "text": " ".join(current_text)})
|
| 298 |
-
|
| 299 |
-
gr_info(translations["whisper_done"])
|
| 300 |
-
|
| 301 |
-
x = ""
|
| 302 |
-
for segment in merged_segments:
|
| 303 |
-
x += f"\n{segment['speaker']} {str(time(segment['start']))} - {str(time(segment['end']))}\n"
|
| 304 |
-
x += segment["text"] + "\n"
|
| 305 |
-
|
| 306 |
-
logger.info(x)
|
| 307 |
-
|
| 308 |
-
gr_info(translations["process_audio"])
|
| 309 |
-
|
| 310 |
-
audio = pydub_load(input_audio)
|
| 311 |
-
output_folder = "audios_temp"
|
| 312 |
-
|
| 313 |
-
if os.path.exists(output_folder): shutil.rmtree(output_folder, ignore_errors=True)
|
| 314 |
-
for f in [output_folder, os.path.join(output_folder, "1"), os.path.join(output_folder, "2")]:
|
| 315 |
-
os.makedirs(f, exist_ok=True)
|
| 316 |
-
|
| 317 |
-
time_stamps, processed_segments = [], []
|
| 318 |
-
for i, segment in enumerate(merged_segments):
|
| 319 |
-
start_ms = int(segment["start"] * 1000)
|
| 320 |
-
end_ms = int(segment["end"] * 1000)
|
| 321 |
-
|
| 322 |
-
index = i + 1
|
| 323 |
-
|
| 324 |
-
segment_filename = os.path.join(output_folder, "1" if i % 2 == 1 else "2", f"segment_{index}.wav")
|
| 325 |
-
audio[start_ms:end_ms].export(segment_filename, format="wav")
|
| 326 |
-
|
| 327 |
-
processed_segments.append(os.path.join(output_folder, "1" if i % 2 == 1 else "2", f"segment_{index}_output.wav"))
|
| 328 |
-
time_stamps.append((start_ms, end_ms))
|
| 329 |
-
|
| 330 |
-
f0method, embedder_model = (method if method != "hybrid" else hybrid_method), (embedders if embedders != "custom" else custom_embedders)
|
| 331 |
-
|
| 332 |
-
gr_info(translations["process_done_start_convert"])
|
| 333 |
-
|
| 334 |
-
convert(pitch_1, filter_radius, index_strength_1, rms_mix_rate, protect, hop_length, f0method, os.path.join(output_folder, "1"), output_folder, model_pth_1, model_index_1, autotune, cleaner, clean_strength, "wav", embedder_model, resample_sr, False, f0_autotune_strength, checkpointing, onnx_f0_mode, embed_mode, formant_shifting, formant_qfrency_1, formant_timbre_1, "", proposal_pitch, proposal_pitch_threshold)
|
| 335 |
-
convert(pitch_2, filter_radius, index_strength_2, rms_mix_rate, protect, hop_length, f0method, os.path.join(output_folder, "2"), output_folder, model_pth_2, model_index_2, autotune, cleaner, clean_strength, "wav", embedder_model, resample_sr, False, f0_autotune_strength, checkpointing, onnx_f0_mode, embed_mode, formant_shifting, formant_qfrency_2, formant_timbre_2, "", proposal_pitch, proposal_pitch_threshold)
|
| 336 |
-
|
| 337 |
-
gr_info(translations["convert_success"])
|
| 338 |
-
return merge_audio(processed_segments, time_stamps, input_audio, output_audio.replace("wav", export_format), export_format)
|
| 339 |
-
except Exception as e:
|
| 340 |
-
gr_error(translations["error_occurred"].format(e=e))
|
| 341 |
-
import traceback
|
| 342 |
-
logger.debug(traceback.format_exc())
|
| 343 |
-
return None
|
| 344 |
-
finally:
|
| 345 |
-
if os.path.exists("audios_temp"): shutil.rmtree("audios_temp", ignore_errors=True)
|
| 346 |
-
|
| 347 |
-
def convert_tts(clean, autotune, pitch, clean_strength, model, index, index_rate, input, output, format, method, hybrid_method, hop_length, embedders, custom_embedders, resample_sr, filter_radius, rms_mix_rate, protect, split_audio, f0_autotune_strength, checkpointing, onnx_f0_mode, formant_shifting, formant_qfrency, formant_timbre, f0_file, embedders_mode, proposal_pitch, proposal_pitch_threshold):
|
| 348 |
-
model_path = os.path.join(configs["weights_path"], model) if not os.path.exists(model) else model
|
| 349 |
-
|
| 350 |
-
if not model_path or not os.path.exists(model_path) or os.path.isdir(model_path) or not model.endswith((".pth", ".onnx")):
|
| 351 |
-
gr_warning(translations["provide_file"].format(filename=translations["model"]))
|
| 352 |
-
return None
|
| 353 |
-
|
| 354 |
-
if not input or not os.path.exists(input):
|
| 355 |
-
gr_warning(translations["input_not_valid"])
|
| 356 |
-
return None
|
| 357 |
-
|
| 358 |
-
if os.path.isdir(input):
|
| 359 |
-
input_audio = [f for f in os.listdir(input) if "tts" in f and f.lower().endswith(("wav", "mp3", "flac", "ogg", "opus", "m4a", "mp4", "aac", "alac", "wma", "aiff", "webm", "ac3"))]
|
| 360 |
-
|
| 361 |
-
if not input_audio:
|
| 362 |
-
gr_warning(translations["not_found_in_folder"])
|
| 363 |
-
return None
|
| 364 |
-
|
| 365 |
-
input = os.path.join(input, input_audio[0])
|
| 366 |
-
|
| 367 |
-
if not output:
|
| 368 |
-
gr_warning(translations["output_not_valid"])
|
| 369 |
-
return None
|
| 370 |
-
|
| 371 |
-
output = output.replace("wav", format)
|
| 372 |
-
if os.path.isdir(output): output = os.path.join(output, f"tts.{format}")
|
| 373 |
-
|
| 374 |
-
output_dir = os.path.dirname(output)
|
| 375 |
-
if not os.path.exists(output_dir): os.makedirs(output_dir, exist_ok=True)
|
| 376 |
-
|
| 377 |
-
output = process_output(output)
|
| 378 |
-
|
| 379 |
-
f0method = method if method != "hybrid" else hybrid_method
|
| 380 |
-
embedder_model = embedders if embedders != "custom" else custom_embedders
|
| 381 |
-
|
| 382 |
-
gr_info(translations["convert_vocal"])
|
| 383 |
-
|
| 384 |
-
convert(pitch, filter_radius, index_rate, rms_mix_rate, protect, hop_length, f0method, input, output, model_path, index, autotune, clean, clean_strength, format, embedder_model, resample_sr, split_audio, f0_autotune_strength, checkpointing, onnx_f0_mode, embedders_mode, formant_shifting, formant_qfrency, formant_timbre, f0_file, proposal_pitch, proposal_pitch_threshold)
|
| 385 |
-
|
| 386 |
-
gr_info(translations["convert_success"])
|
| 387 |
-
return output
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
main/app/core/model_utils.py
DELETED
|
@@ -1,162 +0,0 @@
|
|
| 1 |
-
import os
|
| 2 |
-
import sys
|
| 3 |
-
import json
|
| 4 |
-
import onnx
|
| 5 |
-
import torch
|
| 6 |
-
import datetime
|
| 7 |
-
|
| 8 |
-
from collections import OrderedDict
|
| 9 |
-
|
| 10 |
-
sys.path.append(os.getcwd())
|
| 11 |
-
|
| 12 |
-
from main.app.core.ui import gr_info, gr_warning, gr_error
|
| 13 |
-
from main.library.algorithm.onnx_export import onnx_exporter
|
| 14 |
-
from main.app.variables import config, logger, translations, configs
|
| 15 |
-
|
| 16 |
-
def fushion_model_pth(name, pth_1, pth_2, ratio):
|
| 17 |
-
if not name.endswith(".pth"): name = name + ".pth"
|
| 18 |
-
|
| 19 |
-
if not pth_1 or not os.path.exists(pth_1) or not pth_1.endswith(".pth"):
|
| 20 |
-
gr_warning(translations["provide_file"].format(filename=translations["model"] + " 1"))
|
| 21 |
-
return [translations["provide_file"].format(filename=translations["model"] + " 1"), None]
|
| 22 |
-
|
| 23 |
-
if not pth_2 or not os.path.exists(pth_2) or not pth_2.endswith(".pth"):
|
| 24 |
-
gr_warning(translations["provide_file"].format(filename=translations["model"] + " 2"))
|
| 25 |
-
return [translations["provide_file"].format(filename=translations["model"] + " 2"), None]
|
| 26 |
-
|
| 27 |
-
def extract(ckpt):
|
| 28 |
-
a = ckpt["model"]
|
| 29 |
-
opt = OrderedDict()
|
| 30 |
-
opt["weight"] = {}
|
| 31 |
-
|
| 32 |
-
for key in a.keys():
|
| 33 |
-
if "enc_q" in key: continue
|
| 34 |
-
|
| 35 |
-
opt["weight"][key] = a[key]
|
| 36 |
-
|
| 37 |
-
return opt
|
| 38 |
-
|
| 39 |
-
try:
|
| 40 |
-
ckpt1 = torch.load(pth_1, map_location="cpu", weights_only=True)
|
| 41 |
-
ckpt2 = torch.load(pth_2, map_location="cpu", weights_only=True)
|
| 42 |
-
|
| 43 |
-
if ckpt1["sr"] != ckpt2["sr"]:
|
| 44 |
-
gr_warning(translations["sr_not_same"])
|
| 45 |
-
return [translations["sr_not_same"], None]
|
| 46 |
-
|
| 47 |
-
cfg = ckpt1["config"]
|
| 48 |
-
cfg_f0 = ckpt1["f0"]
|
| 49 |
-
cfg_version = ckpt1["version"]
|
| 50 |
-
cfg_sr = ckpt1["sr"]
|
| 51 |
-
|
| 52 |
-
vocoder = ckpt1.get("vocoder", "Default")
|
| 53 |
-
rms_extract = ckpt1.get("energy", False)
|
| 54 |
-
|
| 55 |
-
ckpt1 = extract(ckpt1) if "model" in ckpt1 else ckpt1["weight"]
|
| 56 |
-
ckpt2 = extract(ckpt2) if "model" in ckpt2 else ckpt2["weight"]
|
| 57 |
-
|
| 58 |
-
if sorted(list(ckpt1.keys())) != sorted(list(ckpt2.keys())):
|
| 59 |
-
gr_warning(translations["architectures_not_same"])
|
| 60 |
-
return [translations["architectures_not_same"], None]
|
| 61 |
-
|
| 62 |
-
gr_info(translations["start"].format(start=translations["fushion_model"]))
|
| 63 |
-
|
| 64 |
-
opt = OrderedDict()
|
| 65 |
-
opt["weight"] = {}
|
| 66 |
-
|
| 67 |
-
for key in ckpt1.keys():
|
| 68 |
-
if key == "emb_g.weight" and ckpt1[key].shape != ckpt2[key].shape:
|
| 69 |
-
min_shape0 = min(ckpt1[key].shape[0], ckpt2[key].shape[0])
|
| 70 |
-
opt["weight"][key] = (ratio * (ckpt1[key][:min_shape0].float()) + (1 - ratio) * (ckpt2[key][:min_shape0].float())).half()
|
| 71 |
-
else: opt["weight"][key] = (ratio * (ckpt1[key].float()) + (1 - ratio) * (ckpt2[key].float())).half()
|
| 72 |
-
|
| 73 |
-
opt["config"] = cfg
|
| 74 |
-
opt["sr"] = cfg_sr
|
| 75 |
-
opt["f0"] = cfg_f0
|
| 76 |
-
opt["version"] = cfg_version
|
| 77 |
-
opt["infos"] = translations["model_fushion_info"].format(name=name, pth_1=pth_1, pth_2=pth_2, ratio=ratio)
|
| 78 |
-
opt["vocoder"] = vocoder
|
| 79 |
-
opt["energy"] = rms_extract
|
| 80 |
-
|
| 81 |
-
output_model = configs["weights_path"]
|
| 82 |
-
if not os.path.exists(output_model): os.makedirs(output_model, exist_ok=True)
|
| 83 |
-
|
| 84 |
-
torch.save(opt, os.path.join(output_model, name))
|
| 85 |
-
|
| 86 |
-
gr_info(translations["success"])
|
| 87 |
-
return [translations["success"], os.path.join(output_model, name)]
|
| 88 |
-
except Exception as e:
|
| 89 |
-
gr_error(message=translations["error_occurred"].format(e=e))
|
| 90 |
-
return [e, None]
|
| 91 |
-
|
| 92 |
-
def fushion_model(name, path_1, path_2, ratio):
|
| 93 |
-
if not name:
|
| 94 |
-
gr_warning(translations["provide_name_is_save"])
|
| 95 |
-
return [translations["provide_name_is_save"], None]
|
| 96 |
-
|
| 97 |
-
if path_1.endswith(".pth") and path_2.endswith(".pth"): return fushion_model_pth(name.replace(".onnx", ".pth"), path_1, path_2, ratio)
|
| 98 |
-
else:
|
| 99 |
-
gr_warning(translations["format_not_valid"])
|
| 100 |
-
return [None, None]
|
| 101 |
-
|
| 102 |
-
def onnx_export(model_path):
|
| 103 |
-
if not model_path.endswith(".pth"): model_path + ".pth"
|
| 104 |
-
if not model_path or not os.path.exists(model_path) or not model_path.endswith(".pth"): return gr_warning(translations["provide_file"].format(filename=translations["model"]))
|
| 105 |
-
|
| 106 |
-
try:
|
| 107 |
-
gr_info(translations["start_onnx_export"])
|
| 108 |
-
output = onnx_exporter(model_path, model_path.replace(".pth", ".onnx"), is_half=config.is_half, device=config.device)
|
| 109 |
-
|
| 110 |
-
gr_info(translations["success"])
|
| 111 |
-
return output
|
| 112 |
-
except Exception as e:
|
| 113 |
-
return gr_error(e)
|
| 114 |
-
|
| 115 |
-
def model_info(path):
|
| 116 |
-
if not path or not os.path.exists(path) or os.path.isdir(path) or not path.endswith((".pth", ".onnx")): return gr_warning(translations["provide_file"].format(filename=translations["model"]))
|
| 117 |
-
|
| 118 |
-
def prettify_date(date_str):
|
| 119 |
-
if date_str == translations["not_found_create_time"]: return None
|
| 120 |
-
|
| 121 |
-
try:
|
| 122 |
-
return datetime.datetime.strptime(date_str, "%Y-%m-%dT%H:%M:%S.%f").strftime("%Y-%m-%d %H:%M:%S")
|
| 123 |
-
except ValueError as e:
|
| 124 |
-
logger.debug(e)
|
| 125 |
-
return translations["format_not_valid"]
|
| 126 |
-
|
| 127 |
-
if path.endswith(".pth"): model_data = torch.load(path, map_location=torch.device("cpu"))
|
| 128 |
-
else:
|
| 129 |
-
model = onnx.load(path)
|
| 130 |
-
model_data = None
|
| 131 |
-
|
| 132 |
-
for prop in model.metadata_props:
|
| 133 |
-
if prop.key == "model_info":
|
| 134 |
-
model_data = json.loads(prop.value)
|
| 135 |
-
break
|
| 136 |
-
|
| 137 |
-
gr_info(translations["read_info"])
|
| 138 |
-
|
| 139 |
-
epochs = model_data.get("epoch", None)
|
| 140 |
-
if epochs is None:
|
| 141 |
-
epochs = model_data.get("info", None)
|
| 142 |
-
try:
|
| 143 |
-
epoch = epochs.replace("epoch", "").replace("e", "").isdigit()
|
| 144 |
-
if epoch and epochs is None: epochs = translations["not_found"].format(name=translations["epoch"])
|
| 145 |
-
except:
|
| 146 |
-
pass
|
| 147 |
-
|
| 148 |
-
steps = model_data.get("step", translations["not_found"].format(name=translations["step"]))
|
| 149 |
-
sr = model_data.get("sr", translations["not_found"].format(name=translations["sr"]))
|
| 150 |
-
f0 = model_data.get("f0", translations["not_found"].format(name=translations["f0"]))
|
| 151 |
-
version = model_data.get("version", translations["not_found"].format(name=translations["version"]))
|
| 152 |
-
creation_date = model_data.get("creation_date", translations["not_found_create_time"])
|
| 153 |
-
model_hash = model_data.get("model_hash", translations["not_found"].format(name="model_hash"))
|
| 154 |
-
pitch_guidance = translations["trained_f0"] if f0 else translations["not_f0"]
|
| 155 |
-
creation_date_str = prettify_date(creation_date) if creation_date else translations["not_found_create_time"]
|
| 156 |
-
model_name = model_data.get("model_name", translations["unregistered"])
|
| 157 |
-
model_author = model_data.get("author", translations["not_author"])
|
| 158 |
-
vocoder = model_data.get("vocoder", "Default")
|
| 159 |
-
rms_extract = model_data.get("energy", False)
|
| 160 |
-
|
| 161 |
-
gr_info(translations["success"])
|
| 162 |
-
return translations["model_info"].format(model_name=model_name, model_author=model_author, epochs=epochs, steps=steps, version=version, sr=sr, pitch_guidance=pitch_guidance, model_hash=model_hash, creation_date_str=creation_date_str, vocoder=vocoder, rms_extract=rms_extract)
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
main/app/core/presets.py
DELETED
|
@@ -1,165 +0,0 @@
|
|
| 1 |
-
import os
|
| 2 |
-
import sys
|
| 3 |
-
import json
|
| 4 |
-
|
| 5 |
-
sys.path.append(os.getcwd())
|
| 6 |
-
|
| 7 |
-
from main.app.variables import translations, configs
|
| 8 |
-
from main.app.core.ui import gr_info, gr_warning, change_preset_choices, change_effect_preset_choices
|
| 9 |
-
|
| 10 |
-
def load_presets(presets, cleaner, autotune, pitch, clean_strength, index_strength, resample_sr, filter_radius, rms_mix_rate, protect, split_audio, f0_autotune_strength, formant_shifting, formant_qfrency, formant_timbre):
|
| 11 |
-
if not presets: gr_warning(translations["provide_file_settings"])
|
| 12 |
-
|
| 13 |
-
file = {}
|
| 14 |
-
if presets:
|
| 15 |
-
with open(os.path.join(configs["presets_path"], presets)) as f:
|
| 16 |
-
file = json.load(f)
|
| 17 |
-
|
| 18 |
-
gr_info(translations["load_presets"].format(presets=presets))
|
| 19 |
-
return [file.get("cleaner", cleaner), file.get("autotune", autotune), file.get("pitch", pitch), file.get("clean_strength", clean_strength), file.get("index_strength", index_strength), file.get("resample_sr", resample_sr), file.get("filter_radius", filter_radius), file.get("rms_mix_rate", rms_mix_rate), file.get("protect", protect), file.get("split_audio", split_audio), file.get("f0_autotune_strength", f0_autotune_strength), file.get("formant_shifting", formant_shifting), file.get("formant_qfrency", formant_qfrency), file.get("formant_timbre", formant_timbre)]
|
| 20 |
-
|
| 21 |
-
def save_presets(name, cleaner, autotune, pitch, clean_strength, index_strength, resample_sr, filter_radius, rms_mix_rate, protect, split_audio, f0_autotune_strength, cleaner_chbox, autotune_chbox, pitch_chbox, index_strength_chbox, resample_sr_chbox, filter_radius_chbox, rms_mix_rate_chbox, protect_chbox, split_audio_chbox, formant_shifting_chbox, formant_shifting, formant_qfrency, formant_timbre):
|
| 22 |
-
if not name: return gr_warning(translations["provide_filename_settings"])
|
| 23 |
-
if not any([cleaner_chbox, autotune_chbox, pitch_chbox, index_strength_chbox, resample_sr_chbox, filter_radius_chbox, rms_mix_rate_chbox, protect_chbox, split_audio_chbox, formant_shifting_chbox]): return gr_warning(translations["choose1"])
|
| 24 |
-
|
| 25 |
-
settings = {}
|
| 26 |
-
|
| 27 |
-
for checkbox, data in [(cleaner_chbox, {"cleaner": cleaner, "clean_strength": clean_strength}), (autotune_chbox, {"autotune": autotune, "f0_autotune_strength": f0_autotune_strength}), (pitch_chbox, {"pitch": pitch}), (index_strength_chbox, {"index_strength": index_strength}), (resample_sr_chbox, {"resample_sr": resample_sr}), (filter_radius_chbox, {"filter_radius": filter_radius}), (rms_mix_rate_chbox, {"rms_mix_rate": rms_mix_rate}), (protect_chbox, {"protect": protect}), (split_audio_chbox, {"split_audio": split_audio}), (formant_shifting_chbox, {"formant_shifting": formant_shifting, "formant_qfrency": formant_qfrency, "formant_timbre": formant_timbre})]:
|
| 28 |
-
if checkbox: settings.update(data)
|
| 29 |
-
|
| 30 |
-
with open(os.path.join(configs["presets_path"], name + ".conversion.json"), "w") as f:
|
| 31 |
-
json.dump(settings, f, indent=4)
|
| 32 |
-
|
| 33 |
-
gr_info(translations["export_settings"].format(name=name))
|
| 34 |
-
return change_preset_choices()
|
| 35 |
-
|
| 36 |
-
def audio_effect_load_presets(presets, resample_checkbox, audio_effect_resample_sr, chorus_depth, chorus_rate_hz, chorus_mix, chorus_centre_delay_ms, chorus_feedback, distortion_drive_db, reverb_room_size, reverb_damping, reverb_wet_level, reverb_dry_level, reverb_width, reverb_freeze_mode, pitch_shift_semitones, delay_second, delay_feedback, delay_mix, compressor_threshold_db, compressor_ratio, compressor_attack_ms, compressor_release_ms, limiter_threshold_db, limiter_release_ms, gain_db, bitcrush_bit_depth, clipping_threshold_db, phaser_rate_hz, phaser_depth, phaser_centre_frequency_hz, phaser_feedback, phaser_mix, bass_boost, bass_frequency, treble_boost, treble_frequency, fade_in, fade_out, chorus_check_box, distortion_checkbox, reverb_check_box, delay_check_box, compressor_check_box, limiter, gain_checkbox, bitcrush_checkbox, clipping_checkbox, phaser_check_box, bass_or_treble, fade):
|
| 37 |
-
if not presets: gr_warning(translations["provide_file_settings"])
|
| 38 |
-
|
| 39 |
-
file = {}
|
| 40 |
-
if presets:
|
| 41 |
-
with open(os.path.join(configs["presets_path"], presets)) as f:
|
| 42 |
-
file = json.load(f)
|
| 43 |
-
|
| 44 |
-
gr_info(translations["load_presets"].format(presets=presets))
|
| 45 |
-
return [
|
| 46 |
-
file.get("resample_checkbox", resample_checkbox), file.get("audio_effect_resample_sr", audio_effect_resample_sr),
|
| 47 |
-
file.get("chorus_depth", chorus_depth), file.get("chorus_rate_hz", chorus_rate_hz),
|
| 48 |
-
file.get("chorus_mix", chorus_mix), file.get("chorus_centre_delay_ms", chorus_centre_delay_ms),
|
| 49 |
-
file.get("chorus_feedback", chorus_feedback), file.get("distortion_drive_db", distortion_drive_db),
|
| 50 |
-
file.get("reverb_room_size", reverb_room_size), file.get("reverb_damping", reverb_damping),
|
| 51 |
-
file.get("reverb_wet_level", reverb_wet_level), file.get("reverb_dry_level", reverb_dry_level),
|
| 52 |
-
file.get("reverb_width", reverb_width), file.get("reverb_freeze_mode", reverb_freeze_mode),
|
| 53 |
-
file.get("pitch_shift_semitones", pitch_shift_semitones), file.get("delay_second", delay_second),
|
| 54 |
-
file.get("delay_feedback", delay_feedback), file.get("delay_mix", delay_mix),
|
| 55 |
-
file.get("compressor_threshold_db", compressor_threshold_db), file.get("compressor_ratio", compressor_ratio),
|
| 56 |
-
file.get("compressor_attack_ms", compressor_attack_ms), file.get("compressor_release_ms", compressor_release_ms),
|
| 57 |
-
file.get("limiter_threshold_db", limiter_threshold_db), file.get("limiter_release_ms", limiter_release_ms),
|
| 58 |
-
file.get("gain_db", gain_db), file.get("bitcrush_bit_depth", bitcrush_bit_depth),
|
| 59 |
-
file.get("clipping_threshold_db", clipping_threshold_db), file.get("phaser_rate_hz", phaser_rate_hz),
|
| 60 |
-
file.get("phaser_depth", phaser_depth), file.get("phaser_centre_frequency_hz", phaser_centre_frequency_hz),
|
| 61 |
-
file.get("phaser_feedback", phaser_feedback), file.get("phaser_mix", phaser_mix),
|
| 62 |
-
file.get("bass_boost", bass_boost), file.get("bass_frequency", bass_frequency),
|
| 63 |
-
file.get("treble_boost", treble_boost), file.get("treble_frequency", treble_frequency),
|
| 64 |
-
file.get("fade_in", fade_in), file.get("fade_out", fade_out),
|
| 65 |
-
file.get("chorus_check_box", chorus_check_box), file.get("distortion_checkbox", distortion_checkbox),
|
| 66 |
-
file.get("reverb_check_box", reverb_check_box), file.get("delay_check_box", delay_check_box),
|
| 67 |
-
file.get("compressor_check_box", compressor_check_box), file.get("limiter", limiter),
|
| 68 |
-
file.get("gain_checkbox", gain_checkbox), file.get("bitcrush_checkbox", bitcrush_checkbox),
|
| 69 |
-
file.get("clipping_checkbox", clipping_checkbox), file.get("phaser_check_box", phaser_check_box),
|
| 70 |
-
file.get("bass_or_treble", bass_or_treble), file.get("fade", fade)
|
| 71 |
-
]
|
| 72 |
-
|
| 73 |
-
def audio_effect_save_presets(name, resample_checkbox, audio_effect_resample_sr, chorus_depth, chorus_rate_hz, chorus_mix, chorus_centre_delay_ms, chorus_feedback, distortion_drive_db, reverb_room_size, reverb_damping, reverb_wet_level, reverb_dry_level, reverb_width, reverb_freeze_mode, pitch_shift_semitones, delay_second, delay_feedback, delay_mix, compressor_threshold_db, compressor_ratio, compressor_attack_ms, compressor_release_ms, limiter_threshold_db, limiter_release_ms, gain_db, bitcrush_bit_depth, clipping_threshold_db, phaser_rate_hz, phaser_depth, phaser_centre_frequency_hz, phaser_feedback, phaser_mix, bass_boost, bass_frequency, treble_boost, treble_frequency, fade_in, fade_out, chorus_check_box, distortion_checkbox, reverb_check_box, delay_check_box, compressor_check_box, limiter, gain_checkbox, bitcrush_checkbox, clipping_checkbox, phaser_check_box, bass_or_treble, fade):
|
| 74 |
-
if not name: return gr_warning(translations["provide_filename_settings"])
|
| 75 |
-
if not any([resample_checkbox, chorus_check_box, distortion_checkbox, reverb_check_box, delay_check_box, compressor_check_box, limiter, gain_checkbox, bitcrush_checkbox, clipping_checkbox, phaser_check_box, bass_or_treble, fade, pitch_shift_semitones != 0]): return gr_warning(translations["choose1"])
|
| 76 |
-
|
| 77 |
-
settings = {}
|
| 78 |
-
|
| 79 |
-
for checkbox, data in [
|
| 80 |
-
(resample_checkbox, {
|
| 81 |
-
"resample_checkbox": resample_checkbox,
|
| 82 |
-
"audio_effect_resample_sr": audio_effect_resample_sr
|
| 83 |
-
}),
|
| 84 |
-
(chorus_check_box, {
|
| 85 |
-
"chorus_check_box": chorus_check_box,
|
| 86 |
-
"chorus_depth": chorus_depth,
|
| 87 |
-
"chorus_rate_hz": chorus_rate_hz,
|
| 88 |
-
"chorus_mix": chorus_mix,
|
| 89 |
-
"chorus_centre_delay_ms": chorus_centre_delay_ms,
|
| 90 |
-
"chorus_feedback": chorus_feedback
|
| 91 |
-
}),
|
| 92 |
-
(distortion_checkbox, {
|
| 93 |
-
"distortion_checkbox": distortion_checkbox,
|
| 94 |
-
"distortion_drive_db": distortion_drive_db
|
| 95 |
-
}),
|
| 96 |
-
(reverb_check_box, {
|
| 97 |
-
"reverb_check_box": reverb_check_box,
|
| 98 |
-
"reverb_room_size": reverb_room_size,
|
| 99 |
-
"reverb_damping": reverb_damping,
|
| 100 |
-
"reverb_wet_level": reverb_wet_level,
|
| 101 |
-
"reverb_dry_level": reverb_dry_level,
|
| 102 |
-
"reverb_width": reverb_width,
|
| 103 |
-
"reverb_freeze_mode": reverb_freeze_mode
|
| 104 |
-
}),
|
| 105 |
-
(pitch_shift_semitones != 0, {
|
| 106 |
-
"pitch_shift_semitones": pitch_shift_semitones
|
| 107 |
-
}),
|
| 108 |
-
(delay_check_box, {
|
| 109 |
-
"delay_check_box": delay_check_box,
|
| 110 |
-
"delay_second": delay_second,
|
| 111 |
-
"delay_feedback": delay_feedback,
|
| 112 |
-
"delay_mix": delay_mix
|
| 113 |
-
}),
|
| 114 |
-
(compressor_check_box, {
|
| 115 |
-
"compressor_check_box": compressor_check_box,
|
| 116 |
-
"compressor_threshold_db": compressor_threshold_db,
|
| 117 |
-
"compressor_ratio": compressor_ratio,
|
| 118 |
-
"compressor_attack_ms": compressor_attack_ms,
|
| 119 |
-
"compressor_release_ms": compressor_release_ms
|
| 120 |
-
}),
|
| 121 |
-
(limiter, {
|
| 122 |
-
"limiter": limiter,
|
| 123 |
-
"limiter_threshold_db": limiter_threshold_db,
|
| 124 |
-
"limiter_release_ms": limiter_release_ms
|
| 125 |
-
}),
|
| 126 |
-
(gain_checkbox, {
|
| 127 |
-
"gain_checkbox": gain_checkbox,
|
| 128 |
-
"gain_db": gain_db
|
| 129 |
-
}),
|
| 130 |
-
(bitcrush_checkbox, {
|
| 131 |
-
"bitcrush_checkbox": bitcrush_checkbox,
|
| 132 |
-
"bitcrush_bit_depth": bitcrush_bit_depth
|
| 133 |
-
}),
|
| 134 |
-
(clipping_checkbox, {
|
| 135 |
-
"clipping_checkbox": clipping_checkbox,
|
| 136 |
-
"clipping_threshold_db": clipping_threshold_db
|
| 137 |
-
}),
|
| 138 |
-
(phaser_check_box, {
|
| 139 |
-
"phaser_check_box": phaser_check_box,
|
| 140 |
-
"phaser_rate_hz": phaser_rate_hz,
|
| 141 |
-
"phaser_depth": phaser_depth,
|
| 142 |
-
"phaser_centre_frequency_hz": phaser_centre_frequency_hz,
|
| 143 |
-
"phaser_feedback": phaser_feedback,
|
| 144 |
-
"phaser_mix": phaser_mix
|
| 145 |
-
}),
|
| 146 |
-
(bass_or_treble, {
|
| 147 |
-
"bass_or_treble": bass_or_treble,
|
| 148 |
-
"bass_boost": bass_boost,
|
| 149 |
-
"bass_frequency": bass_frequency,
|
| 150 |
-
"treble_boost": treble_boost,
|
| 151 |
-
"treble_frequency": treble_frequency
|
| 152 |
-
}),
|
| 153 |
-
(fade, {
|
| 154 |
-
"fade": fade,
|
| 155 |
-
"fade_in": fade_in,
|
| 156 |
-
"fade_out": fade_out
|
| 157 |
-
})
|
| 158 |
-
]:
|
| 159 |
-
if checkbox: settings.update(data)
|
| 160 |
-
|
| 161 |
-
with open(os.path.join(configs["presets_path"], name + ".effect.json"), "w") as f:
|
| 162 |
-
json.dump(settings, f, indent=4)
|
| 163 |
-
|
| 164 |
-
gr_info(translations["export_settings"].format(name=name))
|
| 165 |
-
return change_effect_preset_choices()
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
main/app/core/process.py
DELETED
|
@@ -1,134 +0,0 @@
|
|
| 1 |
-
import os
|
| 2 |
-
import re
|
| 3 |
-
import sys
|
| 4 |
-
import shutil
|
| 5 |
-
import codecs
|
| 6 |
-
import zipfile
|
| 7 |
-
import requests
|
| 8 |
-
import xml.etree.ElementTree
|
| 9 |
-
|
| 10 |
-
sys.path.append(os.getcwd())
|
| 11 |
-
|
| 12 |
-
from main.app.variables import logger, translations, configs
|
| 13 |
-
from main.app.core.ui import gr_info, gr_warning, gr_error, process_output
|
| 14 |
-
|
| 15 |
-
def read_docx_text(path):
|
| 16 |
-
with zipfile.ZipFile(path) as docx:
|
| 17 |
-
with docx.open("word/document.xml") as document_xml:
|
| 18 |
-
xml_content = document_xml.read()
|
| 19 |
-
|
| 20 |
-
WORD_NAMESPACE = '{http://schemas.openxmlformats.org/wordprocessingml/2006/main}'
|
| 21 |
-
|
| 22 |
-
paragraphs = []
|
| 23 |
-
for paragraph in xml.etree.ElementTree.XML(xml_content).iter(WORD_NAMESPACE + 'p'):
|
| 24 |
-
texts = [node.text for node in paragraph.iter(WORD_NAMESPACE + 't') if node.text]
|
| 25 |
-
if texts: paragraphs.append(''.join(texts))
|
| 26 |
-
|
| 27 |
-
return '\n'.join(paragraphs)
|
| 28 |
-
|
| 29 |
-
def process_input(file_path):
|
| 30 |
-
if file_path.endswith(".srt"): file_contents = ""
|
| 31 |
-
elif file_path.endswith(".docx"): file_contents = read_docx_text(file_path)
|
| 32 |
-
else:
|
| 33 |
-
try:
|
| 34 |
-
with open(file_path, "r", encoding="utf-8") as file:
|
| 35 |
-
file_contents = file.read()
|
| 36 |
-
except Exception as e:
|
| 37 |
-
gr_warning(translations["read_error"])
|
| 38 |
-
logger.debug(e)
|
| 39 |
-
file_contents = ""
|
| 40 |
-
|
| 41 |
-
gr_info(translations["upload_success"].format(name=translations["text"]))
|
| 42 |
-
return file_contents
|
| 43 |
-
|
| 44 |
-
def move_files_from_directory(src_dir, dest_weights, dest_logs, model_name):
|
| 45 |
-
for root, _, files in os.walk(src_dir):
|
| 46 |
-
for file in files:
|
| 47 |
-
file_path = os.path.join(root, file)
|
| 48 |
-
if file.endswith(".index"):
|
| 49 |
-
model_log_dir = os.path.join(dest_logs, model_name)
|
| 50 |
-
os.makedirs(model_log_dir, exist_ok=True)
|
| 51 |
-
|
| 52 |
-
filepath = process_output(os.path.join(model_log_dir, file.replace(' ', '_').replace('(', '').replace(')', '').replace('[', '').replace(']', '').replace(",", "").replace('"', "").replace("'", "").replace("|", "").replace("{", "").replace("}", "").strip()))
|
| 53 |
-
|
| 54 |
-
shutil.move(file_path, filepath)
|
| 55 |
-
elif file.endswith(".pth") and not file.startswith("D_") and not file.startswith("G_"):
|
| 56 |
-
pth_path = process_output(os.path.join(dest_weights, model_name + ".pth"))
|
| 57 |
-
|
| 58 |
-
shutil.move(file_path, pth_path)
|
| 59 |
-
elif file.endswith(".onnx") and not file.startswith("D_") and not file.startswith("G_"):
|
| 60 |
-
pth_path = process_output(os.path.join(dest_weights, model_name + ".onnx"))
|
| 61 |
-
|
| 62 |
-
shutil.move(file_path, pth_path)
|
| 63 |
-
|
| 64 |
-
def extract_name_model(filename):
|
| 65 |
-
match = re.search(r"_([A-Za-z0-9]+)(?=_v\d*)", filename.replace('-', '').replace('(', '').replace(')', '').replace('[', '').replace(']', '').replace(",", "").replace('"', "").replace("'", "").replace("|", "").replace("{", "").replace("}", "").strip())
|
| 66 |
-
return match.group(1) if match else None
|
| 67 |
-
|
| 68 |
-
def save_drop_model(dropbox):
|
| 69 |
-
weight_folder = configs["weights_path"]
|
| 70 |
-
logs_folder = configs["logs_path"]
|
| 71 |
-
save_model_temp = "save_model_temp"
|
| 72 |
-
|
| 73 |
-
if not os.path.exists(weight_folder): os.makedirs(weight_folder, exist_ok=True)
|
| 74 |
-
if not os.path.exists(logs_folder): os.makedirs(logs_folder, exist_ok=True)
|
| 75 |
-
if not os.path.exists(save_model_temp): os.makedirs(save_model_temp, exist_ok=True)
|
| 76 |
-
|
| 77 |
-
shutil.move(dropbox, save_model_temp)
|
| 78 |
-
|
| 79 |
-
try:
|
| 80 |
-
file_name = os.path.basename(dropbox)
|
| 81 |
-
|
| 82 |
-
if file_name.endswith(".zip"):
|
| 83 |
-
shutil.unpack_archive(os.path.join(save_model_temp, file_name), save_model_temp)
|
| 84 |
-
move_files_from_directory(save_model_temp, weight_folder, logs_folder, file_name.replace(".zip", ""))
|
| 85 |
-
elif file_name.endswith((".pth", ".onnx")):
|
| 86 |
-
output_file = process_output(os.path.join(weight_folder, file_name))
|
| 87 |
-
|
| 88 |
-
shutil.move(os.path.join(save_model_temp, file_name), output_file)
|
| 89 |
-
elif file_name.endswith(".index"):
|
| 90 |
-
modelname = extract_name_model(file_name)
|
| 91 |
-
if modelname is None: modelname = os.path.splitext(os.path.basename(file_name))[0]
|
| 92 |
-
|
| 93 |
-
model_logs = os.path.join(logs_folder, modelname)
|
| 94 |
-
if not os.path.exists(model_logs): os.makedirs(model_logs, exist_ok=True)
|
| 95 |
-
|
| 96 |
-
shutil.move(os.path.join(save_model_temp, file_name), model_logs)
|
| 97 |
-
else:
|
| 98 |
-
gr_warning(translations["unable_analyze_model"])
|
| 99 |
-
return None
|
| 100 |
-
|
| 101 |
-
gr_info(translations["upload_success"].format(name=translations["model"]))
|
| 102 |
-
return None
|
| 103 |
-
except Exception as e:
|
| 104 |
-
gr_error(message=translations["error_occurred"].format(e=e))
|
| 105 |
-
return None
|
| 106 |
-
finally:
|
| 107 |
-
shutil.rmtree(save_model_temp, ignore_errors=True)
|
| 108 |
-
|
| 109 |
-
def zip_file(name, pth, index):
|
| 110 |
-
pth_path = os.path.join(configs["weights_path"], pth)
|
| 111 |
-
if not pth or not os.path.exists(pth_path) or not pth.endswith((".pth", ".onnx")): return gr_warning(translations["provide_file"].format(filename=translations["model"]))
|
| 112 |
-
|
| 113 |
-
zip_file_path = os.path.join(configs["logs_path"], name, name + ".zip")
|
| 114 |
-
gr_info(translations["start"].format(start=translations["zip"]))
|
| 115 |
-
|
| 116 |
-
with zipfile.ZipFile(zip_file_path, 'w') as zipf:
|
| 117 |
-
zipf.write(pth_path, os.path.basename(pth_path))
|
| 118 |
-
if index: zipf.write(index, os.path.basename(index))
|
| 119 |
-
|
| 120 |
-
gr_info(translations["success"])
|
| 121 |
-
return {"visible": True, "value": zip_file_path, "__type__": "update"}
|
| 122 |
-
|
| 123 |
-
def fetch_pretrained_data():
|
| 124 |
-
try:
|
| 125 |
-
response = requests.get(codecs.decode("uggcf://uhttvatsnpr.pb/NauC/Ivrganzrfr-EIP-Cebwrpg/erfbyir/znva/wfba/phfgbz_cergenvarq.wfba", "rot13"))
|
| 126 |
-
response.raise_for_status()
|
| 127 |
-
|
| 128 |
-
return response.json()
|
| 129 |
-
except:
|
| 130 |
-
return {}
|
| 131 |
-
|
| 132 |
-
def update_sample_rate_dropdown(model):
|
| 133 |
-
data = fetch_pretrained_data()
|
| 134 |
-
if model != translations["success"]: return {"choices": list(data[model].keys()), "value": list(data[model].keys())[0], "__type__": "update"}
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
main/app/core/restart.py
DELETED
|
@@ -1,48 +0,0 @@
|
|
| 1 |
-
import os
|
| 2 |
-
import sys
|
| 3 |
-
import json
|
| 4 |
-
import platform
|
| 5 |
-
import subprocess
|
| 6 |
-
|
| 7 |
-
sys.path.append(os.getcwd())
|
| 8 |
-
|
| 9 |
-
from main.app.core.ui import gr_info
|
| 10 |
-
from main.app.variables import python, translations, configs_json
|
| 11 |
-
|
| 12 |
-
def restart_app(app):
|
| 13 |
-
gr_info(translations["30s"])
|
| 14 |
-
os.system("cls" if platform.system() == "Windows" else "clear")
|
| 15 |
-
|
| 16 |
-
app.close()
|
| 17 |
-
subprocess.run([python, os.path.join("main", "app", "app.py")] + sys.argv[1:])
|
| 18 |
-
|
| 19 |
-
def change_language(lang, app):
|
| 20 |
-
configs = json.load(open(configs_json, "r"))
|
| 21 |
-
|
| 22 |
-
if lang != configs["language"]:
|
| 23 |
-
configs["language"] = lang
|
| 24 |
-
|
| 25 |
-
with open(configs_json, "w") as f:
|
| 26 |
-
json.dump(configs, f, indent=4)
|
| 27 |
-
|
| 28 |
-
restart_app(app)
|
| 29 |
-
|
| 30 |
-
def change_theme(theme, app):
|
| 31 |
-
configs = json.load(open(configs_json, "r"))
|
| 32 |
-
|
| 33 |
-
if theme != configs["theme"]:
|
| 34 |
-
configs["theme"] = theme
|
| 35 |
-
with open(configs_json, "w") as f:
|
| 36 |
-
json.dump(configs, f, indent=4)
|
| 37 |
-
|
| 38 |
-
restart_app(app)
|
| 39 |
-
|
| 40 |
-
def change_font(font, app):
|
| 41 |
-
configs = json.load(open(configs_json, "r"))
|
| 42 |
-
|
| 43 |
-
if font != configs["font"]:
|
| 44 |
-
configs["font"] = font
|
| 45 |
-
with open(configs_json, "w") as f:
|
| 46 |
-
json.dump(configs, f, indent=4)
|
| 47 |
-
|
| 48 |
-
restart_app(app)
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
main/app/core/separate.py
DELETED
|
@@ -1,35 +0,0 @@
|
|
| 1 |
-
import os
|
| 2 |
-
import sys
|
| 3 |
-
import subprocess
|
| 4 |
-
|
| 5 |
-
sys.path.append(os.getcwd())
|
| 6 |
-
|
| 7 |
-
from main.app.core.ui import gr_info, gr_warning
|
| 8 |
-
from main.app.variables import python, translations, configs, config
|
| 9 |
-
|
| 10 |
-
def separator_music(input, output_audio, format, shifts, segments_size, overlap, clean_audio, clean_strength, denoise, separator_model, kara_model, backing, reverb, backing_reverb, hop_length, batch_size, sample_rate):
|
| 11 |
-
output = os.path.dirname(output_audio) or output_audio
|
| 12 |
-
|
| 13 |
-
if not input or not os.path.exists(input) or os.path.isdir(input):
|
| 14 |
-
gr_warning(translations["input_not_valid"])
|
| 15 |
-
return [None]*4
|
| 16 |
-
|
| 17 |
-
if not os.path.exists(output):
|
| 18 |
-
gr_warning(translations["output_not_valid"])
|
| 19 |
-
return [None]*4
|
| 20 |
-
|
| 21 |
-
if not os.path.exists(output): os.makedirs(output)
|
| 22 |
-
gr_info(translations["start"].format(start=translations["separator_music"]))
|
| 23 |
-
|
| 24 |
-
if config.debug_mode: subprocess.run([python, configs["separate_path"], "--input_path", input, "--output_path", output, "--format", format, "--shifts", str(shifts), "--segments_size", str(segments_size), "--overlap", str(overlap), "--mdx_hop_length", str(hop_length), "--mdx_batch_size", str(batch_size), "--clean_audio", str(clean_audio), "--clean_strength", str(clean_strength), "--kara_model", kara_model, "--backing", str(backing), "--mdx_denoise", str(denoise), "--reverb", str(reverb), "--backing_reverb", str(backing_reverb), "--model_name", separator_model, "--sample_rate", str(sample_rate)])
|
| 25 |
-
else:
|
| 26 |
-
from main.inference.separator_music import separate
|
| 27 |
-
|
| 28 |
-
separate(input, output, format, shifts, segments_size, overlap, hop_length, batch_size, clean_audio, clean_strength, separator_model, kara_model, backing, denoise, reverb, backing_reverb, sample_rate)
|
| 29 |
-
|
| 30 |
-
gr_info(translations["success"])
|
| 31 |
-
|
| 32 |
-
filename, _ = os.path.splitext(os.path.basename(input))
|
| 33 |
-
output = os.path.join(output, filename)
|
| 34 |
-
|
| 35 |
-
return [os.path.join(output, f"Original_Vocals_No_Reverb.{format}") if reverb else os.path.join(output, f"Original_Vocals.{format}"), os.path.join(output, f"Instruments.{format}"), (os.path.join(output, f"Main_Vocals_No_Reverb.{format}") if reverb else os.path.join(output, f"Main_Vocals.{format}") if backing else None), (os.path.join(output, f"Backing_Vocals_No_Reverb.{format}") if backing_reverb else os.path.join(output, f"Backing_Vocals.{format}") if backing else None)] if os.path.isfile(input) else [None]*4
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
main/app/core/training.py
DELETED
|
@@ -1,219 +0,0 @@
|
|
| 1 |
-
import os
|
| 2 |
-
import sys
|
| 3 |
-
import time
|
| 4 |
-
import shutil
|
| 5 |
-
import codecs
|
| 6 |
-
import threading
|
| 7 |
-
import subprocess
|
| 8 |
-
|
| 9 |
-
sys.path.append(os.getcwd())
|
| 10 |
-
|
| 11 |
-
from main.tools import huggingface
|
| 12 |
-
from main.app.core.ui import gr_info, gr_warning
|
| 13 |
-
from main.app.variables import python, translations, configs
|
| 14 |
-
|
| 15 |
-
def if_done(done, p):
|
| 16 |
-
while 1:
|
| 17 |
-
if p.poll() is None: time.sleep(0.5)
|
| 18 |
-
else: break
|
| 19 |
-
|
| 20 |
-
done[0] = True
|
| 21 |
-
|
| 22 |
-
def log_read(done, name):
|
| 23 |
-
log_file = os.path.join(configs["logs_path"], "app.log")
|
| 24 |
-
|
| 25 |
-
f = open(log_file, "w", encoding="utf-8")
|
| 26 |
-
f.close()
|
| 27 |
-
|
| 28 |
-
while 1:
|
| 29 |
-
with open(log_file, "r", encoding="utf-8") as f:
|
| 30 |
-
yield "".join(line for line in f.readlines() if "DEBUG" not in line and name in line and line.strip() != "")
|
| 31 |
-
|
| 32 |
-
time.sleep(1)
|
| 33 |
-
if done[0]: break
|
| 34 |
-
|
| 35 |
-
with open(log_file, "r", encoding="utf-8") as f:
|
| 36 |
-
log = "".join(line for line in f.readlines() if "DEBUG" not in line and line.strip() != "")
|
| 37 |
-
|
| 38 |
-
yield log
|
| 39 |
-
|
| 40 |
-
def create_dataset(input_audio, output_dataset, clean_dataset, clean_strength, separator_reverb, kim_vocals_version, overlap, segments_size, denoise_mdx, skip, skip_start, skip_end, hop_length, batch_size, sample_rate):
|
| 41 |
-
version = 1 if kim_vocals_version == "Version-1" else 2
|
| 42 |
-
gr_info(translations["start"].format(start=translations["create"]))
|
| 43 |
-
|
| 44 |
-
p = subprocess.Popen(f'{python} {configs["create_dataset_path"]} --input_audio "{input_audio}" --output_dataset "{output_dataset}" --clean_dataset {clean_dataset} --clean_strength {clean_strength} --separator_reverb {separator_reverb} --kim_vocal_version {version} --overlap {overlap} --segments_size {segments_size} --mdx_hop_length {hop_length} --mdx_batch_size {batch_size} --denoise_mdx {denoise_mdx} --skip {skip} --skip_start_audios "{skip_start}" --skip_end_audios "{skip_end}" --sample_rate {sample_rate}', shell=True)
|
| 45 |
-
done = [False]
|
| 46 |
-
|
| 47 |
-
threading.Thread(target=if_done, args=(done, p)).start()
|
| 48 |
-
|
| 49 |
-
for log in log_read(done, "create_dataset"):
|
| 50 |
-
yield log
|
| 51 |
-
|
| 52 |
-
def preprocess(model_name, sample_rate, cpu_core, cut_preprocess, process_effects, dataset, clean_dataset, clean_strength):
|
| 53 |
-
sr = int(float(sample_rate.rstrip("k")) * 1000)
|
| 54 |
-
|
| 55 |
-
if not model_name: return gr_warning(translations["provide_name"])
|
| 56 |
-
if not os.path.exists(dataset) or not any(f.lower().endswith(("wav", "mp3", "flac", "ogg", "opus", "m4a", "mp4", "aac", "alac", "wma", "aiff", "webm", "ac3")) for f in os.listdir(dataset) if os.path.isfile(os.path.join(dataset, f))): return gr_warning(translations["not_found_data"])
|
| 57 |
-
|
| 58 |
-
model_dir = os.path.join(configs["logs_path"], model_name)
|
| 59 |
-
if os.path.exists(model_dir): shutil.rmtree(model_dir, ignore_errors=True)
|
| 60 |
-
|
| 61 |
-
p = subprocess.Popen(f'{python} {configs["preprocess_path"]} --model_name "{model_name}" --dataset_path "{dataset}" --sample_rate {sr} --cpu_cores {cpu_core} --cut_preprocess {cut_preprocess} --process_effects {process_effects} --clean_dataset {clean_dataset} --clean_strength {clean_strength}', shell=True)
|
| 62 |
-
done = [False]
|
| 63 |
-
|
| 64 |
-
threading.Thread(target=if_done, args=(done, p)).start()
|
| 65 |
-
os.makedirs(model_dir, exist_ok=True)
|
| 66 |
-
|
| 67 |
-
for log in log_read(done, "preprocess"):
|
| 68 |
-
yield log
|
| 69 |
-
|
| 70 |
-
def extract(model_name, version, method, pitch_guidance, hop_length, cpu_cores, gpu, sample_rate, embedders, custom_embedders, onnx_f0_mode, embedders_mode, f0_autotune, f0_autotune_strength, hybrid_method, rms_extract):
|
| 71 |
-
f0method, embedder_model = (method if method != "hybrid" else hybrid_method), (embedders if embedders != "custom" else custom_embedders)
|
| 72 |
-
sr = int(float(sample_rate.rstrip("k")) * 1000)
|
| 73 |
-
|
| 74 |
-
if not model_name: return gr_warning(translations["provide_name"])
|
| 75 |
-
model_dir = os.path.join(configs["logs_path"], model_name)
|
| 76 |
-
|
| 77 |
-
try:
|
| 78 |
-
if not any(os.path.isfile(os.path.join(model_dir, "sliced_audios", f)) for f in os.listdir(os.path.join(model_dir, "sliced_audios"))) or not any(os.path.isfile(os.path.join(model_dir, "sliced_audios_16k", f)) for f in os.listdir(os.path.join(model_dir, "sliced_audios_16k"))): return gr_warning(translations["not_found_data_preprocess"])
|
| 79 |
-
except:
|
| 80 |
-
return gr_warning(translations["not_found_data_preprocess"])
|
| 81 |
-
|
| 82 |
-
p = subprocess.Popen(f'{python} {configs["extract_path"]} --model_name "{model_name}" --rvc_version {version} --f0_method {f0method} --pitch_guidance {pitch_guidance} --hop_length {hop_length} --cpu_cores {cpu_cores} --gpu {gpu} --sample_rate {sr} --embedder_model {embedder_model} --f0_onnx {onnx_f0_mode} --embedders_mode {embedders_mode} --f0_autotune {f0_autotune} --f0_autotune_strength {f0_autotune_strength} --rms_extract {rms_extract}', shell=True)
|
| 83 |
-
done = [False]
|
| 84 |
-
|
| 85 |
-
threading.Thread(target=if_done, args=(done, p)).start()
|
| 86 |
-
os.makedirs(model_dir, exist_ok=True)
|
| 87 |
-
|
| 88 |
-
for log in log_read(done, "extract"):
|
| 89 |
-
yield log
|
| 90 |
-
|
| 91 |
-
def create_index(model_name, rvc_version, index_algorithm):
|
| 92 |
-
if not model_name: return gr_warning(translations["provide_name"])
|
| 93 |
-
model_dir = os.path.join(configs["logs_path"], model_name)
|
| 94 |
-
|
| 95 |
-
try:
|
| 96 |
-
if not any(os.path.isfile(os.path.join(model_dir, f"{rvc_version}_extracted", f)) for f in os.listdir(os.path.join(model_dir, f"{rvc_version}_extracted"))): return gr_warning(translations["not_found_data_extract"])
|
| 97 |
-
except:
|
| 98 |
-
return gr_warning(translations["not_found_data_extract"])
|
| 99 |
-
|
| 100 |
-
p = subprocess.Popen(f'{python} {configs["create_index_path"]} --model_name "{model_name}" --rvc_version {rvc_version} --index_algorithm {index_algorithm}', shell=True)
|
| 101 |
-
done = [False]
|
| 102 |
-
|
| 103 |
-
threading.Thread(target=if_done, args=(done, p)).start()
|
| 104 |
-
os.makedirs(model_dir, exist_ok=True)
|
| 105 |
-
|
| 106 |
-
for log in log_read(done, "create_index"):
|
| 107 |
-
yield log
|
| 108 |
-
|
| 109 |
-
def training(model_name, rvc_version, save_every_epoch, save_only_latest, save_every_weights, total_epoch, sample_rate, batch_size, gpu, pitch_guidance, not_pretrain, custom_pretrained, pretrain_g, pretrain_d, detector, threshold, clean_up, cache, model_author, vocoder, checkpointing, deterministic, benchmark, optimizer, energy_use):
|
| 110 |
-
sr = int(float(sample_rate.rstrip("k")) * 1000)
|
| 111 |
-
if not model_name: return gr_warning(translations["provide_name"])
|
| 112 |
-
|
| 113 |
-
model_dir = os.path.join(configs["logs_path"], model_name)
|
| 114 |
-
if os.path.exists(os.path.join(model_dir, "train_pid.txt")): os.remove(os.path.join(model_dir, "train_pid.txt"))
|
| 115 |
-
|
| 116 |
-
try:
|
| 117 |
-
if not any(os.path.isfile(os.path.join(model_dir, f"{rvc_version}_extracted", f)) for f in os.listdir(os.path.join(model_dir, f"{rvc_version}_extracted"))): return gr_warning(translations["not_found_data_extract"])
|
| 118 |
-
except:
|
| 119 |
-
return gr_warning(translations["not_found_data_extract"])
|
| 120 |
-
|
| 121 |
-
if not not_pretrain:
|
| 122 |
-
if not custom_pretrained:
|
| 123 |
-
pretrain_dir = configs["pretrained_v2_path"] if rvc_version == 'v2' else configs["pretrained_v1_path"]
|
| 124 |
-
download_version = codecs.decode(f"uggcf://uhttvatsnpr.pb/NauC/Ivrganzrfr-EIP-Cebwrpg/erfbyir/znva/cergenvarq_i{'2' if rvc_version == 'v2' else '1'}/", "rot13")
|
| 125 |
-
|
| 126 |
-
pretrained_selector = {
|
| 127 |
-
True: {
|
| 128 |
-
32000: ("f0G32k.pth", "f0D32k.pth"),
|
| 129 |
-
40000: ("f0G40k.pth", "f0D40k.pth"),
|
| 130 |
-
48000: ("f0G48k.pth", "f0D48k.pth")
|
| 131 |
-
},
|
| 132 |
-
False: {
|
| 133 |
-
32000: ("G32k.pth", "D32k.pth"),
|
| 134 |
-
40000: ("G40k.pth", "D40k.pth"),
|
| 135 |
-
48000: ("G48k.pth", "D48k.pth")
|
| 136 |
-
}
|
| 137 |
-
}
|
| 138 |
-
|
| 139 |
-
pg2, pd2 = "", ""
|
| 140 |
-
pg, pd = pretrained_selector[pitch_guidance][sr]
|
| 141 |
-
|
| 142 |
-
if energy_use: pg2, pd2 = pg2 + "ENERGY_", pd2 + "ENERGY_"
|
| 143 |
-
if vocoder != 'Default': pg2, pd2 = pg2 + vocoder + "_", pd2 + vocoder + "_"
|
| 144 |
-
|
| 145 |
-
pg2, pd2 = pg2 + pg, pd2 + pd
|
| 146 |
-
pretrained_G, pretrained_D = (
|
| 147 |
-
os.path.join(
|
| 148 |
-
pretrain_dir,
|
| 149 |
-
pg2
|
| 150 |
-
),
|
| 151 |
-
os.path.join(
|
| 152 |
-
pretrain_dir,
|
| 153 |
-
pd2
|
| 154 |
-
)
|
| 155 |
-
)
|
| 156 |
-
|
| 157 |
-
try:
|
| 158 |
-
if not os.path.exists(pretrained_G):
|
| 159 |
-
gr_info(translations["download_pretrained"].format(dg="G", rvc_version=rvc_version))
|
| 160 |
-
huggingface.HF_download_file(
|
| 161 |
-
"".join(
|
| 162 |
-
[
|
| 163 |
-
download_version,
|
| 164 |
-
pg2
|
| 165 |
-
]
|
| 166 |
-
),
|
| 167 |
-
os.path.join(
|
| 168 |
-
pretrain_dir,
|
| 169 |
-
pg2
|
| 170 |
-
)
|
| 171 |
-
)
|
| 172 |
-
|
| 173 |
-
if not os.path.exists(pretrained_D):
|
| 174 |
-
gr_info(translations["download_pretrained"].format(dg="D", rvc_version=rvc_version))
|
| 175 |
-
huggingface.HF_download_file(
|
| 176 |
-
"".join(
|
| 177 |
-
[
|
| 178 |
-
download_version,
|
| 179 |
-
pd2
|
| 180 |
-
]
|
| 181 |
-
),
|
| 182 |
-
os.path.join(
|
| 183 |
-
pretrain_dir,
|
| 184 |
-
pd2
|
| 185 |
-
)
|
| 186 |
-
)
|
| 187 |
-
except:
|
| 188 |
-
gr_warning(translations["not_use_pretrain_error_download"])
|
| 189 |
-
pretrained_G = pretrained_D = None
|
| 190 |
-
else:
|
| 191 |
-
if not pretrain_g: return gr_warning(translations["provide_pretrained"].format(dg="G"))
|
| 192 |
-
if not pretrain_d: return gr_warning(translations["provide_pretrained"].format(dg="D"))
|
| 193 |
-
|
| 194 |
-
pg2, pd2 = pretrain_g, pretrain_d
|
| 195 |
-
pretrained_G, pretrained_D = (
|
| 196 |
-
(os.path.join(configs["pretrained_custom_path"], pg2) if not os.path.exists(pg2) else pg2),
|
| 197 |
-
(os.path.join(configs["pretrained_custom_path"], pd2) if not os.path.exists(pd2) else pd2)
|
| 198 |
-
)
|
| 199 |
-
|
| 200 |
-
if not os.path.exists(pretrained_G): return gr_warning(translations["not_found_pretrain"].format(dg="G"))
|
| 201 |
-
if not os.path.exists(pretrained_D): return gr_warning(translations["not_found_pretrain"].format(dg="D"))
|
| 202 |
-
else:
|
| 203 |
-
pretrained_G = pretrained_D = None
|
| 204 |
-
gr_warning(translations["not_use_pretrain"])
|
| 205 |
-
|
| 206 |
-
gr_info(translations["start"].format(start=translations["training"]))
|
| 207 |
-
|
| 208 |
-
p = subprocess.Popen(f'{python} {configs["train_path"]} --model_name "{model_name}" --rvc_version {rvc_version} --save_every_epoch {save_every_epoch} --save_only_latest {save_only_latest} --save_every_weights {save_every_weights} --total_epoch {total_epoch} --sample_rate {sr} --batch_size {batch_size} --gpu {gpu} --pitch_guidance {pitch_guidance} --overtraining_detector {detector} --overtraining_threshold {threshold} --cleanup {clean_up} --cache_data_in_gpu {cache} --g_pretrained_path "{pretrained_G}" --d_pretrained_path "{pretrained_D}" --model_author "{model_author}" --vocoder "{vocoder}" --checkpointing {checkpointing} --deterministic {deterministic} --benchmark {benchmark} --optimizer {optimizer} --energy_use {energy_use}', shell=True)
|
| 209 |
-
done = [False]
|
| 210 |
-
|
| 211 |
-
with open(os.path.join(model_dir, "train_pid.txt"), "w") as pid_file:
|
| 212 |
-
pid_file.write(str(p.pid))
|
| 213 |
-
|
| 214 |
-
threading.Thread(target=if_done, args=(done, p)).start()
|
| 215 |
-
|
| 216 |
-
for log in log_read(done, "train"):
|
| 217 |
-
lines = log.splitlines()
|
| 218 |
-
if len(lines) > 100: log = "\n".join(lines[-100:])
|
| 219 |
-
yield log
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
main/app/core/tts.py
DELETED
|
@@ -1,99 +0,0 @@
|
|
| 1 |
-
import os
|
| 2 |
-
import sys
|
| 3 |
-
import pysrt
|
| 4 |
-
import codecs
|
| 5 |
-
import librosa
|
| 6 |
-
import asyncio
|
| 7 |
-
import requests
|
| 8 |
-
import tempfile
|
| 9 |
-
|
| 10 |
-
import numpy as np
|
| 11 |
-
import soundfile as sf
|
| 12 |
-
|
| 13 |
-
from edge_tts import Communicate
|
| 14 |
-
|
| 15 |
-
sys.path.append(os.getcwd())
|
| 16 |
-
|
| 17 |
-
from main.app.variables import translations
|
| 18 |
-
from main.app.core.ui import gr_info, gr_warning, gr_error
|
| 19 |
-
|
| 20 |
-
def synthesize_tts(prompt, voice, speed, output, pitch, google):
|
| 21 |
-
if not google: asyncio.run(Communicate(text=prompt, voice=voice, rate=f"+{speed}%" if speed >= 0 else f"{speed}%", pitch=f"+{pitch}Hz" if pitch >= 0 else f"{pitch}Hz").save(output))
|
| 22 |
-
else:
|
| 23 |
-
response = requests.get(codecs.decode("uggcf://genafyngr.tbbtyr.pbz/genafyngr_ggf", "rot13"), params={"ie": "UTF-8", "q": prompt, "tl": voice, "ttsspeed": speed, "client": "tw-ob"}, headers={"User-Agent": "Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/111.0.0.0 Safari/537.36"})
|
| 24 |
-
|
| 25 |
-
if response.status_code == 200:
|
| 26 |
-
with open(output, "wb") as f:
|
| 27 |
-
f.write(response.content)
|
| 28 |
-
|
| 29 |
-
if pitch != 0 or speed != 0:
|
| 30 |
-
y, sr = librosa.load(output, sr=None)
|
| 31 |
-
|
| 32 |
-
if pitch != 0: y = librosa.effects.pitch_shift(y, sr=sr, n_steps=pitch)
|
| 33 |
-
if speed != 0: y = librosa.effects.time_stretch(y, rate=speed)
|
| 34 |
-
|
| 35 |
-
sf.write(file=output, data=y, samplerate=sr, format=os.path.splitext(os.path.basename(output))[-1].lower().replace('.', ''))
|
| 36 |
-
else: gr_error(f"{response.status_code}, {response.text}")
|
| 37 |
-
|
| 38 |
-
def time_stretch(y, sr, target_duration):
|
| 39 |
-
rate = (len(y) / sr) / target_duration
|
| 40 |
-
if rate != 1.0: y = librosa.effects.time_stretch(y=y.astype(np.float32), rate=rate)
|
| 41 |
-
|
| 42 |
-
n_target = int(round(target_duration * sr))
|
| 43 |
-
return np.pad(y, (0, n_target - len(y))) if len(y) < n_target else y[:n_target]
|
| 44 |
-
|
| 45 |
-
def pysrttime_to_seconds(t):
|
| 46 |
-
return (t.hours * 60 + t.minutes) * 60 + t.seconds + t.milliseconds / 1000
|
| 47 |
-
|
| 48 |
-
def srt_tts(srt_file, out_file, voice, rate = 0, sr = 24000, google = False):
|
| 49 |
-
subs = pysrt.open(srt_file)
|
| 50 |
-
if not subs: raise ValueError(translations["srt"])
|
| 51 |
-
|
| 52 |
-
final_audio = np.zeros(int(round(pysrttime_to_seconds(subs[-1].end) * sr)), dtype=np.float32)
|
| 53 |
-
|
| 54 |
-
with tempfile.TemporaryDirectory() as tempdir:
|
| 55 |
-
for idx, seg in enumerate(subs):
|
| 56 |
-
wav_path = os.path.join(tempdir, f"seg_{idx}.wav")
|
| 57 |
-
synthesize_tts(" ".join(seg.text.splitlines()), voice, 0, wav_path, rate, google)
|
| 58 |
-
|
| 59 |
-
audio, file_sr = sf.read(wav_path, dtype=np.float32)
|
| 60 |
-
if file_sr != sr: audio = np.interp(np.linspace(0, len(audio) - 1, int(len(audio) * sr / file_sr)), np.arange(len(audio)), audio)
|
| 61 |
-
adjusted = time_stretch(audio, sr, pysrttime_to_seconds(seg.duration))
|
| 62 |
-
|
| 63 |
-
start_sample = int(round(pysrttime_to_seconds(seg.start) * sr))
|
| 64 |
-
end_sample = start_sample + adjusted.shape[0]
|
| 65 |
-
|
| 66 |
-
if end_sample > final_audio.shape[0]:
|
| 67 |
-
adjusted = adjusted[: final_audio.shape[0] - start_sample]
|
| 68 |
-
end_sample = final_audio.shape[0]
|
| 69 |
-
|
| 70 |
-
final_audio[start_sample:end_sample] += adjusted
|
| 71 |
-
|
| 72 |
-
sf.write(out_file, final_audio, sr)
|
| 73 |
-
|
| 74 |
-
def TTS(prompt, voice, speed, output, pitch, google, srt_input):
|
| 75 |
-
if not srt_input: srt_input = ""
|
| 76 |
-
|
| 77 |
-
if not prompt and not srt_input.endswith(".srt"):
|
| 78 |
-
gr_warning(translations["enter_the_text"])
|
| 79 |
-
return None
|
| 80 |
-
|
| 81 |
-
if not voice:
|
| 82 |
-
gr_warning(translations["choose_voice"])
|
| 83 |
-
return None
|
| 84 |
-
|
| 85 |
-
if not output:
|
| 86 |
-
gr_warning(translations["output_not_valid"])
|
| 87 |
-
return None
|
| 88 |
-
|
| 89 |
-
if os.path.isdir(output): output = os.path.join(output, f"tts.wav")
|
| 90 |
-
gr_info(translations["convert"].format(name=translations["text"]))
|
| 91 |
-
|
| 92 |
-
output_dir = os.path.dirname(output) or output
|
| 93 |
-
if not os.path.exists(output_dir): os.makedirs(output_dir, exist_ok=True)
|
| 94 |
-
|
| 95 |
-
if srt_input.endswith(".srt"): srt_tts(srt_input, output, voice, 0, 24000, google)
|
| 96 |
-
else: synthesize_tts(prompt, voice, speed, output, pitch, google)
|
| 97 |
-
|
| 98 |
-
gr_info(translations["success"])
|
| 99 |
-
return output
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
main/app/core/ui.py
DELETED
|
@@ -1,179 +0,0 @@
|
|
| 1 |
-
import os
|
| 2 |
-
import sys
|
| 3 |
-
import json
|
| 4 |
-
import torch
|
| 5 |
-
import shutil
|
| 6 |
-
|
| 7 |
-
import gradio as gr
|
| 8 |
-
|
| 9 |
-
sys.path.append(os.getcwd())
|
| 10 |
-
|
| 11 |
-
from main.library import opencl
|
| 12 |
-
from main.app.variables import config, configs, configs_json, logger, translations, edgetts, google_tts_voice, method_f0, method_f0_full
|
| 13 |
-
|
| 14 |
-
def gr_info(message):
|
| 15 |
-
gr.Info(message, duration=2)
|
| 16 |
-
logger.info(message)
|
| 17 |
-
|
| 18 |
-
def gr_warning(message):
|
| 19 |
-
gr.Warning(message, duration=2)
|
| 20 |
-
logger.warning(message)
|
| 21 |
-
|
| 22 |
-
def gr_error(message):
|
| 23 |
-
gr.Error(message=message, duration=6)
|
| 24 |
-
logger.error(message)
|
| 25 |
-
|
| 26 |
-
def get_gpu_info():
|
| 27 |
-
ngpu = torch.cuda.device_count()
|
| 28 |
-
gpu_infos = [f"{i}: {torch.cuda.get_device_name(i)} ({int(torch.cuda.get_device_properties(i).total_memory / 1024 / 1024 / 1024 + 0.4)} GB)" for i in range(ngpu) if torch.cuda.is_available() or ngpu != 0]
|
| 29 |
-
|
| 30 |
-
if len(gpu_infos) == 0:
|
| 31 |
-
ngpu = opencl.device_count()
|
| 32 |
-
gpu_infos = [f"{i}: {opencl.device_name(i)}" for i in range(ngpu) if opencl.is_available() or ngpu != 0]
|
| 33 |
-
|
| 34 |
-
return "\n".join(gpu_infos) if len(gpu_infos) > 0 else translations["no_support_gpu"]
|
| 35 |
-
|
| 36 |
-
def gpu_number_str():
|
| 37 |
-
ngpu = torch.cuda.device_count()
|
| 38 |
-
if ngpu == 0: ngpu = opencl.device_count()
|
| 39 |
-
|
| 40 |
-
return str("-".join(map(str, range(ngpu))) if torch.cuda.is_available() or opencl.is_available() else "-")
|
| 41 |
-
|
| 42 |
-
def change_f0_choices():
|
| 43 |
-
f0_file = sorted([os.path.abspath(os.path.join(root, f)) for root, _, files in os.walk(configs["f0_path"]) for f in files if f.endswith(".txt")])
|
| 44 |
-
return {"value": f0_file[0] if len(f0_file) >= 1 else "", "choices": f0_file, "__type__": "update"}
|
| 45 |
-
|
| 46 |
-
def change_audios_choices(input_audio):
|
| 47 |
-
audios = sorted([os.path.abspath(os.path.join(root, f)) for root, _, files in os.walk(configs["audios_path"]) for f in files if os.path.splitext(f)[1].lower() in (".wav", ".mp3", ".flac", ".ogg", ".opus", ".m4a", ".mp4", ".aac", ".alac", ".wma", ".aiff", ".webm", ".ac3")])
|
| 48 |
-
return {"value": input_audio if input_audio != "" else (audios[0] if len(audios) >= 1 else ""), "choices": audios, "__type__": "update"}
|
| 49 |
-
|
| 50 |
-
def change_models_choices():
|
| 51 |
-
model, index = sorted(list(model for model in os.listdir(configs["weights_path"]) if model.endswith((".pth", ".onnx")) and not model.startswith("G_") and not model.startswith("D_"))), sorted([os.path.join(root, name) for root, _, files in os.walk(configs["logs_path"], topdown=False) for name in files if name.endswith(".index") and "trained" not in name])
|
| 52 |
-
return [{"value": model[0] if len(model) >= 1 else "", "choices": model, "__type__": "update"}, {"value": index[0] if len(index) >= 1 else "", "choices": index, "__type__": "update"}]
|
| 53 |
-
|
| 54 |
-
def change_pretrained_choices():
|
| 55 |
-
pretrainD = sorted([model for model in os.listdir(configs["pretrained_custom_path"]) if model.endswith(".pth") and "D" in model])
|
| 56 |
-
pretrainG = sorted([model for model in os.listdir(configs["pretrained_custom_path"]) if model.endswith(".pth") and "G" in model])
|
| 57 |
-
|
| 58 |
-
return [{"choices": pretrainD, "value": pretrainD[0] if len(pretrainD) >= 1 else "", "__type__": "update"}, {"choices": pretrainG, "value": pretrainG[0] if len(pretrainG) >= 1 else "", "__type__": "update"}]
|
| 59 |
-
|
| 60 |
-
def change_choices_del():
|
| 61 |
-
return [{"choices": sorted(list(model for model in os.listdir(configs["weights_path"]) if model.endswith(".pth") and not model.startswith("G_") and not model.startswith("D_"))), "__type__": "update"}, {"choices": sorted([os.path.join(configs["logs_path"], f) for f in os.listdir(configs["logs_path"]) if "mute" not in f and os.path.isdir(os.path.join(configs["logs_path"], f))]), "__type__": "update"}]
|
| 62 |
-
|
| 63 |
-
def change_preset_choices():
|
| 64 |
-
return {"value": "", "choices": sorted(list(f for f in os.listdir(configs["presets_path"]) if f.endswith(".conversion.json"))), "__type__": "update"}
|
| 65 |
-
|
| 66 |
-
def change_effect_preset_choices():
|
| 67 |
-
return {"value": "", "choices": sorted(list(f for f in os.listdir(configs["presets_path"]) if f.endswith(".effect.json"))), "__type__": "update"}
|
| 68 |
-
|
| 69 |
-
def change_tts_voice_choices(google):
|
| 70 |
-
return {"choices": google_tts_voice if google else edgetts, "value": google_tts_voice[0] if google else edgetts[0], "__type__": "update"}
|
| 71 |
-
|
| 72 |
-
def change_backing_choices(backing, merge):
|
| 73 |
-
if backing or merge: return {"value": False, "interactive": False, "__type__": "update"}
|
| 74 |
-
elif not backing or not merge: return {"interactive": True, "__type__": "update"}
|
| 75 |
-
else: gr_warning(translations["option_not_valid"])
|
| 76 |
-
|
| 77 |
-
def change_download_choices(select):
|
| 78 |
-
selects = [False]*10
|
| 79 |
-
|
| 80 |
-
if select == translations["download_url"]: selects[0] = selects[1] = selects[2] = True
|
| 81 |
-
elif select == translations["download_from_csv"]: selects[3] = selects[4] = True
|
| 82 |
-
elif select == translations["search_models"]: selects[5] = selects[6] = True
|
| 83 |
-
elif select == translations["upload"]: selects[9] = True
|
| 84 |
-
else: gr_warning(translations["option_not_valid"])
|
| 85 |
-
|
| 86 |
-
return [{"visible": selects[i], "__type__": "update"} for i in range(len(selects))]
|
| 87 |
-
|
| 88 |
-
def change_download_pretrained_choices(select):
|
| 89 |
-
selects = [False]*8
|
| 90 |
-
|
| 91 |
-
if select == translations["download_url"]: selects[0] = selects[1] = selects[2] = True
|
| 92 |
-
elif select == translations["list_model"]: selects[3] = selects[4] = selects[5] = True
|
| 93 |
-
elif select == translations["upload"]: selects[6] = selects[7] = True
|
| 94 |
-
else: gr_warning(translations["option_not_valid"])
|
| 95 |
-
|
| 96 |
-
return [{"visible": selects[i], "__type__": "update"} for i in range(len(selects))]
|
| 97 |
-
|
| 98 |
-
def get_index(model):
|
| 99 |
-
model = os.path.basename(model).split("_")[0]
|
| 100 |
-
return {"value": next((f for f in [os.path.join(root, name) for root, _, files in os.walk(configs["logs_path"], topdown=False) for name in files if name.endswith(".index") and "trained" not in name] if model.split(".")[0] in f), ""), "__type__": "update"} if model else None
|
| 101 |
-
|
| 102 |
-
def index_strength_show(index):
|
| 103 |
-
return {"visible": index != "" and os.path.exists(index), "value": 0.5, "__type__": "update"}
|
| 104 |
-
|
| 105 |
-
def hoplength_show(method, hybrid_method=None):
|
| 106 |
-
visible = False
|
| 107 |
-
|
| 108 |
-
for m in ["mangio-crepe", "fcpe", "yin", "piptrack", "fcn"]:
|
| 109 |
-
if m in method: visible = True
|
| 110 |
-
if m in hybrid_method: visible = True
|
| 111 |
-
|
| 112 |
-
if visible: break
|
| 113 |
-
else: visible = False
|
| 114 |
-
|
| 115 |
-
return {"visible": visible, "__type__": "update"}
|
| 116 |
-
|
| 117 |
-
def visible(value):
|
| 118 |
-
return {"visible": value, "__type__": "update"}
|
| 119 |
-
|
| 120 |
-
def valueFalse_interactive(value):
|
| 121 |
-
return {"value": False, "interactive": value, "__type__": "update"}
|
| 122 |
-
|
| 123 |
-
def valueEmpty_visible1(value):
|
| 124 |
-
return {"value": "", "visible": value, "__type__": "update"}
|
| 125 |
-
|
| 126 |
-
def pitch_guidance_lock(vocoders):
|
| 127 |
-
return {"value": True, "interactive": vocoders == "Default", "__type__": "update"}
|
| 128 |
-
|
| 129 |
-
def vocoders_lock(pitch, vocoders):
|
| 130 |
-
return {"value": vocoders if pitch else "Default", "interactive": pitch, "__type__": "update"}
|
| 131 |
-
|
| 132 |
-
def unlock_f0(value):
|
| 133 |
-
return {"choices": method_f0_full if value else method_f0, "value": "rmvpe", "__type__": "update"}
|
| 134 |
-
|
| 135 |
-
def unlock_vocoder(value, vocoder):
|
| 136 |
-
return {"value": vocoder if value == "v2" else "Default", "interactive": value == "v2", "__type__": "update"}
|
| 137 |
-
|
| 138 |
-
def unlock_ver(value, vocoder):
|
| 139 |
-
return {"value": "v2" if vocoder == "Default" else value, "interactive": vocoder == "Default", "__type__": "update"}
|
| 140 |
-
|
| 141 |
-
def visible_embedders(value):
|
| 142 |
-
return {"visible": value != "spin", "__type__": "update"}
|
| 143 |
-
|
| 144 |
-
def change_fp(fp):
|
| 145 |
-
fp16 = fp == "fp16"
|
| 146 |
-
|
| 147 |
-
if fp16 and config.device in ["cpu", "mps", "ocl:0"]:
|
| 148 |
-
gr_warning(translations["fp16_not_support"])
|
| 149 |
-
return "fp32"
|
| 150 |
-
else:
|
| 151 |
-
gr_info(translations["start_update_precision"])
|
| 152 |
-
|
| 153 |
-
configs = json.load(open(configs_json, "r"))
|
| 154 |
-
configs["fp16"] = config.is_half = fp16
|
| 155 |
-
|
| 156 |
-
with open(configs_json, "w") as f:
|
| 157 |
-
json.dump(configs, f, indent=4)
|
| 158 |
-
|
| 159 |
-
gr_info(translations["success"])
|
| 160 |
-
return "fp16" if fp16 else "fp32"
|
| 161 |
-
|
| 162 |
-
def process_output(file_path):
|
| 163 |
-
if config.configs.get("delete_exists_file", True):
|
| 164 |
-
if os.path.exists(file_path): os.remove(file_path)
|
| 165 |
-
return file_path
|
| 166 |
-
else:
|
| 167 |
-
if not os.path.exists(file_path): return file_path
|
| 168 |
-
file = os.path.splitext(os.path.basename(file_path))
|
| 169 |
-
|
| 170 |
-
index = 1
|
| 171 |
-
while 1:
|
| 172 |
-
file_path = os.path.join(os.path.dirname(file_path), f"{file[0]}_{index}{file[1]}")
|
| 173 |
-
if not os.path.exists(file_path): return file_path
|
| 174 |
-
index += 1
|
| 175 |
-
|
| 176 |
-
def shutil_move(input_path, output_path):
|
| 177 |
-
output_path = os.path.join(output_path, os.path.basename(input_path)) if os.path.isdir(output_path) else output_path
|
| 178 |
-
|
| 179 |
-
return shutil.move(input_path, process_output(output_path)) if os.path.exists(output_path) else shutil.move(input_path, output_path)
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
main/app/core/utils.py
DELETED
|
@@ -1,97 +0,0 @@
|
|
| 1 |
-
import os
|
| 2 |
-
import sys
|
| 3 |
-
import json
|
| 4 |
-
import codecs
|
| 5 |
-
import requests
|
| 6 |
-
import platform
|
| 7 |
-
import datetime
|
| 8 |
-
|
| 9 |
-
sys.path.append(os.getcwd())
|
| 10 |
-
|
| 11 |
-
from main.app.core.ui import gr_info, gr_warning, gr_error
|
| 12 |
-
from main.app.variables import logger, translations, configs
|
| 13 |
-
|
| 14 |
-
def stop_pid(pid_file, model_name=None, train=False):
|
| 15 |
-
try:
|
| 16 |
-
pid_file_path = os.path.join("assets", f"{pid_file}.txt") if model_name is None else os.path.join(configs["logs_path"], model_name, f"{pid_file}.txt")
|
| 17 |
-
|
| 18 |
-
if not os.path.exists(pid_file_path): return gr_warning(translations["not_found_pid"])
|
| 19 |
-
else:
|
| 20 |
-
with open(pid_file_path, "r") as pid_file:
|
| 21 |
-
pids = [int(pid) for pid in pid_file.readlines()]
|
| 22 |
-
|
| 23 |
-
for pid in pids:
|
| 24 |
-
os.kill(pid, 9)
|
| 25 |
-
|
| 26 |
-
if os.path.exists(pid_file_path): os.remove(pid_file_path)
|
| 27 |
-
|
| 28 |
-
pid_file_path = os.path.join(configs["logs_path"], model_name, "config.json")
|
| 29 |
-
|
| 30 |
-
if train and os.path.exists(pid_file_path):
|
| 31 |
-
with open(pid_file_path, "r") as pid_file:
|
| 32 |
-
pid_data = json.load(pid_file)
|
| 33 |
-
pids = pid_data.get("process_pids", [])
|
| 34 |
-
|
| 35 |
-
with open(pid_file_path, "w") as pid_file:
|
| 36 |
-
pid_data.pop("process_pids", None)
|
| 37 |
-
|
| 38 |
-
json.dump(pid_data, pid_file, indent=4)
|
| 39 |
-
|
| 40 |
-
for pid in pids:
|
| 41 |
-
os.kill(pid, 9)
|
| 42 |
-
|
| 43 |
-
gr_info(translations["end_pid"])
|
| 44 |
-
except:
|
| 45 |
-
pass
|
| 46 |
-
|
| 47 |
-
def report_bug(error_info, provide):
|
| 48 |
-
report_path = os.path.join(configs["logs_path"], "report_bugs.log")
|
| 49 |
-
if os.path.exists(report_path): os.remove(report_path)
|
| 50 |
-
|
| 51 |
-
report_url = codecs.decode(requests.get(codecs.decode("uggcf://uhttvatsnpr.pb/NauC/Ivrganzrfr-EIP-Cebwrpg/erfbyir/znva/jroubbx.gkg", "rot13")).text, "rot13")
|
| 52 |
-
if not error_info: error_info = "Không Có"
|
| 53 |
-
|
| 54 |
-
gr_info(translations["thank"])
|
| 55 |
-
|
| 56 |
-
if provide:
|
| 57 |
-
try:
|
| 58 |
-
for log in [os.path.join(root, name) for root, _, files in os.walk(os.path.join(configs["logs_path"]), topdown=False) for name in files if name.endswith(".log")]:
|
| 59 |
-
with open(log, "r", encoding="utf-8") as r:
|
| 60 |
-
with open(report_path, "a", encoding="utf-8") as w:
|
| 61 |
-
w.write(str(r.read()))
|
| 62 |
-
w.write("\n")
|
| 63 |
-
except Exception as e:
|
| 64 |
-
gr_error(translations["error_read_log"])
|
| 65 |
-
logger.debug(e)
|
| 66 |
-
|
| 67 |
-
try:
|
| 68 |
-
with open(report_path, "r", encoding="utf-8") as f:
|
| 69 |
-
content = f.read()
|
| 70 |
-
|
| 71 |
-
requests.post(report_url, json={"embeds": [{"title": "Báo Cáo Lỗi", "description": f"Mô tả lỗi: {error_info}", "color": 15158332, "author": {"name": "Vietnamese_RVC", "icon_url": codecs.decode("uggcf://uhttvatsnpr.pb/NauC/Ivrganzrfr-EIP-Cebwrpg/erfbyir/znva/vpb.cat", "rot13"), "url": codecs.decode("uggcf://tvguho.pbz/CunzUhlauNau16/Ivrganzrfr-EIP/gerr/znva","rot13")}, "thumbnail": {"url": codecs.decode("uggcf://p.grabe.pbz/7dADJbv-36fNNNNq/grabe.tvs", "rot13")}, "fields": [{"name": "Số Lượng Gỡ Lỗi", "value": content.count("DEBUG")}, {"name": "Số Lượng Thông Tin", "value": content.count("INFO")}, {"name": "Số Lượng Cảnh Báo", "value": content.count("WARNING")}, {"name": "Số Lượng Lỗi", "value": content.count("ERROR")}], "footer": {"text": f"Tên Máy: {platform.uname().node} - Hệ Điều Hành: {platform.system()}-{platform.version()}\nThời Gian Báo Cáo Lỗi: {datetime.datetime.now()}."}}]})
|
| 72 |
-
|
| 73 |
-
with open(report_path, "rb") as f:
|
| 74 |
-
requests.post(report_url, files={"file": f})
|
| 75 |
-
except Exception as e:
|
| 76 |
-
gr_error(translations["error_send"])
|
| 77 |
-
finally:
|
| 78 |
-
if os.path.exists(report_path): os.remove(report_path)
|
| 79 |
-
else: requests.post(report_url, json={"embeds": [{"title": "Báo Cáo Lỗi", "description": error_info}]})
|
| 80 |
-
|
| 81 |
-
def google_translate(text, source='auto', target='vi'):
|
| 82 |
-
if text == "": return gr_warning(translations["prompt_warning"])
|
| 83 |
-
|
| 84 |
-
try:
|
| 85 |
-
import textwrap
|
| 86 |
-
|
| 87 |
-
def translate_chunk(chunk):
|
| 88 |
-
response = requests.get(codecs.decode("uggcf://genafyngr.tbbtyrncvf.pbz/genafyngr_n/fvatyr", "rot13"), params={'client': 'gtx', 'sl': source, 'tl': target, 'dt': 't', 'q': chunk})
|
| 89 |
-
return ''.join([i[0] for i in response.json()[0]]) if response.status_code == 200 else chunk
|
| 90 |
-
|
| 91 |
-
translated_text = ''
|
| 92 |
-
for chunk in textwrap.wrap(text, 5000, break_long_words=False, break_on_hyphens=False):
|
| 93 |
-
translated_text += translate_chunk(chunk)
|
| 94 |
-
|
| 95 |
-
return translated_text
|
| 96 |
-
except:
|
| 97 |
-
return text
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
main/app/parser.py
DELETED
|
@@ -1,319 +0,0 @@
|
|
| 1 |
-
import os
|
| 2 |
-
import sys
|
| 3 |
-
|
| 4 |
-
sys.path.append(os.getcwd())
|
| 5 |
-
|
| 6 |
-
try:
|
| 7 |
-
argv = sys.argv[1]
|
| 8 |
-
except IndexError:
|
| 9 |
-
argv = None
|
| 10 |
-
|
| 11 |
-
argv_is_allows = ["--audio_effects", "--convert", "--create_dataset", "--create_index", "--extract", "--preprocess", "--separator_music", "--train", "--help_audio_effects", "--help_convert", "--help_create_dataset", "--help_create_index", "--help_extract", "--help_preprocess", "--help_separator_music", "--help_train", "--help"]
|
| 12 |
-
|
| 13 |
-
if argv not in argv_is_allows:
|
| 14 |
-
print("Cú pháp không hợp lệ! Sử dụng --help để biết thêm")
|
| 15 |
-
quit()
|
| 16 |
-
|
| 17 |
-
if argv_is_allows[0] in argv: from main.inference.audio_effects import main
|
| 18 |
-
elif argv_is_allows[1] in argv: from main.inference.conversion.convert import main
|
| 19 |
-
elif argv_is_allows[2] in argv: from main.inference.create_dataset import main
|
| 20 |
-
elif argv_is_allows[3] in argv: from main.inference.create_index import main
|
| 21 |
-
elif argv_is_allows[4] in argv: from main.inference.extracting.extract import main
|
| 22 |
-
elif argv_is_allows[5] in argv: from main.inference.preprocess.preprocess import main
|
| 23 |
-
elif argv_is_allows[6] in argv: from main.inference.separator_music import main
|
| 24 |
-
elif argv_is_allows[7] in argv: from main.inference.training.train import main
|
| 25 |
-
elif argv_is_allows[8] in argv:
|
| 26 |
-
print("""Các tham số của `--audio_effects`:
|
| 27 |
-
1. Đường dẫn tệp:
|
| 28 |
-
- `--input_path` (bắt buộc): Đường dẫn đến tệp âm thanh đầu vào.
|
| 29 |
-
- `--output_path` (mặc định: `./audios/apply_effects.wav`): Đường dẫn lưu tệp đầu ra.
|
| 30 |
-
- `--export_format` (mặc định: `wav`): Định dạng xuất tệp (`wav`, `mp3`, ...).
|
| 31 |
-
|
| 32 |
-
2. Lấy mẫu lại:
|
| 33 |
-
- `--resample` (mặc định: `False`): Có lấy mẫu lại hay không.
|
| 34 |
-
- `--resample_sr` (mặc định: `0`): Tần số lấy mẫu mới (Hz).
|
| 35 |
-
|
| 36 |
-
3. Hiệu ứng chorus:
|
| 37 |
-
- `--chorus`: Bật/tắt chorus.
|
| 38 |
-
- `--chorus_depth`, `--chorus_rate`, `--chorus_mix`, `--chorus_delay`, `--chorus_feedback`: Các thông số điều chỉnh chorus.
|
| 39 |
-
|
| 40 |
-
4. Hiệu ứng distortion:
|
| 41 |
-
- `--distortion`: Bật/tắt distortion.
|
| 42 |
-
- `--drive_db`: Mức độ méo âm thanh.
|
| 43 |
-
|
| 44 |
-
5. Hiệu ứng reverb:
|
| 45 |
-
- `--reverb`: Bật/tắt hồi âm.
|
| 46 |
-
- `--reverb_room_size`, `--reverb_damping`, `--reverb_wet_level`, `--reverb_dry_level`, `--reverb_width`, `--reverb_freeze_mode`: Điều chỉnh hồi âm.
|
| 47 |
-
|
| 48 |
-
6. Hiệu ứng pitch shift:
|
| 49 |
-
- `--pitchshift`: Bật/tắt thay đổi cao độ.
|
| 50 |
-
- `--pitch_shift`: Giá trị dịch cao độ.
|
| 51 |
-
|
| 52 |
-
7. Hiệu ứng delay:
|
| 53 |
-
- `--delay`: Bật/tắt delay.
|
| 54 |
-
- `--delay_seconds`, `--delay_feedback`, `--delay_mix`: Điều chỉnh thời gian trễ, phản hồi và hòa trộn.
|
| 55 |
-
|
| 56 |
-
8. Compressor:
|
| 57 |
-
- `--compressor`: Bật/tắt compressor.
|
| 58 |
-
- `--compressor_threshold`, `--compressor_ratio`, `--compressor_attack_ms`, `--compressor_release_ms`: Các thông số nén.
|
| 59 |
-
|
| 60 |
-
9. Limiter:
|
| 61 |
-
- `--limiter`: Bật/tắt giới hạn mức âm thanh.
|
| 62 |
-
- `--limiter_threshold`, `--limiter_release`: Ngưỡng giới hạn và thời gian nhả.
|
| 63 |
-
|
| 64 |
-
10. Gain (Khuếch đại):
|
| 65 |
-
- `--gain`: Bật/tắt gain.
|
| 66 |
-
- `--gain_db`: Mức gain (dB).
|
| 67 |
-
|
| 68 |
-
11. Bitcrush:
|
| 69 |
-
- `--bitcrush`: Bật/tắt hiệu ứng giảm độ phân giải.
|
| 70 |
-
- `--bitcrush_bit_depth`: Số bit của bitcrush.
|
| 71 |
-
|
| 72 |
-
12. Clipping:
|
| 73 |
-
- `--clipping`: Bật/tắt cắt âm thanh.
|
| 74 |
-
- `--clipping_threshold`: Ngưỡng clipping.
|
| 75 |
-
|
| 76 |
-
13. Phaser:
|
| 77 |
-
- `--phaser`: Bật/tắt hiệu ứng phaser.
|
| 78 |
-
- `--phaser_rate_hz`, `--phaser_depth`, `--phaser_centre_frequency_hz`, `--phaser_feedback`, `--phaser_mix`: Điều chỉnh hiệu ứng phaser.
|
| 79 |
-
|
| 80 |
-
14. Boost bass & treble:
|
| 81 |
-
- `--treble_bass_boost`: Bật/tắt tăng cường âm bass và treble.
|
| 82 |
-
- `--bass_boost_db`, `--bass_boost_frequency`, `--treble_boost_db`, `--treble_boost_frequency`: Các thông số tăng bass và treble.
|
| 83 |
-
|
| 84 |
-
15. Fade in & fade out:
|
| 85 |
-
- `--fade_in_out`: Bật/tắt hiệu ứng fade.
|
| 86 |
-
- `--fade_in_duration`, `--fade_out_duration`: Thời gian fade vào/ra.
|
| 87 |
-
|
| 88 |
-
16. Kết hợp âm thanh:
|
| 89 |
-
- `--audio_combination`: Bật/tắt ghép nhiều tệp âm thanh.
|
| 90 |
-
- `--audio_combination_input`: Đường dẫn tệp âm thanh bổ sung.
|
| 91 |
-
- `--main_volume`: Âm lượng của âm thanh chính.
|
| 92 |
-
- `--combination_volume`:: Âm lượng của âm thanh cần kết hợp.
|
| 93 |
-
""")
|
| 94 |
-
quit()
|
| 95 |
-
elif argv_is_allows[9] in argv:
|
| 96 |
-
print("""Các tham số của --convert:
|
| 97 |
-
1. Cấu hình xử lý giọng nói:
|
| 98 |
-
- `--pitch` (mặc định: `0`): Điều chỉnh cao độ.
|
| 99 |
-
- `--filter_radius` (mặc định: `3`): Độ mượt của đường F0.
|
| 100 |
-
- `--index_rate` (mặc định: `0.5`): Tỷ lệ sử dụng chỉ mục giọng nói.
|
| 101 |
-
- `--rms_mix_rate` (mặc định: `1`): Hệ số điều chỉnh biên độ âm lượng.
|
| 102 |
-
- `--protect` (mặc định: `0.33`): Bảo vệ phụ âm.
|
| 103 |
-
|
| 104 |
-
2. Cấu hình mẫu (frame hop):
|
| 105 |
-
- `--hop_length` (mặc định: `64`): Bước nhảy khi xử lý âm thanh.
|
| 106 |
-
|
| 107 |
-
3. Cấu hình F0:
|
| 108 |
-
- `--f0_method` (mặc định: `rmvpe`): Phương pháp dự đoán F0 (`pm`, `dio`, `mangio-crepe-tiny`, `mangio-crepe-small`, `mangio-crepe-medium`, `mangio-crepe-large`, `mangio-crepe-full`, `crepe-tiny`, `crepe-small`, `crepe-medium`, `crepe-large`, `crepe-full`, `fcpe`, `fcpe-legacy`, `rmvpe`, `rmvpe-legacy`, `harvest`, `yin`, `pyin`, `swipe`).
|
| 109 |
-
- `--f0_autotune` (mặc định: `False`): Có tự động điều chỉnh F0 hay không.
|
| 110 |
-
- `--f0_autotune_strength` (mặc định: `1`): Cường độ hiệu chỉnh tự động F0.
|
| 111 |
-
- `--f0_file` (mặc định: ``): Đường dẫn tệp F0 có sẵn.
|
| 112 |
-
- `--f0_onnx` (mặc định: `False`): Có sử dụng phiên bản ONNX của F0 hay không.
|
| 113 |
-
- `--proposal_pitch` (mặc định: `False`): Đề xuất cao độ thay vì điều chỉnh thủ công.
|
| 114 |
-
- `--proposal_pitch_threshold` (mặc định: `255.0`): Tần số ước tính cao độ.
|
| 115 |
-
|
| 116 |
-
4. Mô hình nhúng:
|
| 117 |
-
- `--embedder_model` (mặc định: `contentvec_base`): Mô hình nhúng sử dụng.
|
| 118 |
-
- `--embedders_mode` (mặc định: `fairseq`): Chế độ nhúng (`fairseq`, `transformers`, `onnx`).
|
| 119 |
-
|
| 120 |
-
5. Đường dẫn tệp:
|
| 121 |
-
- `--input_path` (bắt buộc): Đường dẫn tệp âm thanh đầu vào.
|
| 122 |
-
- `--output_path` (mặc định: `./audios/output.wav`): Đường dẫn lưu tệp đầu ra.
|
| 123 |
-
- `--export_format` (mặc định: `wav`): Định dạng xuất tệp.
|
| 124 |
-
- `--pth_path` (bắt buộc): Đường dẫn đến tệp mô hình `.pth`.
|
| 125 |
-
- `--index_path` (mặc định: `None`): Đường dẫn tệp chỉ mục (nếu có).
|
| 126 |
-
|
| 127 |
-
6. Làm sạch âm thanh:
|
| 128 |
-
- `--clean_audio` (mặc định: `False`): Có áp dụng làm sạch âm thanh không.
|
| 129 |
-
- `--clean_strength` (mặc định: `0.7`): Mức độ làm sạch.
|
| 130 |
-
|
| 131 |
-
7. Resampling & chia nhỏ âm thanh:
|
| 132 |
-
- `--resample_sr` (mặc định: `0`): Tần số lấy mẫu mới (0 nghĩa là giữ nguyên).
|
| 133 |
-
- `--split_audio` (mặc định: `False`): Có chia nhỏ audio trước khi xử lý không.
|
| 134 |
-
|
| 135 |
-
8. Kiểm tra & tối ưu hóa:
|
| 136 |
-
- `--checkpointing` (mặc định: `False`): Bật/tắt checkpointing để tiết kiệm RAM.
|
| 137 |
-
|
| 138 |
-
9. Dịch formant:
|
| 139 |
-
- `--formant_shifting` (mặc định: `False`): Có bật hiệu ứng dịch formant không.
|
| 140 |
-
- `--formant_qfrency` (mặc định: `0.8`): Hệ số dịch formant theo tần số.
|
| 141 |
-
- `--formant_timbre` (mặc định: `0.8`): Hệ số thay đổi màu sắc giọng.
|
| 142 |
-
""")
|
| 143 |
-
quit()
|
| 144 |
-
elif argv_is_allows[10] in argv:
|
| 145 |
-
print("""Các tham số của --create_dataset:
|
| 146 |
-
1. Đường dẫn & cấu hình dataset:
|
| 147 |
-
- `--input_audio` (bắt buộc): Đường dẫn liên kết đến âm thanh (Liên kết Youtube, có thể dùng dấu `,` để dùng nhiều liên kết).
|
| 148 |
-
- `--output_dataset` (mặc định: `./dataset`): Thư mục xuất dữ liệu đầu ra.
|
| 149 |
-
- `--sample_rate` (mặc định: `44100`): Tần số lấy mẫu cho âm thanh.
|
| 150 |
-
|
| 151 |
-
2. Làm sạch dữ liệu:
|
| 152 |
-
- `--clean_dataset` (mặc định: `False`): Có áp dụng làm sạch dữ liệu hay không.
|
| 153 |
-
- `--clean_strength` (mặc định: `0.7`): Mức độ làm sạch dữ liệu.
|
| 154 |
-
|
| 155 |
-
3. Tách giọng & hiệu ứng:
|
| 156 |
-
- `--separator_reverb` (mặc định: `False`): Có tách vang giọng không.
|
| 157 |
-
- `--kim_vocal_version` (mặc định: `2`): Phiên bản mô hình Kim Vocal để tách (`1`, `2`).
|
| 158 |
-
|
| 159 |
-
4. Cấu hình phân đoạn âm thanh:
|
| 160 |
-
- `--overlap` (mặc định: `0.25`): Mức độ chồng lấn giữa các đoạn khi tách.
|
| 161 |
-
- `--segments_size` (mặc định: `256`): Kích thước của từng phân đoạn.
|
| 162 |
-
|
| 163 |
-
5. Cấu hình MDX (Music Demixing):
|
| 164 |
-
- `--mdx_hop_length` (mặc định: `1024`): Bước nhảy MDX khi xử lý.
|
| 165 |
-
- `--mdx_batch_size` (mặc định: `1`): Kích thước batch khi xử lý MDX.
|
| 166 |
-
- `--denoise_mdx` (mặc định: `False`): Có áp dụng khử nhiễu khi tách bằng MDX không.
|
| 167 |
-
|
| 168 |
-
6. Bỏ qua phần âm thanh:
|
| 169 |
-
- `--skip` (mặc định: `False`): Có bỏ qua giây âm thanh nào không.
|
| 170 |
-
- `--skip_start_audios` (mặc định: `0`): Thời gian (giây) cần bỏ qua ở đầu audio.
|
| 171 |
-
- `--skip_end_audios` (mặc định: `0`): Thời gian (giây) cần bỏ qua ở cuối audio.
|
| 172 |
-
""")
|
| 173 |
-
quit()
|
| 174 |
-
elif argv_is_allows[11] in argv:
|
| 175 |
-
print("""Các tham số của --create_index:
|
| 176 |
-
1. Thông tin mô hình:
|
| 177 |
-
- `--model_name` (bắt buộc): Tên mô hình.
|
| 178 |
-
- `--rvc_version` (mặc định: `v2`): Phiên bản (`v1`, `v2`).
|
| 179 |
-
- `--index_algorithm` (mặc định: `Auto`): Thuật toán index sử dụng (`Auto`, `Faiss`, `KMeans`).
|
| 180 |
-
""")
|
| 181 |
-
quit()
|
| 182 |
-
elif argv_is_allows[12] in argv:
|
| 183 |
-
print("""Các tham số của --extract:
|
| 184 |
-
1. Thông tin mô hình:
|
| 185 |
-
- `--model_name` (bắt buộc): Tên mô hình.
|
| 186 |
-
- `--rvc_version` (mặc định: `v2`): Phiên bản RVC (`v1`, `v2`).
|
| 187 |
-
|
| 188 |
-
2. Cấu hình F0:
|
| 189 |
-
- `--f0_method` (mặc định: `rmvpe`): Phương pháp dự đoán F0 (`pm`, `dio`, `mangio-crepe-tiny`, `mangio-crepe-small`, `mangio-crepe-medium`, `mangio-crepe-large`, `mangio-crepe-full`, `crepe-tiny`, `crepe-small`, `crepe-medium`, `crepe-large`, `crepe-full`, `fcpe`, `fcpe-legacy`, `rmvpe`, `rmvpe-legacy`, `harvest`, `yin`, `pyin`, `swipe`).
|
| 190 |
-
- `--pitch_guidance` (mặc định: `True`): Có sử dụng hướng dẫn cao độ hay không.
|
| 191 |
-
- `--f0_autotune` (mặc định: `False`): Có tự động điều chỉnh F0 hay không.
|
| 192 |
-
- `--f0_autotune_strength` (mặc định: `1`): Cường độ hiệu chỉnh tự động F0.
|
| 193 |
-
|
| 194 |
-
3. Cấu hình xử lý:
|
| 195 |
-
- `--hop_length` (mặc định: `128`): Độ dài bước nhảy trong quá trình xử lý.
|
| 196 |
-
- `--cpu_cores` (mặc định: `2`): Số lượng luồng CPU sử dụng.
|
| 197 |
-
- `--gpu` (mặc định: `-`): Chỉ định GPU sử dụng (ví dụ: `0` cho GPU đầu tiên, `-` để tắt GPU).
|
| 198 |
-
- `--sample_rate` (bắt buộc): Tần số lấy mẫu của âm thanh đầu vào.
|
| 199 |
-
|
| 200 |
-
4. Cấu hình nhúng:
|
| 201 |
-
- `--embedder_model` (mặc định: `contentvec_base`): Tên mô hình nhúng.
|
| 202 |
-
- `--f0_onnx` (mặc định: `False`): Có sử dụng phiên bản ONNX của F0 hay không.
|
| 203 |
-
- `--embedders_mode` (mặc định: `fairseq`): Chế độ nhúng (`fairseq`, `transformers`, `onnx`).
|
| 204 |
-
|
| 205 |
-
4. RMS:
|
| 206 |
-
- `--rms_extract` (mặc định: False): Trích xuất thêm năng lượng rms.
|
| 207 |
-
""")
|
| 208 |
-
quit()
|
| 209 |
-
elif argv_is_allows[13] in argv:
|
| 210 |
-
print("""Các tham số của --preprocess:
|
| 211 |
-
1. Thông tin mô hình:
|
| 212 |
-
- `--model_name` (bắt buộc): Tên mô hình.
|
| 213 |
-
|
| 214 |
-
2. Cấu hình dữ liệu:
|
| 215 |
-
- `--dataset_path` (mặc định: `./dataset`): Đường dẫn thư mục chứa tệp dữ liệu.
|
| 216 |
-
- `--sample_rate` (bắt buộc): Tần số lấy mẫu của dữ liệu âm thanh.
|
| 217 |
-
|
| 218 |
-
3. Cấu hình xử lý:
|
| 219 |
-
- `--cpu_cores` (mặc định: `2`): Số lượng luồng CPU sử dụng.
|
| 220 |
-
- `--cut_preprocess` (mặc định: `True`): Có cắt tệp dữ liệu hay không.
|
| 221 |
-
- `--process_effects` (mặc định: `False`): Có áp dụng tiền xử lý hay không.
|
| 222 |
-
- `--clean_dataset` (mặc định: `False`): Có làm sạch tệp dữ liệu hay không.
|
| 223 |
-
- `--clean_strength` (mặc định: `0.7`): Độ mạnh của quá trình làm sạch dữ liệu.
|
| 224 |
-
""")
|
| 225 |
-
quit()
|
| 226 |
-
elif argv_is_allows[14] in argv:
|
| 227 |
-
print("""Các tham số của --separator_music:
|
| 228 |
-
1. Đường dẫn dữ liệu:
|
| 229 |
-
- `--input_path` (bắt buộc): Đường dẫn tệp âm thanh đầu vào.
|
| 230 |
-
- `--output_path` (mặc định: `./audios`): Thư mục lưu tệp đầu ra.
|
| 231 |
-
- `--format` (mặc định: `wav`): Định dạng xuất tệp (`wav`, `mp3`,...).
|
| 232 |
-
|
| 233 |
-
2. Cấu hình xử lý âm thanh:
|
| 234 |
-
- `--shifts` (mặc định: `2`): Số lượng dự đoán.
|
| 235 |
-
- `--segments_size` (mặc định: `256`): Kích thước phân đoạn âm thanh.
|
| 236 |
-
- `--overlap` (mặc định: `0.25`): Mức độ chồng lấn giữa các đoạn.
|
| 237 |
-
- `--mdx_hop_length` (mặc định: `1024`): Bước nhảy MDX khi xử lý.
|
| 238 |
-
- `--mdx_batch_size` (mặc định: `1`): Kích thước lô.
|
| 239 |
-
|
| 240 |
-
3. Xử lý làm sạch:
|
| 241 |
-
- `--clean_audio` (mặc định: `False`): Có làm sạch âm thanh hay không.
|
| 242 |
-
- `--clean_strength` (mặc định: `0.7`): Độ mạnh của bộ lọc làm sạch.
|
| 243 |
-
|
| 244 |
-
4. Cấu hình mô hình:
|
| 245 |
-
- `--model_name` (mặc định: `HT-Normal`): Mô hình tách nhạc (`Main_340`, `Main_390`, `Main_406`, `Main_427`, `Main_438`, `Inst_full_292`, `Inst_HQ_1`, `Inst_HQ_2`, `Inst_HQ_3`, `Inst_HQ_4`, `Inst_HQ_5`, `Kim_Vocal_1`, `Kim_Vocal_2`, `Kim_Inst`, `Inst_187_beta`, `Inst_82_beta`, `Inst_90_beta`, `Voc_FT`, `Crowd_HQ`, `Inst_1`, `Inst_2`, `Inst_3`, `MDXNET_1_9703`, `MDXNET_2_9682`, `MDXNET_3_9662`, `Inst_Main`, `MDXNET_Main`, `MDXNET_9482`, `HT-Normal`, `HT-Tuned`, `HD_MMI`, `HT_6S`).
|
| 246 |
-
- `--kara_model` (mặc định: `Version-1`): Phiên bản mô hình tách bè (`Version-1`, `Version-2`).
|
| 247 |
-
|
| 248 |
-
5. Hiệu ứng và xử lý hậu kỳ:
|
| 249 |
-
- `--backing` (mặc định: `False`): Có tách bè hay không.
|
| 250 |
-
- `--mdx_denoise` (mặc định: `False`): Có sử dụng khử nhiễu MDX hay không.
|
| 251 |
-
- `--reverb` (mặc định: `False`): Có tách vang hay không.
|
| 252 |
-
- `--backing_reverb` (mặc định: `False`): có tách vang cho giọng bè không.
|
| 253 |
-
|
| 254 |
-
6. Tần số lấy mẫu:
|
| 255 |
-
- `--sample_rate` (mặc định: `44100`): Tần số lấy mẫu của âm thanh đầu ra.
|
| 256 |
-
""")
|
| 257 |
-
quit()
|
| 258 |
-
elif argv_is_allows[15] in argv:
|
| 259 |
-
print("""Các tham số của --train:
|
| 260 |
-
1. Cấu hình mô hình:
|
| 261 |
-
- `--model_name` (bắt buộc): Tên mô hình.
|
| 262 |
-
- `--rvc_version` (mặc định: `v2`): Phiên bản RVC (`v1`, `v2`).
|
| 263 |
-
- `--model_author` (tùy chọn): Tác giả của mô hình.
|
| 264 |
-
|
| 265 |
-
2. Cấu hình lưu:
|
| 266 |
-
- `--save_every_epoch` (bắt buộc): Số kỷ nguyên giữa mỗi lần lưu.
|
| 267 |
-
- `--save_only_latest` (mặc định: `True`): Chỉ lưu điểm mới nhất.
|
| 268 |
-
- `--save_every_weights` (mặc định: `True`): Lưu tất cả trọng số của mô hình.
|
| 269 |
-
|
| 270 |
-
3. Cấu hình huấn luyện:
|
| 271 |
-
- `--total_epoch` (mặc định: `300`): Tổng số kỷ nguyên huấn luyện.
|
| 272 |
-
- `--batch_size` (mặc định: `8`): Kích thước lô trong quá trình huấn luyện.
|
| 273 |
-
- `--sample_rate` (bắt buộc): Tần số lấy mẫu của âm thanh.
|
| 274 |
-
|
| 275 |
-
4. Cấu hình thiết bị:
|
| 276 |
-
- `--gpu` (mặc định: `0`): Chỉ định GPU để sử dụng (số hoặc `-` nếu không dùng GPU).
|
| 277 |
-
- `--cache_data_in_gpu` (mặc định: `False`): Lưu dữ liệu vào GPU để tăng tốc.
|
| 278 |
-
|
| 279 |
-
5. Cấu hình huấn luyện nâng cao:
|
| 280 |
-
- `--pitch_guidance` (mặc định: `True`): Sử dụng hướng dẫn cao độ.
|
| 281 |
-
- `--g_pretrained_path` (mặc định: ``): Đường dẫn đến trọng số G đã huấn luyện trước.
|
| 282 |
-
- `--d_pretrained_path` (mặc định: ``): Đường dẫn đến trọng số D đã huấn luyện trước.
|
| 283 |
-
- `--vocoder` (mặc định: `Default`): Bộ mã hóa được sử dụng (`Default`, `MRF-HiFi-GAN`, `RefineGAN`).
|
| 284 |
-
- `--energy_use` (mặc định: `False`): Sử dụng năng lượng rms.
|
| 285 |
-
|
| 286 |
-
6. Phát hiện huấn luyện quá mức:
|
| 287 |
-
- `--overtraining_detector` (mặc định: `False`): Bật/tắt chế độ phát hiện huấn luyện quá mức.
|
| 288 |
-
- `--overtraining_threshold` (mặc định: `50`): Ngưỡng để xác định huấn luyện quá mức.
|
| 289 |
-
|
| 290 |
-
7. Xử lý dữ liệu:
|
| 291 |
-
- `--cleanup` (mặc định: `False`): Dọn dẹp tệp huấn luyện cũ để tiến hành huấn luyện lại từ đầu.
|
| 292 |
-
|
| 293 |
-
8. Tối ưu:
|
| 294 |
-
- `--checkpointing` (mặc định: `False`): Bật/tắt checkpointing để tiết kiệm RAM.
|
| 295 |
-
- `--deterministic` (mặc định: `False`): Khi bật sẽ sử dụng các thuật toán có tính xác định cao, đảm bảo rằng mỗi lần chạy cùng một dữ liệu đầu vào sẽ cho kết quả giống nhau.
|
| 296 |
-
- `--benchmark` (mặc định: `False`): Khi bật sẽ thử nghiệm và chọn thuật toán tối ưu nhất cho phần cứng và kích thước cụ thể.
|
| 297 |
-
- `--optimizer` (mặc định: `AdamW`): Trình tối ưu hóa được sử dụng (`AdamW`, `RAdam`).
|
| 298 |
-
""")
|
| 299 |
-
quit()
|
| 300 |
-
elif argv_is_allows[16] in argv:
|
| 301 |
-
print("""Sử dụng:
|
| 302 |
-
1. `--help_audio_effects`: Trợ giúp về phần thêm hiệu ứng âm thanh.
|
| 303 |
-
2. `--help_convert`: Trợ giúp về chuyển đổi âm thanh.
|
| 304 |
-
3. `--help_create_dataset`: Trợ giúp về tạo dữ liệu huấn luyện.
|
| 305 |
-
4. `--help_create_index`: Trợ giúp về tạo chỉ mục.
|
| 306 |
-
5. `--help_extract`: Trợ giúp về trích xuất dữ liệu huấn luyện.
|
| 307 |
-
6. `--help_preprocess`: Trợ giúp về xử lý trước dữ liệu.
|
| 308 |
-
7. `--help_separator_music`: Trợ giúp về tách nhạc.
|
| 309 |
-
8. `--help_train`: Trợ giúp về huấn luyện mô hình.
|
| 310 |
-
""")
|
| 311 |
-
quit()
|
| 312 |
-
|
| 313 |
-
if __name__ == "__main__":
|
| 314 |
-
import torch.multiprocessing as mp
|
| 315 |
-
|
| 316 |
-
if "--train" in argv: mp.set_start_method("spawn")
|
| 317 |
-
if "--preprocess" in argv or "--extract" in argv: mp.set_start_method("spawn", force=True)
|
| 318 |
-
|
| 319 |
-
main()
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
main/app/run_tensorboard.py
DELETED
|
@@ -1,33 +0,0 @@
|
|
| 1 |
-
import os
|
| 2 |
-
import sys
|
| 3 |
-
import time
|
| 4 |
-
import logging
|
| 5 |
-
import webbrowser
|
| 6 |
-
|
| 7 |
-
from tensorboard import program
|
| 8 |
-
|
| 9 |
-
sys.path.append(os.getcwd())
|
| 10 |
-
|
| 11 |
-
from main.configs.config import Config
|
| 12 |
-
|
| 13 |
-
config = Config()
|
| 14 |
-
translations = config.translations
|
| 15 |
-
|
| 16 |
-
def launch_tensorboard():
|
| 17 |
-
for l in ["root", "tensorboard"]:
|
| 18 |
-
logging.getLogger(l).setLevel(logging.ERROR)
|
| 19 |
-
|
| 20 |
-
tb = program.TensorBoard()
|
| 21 |
-
tb.configure(argv=[None, "--logdir", config.configs["logs_path"], f"--port={config.configs['tensorboard_port']}"])
|
| 22 |
-
url = tb.launch()
|
| 23 |
-
|
| 24 |
-
print(f"{translations['tensorboard_url']}: {url}")
|
| 25 |
-
if "--open" in sys.argv: webbrowser.open(url)
|
| 26 |
-
|
| 27 |
-
return f"{translations['tensorboard_url']}: {url}"
|
| 28 |
-
|
| 29 |
-
if __name__ == "__main__":
|
| 30 |
-
launch_tensorboard()
|
| 31 |
-
|
| 32 |
-
while 1:
|
| 33 |
-
time.sleep(5)
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
main/app/tabs/downloads/downloads.py
DELETED
|
@@ -1,119 +0,0 @@
|
|
| 1 |
-
import os
|
| 2 |
-
import sys
|
| 3 |
-
|
| 4 |
-
import gradio as gr
|
| 5 |
-
|
| 6 |
-
sys.path.append(os.getcwd())
|
| 7 |
-
|
| 8 |
-
from main.app.variables import translations, configs, models, model_options
|
| 9 |
-
from main.app.core.downloads import download_model, search_models, download_pretrained_model
|
| 10 |
-
from main.app.core.ui import change_download_choices, change_download_pretrained_choices, shutil_move
|
| 11 |
-
from main.app.core.process import fetch_pretrained_data, save_drop_model, update_sample_rate_dropdown
|
| 12 |
-
|
| 13 |
-
def download_tab():
|
| 14 |
-
with gr.TabItem(translations["downloads"], visible=configs.get("downloads_tab", True)):
|
| 15 |
-
gr.Markdown(translations["download_markdown"])
|
| 16 |
-
with gr.Row():
|
| 17 |
-
gr.Markdown(translations["download_markdown_2"])
|
| 18 |
-
with gr.Row():
|
| 19 |
-
with gr.Accordion(translations["model_download"], open=True):
|
| 20 |
-
with gr.Row():
|
| 21 |
-
downloadmodel = gr.Radio(label=translations["model_download_select"], choices=[translations["download_url"], translations["download_from_csv"], translations["search_models"], translations["upload"]], interactive=True, value=translations["download_url"])
|
| 22 |
-
with gr.Row():
|
| 23 |
-
gr.Markdown("___")
|
| 24 |
-
with gr.Column():
|
| 25 |
-
with gr.Row():
|
| 26 |
-
url_input = gr.Textbox(label=translations["model_url"], value="", placeholder="https://...", scale=6)
|
| 27 |
-
download_model_name = gr.Textbox(label=translations["modelname"], value="", placeholder=translations["modelname"], scale=2)
|
| 28 |
-
url_download = gr.Button(value=translations["downloads"], scale=2)
|
| 29 |
-
with gr.Column():
|
| 30 |
-
model_browser = gr.Dropdown(choices=models.keys(), label=translations["model_warehouse"], scale=8, allow_custom_value=True, visible=False)
|
| 31 |
-
download_from_browser = gr.Button(value=translations["get_model"], scale=2, variant="primary", visible=False)
|
| 32 |
-
with gr.Column():
|
| 33 |
-
search_name = gr.Textbox(label=translations["name_to_search"], placeholder=translations["modelname"], interactive=True, scale=8, visible=False)
|
| 34 |
-
search = gr.Button(translations["search_2"], scale=2, visible=False)
|
| 35 |
-
search_dropdown = gr.Dropdown(label=translations["select_download_model"], value="", choices=[], allow_custom_value=True, interactive=False, visible=False)
|
| 36 |
-
download = gr.Button(translations["downloads"], variant="primary", visible=False)
|
| 37 |
-
with gr.Column():
|
| 38 |
-
model_upload = gr.File(label=translations["drop_model"], file_types=[".pth", ".onnx", ".index", ".zip"], visible=False)
|
| 39 |
-
with gr.Row():
|
| 40 |
-
with gr.Accordion(translations["download_pretrained_2"], open=False):
|
| 41 |
-
with gr.Row():
|
| 42 |
-
pretrain_download_choices = gr.Radio(label=translations["model_download_select"], choices=[translations["download_url"], translations["list_model"], translations["upload"]], value=translations["download_url"], interactive=True)
|
| 43 |
-
with gr.Row():
|
| 44 |
-
gr.Markdown("___")
|
| 45 |
-
with gr.Column():
|
| 46 |
-
with gr.Row():
|
| 47 |
-
pretrainD = gr.Textbox(label=translations["pretrained_url"].format(dg="D"), value="", placeholder="https://...", interactive=True, scale=4)
|
| 48 |
-
pretrainG = gr.Textbox(label=translations["pretrained_url"].format(dg="G"), value="", placeholder="https://...", interactive=True, scale=4)
|
| 49 |
-
download_pretrain_button = gr.Button(translations["downloads"], scale=2)
|
| 50 |
-
with gr.Column():
|
| 51 |
-
with gr.Row():
|
| 52 |
-
pretrain_choices = gr.Dropdown(label=translations["select_pretrain"], info=translations["select_pretrain_info"], choices=list(fetch_pretrained_data().keys()), value="Titan_Medium", allow_custom_value=True, interactive=True, scale=6, visible=False)
|
| 53 |
-
sample_rate_pretrain = gr.Dropdown(label=translations["pretrain_sr"], info=translations["pretrain_sr"], choices=["48k", "40k", "32k"], value="48k", interactive=True, visible=False)
|
| 54 |
-
download_pretrain_choices_button = gr.Button(translations["downloads"], scale=2, variant="primary", visible=False)
|
| 55 |
-
with gr.Row():
|
| 56 |
-
pretrain_upload_g = gr.File(label=translations["drop_pretrain"].format(dg="G"), file_types=[".pth"], visible=False)
|
| 57 |
-
pretrain_upload_d = gr.File(label=translations["drop_pretrain"].format(dg="D"), file_types=[".pth"], visible=False)
|
| 58 |
-
with gr.Row():
|
| 59 |
-
url_download.click(
|
| 60 |
-
fn=download_model,
|
| 61 |
-
inputs=[
|
| 62 |
-
url_input,
|
| 63 |
-
download_model_name
|
| 64 |
-
],
|
| 65 |
-
outputs=[url_input],
|
| 66 |
-
api_name="download_model"
|
| 67 |
-
)
|
| 68 |
-
download_from_browser.click(
|
| 69 |
-
fn=lambda model: download_model(models[model], model),
|
| 70 |
-
inputs=[model_browser],
|
| 71 |
-
outputs=[model_browser],
|
| 72 |
-
api_name="download_browser"
|
| 73 |
-
)
|
| 74 |
-
with gr.Row():
|
| 75 |
-
downloadmodel.change(fn=change_download_choices, inputs=[downloadmodel], outputs=[url_input, download_model_name, url_download, model_browser, download_from_browser, search_name, search, search_dropdown, download, model_upload])
|
| 76 |
-
search.click(fn=search_models, inputs=[search_name], outputs=[search_dropdown, download])
|
| 77 |
-
model_upload.upload(fn=save_drop_model, inputs=[model_upload], outputs=[model_upload])
|
| 78 |
-
download.click(
|
| 79 |
-
fn=lambda model: download_model(model_options[model], model),
|
| 80 |
-
inputs=[search_dropdown],
|
| 81 |
-
outputs=[search_dropdown],
|
| 82 |
-
api_name="search_models"
|
| 83 |
-
)
|
| 84 |
-
with gr.Row():
|
| 85 |
-
pretrain_download_choices.change(fn=change_download_pretrained_choices, inputs=[pretrain_download_choices], outputs=[pretrainD, pretrainG, download_pretrain_button, pretrain_choices, sample_rate_pretrain, download_pretrain_choices_button, pretrain_upload_d, pretrain_upload_g])
|
| 86 |
-
pretrain_choices.change(fn=update_sample_rate_dropdown, inputs=[pretrain_choices], outputs=[sample_rate_pretrain])
|
| 87 |
-
with gr.Row():
|
| 88 |
-
download_pretrain_button.click(
|
| 89 |
-
fn=download_pretrained_model,
|
| 90 |
-
inputs=[
|
| 91 |
-
pretrain_download_choices,
|
| 92 |
-
pretrainD,
|
| 93 |
-
pretrainG
|
| 94 |
-
],
|
| 95 |
-
outputs=[pretrainD, pretrainG],
|
| 96 |
-
api_name="download_pretrain_link"
|
| 97 |
-
)
|
| 98 |
-
download_pretrain_choices_button.click(
|
| 99 |
-
fn=download_pretrained_model,
|
| 100 |
-
inputs=[
|
| 101 |
-
pretrain_download_choices,
|
| 102 |
-
pretrain_choices,
|
| 103 |
-
sample_rate_pretrain
|
| 104 |
-
],
|
| 105 |
-
outputs=[pretrain_choices],
|
| 106 |
-
api_name="download_pretrain_choices"
|
| 107 |
-
)
|
| 108 |
-
pretrain_upload_g.upload(
|
| 109 |
-
fn=lambda pretrain_upload_g: shutil_move(pretrain_upload_g.name, configs["pretrained_custom_path"]),
|
| 110 |
-
inputs=[pretrain_upload_g],
|
| 111 |
-
outputs=[],
|
| 112 |
-
api_name="upload_pretrain_g"
|
| 113 |
-
)
|
| 114 |
-
pretrain_upload_d.upload(
|
| 115 |
-
fn=lambda pretrain_upload_d: shutil_move(pretrain_upload_d.name, configs["pretrained_custom_path"]),
|
| 116 |
-
inputs=[pretrain_upload_d],
|
| 117 |
-
outputs=[],
|
| 118 |
-
api_name="upload_pretrain_d"
|
| 119 |
-
)
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
main/app/tabs/editing/child/audio_effects.py
DELETED
|
@@ -1,393 +0,0 @@
|
|
| 1 |
-
import os
|
| 2 |
-
import sys
|
| 3 |
-
|
| 4 |
-
import gradio as gr
|
| 5 |
-
|
| 6 |
-
sys.path.append(os.getcwd())
|
| 7 |
-
|
| 8 |
-
from main.app.core.editing import audio_effects
|
| 9 |
-
from main.app.core.presets import audio_effect_load_presets, audio_effect_save_presets
|
| 10 |
-
from main.app.core.ui import visible, change_audios_choices, change_effect_preset_choices, shutil_move
|
| 11 |
-
from main.app.variables import translations, paths_for_files, sample_rate_choice, audio_effect_presets_file, configs
|
| 12 |
-
|
| 13 |
-
def audio_effects_tab():
|
| 14 |
-
with gr.Row():
|
| 15 |
-
gr.Markdown(translations["audio_effects_edit"])
|
| 16 |
-
with gr.Row():
|
| 17 |
-
with gr.Column():
|
| 18 |
-
with gr.Row():
|
| 19 |
-
reverb_check_box = gr.Checkbox(label=translations["reverb"], value=False, interactive=True)
|
| 20 |
-
chorus_check_box = gr.Checkbox(label=translations["chorus"], value=False, interactive=True)
|
| 21 |
-
delay_check_box = gr.Checkbox(label=translations["delay"], value=False, interactive=True)
|
| 22 |
-
phaser_check_box = gr.Checkbox(label=translations["phaser"], value=False, interactive=True)
|
| 23 |
-
compressor_check_box = gr.Checkbox(label=translations["compressor"], value=False, interactive=True)
|
| 24 |
-
more_options = gr.Checkbox(label=translations["more_option"], value=False, interactive=True)
|
| 25 |
-
with gr.Row():
|
| 26 |
-
with gr.Accordion(translations["input_output"], open=False):
|
| 27 |
-
with gr.Row():
|
| 28 |
-
upload_audio = gr.File(label=translations["drop_audio"], file_types=[".wav", ".mp3", ".flac", ".ogg", ".opus", ".m4a", ".mp4", ".aac", ".alac", ".wma", ".aiff", ".webm", ".ac3"])
|
| 29 |
-
with gr.Row():
|
| 30 |
-
audio_in_path = gr.Dropdown(label=translations["input_audio"], value="", choices=paths_for_files, info=translations["provide_audio"], interactive=True, allow_custom_value=True)
|
| 31 |
-
audio_out_path = gr.Textbox(label=translations["output_audio"], value="audios/audio_effects.wav", placeholder="audios/audio_effects.wav", info=translations["provide_output"], interactive=True)
|
| 32 |
-
with gr.Row():
|
| 33 |
-
with gr.Column():
|
| 34 |
-
audio_combination = gr.Checkbox(label=translations["merge_instruments"], value=False, interactive=True)
|
| 35 |
-
audio_combination_input = gr.Dropdown(label=translations["input_audio"], value="", choices=paths_for_files, info=translations["provide_audio"], interactive=True, allow_custom_value=True, visible=audio_combination.value)
|
| 36 |
-
with gr.Row():
|
| 37 |
-
main_vol = gr.Slider(minimum=-80, maximum=80, label=translations["main_volume"], info=translations["main_volume_info"], value=-4, step=1, interactive=True, visible=audio_combination.value)
|
| 38 |
-
combine_vol = gr.Slider(minimum=-80, maximum=80, label=translations["combination_volume"], info=translations["combination_volume_info"], value=-7, step=1, interactive=True, visible=audio_combination.value)
|
| 39 |
-
with gr.Row():
|
| 40 |
-
audio_effects_refresh = gr.Button(translations["refresh"])
|
| 41 |
-
with gr.Row():
|
| 42 |
-
audio_output_format = gr.Radio(label=translations["export_format"], info=translations["export_info"], choices=["wav", "mp3", "flac", "ogg", "opus", "m4a", "mp4", "aac", "alac", "wma", "aiff", "webm", "ac3"], value="wav", interactive=True)
|
| 43 |
-
with gr.Row():
|
| 44 |
-
with gr.Accordion(translations["use_presets"], open=False):
|
| 45 |
-
with gr.Row():
|
| 46 |
-
presets_name = gr.Dropdown(label=translations["file_preset"], choices=audio_effect_presets_file, value=audio_effect_presets_file[0] if len(audio_effect_presets_file) > 0 else '', interactive=True, allow_custom_value=True)
|
| 47 |
-
with gr.Row():
|
| 48 |
-
load_click = gr.Button(translations["load_file"], variant="primary")
|
| 49 |
-
refresh_click = gr.Button(translations["refresh"])
|
| 50 |
-
with gr.Accordion(translations["export_file"], open=False):
|
| 51 |
-
with gr.Row():
|
| 52 |
-
with gr.Column():
|
| 53 |
-
name_to_save_file = gr.Textbox(label=translations["filename_to_save"])
|
| 54 |
-
save_file_button = gr.Button(translations["export_file"])
|
| 55 |
-
with gr.Row():
|
| 56 |
-
upload_presets = gr.File(label=translations["upload_presets"], file_types=[".effect.json"])
|
| 57 |
-
with gr.Row():
|
| 58 |
-
apply_effects_button = gr.Button(translations["apply"], variant="primary", scale=2)
|
| 59 |
-
with gr.Row():
|
| 60 |
-
with gr.Column():
|
| 61 |
-
with gr.Row():
|
| 62 |
-
with gr.Accordion(translations["reverb"], open=False, visible=reverb_check_box.value) as reverb_accordion:
|
| 63 |
-
reverb_freeze_mode = gr.Checkbox(label=translations["reverb_freeze"], info=translations["reverb_freeze_info"], value=False, interactive=True)
|
| 64 |
-
reverb_room_size = gr.Slider(minimum=0, maximum=1, step=0.01, value=0.15, label=translations["room_size"], info=translations["room_size_info"], interactive=True)
|
| 65 |
-
reverb_damping = gr.Slider(minimum=0, maximum=1, step=0.01, value=0.7, label=translations["damping"], info=translations["damping_info"], interactive=True)
|
| 66 |
-
reverb_wet_level = gr.Slider(minimum=0, maximum=1, step=0.01, value=0.2, label=translations["wet_level"], info=translations["wet_level_info"], interactive=True)
|
| 67 |
-
reverb_dry_level = gr.Slider(minimum=0, maximum=1, step=0.01, value=0.8, label=translations["dry_level"], info=translations["dry_level_info"], interactive=True)
|
| 68 |
-
reverb_width = gr.Slider(minimum=0, maximum=1, step=0.01, value=1, label=translations["width"], info=translations["width_info"], interactive=True)
|
| 69 |
-
with gr.Row():
|
| 70 |
-
with gr.Accordion(translations["chorus"], open=False, visible=chorus_check_box.value) as chorus_accordion:
|
| 71 |
-
chorus_depth = gr.Slider(minimum=0, maximum=1, step=0.01, value=0.5, label=translations["chorus_depth"], info=translations["chorus_depth_info"], interactive=True)
|
| 72 |
-
chorus_rate_hz = gr.Slider(minimum=0.1, maximum=10, step=0.1, value=1.5, label=translations["chorus_rate_hz"], info=translations["chorus_rate_hz_info"], interactive=True)
|
| 73 |
-
chorus_mix = gr.Slider(minimum=0, maximum=1, step=0.01, value=0.5, label=translations["chorus_mix"], info=translations["chorus_mix_info"], interactive=True)
|
| 74 |
-
chorus_centre_delay_ms = gr.Slider(minimum=0, maximum=50, step=1, value=10, label=translations["chorus_centre_delay_ms"], info=translations["chorus_centre_delay_ms_info"], interactive=True)
|
| 75 |
-
chorus_feedback = gr.Slider(minimum=-1, maximum=1, step=0.01, value=0, label=translations["chorus_feedback"], info=translations["chorus_feedback_info"], interactive=True)
|
| 76 |
-
with gr.Row():
|
| 77 |
-
with gr.Accordion(translations["delay"], open=False, visible=delay_check_box.value) as delay_accordion:
|
| 78 |
-
delay_second = gr.Slider(minimum=0, maximum=5, step=0.01, value=0.5, label=translations["delay_seconds"], info=translations["delay_seconds_info"], interactive=True)
|
| 79 |
-
delay_feedback = gr.Slider(minimum=0, maximum=1, step=0.01, value=0.5, label=translations["delay_feedback"], info=translations["delay_feedback_info"], interactive=True)
|
| 80 |
-
delay_mix = gr.Slider(minimum=0, maximum=1, step=0.01, value=0.5, label=translations["delay_mix"], info=translations["delay_mix_info"], interactive=True)
|
| 81 |
-
with gr.Column():
|
| 82 |
-
with gr.Row():
|
| 83 |
-
with gr.Accordion(translations["more_option"], open=False, visible=more_options.value) as more_accordion:
|
| 84 |
-
with gr.Row():
|
| 85 |
-
fade = gr.Checkbox(label=translations["fade"], value=False, interactive=True)
|
| 86 |
-
bass_or_treble = gr.Checkbox(label=translations["bass_or_treble"], value=False, interactive=True)
|
| 87 |
-
limiter = gr.Checkbox(label=translations["limiter"], value=False, interactive=True)
|
| 88 |
-
resample_checkbox = gr.Checkbox(label=translations["resample"], value=False, interactive=True)
|
| 89 |
-
with gr.Row():
|
| 90 |
-
distortion_checkbox = gr.Checkbox(label=translations["distortion"], value=False, interactive=True)
|
| 91 |
-
gain_checkbox = gr.Checkbox(label=translations["gain"], value=False, interactive=True)
|
| 92 |
-
bitcrush_checkbox = gr.Checkbox(label=translations["bitcrush"], value=False, interactive=True)
|
| 93 |
-
clipping_checkbox = gr.Checkbox(label=translations["clipping"], value=False, interactive=True)
|
| 94 |
-
with gr.Accordion(translations["fade"], open=True, visible=fade.value) as fade_accordion:
|
| 95 |
-
with gr.Row():
|
| 96 |
-
fade_in = gr.Slider(minimum=0, maximum=10000, step=100, value=0, label=translations["fade_in"], info=translations["fade_in_info"], interactive=True)
|
| 97 |
-
fade_out = gr.Slider(minimum=0, maximum=10000, step=100, value=0, label=translations["fade_out"], info=translations["fade_out_info"], interactive=True)
|
| 98 |
-
with gr.Accordion(translations["bass_or_treble"], open=True, visible=bass_or_treble.value) as bass_treble_accordion:
|
| 99 |
-
with gr.Row():
|
| 100 |
-
bass_boost = gr.Slider(minimum=0, maximum=20, step=1, value=0, label=translations["bass_boost"], info=translations["bass_boost_info"], interactive=True)
|
| 101 |
-
bass_frequency = gr.Slider(minimum=20, maximum=200, step=10, value=100, label=translations["bass_frequency"], info=translations["bass_frequency_info"], interactive=True)
|
| 102 |
-
with gr.Row():
|
| 103 |
-
treble_boost = gr.Slider(minimum=0, maximum=20, step=1, value=0, label=translations["treble_boost"], info=translations["treble_boost_info"], interactive=True)
|
| 104 |
-
treble_frequency = gr.Slider(minimum=1000, maximum=10000, step=500, value=3000, label=translations["treble_frequency"], info=translations["treble_frequency_info"], interactive=True)
|
| 105 |
-
with gr.Accordion(translations["limiter"], open=True, visible=limiter.value) as limiter_accordion:
|
| 106 |
-
with gr.Row():
|
| 107 |
-
limiter_threshold_db = gr.Slider(minimum=-60, maximum=0, step=1, value=-1, label=translations["limiter_threshold_db"], info=translations["limiter_threshold_db_info"], interactive=True)
|
| 108 |
-
limiter_release_ms = gr.Slider(minimum=10, maximum=1000, step=1, value=100, label=translations["limiter_release_ms"], info=translations["limiter_release_ms_info"], interactive=True)
|
| 109 |
-
with gr.Column():
|
| 110 |
-
pitch_shift_semitones = gr.Slider(minimum=-20, maximum=20, step=1, value=0, label=translations["pitch"], info=translations["pitch_info"], interactive=True)
|
| 111 |
-
audio_effect_resample_sr = gr.Radio(choices=[0]+sample_rate_choice, value=0, label=translations["resample"], info=translations["resample_info"], interactive=True, visible=resample_checkbox.value)
|
| 112 |
-
distortion_drive_db = gr.Slider(minimum=0, maximum=50, step=1, value=20, label=translations["distortion"], info=translations["distortion_info"], interactive=True, visible=distortion_checkbox.value)
|
| 113 |
-
gain_db = gr.Slider(minimum=-60, maximum=60, step=1, value=0, label=translations["gain"], info=translations["gain_info"], interactive=True, visible=gain_checkbox.value)
|
| 114 |
-
clipping_threshold_db = gr.Slider(minimum=-60, maximum=0, step=1, value=-1, label=translations["clipping_threshold_db"], info=translations["clipping_threshold_db_info"], interactive=True, visible=clipping_checkbox.value)
|
| 115 |
-
bitcrush_bit_depth = gr.Slider(minimum=1, maximum=24, step=1, value=16, label=translations["bitcrush_bit_depth"], info=translations["bitcrush_bit_depth_info"], interactive=True, visible=bitcrush_checkbox.value)
|
| 116 |
-
with gr.Row():
|
| 117 |
-
with gr.Accordion(translations["phaser"], open=False, visible=phaser_check_box.value) as phaser_accordion:
|
| 118 |
-
phaser_depth = gr.Slider(minimum=0, maximum=1, step=0.01, value=0.5, label=translations["phaser_depth"], info=translations["phaser_depth_info"], interactive=True)
|
| 119 |
-
phaser_rate_hz = gr.Slider(minimum=0.1, maximum=10, step=0.1, value=1, label=translations["phaser_rate_hz"], info=translations["phaser_rate_hz_info"], interactive=True)
|
| 120 |
-
phaser_mix = gr.Slider(minimum=0, maximum=1, step=0.01, value=0.5, label=translations["phaser_mix"], info=translations["phaser_mix_info"], interactive=True)
|
| 121 |
-
phaser_centre_frequency_hz = gr.Slider(minimum=50, maximum=5000, step=10, value=1000, label=translations["phaser_centre_frequency_hz"], info=translations["phaser_centre_frequency_hz_info"], interactive=True)
|
| 122 |
-
phaser_feedback = gr.Slider(minimum=-1, maximum=1, step=0.01, value=0, label=translations["phaser_feedback"], info=translations["phaser_feedback_info"], interactive=True)
|
| 123 |
-
with gr.Row():
|
| 124 |
-
with gr.Accordion(translations["compressor"], open=False, visible=compressor_check_box.value) as compressor_accordion:
|
| 125 |
-
compressor_threshold_db = gr.Slider(minimum=-60, maximum=0, step=1, value=-20, label=translations["compressor_threshold_db"], info=translations["compressor_threshold_db_info"], interactive=True)
|
| 126 |
-
compressor_ratio = gr.Slider(minimum=1, maximum=20, step=0.1, value=1, label=translations["compressor_ratio"], info=translations["compressor_ratio_info"], interactive=True)
|
| 127 |
-
compressor_attack_ms = gr.Slider(minimum=0.1, maximum=100, step=0.1, value=10, label=translations["compressor_attack_ms"], info=translations["compressor_attack_ms_info"], interactive=True)
|
| 128 |
-
compressor_release_ms = gr.Slider(minimum=10, maximum=1000, step=1, value=100, label=translations["compressor_release_ms"], info=translations["compressor_release_ms_info"], interactive=True)
|
| 129 |
-
with gr.Row():
|
| 130 |
-
gr.Markdown(translations["output_audio"])
|
| 131 |
-
with gr.Row():
|
| 132 |
-
audio_play_input = gr.Audio(show_download_button=True, interactive=False, label=translations["input_audio"])
|
| 133 |
-
audio_play_output = gr.Audio(show_download_button=True, interactive=False, label=translations["output_audio"])
|
| 134 |
-
with gr.Row():
|
| 135 |
-
reverb_check_box.change(fn=visible, inputs=[reverb_check_box], outputs=[reverb_accordion])
|
| 136 |
-
chorus_check_box.change(fn=visible, inputs=[chorus_check_box], outputs=[chorus_accordion])
|
| 137 |
-
delay_check_box.change(fn=visible, inputs=[delay_check_box], outputs=[delay_accordion])
|
| 138 |
-
with gr.Row():
|
| 139 |
-
compressor_check_box.change(fn=visible, inputs=[compressor_check_box], outputs=[compressor_accordion])
|
| 140 |
-
phaser_check_box.change(fn=visible, inputs=[phaser_check_box], outputs=[phaser_accordion])
|
| 141 |
-
more_options.change(fn=visible, inputs=[more_options], outputs=[more_accordion])
|
| 142 |
-
with gr.Row():
|
| 143 |
-
fade.change(fn=visible, inputs=[fade], outputs=[fade_accordion])
|
| 144 |
-
bass_or_treble.change(fn=visible, inputs=[bass_or_treble], outputs=[bass_treble_accordion])
|
| 145 |
-
limiter.change(fn=visible, inputs=[limiter], outputs=[limiter_accordion])
|
| 146 |
-
resample_checkbox.change(fn=visible, inputs=[resample_checkbox], outputs=[audio_effect_resample_sr])
|
| 147 |
-
with gr.Row():
|
| 148 |
-
distortion_checkbox.change(fn=visible, inputs=[distortion_checkbox], outputs=[distortion_drive_db])
|
| 149 |
-
gain_checkbox.change(fn=visible, inputs=[gain_checkbox], outputs=[gain_db])
|
| 150 |
-
clipping_checkbox.change(fn=visible, inputs=[clipping_checkbox], outputs=[clipping_threshold_db])
|
| 151 |
-
bitcrush_checkbox.change(fn=visible, inputs=[bitcrush_checkbox], outputs=[bitcrush_bit_depth])
|
| 152 |
-
with gr.Row():
|
| 153 |
-
upload_audio.upload(fn=lambda audio_in: shutil_move(audio_in.name, configs["audios_path"]), inputs=[upload_audio], outputs=[audio_in_path])
|
| 154 |
-
audio_in_path.change(fn=lambda audio: audio if audio else None, inputs=[audio_in_path], outputs=[audio_play_input])
|
| 155 |
-
audio_effects_refresh.click(fn=lambda a, b: [change_audios_choices(a), change_audios_choices(b)], inputs=[audio_in_path, audio_combination_input], outputs=[audio_in_path, audio_combination_input])
|
| 156 |
-
with gr.Row():
|
| 157 |
-
more_options.change(fn=lambda: [False]*8, inputs=[], outputs=[fade, bass_or_treble, limiter, resample_checkbox, distortion_checkbox, gain_checkbox, clipping_checkbox, bitcrush_checkbox])
|
| 158 |
-
audio_combination.change(fn=visible, inputs=[audio_combination], outputs=[audio_combination_input])
|
| 159 |
-
audio_combination.change(fn=lambda a: [visible(a)]*2, inputs=[audio_combination], outputs=[main_vol, combine_vol])
|
| 160 |
-
with gr.Row():
|
| 161 |
-
upload_presets.upload(fn=lambda audio_in: shutil_move(audio_in.name, configs["presets_path"]), inputs=[upload_presets], outputs=[presets_name])
|
| 162 |
-
refresh_click.click(fn=change_effect_preset_choices, inputs=[], outputs=[presets_name])
|
| 163 |
-
with gr.Row():
|
| 164 |
-
load_click.click(
|
| 165 |
-
fn=audio_effect_load_presets,
|
| 166 |
-
inputs=[
|
| 167 |
-
presets_name,
|
| 168 |
-
resample_checkbox,
|
| 169 |
-
audio_effect_resample_sr,
|
| 170 |
-
chorus_depth,
|
| 171 |
-
chorus_rate_hz,
|
| 172 |
-
chorus_mix,
|
| 173 |
-
chorus_centre_delay_ms,
|
| 174 |
-
chorus_feedback,
|
| 175 |
-
distortion_drive_db,
|
| 176 |
-
reverb_room_size,
|
| 177 |
-
reverb_damping,
|
| 178 |
-
reverb_wet_level,
|
| 179 |
-
reverb_dry_level,
|
| 180 |
-
reverb_width,
|
| 181 |
-
reverb_freeze_mode,
|
| 182 |
-
pitch_shift_semitones,
|
| 183 |
-
delay_second,
|
| 184 |
-
delay_feedback,
|
| 185 |
-
delay_mix,
|
| 186 |
-
compressor_threshold_db,
|
| 187 |
-
compressor_ratio,
|
| 188 |
-
compressor_attack_ms,
|
| 189 |
-
compressor_release_ms,
|
| 190 |
-
limiter_threshold_db,
|
| 191 |
-
limiter_release_ms,
|
| 192 |
-
gain_db,
|
| 193 |
-
bitcrush_bit_depth,
|
| 194 |
-
clipping_threshold_db,
|
| 195 |
-
phaser_rate_hz,
|
| 196 |
-
phaser_depth,
|
| 197 |
-
phaser_centre_frequency_hz,
|
| 198 |
-
phaser_feedback,
|
| 199 |
-
phaser_mix,
|
| 200 |
-
bass_boost,
|
| 201 |
-
bass_frequency,
|
| 202 |
-
treble_boost,
|
| 203 |
-
treble_frequency,
|
| 204 |
-
fade_in,
|
| 205 |
-
fade_out,
|
| 206 |
-
chorus_check_box,
|
| 207 |
-
distortion_checkbox,
|
| 208 |
-
reverb_check_box,
|
| 209 |
-
delay_check_box,
|
| 210 |
-
compressor_check_box,
|
| 211 |
-
limiter,
|
| 212 |
-
gain_checkbox,
|
| 213 |
-
bitcrush_checkbox,
|
| 214 |
-
clipping_checkbox,
|
| 215 |
-
phaser_check_box,
|
| 216 |
-
bass_or_treble,
|
| 217 |
-
fade
|
| 218 |
-
],
|
| 219 |
-
outputs=[
|
| 220 |
-
resample_checkbox,
|
| 221 |
-
audio_effect_resample_sr,
|
| 222 |
-
chorus_depth,
|
| 223 |
-
chorus_rate_hz,
|
| 224 |
-
chorus_mix,
|
| 225 |
-
chorus_centre_delay_ms,
|
| 226 |
-
chorus_feedback,
|
| 227 |
-
distortion_drive_db,
|
| 228 |
-
reverb_room_size,
|
| 229 |
-
reverb_damping,
|
| 230 |
-
reverb_wet_level,
|
| 231 |
-
reverb_dry_level,
|
| 232 |
-
reverb_width,
|
| 233 |
-
reverb_freeze_mode,
|
| 234 |
-
pitch_shift_semitones,
|
| 235 |
-
delay_second,
|
| 236 |
-
delay_feedback,
|
| 237 |
-
delay_mix,
|
| 238 |
-
compressor_threshold_db,
|
| 239 |
-
compressor_ratio,
|
| 240 |
-
compressor_attack_ms,
|
| 241 |
-
compressor_release_ms,
|
| 242 |
-
limiter_threshold_db,
|
| 243 |
-
limiter_release_ms,
|
| 244 |
-
gain_db,
|
| 245 |
-
bitcrush_bit_depth,
|
| 246 |
-
clipping_threshold_db,
|
| 247 |
-
phaser_rate_hz,
|
| 248 |
-
phaser_depth,
|
| 249 |
-
phaser_centre_frequency_hz,
|
| 250 |
-
phaser_feedback,
|
| 251 |
-
phaser_mix,
|
| 252 |
-
bass_boost,
|
| 253 |
-
bass_frequency,
|
| 254 |
-
treble_boost,
|
| 255 |
-
treble_frequency,
|
| 256 |
-
fade_in,
|
| 257 |
-
fade_out,
|
| 258 |
-
chorus_check_box,
|
| 259 |
-
distortion_checkbox,
|
| 260 |
-
reverb_check_box,
|
| 261 |
-
delay_check_box,
|
| 262 |
-
compressor_check_box,
|
| 263 |
-
limiter,
|
| 264 |
-
gain_checkbox,
|
| 265 |
-
bitcrush_checkbox,
|
| 266 |
-
clipping_checkbox,
|
| 267 |
-
phaser_check_box,
|
| 268 |
-
bass_or_treble,
|
| 269 |
-
fade
|
| 270 |
-
],
|
| 271 |
-
)
|
| 272 |
-
save_file_button.click(
|
| 273 |
-
fn=audio_effect_save_presets,
|
| 274 |
-
inputs=[
|
| 275 |
-
name_to_save_file,
|
| 276 |
-
resample_checkbox,
|
| 277 |
-
audio_effect_resample_sr,
|
| 278 |
-
chorus_depth,
|
| 279 |
-
chorus_rate_hz,
|
| 280 |
-
chorus_mix,
|
| 281 |
-
chorus_centre_delay_ms,
|
| 282 |
-
chorus_feedback,
|
| 283 |
-
distortion_drive_db,
|
| 284 |
-
reverb_room_size,
|
| 285 |
-
reverb_damping,
|
| 286 |
-
reverb_wet_level,
|
| 287 |
-
reverb_dry_level,
|
| 288 |
-
reverb_width,
|
| 289 |
-
reverb_freeze_mode,
|
| 290 |
-
pitch_shift_semitones,
|
| 291 |
-
delay_second,
|
| 292 |
-
delay_feedback,
|
| 293 |
-
delay_mix,
|
| 294 |
-
compressor_threshold_db,
|
| 295 |
-
compressor_ratio,
|
| 296 |
-
compressor_attack_ms,
|
| 297 |
-
compressor_release_ms,
|
| 298 |
-
limiter_threshold_db,
|
| 299 |
-
limiter_release_ms,
|
| 300 |
-
gain_db,
|
| 301 |
-
bitcrush_bit_depth,
|
| 302 |
-
clipping_threshold_db,
|
| 303 |
-
phaser_rate_hz,
|
| 304 |
-
phaser_depth,
|
| 305 |
-
phaser_centre_frequency_hz,
|
| 306 |
-
phaser_feedback,
|
| 307 |
-
phaser_mix,
|
| 308 |
-
bass_boost,
|
| 309 |
-
bass_frequency,
|
| 310 |
-
treble_boost,
|
| 311 |
-
treble_frequency,
|
| 312 |
-
fade_in,
|
| 313 |
-
fade_out,
|
| 314 |
-
chorus_check_box,
|
| 315 |
-
distortion_checkbox,
|
| 316 |
-
reverb_check_box,
|
| 317 |
-
delay_check_box,
|
| 318 |
-
compressor_check_box,
|
| 319 |
-
limiter,
|
| 320 |
-
gain_checkbox,
|
| 321 |
-
bitcrush_checkbox,
|
| 322 |
-
clipping_checkbox,
|
| 323 |
-
phaser_check_box,
|
| 324 |
-
bass_or_treble,
|
| 325 |
-
fade
|
| 326 |
-
],
|
| 327 |
-
outputs=[presets_name]
|
| 328 |
-
)
|
| 329 |
-
with gr.Row():
|
| 330 |
-
apply_effects_button.click(
|
| 331 |
-
fn=audio_effects,
|
| 332 |
-
inputs=[
|
| 333 |
-
audio_in_path,
|
| 334 |
-
audio_out_path,
|
| 335 |
-
resample_checkbox,
|
| 336 |
-
audio_effect_resample_sr,
|
| 337 |
-
chorus_depth,
|
| 338 |
-
chorus_rate_hz,
|
| 339 |
-
chorus_mix,
|
| 340 |
-
chorus_centre_delay_ms,
|
| 341 |
-
chorus_feedback,
|
| 342 |
-
distortion_drive_db,
|
| 343 |
-
reverb_room_size,
|
| 344 |
-
reverb_damping,
|
| 345 |
-
reverb_wet_level,
|
| 346 |
-
reverb_dry_level,
|
| 347 |
-
reverb_width,
|
| 348 |
-
reverb_freeze_mode,
|
| 349 |
-
pitch_shift_semitones,
|
| 350 |
-
delay_second,
|
| 351 |
-
delay_feedback,
|
| 352 |
-
delay_mix,
|
| 353 |
-
compressor_threshold_db,
|
| 354 |
-
compressor_ratio,
|
| 355 |
-
compressor_attack_ms,
|
| 356 |
-
compressor_release_ms,
|
| 357 |
-
limiter_threshold_db,
|
| 358 |
-
limiter_release_ms,
|
| 359 |
-
gain_db,
|
| 360 |
-
bitcrush_bit_depth,
|
| 361 |
-
clipping_threshold_db,
|
| 362 |
-
phaser_rate_hz,
|
| 363 |
-
phaser_depth,
|
| 364 |
-
phaser_centre_frequency_hz,
|
| 365 |
-
phaser_feedback,
|
| 366 |
-
phaser_mix,
|
| 367 |
-
bass_boost,
|
| 368 |
-
bass_frequency,
|
| 369 |
-
treble_boost,
|
| 370 |
-
treble_frequency,
|
| 371 |
-
fade_in,
|
| 372 |
-
fade_out,
|
| 373 |
-
audio_output_format,
|
| 374 |
-
chorus_check_box,
|
| 375 |
-
distortion_checkbox,
|
| 376 |
-
reverb_check_box,
|
| 377 |
-
delay_check_box,
|
| 378 |
-
compressor_check_box,
|
| 379 |
-
limiter,
|
| 380 |
-
gain_checkbox,
|
| 381 |
-
bitcrush_checkbox,
|
| 382 |
-
clipping_checkbox,
|
| 383 |
-
phaser_check_box,
|
| 384 |
-
bass_or_treble,
|
| 385 |
-
fade,
|
| 386 |
-
audio_combination,
|
| 387 |
-
audio_combination_input,
|
| 388 |
-
main_vol,
|
| 389 |
-
combine_vol
|
| 390 |
-
],
|
| 391 |
-
outputs=[audio_play_output],
|
| 392 |
-
api_name="audio_effects"
|
| 393 |
-
)
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
main/app/tabs/editing/child/quirk.py
DELETED
|
@@ -1,48 +0,0 @@
|
|
| 1 |
-
import os
|
| 2 |
-
import sys
|
| 3 |
-
|
| 4 |
-
import gradio as gr
|
| 5 |
-
|
| 6 |
-
sys.path.append(os.getcwd())
|
| 7 |
-
|
| 8 |
-
from main.app.core.editing import apply_voice_quirk
|
| 9 |
-
from main.app.core.ui import change_audios_choices, shutil_move
|
| 10 |
-
from main.app.variables import translations, paths_for_files, configs
|
| 11 |
-
|
| 12 |
-
def quirk_tab():
|
| 13 |
-
with gr.Row():
|
| 14 |
-
gr.Markdown(translations["quirk_markdown"])
|
| 15 |
-
with gr.Row():
|
| 16 |
-
input_audio_play = gr.Audio(show_download_button=True, interactive=False, label=translations["input_audio"])
|
| 17 |
-
with gr.Row():
|
| 18 |
-
quirk_choice = gr.Radio(label=translations["quirk_label"], info=translations["quirk_label_info"], choices=list(translations["quirk_choice"].keys()), interactive=True, value=list(translations["quirk_choice"].keys())[0])
|
| 19 |
-
with gr.Row():
|
| 20 |
-
apply_quirk_button = gr.Button(translations["apply"], variant="primary")
|
| 21 |
-
with gr.Row():
|
| 22 |
-
with gr.Accordion(translations["input_output"], open=False):
|
| 23 |
-
with gr.Row():
|
| 24 |
-
quirk_upload_audio = gr.File(label=translations["drop_audio"], file_types=[".wav", ".mp3", ".flac", ".ogg", ".opus", ".m4a", ".mp4", ".aac", ".alac", ".wma", ".aiff", ".webm", ".ac3"])
|
| 25 |
-
with gr.Column():
|
| 26 |
-
quirk_export_format = gr.Radio(label=translations["export_format"], info=translations["export_info"], choices=["wav", "mp3", "flac", "ogg", "opus", "m4a", "mp4", "aac", "alac", "wma", "aiff", "webm", "ac3"], value="wav", interactive=True)
|
| 27 |
-
quirk_input_path = gr.Dropdown(label=translations["audio_path"], value="", choices=paths_for_files, info=translations["provide_audio"], allow_custom_value=True, interactive=True)
|
| 28 |
-
quirk_output_path = gr.Textbox(label=translations["output_path"], value="audios/output.wav", placeholder="audios/output.wav", info=translations["output_path_info"], interactive=True)
|
| 29 |
-
with gr.Column():
|
| 30 |
-
quirk_refresh = gr.Button(translations["refresh"])
|
| 31 |
-
with gr.Row():
|
| 32 |
-
output_audio_play = gr.Audio(show_download_button=True, interactive=False, label=translations["output_audio"])
|
| 33 |
-
with gr.Row():
|
| 34 |
-
quirk_upload_audio.upload(fn=lambda audio_in: shutil_move(audio_in.name, configs["audios_path"]), inputs=[quirk_upload_audio], outputs=[quirk_input_path])
|
| 35 |
-
quirk_input_path.change(fn=lambda audio: audio if audio else None, inputs=[quirk_input_path], outputs=[input_audio_play])
|
| 36 |
-
quirk_refresh.click(fn=change_audios_choices, inputs=[quirk_input_path], outputs=[quirk_input_path])
|
| 37 |
-
with gr.Row():
|
| 38 |
-
apply_quirk_button.click(
|
| 39 |
-
fn=apply_voice_quirk,
|
| 40 |
-
inputs=[
|
| 41 |
-
quirk_input_path,
|
| 42 |
-
quirk_choice,
|
| 43 |
-
quirk_output_path,
|
| 44 |
-
quirk_export_format
|
| 45 |
-
],
|
| 46 |
-
outputs=[output_audio_play],
|
| 47 |
-
api_name="quirk"
|
| 48 |
-
)
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
main/app/tabs/editing/editing.py
DELETED
|
@@ -1,20 +0,0 @@
|
|
| 1 |
-
import os
|
| 2 |
-
import sys
|
| 3 |
-
|
| 4 |
-
import gradio as gr
|
| 5 |
-
|
| 6 |
-
sys.path.append(os.getcwd())
|
| 7 |
-
|
| 8 |
-
from main.app.variables import configs, translations
|
| 9 |
-
from main.app.tabs.editing.child.quirk import quirk_tab
|
| 10 |
-
from main.app.tabs.editing.child.audio_effects import audio_effects_tab
|
| 11 |
-
|
| 12 |
-
def editing_tab():
|
| 13 |
-
with gr.TabItem(translations["editing"], visible=configs.get("editing_tab", True)):
|
| 14 |
-
with gr.TabItem(translations["audio_effects"], visible=configs.get("effects_tab", True)):
|
| 15 |
-
gr.Markdown(translations["apply_audio_effects"])
|
| 16 |
-
audio_effects_tab()
|
| 17 |
-
|
| 18 |
-
with gr.TabItem(translations["quirk"], visible=configs.get("quirk", True)):
|
| 19 |
-
gr.Markdown(translations["quirk_info"])
|
| 20 |
-
quirk_tab()
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
main/app/tabs/extra/child/convert_model.py
DELETED
|
@@ -1,31 +0,0 @@
|
|
| 1 |
-
import os
|
| 2 |
-
import sys
|
| 3 |
-
|
| 4 |
-
import gradio as gr
|
| 5 |
-
|
| 6 |
-
sys.path.append(os.getcwd())
|
| 7 |
-
|
| 8 |
-
from main.app.core.ui import visible, shutil_move
|
| 9 |
-
from main.app.core.model_utils import onnx_export
|
| 10 |
-
from main.app.variables import translations, configs
|
| 11 |
-
|
| 12 |
-
def convert_model_tab():
|
| 13 |
-
with gr.Row():
|
| 14 |
-
gr.Markdown(translations["pytorch2onnx_markdown"])
|
| 15 |
-
with gr.Row():
|
| 16 |
-
model_pth_upload = gr.File(label=translations["drop_model"], file_types=[".pth"])
|
| 17 |
-
with gr.Row():
|
| 18 |
-
convert_onnx = gr.Button(translations["convert_model"], variant="primary", scale=2)
|
| 19 |
-
with gr.Row():
|
| 20 |
-
model_pth_path = gr.Textbox(label=translations["model_path"], value="", placeholder="assets/weights/Model.pth", info=translations["model_path_info"], interactive=True)
|
| 21 |
-
with gr.Row():
|
| 22 |
-
output_model2 = gr.File(label=translations["output_model_path"], file_types=[".pth", ".onnx"], interactive=False, visible=False)
|
| 23 |
-
with gr.Row():
|
| 24 |
-
model_pth_upload.upload(fn=lambda model_pth_upload: shutil_move(model_pth_upload.name, configs["weights_path"]), inputs=[model_pth_upload], outputs=[model_pth_path])
|
| 25 |
-
convert_onnx.click(
|
| 26 |
-
fn=onnx_export,
|
| 27 |
-
inputs=[model_pth_path],
|
| 28 |
-
outputs=[output_model2],
|
| 29 |
-
api_name="model_onnx_export"
|
| 30 |
-
)
|
| 31 |
-
convert_onnx.click(fn=lambda: visible(True), inputs=[], outputs=[output_model2])
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
main/app/tabs/extra/child/f0_extract.py
DELETED
|
@@ -1,51 +0,0 @@
|
|
| 1 |
-
import os
|
| 2 |
-
import sys
|
| 3 |
-
|
| 4 |
-
import gradio as gr
|
| 5 |
-
|
| 6 |
-
sys.path.append(os.getcwd())
|
| 7 |
-
|
| 8 |
-
from main.app.core.f0_extract import f0_extract
|
| 9 |
-
from main.app.core.ui import change_audios_choices, unlock_f0, shutil_move
|
| 10 |
-
from main.app.variables import translations, paths_for_files, method_f0, configs
|
| 11 |
-
|
| 12 |
-
def f0_extract_tab():
|
| 13 |
-
with gr.Row():
|
| 14 |
-
gr.Markdown(translations["f0_extractor_markdown_2"])
|
| 15 |
-
with gr.Row():
|
| 16 |
-
extractor_button = gr.Button(translations["extract_button"].replace("2. ", ""), variant="primary")
|
| 17 |
-
with gr.Row():
|
| 18 |
-
with gr.Column():
|
| 19 |
-
upload_audio_file = gr.File(label=translations["drop_audio"], file_types=[".wav", ".mp3", ".flac", ".ogg", ".opus", ".m4a", ".mp4", ".aac", ".alac", ".wma", ".aiff", ".webm", ".ac3"])
|
| 20 |
-
audioplay = gr.Audio(show_download_button=True, interactive=False, label=translations["input_audio"])
|
| 21 |
-
with gr.Column():
|
| 22 |
-
with gr.Accordion(translations["f0_method"], open=False):
|
| 23 |
-
with gr.Group():
|
| 24 |
-
with gr.Row():
|
| 25 |
-
onnx_f0_mode3 = gr.Checkbox(label=translations["f0_onnx_mode"], info=translations["f0_onnx_mode_info"], value=False, interactive=True)
|
| 26 |
-
unlock_full_method = gr.Checkbox(label=translations["f0_unlock"], info=translations["f0_unlock_info"], value=False, interactive=True)
|
| 27 |
-
f0_method_extract = gr.Radio(label=translations["f0_method"], info=translations["f0_method_info"], choices=[m for m in method_f0 if m != "hybrid"], value="rmvpe", interactive=True)
|
| 28 |
-
with gr.Accordion(translations["audio_path"], open=True):
|
| 29 |
-
input_audio_path = gr.Dropdown(label=translations["audio_path"], value="", choices=paths_for_files, allow_custom_value=True, interactive=True)
|
| 30 |
-
refresh_audio_button = gr.Button(translations["refresh"])
|
| 31 |
-
with gr.Row():
|
| 32 |
-
gr.Markdown("___")
|
| 33 |
-
with gr.Row():
|
| 34 |
-
file_output = gr.File(label="", file_types=[".txt"], interactive=False)
|
| 35 |
-
image_output = gr.Image(label="", interactive=False, show_download_button=True)
|
| 36 |
-
with gr.Row():
|
| 37 |
-
upload_audio_file.upload(fn=lambda audio_in: shutil_move(audio_in.name, configs["audios_path"]), inputs=[upload_audio_file], outputs=[input_audio_path])
|
| 38 |
-
input_audio_path.change(fn=lambda audio: audio if os.path.isfile(audio) else None, inputs=[input_audio_path], outputs=[audioplay])
|
| 39 |
-
refresh_audio_button.click(fn=change_audios_choices, inputs=[input_audio_path], outputs=[input_audio_path])
|
| 40 |
-
with gr.Row():
|
| 41 |
-
unlock_full_method.change(fn=lambda method: [m for m in unlock_f0(method) if m != "hybrid"], inputs=[unlock_full_method], outputs=[f0_method_extract])
|
| 42 |
-
extractor_button.click(
|
| 43 |
-
fn=f0_extract,
|
| 44 |
-
inputs=[
|
| 45 |
-
input_audio_path,
|
| 46 |
-
f0_method_extract,
|
| 47 |
-
onnx_f0_mode3
|
| 48 |
-
],
|
| 49 |
-
outputs=[file_output, image_output],
|
| 50 |
-
api_name="f0_extract"
|
| 51 |
-
)
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
main/app/tabs/extra/child/fushion.py
DELETED
|
@@ -1,45 +0,0 @@
|
|
| 1 |
-
import os
|
| 2 |
-
import sys
|
| 3 |
-
|
| 4 |
-
import gradio as gr
|
| 5 |
-
|
| 6 |
-
sys.path.append(os.getcwd())
|
| 7 |
-
|
| 8 |
-
from main.app.core.ui import visible, shutil_move
|
| 9 |
-
from main.app.core.model_utils import fushion_model
|
| 10 |
-
from main.app.variables import translations, configs
|
| 11 |
-
|
| 12 |
-
def fushion_tab():
|
| 13 |
-
with gr.Row():
|
| 14 |
-
gr.Markdown(translations["fushion_markdown_2"])
|
| 15 |
-
with gr.Row():
|
| 16 |
-
name_to_save = gr.Textbox(label=translations["modelname"], placeholder="Model.pth", value="", max_lines=1, interactive=True)
|
| 17 |
-
with gr.Row():
|
| 18 |
-
fushion_button = gr.Button(translations["fushion"], variant="primary", scale=4)
|
| 19 |
-
with gr.Column():
|
| 20 |
-
with gr.Row():
|
| 21 |
-
model_a = gr.File(label=f"{translations['model_name']} 1", file_types=[".pth", ".onnx"])
|
| 22 |
-
model_b = gr.File(label=f"{translations['model_name']} 2", file_types=[".pth", ".onnx"])
|
| 23 |
-
with gr.Row():
|
| 24 |
-
model_path_a = gr.Textbox(label=f"{translations['model_path']} 1", value="", placeholder="assets/weights/Model_1.pth")
|
| 25 |
-
model_path_b = gr.Textbox(label=f"{translations['model_path']} 2", value="", placeholder="assets/weights/Model_2.pth")
|
| 26 |
-
with gr.Row():
|
| 27 |
-
ratio = gr.Slider(minimum=0, maximum=1, label=translations["model_ratio"], info=translations["model_ratio_info"], value=0.5, interactive=True)
|
| 28 |
-
with gr.Row():
|
| 29 |
-
output_model = gr.File(label=translations["output_model_path"], file_types=[".pth", ".onnx"], interactive=False, visible=False)
|
| 30 |
-
with gr.Row():
|
| 31 |
-
model_a.upload(fn=lambda model: shutil_move(model.name, configs["weights_path"]), inputs=[model_a], outputs=[model_path_a])
|
| 32 |
-
model_b.upload(fn=lambda model: shutil_move(model.name, configs["weights_path"]), inputs=[model_b], outputs=[model_path_b])
|
| 33 |
-
with gr.Row():
|
| 34 |
-
fushion_button.click(
|
| 35 |
-
fn=fushion_model,
|
| 36 |
-
inputs=[
|
| 37 |
-
name_to_save,
|
| 38 |
-
model_path_a,
|
| 39 |
-
model_path_b,
|
| 40 |
-
ratio
|
| 41 |
-
],
|
| 42 |
-
outputs=[name_to_save, output_model],
|
| 43 |
-
api_name="fushion_model"
|
| 44 |
-
)
|
| 45 |
-
fushion_button.click(fn=lambda: visible(True), inputs=[], outputs=[output_model])
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
main/app/tabs/extra/child/read_model.py
DELETED
|
@@ -1,29 +0,0 @@
|
|
| 1 |
-
import os
|
| 2 |
-
import sys
|
| 3 |
-
|
| 4 |
-
import gradio as gr
|
| 5 |
-
|
| 6 |
-
sys.path.append(os.getcwd())
|
| 7 |
-
|
| 8 |
-
from main.app.core.ui import shutil_move
|
| 9 |
-
from main.app.core.model_utils import model_info
|
| 10 |
-
from main.app.variables import translations, configs
|
| 11 |
-
|
| 12 |
-
def read_model_tab():
|
| 13 |
-
with gr.Row():
|
| 14 |
-
gr.Markdown(translations["read_model_markdown_2"])
|
| 15 |
-
with gr.Row():
|
| 16 |
-
model = gr.File(label=translations["drop_model"], file_types=[".pth", ".onnx"])
|
| 17 |
-
with gr.Row():
|
| 18 |
-
read_button = gr.Button(translations["readmodel"], variant="primary", scale=2)
|
| 19 |
-
with gr.Column():
|
| 20 |
-
model_path = gr.Textbox(label=translations["model_path"], value="", placeholder="assets/weights/Model.pth", info=translations["model_path_info"], interactive=True)
|
| 21 |
-
output_info = gr.Textbox(label=translations["modelinfo"], value="", interactive=False, scale=6)
|
| 22 |
-
with gr.Row():
|
| 23 |
-
model.upload(fn=lambda model: shutil_move(model.name, configs["weights_path"]), inputs=[model], outputs=[model_path])
|
| 24 |
-
read_button.click(
|
| 25 |
-
fn=model_info,
|
| 26 |
-
inputs=[model_path],
|
| 27 |
-
outputs=[output_info],
|
| 28 |
-
api_name="read_model"
|
| 29 |
-
)
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
main/app/tabs/extra/child/report_bugs.py
DELETED
|
@@ -1,24 +0,0 @@
|
|
| 1 |
-
import os
|
| 2 |
-
import sys
|
| 3 |
-
import codecs
|
| 4 |
-
|
| 5 |
-
import gradio as gr
|
| 6 |
-
|
| 7 |
-
sys.path.append(os.getcwd())
|
| 8 |
-
|
| 9 |
-
from main.app.core.utils import report_bug
|
| 10 |
-
from main.app.variables import translations
|
| 11 |
-
|
| 12 |
-
def report_bugs_tab():
|
| 13 |
-
with gr.Row():
|
| 14 |
-
gr.Markdown(translations["report_bug_info"])
|
| 15 |
-
with gr.Row():
|
| 16 |
-
with gr.Column():
|
| 17 |
-
with gr.Group():
|
| 18 |
-
agree_log = gr.Checkbox(label=translations["agree_log"], value=True, interactive=True)
|
| 19 |
-
report_text = gr.Textbox(label=translations["error_info"], info=translations["error_info_2"], interactive=True)
|
| 20 |
-
report_button = gr.Button(translations["report_bugs"], variant="primary", scale=2)
|
| 21 |
-
with gr.Row():
|
| 22 |
-
gr.Markdown(translations["report_info"].format(github=codecs.decode("uggcf://tvguho.pbz/CunzUhlauNau16/Ivrganzrfr-EIP/vffhrf", "rot13")))
|
| 23 |
-
with gr.Row():
|
| 24 |
-
report_button.click(fn=report_bug, inputs=[report_text, agree_log], outputs=[])
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
main/app/tabs/extra/child/settings.py
DELETED
|
@@ -1,61 +0,0 @@
|
|
| 1 |
-
import os
|
| 2 |
-
import sys
|
| 3 |
-
|
| 4 |
-
import gradio as gr
|
| 5 |
-
|
| 6 |
-
sys.path.append(os.getcwd())
|
| 7 |
-
|
| 8 |
-
from main.app.core.ui import change_fp
|
| 9 |
-
from main.app.core.utils import stop_pid
|
| 10 |
-
from main.app.core.restart import change_font, change_language, change_theme
|
| 11 |
-
from main.app.variables import translations, theme, font, configs, language, config
|
| 12 |
-
|
| 13 |
-
def settings_tab(app):
|
| 14 |
-
with gr.Row():
|
| 15 |
-
gr.Markdown(translations["settings_markdown_2"])
|
| 16 |
-
with gr.Row():
|
| 17 |
-
toggle_button = gr.Button(translations["change_light_dark"], variant="secondary", scale=2)
|
| 18 |
-
with gr.Row():
|
| 19 |
-
with gr.Column():
|
| 20 |
-
language_dropdown = gr.Dropdown(label=translations["lang"], interactive=True, info=translations["lang_restart"], choices=configs.get("support_language", "vi-VN"), value=language)
|
| 21 |
-
change_lang = gr.Button(translations["change_lang"], variant="primary", scale=2)
|
| 22 |
-
with gr.Column():
|
| 23 |
-
theme_dropdown = gr.Dropdown(label=translations["theme"], interactive=True, info=translations["theme_restart"], choices=configs.get("themes", theme), value=theme, allow_custom_value=True)
|
| 24 |
-
changetheme = gr.Button(translations["theme_button"], variant="primary", scale=2)
|
| 25 |
-
with gr.Row():
|
| 26 |
-
with gr.Column():
|
| 27 |
-
fp_choice = gr.Radio(choices=["fp16","fp32"], value="fp16" if configs.get("fp16", False) else "fp32", label=translations["precision"], info=translations["precision_info"], interactive=config.device not in ["cpu", "mps", "ocl:0"])
|
| 28 |
-
fp_button = gr.Button(translations["update_precision"], variant="secondary", scale=2)
|
| 29 |
-
with gr.Column():
|
| 30 |
-
font_choice = gr.Textbox(label=translations["font"], info=translations["font_info"], value=font, interactive=True)
|
| 31 |
-
font_button = gr.Button(translations["change_font"])
|
| 32 |
-
with gr.Row():
|
| 33 |
-
with gr.Column():
|
| 34 |
-
with gr.Accordion(translations["stop"], open=False, visible=config.debug_mode):
|
| 35 |
-
separate_stop = gr.Button(translations["stop_separate"])
|
| 36 |
-
convert_stop = gr.Button(translations["stop_convert"])
|
| 37 |
-
create_dataset_stop = gr.Button(translations["stop_create_dataset"])
|
| 38 |
-
with gr.Accordion(translations["stop_training"], open=False):
|
| 39 |
-
model_name_stop = gr.Textbox(label=translations["modelname"], info=translations["training_model_name"], value="", placeholder=translations["modelname"], interactive=True)
|
| 40 |
-
preprocess_stop = gr.Button(translations["stop_preprocess"])
|
| 41 |
-
extract_stop = gr.Button(translations["stop_extract"])
|
| 42 |
-
train_stop = gr.Button(translations["stop_training"])
|
| 43 |
-
with gr.Row():
|
| 44 |
-
toggle_button.click(fn=None, js="() => {document.body.classList.toggle('dark')}")
|
| 45 |
-
fp_button.click(fn=change_fp, inputs=[fp_choice], outputs=[fp_choice])
|
| 46 |
-
with gr.Row():
|
| 47 |
-
change_lang.click(fn=lambda a: change_language(a, app), inputs=[language_dropdown], outputs=[])
|
| 48 |
-
changetheme.click(fn=lambda a: change_theme(a, app) , inputs=[theme_dropdown], outputs=[])
|
| 49 |
-
font_button.click(fn=lambda a: change_font(a, app), inputs=[font_choice], outputs=[])
|
| 50 |
-
with gr.Row():
|
| 51 |
-
change_lang.click(fn=None, js="setTimeout(function() {location.reload()}, 30000)", inputs=[], outputs=[])
|
| 52 |
-
changetheme.click(fn=None, js="setTimeout(function() {location.reload()}, 30000)", inputs=[], outputs=[])
|
| 53 |
-
font_button.click(fn=None, js="setTimeout(function() {location.reload()}, 30000)", inputs=[], outputs=[])
|
| 54 |
-
with gr.Row():
|
| 55 |
-
separate_stop.click(fn=lambda: stop_pid("separate_pid", None, False), inputs=[], outputs=[])
|
| 56 |
-
convert_stop.click(fn=lambda: stop_pid("convert_pid", None, False), inputs=[], outputs=[])
|
| 57 |
-
create_dataset_stop.click(fn=lambda: stop_pid("create_dataset_pid", None, False), inputs=[], outputs=[])
|
| 58 |
-
with gr.Row():
|
| 59 |
-
preprocess_stop.click(fn=lambda model_name_stop: stop_pid("preprocess_pid", model_name_stop, False), inputs=[model_name_stop], outputs=[])
|
| 60 |
-
extract_stop.click(fn=lambda model_name_stop: stop_pid("extract_pid", model_name_stop, False), inputs=[model_name_stop], outputs=[])
|
| 61 |
-
train_stop.click(fn=lambda model_name_stop: stop_pid("train_pid", model_name_stop, True), inputs=[model_name_stop], outputs=[])
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
main/app/tabs/extra/extra.py
DELETED
|
@@ -1,40 +0,0 @@
|
|
| 1 |
-
import os
|
| 2 |
-
import sys
|
| 3 |
-
|
| 4 |
-
import gradio as gr
|
| 5 |
-
|
| 6 |
-
sys.path.append(os.getcwd())
|
| 7 |
-
|
| 8 |
-
from main.app.variables import translations, configs
|
| 9 |
-
from main.app.tabs.extra.child.fushion import fushion_tab
|
| 10 |
-
from main.app.tabs.extra.child.settings import settings_tab
|
| 11 |
-
from main.app.tabs.extra.child.read_model import read_model_tab
|
| 12 |
-
from main.app.tabs.extra.child.f0_extract import f0_extract_tab
|
| 13 |
-
from main.app.tabs.extra.child.report_bugs import report_bugs_tab
|
| 14 |
-
from main.app.tabs.extra.child.convert_model import convert_model_tab
|
| 15 |
-
|
| 16 |
-
def extra_tab(app):
|
| 17 |
-
with gr.TabItem(translations["extra"], visible=configs.get("extra_tab", True)):
|
| 18 |
-
with gr.TabItem(translations["fushion"], visible=configs.get("fushion_tab", True)):
|
| 19 |
-
gr.Markdown(translations["fushion_markdown"])
|
| 20 |
-
fushion_tab()
|
| 21 |
-
|
| 22 |
-
with gr.TabItem(translations["read_model"], visible=configs.get("read_tab", True)):
|
| 23 |
-
gr.Markdown(translations["read_model_markdown"])
|
| 24 |
-
read_model_tab()
|
| 25 |
-
|
| 26 |
-
with gr.TabItem(translations["convert_model"], visible=configs.get("onnx_tab", True)):
|
| 27 |
-
gr.Markdown(translations["pytorch2onnx"])
|
| 28 |
-
convert_model_tab()
|
| 29 |
-
|
| 30 |
-
with gr.TabItem(translations["f0_extractor_tab"], visible=configs.get("f0_extractor_tab", True)):
|
| 31 |
-
gr.Markdown(translations["f0_extractor_markdown"])
|
| 32 |
-
f0_extract_tab()
|
| 33 |
-
|
| 34 |
-
with gr.TabItem(translations["settings"], visible=configs.get("settings_tab", True)):
|
| 35 |
-
gr.Markdown(translations["settings_markdown"])
|
| 36 |
-
settings_tab(app)
|
| 37 |
-
|
| 38 |
-
with gr.TabItem(translations["report_bugs"], visible=configs.get("report_bug_tab", True)):
|
| 39 |
-
gr.Markdown(translations["report_bugs"])
|
| 40 |
-
report_bugs_tab()
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
main/app/tabs/inference/child/convert.py
DELETED
|
@@ -1,313 +0,0 @@
|
|
| 1 |
-
import os
|
| 2 |
-
import sys
|
| 3 |
-
|
| 4 |
-
import gradio as gr
|
| 5 |
-
|
| 6 |
-
sys.path.append(os.getcwd())
|
| 7 |
-
|
| 8 |
-
from main.app.core.presets import load_presets, save_presets
|
| 9 |
-
from main.app.core.inference import convert_audio, convert_selection
|
| 10 |
-
from main.app.variables import translations, paths_for_files, sample_rate_choice, model_name, index_path, method_f0, f0_file, embedders_mode, embedders_model, presets_file, configs
|
| 11 |
-
from main.app.core.ui import visible, valueFalse_interactive, change_audios_choices, change_f0_choices, unlock_f0, change_preset_choices, change_backing_choices, hoplength_show, change_models_choices, get_index, index_strength_show, visible_embedders, shutil_move
|
| 12 |
-
|
| 13 |
-
def convert_tab():
|
| 14 |
-
with gr.Row():
|
| 15 |
-
gr.Markdown(translations["convert_info"])
|
| 16 |
-
with gr.Row():
|
| 17 |
-
with gr.Column():
|
| 18 |
-
with gr.Group():
|
| 19 |
-
with gr.Row():
|
| 20 |
-
cleaner0 = gr.Checkbox(label=translations["clear_audio"], value=False, interactive=True)
|
| 21 |
-
autotune = gr.Checkbox(label=translations["autotune"], value=False, interactive=True)
|
| 22 |
-
use_audio = gr.Checkbox(label=translations["use_audio"], value=False, interactive=True)
|
| 23 |
-
checkpointing = gr.Checkbox(label=translations["memory_efficient_training"], value=False, interactive=True)
|
| 24 |
-
with gr.Row():
|
| 25 |
-
use_original = gr.Checkbox(label=translations["convert_original"], value=False, interactive=True, visible=use_audio.value)
|
| 26 |
-
convert_backing = gr.Checkbox(label=translations["convert_backing"], value=False, interactive=True, visible=use_audio.value)
|
| 27 |
-
not_merge_backing = gr.Checkbox(label=translations["not_merge_backing"], value=False, interactive=True, visible=use_audio.value)
|
| 28 |
-
merge_instrument = gr.Checkbox(label=translations["merge_instruments"], value=False, interactive=True, visible=use_audio.value)
|
| 29 |
-
with gr.Row():
|
| 30 |
-
pitch = gr.Slider(minimum=-20, maximum=20, step=1, info=translations["pitch_info"], label=translations["pitch"], value=0, interactive=True)
|
| 31 |
-
clean_strength0 = gr.Slider(label=translations["clean_strength"], info=translations["clean_strength_info"], minimum=0, maximum=1, value=0.5, step=0.1, interactive=True, visible=cleaner0.value)
|
| 32 |
-
with gr.Row():
|
| 33 |
-
with gr.Column():
|
| 34 |
-
audio_select = gr.Dropdown(label=translations["select_separate"], choices=[], value="", interactive=True, allow_custom_value=True, visible=False)
|
| 35 |
-
convert_button_2 = gr.Button(translations["convert_audio"], visible=False)
|
| 36 |
-
with gr.Row():
|
| 37 |
-
with gr.Column():
|
| 38 |
-
convert_button = gr.Button(translations["convert_audio"], variant="primary")
|
| 39 |
-
with gr.Row():
|
| 40 |
-
with gr.Column():
|
| 41 |
-
input0 = gr.File(label=translations["drop_audio"], file_types=[".wav", ".mp3", ".flac", ".ogg", ".opus", ".m4a", ".mp4", ".aac", ".alac", ".wma", ".aiff", ".webm", ".ac3"])
|
| 42 |
-
play_audio = gr.Audio(show_download_button=True, interactive=False, label=translations["input_audio"])
|
| 43 |
-
with gr.Column():
|
| 44 |
-
with gr.Accordion(translations["model_accordion"], open=True):
|
| 45 |
-
with gr.Row():
|
| 46 |
-
model_pth = gr.Dropdown(label=translations["model_name"], choices=model_name, value=model_name[0] if len(model_name) >= 1 else "", interactive=True, allow_custom_value=True)
|
| 47 |
-
model_index = gr.Dropdown(label=translations["index_path"], choices=index_path, value=index_path[0] if len(index_path) >= 1 else "", interactive=True, allow_custom_value=True)
|
| 48 |
-
with gr.Row():
|
| 49 |
-
refresh = gr.Button(translations["refresh"])
|
| 50 |
-
with gr.Row():
|
| 51 |
-
index_strength = gr.Slider(label=translations["index_strength"], info=translations["index_strength_info"], minimum=0, maximum=1, value=0.5, step=0.01, interactive=True, visible=model_index.value != "")
|
| 52 |
-
with gr.Accordion(translations["input_output"], open=False):
|
| 53 |
-
with gr.Column():
|
| 54 |
-
export_format = gr.Radio(label=translations["export_format"], info=translations["export_info"], choices=["wav", "mp3", "flac", "ogg", "opus", "m4a", "mp4", "aac", "alac", "wma", "aiff", "webm", "ac3"], value="wav", interactive=True)
|
| 55 |
-
input_audio0 = gr.Dropdown(label=translations["audio_path"], value="", choices=paths_for_files, info=translations["provide_audio"], allow_custom_value=True, interactive=True)
|
| 56 |
-
output_audio = gr.Textbox(label=translations["output_path"], value="audios/output.wav", placeholder="audios/output.wav", info=translations["output_path_info"], interactive=True)
|
| 57 |
-
with gr.Column():
|
| 58 |
-
refresh0 = gr.Button(translations["refresh"])
|
| 59 |
-
with gr.Accordion(translations["setting"], open=False):
|
| 60 |
-
with gr.Accordion(translations["f0_method"], open=False):
|
| 61 |
-
with gr.Group():
|
| 62 |
-
with gr.Row():
|
| 63 |
-
onnx_f0_mode = gr.Checkbox(label=translations["f0_onnx_mode"], info=translations["f0_onnx_mode_info"], value=False, interactive=True)
|
| 64 |
-
unlock_full_method = gr.Checkbox(label=translations["f0_unlock"], info=translations["f0_unlock_info"], value=False, interactive=True)
|
| 65 |
-
method = gr.Radio(label=translations["f0_method"], info=translations["f0_method_info"], choices=method_f0, value="rmvpe", interactive=True)
|
| 66 |
-
hybrid_method = gr.Dropdown(label=translations["f0_method_hybrid"], info=translations["f0_method_hybrid_info"], choices=["hybrid[pm+dio]", "hybrid[pm+crepe-tiny]", "hybrid[pm+crepe]", "hybrid[pm+fcpe]", "hybrid[pm+rmvpe]", "hybrid[pm+harvest]", "hybrid[pm+yin]", "hybrid[dio+crepe-tiny]", "hybrid[dio+crepe]", "hybrid[dio+fcpe]", "hybrid[dio+rmvpe]", "hybrid[dio+harvest]", "hybrid[dio+yin]", "hybrid[crepe-tiny+crepe]", "hybrid[crepe-tiny+fcpe]", "hybrid[crepe-tiny+rmvpe]", "hybrid[crepe-tiny+harvest]", "hybrid[crepe+fcpe]", "hybrid[crepe+rmvpe]", "hybrid[crepe+harvest]", "hybrid[crepe+yin]", "hybrid[fcpe+rmvpe]", "hybrid[fcpe+harvest]", "hybrid[fcpe+yin]", "hybrid[rmvpe+harvest]", "hybrid[rmvpe+yin]", "hybrid[harvest+yin]"], value="hybrid[pm+dio]", interactive=True, allow_custom_value=True, visible=method.value == "hybrid")
|
| 67 |
-
hop_length = gr.Slider(label="Hop length", info=translations["hop_length_info"], minimum=1, maximum=512, value=128, step=1, interactive=True, visible=False)
|
| 68 |
-
with gr.Accordion(translations["f0_file"], open=False):
|
| 69 |
-
upload_f0_file = gr.File(label=translations["upload_f0"], file_types=[".txt"])
|
| 70 |
-
f0_file_dropdown = gr.Dropdown(label=translations["f0_file_2"], value="", choices=f0_file, allow_custom_value=True, interactive=True)
|
| 71 |
-
refresh_f0_file = gr.Button(translations["refresh"])
|
| 72 |
-
with gr.Accordion(translations["hubert_model"], open=False):
|
| 73 |
-
embed_mode = gr.Radio(label=translations["embed_mode"], info=translations["embed_mode_info"], value="fairseq", choices=embedders_mode, interactive=True, visible=True)
|
| 74 |
-
embedders = gr.Radio(label=translations["hubert_model"], info=translations["hubert_info"], choices=embedders_model, value="hubert_base", interactive=True)
|
| 75 |
-
custom_embedders = gr.Textbox(label=translations["modelname"], info=translations["modelname_info"], value="", placeholder="hubert_base", interactive=True, visible=embedders.value == "custom")
|
| 76 |
-
with gr.Accordion(translations["use_presets"], open=False):
|
| 77 |
-
with gr.Row():
|
| 78 |
-
presets_name = gr.Dropdown(label=translations["file_preset"], choices=presets_file, value=presets_file[0] if len(presets_file) > 0 else '', interactive=True, allow_custom_value=True)
|
| 79 |
-
with gr.Row():
|
| 80 |
-
load_click = gr.Button(translations["load_file"], variant="primary")
|
| 81 |
-
refresh_click = gr.Button(translations["refresh"])
|
| 82 |
-
with gr.Accordion(translations["export_file"], open=False):
|
| 83 |
-
with gr.Row():
|
| 84 |
-
with gr.Column():
|
| 85 |
-
with gr.Group():
|
| 86 |
-
with gr.Row():
|
| 87 |
-
cleaner_chbox = gr.Checkbox(label=translations["save_clean"], value=True, interactive=True)
|
| 88 |
-
autotune_chbox = gr.Checkbox(label=translations["save_autotune"], value=True, interactive=True)
|
| 89 |
-
pitch_chbox = gr.Checkbox(label=translations["save_pitch"], value=True, interactive=True)
|
| 90 |
-
index_strength_chbox = gr.Checkbox(label=translations["save_index_2"], value=True, interactive=True)
|
| 91 |
-
resample_sr_chbox = gr.Checkbox(label=translations["save_resample"], value=True, interactive=True)
|
| 92 |
-
filter_radius_chbox = gr.Checkbox(label=translations["save_filter"], value=True, interactive=True)
|
| 93 |
-
rms_mix_rate_chbox = gr.Checkbox(label=translations["save_envelope"], value=True, interactive=True)
|
| 94 |
-
protect_chbox = gr.Checkbox(label=translations["save_protect"], value=True, interactive=True)
|
| 95 |
-
split_audio_chbox = gr.Checkbox(label=translations["save_split"], value=True, interactive=True)
|
| 96 |
-
formant_shifting_chbox = gr.Checkbox(label=translations["formantshift"], value=True, interactive=True)
|
| 97 |
-
with gr.Row():
|
| 98 |
-
with gr.Column():
|
| 99 |
-
name_to_save_file = gr.Textbox(label=translations["filename_to_save"])
|
| 100 |
-
save_file_button = gr.Button(translations["export_file"])
|
| 101 |
-
with gr.Row():
|
| 102 |
-
upload_presets = gr.File(label=translations["upload_presets"], file_types=[".conversion.json"])
|
| 103 |
-
with gr.Column():
|
| 104 |
-
with gr.Row():
|
| 105 |
-
split_audio = gr.Checkbox(label=translations["split_audio"], value=False, interactive=True)
|
| 106 |
-
formant_shifting = gr.Checkbox(label=translations["formantshift"], value=False, interactive=True)
|
| 107 |
-
proposal_pitch = gr.Checkbox(label=translations["proposal_pitch"], value=False, interactive=True)
|
| 108 |
-
resample_sr = gr.Radio(choices=[0]+sample_rate_choice, label=translations["resample"], info=translations["resample_info"], value=0, interactive=True)
|
| 109 |
-
proposal_pitch_threshold = gr.Slider(minimum=50.0, maximum=1200.0, label=translations["proposal_pitch_threshold"], info=translations["proposal_pitch_threshold_info"], value=255.0, step=0.1, interactive=True, visible=proposal_pitch.value)
|
| 110 |
-
f0_autotune_strength = gr.Slider(minimum=0, maximum=1, label=translations["autotune_rate"], info=translations["autotune_rate_info"], value=1, step=0.1, interactive=True, visible=autotune.value)
|
| 111 |
-
filter_radius = gr.Slider(minimum=0, maximum=7, label=translations["filter_radius"], info=translations["filter_radius_info"], value=3, step=1, interactive=True)
|
| 112 |
-
rms_mix_rate = gr.Slider(minimum=0, maximum=1, label=translations["rms_mix_rate"], info=translations["rms_mix_rate_info"], value=1, step=0.1, interactive=True)
|
| 113 |
-
protect = gr.Slider(minimum=0, maximum=1, label=translations["protect"], info=translations["protect_info"], value=0.5, step=0.01, interactive=True)
|
| 114 |
-
with gr.Row():
|
| 115 |
-
formant_qfrency = gr.Slider(value=1.0, label=translations["formant_qfrency"], info=translations["formant_qfrency"], minimum=0.0, maximum=16.0, step=0.1, interactive=True, visible=False)
|
| 116 |
-
formant_timbre = gr.Slider(value=1.0, label=translations["formant_timbre"], info=translations["formant_timbre"], minimum=0.0, maximum=16.0, step=0.1, interactive=True, visible=False)
|
| 117 |
-
with gr.Row():
|
| 118 |
-
gr.Markdown(translations["output_convert"])
|
| 119 |
-
with gr.Row():
|
| 120 |
-
main_convert = gr.Audio(show_download_button=True, interactive=False, label=translations["main_convert"])
|
| 121 |
-
backing_convert = gr.Audio(show_download_button=True, interactive=False, label=translations["convert_backing"], visible=convert_backing.value)
|
| 122 |
-
main_backing = gr.Audio(show_download_button=True, interactive=False, label=translations["main_or_backing"], visible=convert_backing.value)
|
| 123 |
-
with gr.Row():
|
| 124 |
-
original_convert = gr.Audio(show_download_button=True, interactive=False, label=translations["convert_original"], visible=use_original.value)
|
| 125 |
-
vocal_instrument = gr.Audio(show_download_button=True, interactive=False, label=translations["voice_or_instruments"], visible=merge_instrument.value)
|
| 126 |
-
with gr.Row():
|
| 127 |
-
upload_f0_file.upload(fn=lambda inp: shutil_move(inp.name, configs["f0_path"]), inputs=[upload_f0_file], outputs=[f0_file_dropdown])
|
| 128 |
-
refresh_f0_file.click(fn=change_f0_choices, inputs=[], outputs=[f0_file_dropdown])
|
| 129 |
-
unlock_full_method.change(fn=unlock_f0, inputs=[unlock_full_method], outputs=[method])
|
| 130 |
-
with gr.Row():
|
| 131 |
-
load_click.click(
|
| 132 |
-
fn=load_presets,
|
| 133 |
-
inputs=[
|
| 134 |
-
presets_name,
|
| 135 |
-
cleaner0,
|
| 136 |
-
autotune,
|
| 137 |
-
pitch,
|
| 138 |
-
clean_strength0,
|
| 139 |
-
index_strength,
|
| 140 |
-
resample_sr,
|
| 141 |
-
filter_radius,
|
| 142 |
-
rms_mix_rate,
|
| 143 |
-
protect,
|
| 144 |
-
split_audio,
|
| 145 |
-
f0_autotune_strength,
|
| 146 |
-
formant_qfrency,
|
| 147 |
-
formant_timbre
|
| 148 |
-
],
|
| 149 |
-
outputs=[
|
| 150 |
-
cleaner0,
|
| 151 |
-
autotune,
|
| 152 |
-
pitch,
|
| 153 |
-
clean_strength0,
|
| 154 |
-
index_strength,
|
| 155 |
-
resample_sr,
|
| 156 |
-
filter_radius,
|
| 157 |
-
rms_mix_rate,
|
| 158 |
-
protect,
|
| 159 |
-
split_audio,
|
| 160 |
-
f0_autotune_strength,
|
| 161 |
-
formant_shifting,
|
| 162 |
-
formant_qfrency,
|
| 163 |
-
formant_timbre
|
| 164 |
-
]
|
| 165 |
-
)
|
| 166 |
-
refresh_click.click(fn=change_preset_choices, inputs=[], outputs=[presets_name])
|
| 167 |
-
save_file_button.click(
|
| 168 |
-
fn=save_presets,
|
| 169 |
-
inputs=[
|
| 170 |
-
name_to_save_file,
|
| 171 |
-
cleaner0,
|
| 172 |
-
autotune,
|
| 173 |
-
pitch,
|
| 174 |
-
clean_strength0,
|
| 175 |
-
index_strength,
|
| 176 |
-
resample_sr,
|
| 177 |
-
filter_radius,
|
| 178 |
-
rms_mix_rate,
|
| 179 |
-
protect,
|
| 180 |
-
split_audio,
|
| 181 |
-
f0_autotune_strength,
|
| 182 |
-
cleaner_chbox,
|
| 183 |
-
autotune_chbox,
|
| 184 |
-
pitch_chbox,
|
| 185 |
-
index_strength_chbox,
|
| 186 |
-
resample_sr_chbox,
|
| 187 |
-
filter_radius_chbox,
|
| 188 |
-
rms_mix_rate_chbox,
|
| 189 |
-
protect_chbox,
|
| 190 |
-
split_audio_chbox,
|
| 191 |
-
formant_shifting_chbox,
|
| 192 |
-
formant_shifting,
|
| 193 |
-
formant_qfrency,
|
| 194 |
-
formant_timbre
|
| 195 |
-
],
|
| 196 |
-
outputs=[presets_name]
|
| 197 |
-
)
|
| 198 |
-
with gr.Row():
|
| 199 |
-
upload_presets.upload(fn=lambda audio_in: shutil_move(audio_in.name, configs["presets_path"]), inputs=[upload_presets], outputs=[presets_name])
|
| 200 |
-
autotune.change(fn=visible, inputs=[autotune], outputs=[f0_autotune_strength])
|
| 201 |
-
use_audio.change(fn=lambda a: [visible(a), visible(a), visible(a), visible(a), visible(a), valueFalse_interactive(a), valueFalse_interactive(a), valueFalse_interactive(a), valueFalse_interactive(a), visible(not a), visible(not a), visible(not a), visible(not a)], inputs=[use_audio], outputs=[main_backing, use_original, convert_backing, not_merge_backing, merge_instrument, use_original, convert_backing, not_merge_backing, merge_instrument, input_audio0, output_audio, input0, play_audio])
|
| 202 |
-
with gr.Row():
|
| 203 |
-
convert_backing.change(fn=lambda a,b: [change_backing_choices(a, b), visible(a)], inputs=[convert_backing, not_merge_backing], outputs=[use_original, backing_convert])
|
| 204 |
-
use_original.change(fn=lambda audio, original: [visible(original), visible(not original), visible(audio and not original), valueFalse_interactive(not original), valueFalse_interactive(not original)], inputs=[use_audio, use_original], outputs=[original_convert, main_convert, main_backing, convert_backing, not_merge_backing])
|
| 205 |
-
cleaner0.change(fn=visible, inputs=[cleaner0], outputs=[clean_strength0])
|
| 206 |
-
with gr.Row():
|
| 207 |
-
merge_instrument.change(fn=visible, inputs=[merge_instrument], outputs=[vocal_instrument])
|
| 208 |
-
not_merge_backing.change(fn=lambda audio, merge, cvb: [visible(audio and not merge), change_backing_choices(cvb, merge)], inputs=[use_audio, not_merge_backing, convert_backing], outputs=[main_backing, use_original])
|
| 209 |
-
method.change(fn=lambda method, hybrid: [visible(method == "hybrid"), hoplength_show(method, hybrid)], inputs=[method, hybrid_method], outputs=[hybrid_method, hop_length])
|
| 210 |
-
with gr.Row():
|
| 211 |
-
hybrid_method.change(fn=hoplength_show, inputs=[method, hybrid_method], outputs=[hop_length])
|
| 212 |
-
refresh.click(fn=change_models_choices, inputs=[], outputs=[model_pth, model_index])
|
| 213 |
-
model_pth.change(fn=get_index, inputs=[model_pth], outputs=[model_index])
|
| 214 |
-
with gr.Row():
|
| 215 |
-
input0.upload(fn=lambda audio_in: shutil_move(audio_in.name, configs["audios_path"]), inputs=[input0], outputs=[input_audio0])
|
| 216 |
-
input_audio0.change(fn=lambda audio: audio if os.path.isfile(audio) else None, inputs=[input_audio0], outputs=[play_audio])
|
| 217 |
-
formant_shifting.change(fn=lambda a: [visible(a)]*2, inputs=[formant_shifting], outputs=[formant_qfrency, formant_timbre])
|
| 218 |
-
with gr.Row():
|
| 219 |
-
embedders.change(fn=lambda embedders: visible(embedders == "custom"), inputs=[embedders], outputs=[custom_embedders])
|
| 220 |
-
refresh0.click(fn=change_audios_choices, inputs=[input_audio0], outputs=[input_audio0])
|
| 221 |
-
model_index.change(fn=index_strength_show, inputs=[model_index], outputs=[index_strength])
|
| 222 |
-
with gr.Row():
|
| 223 |
-
convert_button.click(fn=lambda: visible(False), inputs=[], outputs=[convert_button])
|
| 224 |
-
convert_button_2.click(fn=lambda: [visible(False), visible(False)], inputs=[], outputs=[audio_select, convert_button_2])
|
| 225 |
-
with gr.Row():
|
| 226 |
-
embed_mode.change(fn=visible_embedders, inputs=[embed_mode], outputs=[embedders])
|
| 227 |
-
proposal_pitch.change(fn=visible, inputs=[proposal_pitch], outputs=[proposal_pitch_threshold])
|
| 228 |
-
with gr.Row():
|
| 229 |
-
convert_button.click(
|
| 230 |
-
fn=convert_selection,
|
| 231 |
-
inputs=[
|
| 232 |
-
cleaner0,
|
| 233 |
-
autotune,
|
| 234 |
-
use_audio,
|
| 235 |
-
use_original,
|
| 236 |
-
convert_backing,
|
| 237 |
-
not_merge_backing,
|
| 238 |
-
merge_instrument,
|
| 239 |
-
pitch,
|
| 240 |
-
clean_strength0,
|
| 241 |
-
model_pth,
|
| 242 |
-
model_index,
|
| 243 |
-
index_strength,
|
| 244 |
-
input_audio0,
|
| 245 |
-
output_audio,
|
| 246 |
-
export_format,
|
| 247 |
-
method,
|
| 248 |
-
hybrid_method,
|
| 249 |
-
hop_length,
|
| 250 |
-
embedders,
|
| 251 |
-
custom_embedders,
|
| 252 |
-
resample_sr,
|
| 253 |
-
filter_radius,
|
| 254 |
-
rms_mix_rate,
|
| 255 |
-
protect,
|
| 256 |
-
split_audio,
|
| 257 |
-
f0_autotune_strength,
|
| 258 |
-
checkpointing,
|
| 259 |
-
onnx_f0_mode,
|
| 260 |
-
formant_shifting,
|
| 261 |
-
formant_qfrency,
|
| 262 |
-
formant_timbre,
|
| 263 |
-
f0_file_dropdown,
|
| 264 |
-
embed_mode,
|
| 265 |
-
proposal_pitch,
|
| 266 |
-
proposal_pitch_threshold
|
| 267 |
-
],
|
| 268 |
-
outputs=[audio_select, main_convert, backing_convert, main_backing, original_convert, vocal_instrument, convert_button, convert_button_2],
|
| 269 |
-
api_name="convert_selection"
|
| 270 |
-
)
|
| 271 |
-
convert_button_2.click(
|
| 272 |
-
fn=convert_audio,
|
| 273 |
-
inputs=[
|
| 274 |
-
cleaner0,
|
| 275 |
-
autotune,
|
| 276 |
-
use_audio,
|
| 277 |
-
use_original,
|
| 278 |
-
convert_backing,
|
| 279 |
-
not_merge_backing,
|
| 280 |
-
merge_instrument,
|
| 281 |
-
pitch,
|
| 282 |
-
clean_strength0,
|
| 283 |
-
model_pth,
|
| 284 |
-
model_index,
|
| 285 |
-
index_strength,
|
| 286 |
-
input_audio0,
|
| 287 |
-
output_audio,
|
| 288 |
-
export_format,
|
| 289 |
-
method,
|
| 290 |
-
hybrid_method,
|
| 291 |
-
hop_length,
|
| 292 |
-
embedders,
|
| 293 |
-
custom_embedders,
|
| 294 |
-
resample_sr,
|
| 295 |
-
filter_radius,
|
| 296 |
-
rms_mix_rate,
|
| 297 |
-
protect,
|
| 298 |
-
split_audio,
|
| 299 |
-
f0_autotune_strength,
|
| 300 |
-
audio_select,
|
| 301 |
-
checkpointing,
|
| 302 |
-
onnx_f0_mode,
|
| 303 |
-
formant_shifting,
|
| 304 |
-
formant_qfrency,
|
| 305 |
-
formant_timbre,
|
| 306 |
-
f0_file_dropdown,
|
| 307 |
-
embed_mode,
|
| 308 |
-
proposal_pitch,
|
| 309 |
-
proposal_pitch_threshold
|
| 310 |
-
],
|
| 311 |
-
outputs=[main_convert, backing_convert, main_backing, original_convert, vocal_instrument, convert_button],
|
| 312 |
-
api_name="convert_audio"
|
| 313 |
-
)
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
main/app/tabs/inference/child/convert_tts.py
DELETED
|
@@ -1,171 +0,0 @@
|
|
| 1 |
-
import os
|
| 2 |
-
import sys
|
| 3 |
-
|
| 4 |
-
import gradio as gr
|
| 5 |
-
|
| 6 |
-
sys.path.append(os.getcwd())
|
| 7 |
-
|
| 8 |
-
from main.app.core.tts import TTS
|
| 9 |
-
from main.app.core.process import process_input
|
| 10 |
-
from main.app.core.inference import convert_tts
|
| 11 |
-
from main.app.core.utils import google_translate
|
| 12 |
-
from main.app.variables import translations, sample_rate_choice, model_name, index_path, method_f0, f0_file, embedders_mode, embedders_model, edgetts, google_tts_voice, configs
|
| 13 |
-
from main.app.core.ui import visible, change_f0_choices, unlock_f0, hoplength_show, change_models_choices, get_index, index_strength_show, visible_embedders, change_tts_voice_choices, shutil_move
|
| 14 |
-
|
| 15 |
-
def convert_tts_tab():
|
| 16 |
-
with gr.Row():
|
| 17 |
-
gr.Markdown(translations["convert_text_markdown_2"])
|
| 18 |
-
with gr.Row():
|
| 19 |
-
with gr.Column():
|
| 20 |
-
with gr.Group():
|
| 21 |
-
with gr.Row():
|
| 22 |
-
use_txt = gr.Checkbox(label=translations["input_txt"], value=False, interactive=True)
|
| 23 |
-
google_tts_check_box = gr.Checkbox(label=translations["googletts"], value=False, interactive=True)
|
| 24 |
-
prompt = gr.Textbox(label=translations["text_to_speech"], value="", placeholder="Hello Words", lines=3)
|
| 25 |
-
with gr.Column():
|
| 26 |
-
speed = gr.Slider(label=translations["voice_speed"], info=translations["voice_speed_info"], minimum=-100, maximum=100, value=0, step=1)
|
| 27 |
-
pitch0 = gr.Slider(minimum=-20, maximum=20, step=1, info=translations["pitch_info"], label=translations["pitch"], value=0, interactive=True)
|
| 28 |
-
with gr.Row():
|
| 29 |
-
tts_button = gr.Button(translations["tts_1"], variant="primary", scale=2)
|
| 30 |
-
convert_button0 = gr.Button(translations["tts_2"], variant="secondary", scale=2)
|
| 31 |
-
with gr.Row():
|
| 32 |
-
with gr.Column():
|
| 33 |
-
txt_input = gr.File(label=translations["drop_text"], file_types=[".txt", ".srt", ".docx"], visible=use_txt.value)
|
| 34 |
-
tts_voice = gr.Dropdown(label=translations["voice"], choices=edgetts, interactive=True, value="vi-VN-NamMinhNeural")
|
| 35 |
-
tts_pitch = gr.Slider(minimum=-20, maximum=20, step=1, info=translations["pitch_info_2"], label=translations["pitch"], value=0, interactive=True)
|
| 36 |
-
with gr.Accordion(translations["translate"], open=False):
|
| 37 |
-
with gr.Row():
|
| 38 |
-
source_lang = gr.Dropdown(label=translations["source_lang"], choices=["auto"]+google_tts_voice, interactive=True, value="auto")
|
| 39 |
-
target_lang = gr.Dropdown(label=translations["target_lang"], choices=google_tts_voice, interactive=True, value="en")
|
| 40 |
-
translate_button = gr.Button(translations["translate"])
|
| 41 |
-
with gr.Column():
|
| 42 |
-
with gr.Accordion(translations["model_accordion"], open=True):
|
| 43 |
-
with gr.Row():
|
| 44 |
-
model_pth0 = gr.Dropdown(label=translations["model_name"], choices=model_name, value=model_name[0] if len(model_name) >= 1 else "", interactive=True, allow_custom_value=True)
|
| 45 |
-
model_index0 = gr.Dropdown(label=translations["index_path"], choices=index_path, value=index_path[0] if len(index_path) >= 1 else "", interactive=True, allow_custom_value=True)
|
| 46 |
-
with gr.Row():
|
| 47 |
-
refresh1 = gr.Button(translations["refresh"])
|
| 48 |
-
with gr.Row():
|
| 49 |
-
index_strength0 = gr.Slider(label=translations["index_strength"], info=translations["index_strength_info"], minimum=0, maximum=1, value=0.5, step=0.01, interactive=True, visible=model_index0.value != "")
|
| 50 |
-
with gr.Accordion(translations["output_path"], open=False):
|
| 51 |
-
export_format0 = gr.Radio(label=translations["export_format"], info=translations["export_info"], choices=["wav", "mp3", "flac", "ogg", "opus", "m4a", "mp4", "aac", "alac", "wma", "aiff", "webm", "ac3"], value="wav", interactive=True)
|
| 52 |
-
output_audio0 = gr.Textbox(label=translations["output_tts"], value="audios/tts.wav", placeholder="audios/tts.wav", info=translations["tts_output"], interactive=True)
|
| 53 |
-
output_audio1 = gr.Textbox(label=translations["output_tts_convert"], value="audios/tts-convert.wav", placeholder="audios/tts-convert.wav", info=translations["tts_output"], interactive=True)
|
| 54 |
-
with gr.Accordion(translations["setting"], open=False):
|
| 55 |
-
with gr.Accordion(translations["f0_method"], open=False):
|
| 56 |
-
with gr.Group():
|
| 57 |
-
with gr.Row():
|
| 58 |
-
onnx_f0_mode1 = gr.Checkbox(label=translations["f0_onnx_mode"], info=translations["f0_onnx_mode_info"], value=False, interactive=True)
|
| 59 |
-
unlock_full_method3 = gr.Checkbox(label=translations["f0_unlock"], info=translations["f0_unlock_info"], value=False, interactive=True)
|
| 60 |
-
method0 = gr.Radio(label=translations["f0_method"], info=translations["f0_method_info"], choices=method_f0, value="rmvpe", interactive=True)
|
| 61 |
-
hybrid_method0 = gr.Dropdown(label=translations["f0_method_hybrid"], info=translations["f0_method_hybrid_info"], choices=["hybrid[pm+dio]", "hybrid[pm+crepe-tiny]", "hybrid[pm+crepe]", "hybrid[pm+fcpe]", "hybrid[pm+rmvpe]", "hybrid[pm+harvest]", "hybrid[pm+yin]", "hybrid[dio+crepe-tiny]", "hybrid[dio+crepe]", "hybrid[dio+fcpe]", "hybrid[dio+rmvpe]", "hybrid[dio+harvest]", "hybrid[dio+yin]", "hybrid[crepe-tiny+crepe]", "hybrid[crepe-tiny+fcpe]", "hybrid[crepe-tiny+rmvpe]", "hybrid[crepe-tiny+harvest]", "hybrid[crepe+fcpe]", "hybrid[crepe+rmvpe]", "hybrid[crepe+harvest]", "hybrid[crepe+yin]", "hybrid[fcpe+rmvpe]", "hybrid[fcpe+harvest]", "hybrid[fcpe+yin]", "hybrid[rmvpe+harvest]", "hybrid[rmvpe+yin]", "hybrid[harvest+yin]"], value="hybrid[pm+dio]", interactive=True, allow_custom_value=True, visible=method0.value == "hybrid")
|
| 62 |
-
hop_length0 = gr.Slider(label="Hop length", info=translations["hop_length_info"], minimum=1, maximum=512, value=128, step=1, interactive=True, visible=False)
|
| 63 |
-
with gr.Accordion(translations["f0_file"], open=False):
|
| 64 |
-
upload_f0_file0 = gr.File(label=translations["upload_f0"], file_types=[".txt"])
|
| 65 |
-
f0_file_dropdown0 = gr.Dropdown(label=translations["f0_file_2"], value="", choices=f0_file, allow_custom_value=True, interactive=True)
|
| 66 |
-
refresh_f0_file0 = gr.Button(translations["refresh"])
|
| 67 |
-
with gr.Accordion(translations["hubert_model"], open=False):
|
| 68 |
-
embed_mode1 = gr.Radio(label=translations["embed_mode"], info=translations["embed_mode_info"], value="fairseq", choices=embedders_mode, interactive=True, visible=True)
|
| 69 |
-
embedders0 = gr.Radio(label=translations["hubert_model"], info=translations["hubert_info"], choices=embedders_model, value="hubert_base", interactive=True)
|
| 70 |
-
custom_embedders0 = gr.Textbox(label=translations["modelname"], info=translations["modelname_info"], value="", placeholder="hubert_base", interactive=True, visible=embedders0.value == "custom")
|
| 71 |
-
with gr.Group():
|
| 72 |
-
with gr.Row():
|
| 73 |
-
formant_shifting1 = gr.Checkbox(label=translations["formantshift"], value=False, interactive=True)
|
| 74 |
-
split_audio0 = gr.Checkbox(label=translations["split_audio"], value=False, interactive=True)
|
| 75 |
-
cleaner1 = gr.Checkbox(label=translations["clear_audio"], value=False, interactive=True)
|
| 76 |
-
with gr.Row():
|
| 77 |
-
autotune3 = gr.Checkbox(label=translations["autotune"], value=False, interactive=True)
|
| 78 |
-
checkpointing0 = gr.Checkbox(label=translations["memory_efficient_training"], value=False, interactive=True)
|
| 79 |
-
proposal_pitch = gr.Checkbox(label=translations["proposal_pitch"], value=False, interactive=True)
|
| 80 |
-
with gr.Column():
|
| 81 |
-
resample_sr0 = gr.Radio(choices=[0]+sample_rate_choice, label=translations["resample"], info=translations["resample_info"], value=0, interactive=True)
|
| 82 |
-
proposal_pitch_threshold = gr.Slider(minimum=50.0, maximum=1200.0, label=translations["proposal_pitch_threshold"], info=translations["proposal_pitch_threshold_info"], value=255.0, step=0.1, interactive=True, visible=proposal_pitch.value)
|
| 83 |
-
f0_autotune_strength0 = gr.Slider(minimum=0, maximum=1, label=translations["autotune_rate"], info=translations["autotune_rate_info"], value=1, step=0.1, interactive=True, visible=autotune3.value)
|
| 84 |
-
clean_strength1 = gr.Slider(label=translations["clean_strength"], info=translations["clean_strength_info"], minimum=0, maximum=1, value=0.5, step=0.1, interactive=True, visible=cleaner1.value)
|
| 85 |
-
filter_radius0 = gr.Slider(minimum=0, maximum=7, label=translations["filter_radius"], info=translations["filter_radius_info"], value=3, step=1, interactive=True)
|
| 86 |
-
rms_mix_rate0 = gr.Slider(minimum=0, maximum=1, label=translations["rms_mix_rate"], info=translations["rms_mix_rate_info"], value=1, step=0.1, interactive=True)
|
| 87 |
-
protect0 = gr.Slider(minimum=0, maximum=1, label=translations["protect"], info=translations["protect_info"], value=0.5, step=0.01, interactive=True)
|
| 88 |
-
with gr.Row():
|
| 89 |
-
formant_qfrency1 = gr.Slider(value=1.0, label=translations["formant_qfrency"], info=translations["formant_qfrency"], minimum=0.0, maximum=16.0, step=0.1, interactive=True, visible=False)
|
| 90 |
-
formant_timbre1 = gr.Slider(value=1.0, label=translations["formant_timbre"], info=translations["formant_timbre"], minimum=0.0, maximum=16.0, step=0.1, interactive=True, visible=False)
|
| 91 |
-
with gr.Row():
|
| 92 |
-
gr.Markdown(translations["output_tts_markdown"])
|
| 93 |
-
with gr.Row():
|
| 94 |
-
tts_voice_audio = gr.Audio(show_download_button=True, interactive=False, label=translations["output_text_to_speech"])
|
| 95 |
-
tts_voice_convert = gr.Audio(show_download_button=True, interactive=False, label=translations["output_file_tts_convert"])
|
| 96 |
-
with gr.Row():
|
| 97 |
-
proposal_pitch.change(fn=visible, inputs=[proposal_pitch], outputs=[proposal_pitch_threshold])
|
| 98 |
-
translate_button.click(fn=google_translate, inputs=[prompt, source_lang, target_lang], outputs=[prompt], api_name="google_translate")
|
| 99 |
-
with gr.Row():
|
| 100 |
-
unlock_full_method3.change(fn=unlock_f0, inputs=[unlock_full_method3], outputs=[method0])
|
| 101 |
-
upload_f0_file0.upload(fn=lambda inp: shutil_move(inp.name, configs["f0_path"]), inputs=[upload_f0_file0], outputs=[f0_file_dropdown0])
|
| 102 |
-
refresh_f0_file0.click(fn=change_f0_choices, inputs=[], outputs=[f0_file_dropdown0])
|
| 103 |
-
with gr.Row():
|
| 104 |
-
embed_mode1.change(fn=visible_embedders, inputs=[embed_mode1], outputs=[embedders0])
|
| 105 |
-
autotune3.change(fn=visible, inputs=[autotune3], outputs=[f0_autotune_strength0])
|
| 106 |
-
model_pth0.change(fn=get_index, inputs=[model_pth0], outputs=[model_index0])
|
| 107 |
-
with gr.Row():
|
| 108 |
-
cleaner1.change(fn=visible, inputs=[cleaner1], outputs=[clean_strength1])
|
| 109 |
-
method0.change(fn=lambda method, hybrid: [visible(method == "hybrid"), hoplength_show(method, hybrid)], inputs=[method0, hybrid_method0], outputs=[hybrid_method0, hop_length0])
|
| 110 |
-
hybrid_method0.change(fn=hoplength_show, inputs=[method0, hybrid_method0], outputs=[hop_length0])
|
| 111 |
-
with gr.Row():
|
| 112 |
-
refresh1.click(fn=change_models_choices, inputs=[], outputs=[model_pth0, model_index0])
|
| 113 |
-
embedders0.change(fn=lambda embedders: visible(embedders == "custom"), inputs=[embedders0], outputs=[custom_embedders0])
|
| 114 |
-
formant_shifting1.change(fn=lambda a: [visible(a)]*2, inputs=[formant_shifting1], outputs=[formant_qfrency1, formant_timbre1])
|
| 115 |
-
with gr.Row():
|
| 116 |
-
model_index0.change(fn=index_strength_show, inputs=[model_index0], outputs=[index_strength0])
|
| 117 |
-
txt_input.upload(fn=process_input, inputs=[txt_input], outputs=[prompt])
|
| 118 |
-
use_txt.change(fn=visible, inputs=[use_txt], outputs=[txt_input])
|
| 119 |
-
with gr.Row():
|
| 120 |
-
google_tts_check_box.change(fn=change_tts_voice_choices, inputs=[google_tts_check_box], outputs=[tts_voice])
|
| 121 |
-
tts_button.click(
|
| 122 |
-
fn=TTS,
|
| 123 |
-
inputs=[
|
| 124 |
-
prompt,
|
| 125 |
-
tts_voice,
|
| 126 |
-
speed,
|
| 127 |
-
output_audio0,
|
| 128 |
-
tts_pitch,
|
| 129 |
-
google_tts_check_box,
|
| 130 |
-
txt_input
|
| 131 |
-
],
|
| 132 |
-
outputs=[tts_voice_audio],
|
| 133 |
-
api_name="text-to-speech"
|
| 134 |
-
)
|
| 135 |
-
convert_button0.click(
|
| 136 |
-
fn=convert_tts,
|
| 137 |
-
inputs=[
|
| 138 |
-
cleaner1,
|
| 139 |
-
autotune3,
|
| 140 |
-
pitch0,
|
| 141 |
-
clean_strength1,
|
| 142 |
-
model_pth0,
|
| 143 |
-
model_index0,
|
| 144 |
-
index_strength0,
|
| 145 |
-
output_audio0,
|
| 146 |
-
output_audio1,
|
| 147 |
-
export_format0,
|
| 148 |
-
method0,
|
| 149 |
-
hybrid_method0,
|
| 150 |
-
hop_length0,
|
| 151 |
-
embedders0,
|
| 152 |
-
custom_embedders0,
|
| 153 |
-
resample_sr0,
|
| 154 |
-
filter_radius0,
|
| 155 |
-
rms_mix_rate0,
|
| 156 |
-
protect0,
|
| 157 |
-
split_audio0,
|
| 158 |
-
f0_autotune_strength0,
|
| 159 |
-
checkpointing0,
|
| 160 |
-
onnx_f0_mode1,
|
| 161 |
-
formant_shifting1,
|
| 162 |
-
formant_qfrency1,
|
| 163 |
-
formant_timbre1,
|
| 164 |
-
f0_file_dropdown0,
|
| 165 |
-
embed_mode1,
|
| 166 |
-
proposal_pitch,
|
| 167 |
-
proposal_pitch_threshold
|
| 168 |
-
],
|
| 169 |
-
outputs=[tts_voice_convert],
|
| 170 |
-
api_name="convert_tts"
|
| 171 |
-
)
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
main/app/tabs/inference/child/convert_with_whisper.py
DELETED
|
@@ -1,160 +0,0 @@
|
|
| 1 |
-
import os
|
| 2 |
-
import sys
|
| 3 |
-
|
| 4 |
-
import gradio as gr
|
| 5 |
-
|
| 6 |
-
sys.path.append(os.getcwd())
|
| 7 |
-
|
| 8 |
-
from main.app.core.inference import convert_with_whisper
|
| 9 |
-
from main.app.variables import translations, paths_for_files, sample_rate_choice, model_name, index_path, method_f0, embedders_mode, embedders_model, configs
|
| 10 |
-
from main.app.core.ui import visible, change_audios_choices, unlock_f0, hoplength_show, change_models_choices, get_index, index_strength_show, visible_embedders, shutil_move
|
| 11 |
-
|
| 12 |
-
def convert_with_whisper_tab():
|
| 13 |
-
with gr.Row():
|
| 14 |
-
gr.Markdown(translations["convert_with_whisper_info"])
|
| 15 |
-
with gr.Row():
|
| 16 |
-
with gr.Column():
|
| 17 |
-
with gr.Group():
|
| 18 |
-
with gr.Row():
|
| 19 |
-
cleaner2 = gr.Checkbox(label=translations["clear_audio"], value=False, interactive=True)
|
| 20 |
-
autotune2 = gr.Checkbox(label=translations["autotune"], value=False, interactive=True)
|
| 21 |
-
checkpointing2 = gr.Checkbox(label=translations["memory_efficient_training"], value=False, interactive=True)
|
| 22 |
-
formant_shifting2 = gr.Checkbox(label=translations["formantshift"], value=False, interactive=True)
|
| 23 |
-
proposal_pitch = gr.Checkbox(label=translations["proposal_pitch"], value=False, interactive=True)
|
| 24 |
-
with gr.Row():
|
| 25 |
-
num_spk = gr.Slider(minimum=2, maximum=8, step=1, info=translations["num_spk_info"], label=translations["num_spk"], value=2, interactive=True)
|
| 26 |
-
with gr.Row():
|
| 27 |
-
with gr.Column():
|
| 28 |
-
convert_button3 = gr.Button(translations["convert_audio"], variant="primary")
|
| 29 |
-
with gr.Row():
|
| 30 |
-
with gr.Column():
|
| 31 |
-
with gr.Accordion(translations["model_accordion"] + " 1", open=True):
|
| 32 |
-
with gr.Row():
|
| 33 |
-
model_pth2 = gr.Dropdown(label=translations["model_name"], choices=model_name, value=model_name[0] if len(model_name) >= 1 else "", interactive=True, allow_custom_value=True)
|
| 34 |
-
model_index2 = gr.Dropdown(label=translations["index_path"], choices=index_path, value=index_path[0] if len(index_path) >= 1 else "", interactive=True, allow_custom_value=True)
|
| 35 |
-
with gr.Row():
|
| 36 |
-
refresh2 = gr.Button(translations["refresh"])
|
| 37 |
-
with gr.Row():
|
| 38 |
-
pitch3 = gr.Slider(minimum=-20, maximum=20, step=1, info=translations["pitch_info"], label=translations["pitch"], value=0, interactive=True)
|
| 39 |
-
index_strength2 = gr.Slider(label=translations["index_strength"], info=translations["index_strength_info"], minimum=0, maximum=1, value=0.5, step=0.01, interactive=True, visible=model_index2.value != "")
|
| 40 |
-
with gr.Accordion(translations["input_output"], open=False):
|
| 41 |
-
with gr.Column():
|
| 42 |
-
export_format2 = gr.Radio(label=translations["export_format"], info=translations["export_info"], choices=["wav", "mp3", "flac", "ogg", "opus", "m4a", "mp4", "aac", "alac", "wma", "aiff", "webm", "ac3"], value="wav", interactive=True)
|
| 43 |
-
input_audio1 = gr.Dropdown(label=translations["audio_path"], value="", choices=paths_for_files, info=translations["provide_audio"], allow_custom_value=True, interactive=True)
|
| 44 |
-
output_audio2 = gr.Textbox(label=translations["output_path"], value="audios/output.wav", placeholder="audios/output.wav", info=translations["output_path_info"], interactive=True)
|
| 45 |
-
with gr.Column():
|
| 46 |
-
refresh4 = gr.Button(translations["refresh"])
|
| 47 |
-
with gr.Row():
|
| 48 |
-
input2 = gr.File(label=translations["drop_audio"], file_types=[".wav", ".mp3", ".flac", ".ogg", ".opus", ".m4a", ".mp4", ".aac", ".alac", ".wma", ".aiff", ".webm", ".ac3"])
|
| 49 |
-
with gr.Column():
|
| 50 |
-
with gr.Accordion(translations["model_accordion"] + " 2", open=True):
|
| 51 |
-
with gr.Row():
|
| 52 |
-
model_pth3 = gr.Dropdown(label=translations["model_name"], choices=model_name, value=model_name[0] if len(model_name) >= 1 else "", interactive=True, allow_custom_value=True)
|
| 53 |
-
model_index3 = gr.Dropdown(label=translations["index_path"], choices=index_path, value=index_path[0] if len(index_path) >= 1 else "", interactive=True, allow_custom_value=True)
|
| 54 |
-
with gr.Row():
|
| 55 |
-
refresh3 = gr.Button(translations["refresh"])
|
| 56 |
-
with gr.Row():
|
| 57 |
-
pitch4 = gr.Slider(minimum=-20, maximum=20, step=1, info=translations["pitch_info"], label=translations["pitch"], value=0, interactive=True)
|
| 58 |
-
index_strength3 = gr.Slider(label=translations["index_strength"], info=translations["index_strength_info"], minimum=0, maximum=1, value=0.5, step=0.01, interactive=True, visible=model_index3.value != "")
|
| 59 |
-
with gr.Accordion(translations["setting"], open=False):
|
| 60 |
-
with gr.Row():
|
| 61 |
-
model_size = gr.Radio(label=translations["model_size"], info=translations["model_size_info"], choices=["tiny", "tiny.en", "base", "base.en", "small", "small.en", "medium", "medium.en", "large-v1", "large-v2", "large-v3", "large-v3-turbo"], value="medium", interactive=True)
|
| 62 |
-
with gr.Accordion(translations["f0_method"], open=False):
|
| 63 |
-
with gr.Group():
|
| 64 |
-
with gr.Row():
|
| 65 |
-
onnx_f0_mode4 = gr.Checkbox(label=translations["f0_onnx_mode"], info=translations["f0_onnx_mode_info"], value=False, interactive=True)
|
| 66 |
-
unlock_full_method2 = gr.Checkbox(label=translations["f0_unlock"], info=translations["f0_unlock_info"], value=False, interactive=True)
|
| 67 |
-
method3 = gr.Radio(label=translations["f0_method"], info=translations["f0_method_info"], choices=method_f0, value="rmvpe", interactive=True)
|
| 68 |
-
hybrid_method3 = gr.Dropdown(label=translations["f0_method_hybrid"], info=translations["f0_method_hybrid_info"], choices=["hybrid[pm+dio]", "hybrid[pm+crepe-tiny]", "hybrid[pm+crepe]", "hybrid[pm+fcpe]", "hybrid[pm+rmvpe]", "hybrid[pm+harvest]", "hybrid[pm+yin]", "hybrid[dio+crepe-tiny]", "hybrid[dio+crepe]", "hybrid[dio+fcpe]", "hybrid[dio+rmvpe]", "hybrid[dio+harvest]", "hybrid[dio+yin]", "hybrid[crepe-tiny+crepe]", "hybrid[crepe-tiny+fcpe]", "hybrid[crepe-tiny+rmvpe]", "hybrid[crepe-tiny+harvest]", "hybrid[crepe+fcpe]", "hybrid[crepe+rmvpe]", "hybrid[crepe+harvest]", "hybrid[crepe+yin]", "hybrid[fcpe+rmvpe]", "hybrid[fcpe+harvest]", "hybrid[fcpe+yin]", "hybrid[rmvpe+harvest]", "hybrid[rmvpe+yin]", "hybrid[harvest+yin]"], value="hybrid[pm+dio]", interactive=True, allow_custom_value=True, visible=method3.value == "hybrid")
|
| 69 |
-
hop_length3 = gr.Slider(label="Hop length", info=translations["hop_length_info"], minimum=1, maximum=512, value=128, step=1, interactive=True, visible=False)
|
| 70 |
-
with gr.Accordion(translations["hubert_model"], open=False):
|
| 71 |
-
embed_mode3 = gr.Radio(label=translations["embed_mode"], info=translations["embed_mode_info"], value="fairseq", choices=embedders_mode, interactive=True, visible=True)
|
| 72 |
-
embedders3 = gr.Radio(label=translations["hubert_model"], info=translations["hubert_info"], choices=embedders_model, value="hubert_base", interactive=True)
|
| 73 |
-
custom_embedders3 = gr.Textbox(label=translations["modelname"], info=translations["modelname_info"], value="", placeholder="hubert_base", interactive=True, visible=embedders3.value == "custom")
|
| 74 |
-
with gr.Column():
|
| 75 |
-
resample_sr3 = gr.Radio(choices=[0]+sample_rate_choice, label=translations["resample"], info=translations["resample_info"], value=0, interactive=True)
|
| 76 |
-
proposal_pitch_threshold = gr.Slider(minimum=50.0, maximum=1200.0, label=translations["proposal_pitch_threshold"], info=translations["proposal_pitch_threshold_info"], value=255.0, step=0.1, interactive=True, visible=proposal_pitch.value)
|
| 77 |
-
clean_strength3 = gr.Slider(label=translations["clean_strength"], info=translations["clean_strength_info"], minimum=0, maximum=1, value=0.5, step=0.1, interactive=True, visible=cleaner2.value)
|
| 78 |
-
f0_autotune_strength3 = gr.Slider(minimum=0, maximum=1, label=translations["autotune_rate"], info=translations["autotune_rate_info"], value=1, step=0.1, interactive=True, visible=autotune2.value)
|
| 79 |
-
filter_radius3 = gr.Slider(minimum=0, maximum=7, label=translations["filter_radius"], info=translations["filter_radius_info"], value=3, step=1, interactive=True)
|
| 80 |
-
rms_mix_rate3 = gr.Slider(minimum=0, maximum=1, label=translations["rms_mix_rate"], info=translations["rms_mix_rate_info"], value=1, step=0.1, interactive=True)
|
| 81 |
-
protect3 = gr.Slider(minimum=0, maximum=1, label=translations["protect"], info=translations["protect_info"], value=0.5, step=0.01, interactive=True)
|
| 82 |
-
with gr.Row():
|
| 83 |
-
formant_qfrency3 = gr.Slider(value=1.0, label=translations["formant_qfrency"] + " 1", info=translations["formant_qfrency"], minimum=0.0, maximum=16.0, step=0.1, interactive=True, visible=False)
|
| 84 |
-
formant_timbre3 = gr.Slider(value=1.0, label=translations["formant_timbre"] + " 1", info=translations["formant_timbre"], minimum=0.0, maximum=16.0, step=0.1, interactive=True, visible=False)
|
| 85 |
-
with gr.Row():
|
| 86 |
-
formant_qfrency4 = gr.Slider(value=1.0, label=translations["formant_qfrency"] + " 2", info=translations["formant_qfrency"], minimum=0.0, maximum=16.0, step=0.1, interactive=True, visible=False)
|
| 87 |
-
formant_timbre4 = gr.Slider(value=1.0, label=translations["formant_timbre"] + " 2", info=translations["formant_timbre"], minimum=0.0, maximum=16.0, step=0.1, interactive=True, visible=False)
|
| 88 |
-
with gr.Row():
|
| 89 |
-
gr.Markdown(translations["input_output"])
|
| 90 |
-
with gr.Row():
|
| 91 |
-
play_audio2 = gr.Audio(show_download_button=True, interactive=False, label=translations["input_audio"])
|
| 92 |
-
play_audio3 = gr.Audio(show_download_button=True, interactive=False, label=translations["output_file_tts_convert"])
|
| 93 |
-
with gr.Row():
|
| 94 |
-
autotune2.change(fn=visible, inputs=[autotune2], outputs=[f0_autotune_strength3])
|
| 95 |
-
cleaner2.change(fn=visible, inputs=[cleaner2], outputs=[clean_strength3])
|
| 96 |
-
method3.change(fn=lambda method, hybrid: [visible(method == "hybrid"), hoplength_show(method, hybrid)], inputs=[method3, hybrid_method3], outputs=[hybrid_method3, hop_length3])
|
| 97 |
-
with gr.Row():
|
| 98 |
-
hybrid_method3.change(fn=hoplength_show, inputs=[method3, hybrid_method3], outputs=[hop_length3])
|
| 99 |
-
refresh2.click(fn=change_models_choices, inputs=[], outputs=[model_pth2, model_index2])
|
| 100 |
-
model_pth2.change(fn=get_index, inputs=[model_pth2], outputs=[model_index2])
|
| 101 |
-
with gr.Row():
|
| 102 |
-
refresh3.click(fn=change_models_choices, inputs=[], outputs=[model_pth3, model_index3])
|
| 103 |
-
model_pth3.change(fn=get_index, inputs=[model_pth3], outputs=[model_index3])
|
| 104 |
-
input2.upload(fn=lambda audio_in: shutil_move(audio_in.name, configs["audios_path"]), inputs=[input2], outputs=[input_audio1])
|
| 105 |
-
with gr.Row():
|
| 106 |
-
input_audio1.change(fn=lambda audio: audio if os.path.isfile(audio) else None, inputs=[input_audio1], outputs=[play_audio2])
|
| 107 |
-
formant_shifting2.change(fn=lambda a: [visible(a)]*4, inputs=[formant_shifting2], outputs=[formant_qfrency3, formant_timbre3, formant_qfrency4, formant_timbre4])
|
| 108 |
-
embedders3.change(fn=lambda embedders: visible(embedders == "custom"), inputs=[embedders3], outputs=[custom_embedders3])
|
| 109 |
-
with gr.Row():
|
| 110 |
-
refresh4.click(fn=change_audios_choices, inputs=[input_audio1], outputs=[input_audio1])
|
| 111 |
-
model_index2.change(fn=index_strength_show, inputs=[model_index2], outputs=[index_strength2])
|
| 112 |
-
model_index3.change(fn=index_strength_show, inputs=[model_index3], outputs=[index_strength3])
|
| 113 |
-
with gr.Row():
|
| 114 |
-
unlock_full_method2.change(fn=unlock_f0, inputs=[unlock_full_method2], outputs=[method3])
|
| 115 |
-
embed_mode3.change(fn=visible_embedders, inputs=[embed_mode3], outputs=[embedders3])
|
| 116 |
-
proposal_pitch.change(fn=visible, inputs=[proposal_pitch], outputs=[proposal_pitch_threshold])
|
| 117 |
-
with gr.Row():
|
| 118 |
-
convert_button3.click(
|
| 119 |
-
fn=convert_with_whisper,
|
| 120 |
-
inputs=[
|
| 121 |
-
num_spk,
|
| 122 |
-
model_size,
|
| 123 |
-
cleaner2,
|
| 124 |
-
clean_strength3,
|
| 125 |
-
autotune2,
|
| 126 |
-
f0_autotune_strength3,
|
| 127 |
-
checkpointing2,
|
| 128 |
-
model_pth2,
|
| 129 |
-
model_pth3,
|
| 130 |
-
model_index2,
|
| 131 |
-
model_index3,
|
| 132 |
-
pitch3,
|
| 133 |
-
pitch4,
|
| 134 |
-
index_strength2,
|
| 135 |
-
index_strength3,
|
| 136 |
-
export_format2,
|
| 137 |
-
input_audio1,
|
| 138 |
-
output_audio2,
|
| 139 |
-
onnx_f0_mode4,
|
| 140 |
-
method3,
|
| 141 |
-
hybrid_method3,
|
| 142 |
-
hop_length3,
|
| 143 |
-
embed_mode3,
|
| 144 |
-
embedders3,
|
| 145 |
-
custom_embedders3,
|
| 146 |
-
resample_sr3,
|
| 147 |
-
filter_radius3,
|
| 148 |
-
rms_mix_rate3,
|
| 149 |
-
protect3,
|
| 150 |
-
formant_shifting2,
|
| 151 |
-
formant_qfrency3,
|
| 152 |
-
formant_timbre3,
|
| 153 |
-
formant_qfrency4,
|
| 154 |
-
formant_timbre4,
|
| 155 |
-
proposal_pitch,
|
| 156 |
-
proposal_pitch_threshold
|
| 157 |
-
],
|
| 158 |
-
outputs=[play_audio3],
|
| 159 |
-
api_name="convert_with_whisper"
|
| 160 |
-
)
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
main/app/tabs/inference/child/separate.py
DELETED
|
@@ -1,108 +0,0 @@
|
|
| 1 |
-
import os
|
| 2 |
-
import sys
|
| 3 |
-
|
| 4 |
-
import gradio as gr
|
| 5 |
-
|
| 6 |
-
sys.path.append(os.getcwd())
|
| 7 |
-
|
| 8 |
-
from main.app.core.downloads import download_url
|
| 9 |
-
from main.app.core.separate import separator_music
|
| 10 |
-
from main.app.core.ui import visible, valueFalse_interactive, change_audios_choices, shutil_move
|
| 11 |
-
from main.app.variables import translations, uvr_model, paths_for_files, mdx_model, sample_rate_choice, configs
|
| 12 |
-
|
| 13 |
-
def separate_tab():
|
| 14 |
-
with gr.Row():
|
| 15 |
-
gr.Markdown(translations["4_part"])
|
| 16 |
-
with gr.Row():
|
| 17 |
-
with gr.Column():
|
| 18 |
-
with gr.Group():
|
| 19 |
-
with gr.Row():
|
| 20 |
-
cleaner = gr.Checkbox(label=translations["clear_audio"], value=False, interactive=True, min_width=140)
|
| 21 |
-
backing = gr.Checkbox(label=translations["separator_backing"], value=False, interactive=True, min_width=140)
|
| 22 |
-
reverb = gr.Checkbox(label=translations["dereveb_audio"], value=False, interactive=True, min_width=140)
|
| 23 |
-
backing_reverb = gr.Checkbox(label=translations["dereveb_backing"], value=False, interactive=False, min_width=140)
|
| 24 |
-
denoise = gr.Checkbox(label=translations["denoise_mdx"], value=False, interactive=False, min_width=140)
|
| 25 |
-
with gr.Row():
|
| 26 |
-
separator_model = gr.Dropdown(label=translations["separator_model"], value=uvr_model[0], choices=uvr_model, interactive=True)
|
| 27 |
-
separator_backing_model = gr.Dropdown(label=translations["separator_backing_model"], value="Version-1", choices=["Version-1", "Version-2"], interactive=True, visible=backing.value)
|
| 28 |
-
with gr.Row():
|
| 29 |
-
with gr.Column():
|
| 30 |
-
separator_button = gr.Button(translations["separator_tab"], variant="primary")
|
| 31 |
-
with gr.Row():
|
| 32 |
-
with gr.Column():
|
| 33 |
-
with gr.Group():
|
| 34 |
-
with gr.Row():
|
| 35 |
-
shifts = gr.Slider(label=translations["shift"], info=translations["shift_info"], minimum=1, maximum=20, value=2, step=1, interactive=True)
|
| 36 |
-
segment_size = gr.Slider(label=translations["segments_size"], info=translations["segments_size_info"], minimum=32, maximum=3072, value=256, step=32, interactive=True)
|
| 37 |
-
with gr.Row():
|
| 38 |
-
mdx_batch_size = gr.Slider(label=translations["batch_size"], info=translations["mdx_batch_size_info"], minimum=1, maximum=64, value=1, step=1, interactive=True, visible=backing.value or reverb.value or separator_model.value in mdx_model)
|
| 39 |
-
with gr.Column():
|
| 40 |
-
with gr.Group():
|
| 41 |
-
with gr.Row():
|
| 42 |
-
overlap = gr.Radio(label=translations["overlap"], info=translations["overlap_info"], choices=["0.25", "0.5", "0.75", "0.99"], value="0.25", interactive=True)
|
| 43 |
-
with gr.Row():
|
| 44 |
-
mdx_hop_length = gr.Slider(label="Hop length", info=translations["hop_length_info"], minimum=1, maximum=8192, value=1024, step=1, interactive=True, visible=backing.value or reverb.value or separator_model.value in mdx_model)
|
| 45 |
-
with gr.Row():
|
| 46 |
-
with gr.Column():
|
| 47 |
-
input = gr.File(label=translations["drop_audio"], file_types=[".wav", ".mp3", ".flac", ".ogg", ".opus", ".m4a", ".mp4", ".aac", ".alac", ".wma", ".aiff", ".webm", ".ac3"])
|
| 48 |
-
with gr.Accordion(translations["use_url"], open=False):
|
| 49 |
-
url = gr.Textbox(label=translations["url_audio"], value="", placeholder="https://www.youtube.com/...", scale=6)
|
| 50 |
-
download_button = gr.Button(translations["downloads"])
|
| 51 |
-
with gr.Column():
|
| 52 |
-
with gr.Row():
|
| 53 |
-
clean_strength = gr.Slider(label=translations["clean_strength"], info=translations["clean_strength_info"], minimum=0, maximum=1, value=0.5, step=0.1, interactive=True, visible=cleaner.value)
|
| 54 |
-
sample_rate1 = gr.Radio(choices=sample_rate_choice, value=44100, label=translations["sr"], info=translations["sr_info"], interactive=True)
|
| 55 |
-
with gr.Accordion(translations["input_output"], open=False):
|
| 56 |
-
format = gr.Radio(label=translations["export_format"], info=translations["export_info"], choices=["wav", "mp3", "flac", "ogg", "opus", "m4a", "mp4", "aac", "alac", "wma", "aiff", "webm", "ac3"], value="wav", interactive=True)
|
| 57 |
-
input_audio = gr.Dropdown(label=translations["audio_path"], value="", choices=paths_for_files, allow_custom_value=True, interactive=True)
|
| 58 |
-
refresh_separator = gr.Button(translations["refresh"])
|
| 59 |
-
output_separator = gr.Textbox(label=translations["output_folder"], value="audios", placeholder="audios", info=translations["output_folder_info"], interactive=True)
|
| 60 |
-
audio_input = gr.Audio(show_download_button=True, interactive=False, label=translations["input_audio"])
|
| 61 |
-
with gr.Row():
|
| 62 |
-
gr.Markdown(translations["output_separator"])
|
| 63 |
-
with gr.Row():
|
| 64 |
-
instruments_audio = gr.Audio(show_download_button=True, interactive=False, label=translations["instruments"])
|
| 65 |
-
original_vocals = gr.Audio(show_download_button=True, interactive=False, label=translations["original_vocal"])
|
| 66 |
-
main_vocals = gr.Audio(show_download_button=True, interactive=False, label=translations["main_vocal"], visible=backing.value)
|
| 67 |
-
backing_vocals = gr.Audio(show_download_button=True, interactive=False, label=translations["backing_vocal"], visible=backing.value)
|
| 68 |
-
with gr.Row():
|
| 69 |
-
separator_model.change(fn=lambda a, b, c: [visible(a or b or c in mdx_model), visible(a or b or c in mdx_model), valueFalse_interactive(a or b or c in mdx_model), visible(c not in mdx_model)], inputs=[backing, reverb, separator_model], outputs=[mdx_batch_size, mdx_hop_length, denoise, shifts])
|
| 70 |
-
backing.change(fn=lambda a, b, c: [visible(a or b or c in mdx_model), visible(a or b or c in mdx_model), valueFalse_interactive(a or b or c in mdx_model), visible(a), visible(a), visible(a), valueFalse_interactive(a and b)], inputs=[backing, reverb, separator_model], outputs=[mdx_batch_size, mdx_hop_length, denoise, separator_backing_model, main_vocals, backing_vocals, backing_reverb])
|
| 71 |
-
reverb.change(fn=lambda a, b, c: [visible(a or b or c in mdx_model), visible(a or b or c in mdx_model), valueFalse_interactive(a or b or c in mdx_model), valueFalse_interactive(a and b)], inputs=[backing, reverb, separator_model], outputs=[mdx_batch_size, mdx_hop_length, denoise, backing_reverb])
|
| 72 |
-
with gr.Row():
|
| 73 |
-
input_audio.change(fn=lambda audio: audio if os.path.isfile(audio) else None, inputs=[input_audio], outputs=[audio_input])
|
| 74 |
-
cleaner.change(fn=visible, inputs=[cleaner], outputs=[clean_strength])
|
| 75 |
-
with gr.Row():
|
| 76 |
-
input.upload(fn=lambda audio_in: shutil_move(audio_in.name, configs["audios_path"]), inputs=[input], outputs=[input_audio])
|
| 77 |
-
refresh_separator.click(fn=change_audios_choices, inputs=[input_audio], outputs=[input_audio])
|
| 78 |
-
with gr.Row():
|
| 79 |
-
download_button.click(
|
| 80 |
-
fn=download_url,
|
| 81 |
-
inputs=[url],
|
| 82 |
-
outputs=[input_audio, audio_input, url],
|
| 83 |
-
api_name='download_url'
|
| 84 |
-
)
|
| 85 |
-
separator_button.click(
|
| 86 |
-
fn=separator_music,
|
| 87 |
-
inputs=[
|
| 88 |
-
input_audio,
|
| 89 |
-
output_separator,
|
| 90 |
-
format,
|
| 91 |
-
shifts,
|
| 92 |
-
segment_size,
|
| 93 |
-
overlap,
|
| 94 |
-
cleaner,
|
| 95 |
-
clean_strength,
|
| 96 |
-
denoise,
|
| 97 |
-
separator_model,
|
| 98 |
-
separator_backing_model,
|
| 99 |
-
backing,
|
| 100 |
-
reverb,
|
| 101 |
-
backing_reverb,
|
| 102 |
-
mdx_hop_length,
|
| 103 |
-
mdx_batch_size,
|
| 104 |
-
sample_rate1
|
| 105 |
-
],
|
| 106 |
-
outputs=[original_vocals, instruments_audio, main_vocals, backing_vocals],
|
| 107 |
-
api_name='separator_music'
|
| 108 |
-
)
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
main/app/tabs/inference/inference.py
DELETED
|
@@ -1,30 +0,0 @@
|
|
| 1 |
-
import os
|
| 2 |
-
import sys
|
| 3 |
-
|
| 4 |
-
import gradio as gr
|
| 5 |
-
|
| 6 |
-
sys.path.append(os.getcwd())
|
| 7 |
-
|
| 8 |
-
from main.app.variables import translations, configs
|
| 9 |
-
from main.app.tabs.inference.child.convert import convert_tab
|
| 10 |
-
from main.app.tabs.inference.child.separate import separate_tab
|
| 11 |
-
from main.app.tabs.inference.child.convert_tts import convert_tts_tab
|
| 12 |
-
from main.app.tabs.inference.child.convert_with_whisper import convert_with_whisper_tab
|
| 13 |
-
|
| 14 |
-
def inference_tab():
|
| 15 |
-
with gr.TabItem(translations["inference"], visible=configs.get("inference_tab", True)):
|
| 16 |
-
with gr.TabItem(translations["separator_tab"], visible=configs.get("separator_tab", True)):
|
| 17 |
-
gr.Markdown(f"## {translations['separator_tab']}")
|
| 18 |
-
separate_tab()
|
| 19 |
-
|
| 20 |
-
with gr.TabItem(translations["convert_audio"], visible=configs.get("convert_tab", True)):
|
| 21 |
-
gr.Markdown(f"## {translations['convert_audio']}")
|
| 22 |
-
convert_tab()
|
| 23 |
-
|
| 24 |
-
with gr.TabItem(translations["convert_with_whisper"], visible=configs.get("convert_with_whisper", True)):
|
| 25 |
-
gr.Markdown(f"## {translations['convert_with_whisper']}")
|
| 26 |
-
convert_with_whisper_tab()
|
| 27 |
-
|
| 28 |
-
with gr.TabItem(translations["convert_text"], visible=configs.get("tts_tab", True)):
|
| 29 |
-
gr.Markdown(translations["convert_text_markdown"])
|
| 30 |
-
convert_tts_tab()
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
main/app/tabs/training/child/create_dataset.py
DELETED
|
@@ -1,71 +0,0 @@
|
|
| 1 |
-
import os
|
| 2 |
-
import sys
|
| 3 |
-
|
| 4 |
-
import gradio as gr
|
| 5 |
-
|
| 6 |
-
sys.path.append(os.getcwd())
|
| 7 |
-
|
| 8 |
-
from main.app.core.training import create_dataset
|
| 9 |
-
from main.app.core.ui import visible, valueEmpty_visible1
|
| 10 |
-
from main.app.variables import translations, sample_rate_choice
|
| 11 |
-
|
| 12 |
-
def create_dataset_tab():
|
| 13 |
-
with gr.Row():
|
| 14 |
-
gr.Markdown(translations["create_dataset_markdown_2"])
|
| 15 |
-
with gr.Row():
|
| 16 |
-
dataset_url = gr.Textbox(label=translations["url_audio"], info=translations["create_dataset_url"], value="", placeholder="https://www.youtube.com/...", interactive=True)
|
| 17 |
-
output_dataset = gr.Textbox(label=translations["output_data"], info=translations["output_data_info"], value="dataset", placeholder="dataset", interactive=True)
|
| 18 |
-
with gr.Row():
|
| 19 |
-
with gr.Column():
|
| 20 |
-
with gr.Group():
|
| 21 |
-
with gr.Row():
|
| 22 |
-
separator_reverb = gr.Checkbox(label=translations["dereveb_audio"], value=False, interactive=True)
|
| 23 |
-
denoise_mdx = gr.Checkbox(label=translations["denoise"], value=False, interactive=True)
|
| 24 |
-
with gr.Row():
|
| 25 |
-
kim_vocal_version = gr.Radio(label=translations["model_ver"], info=translations["model_ver_info"], choices=["Version-1", "Version-2"], value="Version-2", interactive=True)
|
| 26 |
-
kim_vocal_overlap = gr.Radio(label=translations["overlap"], info=translations["overlap_info"], choices=["0.25", "0.5", "0.75", "0.99"], value="0.25", interactive=True)
|
| 27 |
-
with gr.Row():
|
| 28 |
-
kim_vocal_hop_length = gr.Slider(label="Hop length", info=translations["hop_length_info"], minimum=1, maximum=8192, value=1024, step=1, interactive=True)
|
| 29 |
-
kim_vocal_batch_size = gr.Slider(label=translations["batch_size"], info=translations["mdx_batch_size_info"], minimum=1, maximum=64, value=1, step=1, interactive=True)
|
| 30 |
-
with gr.Row():
|
| 31 |
-
kim_vocal_segments_size = gr.Slider(label=translations["segments_size"], info=translations["segments_size_info"], minimum=32, maximum=3072, value=256, step=32, interactive=True)
|
| 32 |
-
with gr.Row():
|
| 33 |
-
sample_rate0 = gr.Radio(choices=sample_rate_choice, value=44100, label=translations["sr"], info=translations["sr_info"], interactive=True)
|
| 34 |
-
with gr.Column():
|
| 35 |
-
create_button = gr.Button(translations["createdataset"], variant="primary", scale=2, min_width=4000)
|
| 36 |
-
with gr.Group():
|
| 37 |
-
with gr.Row():
|
| 38 |
-
clean_audio = gr.Checkbox(label=translations["clear_audio"], value=False, interactive=True)
|
| 39 |
-
skip = gr.Checkbox(label=translations["skip"], value=False, interactive=True)
|
| 40 |
-
with gr.Row():
|
| 41 |
-
dataset_clean_strength = gr.Slider(minimum=0, maximum=1, step=0.1, value=0.5, label=translations["clean_strength"], info=translations["clean_strength_info"], interactive=True, visible=clean_audio.value)
|
| 42 |
-
with gr.Row():
|
| 43 |
-
skip_start = gr.Textbox(label=translations["skip_start"], info=translations["skip_start_info"], value="", placeholder="0,...", interactive=True, visible=skip.value)
|
| 44 |
-
skip_end = gr.Textbox(label=translations["skip_end"], info=translations["skip_end_info"], value="", placeholder="0,...", interactive=True, visible=skip.value)
|
| 45 |
-
create_dataset_info = gr.Textbox(label=translations["create_dataset_info"], value="", interactive=False)
|
| 46 |
-
with gr.Row():
|
| 47 |
-
clean_audio.change(fn=visible, inputs=[clean_audio], outputs=[dataset_clean_strength])
|
| 48 |
-
skip.change(fn=lambda a: [valueEmpty_visible1(a)]*2, inputs=[skip], outputs=[skip_start, skip_end])
|
| 49 |
-
with gr.Row():
|
| 50 |
-
create_button.click(
|
| 51 |
-
fn=create_dataset,
|
| 52 |
-
inputs=[
|
| 53 |
-
dataset_url,
|
| 54 |
-
output_dataset,
|
| 55 |
-
clean_audio,
|
| 56 |
-
dataset_clean_strength,
|
| 57 |
-
separator_reverb,
|
| 58 |
-
kim_vocal_version,
|
| 59 |
-
kim_vocal_overlap,
|
| 60 |
-
kim_vocal_segments_size,
|
| 61 |
-
denoise_mdx,
|
| 62 |
-
skip,
|
| 63 |
-
skip_start,
|
| 64 |
-
skip_end,
|
| 65 |
-
kim_vocal_hop_length,
|
| 66 |
-
kim_vocal_batch_size,
|
| 67 |
-
sample_rate0
|
| 68 |
-
],
|
| 69 |
-
outputs=[create_dataset_info],
|
| 70 |
-
api_name="create_dataset"
|
| 71 |
-
)
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
main/app/tabs/training/child/training.py
DELETED
|
@@ -1,237 +0,0 @@
|
|
| 1 |
-
import os
|
| 2 |
-
import sys
|
| 3 |
-
|
| 4 |
-
import gradio as gr
|
| 5 |
-
|
| 6 |
-
sys.path.append(os.getcwd())
|
| 7 |
-
|
| 8 |
-
from main.app.core.process import zip_file
|
| 9 |
-
from main.app.core.training import preprocess, extract, create_index, training
|
| 10 |
-
from main.app.variables import translations, model_name, index_path, method_f0, embedders_mode, embedders_model, pretrainedD, pretrainedG, config
|
| 11 |
-
from main.app.core.ui import gr_warning, visible, unlock_f0, hoplength_show, change_models_choices, get_gpu_info, visible_embedders, pitch_guidance_lock, vocoders_lock, unlock_ver, unlock_vocoder, change_pretrained_choices, gpu_number_str, shutil_move
|
| 12 |
-
|
| 13 |
-
def training_model_tab():
|
| 14 |
-
with gr.Row():
|
| 15 |
-
gr.Markdown(translations["training_markdown"])
|
| 16 |
-
with gr.Row():
|
| 17 |
-
with gr.Column():
|
| 18 |
-
with gr.Row():
|
| 19 |
-
with gr.Column():
|
| 20 |
-
training_name = gr.Textbox(label=translations["modelname"], info=translations["training_model_name"], value="", placeholder=translations["modelname"], interactive=True)
|
| 21 |
-
training_sr = gr.Radio(label=translations["sample_rate"], info=translations["sample_rate_info"], choices=["32k", "40k", "48k"], value="48k", interactive=True)
|
| 22 |
-
training_ver = gr.Radio(label=translations["training_version"], info=translations["training_version_info"], choices=["v1", "v2"], value="v2", interactive=True)
|
| 23 |
-
with gr.Row():
|
| 24 |
-
clean_dataset = gr.Checkbox(label=translations["clear_dataset"], value=False, interactive=True)
|
| 25 |
-
preprocess_cut = gr.Checkbox(label=translations["split_audio"], value=True, interactive=True)
|
| 26 |
-
process_effects = gr.Checkbox(label=translations["preprocess_effect"], value=False, interactive=True)
|
| 27 |
-
checkpointing1 = gr.Checkbox(label=translations["memory_efficient_training"], value=False, interactive=True)
|
| 28 |
-
training_f0 = gr.Checkbox(label=translations["training_pitch"], value=True, interactive=True)
|
| 29 |
-
upload = gr.Checkbox(label=translations["upload_dataset"], value=False, interactive=True)
|
| 30 |
-
with gr.Row():
|
| 31 |
-
clean_dataset_strength = gr.Slider(label=translations["clean_strength"], info=translations["clean_strength_info"], minimum=0, maximum=1, value=0.7, step=0.1, interactive=True, visible=clean_dataset.value)
|
| 32 |
-
with gr.Column():
|
| 33 |
-
preprocess_button = gr.Button(translations["preprocess_button"], scale=2)
|
| 34 |
-
upload_dataset = gr.Files(label=translations["drop_audio"], file_types=[".wav", ".mp3", ".flac", ".ogg", ".opus", ".m4a", ".mp4", ".aac", ".alac", ".wma", ".aiff", ".webm", ".ac3"], visible=upload.value)
|
| 35 |
-
preprocess_info = gr.Textbox(label=translations["preprocess_info"], value="", interactive=False)
|
| 36 |
-
with gr.Column():
|
| 37 |
-
with gr.Row():
|
| 38 |
-
with gr.Column():
|
| 39 |
-
with gr.Accordion(label=translations["f0_method"], open=False):
|
| 40 |
-
with gr.Group():
|
| 41 |
-
with gr.Row():
|
| 42 |
-
onnx_f0_mode2 = gr.Checkbox(label=translations["f0_onnx_mode"], value=False, interactive=True)
|
| 43 |
-
unlock_full_method4 = gr.Checkbox(label=translations["f0_unlock"], value=False, interactive=True)
|
| 44 |
-
autotune = gr.Checkbox(label=translations["autotune"], value=False, interactive=True)
|
| 45 |
-
extract_method = gr.Radio(label=translations["f0_method"], info=translations["f0_method_info"], choices=method_f0, value="rmvpe", interactive=True)
|
| 46 |
-
extract_hybrid_method = gr.Dropdown(label=translations["f0_method_hybrid"], info=translations["f0_method_hybrid_info"], choices=["hybrid[pm+dio]", "hybrid[pm+crepe-tiny]", "hybrid[pm+crepe]", "hybrid[pm+fcpe]", "hybrid[pm+rmvpe]", "hybrid[pm+harvest]", "hybrid[pm+yin]", "hybrid[dio+crepe-tiny]", "hybrid[dio+crepe]", "hybrid[dio+fcpe]", "hybrid[dio+rmvpe]", "hybrid[dio+harvest]", "hybrid[dio+yin]", "hybrid[crepe-tiny+crepe]", "hybrid[crepe-tiny+fcpe]", "hybrid[crepe-tiny+rmvpe]", "hybrid[crepe-tiny+harvest]", "hybrid[crepe+fcpe]", "hybrid[crepe+rmvpe]", "hybrid[crepe+harvest]", "hybrid[crepe+yin]", "hybrid[fcpe+rmvpe]", "hybrid[fcpe+harvest]", "hybrid[fcpe+yin]", "hybrid[rmvpe+harvest]", "hybrid[rmvpe+yin]", "hybrid[harvest+yin]"], value="hybrid[pm+dio]", interactive=True, allow_custom_value=True, visible=extract_method.value == "hybrid")
|
| 47 |
-
extract_hop_length = gr.Slider(label="Hop length", info=translations["hop_length_info"], minimum=1, maximum=512, value=128, step=1, interactive=True, visible=False)
|
| 48 |
-
f0_autotune_strength = gr.Slider(minimum=0, maximum=1, label=translations["autotune_rate"], info=translations["autotune_rate_info"], value=1, step=0.1, interactive=True, visible=autotune.value)
|
| 49 |
-
with gr.Accordion(label=translations["hubert_model"], open=False):
|
| 50 |
-
with gr.Group():
|
| 51 |
-
embed_mode2 = gr.Radio(label=translations["embed_mode"], info=translations["embed_mode_info"], value="fairseq", choices=embedders_mode, interactive=True, visible=True)
|
| 52 |
-
extract_embedders = gr.Radio(label=translations["hubert_model"], info=translations["hubert_info"], choices=embedders_model, value="hubert_base", interactive=True)
|
| 53 |
-
with gr.Row():
|
| 54 |
-
extract_embedders_custom = gr.Textbox(label=translations["modelname"], info=translations["modelname_info"], value="", placeholder="hubert_base", interactive=True, visible=extract_embedders.value == "custom")
|
| 55 |
-
with gr.Column():
|
| 56 |
-
extract_button = gr.Button(translations["extract_button"], scale=2)
|
| 57 |
-
extract_info = gr.Textbox(label=translations["extract_info"], value="", interactive=False)
|
| 58 |
-
with gr.Column():
|
| 59 |
-
with gr.Row():
|
| 60 |
-
with gr.Column():
|
| 61 |
-
total_epochs = gr.Slider(label=translations["total_epoch"], info=translations["total_epoch_info"], minimum=1, maximum=10000, value=300, step=1, interactive=True)
|
| 62 |
-
save_epochs = gr.Slider(label=translations["save_epoch"], info=translations["save_epoch_info"], minimum=1, maximum=10000, value=50, step=1, interactive=True)
|
| 63 |
-
with gr.Column():
|
| 64 |
-
index_button = gr.Button(f"3. {translations['create_index']}", variant="primary", scale=2)
|
| 65 |
-
training_button = gr.Button(f"4. {translations['training_model']}", variant="primary", scale=2)
|
| 66 |
-
with gr.Row():
|
| 67 |
-
with gr.Accordion(label=translations["setting"], open=False):
|
| 68 |
-
with gr.Row():
|
| 69 |
-
index_algorithm = gr.Radio(label=translations["index_algorithm"], info=translations["index_algorithm_info"], choices=["Auto", "Faiss", "KMeans"], value="Auto", interactive=True)
|
| 70 |
-
with gr.Row():
|
| 71 |
-
cache_in_gpu = gr.Checkbox(label=translations["cache_in_gpu"], info=translations["cache_in_gpu_info"], value=True, interactive=True)
|
| 72 |
-
rms_extract = gr.Checkbox(label=translations["train&energy"], info=translations["train&energy_info"], value=False, interactive=True)
|
| 73 |
-
overtraining_detector = gr.Checkbox(label=translations["overtraining_detector"], info=translations["overtraining_detector_info"], value=False, interactive=True)
|
| 74 |
-
with gr.Row():
|
| 75 |
-
custom_dataset = gr.Checkbox(label=translations["custom_dataset"], info=translations["custom_dataset_info"], value=False, interactive=True)
|
| 76 |
-
save_only_latest = gr.Checkbox(label=translations["save_only_latest"], info=translations["save_only_latest_info"], value=True, interactive=True)
|
| 77 |
-
save_every_weights = gr.Checkbox(label=translations["save_every_weights"], info=translations["save_every_weights_info"], value=True, interactive=True)
|
| 78 |
-
with gr.Row():
|
| 79 |
-
clean_up = gr.Checkbox(label=translations["cleanup_training"], info=translations["cleanup_training_info"], value=False, interactive=True)
|
| 80 |
-
not_use_pretrain = gr.Checkbox(label=translations["not_use_pretrain_2"], info=translations["not_use_pretrain_info"], value=False, interactive=True)
|
| 81 |
-
custom_pretrain = gr.Checkbox(label=translations["custom_pretrain"], info=translations["custom_pretrain_info"], value=False, interactive=True)
|
| 82 |
-
with gr.Column():
|
| 83 |
-
dataset_path = gr.Textbox(label=translations["dataset_folder"], value="dataset", interactive=True, visible=custom_dataset.value)
|
| 84 |
-
with gr.Column():
|
| 85 |
-
threshold = gr.Slider(minimum=1, maximum=100, value=50, step=1, label=translations["threshold"], interactive=True, visible=overtraining_detector.value)
|
| 86 |
-
with gr.Accordion(translations["setting_cpu_gpu"], open=False):
|
| 87 |
-
with gr.Column():
|
| 88 |
-
gpu_number = gr.Textbox(label=translations["gpu_number"], value=gpu_number_str(), info=translations["gpu_number_info"], interactive=True)
|
| 89 |
-
gpu_info = gr.Textbox(label=translations["gpu_info"], value=get_gpu_info(), info=translations["gpu_info_2"], interactive=False)
|
| 90 |
-
cpu_core = gr.Slider(label=translations["cpu_core"], info=translations["cpu_core_info"], minimum=1, maximum=os.cpu_count(), value=os.cpu_count(), step=1, interactive=True)
|
| 91 |
-
train_batch_size = gr.Slider(label=translations["batch_size"], info=translations["batch_size_info"], minimum=1, maximum=64, value=8, step=1, interactive=True)
|
| 92 |
-
with gr.Row():
|
| 93 |
-
vocoders = gr.Radio(label=translations["vocoder"], info=translations["vocoder_info"], choices=["Default", "MRF-HiFi-GAN", "RefineGAN"], value="Default", interactive=True)
|
| 94 |
-
with gr.Row():
|
| 95 |
-
deterministic = gr.Checkbox(label=translations["deterministic"], info=translations["deterministic_info"], value=False, interactive=config.device.startswith("cuda"))
|
| 96 |
-
benchmark = gr.Checkbox(label=translations["benchmark"], info=translations["benchmark_info"], value=False, interactive=config.device.startswith("cuda"))
|
| 97 |
-
with gr.Row():
|
| 98 |
-
optimizer = gr.Radio(label=translations["optimizer"], info=translations["optimizer_info"], value="AdamW", choices=["AdamW", "RAdam"], interactive=True)
|
| 99 |
-
with gr.Row():
|
| 100 |
-
model_author = gr.Textbox(label=translations["training_author"], info=translations["training_author_info"], value="", placeholder=translations["training_author"], interactive=True)
|
| 101 |
-
with gr.Row():
|
| 102 |
-
with gr.Column():
|
| 103 |
-
with gr.Accordion(translations["custom_pretrain_info"], open=False, visible=custom_pretrain.value and not not_use_pretrain.value) as pretrain_setting:
|
| 104 |
-
pretrained_D = gr.Dropdown(label=translations["pretrain_file"].format(dg="D"), choices=pretrainedD, value=pretrainedD[0] if len(pretrainedD) > 0 else '', interactive=True, allow_custom_value=True)
|
| 105 |
-
pretrained_G = gr.Dropdown(label=translations["pretrain_file"].format(dg="G"), choices=pretrainedG, value=pretrainedG[0] if len(pretrainedG) > 0 else '', interactive=True, allow_custom_value=True)
|
| 106 |
-
refresh_pretrain = gr.Button(translations["refresh"], scale=2)
|
| 107 |
-
with gr.Row():
|
| 108 |
-
training_info = gr.Textbox(label=translations["train_info"], value="", interactive=False)
|
| 109 |
-
with gr.Row():
|
| 110 |
-
with gr.Column():
|
| 111 |
-
with gr.Accordion(translations["export_model"], open=False):
|
| 112 |
-
with gr.Row():
|
| 113 |
-
model_file= gr.Dropdown(label=translations["model_name"], choices=model_name, value=model_name[0] if len(model_name) >= 1 else "", interactive=True, allow_custom_value=True)
|
| 114 |
-
index_file = gr.Dropdown(label=translations["index_path"], choices=index_path, value=index_path[0] if len(index_path) >= 1 else "", interactive=True, allow_custom_value=True)
|
| 115 |
-
with gr.Row():
|
| 116 |
-
refresh_file = gr.Button(f"1. {translations['refresh']}", scale=2)
|
| 117 |
-
zip_model = gr.Button(translations["zip_model"], variant="primary", scale=2)
|
| 118 |
-
with gr.Row():
|
| 119 |
-
zip_output = gr.File(label=translations["output_zip"], file_types=[".zip"], interactive=False, visible=False)
|
| 120 |
-
with gr.Row():
|
| 121 |
-
vocoders.change(fn=pitch_guidance_lock, inputs=[vocoders], outputs=[training_f0])
|
| 122 |
-
training_f0.change(fn=vocoders_lock, inputs=[training_f0, vocoders], outputs=[vocoders])
|
| 123 |
-
unlock_full_method4.change(fn=unlock_f0, inputs=[unlock_full_method4], outputs=[extract_method])
|
| 124 |
-
with gr.Row():
|
| 125 |
-
refresh_file.click(fn=change_models_choices, inputs=[], outputs=[model_file, index_file])
|
| 126 |
-
zip_model.click(fn=zip_file, inputs=[training_name, model_file, index_file], outputs=[zip_output])
|
| 127 |
-
dataset_path.change(fn=lambda folder: os.makedirs(folder, exist_ok=True), inputs=[dataset_path], outputs=[])
|
| 128 |
-
with gr.Row():
|
| 129 |
-
upload.change(fn=visible, inputs=[upload], outputs=[upload_dataset])
|
| 130 |
-
overtraining_detector.change(fn=visible, inputs=[overtraining_detector], outputs=[threshold])
|
| 131 |
-
clean_dataset.change(fn=visible, inputs=[clean_dataset], outputs=[clean_dataset_strength])
|
| 132 |
-
with gr.Row():
|
| 133 |
-
custom_dataset.change(fn=lambda custom_dataset: [visible(custom_dataset), "dataset"],inputs=[custom_dataset], outputs=[dataset_path, dataset_path])
|
| 134 |
-
training_ver.change(fn=unlock_vocoder, inputs=[training_ver, vocoders], outputs=[vocoders])
|
| 135 |
-
vocoders.change(fn=unlock_ver, inputs=[training_ver, vocoders], outputs=[training_ver])
|
| 136 |
-
with gr.Row():
|
| 137 |
-
extract_method.change(fn=lambda method, hybrid: [visible(method == "hybrid"), hoplength_show(method, hybrid)], inputs=[extract_method, extract_hybrid_method], outputs=[extract_hybrid_method, extract_hop_length])
|
| 138 |
-
extract_hybrid_method.change(fn=hoplength_show, inputs=[extract_method, extract_hybrid_method], outputs=[extract_hop_length])
|
| 139 |
-
with gr.Row():
|
| 140 |
-
autotune.change(fn=visible, inputs=[autotune], outputs=[f0_autotune_strength])
|
| 141 |
-
upload_dataset.upload(
|
| 142 |
-
fn=lambda files, folder: [shutil_move(f.name, os.path.join(folder, os.path.split(f.name)[1])) for f in files] if folder != "" else gr_warning(translations["dataset_folder1"]),
|
| 143 |
-
inputs=[upload_dataset, dataset_path],
|
| 144 |
-
outputs=[],
|
| 145 |
-
api_name="upload_dataset"
|
| 146 |
-
)
|
| 147 |
-
with gr.Row():
|
| 148 |
-
not_use_pretrain.change(fn=lambda a, b: visible(a and not b), inputs=[custom_pretrain, not_use_pretrain], outputs=[pretrain_setting])
|
| 149 |
-
custom_pretrain.change(fn=lambda a, b: visible(a and not b), inputs=[custom_pretrain, not_use_pretrain], outputs=[pretrain_setting])
|
| 150 |
-
refresh_pretrain.click(fn=change_pretrained_choices, inputs=[], outputs=[pretrained_D, pretrained_G])
|
| 151 |
-
with gr.Row():
|
| 152 |
-
preprocess_button.click(
|
| 153 |
-
fn=preprocess,
|
| 154 |
-
inputs=[
|
| 155 |
-
training_name,
|
| 156 |
-
training_sr,
|
| 157 |
-
cpu_core,
|
| 158 |
-
preprocess_cut,
|
| 159 |
-
process_effects,
|
| 160 |
-
dataset_path,
|
| 161 |
-
clean_dataset,
|
| 162 |
-
clean_dataset_strength
|
| 163 |
-
],
|
| 164 |
-
outputs=[preprocess_info],
|
| 165 |
-
api_name="preprocess"
|
| 166 |
-
)
|
| 167 |
-
with gr.Row():
|
| 168 |
-
embed_mode2.change(fn=visible_embedders, inputs=[embed_mode2], outputs=[extract_embedders])
|
| 169 |
-
extract_embedders.change(fn=lambda extract_embedders: visible(extract_embedders == "custom"), inputs=[extract_embedders], outputs=[extract_embedders_custom])
|
| 170 |
-
with gr.Row():
|
| 171 |
-
extract_button.click(
|
| 172 |
-
fn=extract,
|
| 173 |
-
inputs=[
|
| 174 |
-
training_name,
|
| 175 |
-
training_ver,
|
| 176 |
-
extract_method,
|
| 177 |
-
training_f0,
|
| 178 |
-
extract_hop_length,
|
| 179 |
-
cpu_core,
|
| 180 |
-
gpu_number,
|
| 181 |
-
training_sr,
|
| 182 |
-
extract_embedders,
|
| 183 |
-
extract_embedders_custom,
|
| 184 |
-
onnx_f0_mode2,
|
| 185 |
-
embed_mode2,
|
| 186 |
-
autotune,
|
| 187 |
-
f0_autotune_strength,
|
| 188 |
-
extract_hybrid_method,
|
| 189 |
-
rms_extract
|
| 190 |
-
],
|
| 191 |
-
outputs=[extract_info],
|
| 192 |
-
api_name="extract"
|
| 193 |
-
)
|
| 194 |
-
with gr.Row():
|
| 195 |
-
index_button.click(
|
| 196 |
-
fn=create_index,
|
| 197 |
-
inputs=[
|
| 198 |
-
training_name,
|
| 199 |
-
training_ver,
|
| 200 |
-
index_algorithm
|
| 201 |
-
],
|
| 202 |
-
outputs=[training_info],
|
| 203 |
-
api_name="create_index"
|
| 204 |
-
)
|
| 205 |
-
with gr.Row():
|
| 206 |
-
training_button.click(
|
| 207 |
-
fn=training,
|
| 208 |
-
inputs=[
|
| 209 |
-
training_name,
|
| 210 |
-
training_ver,
|
| 211 |
-
save_epochs,
|
| 212 |
-
save_only_latest,
|
| 213 |
-
save_every_weights,
|
| 214 |
-
total_epochs,
|
| 215 |
-
training_sr,
|
| 216 |
-
train_batch_size,
|
| 217 |
-
gpu_number,
|
| 218 |
-
training_f0,
|
| 219 |
-
not_use_pretrain,
|
| 220 |
-
custom_pretrain,
|
| 221 |
-
pretrained_G,
|
| 222 |
-
pretrained_D,
|
| 223 |
-
overtraining_detector,
|
| 224 |
-
threshold,
|
| 225 |
-
clean_up,
|
| 226 |
-
cache_in_gpu,
|
| 227 |
-
model_author,
|
| 228 |
-
vocoders,
|
| 229 |
-
checkpointing1,
|
| 230 |
-
deterministic,
|
| 231 |
-
benchmark,
|
| 232 |
-
optimizer,
|
| 233 |
-
rms_extract
|
| 234 |
-
],
|
| 235 |
-
outputs=[training_info],
|
| 236 |
-
api_name="training_model"
|
| 237 |
-
)
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
main/app/tabs/training/training.py
DELETED
|
@@ -1,20 +0,0 @@
|
|
| 1 |
-
import os
|
| 2 |
-
import sys
|
| 3 |
-
|
| 4 |
-
import gradio as gr
|
| 5 |
-
|
| 6 |
-
sys.path.append(os.getcwd())
|
| 7 |
-
|
| 8 |
-
from main.app.variables import translations, configs
|
| 9 |
-
from main.app.tabs.training.child.training import training_model_tab
|
| 10 |
-
from main.app.tabs.training.child.create_dataset import create_dataset_tab
|
| 11 |
-
|
| 12 |
-
def training_tab():
|
| 13 |
-
with gr.TabItem(translations["training_model"], visible=configs.get("create_and_training_tab", True)):
|
| 14 |
-
with gr.TabItem(translations["createdataset"], visible=configs.get("create_dataset_tab", True)):
|
| 15 |
-
gr.Markdown(translations["create_dataset_markdown"])
|
| 16 |
-
create_dataset_tab()
|
| 17 |
-
|
| 18 |
-
with gr.TabItem(translations["training_model"], visible=configs.get("training_tab", True)):
|
| 19 |
-
gr.Markdown(f"## {translations['training_model']}")
|
| 20 |
-
training_model_tab()
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
main/app/variables.py
DELETED
|
@@ -1,106 +0,0 @@
|
|
| 1 |
-
import os
|
| 2 |
-
import sys
|
| 3 |
-
import csv
|
| 4 |
-
import json
|
| 5 |
-
import codecs
|
| 6 |
-
import logging
|
| 7 |
-
import urllib.request
|
| 8 |
-
import logging.handlers
|
| 9 |
-
|
| 10 |
-
sys.path.append(os.getcwd())
|
| 11 |
-
|
| 12 |
-
from main.configs.config import Config
|
| 13 |
-
|
| 14 |
-
logger = logging.getLogger(__name__)
|
| 15 |
-
logger.propagate = False
|
| 16 |
-
|
| 17 |
-
config = Config()
|
| 18 |
-
python = sys.executable
|
| 19 |
-
translations = config.translations
|
| 20 |
-
configs_json = os.path.join("main", "configs", "config.json")
|
| 21 |
-
configs = json.load(open(configs_json, "r"))
|
| 22 |
-
|
| 23 |
-
if not logger.hasHandlers():
|
| 24 |
-
console_handler = logging.StreamHandler()
|
| 25 |
-
console_formatter = logging.Formatter(fmt="\n%(asctime)s.%(msecs)03d | %(levelname)s | %(module)s | %(message)s", datefmt="%Y-%m-%d %H:%M:%S")
|
| 26 |
-
console_handler.setFormatter(console_formatter)
|
| 27 |
-
console_handler.setLevel(logging.DEBUG if config.debug_mode else logging.INFO)
|
| 28 |
-
file_handler = logging.handlers.RotatingFileHandler(os.path.join(configs["logs_path"], "app.log"), maxBytes=5*1024*1024, backupCount=3, encoding='utf-8')
|
| 29 |
-
file_formatter = logging.Formatter(fmt="\n%(asctime)s.%(msecs)03d | %(levelname)s | %(module)s | %(message)s", datefmt="%Y-%m-%d %H:%M:%S")
|
| 30 |
-
file_handler.setFormatter(file_formatter)
|
| 31 |
-
file_handler.setLevel(logging.DEBUG)
|
| 32 |
-
logger.addHandler(console_handler)
|
| 33 |
-
logger.addHandler(file_handler)
|
| 34 |
-
logger.setLevel(logging.DEBUG)
|
| 35 |
-
|
| 36 |
-
if config.device in ["cpu", "mps", "ocl:0"] and configs.get("fp16", False):
|
| 37 |
-
logger.warning(translations["fp16_not_support"])
|
| 38 |
-
configs["fp16"] = config.is_half = False
|
| 39 |
-
|
| 40 |
-
with open(configs_json, "w") as f:
|
| 41 |
-
json.dump(configs, f, indent=4)
|
| 42 |
-
|
| 43 |
-
models = {}
|
| 44 |
-
model_options = {}
|
| 45 |
-
|
| 46 |
-
method_f0 = ["mangio-crepe-full", "crepe-full", "fcpe", "rmvpe", "harvest", "pyin", "hybrid"]
|
| 47 |
-
method_f0_full = ["pm-ac", "pm-cc", "pm-shs", "dio", "mangio-crepe-tiny", "mangio-crepe-small", "mangio-crepe-medium", "mangio-crepe-large", "mangio-crepe-full", "crepe-tiny", "crepe-small", "crepe-medium", "crepe-large", "crepe-full", "fcpe", "fcpe-legacy", "rmvpe", "rmvpe-legacy", "harvest", "yin", "pyin", "swipe", "piptrack", "fcn", "hybrid"]
|
| 48 |
-
|
| 49 |
-
embedders_mode = ["fairseq", "onnx", "transformers", "spin"]
|
| 50 |
-
embedders_model = ["contentvec_base", "hubert_base", "vietnamese_hubert_base", "japanese_hubert_base", "korean_hubert_base", "chinese_hubert_base", "portuguese_hubert_base", "custom"]
|
| 51 |
-
|
| 52 |
-
paths_for_files = sorted([os.path.abspath(os.path.join(root, f)) for root, _, files in os.walk(configs["audios_path"]) for f in files if os.path.splitext(f)[1].lower() in (".wav", ".mp3", ".flac", ".ogg", ".opus", ".m4a", ".mp4", ".aac", ".alac", ".wma", ".aiff", ".webm", ".ac3")])
|
| 53 |
-
|
| 54 |
-
model_name = sorted(list(model for model in os.listdir(configs["weights_path"]) if model.endswith((".pth", ".onnx")) and not model.startswith("G_") and not model.startswith("D_")))
|
| 55 |
-
index_path = sorted([os.path.join(root, name) for root, _, files in os.walk(configs["logs_path"], topdown=False) for name in files if name.endswith(".index") and "trained" not in name])
|
| 56 |
-
|
| 57 |
-
pretrainedD = [model for model in os.listdir(configs["pretrained_custom_path"]) if model.endswith(".pth") and "D" in model]
|
| 58 |
-
pretrainedG = [model for model in os.listdir(configs["pretrained_custom_path"]) if model.endswith(".pth") and "G" in model]
|
| 59 |
-
|
| 60 |
-
presets_file = sorted(list(f for f in os.listdir(configs["presets_path"]) if f.endswith(".conversion.json")))
|
| 61 |
-
audio_effect_presets_file = sorted(list(f for f in os.listdir(configs["presets_path"]) if f.endswith(".effect.json")))
|
| 62 |
-
f0_file = sorted([os.path.abspath(os.path.join(root, f)) for root, _, files in os.walk(configs["f0_path"]) for f in files if f.endswith(".txt")])
|
| 63 |
-
|
| 64 |
-
language = configs.get("language", "vi-VN")
|
| 65 |
-
theme = configs.get("theme", "NoCrypt/miku")
|
| 66 |
-
|
| 67 |
-
edgetts = configs.get("edge_tts", ["vi-VN-HoaiMyNeural", "vi-VN-NamMinhNeural"])
|
| 68 |
-
google_tts_voice = configs.get("google_tts_voice", ["vi", "en"])
|
| 69 |
-
|
| 70 |
-
mdx_model = configs.get("mdx_model", "MDXNET_Main")
|
| 71 |
-
uvr_model = configs.get("demucs_model", "HD_MMI") + mdx_model
|
| 72 |
-
|
| 73 |
-
font = configs.get("font", "https://fonts.googleapis.com/css2?family=Courgette&display=swap")
|
| 74 |
-
sample_rate_choice = [8000, 11025, 12000, 16000, 22050, 24000, 32000, 44100, 48000, 96000]
|
| 75 |
-
csv_path = configs["csv_path"]
|
| 76 |
-
|
| 77 |
-
if "--allow_all_disk" in sys.argv and sys.platform == "win32":
|
| 78 |
-
try:
|
| 79 |
-
import win32api
|
| 80 |
-
except:
|
| 81 |
-
os.system(f"{python} -m pip install pywin32")
|
| 82 |
-
import win32api
|
| 83 |
-
|
| 84 |
-
allow_disk = win32api.GetLogicalDriveStrings().split('\x00')[:-1]
|
| 85 |
-
else: allow_disk = []
|
| 86 |
-
|
| 87 |
-
try:
|
| 88 |
-
if os.path.exists(csv_path): reader = list(csv.DictReader(open(csv_path, newline='', encoding='utf-8')))
|
| 89 |
-
else:
|
| 90 |
-
reader = list(csv.DictReader([line.decode('utf-8') for line in urllib.request.urlopen(codecs.decode("uggcf://qbpf.tbbtyr.pbz/fcernqfurrgf/q/1gNHnDeRULtEfz1Yieaw14USUQjWJy0Oq9k0DrCrjApb/rkcbeg?sbezng=pfi&tvq=1977693859", "rot13")).readlines()]))
|
| 91 |
-
writer = csv.DictWriter(open(csv_path, mode='w', newline='', encoding='utf-8'), fieldnames=reader[0].keys())
|
| 92 |
-
writer.writeheader()
|
| 93 |
-
writer.writerows(reader)
|
| 94 |
-
|
| 95 |
-
for row in reader:
|
| 96 |
-
filename = row['Filename']
|
| 97 |
-
url = None
|
| 98 |
-
|
| 99 |
-
for value in row.values():
|
| 100 |
-
if isinstance(value, str) and "huggingface" in value:
|
| 101 |
-
url = value
|
| 102 |
-
break
|
| 103 |
-
|
| 104 |
-
if url: models[filename] = url
|
| 105 |
-
except:
|
| 106 |
-
pass
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
main/configs/config.json
DELETED
|
@@ -1,584 +0,0 @@
|
|
| 1 |
-
{
|
| 2 |
-
"language": "vi-VN",
|
| 3 |
-
"support_language": [
|
| 4 |
-
"en-US",
|
| 5 |
-
"vi-VN"
|
| 6 |
-
],
|
| 7 |
-
"theme": "NoCrypt/miku",
|
| 8 |
-
"themes": [
|
| 9 |
-
"NoCrypt/miku",
|
| 10 |
-
"gstaff/xkcd",
|
| 11 |
-
"JohnSmith9982/small_and_pretty",
|
| 12 |
-
"ParityError/Interstellar",
|
| 13 |
-
"earneleh/paris",
|
| 14 |
-
"shivi/calm_seafoam",
|
| 15 |
-
"Hev832/Applio",
|
| 16 |
-
"YTheme/Minecraft",
|
| 17 |
-
"gstaff/sketch",
|
| 18 |
-
"SebastianBravo/simci_css",
|
| 19 |
-
"allenai/gradio-theme",
|
| 20 |
-
"Nymbo/Nymbo_Theme_5",
|
| 21 |
-
"lone17/kotaemon",
|
| 22 |
-
"Zarkel/IBM_Carbon_Theme",
|
| 23 |
-
"SherlockRamos/Feliz",
|
| 24 |
-
"freddyaboulton/dracula_revamped",
|
| 25 |
-
"freddyaboulton/bad-theme-space",
|
| 26 |
-
"gradio/dracula_revamped",
|
| 27 |
-
"abidlabs/dracula_revamped",
|
| 28 |
-
"gradio/dracula_test",
|
| 29 |
-
"gradio/seafoam",
|
| 30 |
-
"gradio/glass",
|
| 31 |
-
"gradio/monochrome",
|
| 32 |
-
"gradio/soft",
|
| 33 |
-
"gradio/default",
|
| 34 |
-
"gradio/base",
|
| 35 |
-
"abidlabs/pakistan",
|
| 36 |
-
"dawood/microsoft_windows",
|
| 37 |
-
"ysharma/steampunk",
|
| 38 |
-
"ysharma/huggingface",
|
| 39 |
-
"abidlabs/Lime",
|
| 40 |
-
"freddyaboulton/this-theme-does-not-exist-2",
|
| 41 |
-
"aliabid94/new-theme",
|
| 42 |
-
"aliabid94/test2",
|
| 43 |
-
"aliabid94/test3",
|
| 44 |
-
"aliabid94/test4",
|
| 45 |
-
"abidlabs/banana",
|
| 46 |
-
"freddyaboulton/test-blue",
|
| 47 |
-
"gstaff/whiteboard",
|
| 48 |
-
"ysharma/llamas",
|
| 49 |
-
"abidlabs/font-test",
|
| 50 |
-
"YenLai/Superhuman",
|
| 51 |
-
"bethecloud/storj_theme",
|
| 52 |
-
"sudeepshouche/minimalist",
|
| 53 |
-
"knotdgaf/gradiotest",
|
| 54 |
-
"ParityError/Anime",
|
| 55 |
-
"Ajaxon6255/Emerald_Isle",
|
| 56 |
-
"ParityError/LimeFace",
|
| 57 |
-
"finlaymacklon/smooth_slate",
|
| 58 |
-
"finlaymacklon/boxy_violet",
|
| 59 |
-
"derekzen/stardust",
|
| 60 |
-
"EveryPizza/Cartoony-Gradio-Theme",
|
| 61 |
-
"Ifeanyi/Cyanister",
|
| 62 |
-
"Tshackelton/IBMPlex-DenseReadable",
|
| 63 |
-
"snehilsanyal/scikit-learn",
|
| 64 |
-
"Himhimhim/xkcd",
|
| 65 |
-
"nota-ai/theme",
|
| 66 |
-
"rawrsor1/Everforest",
|
| 67 |
-
"rottenlittlecreature/Moon_Goblin",
|
| 68 |
-
"abidlabs/test-yellow",
|
| 69 |
-
"abidlabs/test-yellow3",
|
| 70 |
-
"idspicQstitho/dracula_revamped",
|
| 71 |
-
"kfahn/AnimalPose",
|
| 72 |
-
"HaleyCH/HaleyCH_Theme",
|
| 73 |
-
"simulKitke/dracula_test",
|
| 74 |
-
"braintacles/CrimsonNight",
|
| 75 |
-
"wentaohe/whiteboardv2",
|
| 76 |
-
"reilnuud/polite",
|
| 77 |
-
"remilia/Ghostly",
|
| 78 |
-
"Franklisi/darkmode",
|
| 79 |
-
"coding-alt/soft",
|
| 80 |
-
"xiaobaiyuan/theme_land",
|
| 81 |
-
"step-3-profit/Midnight-Deep",
|
| 82 |
-
"xiaobaiyuan/theme_demo",
|
| 83 |
-
"Taithrah/Minimal",
|
| 84 |
-
"Insuz/SimpleIndigo",
|
| 85 |
-
"zkunn/Alipay_Gradio_theme",
|
| 86 |
-
"Insuz/Mocha",
|
| 87 |
-
"xiaobaiyuan/theme_brief",
|
| 88 |
-
"Ama434/434-base-Barlow",
|
| 89 |
-
"Ama434/def_barlow",
|
| 90 |
-
"Ama434/neutral-barlow",
|
| 91 |
-
"dawood/dracula_test",
|
| 92 |
-
"nuttea/Softblue",
|
| 93 |
-
"BlueDancer/Alien_Diffusion",
|
| 94 |
-
"naughtondale/monochrome",
|
| 95 |
-
"Dagfinn1962/standard",
|
| 96 |
-
"default"
|
| 97 |
-
],
|
| 98 |
-
"mdx_model": [
|
| 99 |
-
"Main_340",
|
| 100 |
-
"Main_390",
|
| 101 |
-
"Main_406",
|
| 102 |
-
"Main_427",
|
| 103 |
-
"Main_438",
|
| 104 |
-
"Inst_full_292",
|
| 105 |
-
"Inst_HQ_1",
|
| 106 |
-
"Inst_HQ_2",
|
| 107 |
-
"Inst_HQ_3",
|
| 108 |
-
"Inst_HQ_4",
|
| 109 |
-
"Inst_HQ_5",
|
| 110 |
-
"Kim_Vocal_1",
|
| 111 |
-
"Kim_Vocal_2",
|
| 112 |
-
"Kim_Inst",
|
| 113 |
-
"Inst_187_beta",
|
| 114 |
-
"Inst_82_beta",
|
| 115 |
-
"Inst_90_beta",
|
| 116 |
-
"Voc_FT",
|
| 117 |
-
"Crowd_HQ",
|
| 118 |
-
"Inst_1",
|
| 119 |
-
"Inst_2",
|
| 120 |
-
"Inst_3",
|
| 121 |
-
"MDXNET_1_9703",
|
| 122 |
-
"MDXNET_2_9682",
|
| 123 |
-
"MDXNET_3_9662",
|
| 124 |
-
"Inst_Main",
|
| 125 |
-
"MDXNET_Main",
|
| 126 |
-
"MDXNET_9482"
|
| 127 |
-
],
|
| 128 |
-
"demucs_model": [
|
| 129 |
-
"HT-Normal",
|
| 130 |
-
"HT-Tuned",
|
| 131 |
-
"HD_MMI",
|
| 132 |
-
"HT_6S"
|
| 133 |
-
],
|
| 134 |
-
"edge_tts": [
|
| 135 |
-
"af-ZA-AdriNeural",
|
| 136 |
-
"af-ZA-WillemNeural",
|
| 137 |
-
"sq-AL-AnilaNeural",
|
| 138 |
-
"sq-AL-IlirNeural",
|
| 139 |
-
"am-ET-AmehaNeural",
|
| 140 |
-
"am-ET-MekdesNeural",
|
| 141 |
-
"ar-DZ-AminaNeural",
|
| 142 |
-
"ar-DZ-IsmaelNeural",
|
| 143 |
-
"ar-BH-AliNeural",
|
| 144 |
-
"ar-BH-LailaNeural",
|
| 145 |
-
"ar-EG-SalmaNeural",
|
| 146 |
-
"ar-EG-ShakirNeural",
|
| 147 |
-
"ar-IQ-BasselNeural",
|
| 148 |
-
"ar-IQ-RanaNeural",
|
| 149 |
-
"ar-JO-SanaNeural",
|
| 150 |
-
"ar-JO-TaimNeural",
|
| 151 |
-
"ar-KW-FahedNeural",
|
| 152 |
-
"ar-KW-NouraNeural",
|
| 153 |
-
"ar-LB-LaylaNeural",
|
| 154 |
-
"ar-LB-RamiNeural",
|
| 155 |
-
"ar-LY-ImanNeural",
|
| 156 |
-
"ar-LY-OmarNeural",
|
| 157 |
-
"ar-MA-JamalNeural",
|
| 158 |
-
"ar-MA-MounaNeural",
|
| 159 |
-
"ar-OM-AbdullahNeural",
|
| 160 |
-
"ar-OM-AyshaNeural",
|
| 161 |
-
"ar-QA-AmalNeural",
|
| 162 |
-
"ar-QA-MoazNeural",
|
| 163 |
-
"ar-SA-HamedNeural",
|
| 164 |
-
"ar-SA-ZariyahNeural",
|
| 165 |
-
"ar-SY-AmanyNeural",
|
| 166 |
-
"ar-SY-LaithNeural",
|
| 167 |
-
"ar-TN-HediNeural",
|
| 168 |
-
"ar-TN-ReemNeural",
|
| 169 |
-
"ar-AE-FatimaNeural",
|
| 170 |
-
"ar-AE-HamdanNeural",
|
| 171 |
-
"ar-YE-MaryamNeural",
|
| 172 |
-
"ar-YE-SalehNeural",
|
| 173 |
-
"az-AZ-BabekNeural",
|
| 174 |
-
"az-AZ-BanuNeural",
|
| 175 |
-
"bn-BD-NabanitaNeural",
|
| 176 |
-
"bn-BD-PradeepNeural",
|
| 177 |
-
"bn-IN-BashkarNeural",
|
| 178 |
-
"bn-IN-TanishaaNeural",
|
| 179 |
-
"bs-BA-GoranNeural",
|
| 180 |
-
"bs-BA-VesnaNeural",
|
| 181 |
-
"bg-BG-BorislavNeural",
|
| 182 |
-
"bg-BG-KalinaNeural",
|
| 183 |
-
"my-MM-NilarNeural",
|
| 184 |
-
"my-MM-ThihaNeural",
|
| 185 |
-
"ca-ES-EnricNeural",
|
| 186 |
-
"ca-ES-JoanaNeural",
|
| 187 |
-
"zh-HK-HiuGaaiNeural",
|
| 188 |
-
"zh-HK-HiuMaanNeural",
|
| 189 |
-
"zh-HK-WanLungNeural",
|
| 190 |
-
"zh-CN-XiaoxiaoNeural",
|
| 191 |
-
"zh-CN-XiaoyiNeural",
|
| 192 |
-
"zh-CN-YunjianNeural",
|
| 193 |
-
"zh-CN-YunxiNeural",
|
| 194 |
-
"zh-CN-YunxiaNeural",
|
| 195 |
-
"zh-CN-YunyangNeural",
|
| 196 |
-
"zh-CN-liaoning-XiaobeiNeural",
|
| 197 |
-
"zh-TW-HsiaoChenNeural",
|
| 198 |
-
"zh-TW-YunJheNeural",
|
| 199 |
-
"zh-TW-HsiaoYuNeural",
|
| 200 |
-
"zh-CN-shaanxi-XiaoniNeural",
|
| 201 |
-
"hr-HR-GabrijelaNeural",
|
| 202 |
-
"hr-HR-SreckoNeural",
|
| 203 |
-
"cs-CZ-AntoninNeural",
|
| 204 |
-
"cs-CZ-VlastaNeural",
|
| 205 |
-
"da-DK-ChristelNeural",
|
| 206 |
-
"da-DK-JeppeNeural",
|
| 207 |
-
"nl-BE-ArnaudNeural",
|
| 208 |
-
"nl-BE-DenaNeural",
|
| 209 |
-
"nl-NL-ColetteNeural",
|
| 210 |
-
"nl-NL-FennaNeural",
|
| 211 |
-
"nl-NL-MaartenNeural",
|
| 212 |
-
"en-AU-NatashaNeural",
|
| 213 |
-
"en-AU-WilliamNeural",
|
| 214 |
-
"en-CA-ClaraNeural",
|
| 215 |
-
"en-CA-LiamNeural",
|
| 216 |
-
"en-HK-SamNeural",
|
| 217 |
-
"en-HK-YanNeural",
|
| 218 |
-
"en-IN-NeerjaExpressiveNeural",
|
| 219 |
-
"en-IN-NeerjaNeural",
|
| 220 |
-
"en-IN-PrabhatNeural",
|
| 221 |
-
"en-IE-ConnorNeural",
|
| 222 |
-
"en-IE-EmilyNeural",
|
| 223 |
-
"en-KE-AsiliaNeural",
|
| 224 |
-
"en-KE-ChilembaNeural",
|
| 225 |
-
"en-NZ-MitchellNeural",
|
| 226 |
-
"en-NZ-MollyNeural",
|
| 227 |
-
"en-NG-AbeoNeural",
|
| 228 |
-
"en-NG-EzinneNeural",
|
| 229 |
-
"en-PH-JamesNeural",
|
| 230 |
-
"en-PH-RosaNeural",
|
| 231 |
-
"en-SG-LunaNeural",
|
| 232 |
-
"en-SG-WayneNeural",
|
| 233 |
-
"en-ZA-LeahNeural",
|
| 234 |
-
"en-ZA-LukeNeural",
|
| 235 |
-
"en-TZ-ElimuNeural",
|
| 236 |
-
"en-TZ-ImaniNeural",
|
| 237 |
-
"en-GB-LibbyNeural",
|
| 238 |
-
"en-GB-MaisieNeural",
|
| 239 |
-
"en-GB-RyanNeural",
|
| 240 |
-
"en-GB-SoniaNeural",
|
| 241 |
-
"en-GB-ThomasNeural",
|
| 242 |
-
"en-US-AvaMultilingualNeural",
|
| 243 |
-
"en-US-AndrewMultilingualNeural",
|
| 244 |
-
"en-US-EmmaMultilingualNeural",
|
| 245 |
-
"en-US-BrianMultilingualNeural",
|
| 246 |
-
"en-US-AvaNeural",
|
| 247 |
-
"en-US-AndrewNeural",
|
| 248 |
-
"en-US-EmmaNeural",
|
| 249 |
-
"en-US-BrianNeural",
|
| 250 |
-
"en-US-AnaNeural",
|
| 251 |
-
"en-US-AriaNeural",
|
| 252 |
-
"en-US-ChristopherNeural",
|
| 253 |
-
"en-US-EricNeural",
|
| 254 |
-
"en-US-GuyNeural",
|
| 255 |
-
"en-US-JennyNeural",
|
| 256 |
-
"en-US-MichelleNeural",
|
| 257 |
-
"en-US-RogerNeural",
|
| 258 |
-
"en-US-SteffanNeural",
|
| 259 |
-
"et-EE-AnuNeural",
|
| 260 |
-
"et-EE-KertNeural",
|
| 261 |
-
"fil-PH-AngeloNeural",
|
| 262 |
-
"fil-PH-BlessicaNeural",
|
| 263 |
-
"fi-FI-HarriNeural",
|
| 264 |
-
"fi-FI-NooraNeural",
|
| 265 |
-
"fr-BE-CharlineNeural",
|
| 266 |
-
"fr-BE-GerardNeural",
|
| 267 |
-
"fr-CA-ThierryNeural",
|
| 268 |
-
"fr-CA-AntoineNeural",
|
| 269 |
-
"fr-CA-JeanNeural",
|
| 270 |
-
"fr-CA-SylvieNeural",
|
| 271 |
-
"fr-FR-VivienneMultilingualNeural",
|
| 272 |
-
"fr-FR-RemyMultilingualNeural",
|
| 273 |
-
"fr-FR-DeniseNeural",
|
| 274 |
-
"fr-FR-EloiseNeural",
|
| 275 |
-
"fr-FR-HenriNeural",
|
| 276 |
-
"fr-CH-ArianeNeural",
|
| 277 |
-
"fr-CH-FabriceNeural",
|
| 278 |
-
"gl-ES-RoiNeural",
|
| 279 |
-
"gl-ES-SabelaNeural",
|
| 280 |
-
"ka-GE-EkaNeural",
|
| 281 |
-
"ka-GE-GiorgiNeural",
|
| 282 |
-
"de-AT-IngridNeural",
|
| 283 |
-
"de-AT-JonasNeural",
|
| 284 |
-
"de-DE-SeraphinaMultilingualNeural",
|
| 285 |
-
"de-DE-FlorianMultilingualNeural",
|
| 286 |
-
"de-DE-AmalaNeural",
|
| 287 |
-
"de-DE-ConradNeural",
|
| 288 |
-
"de-DE-KatjaNeural",
|
| 289 |
-
"de-DE-KillianNeural",
|
| 290 |
-
"de-CH-JanNeural",
|
| 291 |
-
"de-CH-LeniNeural",
|
| 292 |
-
"el-GR-AthinaNeural",
|
| 293 |
-
"el-GR-NestorasNeural",
|
| 294 |
-
"gu-IN-DhwaniNeural",
|
| 295 |
-
"gu-IN-NiranjanNeural",
|
| 296 |
-
"he-IL-AvriNeural",
|
| 297 |
-
"he-IL-HilaNeural",
|
| 298 |
-
"hi-IN-MadhurNeural",
|
| 299 |
-
"hi-IN-SwaraNeural",
|
| 300 |
-
"hu-HU-NoemiNeural",
|
| 301 |
-
"hu-HU-TamasNeural",
|
| 302 |
-
"is-IS-GudrunNeural",
|
| 303 |
-
"is-IS-GunnarNeural",
|
| 304 |
-
"id-ID-ArdiNeural",
|
| 305 |
-
"id-ID-GadisNeural",
|
| 306 |
-
"ga-IE-ColmNeural",
|
| 307 |
-
"ga-IE-OrlaNeural",
|
| 308 |
-
"it-IT-GiuseppeNeural",
|
| 309 |
-
"it-IT-DiegoNeural",
|
| 310 |
-
"it-IT-ElsaNeural",
|
| 311 |
-
"it-IT-IsabellaNeural",
|
| 312 |
-
"ja-JP-KeitaNeural",
|
| 313 |
-
"ja-JP-NanamiNeural",
|
| 314 |
-
"jv-ID-DimasNeural",
|
| 315 |
-
"jv-ID-SitiNeural",
|
| 316 |
-
"kn-IN-GaganNeural",
|
| 317 |
-
"kn-IN-SapnaNeural",
|
| 318 |
-
"kk-KZ-AigulNeural",
|
| 319 |
-
"kk-KZ-DauletNeural",
|
| 320 |
-
"km-KH-PisethNeural",
|
| 321 |
-
"km-KH-SreymomNeural",
|
| 322 |
-
"ko-KR-HyunsuNeural",
|
| 323 |
-
"ko-KR-InJoonNeural",
|
| 324 |
-
"ko-KR-SunHiNeural",
|
| 325 |
-
"lo-LA-ChanthavongNeural",
|
| 326 |
-
"lo-LA-KeomanyNeural",
|
| 327 |
-
"lv-LV-EveritaNeural",
|
| 328 |
-
"lv-LV-NilsNeural",
|
| 329 |
-
"lt-LT-LeonasNeural",
|
| 330 |
-
"lt-LT-OnaNeural",
|
| 331 |
-
"mk-MK-AleksandarNeural",
|
| 332 |
-
"mk-MK-MarijaNeural",
|
| 333 |
-
"ms-MY-OsmanNeural",
|
| 334 |
-
"ms-MY-YasminNeural",
|
| 335 |
-
"ml-IN-MidhunNeural",
|
| 336 |
-
"ml-IN-SobhanaNeural",
|
| 337 |
-
"mt-MT-GraceNeural",
|
| 338 |
-
"mt-MT-JosephNeural",
|
| 339 |
-
"mr-IN-AarohiNeural",
|
| 340 |
-
"mr-IN-ManoharNeural",
|
| 341 |
-
"mn-MN-BataaNeural",
|
| 342 |
-
"mn-MN-YesuiNeural",
|
| 343 |
-
"ne-NP-HemkalaNeural",
|
| 344 |
-
"ne-NP-SagarNeural",
|
| 345 |
-
"nb-NO-FinnNeural",
|
| 346 |
-
"nb-NO-PernilleNeural",
|
| 347 |
-
"ps-AF-GulNawazNeural",
|
| 348 |
-
"ps-AF-LatifaNeural",
|
| 349 |
-
"fa-IR-DilaraNeural",
|
| 350 |
-
"fa-IR-FaridNeural",
|
| 351 |
-
"pl-PL-MarekNeural",
|
| 352 |
-
"pl-PL-ZofiaNeural",
|
| 353 |
-
"pt-BR-ThalitaNeural",
|
| 354 |
-
"pt-BR-AntonioNeural",
|
| 355 |
-
"pt-BR-FranciscaNeural",
|
| 356 |
-
"pt-PT-DuarteNeural",
|
| 357 |
-
"pt-PT-RaquelNeural",
|
| 358 |
-
"ro-RO-AlinaNeural",
|
| 359 |
-
"ro-RO-EmilNeural",
|
| 360 |
-
"ru-RU-DmitryNeural",
|
| 361 |
-
"ru-RU-SvetlanaNeural",
|
| 362 |
-
"sr-RS-NicholasNeural",
|
| 363 |
-
"sr-RS-SophieNeural",
|
| 364 |
-
"si-LK-SameeraNeural",
|
| 365 |
-
"si-LK-ThiliniNeural",
|
| 366 |
-
"sk-SK-LukasNeural",
|
| 367 |
-
"sk-SK-ViktoriaNeural",
|
| 368 |
-
"sl-SI-PetraNeural",
|
| 369 |
-
"sl-SI-RokNeural",
|
| 370 |
-
"so-SO-MuuseNeural",
|
| 371 |
-
"so-SO-UbaxNeural",
|
| 372 |
-
"es-AR-ElenaNeural",
|
| 373 |
-
"es-AR-TomasNeural",
|
| 374 |
-
"es-BO-MarceloNeural",
|
| 375 |
-
"es-BO-SofiaNeural",
|
| 376 |
-
"es-CL-CatalinaNeural",
|
| 377 |
-
"es-CL-LorenzoNeural",
|
| 378 |
-
"es-ES-XimenaNeural",
|
| 379 |
-
"es-CO-GonzaloNeural",
|
| 380 |
-
"es-CO-SalomeNeural",
|
| 381 |
-
"es-CR-JuanNeural",
|
| 382 |
-
"es-CR-MariaNeural",
|
| 383 |
-
"es-CU-BelkysNeural",
|
| 384 |
-
"es-CU-ManuelNeural",
|
| 385 |
-
"es-DO-EmilioNeural",
|
| 386 |
-
"es-DO-RamonaNeural",
|
| 387 |
-
"es-EC-AndreaNeural",
|
| 388 |
-
"es-EC-LuisNeural",
|
| 389 |
-
"es-SV-LorenaNeural",
|
| 390 |
-
"es-SV-RodrigoNeural",
|
| 391 |
-
"es-GQ-JavierNeural",
|
| 392 |
-
"es-GQ-TeresaNeural",
|
| 393 |
-
"es-GT-AndresNeural",
|
| 394 |
-
"es-GT-MartaNeural",
|
| 395 |
-
"es-HN-CarlosNeural",
|
| 396 |
-
"es-HN-KarlaNeural",
|
| 397 |
-
"es-MX-DaliaNeural",
|
| 398 |
-
"es-MX-JorgeNeural",
|
| 399 |
-
"es-NI-FedericoNeural",
|
| 400 |
-
"es-NI-YolandaNeural",
|
| 401 |
-
"es-PA-MargaritaNeural",
|
| 402 |
-
"es-PA-RobertoNeural",
|
| 403 |
-
"es-PY-MarioNeural",
|
| 404 |
-
"es-PY-TaniaNeural",
|
| 405 |
-
"es-PE-AlexNeural",
|
| 406 |
-
"es-PE-CamilaNeural",
|
| 407 |
-
"es-PR-KarinaNeural",
|
| 408 |
-
"es-PR-VictorNeural",
|
| 409 |
-
"es-ES-AlvaroNeural",
|
| 410 |
-
"es-ES-ElviraNeural",
|
| 411 |
-
"es-US-AlonsoNeural",
|
| 412 |
-
"es-US-PalomaNeural",
|
| 413 |
-
"es-UY-MateoNeural",
|
| 414 |
-
"es-UY-ValentinaNeural",
|
| 415 |
-
"es-VE-PaolaNeural",
|
| 416 |
-
"es-VE-SebastianNeural",
|
| 417 |
-
"su-ID-JajangNeural",
|
| 418 |
-
"su-ID-TutiNeural",
|
| 419 |
-
"sw-KE-RafikiNeural",
|
| 420 |
-
"sw-KE-ZuriNeural",
|
| 421 |
-
"sw-TZ-DaudiNeural",
|
| 422 |
-
"sw-TZ-RehemaNeural",
|
| 423 |
-
"sv-SE-MattiasNeural",
|
| 424 |
-
"sv-SE-SofieNeural",
|
| 425 |
-
"ta-IN-PallaviNeural",
|
| 426 |
-
"ta-IN-ValluvarNeural",
|
| 427 |
-
"ta-MY-KaniNeural",
|
| 428 |
-
"ta-MY-SuryaNeural",
|
| 429 |
-
"ta-SG-AnbuNeural",
|
| 430 |
-
"ta-SG-VenbaNeural",
|
| 431 |
-
"ta-LK-KumarNeural",
|
| 432 |
-
"ta-LK-SaranyaNeural",
|
| 433 |
-
"te-IN-MohanNeural",
|
| 434 |
-
"te-IN-ShrutiNeural",
|
| 435 |
-
"th-TH-NiwatNeural",
|
| 436 |
-
"th-TH-PremwadeeNeural",
|
| 437 |
-
"tr-TR-AhmetNeural",
|
| 438 |
-
"tr-TR-EmelNeural",
|
| 439 |
-
"uk-UA-OstapNeural",
|
| 440 |
-
"uk-UA-PolinaNeural",
|
| 441 |
-
"ur-IN-GulNeural",
|
| 442 |
-
"ur-IN-SalmanNeural",
|
| 443 |
-
"ur-PK-AsadNeural",
|
| 444 |
-
"ur-PK-UzmaNeural",
|
| 445 |
-
"uz-UZ-MadinaNeural",
|
| 446 |
-
"uz-UZ-SardorNeural",
|
| 447 |
-
"vi-VN-HoaiMyNeural",
|
| 448 |
-
"vi-VN-NamMinhNeural",
|
| 449 |
-
"cy-GB-AledNeural",
|
| 450 |
-
"cy-GB-NiaNeural",
|
| 451 |
-
"zu-ZA-ThandoNeural",
|
| 452 |
-
"zu-ZA-ThembaNeural"
|
| 453 |
-
],
|
| 454 |
-
"google_tts_voice": [
|
| 455 |
-
"af",
|
| 456 |
-
"am",
|
| 457 |
-
"ar",
|
| 458 |
-
"bg",
|
| 459 |
-
"bn",
|
| 460 |
-
"bs",
|
| 461 |
-
"ca",
|
| 462 |
-
"cs",
|
| 463 |
-
"cy",
|
| 464 |
-
"da",
|
| 465 |
-
"de",
|
| 466 |
-
"el",
|
| 467 |
-
"en",
|
| 468 |
-
"es",
|
| 469 |
-
"et",
|
| 470 |
-
"eu",
|
| 471 |
-
"fi",
|
| 472 |
-
"fr",
|
| 473 |
-
"fr-CA",
|
| 474 |
-
"gl",
|
| 475 |
-
"gu",
|
| 476 |
-
"ha",
|
| 477 |
-
"hi",
|
| 478 |
-
"hr",
|
| 479 |
-
"hu",
|
| 480 |
-
"id",
|
| 481 |
-
"is",
|
| 482 |
-
"it",
|
| 483 |
-
"iw",
|
| 484 |
-
"ja",
|
| 485 |
-
"jw",
|
| 486 |
-
"km",
|
| 487 |
-
"kn",
|
| 488 |
-
"ko",
|
| 489 |
-
"la",
|
| 490 |
-
"lt",
|
| 491 |
-
"lv",
|
| 492 |
-
"ml",
|
| 493 |
-
"mr",
|
| 494 |
-
"ms",
|
| 495 |
-
"my",
|
| 496 |
-
"ne",
|
| 497 |
-
"nl",
|
| 498 |
-
"no",
|
| 499 |
-
"pa",
|
| 500 |
-
"pl",
|
| 501 |
-
"pt",
|
| 502 |
-
"pt-PT",
|
| 503 |
-
"ro",
|
| 504 |
-
"ru",
|
| 505 |
-
"si",
|
| 506 |
-
"sk",
|
| 507 |
-
"sq",
|
| 508 |
-
"sr",
|
| 509 |
-
"su",
|
| 510 |
-
"sv",
|
| 511 |
-
"sw",
|
| 512 |
-
"ta",
|
| 513 |
-
"te",
|
| 514 |
-
"th",
|
| 515 |
-
"tl",
|
| 516 |
-
"tr",
|
| 517 |
-
"uk",
|
| 518 |
-
"ur",
|
| 519 |
-
"vi",
|
| 520 |
-
"yue",
|
| 521 |
-
"zh-CN",
|
| 522 |
-
"zh-TW",
|
| 523 |
-
"zh"
|
| 524 |
-
],
|
| 525 |
-
"fp16": false,
|
| 526 |
-
"editing_tab": true,
|
| 527 |
-
"inference_tab": true,
|
| 528 |
-
"create_and_training_tab": true,
|
| 529 |
-
"extra_tab": true,
|
| 530 |
-
"separator_tab": true,
|
| 531 |
-
"convert_tab": true,
|
| 532 |
-
"convert_with_whisper": true,
|
| 533 |
-
"tts_tab": true,
|
| 534 |
-
"effects_tab": true,
|
| 535 |
-
"quirk": true,
|
| 536 |
-
"create_dataset_tab": true,
|
| 537 |
-
"training_tab": true,
|
| 538 |
-
"fushion_tab": true,
|
| 539 |
-
"read_tab": true,
|
| 540 |
-
"onnx_tab": true,
|
| 541 |
-
"downloads_tab": true,
|
| 542 |
-
"f0_extractor_tab": true,
|
| 543 |
-
"settings_tab": true,
|
| 544 |
-
"report_bug_tab": false,
|
| 545 |
-
"font": "https://fonts.googleapis.com/css2?family=Roboto&display=swap",
|
| 546 |
-
"app_port": 7860,
|
| 547 |
-
"tensorboard_port": 6870,
|
| 548 |
-
"num_of_restart": 5,
|
| 549 |
-
"server_name": "0.0.0.0",
|
| 550 |
-
"app_show_error": true,
|
| 551 |
-
"delete_exists_file": false,
|
| 552 |
-
"audio_effects_path": "main/inference/audio_effects.py",
|
| 553 |
-
"convert_path": "main/inference/conversion/convert.py",
|
| 554 |
-
"separate_path": "main/inference/separator_music.py",
|
| 555 |
-
"create_dataset_path": "main/inference/create_dataset.py",
|
| 556 |
-
"preprocess_path": "main/inference/preprocess/preprocess.py",
|
| 557 |
-
"extract_path": "main/inference/extracting/extract.py",
|
| 558 |
-
"create_index_path": "main/inference/create_index.py",
|
| 559 |
-
"train_path": "main/inference/training/train.py",
|
| 560 |
-
"ico_path": "assets/ico.png",
|
| 561 |
-
"csv_path": "assets/spreadsheet.csv",
|
| 562 |
-
"weights_path": "assets/weights",
|
| 563 |
-
"logs_path": "assets/logs",
|
| 564 |
-
"binary_path": "assets/binary",
|
| 565 |
-
"f0_path": "assets/f0",
|
| 566 |
-
"language_path": "assets/languages",
|
| 567 |
-
"presets_path": "assets/presets",
|
| 568 |
-
"embedders_path": "assets/models/embedders",
|
| 569 |
-
"predictors_path": "assets/models/predictors",
|
| 570 |
-
"pretrained_custom_path": "assets/models/pretrained_custom",
|
| 571 |
-
"pretrained_v1_path": "assets/models/pretrained_v1",
|
| 572 |
-
"pretrained_v2_path": "assets/models/pretrained_v2",
|
| 573 |
-
"speaker_diarization_path": "assets/models/speaker_diarization",
|
| 574 |
-
"uvr5_path": "assets/models/uvr5",
|
| 575 |
-
"audios_path": "audios",
|
| 576 |
-
"demucs_segments_enable": true,
|
| 577 |
-
"demucs_cpu_mode": false,
|
| 578 |
-
"limit_f0": 8,
|
| 579 |
-
"debug_mode": false,
|
| 580 |
-
"pretrain_verify_shape": true,
|
| 581 |
-
"pretrain_strict": true,
|
| 582 |
-
"cpu_mode": false,
|
| 583 |
-
"brain": false
|
| 584 |
-
}
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
main/configs/config.py
DELETED
|
@@ -1,101 +0,0 @@
|
|
| 1 |
-
import os
|
| 2 |
-
import sys
|
| 3 |
-
import json
|
| 4 |
-
import torch
|
| 5 |
-
|
| 6 |
-
sys.path.append(os.getcwd())
|
| 7 |
-
|
| 8 |
-
from main.library import opencl
|
| 9 |
-
|
| 10 |
-
version_config_paths = [os.path.join(version, size) for version in ["v1", "v2"] for size in ["32000.json", "40000.json", "48000.json"]]
|
| 11 |
-
|
| 12 |
-
def singleton(cls):
|
| 13 |
-
instances = {}
|
| 14 |
-
|
| 15 |
-
def get_instance(*args, **kwargs):
|
| 16 |
-
if cls not in instances: instances[cls] = cls(*args, **kwargs)
|
| 17 |
-
return instances[cls]
|
| 18 |
-
|
| 19 |
-
return get_instance
|
| 20 |
-
|
| 21 |
-
@singleton
|
| 22 |
-
class Config:
|
| 23 |
-
def __init__(self):
|
| 24 |
-
self.device = "cuda:0" if torch.cuda.is_available() else ("ocl:0" if opencl.is_available() else "cpu")
|
| 25 |
-
self.configs_path = os.path.join("main", "configs", "config.json")
|
| 26 |
-
self.configs = json.load(open(self.configs_path, "r"))
|
| 27 |
-
self.translations = self.multi_language()
|
| 28 |
-
self.json_config = self.load_config_json()
|
| 29 |
-
self.gpu_mem = None
|
| 30 |
-
self.per_preprocess = 3.7
|
| 31 |
-
self.is_half = self.is_fp16()
|
| 32 |
-
self.brain = self.configs.get("brain", False)
|
| 33 |
-
self.cpu_mode = self.configs.get("cpu_mode", False)
|
| 34 |
-
if self.cpu_mode: self.device = "cpu"
|
| 35 |
-
self.x_pad, self.x_query, self.x_center, self.x_max = self.device_config()
|
| 36 |
-
self.debug_mode = self.configs.get("debug_mode", False)
|
| 37 |
-
|
| 38 |
-
def multi_language(self):
|
| 39 |
-
try:
|
| 40 |
-
lang = self.configs.get("language", "vi-VN")
|
| 41 |
-
if len([l for l in os.listdir(self.configs["language_path"]) if l.endswith(".json")]) < 1: raise FileNotFoundError("Không tìm thấy bất cứ gói ngôn ngữ nào(No package languages found)")
|
| 42 |
-
|
| 43 |
-
if not lang: lang = "vi-VN"
|
| 44 |
-
if lang not in self.configs["support_language"]: raise ValueError("Ngôn ngữ không được hỗ trợ(Language not supported)")
|
| 45 |
-
|
| 46 |
-
lang_path = os.path.join(self.configs["language_path"], f"{lang}.json")
|
| 47 |
-
if not os.path.exists(lang_path): lang_path = os.path.join(self.configs["language_path"], "vi-VN.json")
|
| 48 |
-
|
| 49 |
-
with open(lang_path, encoding="utf-8") as f:
|
| 50 |
-
translations = json.load(f)
|
| 51 |
-
except json.JSONDecodeError:
|
| 52 |
-
print(self.translations["empty_json"].format(file=lang))
|
| 53 |
-
pass
|
| 54 |
-
|
| 55 |
-
return translations
|
| 56 |
-
|
| 57 |
-
def is_fp16(self):
|
| 58 |
-
fp16 = self.configs.get("fp16", False)
|
| 59 |
-
|
| 60 |
-
if self.device in ["cpu", "mps"] and fp16:
|
| 61 |
-
self.configs["fp16"] = False
|
| 62 |
-
fp16 = False
|
| 63 |
-
|
| 64 |
-
with open(self.configs_path, "w") as f:
|
| 65 |
-
json.dump(self.configs, f, indent=4)
|
| 66 |
-
|
| 67 |
-
if not fp16: self.preprocess_per = 3.0
|
| 68 |
-
return fp16
|
| 69 |
-
|
| 70 |
-
def load_config_json(self):
|
| 71 |
-
configs = {}
|
| 72 |
-
|
| 73 |
-
for config_file in version_config_paths:
|
| 74 |
-
try:
|
| 75 |
-
with open(os.path.join("main", "configs", config_file), "r") as f:
|
| 76 |
-
configs[config_file] = json.load(f)
|
| 77 |
-
except json.JSONDecodeError:
|
| 78 |
-
print(self.translations["empty_json"].format(file=config_file))
|
| 79 |
-
pass
|
| 80 |
-
|
| 81 |
-
return configs
|
| 82 |
-
|
| 83 |
-
def device_config(self):
|
| 84 |
-
if not self.cpu_mode:
|
| 85 |
-
if self.device.startswith("cuda"): self.set_cuda_config()
|
| 86 |
-
elif opencl.is_available(): self.device = "ocl:0"
|
| 87 |
-
elif self.has_mps(): self.device = "mps"
|
| 88 |
-
else: self.device = "cpu"
|
| 89 |
-
|
| 90 |
-
if self.gpu_mem is not None and self.gpu_mem <= 4:
|
| 91 |
-
self.preprocess_per = 3.0
|
| 92 |
-
return 1, 5, 30, 32
|
| 93 |
-
|
| 94 |
-
return (3, 10, 60, 65) if self.is_half else (1, 6, 38, 41)
|
| 95 |
-
|
| 96 |
-
def set_cuda_config(self):
|
| 97 |
-
i_device = int(self.device.split(":")[-1])
|
| 98 |
-
self.gpu_mem = torch.cuda.get_device_properties(i_device).total_memory // (1024**3)
|
| 99 |
-
|
| 100 |
-
def has_mps(self):
|
| 101 |
-
return torch.backends.mps.is_available()
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
main/configs/v1/32000.json
DELETED
|
@@ -1,46 +0,0 @@
|
|
| 1 |
-
{
|
| 2 |
-
"train": {
|
| 3 |
-
"log_interval": 200,
|
| 4 |
-
"seed": 1234,
|
| 5 |
-
"epochs": 20000,
|
| 6 |
-
"learning_rate": 0.0001,
|
| 7 |
-
"betas": [0.8, 0.99],
|
| 8 |
-
"eps": 1e-09,
|
| 9 |
-
"batch_size": 4,
|
| 10 |
-
"lr_decay": 0.999875,
|
| 11 |
-
"segment_size": 12800,
|
| 12 |
-
"init_lr_ratio": 1,
|
| 13 |
-
"warmup_epochs": 0,
|
| 14 |
-
"c_mel": 45,
|
| 15 |
-
"c_kl": 1.0
|
| 16 |
-
},
|
| 17 |
-
"data": {
|
| 18 |
-
"max_wav_value": 32768.0,
|
| 19 |
-
"sample_rate": 32000,
|
| 20 |
-
"filter_length": 1024,
|
| 21 |
-
"hop_length": 320,
|
| 22 |
-
"win_length": 1024,
|
| 23 |
-
"n_mel_channels": 80,
|
| 24 |
-
"mel_fmin": 0.0,
|
| 25 |
-
"mel_fmax": null
|
| 26 |
-
},
|
| 27 |
-
"model": {
|
| 28 |
-
"inter_channels": 192,
|
| 29 |
-
"hidden_channels": 192,
|
| 30 |
-
"filter_channels": 768,
|
| 31 |
-
"text_enc_hidden_dim": 256,
|
| 32 |
-
"n_heads": 2,
|
| 33 |
-
"n_layers": 6,
|
| 34 |
-
"kernel_size": 3,
|
| 35 |
-
"p_dropout": 0,
|
| 36 |
-
"resblock": "1",
|
| 37 |
-
"resblock_kernel_sizes": [3, 7, 11],
|
| 38 |
-
"resblock_dilation_sizes": [[1, 3, 5], [1, 3, 5], [1, 3, 5]],
|
| 39 |
-
"upsample_rates": [10, 4, 2, 2, 2],
|
| 40 |
-
"upsample_initial_channel": 512,
|
| 41 |
-
"upsample_kernel_sizes": [16, 16, 4, 4, 4],
|
| 42 |
-
"use_spectral_norm": false,
|
| 43 |
-
"gin_channels": 256,
|
| 44 |
-
"spk_embed_dim": 109
|
| 45 |
-
}
|
| 46 |
-
}
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
main/configs/v1/40000.json
DELETED
|
@@ -1,46 +0,0 @@
|
|
| 1 |
-
{
|
| 2 |
-
"train": {
|
| 3 |
-
"log_interval": 200,
|
| 4 |
-
"seed": 1234,
|
| 5 |
-
"epochs": 20000,
|
| 6 |
-
"learning_rate": 0.0001,
|
| 7 |
-
"betas": [0.8, 0.99],
|
| 8 |
-
"eps": 1e-09,
|
| 9 |
-
"batch_size": 4,
|
| 10 |
-
"lr_decay": 0.999875,
|
| 11 |
-
"segment_size": 12800,
|
| 12 |
-
"init_lr_ratio": 1,
|
| 13 |
-
"warmup_epochs": 0,
|
| 14 |
-
"c_mel": 45,
|
| 15 |
-
"c_kl": 1.0
|
| 16 |
-
},
|
| 17 |
-
"data": {
|
| 18 |
-
"max_wav_value": 32768.0,
|
| 19 |
-
"sample_rate": 40000,
|
| 20 |
-
"filter_length": 2048,
|
| 21 |
-
"hop_length": 400,
|
| 22 |
-
"win_length": 2048,
|
| 23 |
-
"n_mel_channels": 125,
|
| 24 |
-
"mel_fmin": 0.0,
|
| 25 |
-
"mel_fmax": null
|
| 26 |
-
},
|
| 27 |
-
"model": {
|
| 28 |
-
"inter_channels": 192,
|
| 29 |
-
"hidden_channels": 192,
|
| 30 |
-
"filter_channels": 768,
|
| 31 |
-
"text_enc_hidden_dim": 256,
|
| 32 |
-
"n_heads": 2,
|
| 33 |
-
"n_layers": 6,
|
| 34 |
-
"kernel_size": 3,
|
| 35 |
-
"p_dropout": 0,
|
| 36 |
-
"resblock": "1",
|
| 37 |
-
"resblock_kernel_sizes": [3, 7, 11],
|
| 38 |
-
"resblock_dilation_sizes": [[1, 3, 5], [1, 3, 5], [1, 3, 5]],
|
| 39 |
-
"upsample_rates": [10, 10, 2, 2],
|
| 40 |
-
"upsample_initial_channel": 512,
|
| 41 |
-
"upsample_kernel_sizes": [16, 16, 4, 4],
|
| 42 |
-
"use_spectral_norm": false,
|
| 43 |
-
"gin_channels": 256,
|
| 44 |
-
"spk_embed_dim": 109
|
| 45 |
-
}
|
| 46 |
-
}
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
main/configs/v1/48000.json
DELETED
|
@@ -1,46 +0,0 @@
|
|
| 1 |
-
{
|
| 2 |
-
"train": {
|
| 3 |
-
"log_interval": 200,
|
| 4 |
-
"seed": 1234,
|
| 5 |
-
"epochs": 20000,
|
| 6 |
-
"learning_rate": 0.0001,
|
| 7 |
-
"betas": [0.8, 0.99],
|
| 8 |
-
"eps": 1e-09,
|
| 9 |
-
"batch_size": 4,
|
| 10 |
-
"lr_decay": 0.999875,
|
| 11 |
-
"segment_size": 11520,
|
| 12 |
-
"init_lr_ratio": 1,
|
| 13 |
-
"warmup_epochs": 0,
|
| 14 |
-
"c_mel": 45,
|
| 15 |
-
"c_kl": 1.0
|
| 16 |
-
},
|
| 17 |
-
"data": {
|
| 18 |
-
"max_wav_value": 32768.0,
|
| 19 |
-
"sample_rate": 48000,
|
| 20 |
-
"filter_length": 2048,
|
| 21 |
-
"hop_length": 480,
|
| 22 |
-
"win_length": 2048,
|
| 23 |
-
"n_mel_channels": 128,
|
| 24 |
-
"mel_fmin": 0.0,
|
| 25 |
-
"mel_fmax": null
|
| 26 |
-
},
|
| 27 |
-
"model": {
|
| 28 |
-
"inter_channels": 192,
|
| 29 |
-
"hidden_channels": 192,
|
| 30 |
-
"filter_channels": 768,
|
| 31 |
-
"text_enc_hidden_dim": 256,
|
| 32 |
-
"n_heads": 2,
|
| 33 |
-
"n_layers": 6,
|
| 34 |
-
"kernel_size": 3,
|
| 35 |
-
"p_dropout": 0,
|
| 36 |
-
"resblock": "1",
|
| 37 |
-
"resblock_kernel_sizes": [3, 7, 11],
|
| 38 |
-
"resblock_dilation_sizes": [[1, 3, 5], [1, 3, 5], [1, 3, 5]],
|
| 39 |
-
"upsample_rates": [10, 6, 2, 2, 2],
|
| 40 |
-
"upsample_initial_channel": 512,
|
| 41 |
-
"upsample_kernel_sizes": [16, 16, 4, 4, 4],
|
| 42 |
-
"use_spectral_norm": false,
|
| 43 |
-
"gin_channels": 256,
|
| 44 |
-
"spk_embed_dim": 109
|
| 45 |
-
}
|
| 46 |
-
}
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
main/configs/v2/32000.json
DELETED
|
@@ -1,42 +0,0 @@
|
|
| 1 |
-
{
|
| 2 |
-
"train": {
|
| 3 |
-
"log_interval": 200,
|
| 4 |
-
"seed": 1234,
|
| 5 |
-
"learning_rate": 0.0001,
|
| 6 |
-
"betas": [0.8, 0.99],
|
| 7 |
-
"eps": 1e-09,
|
| 8 |
-
"lr_decay": 0.999875,
|
| 9 |
-
"segment_size": 12800,
|
| 10 |
-
"c_mel": 45,
|
| 11 |
-
"c_kl": 1.0
|
| 12 |
-
},
|
| 13 |
-
"data": {
|
| 14 |
-
"max_wav_value": 32768.0,
|
| 15 |
-
"sample_rate": 32000,
|
| 16 |
-
"filter_length": 1024,
|
| 17 |
-
"hop_length": 320,
|
| 18 |
-
"win_length": 1024,
|
| 19 |
-
"n_mel_channels": 80,
|
| 20 |
-
"mel_fmin": 0.0,
|
| 21 |
-
"mel_fmax": null
|
| 22 |
-
},
|
| 23 |
-
"model": {
|
| 24 |
-
"inter_channels": 192,
|
| 25 |
-
"hidden_channels": 192,
|
| 26 |
-
"filter_channels": 768,
|
| 27 |
-
"text_enc_hidden_dim": 768,
|
| 28 |
-
"n_heads": 2,
|
| 29 |
-
"n_layers": 6,
|
| 30 |
-
"kernel_size": 3,
|
| 31 |
-
"p_dropout": 0,
|
| 32 |
-
"resblock": "1",
|
| 33 |
-
"resblock_kernel_sizes": [3, 7, 11],
|
| 34 |
-
"resblock_dilation_sizes": [[1, 3, 5], [1, 3, 5], [1, 3, 5]],
|
| 35 |
-
"upsample_rates": [10, 8, 2, 2],
|
| 36 |
-
"upsample_initial_channel": 512,
|
| 37 |
-
"upsample_kernel_sizes": [20, 16, 4, 4],
|
| 38 |
-
"use_spectral_norm": false,
|
| 39 |
-
"gin_channels": 256,
|
| 40 |
-
"spk_embed_dim": 109
|
| 41 |
-
}
|
| 42 |
-
}
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
main/configs/v2/40000.json
DELETED
|
@@ -1,42 +0,0 @@
|
|
| 1 |
-
{
|
| 2 |
-
"train": {
|
| 3 |
-
"log_interval": 200,
|
| 4 |
-
"seed": 1234,
|
| 5 |
-
"learning_rate": 0.0001,
|
| 6 |
-
"betas": [0.8, 0.99],
|
| 7 |
-
"eps": 1e-09,
|
| 8 |
-
"lr_decay": 0.999875,
|
| 9 |
-
"segment_size": 12800,
|
| 10 |
-
"c_mel": 45,
|
| 11 |
-
"c_kl": 1.0
|
| 12 |
-
},
|
| 13 |
-
"data": {
|
| 14 |
-
"max_wav_value": 32768.0,
|
| 15 |
-
"sample_rate": 40000,
|
| 16 |
-
"filter_length": 2048,
|
| 17 |
-
"hop_length": 400,
|
| 18 |
-
"win_length": 2048,
|
| 19 |
-
"n_mel_channels": 125,
|
| 20 |
-
"mel_fmin": 0.0,
|
| 21 |
-
"mel_fmax": null
|
| 22 |
-
},
|
| 23 |
-
"model": {
|
| 24 |
-
"inter_channels": 192,
|
| 25 |
-
"hidden_channels": 192,
|
| 26 |
-
"filter_channels": 768,
|
| 27 |
-
"text_enc_hidden_dim": 768,
|
| 28 |
-
"n_heads": 2,
|
| 29 |
-
"n_layers": 6,
|
| 30 |
-
"kernel_size": 3,
|
| 31 |
-
"p_dropout": 0,
|
| 32 |
-
"resblock": "1",
|
| 33 |
-
"resblock_kernel_sizes": [3, 7, 11],
|
| 34 |
-
"resblock_dilation_sizes": [[1, 3, 5], [1, 3, 5], [1, 3, 5]],
|
| 35 |
-
"upsample_rates": [10, 10, 2, 2],
|
| 36 |
-
"upsample_initial_channel": 512,
|
| 37 |
-
"upsample_kernel_sizes": [16, 16, 4, 4],
|
| 38 |
-
"use_spectral_norm": false,
|
| 39 |
-
"gin_channels": 256,
|
| 40 |
-
"spk_embed_dim": 109
|
| 41 |
-
}
|
| 42 |
-
}
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
main/configs/v2/48000.json
DELETED
|
@@ -1,42 +0,0 @@
|
|
| 1 |
-
{
|
| 2 |
-
"train": {
|
| 3 |
-
"log_interval": 200,
|
| 4 |
-
"seed": 1234,
|
| 5 |
-
"learning_rate": 0.0001,
|
| 6 |
-
"betas": [0.8, 0.99],
|
| 7 |
-
"eps": 1e-09,
|
| 8 |
-
"lr_decay": 0.999875,
|
| 9 |
-
"segment_size": 17280,
|
| 10 |
-
"c_mel": 45,
|
| 11 |
-
"c_kl": 1.0
|
| 12 |
-
},
|
| 13 |
-
"data": {
|
| 14 |
-
"max_wav_value": 32768.0,
|
| 15 |
-
"sample_rate": 48000,
|
| 16 |
-
"filter_length": 2048,
|
| 17 |
-
"hop_length": 480,
|
| 18 |
-
"win_length": 2048,
|
| 19 |
-
"n_mel_channels": 128,
|
| 20 |
-
"mel_fmin": 0.0,
|
| 21 |
-
"mel_fmax": null
|
| 22 |
-
},
|
| 23 |
-
"model": {
|
| 24 |
-
"inter_channels": 192,
|
| 25 |
-
"hidden_channels": 192,
|
| 26 |
-
"filter_channels": 768,
|
| 27 |
-
"text_enc_hidden_dim": 768,
|
| 28 |
-
"n_heads": 2,
|
| 29 |
-
"n_layers": 6,
|
| 30 |
-
"kernel_size": 3,
|
| 31 |
-
"p_dropout": 0,
|
| 32 |
-
"resblock": "1",
|
| 33 |
-
"resblock_kernel_sizes": [3, 7, 11],
|
| 34 |
-
"resblock_dilation_sizes": [[1, 3, 5], [1, 3, 5], [1, 3, 5]],
|
| 35 |
-
"upsample_rates": [12, 10, 2, 2],
|
| 36 |
-
"upsample_initial_channel": 512,
|
| 37 |
-
"upsample_kernel_sizes": [24, 20, 4, 4],
|
| 38 |
-
"use_spectral_norm": false,
|
| 39 |
-
"gin_channels": 256,
|
| 40 |
-
"spk_embed_dim": 109
|
| 41 |
-
}
|
| 42 |
-
}
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
main/inference/audio_effects.py
DELETED
|
@@ -1,185 +0,0 @@
|
|
| 1 |
-
import os
|
| 2 |
-
import sys
|
| 3 |
-
import librosa
|
| 4 |
-
import argparse
|
| 5 |
-
|
| 6 |
-
import numpy as np
|
| 7 |
-
import soundfile as sf
|
| 8 |
-
|
| 9 |
-
from distutils.util import strtobool
|
| 10 |
-
from scipy.signal import butter, filtfilt
|
| 11 |
-
from pedalboard import Pedalboard, Chorus, Distortion, Reverb, PitchShift, Delay, Limiter, Gain, Bitcrush, Clipping, Compressor, Phaser, HighpassFilter
|
| 12 |
-
|
| 13 |
-
sys.path.append(os.getcwd())
|
| 14 |
-
|
| 15 |
-
from main.library.utils import pydub_load
|
| 16 |
-
from main.app.variables import translations, logger
|
| 17 |
-
|
| 18 |
-
def parse_arguments():
|
| 19 |
-
parser = argparse.ArgumentParser()
|
| 20 |
-
parser.add_argument("--audio_effects", action='store_true')
|
| 21 |
-
parser.add_argument("--input_path", type=str, required=True)
|
| 22 |
-
parser.add_argument("--output_path", type=str, default="./audios/apply_effects.wav")
|
| 23 |
-
parser.add_argument("--export_format", type=str, default="wav")
|
| 24 |
-
parser.add_argument("--resample", type=lambda x: bool(strtobool(x)), default=False)
|
| 25 |
-
parser.add_argument("--resample_sr", type=int, default=0)
|
| 26 |
-
parser.add_argument("--chorus", type=lambda x: bool(strtobool(x)), default=False)
|
| 27 |
-
parser.add_argument("--chorus_depth", type=float, default=0.5)
|
| 28 |
-
parser.add_argument("--chorus_rate", type=float, default=1.5)
|
| 29 |
-
parser.add_argument("--chorus_mix", type=float, default=0.5)
|
| 30 |
-
parser.add_argument("--chorus_delay", type=int, default=10)
|
| 31 |
-
parser.add_argument("--chorus_feedback", type=float, default=0)
|
| 32 |
-
parser.add_argument("--distortion", type=lambda x: bool(strtobool(x)), default=False)
|
| 33 |
-
parser.add_argument("--drive_db", type=int, default=20)
|
| 34 |
-
parser.add_argument("--reverb", type=lambda x: bool(strtobool(x)), default=False)
|
| 35 |
-
parser.add_argument("--reverb_room_size", type=float, default=0.5)
|
| 36 |
-
parser.add_argument("--reverb_damping", type=float, default=0.5)
|
| 37 |
-
parser.add_argument("--reverb_wet_level", type=float, default=0.33)
|
| 38 |
-
parser.add_argument("--reverb_dry_level", type=float, default=0.67)
|
| 39 |
-
parser.add_argument("--reverb_width", type=float, default=1)
|
| 40 |
-
parser.add_argument("--reverb_freeze_mode", type=lambda x: bool(strtobool(x)), default=False)
|
| 41 |
-
parser.add_argument("--pitchshift", type=lambda x: bool(strtobool(x)), default=False)
|
| 42 |
-
parser.add_argument("--pitch_shift", type=int, default=0)
|
| 43 |
-
parser.add_argument("--delay", type=lambda x: bool(strtobool(x)), default=False)
|
| 44 |
-
parser.add_argument("--delay_seconds", type=float, default=0.5)
|
| 45 |
-
parser.add_argument("--delay_feedback", type=float, default=0.5)
|
| 46 |
-
parser.add_argument("--delay_mix", type=float, default=0.5)
|
| 47 |
-
parser.add_argument("--compressor", type=lambda x: bool(strtobool(x)), default=False)
|
| 48 |
-
parser.add_argument("--compressor_threshold", type=int, default=-20)
|
| 49 |
-
parser.add_argument("--compressor_ratio", type=float, default=4)
|
| 50 |
-
parser.add_argument("--compressor_attack_ms", type=float, default=10)
|
| 51 |
-
parser.add_argument("--compressor_release_ms", type=int, default=200)
|
| 52 |
-
parser.add_argument("--limiter", type=lambda x: bool(strtobool(x)), default=False)
|
| 53 |
-
parser.add_argument("--limiter_threshold", type=int, default=0)
|
| 54 |
-
parser.add_argument("--limiter_release", type=int, default=100)
|
| 55 |
-
parser.add_argument("--gain", type=lambda x: bool(strtobool(x)), default=False)
|
| 56 |
-
parser.add_argument("--gain_db", type=int, default=0)
|
| 57 |
-
parser.add_argument("--bitcrush", type=lambda x: bool(strtobool(x)), default=False)
|
| 58 |
-
parser.add_argument("--bitcrush_bit_depth", type=int, default=16)
|
| 59 |
-
parser.add_argument("--clipping", type=lambda x: bool(strtobool(x)), default=False)
|
| 60 |
-
parser.add_argument("--clipping_threshold", type=int, default=-10)
|
| 61 |
-
parser.add_argument("--phaser", type=lambda x: bool(strtobool(x)), default=False)
|
| 62 |
-
parser.add_argument("--phaser_rate_hz", type=float, default=0.5)
|
| 63 |
-
parser.add_argument("--phaser_depth", type=float, default=0.5)
|
| 64 |
-
parser.add_argument("--phaser_centre_frequency_hz", type=int, default=1000)
|
| 65 |
-
parser.add_argument("--phaser_feedback", type=float, default=0)
|
| 66 |
-
parser.add_argument("--phaser_mix", type=float, default=0.5)
|
| 67 |
-
parser.add_argument("--treble_bass_boost", type=lambda x: bool(strtobool(x)), default=False)
|
| 68 |
-
parser.add_argument("--bass_boost_db", type=int, default=0)
|
| 69 |
-
parser.add_argument("--bass_boost_frequency", type=int, default=100)
|
| 70 |
-
parser.add_argument("--treble_boost_db", type=int, default=0)
|
| 71 |
-
parser.add_argument("--treble_boost_frequency", type=int, default=3000)
|
| 72 |
-
parser.add_argument("--fade_in_out", type=lambda x: bool(strtobool(x)), default=False)
|
| 73 |
-
parser.add_argument("--fade_in_duration", type=float, default=2000)
|
| 74 |
-
parser.add_argument("--fade_out_duration", type=float, default=2000)
|
| 75 |
-
parser.add_argument("--audio_combination", type=lambda x: bool(strtobool(x)), default=False)
|
| 76 |
-
parser.add_argument("--audio_combination_input", type=str)
|
| 77 |
-
parser.add_argument("--main_volume", type=int, default=0)
|
| 78 |
-
parser.add_argument("--combination_volume", type=int, default=-7)
|
| 79 |
-
|
| 80 |
-
return parser.parse_args()
|
| 81 |
-
|
| 82 |
-
def process_audio(input_path, output_path, resample, resample_sr, chorus_depth, chorus_rate, chorus_mix, chorus_delay, chorus_feedback, distortion_drive, reverb_room_size, reverb_damping, reverb_wet_level, reverb_dry_level, reverb_width, reverb_freeze_mode, pitch_shift, delay_seconds, delay_feedback, delay_mix, compressor_threshold, compressor_ratio, compressor_attack_ms, compressor_release_ms, limiter_threshold, limiter_release, gain_db, bitcrush_bit_depth, clipping_threshold, phaser_rate_hz, phaser_depth, phaser_centre_frequency_hz, phaser_feedback, phaser_mix, bass_boost_db, bass_boost_frequency, treble_boost_db, treble_boost_frequency, fade_in_duration, fade_out_duration, export_format, chorus, distortion, reverb, pitchshift, delay, compressor, limiter, gain, bitcrush, clipping, phaser, treble_bass_boost, fade_in_out, audio_combination, audio_combination_input, main_volume, combination_volume):
|
| 83 |
-
def _filtfilt(b, a, audio):
|
| 84 |
-
padlen = 3 * max(len(a), len(b))
|
| 85 |
-
original_len = len(audio)
|
| 86 |
-
|
| 87 |
-
if original_len <= padlen:
|
| 88 |
-
pad_width = padlen - original_len + 1
|
| 89 |
-
audio = np.pad(audio, (pad_width, 0), mode='reflect')
|
| 90 |
-
|
| 91 |
-
filtered = filtfilt(b, a, audio, padlen=0)
|
| 92 |
-
return filtered[-original_len:]
|
| 93 |
-
|
| 94 |
-
def bass_boost(audio, gain_db, frequency, sample_rate):
|
| 95 |
-
if gain_db >= 1:
|
| 96 |
-
b, a = butter(4, frequency / (0.5 * sample_rate), btype='low')
|
| 97 |
-
boosted = _filtfilt(b, a, audio)
|
| 98 |
-
return boosted * (10 ** (gain_db / 20))
|
| 99 |
-
return audio
|
| 100 |
-
|
| 101 |
-
def treble_boost(audio, gain_db, frequency, sample_rate):
|
| 102 |
-
if gain_db >= 1:
|
| 103 |
-
b, a = butter(4, frequency / (0.5 * sample_rate), btype='high')
|
| 104 |
-
boosted = _filtfilt(b, a, audio)
|
| 105 |
-
return boosted * (10 ** (gain_db / 20))
|
| 106 |
-
return audio
|
| 107 |
-
|
| 108 |
-
def fade_out_effect(audio, sr, duration=3.0):
|
| 109 |
-
length = int(duration * sr)
|
| 110 |
-
end = audio.shape[0]
|
| 111 |
-
if length > end: length = end
|
| 112 |
-
start = end - length
|
| 113 |
-
audio[start:end] = audio[start:end] * np.linspace(1.0, 0.0, length)
|
| 114 |
-
return audio
|
| 115 |
-
|
| 116 |
-
def fade_in_effect(audio, sr, duration=3.0):
|
| 117 |
-
length = int(duration * sr)
|
| 118 |
-
start = 0
|
| 119 |
-
if length > audio.shape[0]: length = audio.shape[0]
|
| 120 |
-
end = length
|
| 121 |
-
audio[start:end] = audio[start:end] * np.linspace(0.0, 1.0, length)
|
| 122 |
-
return audio
|
| 123 |
-
|
| 124 |
-
if not input_path or not os.path.exists(input_path):
|
| 125 |
-
logger.warning(translations["input_not_valid"])
|
| 126 |
-
sys.exit(1)
|
| 127 |
-
|
| 128 |
-
if not output_path:
|
| 129 |
-
logger.warning(translations["output_not_valid"])
|
| 130 |
-
sys.exit(1)
|
| 131 |
-
|
| 132 |
-
if os.path.exists(output_path): os.remove(output_path)
|
| 133 |
-
|
| 134 |
-
try:
|
| 135 |
-
input_path = input_path.strip(" ").strip('"').strip("\n").strip('"').strip(" ")
|
| 136 |
-
try:
|
| 137 |
-
audio, sample_rate = sf.read(input_path, dtype=np.float32)
|
| 138 |
-
except:
|
| 139 |
-
audio, sample_rate = librosa.load(input_path, sr=None)
|
| 140 |
-
except Exception as e:
|
| 141 |
-
logger.debug(f"{translations['errors_loading_audio']}: {e}")
|
| 142 |
-
raise RuntimeError(f"{translations['errors_loading_audio']}: {e}")
|
| 143 |
-
|
| 144 |
-
try:
|
| 145 |
-
board = Pedalboard([HighpassFilter()])
|
| 146 |
-
|
| 147 |
-
if chorus: board.append(Chorus(depth=chorus_depth, rate_hz=chorus_rate, mix=chorus_mix, centre_delay_ms=chorus_delay, feedback=chorus_feedback))
|
| 148 |
-
if distortion: board.append(Distortion(drive_db=distortion_drive))
|
| 149 |
-
if reverb: board.append(Reverb(room_size=reverb_room_size, damping=reverb_damping, wet_level=reverb_wet_level, dry_level=reverb_dry_level, width=reverb_width, freeze_mode=1 if reverb_freeze_mode else 0))
|
| 150 |
-
if pitchshift: board.append(PitchShift(semitones=pitch_shift))
|
| 151 |
-
if delay: board.append(Delay(delay_seconds=delay_seconds, feedback=delay_feedback, mix=delay_mix))
|
| 152 |
-
if compressor: board.append(Compressor(threshold_db=compressor_threshold, ratio=compressor_ratio, attack_ms=compressor_attack_ms, release_ms=compressor_release_ms))
|
| 153 |
-
if limiter: board.append(Limiter(threshold_db=limiter_threshold, release_ms=limiter_release))
|
| 154 |
-
if gain: board.append(Gain(gain_db=gain_db))
|
| 155 |
-
if bitcrush: board.append(Bitcrush(bit_depth=bitcrush_bit_depth))
|
| 156 |
-
if clipping: board.append(Clipping(threshold_db=clipping_threshold))
|
| 157 |
-
if phaser: board.append(Phaser(rate_hz=phaser_rate_hz, depth=phaser_depth, centre_frequency_hz=phaser_centre_frequency_hz, feedback=phaser_feedback, mix=phaser_mix))
|
| 158 |
-
|
| 159 |
-
processed_audio = board(audio, sample_rate)
|
| 160 |
-
|
| 161 |
-
if treble_bass_boost:
|
| 162 |
-
processed_audio = bass_boost(processed_audio, bass_boost_db, bass_boost_frequency, sample_rate)
|
| 163 |
-
processed_audio = treble_boost(processed_audio, treble_boost_db, treble_boost_frequency, sample_rate)
|
| 164 |
-
|
| 165 |
-
if fade_in_out:
|
| 166 |
-
processed_audio = fade_in_effect(processed_audio, sample_rate, fade_in_duration)
|
| 167 |
-
processed_audio = fade_out_effect(processed_audio, sample_rate, fade_out_duration)
|
| 168 |
-
|
| 169 |
-
if resample and resample_sr != sample_rate and resample_sr > 0:
|
| 170 |
-
processed_audio = librosa.resample(processed_audio, orig_sr=sample_rate, target_sr=resample_sr, res_type="soxr_vhq")
|
| 171 |
-
sample_rate = resample_sr
|
| 172 |
-
|
| 173 |
-
sf.write(output_path.replace("wav", export_format), processed_audio, sample_rate, format=export_format)
|
| 174 |
-
if audio_combination: pydub_load(audio_combination_input, combination_volume).overlay(pydub_load(output_path.replace("wav", export_format), main_volume)).export(output_path.replace("wav", export_format), format=export_format)
|
| 175 |
-
except Exception as e:
|
| 176 |
-
import traceback
|
| 177 |
-
logger.debug(traceback.format_exc())
|
| 178 |
-
raise RuntimeError(translations["apply_error"].format(e=e))
|
| 179 |
-
return output_path
|
| 180 |
-
|
| 181 |
-
def main():
|
| 182 |
-
args = parse_arguments()
|
| 183 |
-
process_audio(input_path=args.input_path, output_path=args.output_path, resample=args.resample, resample_sr=args.resample_sr, chorus_depth=args.chorus_depth, chorus_rate=args.chorus_rate, chorus_mix=args.chorus_mix, chorus_delay=args.chorus_delay, chorus_feedback=args.chorus_feedback, distortion_drive=args.drive_db, reverb_room_size=args.reverb_room_size, reverb_damping=args.reverb_damping, reverb_wet_level=args.reverb_wet_level, reverb_dry_level=args.reverb_dry_level, reverb_width=args.reverb_width, reverb_freeze_mode=args.reverb_freeze_mode, pitch_shift=args.pitch_shift, delay_seconds=args.delay_seconds, delay_feedback=args.delay_feedback, delay_mix=args.delay_mix, compressor_threshold=args.compressor_threshold, compressor_ratio=args.compressor_ratio, compressor_attack_ms=args.compressor_attack_ms, compressor_release_ms=args.compressor_release_ms, limiter_threshold=args.limiter_threshold, limiter_release=args.limiter_release, gain_db=args.gain_db, bitcrush_bit_depth=args.bitcrush_bit_depth, clipping_threshold=args.clipping_threshold, phaser_rate_hz=args.phaser_rate_hz, phaser_depth=args.phaser_depth, phaser_centre_frequency_hz=args.phaser_centre_frequency_hz, phaser_feedback=args.phaser_feedback, phaser_mix=args.phaser_mix, bass_boost_db=args.bass_boost_db, bass_boost_frequency=args.bass_boost_frequency, treble_boost_db=args.treble_boost_db, treble_boost_frequency=args.treble_boost_frequency, fade_in_duration=args.fade_in_duration, fade_out_duration=args.fade_out_duration, export_format=args.export_format, chorus=args.chorus, distortion=args.distortion, reverb=args.reverb, pitchshift=args.pitchshift, delay=args.delay, compressor=args.compressor, limiter=args.limiter, gain=args.gain, bitcrush=args.bitcrush, clipping=args.clipping, phaser=args.phaser, treble_bass_boost=args.treble_bass_boost, fade_in_out=args.fade_in_out, audio_combination=args.audio_combination, audio_combination_input=args.audio_combination_input, main_volume=args.main_volume, 
combination_volume=args.combination_volume)
|
| 184 |
-
|
| 185 |
-
if __name__ == "__main__": main()
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
main/inference/conversion/convert.py
DELETED
|
@@ -1,300 +0,0 @@
|
|
| 1 |
-
import os
|
| 2 |
-
import sys
|
| 3 |
-
import json
|
| 4 |
-
import onnx
|
| 5 |
-
import time
|
| 6 |
-
import torch
|
| 7 |
-
import librosa
|
| 8 |
-
import logging
|
| 9 |
-
import argparse
|
| 10 |
-
import warnings
|
| 11 |
-
import onnxruntime
|
| 12 |
-
|
| 13 |
-
import numpy as np
|
| 14 |
-
import soundfile as sf
|
| 15 |
-
|
| 16 |
-
from tqdm import tqdm
|
| 17 |
-
from distutils.util import strtobool
|
| 18 |
-
|
| 19 |
-
warnings.filterwarnings("ignore")
|
| 20 |
-
sys.path.append(os.getcwd())
|
| 21 |
-
|
| 22 |
-
from main.inference.conversion.pipeline import Pipeline
|
| 23 |
-
from main.app.variables import config, logger, translations
|
| 24 |
-
from main.library.algorithm.synthesizers import Synthesizer
|
| 25 |
-
from main.inference.conversion.utils import clear_gpu_cache
|
| 26 |
-
from main.library.utils import check_assets, load_audio, load_embedders_model, cut, restore, get_providers
|
| 27 |
-
|
| 28 |
-
for l in ["torch", "faiss", "omegaconf", "httpx", "httpcore", "faiss.loader", "numba.core", "urllib3", "transformers", "matplotlib"]:
|
| 29 |
-
logging.getLogger(l).setLevel(logging.ERROR)
|
| 30 |
-
|
| 31 |
-
def parse_arguments():
|
| 32 |
-
parser = argparse.ArgumentParser()
|
| 33 |
-
parser.add_argument("--convert", action='store_true')
|
| 34 |
-
parser.add_argument("--pitch", type=int, default=0)
|
| 35 |
-
parser.add_argument("--filter_radius", type=int, default=3)
|
| 36 |
-
parser.add_argument("--index_rate", type=float, default=0.5)
|
| 37 |
-
parser.add_argument("--rms_mix_rate", type=float, default=1)
|
| 38 |
-
parser.add_argument("--protect", type=float, default=0.33)
|
| 39 |
-
parser.add_argument("--hop_length", type=int, default=64)
|
| 40 |
-
parser.add_argument("--f0_method", type=str, default="rmvpe")
|
| 41 |
-
parser.add_argument("--embedder_model", type=str, default="contentvec_base")
|
| 42 |
-
parser.add_argument("--input_path", type=str, required=True)
|
| 43 |
-
parser.add_argument("--output_path", type=str, default="./audios/output.wav")
|
| 44 |
-
parser.add_argument("--export_format", type=str, default="wav")
|
| 45 |
-
parser.add_argument("--pth_path", type=str, required=True)
|
| 46 |
-
parser.add_argument("--index_path", type=str, default="")
|
| 47 |
-
parser.add_argument("--f0_autotune", type=lambda x: bool(strtobool(x)), default=False)
|
| 48 |
-
parser.add_argument("--f0_autotune_strength", type=float, default=1)
|
| 49 |
-
parser.add_argument("--clean_audio", type=lambda x: bool(strtobool(x)), default=False)
|
| 50 |
-
parser.add_argument("--clean_strength", type=float, default=0.7)
|
| 51 |
-
parser.add_argument("--resample_sr", type=int, default=0)
|
| 52 |
-
parser.add_argument("--split_audio", type=lambda x: bool(strtobool(x)), default=False)
|
| 53 |
-
parser.add_argument("--checkpointing", type=lambda x: bool(strtobool(x)), default=False)
|
| 54 |
-
parser.add_argument("--f0_file", type=str, default="")
|
| 55 |
-
parser.add_argument("--f0_onnx", type=lambda x: bool(strtobool(x)), default=False)
|
| 56 |
-
parser.add_argument("--embedders_mode", type=str, default="fairseq")
|
| 57 |
-
parser.add_argument("--formant_shifting", type=lambda x: bool(strtobool(x)), default=False)
|
| 58 |
-
parser.add_argument("--formant_qfrency", type=float, default=0.8)
|
| 59 |
-
parser.add_argument("--formant_timbre", type=float, default=0.8)
|
| 60 |
-
parser.add_argument("--proposal_pitch", type=lambda x: bool(strtobool(x)), default=False)
|
| 61 |
-
parser.add_argument("--proposal_pitch_threshold", type=float, default=255.0)
|
| 62 |
-
|
| 63 |
-
return parser.parse_args()
|
| 64 |
-
|
| 65 |
-
def main():
|
| 66 |
-
args = parse_arguments()
|
| 67 |
-
pitch, filter_radius, index_rate, rms_mix_rate, protect, hop_length, f0_method, input_path, output_path, pth_path, index_path, f0_autotune, f0_autotune_strength, clean_audio, clean_strength, export_format, embedder_model, resample_sr, split_audio, checkpointing, f0_file, f0_onnx, embedders_mode, formant_shifting, formant_qfrency, formant_timbre, proposal_pitch, proposal_pitch_threshold = args.pitch, args.filter_radius, args.index_rate, args.rms_mix_rate,args.protect, args.hop_length, args.f0_method, args.input_path, args.output_path, args.pth_path, args.index_path, args.f0_autotune, args.f0_autotune_strength, args.clean_audio, args.clean_strength, args.export_format, args.embedder_model, args.resample_sr, args.split_audio, args.checkpointing, args.f0_file, args.f0_onnx, args.embedders_mode, args.formant_shifting, args.formant_qfrency, args.formant_timbre, args.proposal_pitch, args.proposal_pitch_threshold
|
| 68 |
-
|
| 69 |
-
run_convert_script(pitch=pitch, filter_radius=filter_radius, index_rate=index_rate, rms_mix_rate=rms_mix_rate, protect=protect, hop_length=hop_length, f0_method=f0_method, input_path=input_path, output_path=output_path, pth_path=pth_path, index_path=index_path, f0_autotune=f0_autotune, f0_autotune_strength=f0_autotune_strength, clean_audio=clean_audio, clean_strength=clean_strength, export_format=export_format, embedder_model=embedder_model, resample_sr=resample_sr, split_audio=split_audio, checkpointing=checkpointing, f0_file=f0_file, f0_onnx=f0_onnx, embedders_mode=embedders_mode, formant_shifting=formant_shifting, formant_qfrency=formant_qfrency, formant_timbre=formant_timbre, proposal_pitch=proposal_pitch, proposal_pitch_threshold=proposal_pitch_threshold)
|
| 70 |
-
|
| 71 |
-
def run_convert_script(pitch=0, filter_radius=3, index_rate=0.5, rms_mix_rate=1, protect=0.5, hop_length=64, f0_method="rmvpe", input_path=None, output_path="./output.wav", pth_path=None, index_path=None, f0_autotune=False, f0_autotune_strength=1, clean_audio=False, clean_strength=0.7, export_format="wav", embedder_model="contentvec_base", resample_sr=0, split_audio=False, checkpointing=False, f0_file=None, f0_onnx=False, embedders_mode="fairseq", formant_shifting=False, formant_qfrency=0.8, formant_timbre=0.8, proposal_pitch=False, proposal_pitch_threshold=255.0):
|
| 72 |
-
check_assets(f0_method, embedder_model, f0_onnx=f0_onnx, embedders_mode=embedders_mode)
|
| 73 |
-
log_data = {translations['pitch']: pitch, translations['filter_radius']: filter_radius, translations['index_strength']: index_rate, translations['rms_mix_rate']: rms_mix_rate, translations['protect']: protect, "Hop length": hop_length, translations['f0_method']: f0_method, translations['audio_path']: input_path, translations['output_path']: output_path.replace('wav', export_format), translations['model_path']: pth_path, translations['indexpath']: index_path, translations['autotune']: f0_autotune, translations['clear_audio']: clean_audio, translations['export_format']: export_format, translations['hubert_model']: embedder_model, translations['split_audio']: split_audio, translations['memory_efficient_training']: checkpointing, translations["f0_onnx_mode"]: f0_onnx, translations["embed_mode"]: embedders_mode, translations["proposal_pitch"]: proposal_pitch}
|
| 74 |
-
|
| 75 |
-
if clean_audio: log_data[translations['clean_strength']] = clean_strength
|
| 76 |
-
if resample_sr != 0: log_data[translations['sample_rate']] = resample_sr
|
| 77 |
-
if f0_autotune: log_data[translations['autotune_rate_info']] = f0_autotune_strength
|
| 78 |
-
if os.path.isfile(f0_file): log_data[translations['f0_file']] = f0_file
|
| 79 |
-
if proposal_pitch: log_data[translations["proposal_pitch_threshold"]] = proposal_pitch_threshold
|
| 80 |
-
if formant_shifting:
|
| 81 |
-
log_data[translations['formant_qfrency']] = formant_qfrency
|
| 82 |
-
log_data[translations['formant_timbre']] = formant_timbre
|
| 83 |
-
|
| 84 |
-
for key, value in log_data.items():
|
| 85 |
-
logger.debug(f"{key}: {value}")
|
| 86 |
-
|
| 87 |
-
if not pth_path or not os.path.exists(pth_path) or os.path.isdir(pth_path) or not pth_path.endswith((".pth", ".onnx")):
|
| 88 |
-
logger.warning(translations["provide_file"].format(filename=translations["model"]))
|
| 89 |
-
sys.exit(1)
|
| 90 |
-
|
| 91 |
-
cvt = VoiceConverter(pth_path, 0)
|
| 92 |
-
start_time = time.time()
|
| 93 |
-
|
| 94 |
-
pid_path = os.path.join("assets", "convert_pid.txt")
|
| 95 |
-
with open(pid_path, "w") as pid_file:
|
| 96 |
-
pid_file.write(str(os.getpid()))
|
| 97 |
-
|
| 98 |
-
if os.path.isdir(input_path):
|
| 99 |
-
logger.info(translations["convert_batch"])
|
| 100 |
-
audio_files = [f for f in os.listdir(input_path) if f.lower().endswith(("wav", "mp3", "flac", "ogg", "opus", "m4a", "mp4", "aac", "alac", "wma", "aiff", "webm", "ac3"))]
|
| 101 |
-
|
| 102 |
-
if not audio_files:
|
| 103 |
-
logger.warning(translations["not_found_audio"])
|
| 104 |
-
sys.exit(1)
|
| 105 |
-
|
| 106 |
-
logger.info(translations["found_audio"].format(audio_files=len(audio_files)))
|
| 107 |
-
|
| 108 |
-
for audio in audio_files:
|
| 109 |
-
audio_path = os.path.join(input_path, audio)
|
| 110 |
-
output_audio = os.path.join(input_path, os.path.splitext(audio)[0] + f"_output.{export_format}")
|
| 111 |
-
|
| 112 |
-
logger.info(f"{translations['convert_audio']} '{audio_path}'...")
|
| 113 |
-
if os.path.exists(output_audio): os.remove(output_audio)
|
| 114 |
-
|
| 115 |
-
cvt.convert_audio(pitch=pitch, filter_radius=filter_radius, index_rate=index_rate, rms_mix_rate=rms_mix_rate, protect=protect, hop_length=hop_length, f0_method=f0_method, audio_input_path=audio_path, audio_output_path=output_audio, index_path=index_path, f0_autotune=f0_autotune, f0_autotune_strength=f0_autotune_strength, clean_audio=clean_audio, clean_strength=clean_strength, export_format=export_format, embedder_model=embedder_model, resample_sr=resample_sr, checkpointing=checkpointing, f0_file=f0_file, f0_onnx=f0_onnx, embedders_mode=embedders_mode, formant_shifting=formant_shifting, formant_qfrency=formant_qfrency, formant_timbre=formant_timbre, split_audio=split_audio, proposal_pitch=proposal_pitch, proposal_pitch_threshold=proposal_pitch_threshold)
|
| 116 |
-
|
| 117 |
-
logger.info(translations["convert_batch_success"].format(elapsed_time=f"{(time.time() - start_time):.2f}", output_path=output_path.replace('wav', export_format)))
|
| 118 |
-
else:
|
| 119 |
-
if not os.path.exists(input_path):
|
| 120 |
-
logger.warning(translations["not_found_audio"])
|
| 121 |
-
sys.exit(1)
|
| 122 |
-
|
| 123 |
-
logger.info(f"{translations['convert_audio']} '{input_path}'...")
|
| 124 |
-
if os.path.exists(output_path): os.remove(output_path)
|
| 125 |
-
|
| 126 |
-
cvt.convert_audio(pitch=pitch, filter_radius=filter_radius, index_rate=index_rate, rms_mix_rate=rms_mix_rate, protect=protect, hop_length=hop_length, f0_method=f0_method, audio_input_path=input_path, audio_output_path=output_path, index_path=index_path, f0_autotune=f0_autotune, f0_autotune_strength=f0_autotune_strength, clean_audio=clean_audio, clean_strength=clean_strength, export_format=export_format, embedder_model=embedder_model, resample_sr=resample_sr, checkpointing=checkpointing, f0_file=f0_file, f0_onnx=f0_onnx, embedders_mode=embedders_mode, formant_shifting=formant_shifting, formant_qfrency=formant_qfrency, formant_timbre=formant_timbre, split_audio=split_audio, proposal_pitch=proposal_pitch, proposal_pitch_threshold=proposal_pitch_threshold)
|
| 127 |
-
logger.info(translations["convert_audio_success"].format(input_path=input_path, elapsed_time=f"{(time.time() - start_time):.2f}", output_path=output_path.replace('wav', export_format)))
|
| 128 |
-
|
| 129 |
-
if os.path.exists(pid_path): os.remove(pid_path)
|
| 130 |
-
|
| 131 |
-
class VoiceConverter:
|
| 132 |
-
def __init__(self, model_path, sid = 0):
|
| 133 |
-
self.config = config
|
| 134 |
-
self.device = config.device
|
| 135 |
-
self.hubert_model = None
|
| 136 |
-
self.tgt_sr = None
|
| 137 |
-
self.net_g = None
|
| 138 |
-
self.vc = None
|
| 139 |
-
self.cpt = None
|
| 140 |
-
self.version = None
|
| 141 |
-
self.n_spk = None
|
| 142 |
-
self.use_f0 = None
|
| 143 |
-
self.loaded_model = None
|
| 144 |
-
self.vocoder = "Default"
|
| 145 |
-
self.checkpointing = False
|
| 146 |
-
self.sample_rate = 16000
|
| 147 |
-
self.sid = sid
|
| 148 |
-
self.get_vc(model_path, sid)
|
| 149 |
-
|
| 150 |
-
def convert_audio(self, audio_input_path, audio_output_path, index_path, embedder_model, pitch, f0_method, index_rate, rms_mix_rate, protect, hop_length, f0_autotune, f0_autotune_strength, filter_radius, clean_audio, clean_strength, export_format, resample_sr = 0, checkpointing = False, f0_file = None, f0_onnx = False, embedders_mode = "fairseq", formant_shifting = False, formant_qfrency = 0.8, formant_timbre = 0.8, split_audio = False, proposal_pitch = False, proposal_pitch_threshold = 255.0):
|
| 151 |
-
try:
|
| 152 |
-
with tqdm(total=10, desc=translations["convert_audio"], ncols=100, unit="a", leave=not split_audio) as pbar:
|
| 153 |
-
audio = load_audio(audio_input_path, self.sample_rate, formant_shifting=formant_shifting, formant_qfrency=formant_qfrency, formant_timbre=formant_timbre)
|
| 154 |
-
self.checkpointing = checkpointing
|
| 155 |
-
|
| 156 |
-
audio_max = np.abs(audio).max() / 0.95
|
| 157 |
-
if audio_max > 1: audio /= audio_max
|
| 158 |
-
|
| 159 |
-
if not self.hubert_model:
|
| 160 |
-
models, embed_suffix = load_embedders_model(embedder_model, embedders_mode)
|
| 161 |
-
self.hubert_model = (models.to(self.device).half() if self.config.is_half else models.to(self.device).float()).eval() if embed_suffix in [".pt", ".safetensors"] else models
|
| 162 |
-
self.embed_suffix = embed_suffix
|
| 163 |
-
|
| 164 |
-
pbar.update(1)
|
| 165 |
-
if split_audio:
|
| 166 |
-
pbar.close()
|
| 167 |
-
chunks = cut(audio, self.sample_rate, db_thresh=-60, min_interval=500)
|
| 168 |
-
|
| 169 |
-
logger.info(f"{translations['split_total']}: {len(chunks)}")
|
| 170 |
-
pbar = tqdm(total=len(chunks) * 5 + 4, desc=translations["convert_audio"], ncols=100, unit="a", leave=True)
|
| 171 |
-
else: chunks = [(audio, 0, 0)]
|
| 172 |
-
|
| 173 |
-
pbar.update(1)
|
| 174 |
-
converted_chunks = [(
|
| 175 |
-
start,
|
| 176 |
-
end,
|
| 177 |
-
self.vc.pipeline(
|
| 178 |
-
logger=logger,
|
| 179 |
-
model=self.hubert_model,
|
| 180 |
-
net_g=self.net_g,
|
| 181 |
-
sid=self.sid,
|
| 182 |
-
audio=waveform,
|
| 183 |
-
f0_up_key=pitch,
|
| 184 |
-
f0_method=f0_method,
|
| 185 |
-
file_index=(index_path.strip().strip('"').strip("\n").strip('"').strip().replace("trained", "added")),
|
| 186 |
-
index_rate=index_rate,
|
| 187 |
-
pitch_guidance=self.use_f0,
|
| 188 |
-
filter_radius=filter_radius,
|
| 189 |
-
rms_mix_rate=rms_mix_rate,
|
| 190 |
-
version=self.version,
|
| 191 |
-
protect=protect,
|
| 192 |
-
hop_length=hop_length,
|
| 193 |
-
f0_autotune=f0_autotune,
|
| 194 |
-
f0_autotune_strength=f0_autotune_strength,
|
| 195 |
-
suffix=self.suffix,
|
| 196 |
-
embed_suffix=self.embed_suffix,
|
| 197 |
-
f0_file=f0_file,
|
| 198 |
-
f0_onnx=f0_onnx,
|
| 199 |
-
pbar=pbar,
|
| 200 |
-
proposal_pitch=proposal_pitch,
|
| 201 |
-
proposal_pitch_threshold=proposal_pitch_threshold,
|
| 202 |
-
energy_use=self.energy
|
| 203 |
-
)
|
| 204 |
-
) for waveform, start, end in chunks]
|
| 205 |
-
|
| 206 |
-
pbar.update(1)
|
| 207 |
-
|
| 208 |
-
del self.net_g, self.hubert_model
|
| 209 |
-
audio_output = restore(converted_chunks, total_len=len(audio), dtype=converted_chunks[0][2].dtype) if split_audio else converted_chunks[0][2]
|
| 210 |
-
|
| 211 |
-
if self.tgt_sr != resample_sr and resample_sr > 0:
|
| 212 |
-
audio_output = librosa.resample(audio_output, orig_sr=self.tgt_sr, target_sr=resample_sr, res_type="soxr_vhq")
|
| 213 |
-
self.tgt_sr = resample_sr
|
| 214 |
-
|
| 215 |
-
pbar.update(1)
|
| 216 |
-
if clean_audio:
|
| 217 |
-
from main.tools.noisereduce import reduce_noise
|
| 218 |
-
audio_output = reduce_noise(y=audio_output, sr=self.tgt_sr, prop_decrease=clean_strength, device=self.device)
|
| 219 |
-
|
| 220 |
-
if len(audio) / self.sample_rate > len(audio_output) / self.tgt_sr:
|
| 221 |
-
padding = np.zeros(int(np.round(len(audio) / self.sample_rate * self.tgt_sr) - len(audio_output)), dtype=audio_output.dtype)
|
| 222 |
-
audio_output = np.concatenate([audio_output, padding])
|
| 223 |
-
|
| 224 |
-
try:
|
| 225 |
-
sf.write(audio_output_path, audio_output, self.tgt_sr, format=export_format)
|
| 226 |
-
except:
|
| 227 |
-
sf.write(audio_output_path, librosa.resample(audio_output, orig_sr=self.tgt_sr, target_sr=48000, res_type="soxr_vhq"), 48000, format=export_format)
|
| 228 |
-
|
| 229 |
-
pbar.update(1)
|
| 230 |
-
except Exception as e:
|
| 231 |
-
logger.error(translations["error_convert"].format(e=e))
|
| 232 |
-
import traceback
|
| 233 |
-
logger.debug(traceback.format_exc())
|
| 234 |
-
|
| 235 |
-
def get_vc(self, weight_root, sid):
|
| 236 |
-
if sid == "" or sid == []:
|
| 237 |
-
self.cleanup()
|
| 238 |
-
clear_gpu_cache()
|
| 239 |
-
|
| 240 |
-
if not self.loaded_model or self.loaded_model != weight_root:
|
| 241 |
-
self.loaded_model = weight_root
|
| 242 |
-
self.load_model()
|
| 243 |
-
if self.cpt is not None: self.setup()
|
| 244 |
-
|
| 245 |
-
def cleanup(self):
|
| 246 |
-
if self.hubert_model is not None:
|
| 247 |
-
del self.net_g, self.n_spk, self.vc, self.hubert_model, self.tgt_sr
|
| 248 |
-
self.hubert_model = self.net_g = self.n_spk = self.vc = self.tgt_sr = None
|
| 249 |
-
clear_gpu_cache()
|
| 250 |
-
|
| 251 |
-
del self.net_g, self.cpt
|
| 252 |
-
clear_gpu_cache()
|
| 253 |
-
self.cpt = None
|
| 254 |
-
|
| 255 |
-
def load_model(self):
    """Load ``self.loaded_model`` into ``self.cpt``.

    A ``.pth`` file is loaded as a torch checkpoint on the CPU; any other
    extension is opened as an ONNX Runtime inference session. A missing
    file leaves ``self.cpt`` set to ``None``.
    """
    if not os.path.isfile(self.loaded_model):
        self.cpt = None
        return

    if self.loaded_model.endswith(".pth"):
        self.cpt = torch.load(self.loaded_model, map_location="cpu", weights_only=True)
        return

    # ONNX model: quiet session (log_severity_level 3 == errors only).
    options = onnxruntime.SessionOptions()
    options.log_severity_level = 3
    self.cpt = onnxruntime.InferenceSession(self.loaded_model, sess_options=options, providers=get_providers())
|
| 263 |
-
|
| 264 |
-
def setup(self):
    """Build the synthesizer (``self.net_g``) and pipeline from ``self.cpt``."""
    if self.cpt is not None:
        if self.loaded_model.endswith(".pth"):
            # Target sample rate is the last entry of the stored config; the
            # speaker count is recovered from the embedding weight's shape.
            self.tgt_sr = self.cpt["config"][-1]
            self.cpt["config"][-3] = self.cpt["weight"]["emb_g.weight"].shape[0]

            self.use_f0 = self.cpt.get("f0", 1)
            self.version = self.cpt.get("version", "v1")
            self.vocoder = self.cpt.get("vocoder", "Default")
            self.energy = self.cpt.get("energy", False)

            # Non-default vocoders force full precision.
            if self.vocoder != "Default": self.config.is_half = False
            self.net_g = Synthesizer(*self.cpt["config"], use_f0=self.use_f0, text_enc_hidden_dim=768 if self.version == "v2" else 256, vocoder=self.vocoder, checkpointing=self.checkpointing, energy=self.energy)
            # enc_q is only used during training; drop it for inference.
            del self.net_g.enc_q

            self.net_g.load_state_dict(self.cpt["weight"], strict=False)
            self.net_g.eval().to(self.device)
            self.net_g = (self.net_g.half() if self.config.is_half else self.net_g.float())
            self.n_spk = self.cpt["config"][-3]
            self.suffix = ".pth"
        else:
            # ONNX model: the parameters a .pth checkpoint stores in its
            # config come from the embedded "model_info" metadata instead.
            metadata_dict = None
            for prop in onnx.load(self.loaded_model).metadata_props:
                if prop.key == "model_info":
                    metadata_dict = json.loads(prop.value)
                    break

            # NOTE(review): if no "model_info" entry exists, metadata_dict
            # stays None and the .get() calls below raise AttributeError —
            # confirm every exported model carries this metadata.
            self.net_g = self.cpt
            self.tgt_sr = metadata_dict.get("sr", 32000)
            self.use_f0 = metadata_dict.get("f0", 1)
            self.version = metadata_dict.get("version", "v1")
            self.energy = metadata_dict.get("energy", False)
            self.suffix = ".onnx"

        self.vc = Pipeline(self.tgt_sr, self.config)
|
| 299 |
-
|
| 300 |
-
# Script entry point: run only when executed directly, not on import.
if __name__ == "__main__": main()
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
main/inference/conversion/pipeline.py
DELETED
|
@@ -1,251 +0,0 @@
|
|
| 1 |
-
import os
|
| 2 |
-
import sys
|
| 3 |
-
import torch
|
| 4 |
-
import faiss
|
| 5 |
-
|
| 6 |
-
import numpy as np
|
| 7 |
-
import torch.nn.functional as F
|
| 8 |
-
|
| 9 |
-
from scipy import signal
|
| 10 |
-
|
| 11 |
-
sys.path.append(os.getcwd())
|
| 12 |
-
|
| 13 |
-
from main.app.variables import translations
|
| 14 |
-
from main.library.utils import extract_features
|
| 15 |
-
from main.library.predictors.Generator import Generator
|
| 16 |
-
from main.inference.extracting.rms import RMSEnergyExtractor
|
| 17 |
-
from main.inference.conversion.utils import change_rms, clear_gpu_cache, get_onnx_argument
|
| 18 |
-
|
| 19 |
-
# 5th-order Butterworth high-pass at 48 Hz for 16 kHz audio: removes
# DC offset and low-frequency rumble before conversion.
bh, ah = signal.butter(N=5, Wn=48, btype="high", fs=16000)
|
| 20 |
-
|
| 21 |
-
class Pipeline:
    """RVC voice-conversion pipeline.

    Slices long audio at low-energy points, extracts content features /
    F0 / energy per segment, runs the synthesizer and stitches the
    converted segments back together.
    """

    def __init__(self, tgt_sr, config):
        # Padding / slicing windows are configured in seconds and converted
        # to sample counts at the 16 kHz feature rate below.
        self.x_pad = config.x_pad
        self.x_query = config.x_query
        self.x_center = config.x_center
        self.x_max = config.x_max
        self.sample_rate = 16000  # feature-extraction rate (Hz)
        self.window = 160  # hop size: 10 ms at 16 kHz
        self.t_pad = self.sample_rate * self.x_pad
        self.t_pad_tgt = tgt_sr * self.x_pad  # same pad, in output-rate samples
        self.t_pad2 = self.t_pad * 2
        self.t_query = self.sample_rate * self.x_query
        self.t_center = self.sample_rate * self.x_center
        self.t_max = self.sample_rate * self.x_max  # longest chunk processed unsplit
        self.f0_min = 50
        self.f0_max = 1100
        self.device = config.device
        self.is_half = config.is_half

    def voice_conversion(self, model, net_g, sid, audio0, pitch, pitchf, index, big_npy, index_rate, version, protect, energy):
        """Convert one audio segment and return the synthesized waveform (np.float32)."""
        pitch_guidance = pitch != None and pitchf != None
        energy_use = energy != None

        feats = torch.from_numpy(audio0)
        feats = feats.half() if self.is_half else feats.float()

        # Collapse stereo to mono; the embedder expects a 1-D signal.
        feats = feats.mean(-1) if feats.dim() == 2 else feats
        assert feats.dim() == 1, feats.dim()
        feats = feats.view(1, -1)

        with torch.no_grad():
            # Content-feature extraction depends on the embedder file format.
            if self.embed_suffix == ".pt":
                padding_mask = torch.BoolTensor(feats.shape).to(self.device).fill_(False)
                logits = model.extract_features(**{"source": feats.to(self.device), "padding_mask": padding_mask, "output_layer": 9 if version == "v1" else 12})
                feats = model.final_proj(logits[0]) if version == "v1" else logits[0]
            elif self.embed_suffix == ".onnx": feats = extract_features(model, feats.to(self.device), version).to(self.device)
            elif self.embed_suffix == ".safetensors":
                logits = model(feats.to(self.device))["last_hidden_state"]
                feats = model.final_proj(logits[0]).unsqueeze(0) if version == "v1" else logits
            else: raise ValueError(translations["option_not_valid"])

            # Keep a pristine copy for the "protect" blend on unvoiced frames.
            feats0 = feats.clone() if protect < 0.5 and pitch_guidance else None

            # FAISS retrieval: blend features towards the k=8 nearest training
            # features, weighted by inverse squared distance.
            if (not isinstance(index, type(None)) and not isinstance(big_npy, type(None)) and index_rate != 0):
                npy = feats[0].cpu().numpy()
                if self.is_half: npy = npy.astype(np.float32)

                score, ix = index.search(npy, k=8)
                weight = np.square(1 / score)

                npy = np.sum(big_npy[ix] * np.expand_dims(weight / weight.sum(axis=1, keepdims=True), axis=2), axis=1)
                if self.is_half: npy = npy.astype(np.float16)

                feats = (torch.from_numpy(npy).unsqueeze(0).to(self.device) * index_rate + (1 - index_rate) * feats)

            # Upsample features 2x in time to match the pitch frame rate.
            feats = F.interpolate(feats.permute(0, 2, 1), scale_factor=2).permute(0, 2, 1)
            p_len = min(audio0.shape[0] // self.window, feats.shape[1])

            if pitch_guidance: pitch, pitchf = pitch[:, :p_len], pitchf[:, :p_len]
            if energy_use: energy = energy[:, :p_len]

            if feats0 is not None:
                # "protect": on unvoiced frames (f0 == 0) fall back towards the
                # unindexed features to preserve consonants/breaths.
                pitchff = pitchf.clone()
                pitchff[pitchf > 0] = 1
                pitchff[pitchf < 1] = protect
                pitchff = pitchff.unsqueeze(-1)

                feats0 = F.interpolate(feats0.permute(0, 2, 1), scale_factor=2).permute(0, 2, 1)
                feats = (feats * pitchff + feats0 * (1 - pitchff)).to(feats0.dtype)

            p_len = torch.tensor([p_len], device=self.device).long()
            feats = feats.half() if self.is_half else feats.float()

            if not pitch_guidance: pitch, pitchf = None, None
            else: pitchf = pitchf.half() if self.is_half else pitchf.float()
            if not energy_use: energy = None
            else: energy = energy.half() if self.is_half else energy.float()

            # .pth checkpoints run through the torch Synthesizer; .onnx models
            # through an ONNX Runtime session fed by get_onnx_argument().
            audio1 = (
                (
                    net_g.infer(
                        feats,
                        p_len,
                        pitch,
                        pitchf,
                        sid,
                        energy
                    )[0][0, 0]
                ).data.cpu().float().numpy()
            ) if self.suffix == ".pth" else (
                net_g.run(
                    [net_g.get_outputs()[0].name], (
                        get_onnx_argument(
                            net_g,
                            feats,
                            p_len,
                            sid,
                            pitch,
                            pitchf,
                            energy,
                            pitch_guidance,
                            energy_use
                        )
                    )
                )[0][0, 0]
            )

        if self.embed_suffix == ".pt": del padding_mask
        del feats, feats0, p_len

        clear_gpu_cache()
        return audio1

    def pipeline(self, logger, model, net_g, sid, audio, f0_up_key, f0_method, file_index, index_rate, pitch_guidance, filter_radius, rms_mix_rate, version, protect, hop_length, f0_autotune, f0_autotune_strength, suffix, embed_suffix, f0_file=None, f0_onnx=False, pbar=None, proposal_pitch=False, proposal_pitch_threshold=255.0, energy_use=False):
        """Full conversion driver: FAISS index load, slicing, F0/energy
        extraction, per-segment conversion and final loudness/peak handling.

        Returns the converted waveform as a numpy array.
        """
        self.suffix = suffix
        self.embed_suffix = embed_suffix

        # Optional FAISS retrieval index; failures fall back to no retrieval.
        if file_index != "" and os.path.exists(file_index) and index_rate != 0:
            try:
                index = faiss.read_index(file_index)
                big_npy = index.reconstruct_n(0, index.ntotal)
            except Exception as e:
                logger.error(translations["read_faiss_index_error"].format(e=e))
                index = big_npy = None
        else: index = big_npy = None

        if pbar: pbar.update(1)
        opt_ts, audio_opt = [], []
        # High-pass the input (48 Hz Butterworth defined at module level).
        audio = signal.filtfilt(bh, ah, audio)
        audio_pad = np.pad(audio, (self.window // 2, self.window // 2), mode="reflect")

        # For long audio, choose split points near the quietest sample inside
        # a +-t_query window around every t_center step.
        if audio_pad.shape[0] > self.t_max:
            audio_sum = np.zeros_like(audio)

            for i in range(self.window):
                audio_sum += audio_pad[i : i - self.window]

            for t in range(self.t_center, audio.shape[0], self.t_center):
                opt_ts.append(t - self.t_query + np.where(np.abs(audio_sum[t - self.t_query : t + self.t_query]) == np.abs(audio_sum[t - self.t_query : t + self.t_query]).min())[0][0])

        s = 0
        t, inp_f0 = None, None
        audio_pad = np.pad(audio, (self.t_pad, self.t_pad), mode="reflect")
        sid = torch.tensor(sid, device=self.device).unsqueeze(0).long()
        p_len = audio_pad.shape[0] // self.window

        # Optional manual F0 curve: CSV rows of floats, one frame per line.
        if hasattr(f0_file, "name"):
            try:
                with open(f0_file.name, "r") as f:
                    raw_lines = f.read()

                    if len(raw_lines) > 0:
                        inp_f0 = []

                        for line in raw_lines.strip("\n").split("\n"):
                            inp_f0.append([float(i) for i in line.split(",")])

                        inp_f0 = np.array(inp_f0, dtype=np.float32)
            except:
                logger.error(translations["error_readfile"])
                inp_f0 = None

        if pbar: pbar.update(1)
        if pitch_guidance:
            # The F0 generator is cached on the instance across calls.
            if not hasattr(self, "f0_generator"): self.f0_generator = Generator(self.sample_rate, hop_length, self.f0_min, self.f0_max, self.is_half, self.device, f0_onnx, f0_onnx)
            pitch, pitchf = self.f0_generator.calculator(self.x_pad, f0_method, audio_pad, f0_up_key, p_len, filter_radius, f0_autotune, f0_autotune_strength, manual_f0=inp_f0, proposal_pitch=proposal_pitch, proposal_pitch_threshold=proposal_pitch_threshold)

            # MPS has no float64 support; downcast before tensor creation.
            if self.device == "mps": pitchf = pitchf.astype(np.float32)
            pitch, pitchf = torch.tensor(pitch[:p_len], device=self.device).unsqueeze(0).long(), torch.tensor(pitchf[:p_len], device=self.device).unsqueeze(0).float()

            if pbar: pbar.update(1)

        if energy_use:
            # Frame-wise RMS energy, extractor cached on the instance.
            if not hasattr(self, "rms_extract"): self.rms_extract = RMSEnergyExtractor(frame_length=2048, hop_length=self.window, center=True, pad_mode = "reflect").to(self.device).eval()
            energy = self.rms_extract(torch.from_numpy(audio_pad).to(self.device).unsqueeze(0)).cpu().numpy()

            if self.device == "mps": energy = energy.astype(np.float32)
            energy = torch.tensor(energy[:p_len], device=self.device).unsqueeze(0).float()

            if pbar: pbar.update(1)

        # Convert segment by segment at the chosen split points; the reflected
        # padding (t_pad_tgt) is trimmed off each converted piece.
        for t in opt_ts:
            t = t // self.window * self.window
            audio_opt.append(
                self.voice_conversion(
                    model,
                    net_g,
                    sid,
                    audio_pad[s : t + self.t_pad2 + self.window],
                    pitch[:, s // self.window : (t + self.t_pad2) // self.window] if pitch_guidance else None,
                    pitchf[:, s // self.window : (t + self.t_pad2) // self.window] if pitch_guidance else None,
                    index,
                    big_npy,
                    index_rate,
                    version,
                    protect,
                    energy[:, s // self.window : (t + self.t_pad2) // self.window] if energy_use else None
                )[self.t_pad_tgt : -self.t_pad_tgt]
            )
            s = t

        # Final (or only) segment: from the last split point to the end.
        audio_opt.append(
            self.voice_conversion(
                model,
                net_g,
                sid,
                audio_pad[t:],
                (pitch[:, t // self.window :] if t is not None else pitch) if pitch_guidance else None,
                (pitchf[:, t // self.window :] if t is not None else pitchf) if pitch_guidance else None,
                index,
                big_npy,
                index_rate,
                version,
                protect,
                (energy[:, t // self.window :] if t is not None else energy) if energy_use else None
            )[self.t_pad_tgt : -self.t_pad_tgt]
        )

        audio_opt = np.concatenate(audio_opt)
        if pbar: pbar.update(1)

        # Optionally blend output loudness towards the source envelope.
        if rms_mix_rate != 1: audio_opt = change_rms(audio, self.sample_rate, audio_opt, self.sample_rate, rms_mix_rate)

        # Peak-normalize only when the signal would clip (headroom 0.99).
        audio_max = np.abs(audio_opt).max() / 0.99
        if audio_max > 1: audio_opt /= audio_max

        if pitch_guidance: del pitch, pitchf
        del sid

        clear_gpu_cache()
        return audio_opt
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
main/inference/conversion/utils.py
DELETED
|
@@ -1,66 +0,0 @@
|
|
| 1 |
-
import os
|
| 2 |
-
import gc
|
| 3 |
-
import sys
|
| 4 |
-
import torch
|
| 5 |
-
import librosa
|
| 6 |
-
|
| 7 |
-
import numpy as np
|
| 8 |
-
import torch.nn.functional as F
|
| 9 |
-
|
| 10 |
-
sys.path.append(os.getcwd())
|
| 11 |
-
|
| 12 |
-
from main.library import opencl
|
| 13 |
-
|
| 14 |
-
def autotune_f0(note_dict, f0, f0_autotune_strength):
    """Pull every F0 value towards its nearest note frequency.

    ``f0_autotune_strength`` in [0, 1] blends between the raw pitch (0)
    and a full snap to the closest entry of ``note_dict`` (1).
    """
    tuned = np.zeros_like(f0)
    for i, freq in enumerate(f0):
        nearest = min(note_dict, key=lambda note: abs(note - freq))
        tuned[i] = freq + (nearest - freq) * f0_autotune_strength
    return tuned
|
| 21 |
-
|
| 22 |
-
def change_rms(source_audio, source_rate, target_audio, target_rate, rate):
    """Blend the target audio's loudness envelope towards the source's.

    ``rate`` = 1 keeps the target RMS untouched; ``rate`` = 0 fully imposes
    the source's frame-wise RMS onto the target.
    """
    def _envelope(audio, sr):
        # Frame-wise RMS, linearly resampled to one value per output sample.
        rms = librosa.feature.rms(y=audio, frame_length=sr // 2 * 2, hop_length=sr // 2)
        curve = torch.from_numpy(rms).float().unsqueeze(0)
        return F.interpolate(curve, size=target_audio.shape[0], mode="linear").squeeze()

    rms1 = _envelope(source_audio, source_rate)
    rms2 = _envelope(target_audio, target_rate)
    rms2 = torch.maximum(rms2, torch.zeros_like(rms2) + 1e-6)  # avoid div-by-zero
    gain = torch.pow(rms1, 1 - rate) * torch.pow(rms2, rate - 1)
    return (target_audio * gain.numpy())
|
| 25 |
-
|
| 26 |
-
def clear_gpu_cache():
    """Run the garbage collector, then empty the active accelerator's cache."""
    gc.collect()

    if torch.cuda.is_available():
        torch.cuda.empty_cache()
    elif torch.backends.mps.is_available():
        torch.mps.empty_cache()
    elif opencl.is_available():
        opencl.pytorch_ocl.empty_cache()
|
| 32 |
-
|
| 33 |
-
def extract_median_f0(f0):
    """Median F0 of the voiced frames.

    Zeros are treated as missing (unvoiced) values and filled by linear
    interpolation over the voiced samples before taking the median.
    """
    voiced = np.where(f0 == 0, np.nan, f0)
    known = ~np.isnan(voiced)
    filled = np.interp(np.arange(len(voiced)), np.where(known)[0], voiced[known])
    return float(np.median(filled))
|
| 36 |
-
|
| 37 |
-
def proposal_f0_up_key(f0, target_f0=155.0, limit=12):
    """Suggest a semitone transposition moving the median F0 to ``target_f0``.

    The result is clamped to +-``limit`` semitones.
    """
    semitones = int(np.round(12 * np.log2(target_f0 / extract_median_f0(f0))))
    return max(-limit, min(limit, semitones))
|
| 39 |
-
|
| 40 |
-
def get_onnx_argument(net_g, feats, p_len, sid, pitch, pitchf, energy, pitch_guidance, energy_use):
    """Build the input-feed dict for an ONNX Runtime RVC synthesizer session.

    The exported graph's input order is: features, lengths, speaker id,
    noise, then optionally pitch / pitchf and/or energy, depending on how
    the model was exported (``pitch_guidance`` / ``energy_use``).
    """
    names = [inp.name for inp in net_g.get_inputs()]
    inputs = {
        names[0]: feats.cpu().numpy().astype(np.float32),
        names[1]: p_len.cpu().numpy(),
        names[2]: np.array([sid.cpu().item()], dtype=np.int64),
        names[3]: np.random.randn(1, 192, p_len).astype(np.float32),
    }

    # Optional inputs are appended in the exporter's fixed order.
    extras = []
    if pitch_guidance:
        extras.append(pitch.cpu().numpy().astype(np.int64))
        extras.append(pitchf.cpu().numpy().astype(np.float32))
    if energy_use:
        extras.append(energy.cpu().numpy().astype(np.float32))

    for offset, value in enumerate(extras, start=4):
        inputs[names[offset]] = value

    return inputs
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
main/inference/create_dataset.py
DELETED
|
@@ -1,212 +0,0 @@
|
|
| 1 |
-
import os
|
| 2 |
-
import sys
|
| 3 |
-
import time
|
| 4 |
-
import yt_dlp
|
| 5 |
-
import shutil
|
| 6 |
-
import librosa
|
| 7 |
-
import argparse
|
| 8 |
-
import warnings
|
| 9 |
-
|
| 10 |
-
from soundfile import read, write
|
| 11 |
-
from distutils.util import strtobool
|
| 12 |
-
|
| 13 |
-
sys.path.append(os.getcwd())
|
| 14 |
-
|
| 15 |
-
from main.app.variables import config, logger, translations
|
| 16 |
-
from main.library.uvr5_lib.separator import Separator
|
| 17 |
-
|
| 18 |
-
dataset_temp = "dataset_temp"
|
| 19 |
-
|
| 20 |
-
def parse_arguments():
    """Build the CLI parser for dataset creation and parse ``sys.argv``."""
    to_bool = lambda x: bool(strtobool(x))
    parser = argparse.ArgumentParser()

    # Input / output.
    parser.add_argument("--create_dataset", action='store_true')
    parser.add_argument("--input_audio", type=str, required=True)
    parser.add_argument("--output_dataset", type=str, default="./dataset")
    parser.add_argument("--sample_rate", type=int, default=44100)

    # Post-processing.
    parser.add_argument("--clean_dataset", type=to_bool, default=False)
    parser.add_argument("--clean_strength", type=float, default=0.7)
    parser.add_argument("--separator_reverb", type=to_bool, default=False)

    # MDX separation parameters.
    parser.add_argument("--kim_vocal_version", type=int, default=2)
    parser.add_argument("--overlap", type=float, default=0.25)
    parser.add_argument("--segments_size", type=int, default=256)
    parser.add_argument("--mdx_hop_length", type=int, default=1024)
    parser.add_argument("--mdx_batch_size", type=int, default=1)
    parser.add_argument("--denoise_mdx", type=to_bool, default=False)

    # Optional per-file trimming.
    parser.add_argument("--skip", type=to_bool, default=False)
    parser.add_argument("--skip_start_audios", type=str, default="0")
    parser.add_argument("--skip_end_audios", type=str, default="0")

    return parser.parse_args()
|
| 40 |
-
|
| 41 |
-
def main():
    """CLI entry point: download sources, separate vocals and build a dataset.

    Steps: write a PID file (so the UI can kill this job), download every
    comma-separated URL, optionally trim start/end, separate vocals (and
    optionally de-reverb), mono-mix / denoise, then move results to the
    output directory and clean up the temp folder.
    """
    pid_path = os.path.join("assets", "create_dataset_pid.txt")
    with open(pid_path, "w") as pid_file:
        pid_file.write(str(os.getpid()))

    args = parse_arguments()
    input_audio, output_dataset, sample_rate, clean_dataset, clean_strength, separator_reverb, kim_vocal_version, overlap, segments_size, hop_length, batch_size, denoise_mdx, skip, skip_start_audios, skip_end_audios = args.input_audio, args.output_dataset, args.sample_rate, args.clean_dataset, args.clean_strength, args.separator_reverb, args.kim_vocal_version, args.overlap, args.segments_size, args.mdx_hop_length, args.mdx_batch_size, args.denoise_mdx, args.skip, args.skip_start_audios, args.skip_end_audios
    log_data = {translations['audio_path']: input_audio, translations['output_path']: output_dataset, translations['sr']: sample_rate, translations['clear_dataset']: clean_dataset, translations['dereveb_audio']: separator_reverb, translations['segments_size']: segments_size, translations['overlap']: overlap, "Hop length": hop_length, translations['batch_size']: batch_size, translations['denoise_mdx']: denoise_mdx, translations['skip']: skip}

    if clean_dataset: log_data[translations['clean_strength']] = clean_strength
    if skip:
        log_data[translations['skip_start']] = skip_start_audios
        log_data[translations['skip_end']] = skip_end_audios

    for key, value in log_data.items():
        logger.debug(f"{key}: {value}")

    if kim_vocal_version not in [1, 2]: raise ValueError(translations["version_not_valid"])
    start_time = time.time()

    try:
        paths = []

        if not os.path.exists(dataset_temp): os.makedirs(dataset_temp, exist_ok=True)
        urls = input_audio.replace(", ", ",").split(",")

        for url in urls:
            path = downloader(url, urls.index(url))
            paths.append(path)

        if skip:
            # NOTE(review): the split yields *strings*; skip_start/skip_end
            # compare them against numbers — confirm they coerce the value.
            skip_start_audios, skip_end_audios = skip_start_audios.replace(", ", ",").split(","), skip_end_audios.replace(", ", ",").split(",")

            if len(skip_start_audios) < len(paths) or len(skip_end_audios) < len(paths):
                logger.warning(translations["skip<audio"])
                sys.exit(1)
            elif len(skip_start_audios) > len(paths) or len(skip_end_audios) > len(paths):
                logger.warning(translations["skip>audio"])
                sys.exit(1)
            else:
                for audio, skip_start_audio, skip_end_audio in zip(paths, skip_start_audios, skip_end_audios):
                    skip_start(audio, skip_start_audio)
                    skip_end(audio, skip_end_audio)

        separator_paths = []

        for audio in paths:
            vocals = separator_music_main(audio, dataset_temp, segments_size, overlap, denoise_mdx, kim_vocal_version, hop_length, batch_size, sample_rate)
            if separator_reverb: vocals = separator_reverb_audio(vocals, dataset_temp, segments_size, overlap, denoise_mdx, hop_length, batch_size, sample_rate)
            separator_paths.append(vocals)

        paths = separator_paths

        # Mono-mix (and optionally denoise) every separated vocal track.
        # Note: this loop rebinds `sample_rate` to each file's actual rate.
        for audio_path in paths:
            data, sample_rate = read(audio_path)
            data = librosa.to_mono(data.T)

            if clean_dataset:
                from main.tools.noisereduce import reduce_noise
                data = reduce_noise(y=data, sr=sample_rate, prop_decrease=clean_strength, device=config.device)

            write(audio_path, data, sample_rate)
    except Exception as e:
        logger.error(f"{translations['create_dataset_error']}: {e}")
        import traceback
        logger.error(traceback.format_exc())
    finally:
        # Move whatever was produced (even partially) into the dataset dir,
        # then drop the temp folder.
        for audio in paths:
            shutil.move(audio, output_dataset)

        if os.path.exists(dataset_temp): shutil.rmtree(dataset_temp, ignore_errors=True)

    elapsed_time = time.time() - start_time
    if os.path.exists(pid_path): os.remove(pid_path)
    logger.info(translations["create_dataset_success"].format(elapsed_time=f"{elapsed_time:.2f}"))
|
| 116 |
-
|
| 117 |
-
def downloader(url, name):
    """Download *url* as a WAV file into ``dataset_temp``.

    *name* (the URL's position index) becomes the output file stem.
    Returns the expected path of the extracted ``.wav`` file.
    """
    with warnings.catch_warnings():
        warnings.simplefilter("ignore")

        # BUGFIX: "noplaylist" was listed twice in this dict literal; Python
        # keeps only the last duplicate, so it is declared once here.
        ydl_opts = {"format": "bestaudio/best", "outtmpl": os.path.join(dataset_temp, f"{name}"), "postprocessors": [{"key": "FFmpegExtractAudio", "preferredcodec": "wav", "preferredquality": "192"}], "no_warnings": True, "noplaylist": True, "verbose": False}
        logger.info(f"{translations['starting_download']}: {url}...")

        with yt_dlp.YoutubeDL(ydl_opts) as ydl:
            ydl.extract_info(url)
            logger.info(f"{translations['download_success']}: {url}")

    return os.path.join(dataset_temp, f"{name}" + ".wav")
|
| 129 |
-
|
| 130 |
-
def skip_start(input_file, seconds):
    """Trim the first *seconds* seconds from *input_file* in place.

    *seconds* may arrive as a string (main() splits it from a
    comma-separated CLI flag), so it is coerced to float first.
    Out-of-range values only log a warning and leave the file untouched.
    """
    # BUGFIX: callers pass strings; "str <= int" raises TypeError in Python 3.
    seconds = float(seconds)
    data, sr = read(input_file)
    total_duration = len(data) / sr

    if seconds <= 0: logger.warning(translations["=<0"])
    elif seconds >= total_duration: logger.warning(translations["skip_warning"].format(seconds=seconds, total_duration=f"{total_duration:.2f}"))
    else:
        logger.info(f"{translations['skip_start']}: {input_file}...")
        write(input_file, data[int(seconds * sr):], sr)

    logger.info(translations["skip_start_audio"].format(input_file=input_file))
|
| 141 |
-
|
| 142 |
-
def skip_end(input_file, seconds):
    """Trim the last *seconds* seconds from *input_file* in place.

    *seconds* may arrive as a string (main() splits it from a
    comma-separated CLI flag), so it is coerced to float first.
    Out-of-range values only log a warning and leave the file untouched.
    """
    # BUGFIX: callers pass strings; "str <= int" raises TypeError in Python 3.
    seconds = float(seconds)
    data, sr = read(input_file)
    total_duration = len(data) / sr

    if seconds <= 0: logger.warning(translations["=<0"])
    elif seconds > total_duration: logger.warning(translations["skip_warning"].format(seconds=seconds, total_duration=f"{total_duration:.2f}"))
    else:
        logger.info(f"{translations['skip_end']}: {input_file}...")
        tail = int(seconds * sr)
        # BUGFIX: data[:-0] would yield an empty array; keep everything when
        # the requested trim rounds down to zero samples.
        write(input_file, data[:-tail] if tail else data, sr)

    logger.info(translations["skip_end_audio"].format(input_file=input_file))
|
| 153 |
-
|
| 154 |
-
def separator_music_main(input, output, segments_size, overlap, denoise, version, hop_length, batch_size, sample_rate):
    """Separate vocals from *input* with the Kim_Vocal_{version} MDX model.

    Returns the path of the renamed vocals file, or ``None`` when the
    input/output paths are invalid or no vocals stem was produced.
    """
    if not os.path.exists(input):
        logger.warning(translations["input_not_valid"])
        return None

    if not os.path.exists(output):
        logger.warning(translations["output_not_valid"])
        return None

    model = f"Kim_Vocal_{version}.onnx"
    output_separator = separator_main(audio_file=input, model_filename=model, output_format="wav", output_dir=output, mdx_segment_size=segments_size, mdx_overlap=overlap, mdx_batch_size=batch_size, mdx_hop_length=hop_length, mdx_enable_denoise=denoise, sample_rate=sample_rate)

    # BUGFIX: previously unbound when no "(Vocals)" stem existed, which made
    # the return statement raise UnboundLocalError instead of returning None.
    rename_file = None
    for f in output_separator:
        path = os.path.join(output, f)
        if not os.path.exists(path): logger.error(translations["not_found"].format(name=path))

        # Strip parentheses from the separator's stem names.
        if '_(Instrumental)_' in f: os.rename(path, os.path.splitext(path)[0].replace("(", "").replace(")", "") + ".wav")
        elif '_(Vocals)_' in f:
            rename_file = os.path.splitext(path)[0].replace("(", "").replace(")", "") + ".wav"
            os.rename(path, rename_file)

    return rename_file
|
| 176 |
-
|
| 177 |
-
def separator_reverb_audio(input, output, segments_size, overlap, denoise, hop_length, batch_size, sample_rate):
    """Remove reverb from *input* with the Reverb_HQ MDX model.

    Returns the path of the renamed no-reverb file, or ``None`` when the
    input/output paths are invalid or no "(No Reverb)" stem was produced.
    """
    if not os.path.exists(input):
        logger.warning(translations["input_not_valid"])
        return None

    if not os.path.exists(output):
        logger.warning(translations["output_not_valid"])
        return None

    logger.info(f"{translations['dereverb']}: {input}...")
    # BUGFIX: hop_length and batch_size were passed to each other's keyword
    # (mdx_batch_size=hop_length, mdx_hop_length=batch_size); they are now
    # wired consistently with separator_music_main.
    output_dereverb = separator_main(audio_file=input, model_filename="Reverb_HQ_By_FoxJoy.onnx", output_format="wav", output_dir=output, mdx_segment_size=segments_size, mdx_overlap=overlap, mdx_batch_size=batch_size, mdx_hop_length=hop_length, mdx_enable_denoise=denoise, sample_rate=sample_rate)

    # BUGFIX: previously unbound when no "(No Reverb)" stem existed.
    rename_file = None
    for f in output_dereverb:
        path = os.path.join(output, f)
        if not os.path.exists(path): logger.error(translations["not_found"].format(name=path))

        # Strip parentheses from the separator's stem names.
        if '_(Reverb)_' in f: os.rename(path, os.path.splitext(path)[0].replace("(", "").replace(")", "") + ".wav")
        elif '_(No Reverb)_' in f:
            rename_file = os.path.splitext(path)[0].replace("(", "").replace(")", "") + ".wav"
            os.rename(path, rename_file)

    logger.info(f"{translations['dereverb_success']}: {rename_file}")
    return rename_file
|
| 200 |
-
|
| 201 |
-
def separator_main(audio_file=None, model_filename="Kim_Vocal_1.onnx", output_format="wav", output_dir=".", mdx_segment_size=256, mdx_overlap=0.25, mdx_batch_size=1, mdx_hop_length=1024, mdx_enable_denoise=True, sample_rate=44100):
|
| 202 |
-
try:
|
| 203 |
-
separator = Separator(logger=logger, output_dir=output_dir, output_format=output_format, output_bitrate=None, normalization_threshold=0.9, sample_rate=sample_rate, mdx_params={"hop_length": mdx_hop_length, "segment_size": mdx_segment_size, "overlap": mdx_overlap, "batch_size": mdx_batch_size, "enable_denoise": mdx_enable_denoise})
|
| 204 |
-
separator.load_model(model_filename=model_filename)
|
| 205 |
-
return separator.separate(audio_file)
|
| 206 |
-
except:
|
| 207 |
-
logger.debug(translations["default_setting"])
|
| 208 |
-
separator = Separator(logger=logger, output_dir=output_dir, output_format=output_format, output_bitrate=None, normalization_threshold=0.9, sample_rate=44100, mdx_params={"hop_length": 1024, "segment_size": 256, "overlap": 0.25, "batch_size": 1, "enable_denoise": mdx_enable_denoise})
|
| 209 |
-
separator.load_model(model_filename=model_filename)
|
| 210 |
-
return separator.separate(audio_file)
|
| 211 |
-
|
| 212 |
-
if __name__ == "__main__": main()
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
main/inference/create_index.py
DELETED
|
@@ -1,73 +0,0 @@
|
|
| 1 |
-
import os
|
| 2 |
-
import sys
|
| 3 |
-
import faiss
|
| 4 |
-
import argparse
|
| 5 |
-
|
| 6 |
-
import numpy as np
|
| 7 |
-
|
| 8 |
-
from multiprocessing import cpu_count
|
| 9 |
-
from sklearn.cluster import MiniBatchKMeans
|
| 10 |
-
|
| 11 |
-
sys.path.append(os.getcwd())
|
| 12 |
-
|
| 13 |
-
from main.app.variables import logger, translations, configs
|
| 14 |
-
|
| 15 |
-
def parse_arguments():
|
| 16 |
-
parser = argparse.ArgumentParser()
|
| 17 |
-
parser.add_argument("--create_index", action='store_true')
|
| 18 |
-
parser.add_argument("--model_name", type=str, required=True)
|
| 19 |
-
parser.add_argument("--rvc_version", type=str, default="v2")
|
| 20 |
-
parser.add_argument("--index_algorithm", type=str, default="Auto")
|
| 21 |
-
|
| 22 |
-
return parser.parse_args()
|
| 23 |
-
|
| 24 |
-
def main():
|
| 25 |
-
args = parse_arguments()
|
| 26 |
-
exp_dir = os.path.join(configs["logs_path"], args.model_name)
|
| 27 |
-
version, index_algorithm = args.rvc_version, args.index_algorithm
|
| 28 |
-
|
| 29 |
-
log_data = {translations['modelname']: args.model_name, translations['model_path']: exp_dir, translations['training_version']: version, translations['index_algorithm_info']: index_algorithm}
|
| 30 |
-
for key, value in log_data.items():
|
| 31 |
-
logger.debug(f"{key}: {value}")
|
| 32 |
-
|
| 33 |
-
try:
|
| 34 |
-
npys = []
|
| 35 |
-
feature_dir = os.path.join(exp_dir, f"{version}_extracted")
|
| 36 |
-
model_name = os.path.basename(exp_dir)
|
| 37 |
-
|
| 38 |
-
for name in sorted(os.listdir(feature_dir)):
|
| 39 |
-
npys.append(np.load(os.path.join(feature_dir, name)))
|
| 40 |
-
|
| 41 |
-
big_npy = np.concatenate(npys, axis=0)
|
| 42 |
-
big_npy_idx = np.arange(big_npy.shape[0])
|
| 43 |
-
np.random.shuffle(big_npy_idx)
|
| 44 |
-
big_npy = big_npy[big_npy_idx]
|
| 45 |
-
|
| 46 |
-
if big_npy.shape[0] > 2e5 and (index_algorithm == "Auto" or index_algorithm == "KMeans"): big_npy = (MiniBatchKMeans(n_clusters=10000, verbose=True, batch_size=256 * cpu_count(), compute_labels=False, init="random").fit(big_npy).cluster_centers_)
|
| 47 |
-
np.save(os.path.join(exp_dir, "total_fea.npy"), big_npy)
|
| 48 |
-
|
| 49 |
-
n_ivf = min(int(16 * np.sqrt(big_npy.shape[0])), big_npy.shape[0] // 39)
|
| 50 |
-
index_trained = faiss.index_factory(256 if version == "v1" else 768, f"IVF{n_ivf},Flat")
|
| 51 |
-
index_ivf_trained = faiss.extract_index_ivf(index_trained)
|
| 52 |
-
index_ivf_trained.nprobe = 1
|
| 53 |
-
index_trained.train(big_npy)
|
| 54 |
-
faiss.write_index(index_trained, os.path.join(exp_dir, f"trained_IVF{n_ivf}_Flat_nprobe_{index_ivf_trained.nprobe}_{model_name}_{version}.index"))
|
| 55 |
-
|
| 56 |
-
index_added = faiss.index_factory(256 if version == "v1" else 768, f"IVF{n_ivf},Flat")
|
| 57 |
-
index_ivf_added = faiss.extract_index_ivf(index_added)
|
| 58 |
-
index_ivf_added.nprobe = 1
|
| 59 |
-
index_added.train(big_npy)
|
| 60 |
-
batch_size_add = 8192
|
| 61 |
-
|
| 62 |
-
for i in range(0, big_npy.shape[0], batch_size_add):
|
| 63 |
-
index_added.add(big_npy[i : i + batch_size_add])
|
| 64 |
-
|
| 65 |
-
index_filepath_added = os.path.join(exp_dir, f"added_IVF{n_ivf}_Flat_nprobe_{index_ivf_added.nprobe}_{model_name}_{version}.index")
|
| 66 |
-
faiss.write_index(index_added, index_filepath_added)
|
| 67 |
-
logger.info(f"{translations['save_index']} '{index_filepath_added}'")
|
| 68 |
-
except Exception as e:
|
| 69 |
-
logger.error(f"{translations['create_index_error']}: {e}")
|
| 70 |
-
import traceback
|
| 71 |
-
logger.debug(traceback.format_exc())
|
| 72 |
-
|
| 73 |
-
if __name__ == "__main__": main()
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|