Spaces:

Artrajz
/

vits-simple-api-gsv

Running

App Files Files Community

vits-simple-api-gsv / manager /model_handler.py

Artrajz

init

960cd20 over 1 year ago

raw

history blame

18.7 kB

	"""
	放置公用模型
	"""

	import gc
	import logging
	import os

	import torch
	from transformers import AutoTokenizer, AutoModelForMaskedLM, BertTokenizer, MegatronBertModel

	from contants import config
	from utils.download import download_file
	from bert_vits2.text.chinese_bert import get_bert_feature as zh_bert
	from bert_vits2.text.english_bert_mock import get_bert_feature as en_bert
	from bert_vits2.text.japanese_bert import get_bert_feature as ja_bert
	from bert_vits2.text.japanese_bert_v111 import get_bert_feature as ja_bert_v111
	from bert_vits2.text.japanese_bert_v200 import get_bert_feature as ja_bert_v200
	from bert_vits2.text.english_bert_mock_v200 import get_bert_feature as en_bert_v200
	from bert_vits2.text.chinese_bert_extra import get_bert_feature as zh_bert_extra
	from bert_vits2.text.japanese_bert_extra import get_bert_feature as ja_bert_extra


	class ModelHandler:
	def __init__(self, device=config.system.device):
	self.DOWNLOAD_PATHS = {
	"CHINESE_ROBERTA_WWM_EXT_LARGE": [
	"https://huggingface.co/hfl/chinese-roberta-wwm-ext-large/resolve/main/pytorch_model.bin",
	"https://hf-mirror.com/hfl/chinese-roberta-wwm-ext-large/resolve/main/pytorch_model.bin",
	],
	"BERT_BASE_JAPANESE_V3": [
	"https://huggingface.co/cl-tohoku/bert-base-japanese-v3/resolve/main/pytorch_model.bin",
	"https://hf-mirror.com/cl-tohoku/bert-base-japanese-v3/resolve/main/pytorch_model.bin",
	],
	"BERT_LARGE_JAPANESE_V2": [
	"https://huggingface.co/cl-tohoku/bert-large-japanese-v2/resolve/main/pytorch_model.bin",
	"https://hf-mirror.com/cl-tohoku/bert-large-japanese-v2/resolve/main/pytorch_model.bin",
	],
	"DEBERTA_V2_LARGE_JAPANESE": [
	"https://huggingface.co/ku-nlp/deberta-v2-large-japanese/resolve/main/pytorch_model.bin",
	"https://hf-mirror.com/ku-nlp/deberta-v2-large-japanese/resolve/main/pytorch_model.bin",
	],
	"DEBERTA_V3_LARGE": [
	"https://huggingface.co/microsoft/deberta-v3-large/resolve/main/pytorch_model.bin",
	"https://hf-mirror.com/microsoft/deberta-v3-large/resolve/main/pytorch_model.bin",
	],
	"SPM": [
	"https://huggingface.co/microsoft/deberta-v3-large/resolve/main/spm.model",
	"https://hf-mirror.com/microsoft/deberta-v3-large/resolve/main/spm.model",
	],
	"DEBERTA_V2_LARGE_JAPANESE_CHAR_WWM": [
	"https://huggingface.co/ku-nlp/deberta-v2-large-japanese-char-wwm/resolve/main/pytorch_model.bin",
	"https://hf-mirror.com/ku-nlp/deberta-v2-large-japanese-char-wwm/resolve/main/pytorch_model.bin",
	],
	"WAV2VEC2_LARGE_ROBUST_12_FT_EMOTION_MSP_DIM": [
	"https://huggingface.co/audeering/wav2vec2-large-robust-12-ft-emotion-msp-dim/resolve/main/pytorch_model.bin",
	"https://hf-mirror.com/audeering/wav2vec2-large-robust-12-ft-emotion-msp-dim/resolve/main/pytorch_model.bin",
	],
	"CLAP_HTSAT_FUSED": [
	"https://huggingface.co/laion/clap-htsat-fused/resolve/main/pytorch_model.bin?download=true",
	"https://hf-mirror.com/laion/clap-htsat-fused/resolve/main/pytorch_model.bin?download=true",
	],
	"Erlangshen_MegatronBert_1.3B_Chinese": [
	"https://huggingface.co/IDEA-CCNL/Erlangshen-UniMC-MegatronBERT-1.3B-Chinese/resolve/main/pytorch_model.bin",
	"https://hf-mirror.com/IDEA-CCNL/Erlangshen-UniMC-MegatronBERT-1.3B-Chinese/resolve/main/pytorch_model.bin",
	],
	"G2PWModel": [
	# "https://storage.googleapis.com/esun-ai/g2pW/G2PWModel-v2-onnx.zip",
	"https://huggingface.co/ADT109119/G2PWModel-v2-onnx/resolve/main/g2pw.onnx",
	"https://hf-mirror.com/ADT109119/G2PWModel-v2-onnx/resolve/main/g2pw.onnx",
	],
	"CHINESE_HUBERT_BASE": [
	"https://huggingface.co/TencentGameMate/chinese-hubert-base/resolve/main/pytorch_model.bin",
	"https://hf-mirror.com/TencentGameMate/chinese-hubert-base/resolve/main/pytorch_model.bin",
	]
	}

	self.SHA256 = {
	"CHINESE_ROBERTA_WWM_EXT_LARGE": "4ac62d49144d770c5ca9a5d1d3039c4995665a080febe63198189857c6bd11cd",
	"BERT_BASE_JAPANESE_V3": "e172862e0674054d65e0ba40d67df2a4687982f589db44aa27091c386e5450a4",
	"BERT_LARGE_JAPANESE_V2": "50212d714f79af45d3e47205faa356d0e5030e1c9a37138eadda544180f9e7c9",
	"DEBERTA_V2_LARGE_JAPANESE": "a6c15feac0dea77ab8835c70e1befa4cf4c2137862c6fb2443b1553f70840047",
	"DEBERTA_V3_LARGE": "dd5b5d93e2db101aaf281df0ea1216c07ad73620ff59c5b42dccac4bf2eef5b5",
	"SPM": "c679fbf93643d19aab7ee10c0b99e460bdbc02fedf34b92b05af343b4af586fd",
	"DEBERTA_V2_LARGE_JAPANESE_CHAR_WWM": "bf0dab8ad87bd7c22e85ec71e04f2240804fda6d33196157d6b5923af6ea1201",
	"WAV2VEC2_LARGE_ROBUST_12_FT_EMOTION_MSP_DIM": "176d9d1ce29a8bddbab44068b9c1c194c51624c7f1812905e01355da58b18816",
	"CLAP_HTSAT_FUSED": "1ed5d0215d887551ddd0a49ce7311b21429ebdf1e6a129d4e68f743357225253",
	"Erlangshen_MegatronBert_1.3B_Chinese": "3456bb8f2c7157985688a4cb5cecdb9e229cb1dcf785b01545c611462ffe3579",
	# "G2PWModel": "bb40c8c7b5baa755b2acd317c6bc5a65e4af7b80c40a569247fbd76989299999",
	"G2PWModel": "",
	"CHINESE_HUBERT_BASE": "2fefccd26c2794a583b80f6f7210c721873cb7ebae2c1cde3baf9b27855e24d8",
	}
	self.model_path = {
	"CHINESE_ROBERTA_WWM_EXT_LARGE": os.path.join(config.abs_path, config.system.data_path,
	config.model_config.chinese_roberta_wwm_ext_large),
	"BERT_BASE_JAPANESE_V3": os.path.join(config.abs_path, config.system.data_path,
	config.model_config.bert_base_japanese_v3),
	"BERT_LARGE_JAPANESE_V2": os.path.join(config.abs_path, config.system.data_path,
	config.model_config.bert_large_japanese_v2),
	"DEBERTA_V2_LARGE_JAPANESE": os.path.join(config.abs_path, config.system.data_path,
	config.model_config.deberta_v2_large_japanese),
	"DEBERTA_V3_LARGE": os.path.join(config.abs_path, config.system.data_path,
	config.model_config.deberta_v3_large),
	"DEBERTA_V2_LARGE_JAPANESE_CHAR_WWM": os.path.join(config.abs_path, config.system.data_path,
	config.model_config.deberta_v2_large_japanese_char_wwm),
	"WAV2VEC2_LARGE_ROBUST_12_FT_EMOTION_MSP_DIM": os.path.join(config.abs_path, config.system.data_path,
	config.model_config.wav2vec2_large_robust_12_ft_emotion_msp_dim),
	"CLAP_HTSAT_FUSED": os.path.join(config.abs_path, config.system.data_path,
	config.model_config.clap_htsat_fused),
	"Erlangshen_MegatronBert_1.3B_Chinese": os.path.join(config.abs_path, config.system.data_path,
	config.model_config.erlangshen_MegatronBert_1_3B_Chinese),
	"G2PWModel": os.path.join(config.abs_path, config.system.data_path, config.model_config.g2pw_model),
	"CHINESE_HUBERT_BASE": os.path.join(config.abs_path, config.system.data_path,
	config.model_config.chinese_hubert_base),
	}

	self.lang_bert_func_map = {"zh": zh_bert, "en": en_bert, "ja": ja_bert, "ja_v111": ja_bert_v111,
	"ja_v200": ja_bert_v200, "en_v200": en_bert_v200, "zh_extra": zh_bert_extra,
	"ja_extra": ja_bert_extra}

	self.bert_models = {} # Value: (tokenizer, model, reference_count)
	self.emotion = None
	self.clap = None
	self.pinyinPlus = None
	self.device = device
	self.ssl_model = None

	if config.bert_vits2_config.torch_data_type.lower() in ["float16", "fp16"]:
	self.torch_dtype = torch.float16
	else:
	self.torch_dtype = None

	@property
	def emotion_model(self):
	return self.emotion["model"]

	@property
	def emotion_processor(self):
	return self.emotion["processor"]

	@property
	def clap_model(self):
	return self.clap["model"]

	@property
	def clap_processor(self):
	return self.clap["processor"]

	def _download_model(self, model_name, target_path=None):
	urls = self.DOWNLOAD_PATHS[model_name]

	if target_path is None:
	target_path = os.path.join(self.model_path[model_name], "pytorch_model.bin")

	expected_sha256 = self.SHA256[model_name]
	success, message = download_file(urls, target_path, expected_sha256=expected_sha256)
	if not success:
	logging.error(f"Failed to download {model_name}: {message}")
	else:
	logging.info(f"{message}")

	def load_bert(self, bert_model_name, max_retries=3):
	if bert_model_name not in self.bert_models:
	retries = 0
	model_path = ""
	while retries < max_retries:
	model_path = self.model_path[bert_model_name]
	logging.info(f"Loading BERT model: {model_path}")
	try:
	if bert_model_name == "Erlangshen_MegatronBert_1.3B_Chinese":
	tokenizer = BertTokenizer.from_pretrained(model_path, torch_dtype=self.torch_dtype)
	model = MegatronBertModel.from_pretrained(model_path, torch_dtype=self.torch_dtype).to(
	self.device)
	else:
	tokenizer = AutoTokenizer.from_pretrained(model_path, torch_dtype=self.torch_dtype)
	model = AutoModelForMaskedLM.from_pretrained(model_path, torch_dtype=self.torch_dtype).to(
	self.device)
	self.bert_models[bert_model_name] = (tokenizer, model, 1) # 初始化引用计数为1
	logging.info(f"Success loading: {model_path}")
	break
	except Exception as e:
	logging.error(f"Failed loading {model_path}. {e}")
	logging.info(f"Trying to download.")
	if bert_model_name == "DEBERTA_V3_LARGE" and not os.path.exists(
	os.path.join(model_path, "spm.model")):
	self._download_model("SPM", os.path.join(model_path, "spm.model"))
	self._download_model(bert_model_name)
	retries += 1
	if retries == max_retries:
	logging.error(f"Failed to load {model_path} after {max_retries} retries.")
	else:
	tokenizer, model, count = self.bert_models[bert_model_name]
	self.bert_models[bert_model_name] = (tokenizer, model, count + 1)

	def load_emotion(self, max_retries=3):
	"""Bert-VITS2 v2.1 EmotionModel"""
	if self.emotion is None:
	from transformers import Wav2Vec2Processor
	from bert_vits2.get_emo import EmotionModel
	retries = 0
	model_path = self.model_path["WAV2VEC2_LARGE_ROBUST_12_FT_EMOTION_MSP_DIM"]
	while retries < max_retries:
	logging.info(f"Loading WAV2VEC2_LARGE_ROBUST_12_FT_EMOTION_MSP_DIM: {model_path}")
	try:
	self.emotion = {}
	self.emotion["model"] = EmotionModel.from_pretrained(model_path).to(self.device)
	self.emotion["processor"] = Wav2Vec2Processor.from_pretrained(model_path)
	self.emotion["reference_count"] = 1
	logging.info(f"Success loading: {model_path}")
	break
	except Exception as e:
	logging.error(f"Failed loading {model_path}. {e}")
	self._download_model("WAV2VEC2_LARGE_ROBUST_12_FT_EMOTION_MSP_DIM")
	retries += 1
	if retries == max_retries:
	logging.error(f"Failed to load {model_path} after {max_retries} retries.")
	else:
	self.emotion["reference_count"] += 1

	def release_emotion(self):
	if self.emotion is not None:
	self.emotion["reference_count"] -= 1
	if self.emotion["reference_count"] <= 0:
	del self.emotion
	self.emotion = None
	gc.collect()
	torch.cuda.empty_cache()
	logging.info(f"Emotion model has been released.")

	def load_clap(self, max_retries=3):
	"""Bert-VITS2 v2.2 ClapModel"""
	if self.clap is None:
	from transformers import ClapModel, ClapProcessor
	retries = 0
	model_path = self.model_path["CLAP_HTSAT_FUSED"]
	while retries < max_retries:
	logging.info(f"Loading CLAP_HTSAT_FUSED: {model_path}")
	try:
	self.clap = {}
	self.clap["model"] = ClapModel.from_pretrained(model_path, torch_dtype=self.torch_dtype).to(
	self.device)
	self.clap["processor"] = ClapProcessor.from_pretrained(model_path, torch_dtype=self.torch_dtype)
	self.clap["reference_count"] = 1
	logging.info(f"Success loading: {model_path}")
	break
	except Exception as e:
	logging.error(f"Failed loading {model_path}. {e}")
	self._download_model("CLAP_HTSAT_FUSED")
	retries += 1
	if retries == max_retries:
	logging.error(f"Failed to load {model_path} after {max_retries} retries.")
	else:
	self.clap["reference_count"] += 1

	def release_clap(self):
	if self.clap is not None:
	self.clap["reference_count"] -= 1
	if self.clap["reference_count"] <= 0:
	del self.clap
	self.clap = None
	gc.collect()
	torch.cuda.empty_cache()
	logging.info(f"Clap model has been released.")

	def get_bert_model(self, bert_model_name):
	if bert_model_name not in self.bert_models:
	self.load_bert(bert_model_name)

	tokenizer, model, _ = self.bert_models[bert_model_name]
	return tokenizer, model

	def get_bert_feature(self, norm_text, word2ph, language, bert_model_name, style_text=None, style_weight=0.7):
	tokenizer, model = self.get_bert_model(bert_model_name)
	bert_feature = self.lang_bert_func_map[language](norm_text, word2ph, tokenizer, model, self.device,
	style_text=style_text, style_weight=style_weight)
	return bert_feature

	def get_pinyinPlus(self):
	if self.pinyinPlus is None:
	from bert_vits2.g2pW.pypinyin_G2pW_bv2 import G2PWPinyin

	logging.info(f"Loading G2PWModel: {self.model_path['G2PWModel']}")
	self.pinyinPlus = G2PWPinyin(
	model_dir=self.model_path["G2PWModel"],
	model_source=self.model_path["Erlangshen_MegatronBert_1.3B_Chinese"],
	v_to_u=False,
	neutral_tone_with_five=True,
	)
	logging.info("Success loading G2PWModel")

	return self.pinyinPlus

	def release_bert(self, bert_model_name):
	if bert_model_name in self.bert_models:
	_, _, count = self.bert_models[bert_model_name]
	count -= 1
	if count == 0:
	# 当引用计数为0时，删除模型并释放其资源
	del self.bert_models[bert_model_name]
	gc.collect()
	torch.cuda.empty_cache()
	logging.info(f"BERT model {bert_model_name} has been released.")
	else:
	tokenizer, model = self.bert_models[bert_model_name][:2]
	self.bert_models[bert_model_name] = (tokenizer, model, count)

	def load_ssl(self, max_retries=3):
	"""GPT-SoVITS"""
	if self.ssl_model is None:
	retries = 0
	model_path = self.model_path["CHINESE_HUBERT_BASE"]
	while retries < max_retries:
	logging.info(f"Loading CHINESE_HUBERT_BASE: {model_path}")
	try:
	from gpt_sovits.feature_extractor.cnhubert import CNHubert
	self.ssl_model = {}
	model_path = self.model_path.get("CHINESE_HUBERT_BASE")

	self.ssl_model["model"] = CNHubert(model_path)
	self.ssl_model["model"].eval()

	if config.gpt_sovits_config.is_half:
	self.ssl_model["model"] = self.ssl_model["model"].half()

	self.ssl_model["model"] = self.ssl_model["model"].to(self.device)
	self.ssl_model["reference_count"] = 1
	logging.info(f"Success loading: {model_path}")
	break
	except Exception as e:
	logging.error(f"Failed loading {model_path}. {e}")
	self._download_model("CHINESE_HUBERT_BASE")
	retries += 1
	if retries == max_retries:
	logging.error(f"Failed to load {model_path} after {max_retries} retries.")
	else:
	self.ssl_model["reference_count"] += 1

	def get_ssl_model(self):
	if self.ssl_model is None:
	self.load_ssl()

	return self.ssl_model.get("model")

	def release_ssl_model(self):
	if self.ssl_model is not None:
	self.ssl_model["reference_count"] -= 1
	if self.ssl_model["reference_count"] <= 0:
	del self.ssl_model
	self.ssl_model = None
	gc.collect()
	torch.cuda.empty_cache()
	logging.info(f"SSL model has been released.")

	def is_model_loaded(self, bert_model_name):
	return bert_model_name in self.bert_models

	def reference_count(self, bert_model_name):
	return self.bert_models[bert_model_name][2] if bert_model_name in self.bert_models else 0