Slashy committed on
Commit
ee1231e
·
1 Parent(s): 55b009a

Upload 4 files

Browse files
Files changed (4) hide show
  1. config.json +46 -0
  2. cream.pth +3 -0
  3. export_onnx.py +85 -0
  4. trained_IVF116_Flat_nprobe_1.index +0 -0
config.json ADDED
@@ -0,0 +1,46 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ {
2
+ "train": {
3
+ "log_interval": 200,
4
+ "seed": 1234,
5
+ "epochs": 20000,
6
+ "learning_rate": 1e-4,
7
+ "betas": [0.8, 0.99],
8
+ "eps": 1e-9,
9
+ "batch_size": 4,
10
+ "fp16_run": true,
11
+ "lr_decay": 0.999875,
12
+ "segment_size": 12800,
13
+ "init_lr_ratio": 1,
14
+ "warmup_epochs": 0,
15
+ "c_mel": 45,
16
+ "c_kl": 1.0
17
+ },
18
+ "data": {
19
+ "max_wav_value": 32768.0,
20
+ "sampling_rate": 40000,
21
+ "filter_length": 2048,
22
+ "hop_length": 400,
23
+ "win_length": 2048,
24
+ "n_mel_channels": 125,
25
+ "mel_fmin": 0.0,
26
+ "mel_fmax": null
27
+ },
28
+ "model": {
29
+ "inter_channels": 192,
30
+ "hidden_channels": 192,
31
+ "filter_channels": 768,
32
+ "n_heads": 2,
33
+ "n_layers": 6,
34
+ "kernel_size": 3,
35
+ "p_dropout": 0,
36
+ "resblock": "1",
37
+ "resblock_kernel_sizes": [3,7,11],
38
+ "resblock_dilation_sizes": [[1,3,5], [1,3,5], [1,3,5]],
39
+ "upsample_rates": [10,10,2,2],
40
+ "upsample_initial_channel": 512,
41
+ "upsample_kernel_sizes": [16,16,4,4],
42
+ "use_spectral_norm": false,
43
+ "gin_channels": 256,
44
+ "spk_embed_dim": 109
45
+ }
46
+ }
cream.pth ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:b7ce2ad107f5dc7dc242a9b78b01dd05f7dda1f1b901bef2fee0e5c3c97544f5
3
+ size 54996633
export_onnx.py ADDED
@@ -0,0 +1,85 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
from infer_pack.models_onnx_moess import SynthesizerTrnMs256NSFsidM
from infer_pack.models_onnx import SynthesizerTrnMs256NSFsidO
import torch

if __name__ == "__main__":
    # Export a trained synthesizer checkpoint to ONNX.
    #
    # Edit the settings below, then run this file as a script. The checkpoint
    # is expected to be a dict with a "config" list (positional constructor
    # arguments for the synthesizer) and a "weight" state dict.

    MoeVS = True  # whether the model is for use with MoeVoiceStudio (formerly MoeSS)

    ModelPath = "Shiroha/shiroha.pth"  # path of the checkpoint to export
    ExportedPath = "model.onnx"  # path of the exported ONNX file
    hidden_channels = 256  # hidden_channels; kept configurable in preparation for 768-dim Vec

    # NOTE(review): torch.load unpickles the file, which can execute arbitrary
    # code. Only export checkpoints obtained from a trusted source.
    cpt = torch.load(ModelPath, map_location="cpu")
    # Overwrite the speaker count in the stored config with the number of rows
    # of the speaker embedding actually present in the weights.
    cpt["config"][-3] = cpt["weight"]["emb_g.weight"].shape[0]  # n_spk
    print(*cpt["config"])

    # Dummy inputs used only to trace the graph; 200 is the example length.
    test_phone = torch.rand(1, 200, hidden_channels)  # hidden unit
    test_phone_lengths = torch.tensor([200]).long()  # hidden unit length (seems to be of little use)
    test_pitch = torch.randint(size=(1, 200), low=5, high=255)  # fundamental frequency (in Hz)
    test_pitchf = torch.rand(1, 200)  # NSF fundamental frequency
    test_ds = torch.LongTensor([0])  # speaker ID
    test_rnd = torch.rand(1, 192, 200)  # noise (adds a random factor)

    device = "cpu"  # device used during export (does not affect using the model)

    # Both variants share the same constructor arguments and export settings;
    # they differ only in the model class and in the extra "rnd" noise input.
    model_cls = SynthesizerTrnMs256NSFsidM if MoeVS else SynthesizerTrnMs256NSFsidO
    # Export as fp32 (supporting fp16 in C++ requires manually rearranging
    # memory, so fp16 is not used for now).
    net_g = model_cls(*cpt["config"], is_half=False)
    net_g.load_state_dict(cpt["weight"], strict=False)

    input_names = ["phone", "phone_lengths", "pitch", "pitchf", "ds"]
    example_inputs = [
        test_phone,
        test_phone_lengths,
        test_pitch,
        test_pitchf,
        test_ds,
    ]
    # Mark the length-200 axis of each sequence input as dynamic so the
    # exported model is not tied to the example length.
    dynamic_axes = {
        "phone": [1],
        "pitch": [1],
        "pitchf": [1],
    }
    if MoeVS:
        # The MoeVoiceStudio variant additionally takes the noise tensor.
        input_names.append("rnd")
        example_inputs.append(test_rnd)
        dynamic_axes["rnd"] = [2]
    output_names = [
        "audio",
    ]

    torch.onnx.export(
        net_g,
        tuple(tensor.to(device) for tensor in example_inputs),
        ExportedPath,
        dynamic_axes=dynamic_axes,
        do_constant_folding=False,
        opset_version=16,
        verbose=False,
        input_names=input_names,
        output_names=output_names,
    )
trained_IVF116_Flat_nprobe_1.index ADDED
Binary file (119 kB). View file