Thatguy099 committed on
Commit dc81f37 · verified · 1 Parent(s): ffef93c

Update lib/infer.py

Files changed (1):
  1. lib/infer.py +262 -221
lib/infer.py CHANGED
@@ -1,221 +1,262 @@
-import os
-import shutil
-import gc
-import torch
-from multiprocessing import cpu_count
-from lib.modules import VC
-from lib.split_audio import split_silence_nonsilent, adjust_audio_lengths, combine_silence_nonsilent
-
-class Configs:
-    def __init__(self, device, is_half):
-        self.device = device
-        self.is_half = is_half
-        self.n_cpu = 0
-        self.gpu_name = None
-        self.gpu_mem = None
-        self.x_pad, self.x_query, self.x_center, self.x_max = self.device_config()
-
-    def device_config(self) -> tuple:
-        if torch.cuda.is_available():
-            i_device = int(self.device.split(":")[-1])
-            self.gpu_name = torch.cuda.get_device_name(i_device)
-            #if (
-            #    ("16" in self.gpu_name and "V100" not in self.gpu_name.upper())
-            #    or "P40" in self.gpu_name.upper()
-            #    or "1060" in self.gpu_name
-            #    or "1070" in self.gpu_name
-            #    or "1080" in self.gpu_name
-            #):
-            #    print("16 series/10 series P40 forced single precision")
-            #    self.is_half = False
-            #    for config_file in ["32k.json", "40k.json", "48k.json"]:
-            #        with open(BASE_DIR / "src" / "configs" / config_file, "r") as f:
-            #            strr = f.read().replace("true", "false")
-            #        with open(BASE_DIR / "src" / "configs" / config_file, "w") as f:
-            #            f.write(strr)
-            #    with open(BASE_DIR / "src" / "trainset_preprocess_pipeline_print.py", "r") as f:
-            #        strr = f.read().replace("3.7", "3.0")
-            #    with open(BASE_DIR / "src" / "trainset_preprocess_pipeline_print.py", "w") as f:
-            #        f.write(strr)
-            #else:
-            #    self.gpu_name = None
-            #self.gpu_mem = int(
-            #    torch.cuda.get_device_properties(i_device).total_memory
-            #    / 1024
-            #    / 1024
-            #    / 1024
-            #    + 0.4
-            #)
-            #if self.gpu_mem <= 4:
-            #    with open(BASE_DIR / "src" / "trainset_preprocess_pipeline_print.py", "r") as f:
-            #        strr = f.read().replace("3.7", "3.0")
-            #    with open(BASE_DIR / "src" / "trainset_preprocess_pipeline_print.py", "w") as f:
-            #        f.write(strr)
-        elif torch.backends.mps.is_available():
-            print("No supported N-card found, use MPS for inference")
-            self.device = "mps"
-        else:
-            print("No supported N-card found, use CPU for inference")
-            self.device = "cpu"
-
-        if self.n_cpu == 0:
-            self.n_cpu = cpu_count()
-
-        if self.is_half:
-            # 6G memory config
-            x_pad = 3
-            x_query = 10
-            x_center = 60
-            x_max = 65
-        else:
-            # 5G memory config
-            x_pad = 1
-            x_query = 6
-            x_center = 38
-            x_max = 41
-
-        if self.gpu_mem != None and self.gpu_mem <= 4:
-            x_pad = 1
-            x_query = 5
-            x_center = 30
-            x_max = 32
-
-        return x_pad, x_query, x_center, x_max
-
-def get_model(voice_model):
-    model_dir = os.path.join(os.getcwd(), "models", voice_model)
-    model_filename, index_filename = None, None
-    for file in os.listdir(model_dir):
-        ext = os.path.splitext(file)[1]
-        if ext == '.pth':
-            model_filename = file
-        if ext == '.index':
-            index_filename = file
-
-    if model_filename is None:
-        print(f'No model file exists in {models_dir}.')
-        return None, None
-
-    return os.path.join(model_dir, model_filename), os.path.join(model_dir, index_filename) if index_filename else ''
-
-def infer_audio(
-    model_name,
-    audio_path,
-    f0_change=0,
-    f0_method="rmvpe+",
-    min_pitch="50",
-    max_pitch="1100",
-    crepe_hop_length=128,
-    index_rate=0.75,
-    filter_radius=3,
-    rms_mix_rate=0.25,
-    protect=0.33,
-    split_infer=False,
-    min_silence=500,
-    silence_threshold=-50,
-    seek_step=1,
-    keep_silence=100,
-    do_formant=False,
-    quefrency=0,
-    timbre=1,
-    f0_autotune=False,
-    audio_format="wav",
-    resample_sr=0,
-    hubert_model_path="assets/hubert/hubert_base.pt",
-    rmvpe_model_path="assets/rmvpe/rmvpe.pt",
-    fcpe_model_path="assets/fcpe/fcpe.pt"
-):
-    os.environ["rmvpe_model_path"] = rmvpe_model_path
-    os.environ["fcpe_model_path"] = fcpe_model_path
-    configs = Configs('cuda:0', True)
-    vc = VC(configs)
-    pth_path, index_path = get_model(model_name)
-    vc_data = vc.get_vc(pth_path, protect, 0.5)
-
-    if split_infer:
-        inferred_files = []
-        temp_dir = os.path.join(os.getcwd(), "seperate", "temp")
-        os.makedirs(temp_dir, exist_ok=True)
-        print("Splitting audio to silence and nonsilent segments.")
-        silence_files, nonsilent_files = split_silence_nonsilent(audio_path, min_silence, silence_threshold, seek_step, keep_silence)
-        print(f"Total silence segments: {len(silence_files)}.\nTotal nonsilent segments: {len(nonsilent_files)}.")
-        for i, nonsilent_file in enumerate(nonsilent_files):
-            print(f"Inferring nonsilent audio {i+1}")
-            inference_info, audio_data, output_path = vc.vc_single(
-                0,
-                nonsilent_file,
-                f0_change,
-                f0_method,
-                index_path,
-                index_path,
-                index_rate,
-                filter_radius,
-                resample_sr,
-                rms_mix_rate,
-                protect,
-                audio_format,
-                crepe_hop_length,
-                do_formant,
-                quefrency,
-                timbre,
-                min_pitch,
-                max_pitch,
-                f0_autotune,
-                hubert_model_path
-            )
-            if inference_info[0] == "Success.":
-                print("Inference ran successfully.")
-                print(inference_info[1])
-                print("Times:\nnpy: %.2fs f0: %.2fs infer: %.2fs\nTotal time: %.2fs" % (*inference_info[2],))
-            else:
-                print(f"An error occurred while processing.\n{inference_info[0]}")
-                return None
-            inferred_files.append(output_path)
-        print("Adjusting inferred audio lengths.")
-        adjusted_inferred_files = adjust_audio_lengths(nonsilent_files, inferred_files)
-        print("Combining silence and inferred audios.")
-        output_count = 1
-        while True:
-            output_path = os.path.join(os.getcwd(), "output", f"{os.path.splitext(os.path.basename(audio_path))[0]}{model_name}{f0_method.capitalize()}_{output_count}.{audio_format}")
-            if not os.path.exists(output_path):
-                break
-            output_count += 1
-        output_path = combine_silence_nonsilent(silence_files, adjusted_inferred_files, keep_silence, output_path)
-        [shutil.move(inferred_file, temp_dir) for inferred_file in inferred_files]
-        shutil.rmtree(temp_dir)
-    else:
-        inference_info, audio_data, output_path = vc.vc_single(
-            0,
-            audio_path,
-            f0_change,
-            f0_method,
-            index_path,
-            index_path,
-            index_rate,
-            filter_radius,
-            resample_sr,
-            rms_mix_rate,
-            protect,
-            audio_format,
-            crepe_hop_length,
-            do_formant,
-            quefrency,
-            timbre,
-            min_pitch,
-            max_pitch,
-            f0_autotune,
-            hubert_model_path
-        )
-        if inference_info[0] == "Success.":
-            print("Inference ran successfully.")
-            print(inference_info[1])
-            print("Times:\nnpy: %.2fs f0: %.2fs infer: %.2fs\nTotal time: %.2fs" % (*inference_info[2],))
-        else:
-            print(f"An error occurred while processing.\n{inference_info[0]}")
-            del configs, vc
-            gc.collect()
-            return inference_info[0]
-
-    del configs, vc
-    gc.collect()
-    return output_path
+import os
+import shutil
+import gc
+import torch
+from multiprocessing import cpu_count
+from lib.modules import VC
+from lib.split_audio import split_silence_nonsilent, adjust_audio_lengths, combine_silence_nonsilent
+import logging
+from datetime import datetime
+import traceback
+
+# Configure logging
+logging.basicConfig(
+    level=logging.DEBUG,
+    format='%(asctime)s - %(levelname)s - %(process)d - %(funcName)s:%(lineno)d - %(message)s',
+    handlers=[
+        logging.FileHandler(f'debug_{datetime.now().strftime("%Y%m%d_%H%M%S")}.log'),
+        logging.StreamHandler()
+    ]
+)
+
+class Configs:
+    def __init__(self, device, is_half):
+        logging.debug(f"Initializing Configs with device={device}, is_half={is_half}")
+        self.device = device
+        self.is_half = is_half
+        self.n_cpu = 0
+        self.gpu_name = None
+        self.gpu_mem = None
+        try:
+            self.x_pad, self.x_query, self.x_center, self.x_max = self.device_config()
+            logging.debug(f"Device configuration: pad={self.x_pad}, query={self.x_query}, "
+                          f"center={self.x_center}, max={self.x_max}")
+        except Exception as e:
+            logging.error(f"Failed to configure device: {str(e)}")
+            raise
+
+    def device_config(self) -> tuple:
+        if torch.cuda.is_available():
+            i_device = int(self.device.split(":")[-1])
+            self.gpu_name = torch.cuda.get_device_name(i_device)
+            logging.debug(f"GPU detected: {self.gpu_name}")
+        elif torch.backends.mps.is_available():
+            logging.warning("No supported N-card found, falling back to MPS")
+            self.device = "mps"
+        else:
+            logging.warning("No supported N-card found, falling back to CPU")
+            self.device = "cpu"
+
+        if self.n_cpu == 0:
+            self.n_cpu = cpu_count()
+            logging.debug(f"Detected {self.n_cpu} CPU cores")
+
+        # Memory configuration settings
+        if self.is_half:
+            x_pad = 3
+            x_query = 10
+            x_center = 60
+            x_max = 65
+        else:
+            x_pad = 1
+            x_query = 6
+            x_center = 38
+            x_max = 41
+
+        if self.gpu_mem is not None and self.gpu_mem <= 4:
+            x_pad = 1
+            x_query = 5
+            x_center = 30
+            x_max = 32
+
+        return x_pad, x_query, x_center, x_max
+
+def get_model(voice_model):
+    model_dir = os.path.join(os.getcwd(), "models", voice_model)
+    logging.debug(f"Searching for model files in directory: {model_dir}")
+
+    model_filename, index_filename = None, None
+    try:
+        for file in os.listdir(model_dir):
+            ext = os.path.splitext(file)[1]
+            if ext == '.pth':
+                model_filename = file
+                logging.debug(f"Found model file: {file}")
+            elif ext == '.index':
+                index_filename = file
+                logging.debug(f"Found index file: {file}")
+
+        if model_filename is None:
+            logging.error(f"No model file exists in {model_dir}")
+            raise FileNotFoundError(f"No model file exists in {model_dir}")
+
+        return os.path.join(model_dir, model_filename), os.path.join(model_dir, index_filename) if index_filename else ''
+
+    except Exception as e:
+        logging.error(f"Failed to retrieve model files: {str(e)}")
+        raise
+
+def infer_audio(
+    model_name,
+    audio_path,
+    f0_change=0,
+    f0_method="rmvpe+",
+    min_pitch="50",
+    max_pitch="1100",
+    crepe_hop_length=128,
+    index_rate=0.75,
+    filter_radius=3,
+    rms_mix_rate=0.25,
+    protect=0.33,
+    split_infer=False,
+    min_silence=500,
+    silence_threshold=-50,
+    seek_step=1,
+    keep_silence=100,
+    do_formant=False,
+    quefrency=0,
+    timbre=1,
+    f0_autotune=False,
+    audio_format="wav",
+    resample_sr=0,
+    hubert_model_path="assets/hubert/hubert_base.pt",
+    rmvpe_model_path="assets/rmvpe/rmvpe.pt",
+    fcpe_model_path="assets/fcpe/fcpe.pt"
+):
+    logging.info("Starting inference with parameters:")
+    logging.info(f"- Model: {model_name}")
+    logging.info(f"- Audio path: {audio_path}")
+    logging.info(f"- F0 change: {f0_change}, Method: {f0_method}")
+    logging.info(f"- Split inference: {split_infer}")
+
+    os.environ["rmvpe_model_path"] = rmvpe_model_path
+    os.environ["fcpe_model_path"] = fcpe_model_path
+
+    try:
+        configs = Configs('cuda:0', True)
+        vc = VC(configs)
+        pth_path, index_path = get_model(model_name)
+        vc_data = vc.get_vc(pth_path, protect, 0.5)
+
+        if split_infer:
+            logging.info("Split inference mode enabled")
+            inferred_files = []
+            temp_dir = os.path.join(os.getcwd(), "seperate", "temp")
+            os.makedirs(temp_dir, exist_ok=True)
+
+            try:
+                silence_files, nonsilent_files = split_silence_nonsilent(
+                    audio_path, min_silence, silence_threshold, seek_step, keep_silence
+                )
+                logging.debug(f"Silence segments: {len(silence_files)}")
+                logging.debug(f"Nonsilent segments: {len(nonsilent_files)}")
+
+                for i, nonsilent_file in enumerate(nonsilent_files):
+                    logging.info(f"Processing segment {i+1}/{len(nonsilent_files)}")
+
+                    start_time = datetime.now()
+                    inference_info, audio_data, output_path = vc.vc_single(
+                        0,
+                        nonsilent_file,
+                        f0_change,
+                        f0_method,
+                        index_path,
+                        index_path,
+                        index_rate,
+                        filter_radius,
+                        resample_sr,
+                        rms_mix_rate,
+                        protect,
+                        audio_format,
+                        crepe_hop_length,
+                        do_formant,
+                        quefrency,
+                        timbre,
+                        min_pitch,
+                        max_pitch,
+                        f0_autotune,
+                        hubert_model_path
+                    )
+                    process_time = (datetime.now() - start_time).total_seconds()
+                    logging.debug(f"Segment processing time: {process_time:.2f}s")
+
+                    if inference_info[0] == "Success.":
+                        logging.info("Segment processed successfully")
+                        logging.debug(inference_info[1])
+                        logging.debug("Times:\nnpy: %.2fs f0: %.2fs infer: %.2fs\nTotal time: %.2fs" % (*inference_info[2],))
+                        inferred_files.append(output_path)
+                    else:
+                        logging.error(f"Error processing segment {i+1}: {inference_info[0]}")
+                        raise RuntimeError(f"Error processing segment {i+1}")
+
+                logging.info("Adjusting inferred audio lengths")
+                adjusted_inferred_files = adjust_audio_lengths(nonsilent_files, inferred_files)
+
+                logging.info("Combining silence and inferred audios")
+                output_count = 1
+                while True:
+                    output_path = os.path.join(
+                        os.getcwd(),
+                        "output",
+                        f"{os.path.splitext(os.path.basename(audio_path))[0]}{model_name}"
+                        f"{f0_method.capitalize()}_{output_count}.{audio_format}"
+                    )
+                    if not os.path.exists(output_path):
+                        break
+                    output_count += 1
+
+                output_path = combine_silence_nonsilent(silence_files, adjusted_inferred_files, keep_silence, output_path)
+
+                # Cleanup temporary files
+                for inferred_file in inferred_files:
+                    shutil.move(inferred_file, temp_dir)
+                shutil.rmtree(temp_dir)
+
+            except Exception as e:
+                logging.error(f"Split inference failed: {str(e)}")
+                raise
+
+        else:
+            logging.info("Single inference mode")
+            start_time = datetime.now()
+            inference_info, audio_data, output_path = vc.vc_single(
+                0,
+                audio_path,
+                f0_change,
+                f0_method,
+                index_path,
+                index_path,
+                index_rate,
+                filter_radius,
+                resample_sr,
+                rms_mix_rate,
+                protect,
+                audio_format,
+                crepe_hop_length,
+                do_formant,
+                quefrency,
+                timbre,
+                min_pitch,
+                max_pitch,
+                f0_autotune,
+                hubert_model_path
+            )
+            process_time = (datetime.now() - start_time).total_seconds()
+            logging.debug(f"Total processing time: {process_time:.2f}s")
+
+            if inference_info[0] == "Success.":
+                logging.info("Inference completed successfully")
+                logging.debug(inference_info[1])
+                logging.debug("Times:\nnpy: %.2fs f0: %.2fs infer: %.2fs\nTotal time: %.2fs" % (*inference_info[2],))
+            else:
+                logging.error(f"Inference failed: {inference_info[0]}")
+                raise RuntimeError(inference_info[0])
+
+        del configs, vc
+        gc.collect()
+        return output_path
+
+    except Exception as e:
+        logging.error(f"Inference failed: {str(e)}")
+        logging.error(traceback.format_exc())
+        raise
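
For reference, a minimal caller sketch against the updated module. The voice-model name "MyVoice" and the input file "song.wav" are hypothetical placeholders; the sketch assumes the layout the code expects (a models/MyVoice/ directory holding a .pth file and an optional .index file, plus the default assets/ model paths). Unlike the old version, which returned None or an error string on failure, the updated infer_audio raises, so callers should catch exceptions:

# Hypothetical usage sketch (not part of this commit): "MyVoice" and
# "song.wav" are placeholder names. Assumes models/MyVoice/ contains a
# .pth file and the default assets/ model paths exist.
from lib.infer import infer_audio

try:
    output_path = infer_audio(
        model_name="MyVoice",      # looked up under models/<model_name>/
        audio_path="song.wav",     # placeholder input file
        f0_change=0,
        f0_method="rmvpe+",
        split_infer=True,          # exercise the silence-splitting path
    )
    print(f"Converted audio written to: {output_path}")
except Exception as err:
    # The updated function raises (e.g. FileNotFoundError, RuntimeError)
    # instead of returning None on failure.
    print(f"Inference failed: {err}")

Because get_model now raises FileNotFoundError rather than returning (None, None), a missing model directory surfaces as an exception at the call site instead of a later crash inside vc.get_vc.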