Thatguy099 committed on
Commit
ffef93c
·
verified ·
1 Parent(s): 51df0e8

Delete lib/rvc.py

Browse files
Files changed (1) hide show
  1. lib/rvc.py +0 -437
lib/rvc.py DELETED
@@ -1,437 +0,0 @@
1
- import argparse
2
- import gc
3
- import hashlib
4
- import json
5
- import os
6
- import shlex
7
- import subprocess
8
- from contextlib import suppress
9
- from urllib.parse import urlparse, parse_qs
10
-
11
- import gradio as gr
12
- import librosa
13
- import numpy as np
14
- import soundfile as sf
15
- import sox
16
- import yt_dlp
17
- from pedalboard import Pedalboard, Reverb, Compressor, HighpassFilter
18
- from pedalboard.io import AudioFile
19
- from pydub import AudioSegment
20
- from audio_separator.separator import Separator
21
- from lib.infer import infer_audio
22
-
23
# Base directories, all anchored at the working directory the process was
# started from (evaluated once, at import time).
BASE_DIR = os.getcwd()
rvc_models_dir = os.path.join(BASE_DIR, 'models')
output_dir = os.path.join(BASE_DIR, 'song_output')
27
-
28
-
29
def get_youtube_video_id(url, ignore_playlist=True):
    """
    Extract the YouTube video ID from various URL formats.

    Examples:
        http://youtu.be/SA2iWivDJiE
        http://www.youtube.com/watch?v=_oPAwA_Udwc&feature=feedu
        http://www.youtube.com/watch/SA2iWivDJiE
        http://www.youtube.com/embed/SA2iWivDJiE
        http://www.youtube.com/v/SA2iWivDJiE?version=3&hl=en_US

    Args:
        url: The URL to inspect.
        ignore_playlist: When False and the URL carries a ``list`` query
            parameter, the playlist ID is returned instead of the video ID.

    Returns:
        The extracted ID as a string, or None when the URL is not a
        recognised YouTube URL or carries no ID.
    """
    parsed_url = urlparse(url)
    hostname = (parsed_url.hostname or '').lower()
    path = parsed_url.path

    if hostname == 'youtu.be':
        # An empty path must yield None so callers' "is None" checks catch it.
        return path.lstrip('/') or None

    if hostname in {'www.youtube.com', 'youtube.com', 'music.youtube.com', 'm.youtube.com'}:
        query = parse_qs(parsed_url.query)
        if not ignore_playlist:
            # Fall through to the video ID when there is no playlist parameter.
            with suppress(KeyError):
                return query['list'][0]
        if path == '/watch':
            return query.get('v', [None])[0]
        for prefix in ('/watch/', '/embed/', '/v/'):
            if path.startswith(prefix):
                # '/<kind>/<id>/...' splits into ['', kind, id, ...]; the ID is
                # always the third element.
                return path.split('/')[2] or None

    return None
60
-
61
-
62
def yt_download(link):
    """
    Download the audio from a YouTube link as an mp3 file.

    Args:
        link: URL handed to yt-dlp.

    Returns:
        The filename yt-dlp derives from the '%(title)s.mp3' template.

    Raises:
        RuntimeError: If yt-dlp returned no metadata for the link.
    """
    ydl_opts = {
        'format': 'bestaudio',
        'outtmpl': '%(title)s',
        'nocheckcertificate': True,
        'ignoreerrors': True,
        'no_warnings': True,
        'quiet': True,
        'extractaudio': True,
        'postprocessors': [{
            'key': 'FFmpegExtractAudio',
            'preferredcodec': 'mp3'
        }],
    }
    with yt_dlp.YoutubeDL(ydl_opts) as ydl:
        result = ydl.extract_info(link, download=True)
        # With 'ignoreerrors' enabled a failed download does not raise; guard
        # against a missing result instead of failing inside prepare_filename.
        if result is None:
            raise RuntimeError(f'Failed to download audio from {link}')
        download_path = ydl.prepare_filename(result, outtmpl='%(title)s.mp3')
    return download_path
83
-
84
-
85
def display_progress(message, percent, is_webui, progress=None):
    """
    Report a pipeline status message.

    When running under the web UI and a progress callback was supplied, the
    callback is invoked as ``progress(percent, desc=message)``; in every other
    case the message is printed to stdout.
    """
    use_callback = bool(is_webui) and progress is not None
    if not use_callback:
        print(message)
        return
    progress(percent, desc=message)
93
-
94
-
95
def raise_exception(error_msg, is_webui):
    """
    Always raise: a gr.Error when running in the web UI, a plain Exception
    otherwise. Both carry ``error_msg`` as their message.
    """
    if not is_webui:
        raise Exception(error_msg)
    raise gr.Error(error_msg)
103
-
104
-
105
-
106
def separation_uvr(filename, output):
    """
    Run the separation steps using different pre-trained models.

    Three stages are run in sequence, each feeding the next:
      1. split the song into instrumental and vocals,
      2. split the vocals into de-reverbed vocals and the reverb residue,
      3. split the de-reverbed vocals into backup and main vocals.

    Args:
        filename: Path of the audio file to separate.
        output: Directory that receives every separated stem.

    Returns:
        A tuple of four file paths:
        - vocals_no_reverb: The vocals after initial de-echo/de-reverb (used as intermediate vocals)
        - instrumental_path: The separated instrumental audio
        - main_vocals_dereverb: The lead vocals after final de-reverb processing
        - backup_vocals: The backup vocals extracted in the final stage
    """
    separator = Separator(output_dir=output)
    base_name = os.path.splitext(os.path.basename(filename))[0]

    def stem_path(suffix):
        # Every stem lives in `output` and is named after the input file.
        return os.path.join(output, f'{base_name}{suffix}')

    instrumental_path = stem_path('_Instrumental.wav')
    initial_vocals = stem_path('_Vocals.wav')
    vocals_no_reverb = stem_path('_Vocals (No Reverb).wav')
    vocals_reverb = stem_path('_Vocals (Reverb).wav')
    main_vocals_dereverb = stem_path('_Vocals_Main_DeReverb.wav')
    backup_vocals = stem_path('_Vocals_Backup.wav')

    def run_stage(model_filename, source, first_target, second_target):
        # Load one model, separate `source`, and give the two results stable names.
        # NOTE(review): assumes separate() returns two filenames relative to
        # `output`, in the order the targets are listed here — confirm.
        separator.load_model(model_filename=model_filename)
        stems = separator.separate(source)
        # os.replace overwrites stale stems from an earlier run; os.rename
        # would fail on Windows when the target already exists.
        os.replace(os.path.join(output, stems[0]), first_target)
        os.replace(os.path.join(output, stems[1]), second_target)

    run_stage('model_bs_roformer_ep_317_sdr_12.9755.ckpt',
              filename, instrumental_path, initial_vocals)
    run_stage('UVR-DeEcho-DeReverb.pth',
              initial_vocals, vocals_no_reverb, vocals_reverb)
    run_stage('mel_band_roformer_karaoke_aufr33_viperx_sdr_10.1956.ckpt',
              vocals_no_reverb, backup_vocals, main_vocals_dereverb)

    # The reverb residue is not used downstream.
    if os.path.exists(vocals_reverb):
        os.remove(vocals_reverb)

    return vocals_no_reverb, instrumental_path, main_vocals_dereverb, backup_vocals
144
-
145
-
146
def get_audio_paths(song_dir):
    """
    Search the given directory for expected audio files.

    Files are recognised purely by filename suffix. The original song path is
    derived from the instrumental file's name by dropping its
    '_Instrumental' suffix; that derived file is not checked for existence.

    Args:
        song_dir: Directory to scan (non-recursively).

    Returns:
        orig_song_path, instrumentals_path, main_vocals_dereverb_path, backup_vocals_path
        Each entry is None when no matching file was found.
    """
    instrumental_suffix = '_Instrumental.wav'

    orig_song_path = None
    instrumentals_path = None
    main_vocals_dereverb_path = None
    backup_vocals_path = None

    for file in os.listdir(song_dir):
        if file.endswith(instrumental_suffix):
            instrumentals_path = os.path.join(song_dir, file)
            # Strip only the trailing suffix: a blanket str.replace would also
            # eat '_Instrumental' inside the song title or the directory name.
            orig_name = file[:-len(instrumental_suffix)] + '.wav'
            orig_song_path = os.path.join(song_dir, orig_name)
        elif file.endswith('_Vocals_Main_DeReverb.wav'):
            main_vocals_dereverb_path = os.path.join(song_dir, file)
        elif file.endswith('_Vocals_Backup.wav'):
            backup_vocals_path = os.path.join(song_dir, file)

    return orig_song_path, instrumentals_path, main_vocals_dereverb_path, backup_vocals_path
167
-
168
-
169
def convert_to_stereo(audio_path):
    """
    Convert the given audio file to stereo (2 channels) if it is mono.

    Args:
        audio_path: Path of the audio file to inspect.

    Returns:
        ``audio_path`` unchanged when the loaded audio is not one-dimensional;
        otherwise the path of a new '<name>_stereo.wav' file written by ffmpeg.

    Raises:
        subprocess.CalledProcessError: If ffmpeg exits with a non-zero status.
    """
    wave, _ = librosa.load(audio_path, mono=False, sr=44100)
    if wave.ndim != 1:
        return audio_path

    stereo_path = f'{os.path.splitext(audio_path)[0]}_stereo.wav'
    # Pass an argument list rather than a quoted command string so that paths
    # containing quotes or other shell-special characters reach ffmpeg intact.
    command = [
        'ffmpeg', '-y', '-loglevel', 'error',
        '-i', audio_path,
        '-ac', '2', '-f', 'wav',
        stereo_path,
    ]
    subprocess.run(command, check=True)
    return stereo_path
180
-
181
-
182
def pitch_shift(audio_path, pitch_change):
    """
    Produce a pitch-shifted copy of ``audio_path`` and return its path.

    The copy is written next to the input as '<name>_p<pitch_change>.wav'.
    If that file already exists it is reused and nothing is recomputed.
    """
    stem = os.path.splitext(audio_path)[0]
    output_path = f'{stem}_p{pitch_change}.wav'
    if os.path.exists(output_path):
        return output_path

    samples, sample_rate = sf.read(audio_path)
    transformer = sox.Transformer()
    transformer.pitch(pitch_change)
    shifted = transformer.build_array(input_array=samples, sample_rate_in=sample_rate)
    sf.write(output_path, shifted, sample_rate)
    return output_path
194
-
195
-
196
def get_hash(filepath):
    """
    Return the first 11 hex characters of the file's BLAKE2b digest.

    The file is consumed in 8192-byte blocks so it never has to fit in memory.
    """
    digest = hashlib.blake2b()
    with open(filepath, 'rb') as stream:
        for block in iter(lambda: stream.read(8192), b''):
            digest.update(block)
    return digest.hexdigest()[:11]
205
-
206
-
207
def preprocess_song(song_input, song_id, is_webui, input_type, progress):
    """
    Preprocess the input song:
      - Download if YouTube URL.
      - Convert to stereo.
      - Separate vocals and instrumentals.

    Args:
        song_input: YouTube URL (input_type 'yt') or local file path ('local').
        song_id: Name of the per-song folder created under ``output_dir``.
        is_webui: Truthy when running under the web UI (selects how progress
            and errors are reported).
        input_type: Either 'yt' or 'local'.
        progress: Progress callback forwarded to display_progress.

    Returns:
        A tuple with six values matching the expected unpacking in the pipeline:
        (orig_song_path, vocals_no_reverb, instrumental_path,
         main_vocals_dereverb, backup_vocals, main_vocals_dereverb).
        The de-reverbed main vocals intentionally appear twice.

    Raises:
        gr.Error / Exception (via raise_exception): If ``input_type`` is not
        one of the supported values.
    """
    if input_type == 'yt':
        display_progress('[~] Downloading song...', 0, is_webui, progress)
        # Drop every query parameter after the first one (e.g. playlist info).
        song_link = song_input.split('&')[0]
        orig_song_path = yt_download(song_link)
    elif input_type == 'local':
        orig_song_path = song_input
    else:
        # Fail here with a clear message instead of passing None on to the
        # audio loader. raise_exception never returns.
        raise_exception(f'Unsupported input type: {input_type}', is_webui)

    song_output_dir = os.path.join(output_dir, song_id)
    os.makedirs(song_output_dir, exist_ok=True)

    orig_song_path = convert_to_stereo(orig_song_path)

    display_progress('[~] Separating Vocals from Instrumental...', 0.1, is_webui, progress)
    vocals_no_reverb, instrumental_path, main_vocals_dereverb, backup_vocals = separation_uvr(
        orig_song_path, song_output_dir
    )
    return (orig_song_path, vocals_no_reverb, instrumental_path,
            main_vocals_dereverb, backup_vocals, main_vocals_dereverb)
233
-
234
-
235
def voice_change(voice_model, vocals_path, output_path, pitch_change, f0_method,
                 index_rate, filter_radius, rms_mix_rate, protect, crepe_hop_length, is_webui):
    """
    Convert the input vocals using the specified RVC model.

    Args:
        voice_model: Model name forwarded to infer_audio as MODEL_NAME.
        vocals_path: Path of the vocals to convert (SOUND_PATH).
        output_path: Where the converted vocals are expected to end up.
        pitch_change: Forwarded as F0_CHANGE.
        f0_method: Forwarded as F0_METHOD.
        index_rate: Forwarded as INDEX_RATE.
        filter_radius: Forwarded as FILTER_RADIUS.
        rms_mix_rate: Forwarded as RMS_MIX_RATE.
        protect: Forwarded as PROTECT.
        crepe_hop_length: Forwarded as CREPE_HOP_LENGTH.
        is_webui: Unused here; kept for call-site compatibility.
    """
    import shutil  # local import: only needed to relocate the inference result

    inferred_audio = infer_audio(
        MODEL_NAME=voice_model,
        SOUND_PATH=vocals_path,
        F0_CHANGE=pitch_change,
        F0_METHOD=f0_method,
        CREPE_HOP_LENGTH=crepe_hop_length,
        INDEX_RATE=index_rate,
        FILTER_RADIUS=filter_radius,
        RMS_MIX_RATE=rms_mix_rate,
        PROTECT=protect,
    )

    # The caller reads the converted vocals from `output_path`, so the result
    # must end up there.
    # NOTE(review): presumably infer_audio returns the path of the file it
    # wrote — confirm against lib.infer. Any other return value is left alone.
    if isinstance(inferred_audio, (str, os.PathLike)) and os.path.exists(inferred_audio):
        if os.path.abspath(inferred_audio) != os.path.abspath(output_path):
            shutil.move(inferred_audio, output_path)

    gc.collect()
253
-
254
-
255
def add_audio_effects(audio_path, reverb_rm_size, reverb_wet, reverb_dry, reverb_damping):
    """
    Run the audio through a highpass -> compressor -> reverb chain.

    The processed audio is written next to the input as '<name>_mixed.wav'
    with the input's sample rate and channel count, and that path is returned.
    """
    stem, _ = os.path.splitext(audio_path)
    output_path = f'{stem}_mixed.wav'

    effects = [
        HighpassFilter(),
        Compressor(ratio=4, threshold_db=-15),
        Reverb(room_size=reverb_rm_size, dry_level=reverb_dry, wet_level=reverb_wet, damping=reverb_damping),
    ]
    board = Pedalboard(effects)

    with AudioFile(audio_path) as source, \
            AudioFile(output_path, 'w', source.samplerate, source.num_channels) as sink:
        # Process one second of audio at a time; reset=False keeps the effect
        # state (e.g. reverb tails) continuous across chunk boundaries.
        while source.tell() < source.frames:
            chunk = source.read(int(source.samplerate))
            sink.write(board(chunk, source.samplerate, reset=False))

    return output_path
273
-
274
-
275
def combine_audio(audio_paths, output_path, main_gain, backup_gain, inst_gain, output_format):
    """
    Mix three WAV files into one track and export it.

    ``audio_paths`` is indexed as [main vocals, backup vocals, instrumental].
    Each stem gets a fixed attenuation (4, 6 and 7 dB respectively) plus its
    caller-supplied gain before the backup and instrumental are overlaid onto
    the main vocals. The result is exported to ``output_path`` in
    ``output_format``.
    """
    lead = AudioSegment.from_wav(audio_paths[0]) - 4 + main_gain
    backing = AudioSegment.from_wav(audio_paths[1]) - 6 + backup_gain
    instrumental = AudioSegment.from_wav(audio_paths[2]) - 7 + inst_gain

    mix = lead.overlay(backing)
    mix = mix.overlay(instrumental)
    mix.export(output_path, format=output_format)
284
-
285
-
286
def song_cover_pipeline(song_input, voice_model, pitch_change, keep_files,
                        is_webui=0, main_gain=0, backup_gain=0, inst_gain=0, index_rate=0.5, filter_radius=3,
                        rms_mix_rate=0.25, f0_method='rmvpe', crepe_hop_length=128, protect=0.33, pitch_change_all=0,
                        reverb_rm_size=0.15, reverb_wet=0.2, reverb_dry=0.8, reverb_damping=0.7, output_format='mp3',
                        progress=gr.Progress()):
    """
    Main pipeline that orchestrates the AI cover song generation.

    Steps: resolve the input (an https URL is treated as YouTube, anything
    else as a local file), preprocess or reuse cached stems, convert the main
    vocals with the RVC model, apply effects, optionally pitch-shift the
    backing tracks, and mix everything into the final cover.

    Args:
        song_input: YouTube URL or local file path.
        voice_model: Name of the RVC model to use.
        pitch_change: Pitch change applied to the AI vocals only.
        keep_files: When truthy, intermediate files are kept; it also forces
            preprocessing to run again even if cached stems exist.
        is_webui: Truthy when called from the web UI.
        main_gain, backup_gain, inst_gain: Per-stem gain in dB for the mix.
        index_rate, filter_radius, rms_mix_rate, f0_method, crepe_hop_length,
        protect: Forwarded to voice_change.
        pitch_change_all: Pitch change added to the vocals and also applied to
            the instrumental and backup vocals.
        reverb_rm_size, reverb_wet, reverb_dry, reverb_damping: Reverb settings
            forwarded to add_audio_effects.
        output_format: Format of the exported cover.
        progress: Progress callback used when is_webui is truthy.

    Returns:
        Path of the generated cover file.

    Raises:
        gr.Error / Exception (via raise_exception): On invalid input or when
        any pipeline step fails; the message is the original error's text.
    """
    try:
        if not song_input or not voice_model:
            raise_exception('Ensure that the song input field and voice model field is filled.', is_webui)

        display_progress('[~] Starting AI Cover Generation Pipeline...', 0, is_webui, progress)

        if urlparse(song_input).scheme == 'https':
            input_type = 'yt'
            song_id = get_youtube_video_id(song_input)
            if song_id is None:
                raise_exception('Invalid YouTube url.', is_webui)
        else:
            input_type = 'local'
            song_input = song_input.strip('\"')
            if os.path.exists(song_input):
                song_id = get_hash(song_input)
            else:
                raise_exception(f'{song_input} does not exist.', is_webui)

        song_dir = os.path.join(output_dir, song_id)

        # These two are only set by preprocessing; they stay None when cached
        # stems are reused, and the cleanup below skips None entries.
        vocals_path, main_vocals_path = None, None
        if os.path.exists(song_dir):
            cached_paths = get_audio_paths(song_dir)
        else:
            os.makedirs(song_dir)
            cached_paths = None

        if cached_paths is None or any(path is None for path in cached_paths) or keep_files:
            (orig_song_path, vocals_path, instrumentals_path,
             main_vocals_path, backup_vocals_path, main_vocals_dereverb_path) = preprocess_song(
                song_input, song_id, is_webui, input_type, progress
            )
        else:
            orig_song_path, instrumentals_path, main_vocals_dereverb_path, backup_vocals_path = cached_paths
            main_vocals_path = main_vocals_dereverb_path

        pitch_change += pitch_change_all

        base_song_name = os.path.splitext(os.path.basename(orig_song_path))[0]
        algo_suffix = f"_{crepe_hop_length}" if f0_method == "mangio-crepe" else ""
        # The filename encodes every conversion setting so that a previous
        # conversion with identical settings is reused instead of recomputed.
        ai_vocals_path = os.path.join(
            song_dir,
            f'{base_song_name}_lead_{voice_model}_p{pitch_change}_i{index_rate}_fr{filter_radius}_'
            f'rms{rms_mix_rate}_pro{protect}_{f0_method}{algo_suffix}.wav'
        )

        ai_cover_path = os.path.join(song_dir, f'{base_song_name} ({voice_model} Ver).{output_format}')

        if not os.path.exists(ai_vocals_path):
            display_progress('[~] Converting voice using RVC...', 0.5, is_webui, progress)
            voice_change(voice_model, main_vocals_dereverb_path, ai_vocals_path, pitch_change,
                         f0_method, index_rate, filter_radius, rms_mix_rate, protect, crepe_hop_length, is_webui)

        display_progress('[~] Applying audio effects to Vocals...', 0.8, is_webui, progress)
        ai_vocals_mixed_path = add_audio_effects(ai_vocals_path, reverb_rm_size, reverb_wet, reverb_dry, reverb_damping)

        if pitch_change_all != 0:
            display_progress('[~] Applying overall pitch change', 0.85, is_webui, progress)
            instrumentals_path = pitch_shift(instrumentals_path, pitch_change_all)
            # Shift the backup vocals as well so they stay in key with the
            # shifted instrumental. This also makes the cleanup below delete
            # the shifted copy rather than the cached original stem.
            backup_vocals_path = pitch_shift(backup_vocals_path, pitch_change_all)

        display_progress('[~] Combining AI Vocals and Instrumentals...', 0.9, is_webui, progress)
        combine_audio([ai_vocals_mixed_path, backup_vocals_path, instrumentals_path],
                      ai_cover_path, main_gain, backup_gain, inst_gain, output_format)

        if not keep_files:
            display_progress('[~] Removing intermediate audio files...', 0.95, is_webui, progress)
            intermediate_files = [vocals_path, main_vocals_path, ai_vocals_mixed_path]
            if pitch_change_all != 0:
                # At this point both names refer to the pitch-shifted copies.
                intermediate_files += [instrumentals_path, backup_vocals_path]
            for file in intermediate_files:
                if file and os.path.exists(file):
                    os.remove(file)

        return ai_cover_path

    except Exception as e:
        # Re-raise in the form appropriate for the caller (gr.Error in the UI).
        raise_exception(str(e), is_webui)
375
-
376
-
377
# Command-line entry point: parse the options, check that the model folder
# exists, run the pipeline, and print where the cover was written.
if __name__ == '__main__':
    parser = argparse.ArgumentParser(
        description='AICoverGen: Mod.',
        add_help=True
    )
    parser.add_argument('-i', '--song-input', type=str, required=True,
                        help='Link to a YouTube video or the filepath to a local mp3/wav file to create an AI cover of')
    parser.add_argument('-dir', '--rvc-dirname', type=str, required=True,
                        help='Name of the folder in the rvc_models directory containing the RVC model file and optional index file to use')
    parser.add_argument('-p', '--pitch-change', type=int, required=True,
                        help='Change the pitch of AI Vocals only. Generally, use 1 for male to female and -1 for vice-versa. (Octaves)')
    parser.add_argument('-k', '--keep-files', action=argparse.BooleanOptionalAction,
                        help='Whether to keep all intermediate audio files generated in the song_output/id directory, e.g. Isolated Vocals/Instrumentals')
    parser.add_argument('-ir', '--index-rate', type=float, default=0.5,
                        help='A decimal number e.g. 0.5, used to reduce/resolve the timbre leakage problem. If set to 1, more biased towards the timbre quality of the training dataset')
    parser.add_argument('-fr', '--filter-radius', type=int, default=3,
                        help='A number between 0 and 7. If >=3: apply median filtering to the harvested pitch results. The value represents the filter radius and can reduce breathiness.')
    parser.add_argument('-rms', '--rms-mix-rate', type=float, default=0.25,
                        help="A decimal number e.g. 0.25. Control how much to use the original vocal's loudness (0) or a fixed loudness (1).")
    parser.add_argument('-palgo', '--pitch-detection-algo', type=str, default='rmvpe',
                        help='Best option is rmvpe (clarity in vocals), then mangio-crepe (smoother vocals).')
    parser.add_argument('-hop', '--crepe-hop-length', type=int, default=128,
                        help='If pitch detection algo is mangio-crepe, controls how often it checks for pitch changes in milliseconds. Recommended: 128.')
    parser.add_argument('-pro', '--protect', type=float, default=0.33,
                        help='A decimal number e.g. 0.33. Protect voiceless consonants and breath sounds to prevent artifacts such as tearing in electronic music.')
    parser.add_argument('-mv', '--main-vol', type=int, default=0,
                        help='Volume change for AI main vocals in decibels. Use -3 to decrease by 3 dB and 3 to increase by 3 dB')
    parser.add_argument('-bv', '--backup-vol', type=int, default=0,
                        help='Volume change for backup vocals in decibels')
    parser.add_argument('-iv', '--inst-vol', type=int, default=0,
                        help='Volume change for instrumentals in decibels')
    parser.add_argument('-pall', '--pitch-change-all', type=int, default=0,
                        help='Change the pitch/key of vocals and instrumentals. Changing this slightly reduces sound quality')
    parser.add_argument('-rsize', '--reverb-size', type=float, default=0.15,
                        help='Reverb room size between 0 and 1')
    parser.add_argument('-rwet', '--reverb-wetness', type=float, default=0.2,
                        help='Reverb wet level between 0 and 1')
    parser.add_argument('-rdry', '--reverb-dryness', type=float, default=0.8,
                        help='Reverb dry level between 0 and 1')
    parser.add_argument('-rdamp', '--reverb-damping', type=float, default=0.7,
                        help='Reverb damping between 0 and 1')
    parser.add_argument('-oformat', '--output-format', type=str, default='mp3',
                        help='Output format of audio file. mp3 for smaller file size, wav for best quality')
    args = parser.parse_args()

    rvc_dir = os.path.join(rvc_models_dir, args.rvc_dirname)
    if not os.path.exists(rvc_dir):
        raise Exception(f'The folder {rvc_dir} does not exist.')

    # song_cover_pipeline returns a single path; unpacking it into two names
    # would raise ValueError after the cover had already been generated.
    cover_path = song_cover_pipeline(
        args.song_input, args.rvc_dirname, args.pitch_change, args.keep_files,
        main_gain=args.main_vol, backup_gain=args.backup_vol, inst_gain=args.inst_vol,
        index_rate=args.index_rate, filter_radius=args.filter_radius,
        rms_mix_rate=args.rms_mix_rate, f0_method=args.pitch_detection_algo,
        crepe_hop_length=args.crepe_hop_length, protect=args.protect,
        pitch_change_all=args.pitch_change_all,
        reverb_rm_size=args.reverb_size, reverb_wet=args.reverb_wetness,
        reverb_dry=args.reverb_dryness, reverb_damping=args.reverb_damping,
        output_format=args.output_format
    )
    print(f'[+] Cover generated at {cover_path}')