diff --git a/rvc/configs/config.py b/rvc/configs/config.py new file mode 100644 index 0000000..8cd6012 --- /dev/null +++ b/rvc/configs/config.py @@ -0,0 +1,251 @@ +import argparse +import os +import sys +import json +from multiprocessing import cpu_count + +import torch + +try: + import intel_extension_for_pytorch as ipex # pylint: disable=import-error, unused-import + + if torch.xpu.is_available(): + from rvc.lib.ipex import ipex_init + + ipex_init() +except Exception: # pylint: disable=broad-exception-caught + pass +import logging + +logger = logging.getLogger(__name__) + + +version_config_list = [ + "v1/32k.json", + "v1/40k.json", + "v1/48k.json", + "v2/48k.json", + "v2/32k.json", +] + + +def singleton_variable(func): + def wrapper(*args, **kwargs): + if not wrapper.instance: + wrapper.instance = func(*args, **kwargs) + return wrapper.instance + + wrapper.instance = None + return wrapper + + +@singleton_variable +class Config: + def __init__(self): + self.device = "cuda:0" + self.is_half = True + self.use_jit = False + self.n_cpu = 0 + self.gpu_name = None + self.json_config = self.load_config_json() + self.gpu_mem = None + ( + self.python_cmd, + self.listen_port, + self.iscolab, + self.noparallel, + self.noautoopen, + self.dml, + ) = self.arg_parse() + self.instead = "" + self.x_pad, self.x_query, self.x_center, self.x_max = self.device_config() + + @staticmethod + def load_config_json() -> dict: + d = {} + for config_file in version_config_list: + with open(f"configs/{config_file}", "r") as f: + d[config_file] = json.load(f) + return d + + @staticmethod + def arg_parse() -> tuple: + exe = sys.executable or "python" + parser = argparse.ArgumentParser() + parser.add_argument("--port", type=int, default=7865, help="Listen port") + parser.add_argument("--pycmd", type=str, default=exe, help="Python command") + parser.add_argument("--colab", action="store_true", help="Launch in colab") + parser.add_argument( + "--noparallel", action="store_true", help="Disable parallel processing" + ) + parser.add_argument( + "--noautoopen", + action="store_true", + help="Do not open in browser automatically", + ) + parser.add_argument( + "--dml", + action="store_true", + help="torch_dml", + ) + cmd_opts = parser.parse_args() + + cmd_opts.port = cmd_opts.port if 0 <= cmd_opts.port <= 65535 else 7865 + + return ( + cmd_opts.pycmd, + cmd_opts.port, + cmd_opts.colab, + cmd_opts.noparallel, + cmd_opts.noautoopen, + cmd_opts.dml, + ) + + # has_mps is only available in nightly pytorch (for now) and MasOS 12.3+. + # check `getattr` and try it for compatibility + @staticmethod + def has_mps() -> bool: + if not torch.backends.mps.is_available(): + return False + try: + torch.zeros(1).to(torch.device("mps")) + return True + except Exception: + return False + + @staticmethod + def has_xpu() -> bool: + if hasattr(torch, "xpu") and torch.xpu.is_available(): + return True + else: + return False + + def use_fp32_config(self): + for config_file in version_config_list: + self.json_config[config_file]["train"]["fp16_run"] = False + with open(f"configs/{config_file}", "r") as f: + strr = f.read().replace("true", "false") + with open(f"configs/{config_file}", "w") as f: + f.write(strr) + with open("infer/modules/train/preprocess.py", "r") as f: + strr = f.read().replace("3.7", "3.0") + with open("infer/modules/train/preprocess.py", "w") as f: + f.write(strr) + print("overwrite preprocess and configs.json") + + def device_config(self) -> tuple: + if torch.cuda.is_available(): + if self.has_xpu(): + self.device = self.instead = "xpu:0" + self.is_half = True + i_device = int(self.device.split(":")[-1]) + self.gpu_name = torch.cuda.get_device_name(i_device) + if ( + ("16" in self.gpu_name and "V100" not in self.gpu_name.upper()) + or "P40" in self.gpu_name.upper() + or "P10" in self.gpu_name.upper() + or "1060" in self.gpu_name + or "1070" in self.gpu_name + or "1080" in self.gpu_name + ): + logger.info("Found GPU %s, force to fp32", self.gpu_name) + self.is_half = False + self.use_fp32_config() + else: + logger.info("Found GPU %s", self.gpu_name) + self.gpu_mem = int( + torch.cuda.get_device_properties(i_device).total_memory + / 1024 + / 1024 + / 1024 + + 0.4 + ) + if self.gpu_mem <= 4: + with open("infer/modules/train/preprocess.py", "r") as f: + strr = f.read().replace("3.7", "3.0") + with open("infer/modules/train/preprocess.py", "w") as f: + f.write(strr) + elif self.has_mps(): + logger.info("No supported Nvidia GPU found") + self.device = self.instead = "mps" + self.is_half = False + self.use_fp32_config() + else: + logger.info("No supported Nvidia GPU found") + self.device = self.instead = "cpu" + self.is_half = False + self.use_fp32_config() + + if self.n_cpu == 0: + self.n_cpu = cpu_count() + + if self.is_half: + # 6G显存配置 + x_pad = 3 + x_query = 10 + x_center = 60 + x_max = 65 + else: + # 5G显存配置 + x_pad = 1 + x_query = 6 + x_center = 38 + x_max = 41 + + if self.gpu_mem is not None and self.gpu_mem <= 4: + x_pad = 1 + x_query = 5 + x_center = 30 + x_max = 32 + if self.dml: + logger.info("Use DirectML instead") + if ( + os.path.exists( + "runtime\Lib\site-packages\onnxruntime\capi\DirectML.dll" + ) + == False + ): + try: + os.rename( + "runtime\Lib\site-packages\onnxruntime", + "runtime\Lib\site-packages\onnxruntime-cuda", + ) + except: + pass + try: + os.rename( + "runtime\Lib\site-packages\onnxruntime-dml", + "runtime\Lib\site-packages\onnxruntime", + ) + except: + pass + # if self.device != "cpu": + import torch_directml + + self.device = torch_directml.device(torch_directml.default_device()) + self.is_half = False + else: + if self.instead: + logger.info(f"Use {self.instead} instead") + if ( + os.path.exists( + "runtime\Lib\site-packages\onnxruntime\capi\onnxruntime_providers_cuda.dll" + ) + == False + ): + try: + os.rename( + "runtime\Lib\site-packages\onnxruntime", + "runtime\Lib\site-packages\onnxruntime-dml", + ) + except: + pass + try: + os.rename( + "runtime\Lib\site-packages\onnxruntime-cuda", + "runtie\Lib\site-packages\onnxruntime", + ) + except: + pass + print("is_half:%s, device:%s" % (self.is_half, self.device)) + return x_pad, x_query, x_center, x_max diff --git a/rvc/configs/v1/32k.json b/rvc/configs/v1/32k.json new file mode 100644 index 0000000..d5f16d6 --- /dev/null +++ b/rvc/configs/v1/32k.json @@ -0,0 +1,46 @@ +{ + "train": { + "log_interval": 200, + "seed": 1234, + "epochs": 20000, + "learning_rate": 1e-4, + "betas": [0.8, 0.99], + "eps": 1e-9, + "batch_size": 4, + "fp16_run": true, + "lr_decay": 0.999875, + "segment_size": 12800, + "init_lr_ratio": 1, + "warmup_epochs": 0, + "c_mel": 45, + "c_kl": 1.0 + }, + "data": { + "max_wav_value": 32768.0, + "sampling_rate": 32000, + "filter_length": 1024, + "hop_length": 320, + "win_length": 1024, + "n_mel_channels": 80, + "mel_fmin": 0.0, + "mel_fmax": null + }, + "model": { + "inter_channels": 192, + "hidden_channels": 192, + "filter_channels": 768, + "n_heads": 2, + "n_layers": 6, + "kernel_size": 3, + "p_dropout": 0, + "resblock": "1", + "resblock_kernel_sizes": [3,7,11], + "resblock_dilation_sizes": [[1,3,5], [1,3,5], [1,3,5]], + "upsample_rates": [10,4,2,2,2], + "upsample_initial_channel": 512, + "upsample_kernel_sizes": [16,16,4,4,4], + "use_spectral_norm": false, + "gin_channels": 256, + "spk_embed_dim": 109 + } +} diff --git a/rvc/configs/v1/40k.json b/rvc/configs/v1/40k.json new file mode 100644 index 0000000..4ffc87b --- /dev/null +++ b/rvc/configs/v1/40k.json @@ -0,0 +1,46 @@ +{ + "train": { + "log_interval": 200, + "seed": 1234, + "epochs": 20000, + "learning_rate": 1e-4, + "betas": [0.8, 0.99], + "eps": 1e-9, + "batch_size": 4, + "fp16_run": true, + "lr_decay": 0.999875, + "segment_size": 12800, + "init_lr_ratio": 1, + "warmup_epochs": 0, + "c_mel": 45, + "c_kl": 1.0 + }, + "data": { + "max_wav_value": 32768.0, + "sampling_rate": 40000, + "filter_length": 2048, + "hop_length": 400, + "win_length": 2048, + "n_mel_channels": 125, + "mel_fmin": 0.0, + "mel_fmax": null + }, + "model": { + "inter_channels": 192, + "hidden_channels": 192, + "filter_channels": 768, + "n_heads": 2, + "n_layers": 6, + "kernel_size": 3, + "p_dropout": 0, + "resblock": "1", + "resblock_kernel_sizes": [3,7,11], + "resblock_dilation_sizes": [[1,3,5], [1,3,5], [1,3,5]], + "upsample_rates": [10,10,2,2], + "upsample_initial_channel": 512, + "upsample_kernel_sizes": [16,16,4,4], + "use_spectral_norm": false, + "gin_channels": 256, + "spk_embed_dim": 109 + } +} diff --git a/rvc/configs/v1/48k.json b/rvc/configs/v1/48k.json new file mode 100644 index 0000000..2d0e05b --- /dev/null +++ b/rvc/configs/v1/48k.json @@ -0,0 +1,46 @@ +{ + "train": { + "log_interval": 200, + "seed": 1234, + "epochs": 20000, + "learning_rate": 1e-4, + "betas": [0.8, 0.99], + "eps": 1e-9, + "batch_size": 4, + "fp16_run": true, + "lr_decay": 0.999875, + "segment_size": 11520, + "init_lr_ratio": 1, + "warmup_epochs": 0, + "c_mel": 45, + "c_kl": 1.0 + }, + "data": { + "max_wav_value": 32768.0, + "sampling_rate": 48000, + "filter_length": 2048, + "hop_length": 480, + "win_length": 2048, + "n_mel_channels": 128, + "mel_fmin": 0.0, + "mel_fmax": null + }, + "model": { + "inter_channels": 192, + "hidden_channels": 192, + "filter_channels": 768, + "n_heads": 2, + "n_layers": 6, + "kernel_size": 3, + "p_dropout": 0, + "resblock": "1", + "resblock_kernel_sizes": [3,7,11], + "resblock_dilation_sizes": [[1,3,5], [1,3,5], [1,3,5]], + "upsample_rates": [10,6,2,2,2], + "upsample_initial_channel": 512, + "upsample_kernel_sizes": [16,16,4,4,4], + "use_spectral_norm": false, + "gin_channels": 256, + "spk_embed_dim": 109 + } +} diff --git a/rvc/configs/v2/32k.json b/rvc/configs/v2/32k.json new file mode 100644 index 0000000..70e534f --- /dev/null +++ b/rvc/configs/v2/32k.json @@ -0,0 +1,46 @@ +{ + "train": { + "log_interval": 200, + "seed": 1234, + "epochs": 20000, + "learning_rate": 1e-4, + "betas": [0.8, 0.99], + "eps": 1e-9, + "batch_size": 4, + "fp16_run": true, + "lr_decay": 0.999875, + "segment_size": 12800, + "init_lr_ratio": 1, + "warmup_epochs": 0, + "c_mel": 45, + "c_kl": 1.0 + }, + "data": { + "max_wav_value": 32768.0, + "sampling_rate": 32000, + "filter_length": 1024, + "hop_length": 320, + "win_length": 1024, + "n_mel_channels": 80, + "mel_fmin": 0.0, + "mel_fmax": null + }, + "model": { + "inter_channels": 192, + "hidden_channels": 192, + "filter_channels": 768, + "n_heads": 2, + "n_layers": 6, + "kernel_size": 3, + "p_dropout": 0, + "resblock": "1", + "resblock_kernel_sizes": [3,7,11], + "resblock_dilation_sizes": [[1,3,5], [1,3,5], [1,3,5]], + "upsample_rates": [10,8,2,2], + "upsample_initial_channel": 512, + "upsample_kernel_sizes": [20,16,4,4], + "use_spectral_norm": false, + "gin_channels": 256, + "spk_embed_dim": 109 + } +} diff --git a/rvc/configs/v2/48k.json b/rvc/configs/v2/48k.json new file mode 100644 index 0000000..75f770c --- /dev/null +++ b/rvc/configs/v2/48k.json @@ -0,0 +1,46 @@ +{ + "train": { + "log_interval": 200, + "seed": 1234, + "epochs": 20000, + "learning_rate": 1e-4, + "betas": [0.8, 0.99], + "eps": 1e-9, + "batch_size": 4, + "fp16_run": true, + "lr_decay": 0.999875, + "segment_size": 17280, + "init_lr_ratio": 1, + "warmup_epochs": 0, + "c_mel": 45, + "c_kl": 1.0 + }, + "data": { + "max_wav_value": 32768.0, + "sampling_rate": 48000, + "filter_length": 2048, + "hop_length": 480, + "win_length": 2048, + "n_mel_channels": 128, + "mel_fmin": 0.0, + "mel_fmax": null + }, + "model": { + "inter_channels": 192, + "hidden_channels": 192, + "filter_channels": 768, + "n_heads": 2, + "n_layers": 6, + "kernel_size": 3, + "p_dropout": 0, + "resblock": "1", + "resblock_kernel_sizes": [3,7,11], + "resblock_dilation_sizes": [[1,3,5], [1,3,5], [1,3,5]], + "upsample_rates": [12,10,2,2], + "upsample_initial_channel": 512, + "upsample_kernel_sizes": [24,20,4,4], + "use_spectral_norm": false, + "gin_channels": 256, + "spk_embed_dim": 109 + } +}