uvr5
This commit is contained in:
@@ -9,7 +9,7 @@ from tqdm import tqdm
|
|||||||
|
|
||||||
cpu = torch.device("cpu")
|
cpu = torch.device("cpu")
|
||||||
|
|
||||||
logger = logging.getLogger(__name__)
|
logger: logging.Logger = logging.getLogger(__name__)
|
||||||
|
|
||||||
|
|
||||||
class ConvTDFNetTrim:
|
class ConvTDFNetTrim:
|
||||||
@@ -158,7 +158,7 @@ class Predictor:
|
|||||||
mix_waves = []
|
mix_waves = []
|
||||||
i = 0
|
i = 0
|
||||||
while i < n_sample + pad:
|
while i < n_sample + pad:
|
||||||
waves = np.array(mix_p[:, i: i + model.chunk_size])
|
waves = np.array(mix_p[:, i : i + model.chunk_size])
|
||||||
mix_waves.append(waves)
|
mix_waves.append(waves)
|
||||||
i += gen_size
|
i += gen_size
|
||||||
mix_waves = torch.tensor(mix_waves, dtype=torch.float32).to(cpu)
|
mix_waves = torch.tensor(mix_waves, dtype=torch.float32).to(cpu)
|
||||||
|
|||||||
@@ -9,7 +9,7 @@ from rvc.configs.config import Config
|
|||||||
from rvc.modules.uvr5.mdxnet import MDXNetDereverb
|
from rvc.modules.uvr5.mdxnet import MDXNetDereverb
|
||||||
from rvc.modules.uvr5.vr import AudioPre, AudioPreDeEcho
|
from rvc.modules.uvr5.vr import AudioPre, AudioPreDeEcho
|
||||||
|
|
||||||
logger = logging.getLogger(__name__)
|
logger: logging.Logger = logging.getLogger(__name__)
|
||||||
config = Config()
|
config = Config()
|
||||||
|
|
||||||
|
|
||||||
|
|||||||
@@ -27,7 +27,7 @@ class AudioPre:
|
|||||||
"agg": agg,
|
"agg": agg,
|
||||||
"high_end_process": "mirroring",
|
"high_end_process": "mirroring",
|
||||||
}
|
}
|
||||||
mp = ModelParameters("infer/lib/uvr5_pack/lib_v5/modelparams/4band_v2.json")
|
mp = ModelParameters("rvc/lib/uvr5_pack/lib_v5/modelparams/4band_v2.json")
|
||||||
model = Nets.CascadedASPPNet(mp.param["bins"] * 2)
|
model = Nets.CascadedASPPNet(mp.param["bins"] * 2)
|
||||||
cpk = torch.load(model_path, map_location="cpu")
|
cpk = torch.load(model_path, map_location="cpu")
|
||||||
model.load_state_dict(cpk)
|
model.load_state_dict(cpk)
|
||||||
@@ -43,23 +43,24 @@ class AudioPre:
|
|||||||
def _path_audio_(
|
def _path_audio_(
|
||||||
self, music_file, ins_root=None, vocal_root=None, format="flac", is_hp3=False
|
self, music_file, ins_root=None, vocal_root=None, format="flac", is_hp3=False
|
||||||
):
|
):
|
||||||
if ins_root is None and vocal_root is None:
|
|
||||||
return "No save root."
|
|
||||||
name = os.path.basename(music_file)
|
name = os.path.basename(music_file)
|
||||||
if ins_root is not None:
|
if (ins_root and vocal_root) is None:
|
||||||
|
return "No save root."
|
||||||
|
else:
|
||||||
os.makedirs(ins_root, exist_ok=True)
|
os.makedirs(ins_root, exist_ok=True)
|
||||||
if vocal_root is not None:
|
|
||||||
os.makedirs(vocal_root, exist_ok=True)
|
os.makedirs(vocal_root, exist_ok=True)
|
||||||
|
|
||||||
X_wave, y_wave, X_spec_s, y_spec_s = {}, {}, {}, {}
|
X_wave, y_wave, X_spec_s, y_spec_s = {}, {}, {}, {}
|
||||||
bands_n = len(self.mp.param["band"])
|
bands_n = len(self.mp.param["band"])
|
||||||
# print(bands_n)
|
# print(bands_n)
|
||||||
for d in range(bands_n, 0, -1):
|
for d in range(bands_n, 0, -1):
|
||||||
bp = self.mp.param["band"][d]
|
bp = self.mp.param["band"][d]
|
||||||
if d == bands_n: # high-end band
|
if d == bands_n: # high-end band
|
||||||
|
# librosa loading may be buggy for some audio. ffmpeg will solve this, but it's a pain
|
||||||
(
|
(
|
||||||
X_wave[d],
|
X_wave[d],
|
||||||
_,
|
_,
|
||||||
) = librosa.core.load( # 理论上librosa读取可能对某些音频有bug,应该上ffmpeg读取,但是太麻烦了弃坑
|
) = librosa.core.load(
|
||||||
music_file,
|
music_file,
|
||||||
bp["sr"],
|
bp["sr"],
|
||||||
False,
|
False,
|
||||||
@@ -121,7 +122,7 @@ class AudioPre:
|
|||||||
else:
|
else:
|
||||||
wav_instrument = spec_utils.cmb_spectrogram_to_wave(y_spec_m, self.mp)
|
wav_instrument = spec_utils.cmb_spectrogram_to_wave(y_spec_m, self.mp)
|
||||||
logger.info("%s instruments done" % name)
|
logger.info("%s instruments done" % name)
|
||||||
if is_hp3 == True:
|
if is_hp3:
|
||||||
head = "vocal_"
|
head = "vocal_"
|
||||||
else:
|
else:
|
||||||
head = "instrument_"
|
head = "instrument_"
|
||||||
@@ -129,15 +130,13 @@ class AudioPre:
|
|||||||
sf.write(
|
sf.write(
|
||||||
os.path.join(
|
os.path.join(
|
||||||
ins_root,
|
ins_root,
|
||||||
head + "{}_{}.{}".format(name, self.data["agg"], format),
|
head + f"{name}_{self.data['agg']}.{format}",
|
||||||
),
|
),
|
||||||
(np.array(wav_instrument) * 32768).astype("int16"),
|
(np.array(wav_instrument) * 32768).astype("int16"),
|
||||||
self.mp.param["sr"],
|
self.mp.param["sr"],
|
||||||
) #
|
) #
|
||||||
else:
|
else:
|
||||||
path = os.path.join(
|
path = os.path.join(ins_root, head + f"{name}_{self.data['agg']}.wav")
|
||||||
ins_root, head + "{}_{}.wav".format(name, self.data["agg"])
|
|
||||||
)
|
|
||||||
sf.write(
|
sf.write(
|
||||||
path,
|
path,
|
||||||
(np.array(wav_instrument) * 32768).astype("int16"),
|
(np.array(wav_instrument) * 32768).astype("int16"),
|
||||||
@@ -149,13 +148,10 @@ class AudioPre:
|
|||||||
if os.path.exists(opt_format_path):
|
if os.path.exists(opt_format_path):
|
||||||
try:
|
try:
|
||||||
os.remove(path)
|
os.remove(path)
|
||||||
except:
|
except Exception:
|
||||||
pass
|
pass
|
||||||
if vocal_root is not None:
|
if vocal_root is not None:
|
||||||
if is_hp3 == True:
|
head = "instrument_" if is_hp3 else "vocal_"
|
||||||
head = "instrument_"
|
|
||||||
else:
|
|
||||||
head = "vocal_"
|
|
||||||
if self.data["high_end_process"].startswith("mirroring"):
|
if self.data["high_end_process"].startswith("mirroring"):
|
||||||
input_high_end_ = spec_utils.mirroring(
|
input_high_end_ = spec_utils.mirroring(
|
||||||
self.data["high_end_process"], v_spec_m, input_high_end, self.mp
|
self.data["high_end_process"], v_spec_m, input_high_end, self.mp
|
||||||
@@ -165,28 +161,26 @@ class AudioPre:
|
|||||||
)
|
)
|
||||||
else:
|
else:
|
||||||
wav_vocals = spec_utils.cmb_spectrogram_to_wave(v_spec_m, self.mp)
|
wav_vocals = spec_utils.cmb_spectrogram_to_wave(v_spec_m, self.mp)
|
||||||
logger.info("%s vocals done" % name)
|
logger.info(f"{name} vocals done")
|
||||||
if format in ["wav", "flac"]:
|
if format in ["wav", "flac"]:
|
||||||
sf.write(
|
sf.write(
|
||||||
os.path.join(
|
os.path.join(
|
||||||
vocal_root,
|
vocal_root,
|
||||||
head + "{}_{}.{}".format(name, self.data["agg"], format),
|
head + f"{name}_{self.data['agg']}.{format}",
|
||||||
),
|
),
|
||||||
(np.array(wav_vocals) * 32768).astype("int16"),
|
(np.array(wav_vocals) * 32768).astype("int16"),
|
||||||
self.mp.param["sr"],
|
self.mp.param["sr"],
|
||||||
)
|
)
|
||||||
else:
|
else:
|
||||||
path = os.path.join(
|
path = os.path.join(vocal_root, head + f"{name}_{self.data['agg']}.wav")
|
||||||
vocal_root, head + "{}_{}.wav".format(name, self.data["agg"])
|
|
||||||
)
|
|
||||||
sf.write(
|
sf.write(
|
||||||
path,
|
path,
|
||||||
(np.array(wav_vocals) * 32768).astype("int16"),
|
(np.array(wav_vocals) * 32768).astype("int16"),
|
||||||
self.mp.param["sr"],
|
self.mp.param["sr"],
|
||||||
)
|
)
|
||||||
if os.path.exists(path):
|
if os.path.exists(path):
|
||||||
opt_format_path = path[:-4] + ".%s" % format
|
opt_format_path = path[:-4] + f".{format}"
|
||||||
os.system("ffmpeg -i %s -vn %s -q:a 2 -y" % (path, opt_format_path))
|
os.system(f"ffmpeg -i {path} -vn {opt_format_path} -q:a 2 -y")
|
||||||
if os.path.exists(opt_format_path):
|
if os.path.exists(opt_format_path):
|
||||||
try:
|
try:
|
||||||
os.remove(path)
|
os.remove(path)
|
||||||
@@ -207,7 +201,7 @@ class AudioPreDeEcho:
|
|||||||
"agg": agg,
|
"agg": agg,
|
||||||
"high_end_process": "mirroring",
|
"high_end_process": "mirroring",
|
||||||
}
|
}
|
||||||
mp = ModelParameters("infer/lib/uvr5_pack/lib_v5/modelparams/4band_v3.json")
|
mp = ModelParameters("rvc/lib/uvr5_pack/lib_v5/modelparams/4band_v3.json")
|
||||||
nout = 64 if "DeReverb" in model_path else 48
|
nout = 64 if "DeReverb" in model_path else 48
|
||||||
model = CascadedNet(mp.param["bins"] * 2, nout)
|
model = CascadedNet(mp.param["bins"] * 2, nout)
|
||||||
cpk = torch.load(model_path, map_location="cpu")
|
cpk = torch.load(model_path, map_location="cpu")
|
||||||
@@ -224,23 +218,24 @@ class AudioPreDeEcho:
|
|||||||
def _path_audio_(
|
def _path_audio_(
|
||||||
self, music_file, vocal_root=None, ins_root=None, format="flac", is_hp3=False
|
self, music_file, vocal_root=None, ins_root=None, format="flac", is_hp3=False
|
||||||
): # 3个VR模型vocal和ins是反的
|
): # 3个VR模型vocal和ins是反的
|
||||||
if ins_root is None and vocal_root is None:
|
|
||||||
return "No save root."
|
|
||||||
name = os.path.basename(music_file)
|
name = os.path.basename(music_file)
|
||||||
if ins_root is not None:
|
if (ins_root and vocal_root) is None:
|
||||||
|
return "No save root."
|
||||||
|
else:
|
||||||
os.makedirs(ins_root, exist_ok=True)
|
os.makedirs(ins_root, exist_ok=True)
|
||||||
if vocal_root is not None:
|
|
||||||
os.makedirs(vocal_root, exist_ok=True)
|
os.makedirs(vocal_root, exist_ok=True)
|
||||||
|
|
||||||
X_wave, y_wave, X_spec_s, y_spec_s = {}, {}, {}, {}
|
X_wave, y_wave, X_spec_s, y_spec_s = {}, {}, {}, {}
|
||||||
bands_n = len(self.mp.param["band"])
|
bands_n = len(self.mp.param["band"])
|
||||||
# print(bands_n)
|
# print(bands_n)
|
||||||
for d in range(bands_n, 0, -1):
|
for d in range(bands_n, 0, -1):
|
||||||
bp = self.mp.param["band"][d]
|
bp = self.mp.param["band"][d]
|
||||||
if d == bands_n: # high-end band
|
if d == bands_n: # high-end band
|
||||||
|
# librosa loading may be buggy for some audio. ffmpeg will solve this, but it's a pain
|
||||||
(
|
(
|
||||||
X_wave[d],
|
X_wave[d],
|
||||||
_,
|
_,
|
||||||
) = librosa.core.load( # 理论上librosa读取可能对某些音频有bug,应该上ffmpeg读取,但是太麻烦了弃坑
|
) = librosa.core.load(
|
||||||
music_file,
|
music_file,
|
||||||
bp["sr"],
|
bp["sr"],
|
||||||
False,
|
False,
|
||||||
|
|||||||
Reference in New Issue
Block a user