Files
Retrieval-based-Voice-Conve…/rvc/modules/uvr5/modules.py
Ftps 24a322e8a7 reconstruct uvr
· ffmpeg processing moved to soundfile, pydub
· Optimization of for statement
· Change in file path acquisition method
·ffmpegの処理をsoundfile, pydubに移行
·for文の最適化
·pathのからの取得方法の変更
2023-12-08 15:45:31 +09:00

94 lines
2.9 KiB
Python

import logging
import os
import tempfile
import traceback
from glob import glob
from pathlib import Path

import soundfile as sf
import torch
from pydub import AudioSegment

from rvc.configs.config import Config
from rvc.modules.uvr5.mdxnet import MDXNetDereverb
from rvc.modules.uvr5.vr import AudioPre, AudioPreDeEcho
# Module-level logger, named after this module's import path.
logger: logging.Logger = logging.getLogger(__name__)
class UVR:
def __init__(self):
self.need_reformat: bool = True
self.config: Config = Config()
def uvr_wrapper(
self,
audio_path: Path,
save_vocal_path: Path | None = None,
save_ins_path: Path | None = None,
agg: int = 10,
export_format: str = "flac",
model_name: str | None = None,
temp_path: Path | None = None,
):
infos = []
save_vocal_path = (
os.getenv("save_uvr_path") if not save_vocal_path else save_vocal_path
)
save_ins_path = (
os.getenv("save_uvr_path") if not save_ins_path else save_ins_path
)
if model_name is None:
model_name = os.path.basename(glob(f"{os.getenv('weight_uvr5_root')}/*")[0])
is_hp3 = "HP3" in model_name
if model_name == "onnx_dereverb_By_FoxJoy":
pre_fun = MDXNetDereverb(15, self.config.device)
else:
func = AudioPre if "DeEcho" not in model_name else AudioPreDeEcho
pre_fun = func(
agg=int(agg),
model_path=os.path.join(
os.getenv("weight_uvr5_root"), model_name # + ".pth"
),
device=self.config.device,
is_half=self.config.is_half,
)
process_paths = (
[
_
for _ in glob(f"{audio_path}/*")
if os.path.splitext(_)[-1][1:].upper() in sf.available_formats()
]
if os.path.isdir(audio_path)
else audio_path
)
for process_path in [process_paths]:
print(f"path: {process_path}")
info = sf.info(process_path)
if not (info.channels == 2 and info.samplerate == "44100"):
tmp_path = os.path.join(
temp_path or os.environ.get("TEMP"), os.path.basename(process_path)
)
AudioSegment.from_file(process_path).export(
tmp_path,
format="wav",
codec="pcm_s16le",
bitrate="16k",
parameters=["-ar", "44100"],
)
pre_fun._path_audio_(
process_path,
save_vocal_path,
save_ins_path,
export_format,
is_hp3=is_hp3,
)
infos.append(f"{os.path.basename(process_path)}->Success" )
yield "\n".join(infos)
if torch.cuda.is_available():
torch.cuda.empty_cache()