diff --git a/README.md b/README.md index 1950cd6..ddc808e 100644 --- a/README.md +++ b/README.md @@ -19,3 +19,29 @@ An easy-to-use Voice Conversion framework based on VITS.

> [!NOTE] > Currently under development... Provided as a library and API in rvc + +## Installation and usage + +### CLI Usage + +#### Inference Audio + +```sh +rvc infer -m {model.pth} -i {input.wav} -o {output.wav} +``` + +| option | type | default value | description | require | +|---------------|--------------|---------------|------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------|---------| +| modelPath | Path | | Model path or filename (reads in the directory set in env) | * | +| inputPath | Path | | Input audio path or folder | * | +| outputPath | Path | | Output audio path or folder | * | +| sid | int | 0 | Speaker/Singer ID | | +| f0_up_key | int | 0 | Transpose (integer, number of semitones, raise by an octave: 12, lower by an octave: -12) | | +| f0_method | str | rmvpe | Pitch extraction algorithm (pm, harvest, crepe, rmvpe) | | +| f0_file | Path \| None | None | F0 curve file (optional). One pitch per line. Replaces the default F0 and pitch modulation | | +| index_file | Path \| None | None | Path to the feature index file | | +| index_rate | float | 0.75 | Search feature ratio (controls accent strength; too high a value introduces artifacts) | | +| filter_radius | int | 3 | If >=3: apply median filtering to the harvested pitch results. The value represents the filter radius and can reduce breathiness | | +| resample_sr | int | 0 | Resample the output audio in post-processing to the final sample rate. Set to 0 for no resampling | | +| rms_mix_rate | float | 0.25 | Adjust the volume envelope scaling. Closer to 0, the more it mimics the volume of the original vocals. Can help mask noise and make volume sound more natural when set relatively low. 
Closer to 1 will be more of a consistently loud volume | | +| protect | float | 0.33 | Protect voiceless consonants and breath sounds to prevent artifacts such as tearing in electronic music. Set to 0.5 to disable. Decrease the value to increase protection, but it may reduce indexing accuracy | | \ No newline at end of file diff --git a/poetry.lock b/poetry.lock index 760f1a1..104a836 100644 --- a/poetry.lock +++ b/poetry.lock @@ -1257,6 +1257,26 @@ files = [ [package.dependencies] numpy = ">=1.7.0" +[[package]] +name = "protobuf" +version = "4.25.2" +description = "" +optional = false +python-versions = ">=3.8" +files = [ + {file = "protobuf-4.25.2-cp310-abi3-win32.whl", hash = "sha256:b50c949608682b12efb0b2717f53256f03636af5f60ac0c1d900df6213910fd6"}, + {file = "protobuf-4.25.2-cp310-abi3-win_amd64.whl", hash = "sha256:8f62574857ee1de9f770baf04dde4165e30b15ad97ba03ceac65f760ff018ac9"}, + {file = "protobuf-4.25.2-cp37-abi3-macosx_10_9_universal2.whl", hash = "sha256:2db9f8fa64fbdcdc93767d3cf81e0f2aef176284071507e3ede160811502fd3d"}, + {file = "protobuf-4.25.2-cp37-abi3-manylinux2014_aarch64.whl", hash = "sha256:10894a2885b7175d3984f2be8d9850712c57d5e7587a2410720af8be56cdaf62"}, + {file = "protobuf-4.25.2-cp37-abi3-manylinux2014_x86_64.whl", hash = "sha256:fc381d1dd0516343f1440019cedf08a7405f791cd49eef4ae1ea06520bc1c020"}, + {file = "protobuf-4.25.2-cp38-cp38-win32.whl", hash = "sha256:33a1aeef4b1927431d1be780e87b641e322b88d654203a9e9d93f218ee359e61"}, + {file = "protobuf-4.25.2-cp38-cp38-win_amd64.whl", hash = "sha256:47f3de503fe7c1245f6f03bea7e8d3ec11c6c4a2ea9ef910e3221c8a15516d62"}, + {file = "protobuf-4.25.2-cp39-cp39-win32.whl", hash = "sha256:5e5c933b4c30a988b52e0b7c02641760a5ba046edc5e43d3b94a74c9fc57c1b3"}, + {file = "protobuf-4.25.2-cp39-cp39-win_amd64.whl", hash = "sha256:d66a769b8d687df9024f2985d5137a337f957a0916cf5464d1513eee96a63ff0"}, + {file = "protobuf-4.25.2-py3-none-any.whl", hash = 
"sha256:a8b7a98d4ce823303145bf3c1a8bdb0f2f4642a414b196f04ad9853ed0c8f830"}, + {file = "protobuf-4.25.2.tar.gz", hash = "sha256:fe599e175cb347efc8ee524bcd4b902d11f7262c0e569ececcb89995c15f0a5e"}, +] + [[package]] name = "pycparser" version = "2.21" @@ -1748,6 +1768,22 @@ files = [ [package.extras] widechars = ["wcwidth"] +[[package]] +name = "tensorboardx" +version = "2.6.2.2" +description = "TensorBoardX lets you watch Tensors Flow without Tensorflow" +optional = false +python-versions = "*" +files = [ + {file = "tensorboardX-2.6.2.2-py2.py3-none-any.whl", hash = "sha256:160025acbf759ede23fd3526ae9d9bfbfd8b68eb16c38a010ebe326dc6395db8"}, + {file = "tensorboardX-2.6.2.2.tar.gz", hash = "sha256:c6476d7cd0d529b0b72f4acadb1269f9ed8b22f441e87a84f2a3b940bb87b666"}, +] + +[package.dependencies] +numpy = "*" +packaging = "*" +protobuf = ">=3.20" + [[package]] name = "threadpoolctl" version = "3.2.0" @@ -1899,4 +1935,4 @@ zstd = ["zstandard (>=0.18.0)"] [metadata] lock-version = "2.0" python-versions = "3.11.2" -content-hash = "e6513e4097292f4085650c7b341b531b648554dd5bcdd7441e73b5eb3ef543fd" +content-hash = "4696062dd04776d959869b2425e72e760c3fc7eb64e7dbe3fc3f6d510ada0542" diff --git a/pyproject.toml b/pyproject.toml index ca37830..8d118b2 100644 --- a/pyproject.toml +++ b/pyproject.toml @@ -19,6 +19,7 @@ faiss-cpu = "^1.7.4" python-dotenv = "^1.0.0" pydub = "^0.25.1" click = "^8.1.7" +tensorboardx = "^2.6.2.2" [project.scripts] rvc = "rvc:cli" diff --git a/rvc/cli/cli.py b/rvc/cli/cli.py new file mode 100644 index 0000000..01fcea8 --- /dev/null +++ b/rvc/cli/cli.py @@ -0,0 +1,30 @@ +import re +from typing import Optional, Pattern + +import click + +from rvc.cli.handler.infer import infer +from rvc.cli.handler.train import train +from rvc.cli.handler.uvr5 import uvr + +from rvc.cli.utils.dlmodel import dlmodel +from rvc.cli.utils.env import env +from rvc.cli.utils.initialize import initialize + + +@click.group( + context_settings={"help_option_names": ["-h", "--help"]}, + 
help="rvc cli feature list", +) +def cli(): + pass + + +if __name__ == "__main__": + cli.add_command(infer) + cli.add_command(train) + cli.add_command(uvr) + cli.add_command(dlmodel) + cli.add_command(env) + cli.add_command(initialize) + cli() diff --git a/rvc/cli/handler/infer.py b/rvc/cli/handler/infer.py new file mode 100644 index 0000000..8ebd91b --- /dev/null +++ b/rvc/cli/handler/infer.py @@ -0,0 +1,131 @@ +import logging +from pathlib import Path + +import click +from dotenv import load_dotenv +from scipy.io import wavfile + +from rvc.modules.vc.modules import VC + + +logging.getLogger("numba").setLevel(logging.WARNING) + + +@click.command( + context_settings={"help_option_names": ["-h", "--help"]}, + help="inference audio", +) +@click.option( + "-m", + "--modelPath", + is_flag=False, + type=str, + help="Model path or filename (reads in the directory set in env)", + required=True, +) +@click.option( + "-i", + "--inputPath", + is_flag=False, + type=Path, + help="input audio path or folder", + required=True, +) +@click.option( + "-o", + "--outputPath", + is_flag=False, + type=Path, + help="output audio path or folder", + required=True, +) +@click.option( + "-s", "--sid", is_flag=False, type=int, help="Speaker/Singer id", default=0 +) +@click.option("-fu", "--f0upkey", is_flag=False, type=int, help="Transpose", default=0) +@click.option( + "-fm", + "--f0method", + is_flag=False, + type=str, + help="Pitch extraction algorith", + default="rmvpe", +) +@click.option( + "-ff", "--f0file", is_flag=False, type=Path, help="F0 curve file (optional)" +) +@click.option("-if", "--indexFile", is_flag=False, type=Path, help="Feature index file") +@click.option( + "-ir", + "--indexRate", + is_flag=False, + type=float, + help="Search feature ratio", + default=0.75, +) +@click.option( + "-fr", + "--filterRadius", + is_flag=False, + type=int, + help="Apply median filtering", + default=3, +) +@click.option( + "-rsr", + "--resamplesr", + is_flag=False, + type=int, + help="Resample 
the output audio", + default=0, +) +@click.option( + "-rmr", + "--rmsmixrate", + is_flag=False, + type=float, + help="Adjust the volume envelope scaling", + default=0.25, +) +@click.option( + "-p", + "--protect", + is_flag=False, + type=float, + help="Protect voiceless consonants and breath sounds", + default=0.33, +) +def infer( + modelpath, + inputpath, + outputpath, + sid, + f0upkey, + f0method, + f0file, + indexfile, + indexrate, + filterradius, + resamplesr, + rmsmixrate, + protect, +): + load_dotenv() + vc = VC() + vc.get_vc(modelpath) + tgt_sr, audio_opt, times, _ = vc.vc_single( + sid, + inputpath, + f0upkey, + f0method, + f0file, + indexfile, + indexrate, + filterradius, + resamplesr, + rmsmixrate, + protect, + ) + wavfile.write(outputpath, tgt_sr, audio_opt) + click.echo(times) + click.echo(f"Finish inference. Check {outputpath}") diff --git a/rvc/cli/handler/train.py b/rvc/cli/handler/train.py new file mode 100644 index 0000000..d5ff831 --- /dev/null +++ b/rvc/cli/handler/train.py @@ -0,0 +1,6 @@ +import click + + +@click.command() +def train(): + pass diff --git a/rvc/cli/handler/uvr5.py b/rvc/cli/handler/uvr5.py new file mode 100644 index 0000000..249ec6b --- /dev/null +++ b/rvc/cli/handler/uvr5.py @@ -0,0 +1,6 @@ +import click + + +@click.command() +def uvr(): + pass diff --git a/rvc/cli/utils/dlmodel.py b/rvc/cli/utils/dlmodel.py new file mode 100644 index 0000000..506b0a9 --- /dev/null +++ b/rvc/cli/utils/dlmodel.py @@ -0,0 +1,8 @@ +import urllib +import click + + +@click.command() +def dlmodel() -> None: + # Download models [harvest, uvr5, and more ] + pass diff --git a/rvc/cli/utils/env.py b/rvc/cli/utils/env.py new file mode 100644 index 0000000..818f0cf --- /dev/null +++ b/rvc/cli/utils/env.py @@ -0,0 +1,13 @@ +""" +setup or cleanup environment file +usage: rvc env [set / cleanup] +Default: [nowDir/.env] + +""" + +import click + + +@click.command() +def env(): + pass diff --git a/rvc/cli/utils/initialize.py b/rvc/cli/utils/initialize.py new file 
mode 100644 index 0000000..afe471c --- /dev/null +++ b/rvc/cli/utils/initialize.py @@ -0,0 +1,14 @@ +""" +Usage: rvc init +download model and setup environment file + +""" +import click + + +import click + + +@click.command() +def initialize(): + pass diff --git a/rvc/configs/config.py b/rvc/configs/config.py index 6794043..57897c5 100644 --- a/rvc/configs/config.py +++ b/rvc/configs/config.py @@ -82,7 +82,8 @@ class Config: action="store_true", help="torch_dml", ) - cmd_opts: argparse.Namespace = parser.parse_args() + cmd_opts: argparse.Namespace + cmd_opts, _ = parser.parse_known_args() cmd_opts.port = cmd_opts.port if 0 <= cmd_opts.port <= 65535 else 7865 diff --git a/rvc/modules/uvr5/modules.py b/rvc/modules/uvr5/modules.py index ccd6496..cb61d54 100644 --- a/rvc/modules/uvr5/modules.py +++ b/rvc/modules/uvr5/modules.py @@ -87,7 +87,7 @@ class UVR: export_format, is_hp3=is_hp3, ) - infos.append(f"{os.path.basename(process_path)}->Success" ) + infos.append(f"{os.path.basename(process_path)}->Success") yield "\n".join(infos) if torch.cuda.is_available(): torch.cuda.empty_cache() diff --git a/rvc/modules/vc/modules.py b/rvc/modules/vc/modules.py index 0c00b39..ce5fc84 100644 --- a/rvc/modules/vc/modules.py +++ b/rvc/modules/vc/modules.py @@ -45,7 +45,7 @@ class VC: to_return_protect[1] if self.if_f0 != 0 and to_return_protect else 0.33, ] - person = f'{os.getenv("weight_root")}/{sid}' + person = sid if os.path.exists(sid) else f'{os.getenv("weight_root")}/{sid}' logger.info(f"Loading: {person}") self.cpt = torch.load(person, map_location="cpu")