add inference cli

This commit is contained in:
Ftps
2024-01-20 22:48:15 +09:00
parent 95d989827d
commit 6d759b4b96
13 changed files with 276 additions and 4 deletions

View File

@@ -19,3 +19,29 @@ An easy-to-use Voice Conversion framework based on VITS.<br><br>
> [!NOTE]
> Currently under development... Provided as a library and API in rvc
## Installation and usage
### CLI Usage
#### Inference Audio
```sh
rvc infer -m {model.pth} -i {input.wav} -o {output.wav}
```
| option | type | default value | description | require |
|---------------|--------------|---------------|------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------|---------|
| modelPath | Path | | Model path or filename (reads in the directory set in env) | * |
| inputPath | Path | | Input audio path or folder | * |
| outputPath | Path | | Output audio path or folder | * |
| sid | int | 0 | Speaker/Singer ID | |
| f0_up_key | int | 0 | Transpose (integer, number of semitones, raise by an octave: 12, lower by an octave: -12) | |
| f0_method | str | rmvpe | Pitch extraction algorithm (pm, harvest, crepe, rmvpe) | |
| f0_file | Path \| None | None | F0 curve file (optional). One pitch per line. Replaces the default F0 and pitch modulation | |
| index_file | Path \| None | None | Path to the feature index file | |
| index_rate | float | 0.75 | Search feature ratio (controls accent strength, too high has artifacting) | |
| filter_radius | int | 3 | If >=3: apply median filtering to the harvested pitch results. The value represents the filter radius and can reduce breathiness | |
| resample_sr | int | 0 | Resample the output audio in post-processing to the final sample rate. Set to 0 for no resampling | |
| rms_mix_rate | float | 0.25 | Adjust the volume envelope scaling. The closer to 0, the more it mimics the volume of the original vocals. Can help mask noise and make volume sound more natural when set relatively low. The closer to 1, the more consistently loud the volume will be | |
| protect | float | 0.33 | Protect voiceless consonants and breath sounds to prevent artifacts such as tearing in electronic music. Set to 0.5 to disable. Decrease the value to increase protection, but it may reduce indexing accuracy | |

38
poetry.lock generated
View File

@@ -1257,6 +1257,26 @@ files = [
[package.dependencies]
numpy = ">=1.7.0"
[[package]]
name = "protobuf"
version = "4.25.2"
description = ""
optional = false
python-versions = ">=3.8"
files = [
{file = "protobuf-4.25.2-cp310-abi3-win32.whl", hash = "sha256:b50c949608682b12efb0b2717f53256f03636af5f60ac0c1d900df6213910fd6"},
{file = "protobuf-4.25.2-cp310-abi3-win_amd64.whl", hash = "sha256:8f62574857ee1de9f770baf04dde4165e30b15ad97ba03ceac65f760ff018ac9"},
{file = "protobuf-4.25.2-cp37-abi3-macosx_10_9_universal2.whl", hash = "sha256:2db9f8fa64fbdcdc93767d3cf81e0f2aef176284071507e3ede160811502fd3d"},
{file = "protobuf-4.25.2-cp37-abi3-manylinux2014_aarch64.whl", hash = "sha256:10894a2885b7175d3984f2be8d9850712c57d5e7587a2410720af8be56cdaf62"},
{file = "protobuf-4.25.2-cp37-abi3-manylinux2014_x86_64.whl", hash = "sha256:fc381d1dd0516343f1440019cedf08a7405f791cd49eef4ae1ea06520bc1c020"},
{file = "protobuf-4.25.2-cp38-cp38-win32.whl", hash = "sha256:33a1aeef4b1927431d1be780e87b641e322b88d654203a9e9d93f218ee359e61"},
{file = "protobuf-4.25.2-cp38-cp38-win_amd64.whl", hash = "sha256:47f3de503fe7c1245f6f03bea7e8d3ec11c6c4a2ea9ef910e3221c8a15516d62"},
{file = "protobuf-4.25.2-cp39-cp39-win32.whl", hash = "sha256:5e5c933b4c30a988b52e0b7c02641760a5ba046edc5e43d3b94a74c9fc57c1b3"},
{file = "protobuf-4.25.2-cp39-cp39-win_amd64.whl", hash = "sha256:d66a769b8d687df9024f2985d5137a337f957a0916cf5464d1513eee96a63ff0"},
{file = "protobuf-4.25.2-py3-none-any.whl", hash = "sha256:a8b7a98d4ce823303145bf3c1a8bdb0f2f4642a414b196f04ad9853ed0c8f830"},
{file = "protobuf-4.25.2.tar.gz", hash = "sha256:fe599e175cb347efc8ee524bcd4b902d11f7262c0e569ececcb89995c15f0a5e"},
]
[[package]]
name = "pycparser"
version = "2.21"
@@ -1748,6 +1768,22 @@ files = [
[package.extras]
widechars = ["wcwidth"]
[[package]]
name = "tensorboardx"
version = "2.6.2.2"
description = "TensorBoardX lets you watch Tensors Flow without Tensorflow"
optional = false
python-versions = "*"
files = [
{file = "tensorboardX-2.6.2.2-py2.py3-none-any.whl", hash = "sha256:160025acbf759ede23fd3526ae9d9bfbfd8b68eb16c38a010ebe326dc6395db8"},
{file = "tensorboardX-2.6.2.2.tar.gz", hash = "sha256:c6476d7cd0d529b0b72f4acadb1269f9ed8b22f441e87a84f2a3b940bb87b666"},
]
[package.dependencies]
numpy = "*"
packaging = "*"
protobuf = ">=3.20"
[[package]]
name = "threadpoolctl"
version = "3.2.0"
@@ -1899,4 +1935,4 @@ zstd = ["zstandard (>=0.18.0)"]
[metadata]
lock-version = "2.0"
python-versions = "3.11.2"
content-hash = "e6513e4097292f4085650c7b341b531b648554dd5bcdd7441e73b5eb3ef543fd"
content-hash = "4696062dd04776d959869b2425e72e760c3fc7eb64e7dbe3fc3f6d510ada0542"

View File

@@ -19,6 +19,7 @@ faiss-cpu = "^1.7.4"
python-dotenv = "^1.0.0"
pydub = "^0.25.1"
click = "^8.1.7"
tensorboardx = "^2.6.2.2"
[project.scripts]
rvc = "rvc:cli"

30
rvc/cli/cli.py Normal file
View File

@@ -0,0 +1,30 @@
import re
from typing import Optional, Pattern
import click
from rvc.cli.handler.infer import infer
from rvc.cli.handler.train import train
from rvc.cli.handler.uvr5 import uvr
from rvc.cli.utils.dlmodel import dlmodel
from rvc.cli.utils.env import env
from rvc.cli.utils.initialize import initialize
@click.group(
    context_settings={"help_option_names": ["-h", "--help"]},
    help="rvc cli feature list",
)
def cli():
    """Root command group of the rvc command line interface.

    The group does no work itself; it only dispatches to the registered
    subcommands. The help text shown to users comes from the ``help``
    argument of the decorator, not from this docstring.
    """


# Register the subcommands at import time rather than only under the
# ``__main__`` guard, so the group is fully populated when ``cli`` is imported
# and invoked by other code (for example a console-script entry point) and not
# just when this file is executed directly.
for _subcommand in (infer, train, uvr, dlmodel, env, initialize):
    cli.add_command(_subcommand)


if __name__ == "__main__":
    cli()

131
rvc/cli/handler/infer.py Normal file
View File

@@ -0,0 +1,131 @@
import logging
from pathlib import Path
import click
from dotenv import load_dotenv
from scipy.io import wavfile
from rvc.modules.vc.modules import VC
logging.getLogger("numba").setLevel(logging.WARNING)
@click.command(
    context_settings={"help_option_names": ["-h", "--help"]},
    help="inference audio",
)
@click.option(
    "-m",
    "--modelPath",
    type=str,
    help="Model path or filename (reads in the directory set in env)",
    required=True,
)
@click.option(
    "-i",
    "--inputPath",
    type=Path,
    help="input audio path or folder",
    required=True,
)
@click.option(
    "-o",
    "--outputPath",
    type=Path,
    help="output audio path or folder",
    required=True,
)
@click.option("-s", "--sid", type=int, help="Speaker/Singer id", default=0)
@click.option("-fu", "--f0upkey", type=int, help="Transpose", default=0)
@click.option(
    "-fm",
    "--f0method",
    type=str,
    help="Pitch extraction algorithm",
    default="rmvpe",
)
@click.option("-ff", "--f0file", type=Path, help="F0 curve file (optional)")
@click.option("-if", "--indexFile", type=Path, help="Feature index file")
@click.option(
    "-ir",
    "--indexRate",
    type=float,
    help="Search feature ratio",
    default=0.75,
)
@click.option(
    "-fr",
    "--filterRadius",
    type=int,
    help="Apply median filtering",
    default=3,
)
@click.option(
    "-rsr",
    "--resamplesr",
    type=int,
    help="Resample the output audio",
    default=0,
)
@click.option(
    "-rmr",
    "--rmsmixrate",
    type=float,
    help="Adjust the volume envelope scaling",
    default=0.25,
)
@click.option(
    "-p",
    "--protect",
    type=float,
    help="Protect voiceless consonants and breath sounds",
    default=0.33,
)
def infer(
    modelpath,
    inputpath,
    outputpath,
    sid,
    f0upkey,
    f0method,
    f0file,
    indexfile,
    indexrate,
    filterradius,
    resamplesr,
    rmsmixrate,
    protect,
):
    """Convert one audio input with a voice model and write the result as WAV.

    Click lower-cases the long option names, so ``--modelPath`` arrives here
    as ``modelpath`` and so on. The values are forwarded unchanged to
    ``VC.get_vc`` and ``VC.vc_single``.

    Args:
        modelpath: Model path or filename handed to ``VC.get_vc``.
        inputpath: Input audio location handed to ``VC.vc_single``.
        outputpath: Destination file for the converted audio.
        sid: Speaker/singer id.
        f0upkey: Transpose amount.
        f0method: Name of the pitch extraction algorithm.
        f0file: Optional F0 curve file, ``None`` when not given.
        indexfile: Optional feature index file, ``None`` when not given.
        indexrate: Search feature ratio.
        filterradius: Median filtering radius.
        resamplesr: Output resampling rate.
        rmsmixrate: Volume envelope scaling.
        protect: Protection amount for voiceless consonants and breaths.
    """
    # Load variables from a .env file into the environment before the model
    # is resolved.
    load_dotenv()

    vc = VC()
    vc.get_vc(modelpath)
    tgt_sr, audio_opt, times, _ = vc.vc_single(
        sid,
        inputpath,
        f0upkey,
        f0method,
        f0file,
        indexfile,
        indexrate,
        filterradius,
        resamplesr,
        rmsmixrate,
        protect,
    )

    # Make sure the destination folder exists; otherwise writing the file
    # raises FileNotFoundError only after the conversion has already run.
    Path(outputpath).parent.mkdir(parents=True, exist_ok=True)
    wavfile.write(outputpath, tgt_sr, audio_opt)

    click.echo(times)
    click.echo(f"Finish inference. Check {outputpath}")

6
rvc/cli/handler/train.py Normal file
View File

@@ -0,0 +1,6 @@
import click
@click.command()
def train():
    # Placeholder subcommand: no training logic is implemented yet.
    return None

6
rvc/cli/handler/uvr5.py Normal file
View File

@@ -0,0 +1,6 @@
import click
@click.command()
def uvr():
    # Placeholder subcommand: nothing is implemented here yet.
    return None

8
rvc/cli/utils/dlmodel.py Normal file
View File

@@ -0,0 +1,8 @@
import urllib
import click
@click.command()
def dlmodel() -> None:
    # Planned: download models (harvest, uvr5, and more).
    # Nothing is implemented yet, so the command is a no-op.
    return None

13
rvc/cli/utils/env.py Normal file
View File

@@ -0,0 +1,13 @@
"""
Set up or clean up the environment file
usage: rvc env [set / cleanup]
Default: [nowDir/.env]
"""
import click
@click.command()
def env():
    # Placeholder subcommand: environment-file handling is not implemented yet.
    return None

View File

@@ -0,0 +1,14 @@
"""
Usage: rvc init
Download models and set up the environment file
"""
import click
import click
@click.command()
def initialize():
    # Placeholder subcommand: initialization is not implemented yet.
    return None

View File

@@ -82,7 +82,8 @@ class Config:
action="store_true",
help="torch_dml",
)
cmd_opts: argparse.Namespace = parser.parse_args()
cmd_opts: argparse.Namespace
cmd_opts, _ = parser.parse_known_args()
cmd_opts.port = cmd_opts.port if 0 <= cmd_opts.port <= 65535 else 7865

View File

@@ -87,7 +87,7 @@ class UVR:
export_format,
is_hp3=is_hp3,
)
infos.append(f"{os.path.basename(process_path)}->Success" )
infos.append(f"{os.path.basename(process_path)}->Success")
yield "\n".join(infos)
if torch.cuda.is_available():
torch.cuda.empty_cache()

View File

@@ -45,7 +45,7 @@ class VC:
to_return_protect[1] if self.if_f0 != 0 and to_return_protect else 0.33,
]
person = f'{os.getenv("weight_root")}/{sid}'
person = sid if os.path.exists(sid) else f'{os.getenv("weight_root")}/{sid}'
logger.info(f"Loading: {person}")
self.cpt = torch.load(person, map_location="cpu")