add inference cli
This commit is contained in:
26
README.md
26
README.md
@@ -19,3 +19,29 @@ An easy-to-use Voice Conversion framework based on VITS.<br><br>
|
||||
|
||||
> [!NOTE]
|
||||
> Currently under development... Provided as a library and API in rvc
|
||||
|
||||
## Installation and usage
|
||||
|
||||
### CLI Usage
|
||||
|
||||
#### Inference Audio
|
||||
|
||||
```sh
|
||||
rvc infer -m {model.pth} -i {input.wav} -o {output.wav}
|
||||
```
|
||||
|
||||
| option | type | default value | description | require |
|
||||
|---------------|--------------|---------------|------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------|---------|
|
||||
| modelPath | Path | | Model path or filename (reads in the directory set in env) | * |
|
||||
| inputPath | Path | | Input audio path or folder | * |
|
||||
| outputPath | Path | | Output audio path or folder | * |
|
||||
| sid | int | 0 | Speaker/Singer ID | |
|
||||
| f0_up_key | int | 0 | Transpose (integer, number of semitones, raise by an octave: 12, lower by an octave: -12) | |
|
||||
| f0_method | str | rmvpe | Pitch extraction algorithm (pm, harvest, crepe, rmvpe) | |
|
||||
| f0_file | Path \| None | None | F0 curve file (optional). One pitch per line. Replaces the default F0 and pitch modulation | |
|
||||
| index_file | Path \| None | None | Path to the feature index file | |
|
||||
| index_rate | float | 0.75 | Search feature ratio (controls accent strength, too high has artifacting) | |
|
||||
| filter_radius | int | 3 | If >=3: apply median filtering to the harvested pitch results. The value represents the filter radius and can reduce breathiness | |
|
||||
| resample_sr | int | 0 | Resample the output audio in post-processing to the final sample rate. Set to 0 for no resampling | |
|
||||
| rms_mix_rate | float | 0.25 | Adjust the volume envelope scaling. Closer to 0, the more it mimics the volume of the original vocals. Can help mask noise and make volume sound more natural when set relatively low. Closer to 1 will be more of a consistently loud volume | |
|
||||
| protect | float | 0.33 | Protect voiceless consonants and breath sounds to prevent artifacts such as tearing in electronic music. Set to 0.5 to disable. Decrease the value to increase protection, but it may reduce indexing accuracy | |
|
||||
38
poetry.lock
generated
38
poetry.lock
generated
@@ -1257,6 +1257,26 @@ files = [
|
||||
[package.dependencies]
|
||||
numpy = ">=1.7.0"
|
||||
|
||||
[[package]]
|
||||
name = "protobuf"
|
||||
version = "4.25.2"
|
||||
description = ""
|
||||
optional = false
|
||||
python-versions = ">=3.8"
|
||||
files = [
|
||||
{file = "protobuf-4.25.2-cp310-abi3-win32.whl", hash = "sha256:b50c949608682b12efb0b2717f53256f03636af5f60ac0c1d900df6213910fd6"},
|
||||
{file = "protobuf-4.25.2-cp310-abi3-win_amd64.whl", hash = "sha256:8f62574857ee1de9f770baf04dde4165e30b15ad97ba03ceac65f760ff018ac9"},
|
||||
{file = "protobuf-4.25.2-cp37-abi3-macosx_10_9_universal2.whl", hash = "sha256:2db9f8fa64fbdcdc93767d3cf81e0f2aef176284071507e3ede160811502fd3d"},
|
||||
{file = "protobuf-4.25.2-cp37-abi3-manylinux2014_aarch64.whl", hash = "sha256:10894a2885b7175d3984f2be8d9850712c57d5e7587a2410720af8be56cdaf62"},
|
||||
{file = "protobuf-4.25.2-cp37-abi3-manylinux2014_x86_64.whl", hash = "sha256:fc381d1dd0516343f1440019cedf08a7405f791cd49eef4ae1ea06520bc1c020"},
|
||||
{file = "protobuf-4.25.2-cp38-cp38-win32.whl", hash = "sha256:33a1aeef4b1927431d1be780e87b641e322b88d654203a9e9d93f218ee359e61"},
|
||||
{file = "protobuf-4.25.2-cp38-cp38-win_amd64.whl", hash = "sha256:47f3de503fe7c1245f6f03bea7e8d3ec11c6c4a2ea9ef910e3221c8a15516d62"},
|
||||
{file = "protobuf-4.25.2-cp39-cp39-win32.whl", hash = "sha256:5e5c933b4c30a988b52e0b7c02641760a5ba046edc5e43d3b94a74c9fc57c1b3"},
|
||||
{file = "protobuf-4.25.2-cp39-cp39-win_amd64.whl", hash = "sha256:d66a769b8d687df9024f2985d5137a337f957a0916cf5464d1513eee96a63ff0"},
|
||||
{file = "protobuf-4.25.2-py3-none-any.whl", hash = "sha256:a8b7a98d4ce823303145bf3c1a8bdb0f2f4642a414b196f04ad9853ed0c8f830"},
|
||||
{file = "protobuf-4.25.2.tar.gz", hash = "sha256:fe599e175cb347efc8ee524bcd4b902d11f7262c0e569ececcb89995c15f0a5e"},
|
||||
]
|
||||
|
||||
[[package]]
|
||||
name = "pycparser"
|
||||
version = "2.21"
|
||||
@@ -1748,6 +1768,22 @@ files = [
|
||||
[package.extras]
|
||||
widechars = ["wcwidth"]
|
||||
|
||||
[[package]]
|
||||
name = "tensorboardx"
|
||||
version = "2.6.2.2"
|
||||
description = "TensorBoardX lets you watch Tensors Flow without Tensorflow"
|
||||
optional = false
|
||||
python-versions = "*"
|
||||
files = [
|
||||
{file = "tensorboardX-2.6.2.2-py2.py3-none-any.whl", hash = "sha256:160025acbf759ede23fd3526ae9d9bfbfd8b68eb16c38a010ebe326dc6395db8"},
|
||||
{file = "tensorboardX-2.6.2.2.tar.gz", hash = "sha256:c6476d7cd0d529b0b72f4acadb1269f9ed8b22f441e87a84f2a3b940bb87b666"},
|
||||
]
|
||||
|
||||
[package.dependencies]
|
||||
numpy = "*"
|
||||
packaging = "*"
|
||||
protobuf = ">=3.20"
|
||||
|
||||
[[package]]
|
||||
name = "threadpoolctl"
|
||||
version = "3.2.0"
|
||||
@@ -1899,4 +1935,4 @@ zstd = ["zstandard (>=0.18.0)"]
|
||||
[metadata]
|
||||
lock-version = "2.0"
|
||||
python-versions = "3.11.2"
|
||||
content-hash = "e6513e4097292f4085650c7b341b531b648554dd5bcdd7441e73b5eb3ef543fd"
|
||||
content-hash = "4696062dd04776d959869b2425e72e760c3fc7eb64e7dbe3fc3f6d510ada0542"
|
||||
|
||||
@@ -19,6 +19,7 @@ faiss-cpu = "^1.7.4"
|
||||
python-dotenv = "^1.0.0"
|
||||
pydub = "^0.25.1"
|
||||
click = "^8.1.7"
|
||||
tensorboardx = "^2.6.2.2"
|
||||
|
||||
[project.scripts]
|
||||
rvc = "rvc:cli"
|
||||
|
||||
30
rvc/cli/cli.py
Normal file
30
rvc/cli/cli.py
Normal file
@@ -0,0 +1,30 @@
|
||||
import re
|
||||
from typing import Optional, Pattern
|
||||
|
||||
import click
|
||||
|
||||
from rvc.cli.handler.infer import infer
|
||||
from rvc.cli.handler.train import train
|
||||
from rvc.cli.handler.uvr5 import uvr
|
||||
|
||||
from rvc.cli.utils.dlmodel import dlmodel
|
||||
from rvc.cli.utils.env import env
|
||||
from rvc.cli.utils.initialize import initialize
|
||||
|
||||
|
||||
@click.group(
    context_settings={"help_option_names": ["-h", "--help"]},
    help="rvc cli feature list",
)
def cli():
    """Root command group of the ``rvc`` command-line interface.

    The group does no work itself; it only dispatches to the subcommands
    registered below. The user-visible help text comes from the explicit
    ``help=`` argument of the decorator, not from this docstring.
    """


# Register the subcommands at import time rather than inside the
# ``__main__`` guard: a console-script entry point (or any other importer)
# obtains ``cli`` by importing this module, which never executes the guard,
# and would otherwise see a group with no subcommands.
cli.add_command(infer)
cli.add_command(train)
cli.add_command(uvr)
cli.add_command(dlmodel)
cli.add_command(env)
cli.add_command(initialize)


if __name__ == "__main__":
    cli()
|
||||
131
rvc/cli/handler/infer.py
Normal file
131
rvc/cli/handler/infer.py
Normal file
@@ -0,0 +1,131 @@
|
||||
import logging
|
||||
from pathlib import Path
|
||||
|
||||
import click
|
||||
from dotenv import load_dotenv
|
||||
from scipy.io import wavfile
|
||||
|
||||
from rvc.modules.vc.modules import VC
|
||||
|
||||
|
||||
logging.getLogger("numba").setLevel(logging.WARNING)
|
||||
|
||||
|
||||
@click.command(
    context_settings={"help_option_names": ["-h", "--help"]},
    help="inference audio",
)
@click.option(
    "-m",
    "--modelPath",
    is_flag=False,
    type=str,
    help="Model path or filename (reads in the directory set in env)",
    required=True,
)
@click.option(
    "-i",
    "--inputPath",
    is_flag=False,
    type=Path,
    help="input audio path or folder",
    required=True,
)
@click.option(
    "-o",
    "--outputPath",
    is_flag=False,
    type=Path,
    help="output audio path or folder",
    required=True,
)
@click.option(
    "-s", "--sid", is_flag=False, type=int, help="Speaker/Singer id", default=0
)
@click.option("-fu", "--f0upkey", is_flag=False, type=int, help="Transpose", default=0)
@click.option(
    "-fm",
    "--f0method",
    is_flag=False,
    type=str,
    help="Pitch extraction algorithm",
    default="rmvpe",
)
@click.option(
    "-ff", "--f0file", is_flag=False, type=Path, help="F0 curve file (optional)"
)
@click.option("-if", "--indexFile", is_flag=False, type=Path, help="Feature index file")
@click.option(
    "-ir",
    "--indexRate",
    is_flag=False,
    type=float,
    help="Search feature ratio",
    default=0.75,
)
@click.option(
    "-fr",
    "--filterRadius",
    is_flag=False,
    type=int,
    help="Apply median filtering",
    default=3,
)
@click.option(
    "-rsr",
    "--resamplesr",
    is_flag=False,
    type=int,
    help="Resample the output audio",
    default=0,
)
@click.option(
    "-rmr",
    "--rmsmixrate",
    is_flag=False,
    type=float,
    help="Adjust the volume envelope scaling",
    default=0.25,
)
@click.option(
    "-p",
    "--protect",
    is_flag=False,
    type=float,
    help="Protect voiceless consonants and breath sounds",
    default=0.33,
)
def infer(
    modelpath,
    inputpath,
    outputpath,
    sid,
    f0upkey,
    f0method,
    f0file,
    indexfile,
    indexrate,
    filterradius,
    resamplesr,
    rmsmixrate,
    protect,
):
    """Run voice conversion on one input and write the result as a WAV file.

    Parameter names are the lower-cased forms of the long option names
    declared above (e.g. ``--modelPath`` arrives as ``modelpath``).

    Args:
        modelpath: Model path or filename, handed to ``VC.get_vc``.
        inputpath: Input audio path, handed to ``VC.vc_single``.
        outputpath: Destination passed to ``scipy.io.wavfile.write``.
        sid: Speaker/Singer id (default 0).
        f0upkey: Transpose amount (default 0).
        f0method: Pitch extraction algorithm name (default ``"rmvpe"``).
        f0file: Optional F0 curve file; ``None`` when not supplied.
        indexfile: Optional feature index file; ``None`` when not supplied.
        indexrate: Search feature ratio (default 0.75).
        filterradius: Median filtering radius (default 3).
        resamplesr: Output resampling rate (default 0).
        rmsmixrate: Volume envelope scaling (default 0.25).
        protect: Voiceless-consonant/breath protection (default 0.33).
    """
    # Load variables from a .env file before the model is constructed.
    # NOTE(review): presumably VC reads settings such as ``weight_root``
    # from the environment - confirm against rvc.modules.vc.modules.
    load_dotenv()

    vc = VC()
    vc.get_vc(modelpath)

    # vc_single returns a 4-tuple; the last element is unused here.
    tgt_sr, audio_opt, times, _ = vc.vc_single(
        sid,
        inputpath,
        f0upkey,
        f0method,
        f0file,
        indexfile,
        indexrate,
        filterradius,
        resamplesr,
        rmsmixrate,
        protect,
    )

    wavfile.write(outputpath, tgt_sr, audio_opt)
    click.echo(times)
    click.echo(f"Finish inference. Check {outputpath}")
|
||||
6
rvc/cli/handler/train.py
Normal file
6
rvc/cli/handler/train.py
Normal file
@@ -0,0 +1,6 @@
|
||||
import click
|
||||
|
||||
|
||||
# Placeholder ``train`` subcommand: the body is intentionally empty.
# A ``#`` comment is used instead of a docstring so click's generated
# help text stays unchanged.
@click.command()
def train():
    return None
|
||||
6
rvc/cli/handler/uvr5.py
Normal file
6
rvc/cli/handler/uvr5.py
Normal file
@@ -0,0 +1,6 @@
|
||||
import click
|
||||
|
||||
|
||||
# Placeholder ``uvr`` subcommand: the body is intentionally empty.
# A ``#`` comment is used instead of a docstring so click's generated
# help text stays unchanged.
@click.command()
def uvr():
    return None
|
||||
8
rvc/cli/utils/dlmodel.py
Normal file
8
rvc/cli/utils/dlmodel.py
Normal file
@@ -0,0 +1,8 @@
|
||||
import urllib
|
||||
import click
|
||||
|
||||
|
||||
# Placeholder ``dlmodel`` subcommand, intended to download models
# (harvest, uvr5, and more). No download logic is implemented here.
@click.command()
def dlmodel() -> None:
    return None
|
||||
13
rvc/cli/utils/env.py
Normal file
13
rvc/cli/utils/env.py
Normal file
@@ -0,0 +1,13 @@
|
||||
"""
|
||||
Set up or clean up the environment file
|
||||
usage: rvc env [set / cleanup]
|
||||
Default: [nowDir/.env]
|
||||
|
||||
"""
|
||||
|
||||
import click
|
||||
|
||||
|
||||
# Placeholder ``env`` subcommand: the body is intentionally empty.
# A ``#`` comment is used instead of a docstring so click's generated
# help text stays unchanged.
@click.command()
def env():
    return None
|
||||
14
rvc/cli/utils/initialize.py
Normal file
14
rvc/cli/utils/initialize.py
Normal file
@@ -0,0 +1,14 @@
|
||||
"""
|
||||
Usage: rvc init
|
||||
Download models and set up the environment file
|
||||
|
||||
"""
|
||||
import click
|
||||
|
||||
|
||||
import click
|
||||
|
||||
|
||||
# Placeholder ``initialize`` subcommand: the body is intentionally empty.
# A ``#`` comment is used instead of a docstring so click's generated
# help text stays unchanged.
@click.command()
def initialize():
    return None
|
||||
@@ -82,7 +82,8 @@ class Config:
|
||||
action="store_true",
|
||||
help="torch_dml",
|
||||
)
|
||||
cmd_opts: argparse.Namespace = parser.parse_args()
|
||||
cmd_opts: argparse.Namespace
|
||||
cmd_opts, _ = parser.parse_known_args()
|
||||
|
||||
cmd_opts.port = cmd_opts.port if 0 <= cmd_opts.port <= 65535 else 7865
|
||||
|
||||
|
||||
@@ -87,7 +87,7 @@ class UVR:
|
||||
export_format,
|
||||
is_hp3=is_hp3,
|
||||
)
|
||||
infos.append(f"{os.path.basename(process_path)}->Success" )
|
||||
infos.append(f"{os.path.basename(process_path)}->Success")
|
||||
yield "\n".join(infos)
|
||||
if torch.cuda.is_available():
|
||||
torch.cuda.empty_cache()
|
||||
|
||||
@@ -45,7 +45,7 @@ class VC:
|
||||
to_return_protect[1] if self.if_f0 != 0 and to_return_protect else 0.33,
|
||||
]
|
||||
|
||||
person = f'{os.getenv("weight_root")}/{sid}'
|
||||
person = sid if os.path.exists(sid) else f'{os.getenv("weight_root")}/{sid}'
|
||||
logger.info(f"Loading: {person}")
|
||||
|
||||
self.cpt = torch.load(person, map_location="cpu")
|
||||
|
||||
Reference in New Issue
Block a user