This commit is contained in:
Ftps
2024-01-21 01:15:44 +09:00
parent c839cab165
commit 7e0326146d
7 changed files with 17 additions and 22 deletions

View File

@@ -6,11 +6,11 @@ import click
from rvc.cli.handler.infer import infer from rvc.cli.handler.infer import infer
from rvc.cli.handler.train import train from rvc.cli.handler.train import train
from rvc.cli.handler.uvr5 import uvr from rvc.cli.handler.uvr5 import uvr
from rvc.cli.utils.dlmodel import dlmodel from rvc.cli.utils.dlmodel import dlmodel
from rvc.cli.utils.env import env from rvc.cli.utils.env import env
from rvc.cli.utils.initialize import init from rvc.cli.utils.initialize import init
@click.group( @click.group(
context_settings={"help_option_names": ["-h", "--help"]}, context_settings={"help_option_names": ["-h", "--help"]},
help="rvc cli feature list", help="rvc cli feature list",
@@ -18,6 +18,7 @@ from rvc.cli.utils.initialize import init
def cli(): def cli():
pass pass
def main(): def main():
cli.add_command(infer) cli.add_command(infer)
cli.add_command(train) cli.add_command(train)

View File

@@ -7,7 +7,6 @@ from scipy.io import wavfile
from rvc.modules.vc.modules import VC from rvc.modules.vc.modules import VC
logging.getLogger("numba").setLevel(logging.WARNING) logging.getLogger("numba").setLevel(logging.WARNING)

View File

@@ -1,4 +1,5 @@
import urllib import urllib
import click import click

View File

@@ -6,9 +6,6 @@ download model and setup environmmnt file
import click import click
import click
@click.command() @click.command()
def init(): def init():
pass pass

View File

@@ -18,15 +18,13 @@ try:
except Exception: # pylint: disable=broad-exception-caught except Exception: # pylint: disable=broad-exception-caught
pass pass
import logging import logging
from time import time as ttime
import torch.nn as nn import torch.nn as nn
import torch.nn.functional as F import torch.nn.functional as F
from librosa.filters import mel
from librosa.util import normalize, pad_center, tiny from librosa.util import normalize, pad_center, tiny
from scipy.signal import get_window from scipy.signal import get_window
from librosa.filters import mel
from time import time as ttime
logger = logging.getLogger(__name__) logger = logging.getLogger(__name__)
@@ -161,7 +159,6 @@ class STFT(torch.nn.Module):
return reconstruction return reconstruction
class BiGRU(nn.Module): class BiGRU(nn.Module):
def __init__(self, input_features, hidden_features, num_layers): def __init__(self, input_features, hidden_features, num_layers):
super(BiGRU, self).__init__() super(BiGRU, self).__init__()
@@ -415,8 +412,6 @@ class E2E(nn.Module):
return x return x
class MelSpectrogram(torch.nn.Module): class MelSpectrogram(torch.nn.Module):
def __init__( def __init__(
self, self,

View File

@@ -64,11 +64,11 @@ class Slicer:
def _apply_slice(self, waveform, begin, end): def _apply_slice(self, waveform, begin, end):
if len(waveform.shape) > 1: if len(waveform.shape) > 1:
return waveform[ return waveform[
:, begin * self.hop_size: min(waveform.shape[1], end * self.hop_size) :, begin * self.hop_size : min(waveform.shape[1], end * self.hop_size)
] ]
else: else:
return waveform[ return waveform[
begin * self.hop_size: min(waveform.shape[0], end * self.hop_size) begin * self.hop_size : min(waveform.shape[0], end * self.hop_size)
] ]
# @timeit # @timeit
@@ -106,7 +106,7 @@ class Slicer:
continue continue
# Need slicing. Record the range of silent frames to be removed. # Need slicing. Record the range of silent frames to be removed.
if i - silence_start <= self.max_sil_kept: if i - silence_start <= self.max_sil_kept:
pos = rms_list[silence_start: i + 1].argmin() + silence_start pos = rms_list[silence_start : i + 1].argmin() + silence_start
if silence_start == 0: if silence_start == 0:
sil_tags.append((0, pos)) sil_tags.append((0, pos))
else: else:
@@ -114,17 +114,17 @@ class Slicer:
clip_start = pos clip_start = pos
elif i - silence_start <= self.max_sil_kept * 2: elif i - silence_start <= self.max_sil_kept * 2:
pos = rms_list[ pos = rms_list[
i - self.max_sil_kept: silence_start + self.max_sil_kept + 1 i - self.max_sil_kept : silence_start + self.max_sil_kept + 1
].argmin() ].argmin()
pos += i - self.max_sil_kept pos += i - self.max_sil_kept
pos_l = ( pos_l = (
rms_list[ rms_list[
silence_start: silence_start + self.max_sil_kept + 1 silence_start : silence_start + self.max_sil_kept + 1
].argmin() ].argmin()
+ silence_start + silence_start
) )
pos_r = ( pos_r = (
rms_list[i - self.max_sil_kept: i + 1].argmin() rms_list[i - self.max_sil_kept : i + 1].argmin()
+ i + i
- self.max_sil_kept - self.max_sil_kept
) )
@@ -137,12 +137,12 @@ class Slicer:
else: else:
pos_l = ( pos_l = (
rms_list[ rms_list[
silence_start: silence_start + self.max_sil_kept + 1 silence_start : silence_start + self.max_sil_kept + 1
].argmin() ].argmin()
+ silence_start + silence_start
) )
pos_r = ( pos_r = (
rms_list[i - self.max_sil_kept: i + 1].argmin() rms_list[i - self.max_sil_kept : i + 1].argmin()
+ i + i
- self.max_sil_kept - self.max_sil_kept
) )
@@ -159,7 +159,7 @@ class Slicer:
and total_frames - silence_start >= self.min_interval and total_frames - silence_start >= self.min_interval
): ):
silence_end = min(total_frames, silence_start + self.max_sil_kept) silence_end = min(total_frames, silence_start + self.max_sil_kept)
pos = rms_list[silence_start: silence_end + 1].argmin() + silence_start pos = rms_list[silence_start : silence_end + 1].argmin() + silence_start
sil_tags.append((pos, total_frames + 1)) sil_tags.append((pos, total_frames + 1))
# Apply and return slices. # Apply and return slices.
if len(sil_tags) == 0: if len(sil_tags) == 0:

View File

@@ -9,7 +9,9 @@ def export_onnx(ModelPath, ExportedPath):
vec_channels = 256 if cpt.get("version", "v1") == "v1" else 768 vec_channels = 256 if cpt.get("version", "v1") == "v1" else 768
test_phone = torch.rand(1, 200, vec_channels) # hidden unit test_phone = torch.rand(1, 200, vec_channels) # hidden unit
test_phone_lengths = torch.tensor([200]).long() # hidden unit length (doesn't seem to help) test_phone_lengths = torch.tensor(
[200]
).long() # hidden unit length (doesn't seem to help)
test_pitch = torch.randint(size=(1, 200), low=5, high=255) # Base frequency (in Hz) test_pitch = torch.randint(size=(1, 200), low=5, high=255) # Base frequency (in Hz)
test_pitchf = torch.rand(1, 200) # nsf base frequency test_pitchf = torch.rand(1, 200) # nsf base frequency
test_ds = torch.LongTensor([0]) # Speaker ID test_ds = torch.LongTensor([0]) # Speaker ID