Use new TTS+RVC API

This commit is contained in:
2026-03-01 01:20:59 -08:00
parent 1921e1b781
commit e48e74c20e
3 changed files with 46 additions and 9 deletions

View File

@@ -14,6 +14,8 @@ OPENAI_HOST="http://localhost:1234/v1"
REPLY_CHANCE=0.2
RVC_HOST="http://127.0.0.1:8001"
TTS_SPEAKER="Ono_Anna"
TTS_PITCH="0"
ENABLE_MOTD=1
MOTD_CHANNEL="123456789012345678"

View File

@@ -5,16 +5,21 @@ import { requestTTSResponse } from '../../util';
const config = {
ttsSettings: {
speaker: process.env.TTS_SPEAKER || 'Vivian',
pitch_change_oct: 1,
pitch_change_sem: 0,
pitch_change_sem: parseInt(process.env.TTS_PITCH || '24', 10),
},
};
async function ttsCommand(interaction: ChatInputCommandInteraction) {
const text = interaction.options.getString('text');
const speaker = interaction.options.getString('speaker') || config.ttsSettings.speaker;
const pitch = interaction.options.getInteger('pitch') ?? config.ttsSettings.pitch_change_sem;
const instruct = interaction.options.getString('instruct');
await interaction.reply(`generating audio for "${text}"...`);
try {
const audio = await requestTTSResponse(text);
const audio = await requestTTSResponse(text, speaker, pitch, instruct);
const audioBuf = await audio.arrayBuffer();
const audioFile = new AttachmentBuilder(Buffer.from(audioBuf)).setName('mikuified.wav');
await interaction.editReply({
@@ -30,7 +35,22 @@ export = {
data: new SlashCommandBuilder()
.setName('tts')
.setDescription("Read text in Miku's voice")
.addStringOption((opt) => opt.setName('text').setDescription('Text').setRequired(true)),
.addStringOption((opt) => opt.setName('text').setDescription('Text').setRequired(true))
.addStringOption((opt) =>
opt.setName('speaker').setDescription('Speaker voice to use').setRequired(false)
)
.addIntegerOption((opt) =>
opt
.setName('pitch')
.setDescription('Pitch shift in semitones (default: 24)')
.setRequired(false)
)
.addStringOption((opt) =>
opt
.setName('instruct')
.setDescription('Instruction for how to speak the text')
.setRequired(false)
),
execute: ttsCommand,
config: config,
};

View File

@@ -284,15 +284,30 @@ async function sync(guilds: GuildManager) {
}
}
async function requestTTSResponse(txt: string): Promise<NodeFetchBlob> {
const queryParams = new URLSearchParams();
queryParams.append('token', process.env.LLM_TOKEN);
queryParams.append('text', txt);
const ttsEndpoint = `${process.env.LLM_HOST}/tts?${queryParams.toString()}`;
async function requestTTSResponse(
txt: string,
speaker?: string,
pitch?: number,
instruct?: string
): Promise<NodeFetchBlob> {
const ttsEndpoint = `${process.env.RVC_HOST}/tts-inference`;
logInfo(`[bot] Requesting TTS response for "${txt}"`);
const requestBody = {
text: txt,
language: 'English',
speaker: speaker || 'Ono_Anna',
instruct: instruct || 'Speak in a friendly and enthusiastic tone',
modelpath: 'model.pth',
f0_up_key: pitch ?? 0,
};
const res = await fetch(ttsEndpoint, {
method: 'POST',
headers: {
'Content-Type': 'application/json',
},
body: JSON.stringify(requestBody),
});
const resContents = await res.blob();
return resContents;