Use new TTS+RVC API

2026-03-01 01:20:59 -08:00
parent 1921e1b781
commit e48e74c20e
3 changed files with 46 additions and 9 deletions
@@ -14,6 +14,8 @@ OPENAI_HOST="http://localhost:1234/v1"
 REPLY_CHANCE=0.2

 RVC_HOST="http://127.0.0.1:8001"
+TTS_SPEAKER="Ono_Anna"
+TTS_PITCH="0"

 ENABLE_MOTD=1
 MOTD_CHANNEL="123456789012345678"
@@ -5,16 +5,21 @@ import { requestTTSResponse } from '../../util';

 const config = {
    ttsSettings: {
+        speaker: process.env.TTS_SPEAKER || 'Vivian',
        pitch_change_oct: 1,
-        pitch_change_sem: 0,
+        pitch_change_sem: parseInt(process.env.TTS_PITCH || '24', 10),
    },
 };

 async function ttsCommand(interaction: ChatInputCommandInteraction) {
    const text = interaction.options.getString('text');
+    const speaker = interaction.options.getString('speaker') || config.ttsSettings.speaker;
+    const pitch = interaction.options.getInteger('pitch') ?? config.ttsSettings.pitch_change_sem;
+    const instruct = interaction.options.getString('instruct');
+
    await interaction.reply(`generating audio for "${text}"...`);
    try {
-        const audio = await requestTTSResponse(text);
+        const audio = await requestTTSResponse(text, speaker, pitch, instruct);
        const audioBuf = await audio.arrayBuffer();
        const audioFile = new AttachmentBuilder(Buffer.from(audioBuf)).setName('mikuified.wav');
        await interaction.editReply({
@@ -30,7 +35,22 @@ export = {
    data: new SlashCommandBuilder()
        .setName('tts')
        .setDescription("Read text in Miku's voice")
-        .addStringOption((opt) => opt.setName('text').setDescription('Text').setRequired(true)),
+        .addStringOption((opt) => opt.setName('text').setDescription('Text').setRequired(true))
+        .addStringOption((opt) =>
+            opt.setName('speaker').setDescription('Speaker voice to use').setRequired(false)
+        )
+        .addIntegerOption((opt) =>
+            opt
+                .setName('pitch')
+                .setDescription('Pitch shift in semitones (default: 24)')
+                .setRequired(false)
+        )
+        .addStringOption((opt) =>
+            opt
+                .setName('instruct')
+                .setDescription('Instruction for how to speak the text')
+                .setRequired(false)
+        ),
    execute: ttsCommand,
    config: config,
 };
@@ -284,15 +284,30 @@ async function sync(guilds: GuildManager) {
    }
 }

-async function requestTTSResponse(txt: string): Promise<NodeFetchBlob> {
-    const queryParams = new URLSearchParams();
-    queryParams.append('token', process.env.LLM_TOKEN);
-    queryParams.append('text', txt);
-
-    const ttsEndpoint = `${process.env.LLM_HOST}/tts?${queryParams.toString()}`;
+async function requestTTSResponse(
+    txt: string,
+    speaker?: string,
+    pitch?: number,
+    instruct?: string
+): Promise<NodeFetchBlob> {
+    const ttsEndpoint = `${process.env.RVC_HOST}/tts-inference`;
    logInfo(`[bot] Requesting TTS response for "${txt}"`);
+
+    const requestBody = {
+        text: txt,
+        language: 'English',
+        speaker: speaker || 'Ono_Anna',
+        instruct: instruct || 'Speak in a friendly and enthusiastic tone',
+        modelpath: 'model.pth',
+        f0_up_key: pitch ?? 0,
+    };
+
    const res = await fetch(ttsEndpoint, {
        method: 'POST',
+        headers: {
+            'Content-Type': 'application/json',
+        },
+        body: JSON.stringify(requestBody),
    });
    const resContents = await res.blob();
    return resContents;