From e48e74c20e733743fa074039d3b1501421cd6de0 Mon Sep 17 00:00:00 2001 From: james Date: Sun, 1 Mar 2026 01:20:59 -0800 Subject: [PATCH] Use new TTS+RVC api --- discord/.env.example | 2 ++ discord/commands/tts/tts.ts | 26 +++++++++++++++++++++++--- discord/util.ts | 27 +++++++++++++++++++++------ 3 files changed, 46 insertions(+), 9 deletions(-) diff --git a/discord/.env.example b/discord/.env.example index 052ff42..ddbf5e0 100644 --- a/discord/.env.example +++ b/discord/.env.example @@ -14,6 +14,8 @@ OPENAI_HOST="http://localhost:1234/v1" REPLY_CHANCE=0.2 RVC_HOST="http://127.0.0.1:8001" +TTS_SPEAKER="Ono_Anna" +TTS_PITCH="0" ENABLE_MOTD=1 MOTD_CHANNEL="123456789012345678" diff --git a/discord/commands/tts/tts.ts b/discord/commands/tts/tts.ts index b7a98bb..8e4b2d3 100644 --- a/discord/commands/tts/tts.ts +++ b/discord/commands/tts/tts.ts @@ -5,16 +5,21 @@ import { requestTTSResponse } from '../../util'; const config = { ttsSettings: { + speaker: process.env.TTS_SPEAKER || 'Vivian', pitch_change_oct: 1, - pitch_change_sem: 0, + pitch_change_sem: parseInt(process.env.TTS_PITCH || '24', 10), }, }; async function ttsCommand(interaction: ChatInputCommandInteraction) { const text = interaction.options.getString('text'); + const speaker = interaction.options.getString('speaker') || config.ttsSettings.speaker; + const pitch = interaction.options.getInteger('pitch') ?? config.ttsSettings.pitch_change_sem; + const instruct = interaction.options.getString('instruct'); + await interaction.reply(`generating audio for "${text}"...`); try { - const audio = await requestTTSResponse(text); + const audio = await requestTTSResponse(text, speaker, pitch, instruct); const audioBuf = await audio.arrayBuffer(); const audioFile = new AttachmentBuilder(Buffer.from(audioBuf)).setName('mikuified.wav'); await interaction.editReply({ @@ -30,7 +35,22 @@ export = { data: new SlashCommandBuilder() .setName('tts') .setDescription("Read text in Miku's voice") - .addStringOption((opt) => opt.setName('text').setDescription('Text').setRequired(true)), + .addStringOption((opt) => opt.setName('text').setDescription('Text').setRequired(true)) + .addStringOption((opt) => + opt.setName('speaker').setDescription('Speaker voice to use').setRequired(false) + ) + .addIntegerOption((opt) => + opt + .setName('pitch') + .setDescription('Pitch shift in semitones (default: 24)') + .setRequired(false) + ) + .addStringOption((opt) => + opt + .setName('instruct') + .setDescription('Instruction for how to speak the text') + .setRequired(false) + ), execute: ttsCommand, config: config, }; diff --git a/discord/util.ts b/discord/util.ts index bae6871..7af8498 100644 --- a/discord/util.ts +++ b/discord/util.ts @@ -284,15 +284,30 @@ async function sync(guilds: GuildManager) { } } -async function requestTTSResponse(txt: string): Promise { - const queryParams = new URLSearchParams(); - queryParams.append('token', process.env.LLM_TOKEN); - queryParams.append('text', txt); - - const ttsEndpoint = `${process.env.LLM_HOST}/tts?${queryParams.toString()}`; +async function requestTTSResponse( + txt: string, + speaker?: string, + pitch?: number, + instruct?: string +): Promise { + const ttsEndpoint = `${process.env.RVC_HOST}/tts-inference`; logInfo(`[bot] Requesting TTS response for "${txt}"`); + + const requestBody = { + text: txt, + language: 'English', + speaker: speaker || 'Ono_Anna', + instruct: instruct || 'Speak in a friendly and enthusiastic tone', + modelpath: 'model.pth', + f0_up_key: pitch ?? 0, + }; + const res = await fetch(ttsEndpoint, { method: 'POST', + headers: { + 'Content-Type': 'application/json', + }, + body: JSON.stringify(requestBody), }); const resContents = await res.blob(); return resContents;