Structured outputs for regular replies; streaming can be enabled/disabled

This commit is contained in:
2026-03-01 19:25:15 -08:00
parent 15cffb3b66
commit 907a7caec6
7 changed files with 218 additions and 18 deletions

View File

@@ -11,6 +11,7 @@ jest.mock('discord.js', () => {
setDescription: jest.fn().mockReturnThis(),
addNumberOption: jest.fn().mockReturnThis(),
addIntegerOption: jest.fn().mockReturnThis(),
addBooleanOption: jest.fn().mockReturnThis(),
})),
};
});
@@ -23,6 +24,7 @@ describe('config command (llmconf)', () => {
options: {
getInteger: jest.Mock;
getNumber: jest.Mock;
getBoolean: jest.Mock;
};
reply: jest.Mock;
};
@@ -30,11 +32,22 @@ describe('config command (llmconf)', () => {
beforeEach(() => {
jest.clearAllMocks();
process.env.ADMIN = '123456789012345678';
// Reset config to defaults
const state = configCommand.state();
state.max_new_tokens = 1500;
state.min_new_tokens = 1;
state.temperature = 0.8;
state.top_p = 0.6;
state.msg_context = 8;
state.frequency_penalty = 0.0;
state.presence_penalty = 0.0;
state.streaming = false;
mockInteraction = {
user: { id: '123456789012345678' },
options: {
getInteger: jest.fn(),
getNumber: jest.fn(),
getBoolean: jest.fn(),
},
reply: jest.fn(),
};
@@ -98,4 +111,47 @@ describe('config command (llmconf)', () => {
const state = configCommand.state();
expect(state.msg_context).toBe(16);
});
it('should accept custom streaming value (true)', async () => {
mockInteraction.options.getBoolean.mockImplementation((name: string) => {
if (name === 'streaming') return true;
return null;
});
await configCommand.execute(mockInteraction);
const state = configCommand.state();
expect(state.streaming).toBe(true);
});
it('should accept custom streaming value (false)', async () => {
mockInteraction.options.getBoolean.mockImplementation((name: string) => {
if (name === 'streaming') return false;
return null;
});
await configCommand.execute(mockInteraction);
const state = configCommand.state();
expect(state.streaming).toBe(false);
});
it('should use default streaming value when not provided', async () => {
mockInteraction.options.getBoolean.mockReturnValue(null);
await configCommand.execute(mockInteraction);
const state = configCommand.state();
expect(state.streaming).toBe(false);
});
it('should include streaming in config output', async () => {
mockInteraction.options.getBoolean.mockReturnValue(null);
await configCommand.execute(mockInteraction);
expect(mockInteraction.reply).toHaveBeenCalled();
const replyContent = mockInteraction.reply.mock.calls[0][0];
expect(replyContent).toContain('streaming =');
});
});

View File

@@ -40,6 +40,7 @@ describe('OllamaProvider', () => {
frequency_penalty: 0.0,
presence_penalty: 0.0,
msg_context: 8,
streaming: true,
};
beforeEach(() => {
@@ -186,6 +187,7 @@ describe('OllamaProvider streaming', () => {
frequency_penalty: 0.0,
presence_penalty: 0.0,
msg_context: 8,
streaming: true,
};
beforeEach(() => {

View File

@@ -46,6 +46,7 @@ describe('OpenAIProvider', () => {
frequency_penalty: 0.0,
presence_penalty: 0.0,
msg_context: 8,
streaming: true,
};
beforeEach(() => {
@@ -84,7 +85,7 @@ describe('OpenAIProvider', () => {
choices: [
{
message: {
content: 'Hello! This is a test response.',
content: '{"content": "Hello! This is a test response."}',
},
},
],
@@ -105,6 +106,29 @@ describe('OpenAIProvider', () => {
expect(response).toBe('Hello! This is a test response.');
expect(mockCreate).toHaveBeenCalled();
// Verify structured output format is used
expect(mockCreate).toHaveBeenCalledWith(
expect.objectContaining({
response_format: {
type: 'json_schema',
json_schema: {
name: 'miku_message',
schema: {
type: 'object',
properties: {
content: {
type: 'string',
description: 'The message content as Hatsune Miku',
},
},
required: ['content'],
additionalProperties: false,
},
},
},
})
);
});
it('should handle empty response from API', async () => {
@@ -165,7 +189,7 @@ describe('OpenAIProvider', () => {
choices: [
{
message: {
content: 'Response with defaults',
content: '{"content": "Response with defaults"}',
},
},
],
@@ -189,16 +213,22 @@ describe('OpenAIProvider', () => {
temperature: 0.5,
top_p: 0.9,
max_tokens: 128,
response_format: {
type: 'json_schema',
json_schema: expect.objectContaining({
name: 'miku_message',
}),
},
})
);
});
it('should strip </think> tags from response', async () => {
it('should parse JSON response and extract content field', async () => {
mockCreate.mockResolvedValue({
choices: [
{
message: {
content: '</think>Hello! This is the actual response.',
content: '{"content": "Hello! This is the actual response."}',
},
},
],
@@ -219,6 +249,33 @@ describe('OpenAIProvider', () => {
expect(response).toBe('Hello! This is the actual response.');
});
it('should handle empty content field in JSON response', async () => {
mockCreate.mockResolvedValue({
choices: [
{
message: {
content: '{"content": ""}',
},
},
],
});
const mockMessage = {
cleanContent: 'Hello!',
createdAt: new Date(),
author: { username: 'testuser' },
} as unknown as import('discord.js').Message;
const provider = new OpenAIProvider('test-token', 'gpt-4');
const response = await provider.requestLLMResponse(
[mockMessage],
'You are a helpful assistant',
mockConfig
);
expect(response).toBe('');
});
});
describe('OpenAIProvider streaming', () => {
@@ -230,6 +287,7 @@ describe('OpenAIProvider streaming', () => {
frequency_penalty: 0.0,
presence_penalty: 0.0,
msg_context: 8,
streaming: true,
};
beforeEach(() => {
@@ -354,6 +412,7 @@ describe('OpenAIProvider structured voice response', () => {
frequency_penalty: 0.0,
presence_penalty: 0.0,
msg_context: 8,
streaming: true,
};
beforeEach(() => {
@@ -390,12 +449,20 @@ describe('OpenAIProvider structured voice response', () => {
});
expect(mockCreate).toHaveBeenCalledWith(
expect.objectContaining({
response_format: { type: 'json_object' },
response_format: {
type: 'json_schema',
json_schema: {
name: 'voice_message_response',
schema: expect.objectContaining({
type: 'object',
}),
},
},
})
);
});
it('should use json_object response format', async () => {
it('should use json_schema response format', async () => {
mockCreate.mockResolvedValue({
choices: [
{
@@ -410,7 +477,29 @@ describe('OpenAIProvider structured voice response', () => {
await provider.requestStructuredVoiceResponse('Test message', 'You are Miku', mockConfig);
const callArgs = mockCreate.mock.calls[0][0];
expect(callArgs.response_format).toEqual({ type: 'json_object' });
expect(callArgs.response_format).toEqual({
type: 'json_schema',
json_schema: {
name: 'voice_message_response',
schema: {
type: 'object',
properties: {
message: {
type: 'string',
description:
'Your spoken response as Miku (keep it concise, 1-3 sentences)',
},
instruct: {
type: 'string',
description:
'A one-sentence instruction describing the expression/tone to use',
},
},
required: ['message', 'instruct'],
additionalProperties: false,
},
},
});
});
it('should handle empty response from API', async () => {

View File

@@ -249,11 +249,12 @@ async function onNewMessage(message: Message) {
// Check if provider supports streaming
const provider = state.provider!();
const useStreaming = provider.requestLLMResponseStreaming && state.llmconf!().streaming;
logInfo(
`[bot] Provider: ${provider.name()}, streaming supported: ${!!provider.requestLLMResponseStreaming}`
`[bot] Provider: ${provider.name()}, streaming supported: ${!!provider.requestLLMResponseStreaming}, streaming enabled: ${useStreaming}`
);
if (provider.requestLLMResponseStreaming) {
if (useStreaming) {
// Use streaming - accumulate all chunks, show only the delta (newest piece) in embed
let lastUpdateTime = Date.now();
const updateIntervalMs = 1500; // Update every ~1.5 seconds

View File

@@ -10,6 +10,7 @@ const config: LLMConfig = {
msg_context: 8,
frequency_penalty: 0.0,
presence_penalty: 0.0,
streaming: false,
};
async function configCommand(interaction: ChatInputCommandInteraction) {
@@ -29,6 +30,7 @@ async function configCommand(interaction: ChatInputCommandInteraction) {
interaction.options.getNumber('frequency_penalty') ?? config.frequency_penalty;
config.presence_penalty =
interaction.options.getNumber('presence_penalty') ?? config.presence_penalty;
config.streaming = interaction.options.getBoolean('streaming') ?? config.streaming;
await interaction.reply(`
\`\`\`
max_new_tokens = ${config.max_new_tokens}
@@ -38,6 +40,7 @@ temperature = ${config.temperature}
top_p = ${config.top_p}
frequency_penalty = ${config.frequency_penalty}
presence_penalty = ${config.presence_penalty}
streaming = ${config.streaming}
\`\`\`
`);
}
@@ -80,6 +83,11 @@ export = {
)
.addIntegerOption((opt) =>
opt.setName('msg_context').setDescription('Num. messages in context (default: 8)')
)
.addBooleanOption((opt) =>
opt
.setName('streaming')
.setDescription('Enable/disable streaming responses (default: false)')
),
execute: configCommand,
state: () => config,

View File

@@ -6,4 +6,5 @@ export interface LLMConfig {
frequency_penalty: number;
presence_penalty: number;
msg_context: number;
streaming: boolean;
}

View File

@@ -6,11 +6,15 @@ import { serializeMessageHistory } from '../util';
import { logError, logInfo } from '../../logging';
import { LLMConfig } from '../commands/types';
const USER_PROMPT = `Continue the following Discord conversation by completing the next message, playing the role of Hatsune Miku. The conversation must progress forward, and you must avoid repeating yourself.
const USER_PROMPT = `Complete the next message as Hatsune Miku. Return JSON with only the "content" field filled in.
Each message is represented as a line of JSON. Refer to other users by their "name" instead of their "author" field whenever possible.
Conversation (last line is yours to complete):
The conversation is as follows. The last line is the message you have to complete. Please ONLY return the string contents of the "content" field, that go in place of the ellipses. Do not include the enclosing quotation marks in your response.
`;
const USER_PROMPT_STREAMING = `Complete the next message as Hatsune Miku. Output ONLY the raw message content (no JSON, no quotes).
Conversation (last line is yours to complete):
`;
@@ -83,18 +87,35 @@ export class OpenAIProvider implements LLMProvider {
temperature: params?.temperature || 0.5,
top_p: params?.top_p || 0.9,
max_tokens: params?.max_new_tokens || 128,
response_format: {
type: 'json_schema',
json_schema: {
name: 'miku_message',
schema: {
type: 'object',
properties: {
content: {
type: 'string',
description: 'The message content as Hatsune Miku',
},
},
required: ['content'],
additionalProperties: false,
},
},
},
});
let content = response.choices[0].message.content;
if (!content) {
throw new TypeError('OpenAI API returned no message.');
}
if (content.lastIndexOf('</think>') > -1) {
content = content.slice(content.lastIndexOf('</think>') + 8);
}
logInfo(`[openai] API response: ${content}`);
return content;
// Parse JSON and extract content field
const parsed = JSON.parse(content);
return parsed.content || '';
} catch (err) {
logError(`[openai] API Error: ` + err);
throw err;
@@ -134,7 +155,7 @@ export class OpenAIProvider implements LLMProvider {
model: this.model,
messages: [
{ role: 'system', content: sysprompt },
{ role: 'user', content: USER_PROMPT + messageHistoryTxt },
{ role: 'user', content: USER_PROMPT_STREAMING + messageHistoryTxt },
],
temperature: params?.temperature || 0.5,
top_p: params?.top_p || 0.9,
@@ -215,7 +236,29 @@ Return ONLY valid JSON, no other text.`;
temperature: params?.temperature || 0.7,
top_p: params?.top_p || 0.9,
max_tokens: params?.max_new_tokens || 256,
response_format: { type: 'json_object' },
response_format: {
type: 'json_schema',
json_schema: {
name: 'voice_message_response',
schema: {
type: 'object',
properties: {
message: {
type: 'string',
description:
'Your spoken response as Miku (keep it concise, 1-3 sentences)',
},
instruct: {
type: 'string',
description:
'A one-sentence instruction describing the expression/tone to use',
},
},
required: ['message', 'instruct'],
additionalProperties: false,
},
},
},
});
let content = response.choices[0].message.content;