// FemScoreboard/discord/provider/openai.ts
import { Message } from 'discord.js';
import { LLMProvider } from './provider';
import { OpenAI } from 'openai';
import 'dotenv/config';
import { serializeMessageHistory } from '../util';
import { logError, logInfo } from '../../logging';
import { LLMConfig } from '../commands/types';
// Prompt for the non-streaming path: the model must reply with a JSON object
// whose "content" field is Miku's message (enforced via the json_schema
// response_format in requestLLMResponse).
const USER_PROMPT = `Complete the next message as Hatsune Miku. Return JSON with only the "content" field filled in.
Conversation (last line is yours to complete):
`;
// Prompt for the streaming path: raw text only, since streamed deltas are
// surfaced incrementally and cannot be JSON-parsed mid-stream.
const USER_PROMPT_STREAMING = `Complete the next message as Hatsune Miku. Output ONLY the raw message content (no JSON, no quotes).
Conversation (last line is yours to complete):
`;
/**
 * LLMProvider backed by an OpenAI-compatible chat completions API.
 *
 * The base URL is taken from the OPENAI_HOST environment variable, so this
 * also works against self-hosted OpenAI-compatible servers. The API token
 * comes from the constructor argument or the LLM_TOKEN environment variable.
 */
export class OpenAIProvider implements LLMProvider {
    private readonly client: OpenAI;
    private model: string;

    /**
     * @param token API key; falls back to LLM_TOKEN from the environment.
     * @param model Model identifier sent with every request.
     * @throws TypeError when no token is available from either source.
     */
    constructor(
        token: string | undefined = process.env.LLM_TOKEN,
        model = 'zai-org/glm-4.7-flash'
    ) {
        if (!token) {
            throw new TypeError(
                'LLM token was not passed in, and environment variable LLM_TOKEN was unset!'
            );
        }
        this.client = new OpenAI({
            baseURL: process.env.OPENAI_HOST,
            apiKey: token,
        });
        this.model = model;
    }

    /** Human-readable provider name including the active model. */
    name() {
        return `OpenAI (${this.model})`;
    }

    /** Switch the model used for all subsequent requests. */
    setModel(model: string) {
        this.model = model;
    }

    /**
     * Serialize the Discord history into the newline-delimited JSON transcript
     * both completion paths send to the model. The final line is a placeholder
     * message attributed to Hatsune Miku — timestamped 5 seconds after the
     * last real message — whose "content" the model is asked to complete.
     *
     * @throws TypeError when no messages survive serialization.
     */
    private async buildMessageHistoryTxt(history: Message[]): Promise<string> {
        const serialized = await Promise.all(history.map(serializeMessageHistory));
        const messageList = serialized.filter((x) => !!x);
        if (messageList.length === 0) {
            throw new TypeError('No messages with content provided in history!');
        }
        const lastMsg = messageList[messageList.length - 1]!;
        // Advance by 5 seconds so the template line sorts after the real history.
        const newDate = new Date(lastMsg.timestamp);
        newDate.setSeconds(newDate.getSeconds() + 5);
        const templateMsgTxt = JSON.stringify({
            timestamp: newDate.toUTCString(),
            author: 'Hatsune Miku',
            name: 'Hatsune Miku',
            context: lastMsg.content,
            content: '...',
        });
        return (
            messageList.map((msg) => JSON.stringify(msg)).join('\n') + '\n' + templateMsgTxt
        );
    }

    /**
     * Request a single (non-streaming) completion for the conversation.
     *
     * The response is constrained to a JSON object with a "content" string via
     * a json_schema response_format; the extracted content is returned.
     *
     * @throws TypeError when the API returns an empty message.
     */
    async requestLLMResponse(
        history: Message[],
        sysprompt: string,
        params: LLMConfig
    ): Promise<string> {
        const messageHistoryTxt = await this.buildMessageHistoryTxt(history);
        logInfo(`[openai] Requesting response for message history: ${messageHistoryTxt}`);
        try {
            const response = await this.client.chat.completions.create({
                model: this.model,
                messages: [
                    { role: 'system', content: sysprompt },
                    { role: 'user', content: USER_PROMPT + messageHistoryTxt },
                ],
                // ?? rather than ||: an explicit 0 temperature/top_p is a valid
                // sampling setting and must not fall back to the default.
                temperature: params?.temperature ?? 0.5,
                top_p: params?.top_p ?? 0.9,
                // || intentionally here: a 0-token budget is never usable.
                max_tokens: params?.max_new_tokens || 128,
                response_format: {
                    type: 'json_schema',
                    json_schema: {
                        name: 'miku_message',
                        schema: {
                            type: 'object',
                            properties: {
                                content: {
                                    type: 'string',
                                    description: 'The message content as Hatsune Miku',
                                },
                            },
                            required: ['content'],
                            additionalProperties: false,
                        },
                    },
                },
            });
            const content = response.choices[0]?.message?.content;
            if (!content) {
                throw new TypeError('OpenAI API returned no message.');
            }
            logInfo(`[openai] API response: ${content}`);
            // Extract the schema-enforced "content" field; tolerate a
            // misbehaving server by falling back to an empty string.
            const parsed = JSON.parse(content);
            return typeof parsed?.content === 'string' ? parsed.content : '';
        } catch (err) {
            logError(`[openai] API Error: ` + err);
            throw err;
        }
    }

    /**
     * Stream a completion for the conversation.
     *
     * Yields cumulative progress objects (full `reasoning` and/or `content`
     * accumulated so far, not deltas). Returns the final content with any
     * trailing <think>…</think> reasoning block stripped.
     */
    async *requestLLMResponseStreaming(
        history: Message[],
        sysprompt: string,
        params: LLMConfig
    ): AsyncGenerator<{ reasoning?: string; content?: string; done?: boolean }, string, unknown> {
        const messageHistoryTxt = await this.buildMessageHistoryTxt(history);
        logInfo(`[openai] Requesting streaming response for message history: ${messageHistoryTxt}`);
        try {
            const stream = await this.client.chat.completions.create({
                model: this.model,
                messages: [
                    { role: 'system', content: sysprompt },
                    { role: 'user', content: USER_PROMPT_STREAMING + messageHistoryTxt },
                ],
                // ?? rather than ||: explicit 0 values must be honored.
                temperature: params?.temperature ?? 0.5,
                top_p: params?.top_p ?? 0.9,
                max_tokens: params?.max_new_tokens || 128,
                stream: true,
            });
            let fullContent = '';
            let reasoningContent = '';
            let chunkCount = 0;
            for await (const chunk of stream) {
                chunkCount++;
                const delta = chunk.choices[0]?.delta;
                // Some OpenAI-compatible servers emit chunks with no choices or
                // delta (e.g. usage/keep-alive frames); the `in` checks below
                // throw on undefined, so skip such chunks.
                if (!delta) {
                    continue;
                }
                // Reasoning tokens: not part of the standard OpenAI schema, but
                // some compatible APIs emit `reasoning_content` or `reasoning`.
                const reasoningDelta =
                    ('reasoning_content' in delta && delta.reasoning_content) ||
                    ('reasoning' in delta && delta.reasoning);
                if (reasoningDelta) {
                    reasoningContent += reasoningDelta;
                    yield { reasoning: reasoningContent };
                }
                if (delta.content) {
                    fullContent += delta.content;
                    yield { content: fullContent };
                }
            }
            logInfo(
                `[openai] Streaming complete: ${chunkCount} chunks, ${fullContent.length} chars`
            );
            // Models that inline chain-of-thought wrap it in <think>…</think>;
            // keep only the text after the final closing tag.
            const CLOSE_THINK = '</think>';
            const closeIdx = fullContent.lastIndexOf(CLOSE_THINK);
            if (closeIdx > -1) {
                fullContent = fullContent.slice(closeIdx + CLOSE_THINK.length);
            }
            logInfo(`[openai] Streaming API response: ${fullContent}`);
            return fullContent;
        } catch (err) {
            logError(`[openai] Streaming API Error: ` + err);
            throw err;
        }
    }

    /**
     * Request a structured response for voice messages with message and instruct fields.
     * Uses OpenAI's structured outputs via JSON mode.
     *
     * @returns `message` (the spoken reply) and `instruct` (a one-sentence
     *          delivery/tone instruction), with friendly defaults when the
     *          model omits or mistypes a field.
     * @throws TypeError when the API returns an empty message.
     */
    async requestStructuredVoiceResponse(
        userText: string,
        sysprompt: string,
        params: LLMConfig
    ): Promise<{ message: string; instruct: string }> {
        const prompt = `You are Hatsune Miku. A user wants you to respond with a voice message.
User message: "${userText}"
Respond with a JSON object containing:
- "message": Your spoken response as Miku (keep it concise, 1-3 sentences)
- "instruct": A one-sentence instruction describing the expression/tone to use (e.g., "Speak cheerfully and energetically", "Whisper softly and sweetly")
Return ONLY valid JSON, no other text.`;
        logInfo(`[openai] Requesting structured voice response for: "${userText}"`);
        try {
            const response = await this.client.chat.completions.create({
                model: this.model,
                messages: [
                    { role: 'system', content: sysprompt },
                    { role: 'user', content: prompt },
                ],
                // ?? rather than ||: explicit 0 values must be honored.
                temperature: params?.temperature ?? 0.7,
                top_p: params?.top_p ?? 0.9,
                max_tokens: params?.max_new_tokens || 256,
                response_format: {
                    type: 'json_schema',
                    json_schema: {
                        name: 'voice_message_response',
                        schema: {
                            type: 'object',
                            properties: {
                                message: {
                                    type: 'string',
                                    description:
                                        'Your spoken response as Miku (keep it concise, 1-3 sentences)',
                                },
                                instruct: {
                                    type: 'string',
                                    description:
                                        'A one-sentence instruction describing the expression/tone to use',
                                },
                            },
                            required: ['message', 'instruct'],
                            additionalProperties: false,
                        },
                    },
                },
            });
            const content = response.choices[0]?.message?.content;
            if (!content) {
                throw new TypeError('OpenAI API returned no message.');
            }
            logInfo(`[openai] Structured API response: ${content}`);
            // Validate field types before use; fall back to defaults for
            // missing, empty, or non-string values.
            const parsed = JSON.parse(content);
            const message = typeof parsed?.message === 'string' ? parsed.message : '';
            const instruct = typeof parsed?.instruct === 'string' ? parsed.instruct : '';
            return {
                message: message || 'Hello! I am Miku~ ♪',
                instruct: instruct || 'Speak in a friendly and enthusiastic tone',
            };
        } catch (err) {
            logError(`[openai] Structured API Error: ` + err);
            throw err;
        }
    }
}