import { Message } from 'discord.js';
import { LLMProvider } from './provider';
import { OpenAI } from 'openai';
import 'dotenv/config';
import { serializeMessageHistory } from '../util';
import { logError, logInfo } from '../../logging';
import { LLMConfig } from '../commands/types';

/** User-turn prefix for the non-streaming (JSON schema constrained) request. */
const USER_PROMPT = `Complete the next message as Hatsune Miku. Return JSON with only the "content" field filled in. Conversation (last line is yours to complete): `;

/** User-turn prefix for the streaming (raw text) request. */
const USER_PROMPT_STREAMING = `Complete the next message as Hatsune Miku. Output ONLY the raw message content (no JSON, no quotes). Conversation (last line is yours to complete): `;

/**
 * Closing tag that ends an inline reasoning section in streamed content.
 * Everything up to and including its last occurrence is dropped from the
 * final streamed result.
 */
const THINK_CLOSE_TAG = '</think>';

/** Delta field names checked, in order, for incremental reasoning text. */
const REASONING_KEYS = ['reasoning_content', 'reasoning'] as const;

/**
 * Resolves the sampling options sent with every completion request.
 *
 * `??` is used for `temperature` and `top_p` so that an explicit `0` is
 * honoured instead of being replaced by the default. `max_tokens` keeps `||`
 * because a budget of zero tokens is not a usable request.
 */
function samplingParams(
  params: LLMConfig | undefined,
  defaultTemperature: number,
  defaultMaxTokens: number
) {
  return {
    temperature: params?.temperature ?? defaultTemperature,
    top_p: params?.top_p ?? 0.9,
    max_tokens: params?.max_new_tokens || defaultMaxTokens,
  };
}

/**
 * Serializes the message history into the newline-delimited JSON transcript
 * appended to the user prompt, followed by a placeholder line for the reply.
 *
 * @throws TypeError when no history entry survives serialization.
 */
async function buildHistoryPrompt(history: Message[]): Promise<string> {
  const serialized = await Promise.all(history.map(serializeMessageHistory));
  const messageList = serialized.filter((x) => !!x);

  // An empty list has no last element, so this single guard covers both
  // "no messages" and the narrowing needed to read the last message.
  const lastMsg = messageList[messageList.length - 1];
  if (!lastMsg) {
    throw new TypeError('No messages with content provided in history!');
  }

  // Dummy message for the last line of the prompt, stamped 5 seconds after
  // the most recent real message.
  const replyDate = new Date(lastMsg.timestamp);
  replyDate.setSeconds(replyDate.getSeconds() + 5);
  const templateMsgTxt = JSON.stringify({
    timestamp: replyDate.toUTCString(),
    author: 'Hatsune Miku',
    name: 'Hatsune Miku',
    context: lastMsg.content,
    content: '...',
  });

  return messageList.map((msg) => JSON.stringify(msg)).join('\n') + '\n' + templateMsgTxt;
}

/**
 * Parses a JSON payload and returns it as a plain field map. Payloads that
 * are not JSON objects yield an empty map so callers fall back to defaults.
 *
 * @throws SyntaxError when `raw` is not valid JSON.
 */
function parseJsonFields(raw: string): Record<string, unknown> {
  const parsed: unknown = JSON.parse(raw);
  return typeof parsed === 'object' && parsed !== null
    ? (parsed as Record<string, unknown>)
    : {};
}

/** Returns `fields[key]` when it is a non-empty string, otherwise `fallback`. */
function stringField(fields: Record<string, unknown>, key: string, fallback: string): string {
  const value = fields[key];
  return typeof value === 'string' && value.length > 0 ? value : fallback;
}

/** Returns the first non-empty reasoning string found on a stream delta, or `''`. */
function readReasoningDelta(delta: object): string {
  const fields = delta as Record<string, unknown>;
  for (const key of REASONING_KEYS) {
    const value = fields[key];
    if (typeof value === 'string' && value.length > 0) {
      return value;
    }
  }
  return '';
}

/**
 * LLM provider backed by the OpenAI chat-completions client. The base URL is
 * read from `OPENAI_HOST`, so any OpenAI-compatible endpoint can be targeted.
 */
export class OpenAIProvider implements LLMProvider {
  private readonly client: OpenAI;
  private model: string;

  /**
   * @param token API key; defaults to the `LLM_TOKEN` environment variable.
   * @param model Model identifier sent with each request.
   * @throws TypeError when no token is available.
   */
  constructor(
    token: string | undefined = process.env.LLM_TOKEN,
    model = 'zai-org/glm-4.7-flash'
  ) {
    if (!token) {
      throw new TypeError(
        'LLM token was not passed in, and environment variable LLM_TOKEN was unset!'
      );
    }
    this.client = new OpenAI({
      baseURL: process.env.OPENAI_HOST,
      apiKey: token,
    });
    this.model = model;
  }

  /** Human-readable provider name including the current model. */
  name(): string {
    return `OpenAI (${this.model})`;
  }

  /** Switches the model used for subsequent requests. */
  setModel(model: string): void {
    this.model = model;
  }

  /**
   * Requests a single, schema-constrained reply for the given history.
   *
   * @param history Messages to serialize into the prompt transcript.
   * @param sysprompt System prompt sent as the first message.
   * @param params Sampling overrides (temperature, top_p, max_new_tokens).
   * @returns The `content` field of the model's JSON reply, or `''` when the
   *   reply has no non-empty string `content`.
   * @throws TypeError when the history is empty or the API returns no message.
   */
  async requestLLMResponse(
    history: Message[],
    sysprompt: string,
    params: LLMConfig
  ): Promise<string> {
    const messageHistoryTxt = await buildHistoryPrompt(history);
    logInfo(`[openai] Requesting response for message \nhistory: ${messageHistoryTxt}`);

    try {
      const response = await this.client.chat.completions.create({
        model: this.model,
        messages: [
          { role: 'system', content: sysprompt },
          { role: 'user', content: USER_PROMPT + messageHistoryTxt },
        ],
        ...samplingParams(params, 0.5, 128),
        response_format: {
          type: 'json_schema',
          json_schema: {
            name: 'miku_message',
            schema: {
              type: 'object',
              properties: {
                content: {
                  type: 'string',
                  description: 'The message content as Hatsune Miku',
                },
              },
              required: ['content'],
              additionalProperties: false,
            },
          },
        },
      });

      // An empty `choices` array is reported the same way as empty content.
      const content = response.choices[0]?.message.content;
      if (!content) {
        throw new TypeError('OpenAI API returned no message.');
      }
      logInfo(`[openai] API response: ${content}`);

      // Parse JSON and extract the content field.
      return stringField(parseJsonFields(content), 'content', '');
    } catch (err) {
      logError(`[openai] API Error: ${err}`);
      throw err;
    }
  }

  /**
   * Streams a raw-text reply for the given history.
   *
   * Yields `{ reasoning }` with the reasoning text accumulated so far whenever
   * a chunk carries reasoning, and `{ content }` with the content accumulated
   * so far whenever a chunk carries content.
   *
   * @param history Messages to serialize into the prompt transcript.
   * @param sysprompt System prompt sent as the first message.
   * @param params Sampling overrides (temperature, top_p, max_new_tokens).
   * @returns The full content, with any inline reasoning section (everything
   *   up to the last `</think>`) removed.
   * @throws TypeError when the history is empty.
   */
  async *requestLLMResponseStreaming(
    history: Message[],
    sysprompt: string,
    params: LLMConfig
  ): AsyncGenerator<{ reasoning?: string; content?: string; done?: boolean }, string, unknown> {
    const messageHistoryTxt = await buildHistoryPrompt(history);
    logInfo(`[openai] Requesting streaming response for message history: ${messageHistoryTxt}`);

    try {
      const stream = await this.client.chat.completions.create({
        model: this.model,
        messages: [
          { role: 'system', content: sysprompt },
          { role: 'user', content: USER_PROMPT_STREAMING + messageHistoryTxt },
        ],
        ...samplingParams(params, 0.5, 128),
        stream: true,
      });

      let fullContent = '';
      let reasoningContent = '';
      let chunkCount = 0;

      for await (const chunk of stream) {
        chunkCount++;

        // Chunks may arrive with an empty `choices` array; they carry no
        // text and must not be dereferenced.
        const delta = chunk.choices[0]?.delta;
        if (!delta) {
          continue;
        }

        // Some models and OpenAI-compatible APIs expose reasoning text under
        // `reasoning_content` or `reasoning`.
        const reasoningDelta = readReasoningDelta(delta);
        if (reasoningDelta) {
          reasoningContent += reasoningDelta;
          yield { reasoning: reasoningContent };
        }

        // Handle regular content.
        if (delta.content) {
          fullContent += delta.content;
          yield { content: fullContent };
        }
      }

      logInfo(
        `[openai] Streaming complete: ${chunkCount} chunks, ${fullContent.length} chars`
      );

      // Strip the inline reasoning section if present. Searching for the
      // actual tag matters: lastIndexOf('') always matches at the end of the
      // string and would discard the entire reply.
      const thinkEnd = fullContent.lastIndexOf(THINK_CLOSE_TAG);
      if (thinkEnd > -1) {
        fullContent = fullContent.slice(thinkEnd + THINK_CLOSE_TAG.length);
      }

      logInfo(`[openai] Streaming API response: ${fullContent}`);
      return fullContent;
    } catch (err) {
      logError(`[openai] Streaming API Error: ${err}`);
      throw err;
    }
  }

  /**
   * Request a structured response for voice messages with message and instruct fields.
   * Uses a `json_schema` response format to constrain the reply.
   *
   * @param userText Text of the user's message, embedded in the prompt.
   * @param sysprompt System prompt sent as the first message.
   * @param params Sampling overrides (temperature, top_p, max_new_tokens).
   * @returns The spoken `message` and the delivery `instruct`; each falls back
   *   to a built-in default when missing, empty, or not a string.
   * @throws TypeError when the API returns no message.
   */
  async requestStructuredVoiceResponse(
    userText: string,
    sysprompt: string,
    params: LLMConfig
  ): Promise<{ message: string; instruct: string }> {
    const prompt =
      'You are Hatsune Miku. A user wants you to respond with a voice message. \n' +
      `User message: "${userText}" ` +
      'Respond with a JSON object containing: ' +
      '- "message": Your spoken response as Miku (keep it concise, 1-3 sentences) ' +
      '- "instruct": A one-sentence instruction describing the expression/tone to use ' +
      '(e.g., "Speak cheerfully and energetically", "Whisper softly and sweetly") ' +
      'Return ONLY valid JSON, no other text.';

    logInfo(`[openai] Requesting structured voice response for: "${userText}"`);

    try {
      const response = await this.client.chat.completions.create({
        model: this.model,
        messages: [
          { role: 'system', content: sysprompt },
          { role: 'user', content: prompt },
        ],
        ...samplingParams(params, 0.7, 256),
        response_format: {
          type: 'json_schema',
          json_schema: {
            name: 'voice_message_response',
            schema: {
              type: 'object',
              properties: {
                message: {
                  type: 'string',
                  description: 'Your spoken response as Miku (keep it concise, 1-3 sentences)',
                },
                instruct: {
                  type: 'string',
                  description:
                    'A one-sentence instruction describing the expression/tone to use',
                },
              },
              required: ['message', 'instruct'],
              additionalProperties: false,
            },
          },
        },
      });

      // An empty `choices` array is reported the same way as empty content.
      const content = response.choices[0]?.message.content;
      if (!content) {
        throw new TypeError('OpenAI API returned no message.');
      }
      logInfo(`[openai] Structured API response: ${content}`);

      // Parse and validate the JSON response, substituting defaults for any
      // field the model omitted.
      const fields = parseJsonFields(content);
      return {
        message: stringField(fields, 'message', 'Hello! I am Miku~ ♪'),
        instruct: stringField(fields, 'instruct', 'Speak in a friendly and enthusiastic tone'),
      };
    } catch (err) {
      logError(`[openai] Structured API Error: ${err}`);
      throw err;
    }
  }
}