From afe9ba87755a7b196a485a31eadb1afa82417bde Mon Sep 17 00:00:00 2001 From: keitakn Date: Wed, 29 Jan 2025 16:53:33 +0900 Subject: [PATCH] =?UTF-8?q?:art:=20#21=20=E3=81=AB=E3=81=98=E3=83=9C?= =?UTF-8?q?=E3=82=A4=E3=82=B9=E3=81=A7=E9=9F=B3=E5=A3=B0=E3=82=92=E7=94=9F?= =?UTF-8?q?=E6=88=90=E3=81=99=E3=82=8B=E5=87=A6=E7=90=86=E3=82=92=E8=BF=BD?= =?UTF-8?q?=E5=8A=A0?= MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit --- frontend/src/app/api/voices/route.ts | 92 +++++++++++++++++++ .../voice-chat/_components/VoiceChatForm.tsx | 36 +++++++- 2 files changed, 125 insertions(+), 3 deletions(-) create mode 100644 frontend/src/app/api/voices/route.ts diff --git a/frontend/src/app/api/voices/route.ts b/frontend/src/app/api/voices/route.ts new file mode 100644 index 0000000..94a5e39 --- /dev/null +++ b/frontend/src/app/api/voices/route.ts @@ -0,0 +1,92 @@ +import z from 'zod'; + +function getAcceptableScriptLength() { + return 2000; +} + +function isSurrogatePear(upper: number, lower: number): boolean { + return upper >= 0xD800 && upper <= 0xDBFF && lower >= 0xDC00 && lower <= 0xDFFF; +} + +function mbStrLen(str: string): number { + let ret = 0; + + for (let i = 0; i < str.length; i++, ret++) { + const upper = str.charCodeAt(i); + const lower = str.length > i + 1 ? 
str.charCodeAt(i + 1) : 0; + + if (isSurrogatePear(upper, lower)) { + i++; + } + } + + return ret; +} +function isAcceptableScript(script: unknown): boolean { + if (typeof script !== 'string') { + return false; + } + + return mbStrLen(script) <= getAcceptableScriptLength(); +} + +const generateVoiceRequestSchema = z.object({ + script: z + .string({ message: '文章は必須です。' }) + .refine(value => isAcceptableScript(value), { + message: `文章は2000文字まで入力が可能です。`, + }), +}); + +const nijivoiceGeneratedVoiceSchema = z.object({ + base64Audio: z.string().base64(), + duration: z.number().nonnegative(), + remainingCredits: z.number().nonnegative(), +}); + +const nijivoiceGenerateVoiceResponseBodySchema = z.object({ + generatedVoice: nijivoiceGeneratedVoiceSchema, +}); + +type NijivoiceGenerateVoiceResponseBody = z.infer<typeof nijivoiceGenerateVoiceResponseBodySchema>; + +function isNijivoiceGenerateVoiceResponseBody(value: unknown): value is NijivoiceGenerateVoiceResponseBody { + const result = nijivoiceGenerateVoiceResponseBodySchema.safeParse(value); + + return result.success; +} + +export const runtime = 'edge'; + +export async function POST(request: Request) { + const requestBody = await request.json(); + + generateVoiceRequestSchema.parse(requestBody); + + // https://app.nijivoice.com/characters/16e979a8-cd0f-49d4-a4c4-7a25aa42e184 を利用 + const url = 'https://api.nijivoice.com/api/platform/v1/voice-actors/16e979a8-cd0f-49d4-a4c4-7a25aa42e184/generate-encoded-voice'; + const options = { + method: 'POST', + headers: { + 'x-api-key': String(process.env.NIJIVOICE_API_KEY), + 'accept': 'application/json', + 'content-type': 'application/json', + }, + body: JSON.stringify({ + script: requestBody.script, + format: 'wav', + // 「ぽの」の推奨スピードは0.8なので0.8に設定 + // https://app.nijivoice.com/characters/16e979a8-cd0f-49d4-a4c4-7a25aa42e184 + speed: '0.8', + }), + } as const; + + const response = await fetch(url, options); + + const responseBody = await response.json(); + if (isNijivoiceGenerateVoiceResponseBody(responseBody)) { + return 
Response.json({ base64Audio: responseBody.generatedVoice.base64Audio }, { status: 201 }); + } + + return Response.json({ requestBody }); +} diff --git a/frontend/src/app/voice-chat/_components/VoiceChatForm.tsx b/frontend/src/app/voice-chat/_components/VoiceChatForm.tsx index 1bd45b4..7bcd2e9 100644 --- a/frontend/src/app/voice-chat/_components/VoiceChatForm.tsx +++ b/frontend/src/app/voice-chat/_components/VoiceChatForm.tsx @@ -346,7 +346,7 @@ export function VoiceChatForm() { // Update getEphemeralToken to use the backend endpoint const getEphemeralToken = async () => { - const response = await fetch('http://localhost:8000/realtime-apis/voice-chat', { + const response = await fetch(String(process.env.NEXT_PUBLIC_EPHEMERAL_TOKEN_ENDPOINT), { method: 'POST', }); @@ -365,7 +365,7 @@ export function VoiceChatForm() { let newResponseMessage = ''; // Handle incoming messages from the data channel - const handleDataChannelMessage = (event: MessageEvent) => { + const handleDataChannelMessage = async (event: MessageEvent) => { try { const msg = JSON.parse(event.data); // log.debug('Received message:', msg); @@ -388,11 +388,41 @@ export function VoiceChatForm() { case 'response.text.done': if (newResponseMessage !== '') { const lastAssistantMessage = newResponseMessage; - // TODO: lastAssistantMessageを使って音声を再生する + + // メッセージを追加 setMessages(prev => [...prev, { role: 'assistant', message: lastAssistantMessage, }]); + + newResponseMessage = ''; + setStreamingMessage(''); + + // 音声を生成して再生 + try { + const response = await fetch('/api/voices', { + method: 'POST', + headers: { + 'Content-Type': 'application/json', + }, + body: JSON.stringify({ + script: lastAssistantMessage, + }), + }); + + if (!response.ok) { + throw new Error('音声生成に失敗しました'); + } + + const audioData = await response.json(); + if (audioData.base64Audio) { + await playAudio(audioData.base64Audio); + } + } + catch (error) { + log.error('音声生成エラー:', error); + } + newResponseMessage = ''; setStreamingMessage(''); }