Skip to content

Commit

Permalink
Merge pull request #30 from keitakn/feature/issue29
Browse files Browse the repository at this point in the history
にじボイスのAPIからBase64の音声データを返すAPIが出たのでそれを利用するように変更
  • Loading branch information
keitakn authored Jan 13, 2025
2 parents a1ee62a + 7297163 commit 178c1d6
Show file tree
Hide file tree
Showing 7 changed files with 17 additions and 221 deletions.
4 changes: 0 additions & 4 deletions backend/README.md
Original file line number Diff line number Diff line change
Expand Up @@ -17,10 +17,6 @@ MacOSを利用する前提の手順になります。
```bash
export GEMINI_API_KEY="https://aistudio.google.com/ で発行したAPIキー"
export NIJIVOICE_API_KEY="https://platform.nijivoice.com/ で発行したAPIキー"
export R2_ENDPOINT_URL="Cloudflareで作成したR2バケットのエンドポイントURLを指定(S3 APIの値)"
export R2_ACCESS_KEY_ID="Cloudflareで作成したアクセスキーID"
export R2_SECRET_ACCESS_KEY="Cloudflareで作成したアクセスシークレットキー"
export R2_BUCKET_NAME="Cloudflareで作成したR2バケット名"
```

### uvのインストール
Expand Down
4 changes: 0 additions & 4 deletions backend/docker-compose.yml
Original file line number Diff line number Diff line change
Expand Up @@ -8,10 +8,6 @@ services:
environment:
GEMINI_API_KEY: ${GEMINI_API_KEY}
NIJIVOICE_API_KEY: ${NIJIVOICE_API_KEY}
R2_ENDPOINT_URL: ${R2_ENDPOINT_URL}
R2_ACCESS_KEY_ID: ${R2_ACCESS_KEY_ID}
R2_SECRET_ACCESS_KEY: ${R2_SECRET_ACCESS_KEY}
R2_BUCKET_NAME: ${R2_BUCKET_NAME}
volumes:
- ./Makefile:/Makefile
- ./pyproject.toml:/pyproject.toml
Expand Down
1 change: 0 additions & 1 deletion backend/pyproject.toml
Original file line number Diff line number Diff line change
Expand Up @@ -5,7 +5,6 @@ description = "AIとのリアルタイムなやり取りを行う為の実験用
readme = "README.md"
requires-python = ">=3.13"
dependencies = [
"boto3>=1.35.91",
"fastapi>=0.115.6",
"google-genai>=0.4.0",
"types-requests>=2.32.0.20241016",
Expand Down
73 changes: 7 additions & 66 deletions backend/src/presentation/router/realtime_apis.py
Original file line number Diff line number Diff line change
Expand Up @@ -8,62 +8,12 @@
from google import genai
from google.genai.live import AsyncSession # noqa: F401
from log.logger import AppLogger
import boto3
from botocore.config import Config
import uuid
from datetime import datetime

router = APIRouter()
app_logger = AppLogger()

# R2 configuration.
# Builds an S3-API client via boto3 using the endpoint URL, access key ID and
# secret access key read from the R2_ENDPOINT_URL / R2_ACCESS_KEY_ID /
# R2_SECRET_ACCESS_KEY environment variables, with signature version "s3v4"
# and region name "auto". os.getenv returns None for any variable that is unset.
r2 = boto3.client(
"s3",
endpoint_url=os.getenv("R2_ENDPOINT_URL"),
aws_access_key_id=os.getenv("R2_ACCESS_KEY_ID"),
aws_secret_access_key=os.getenv("R2_SECRET_ACCESS_KEY"),
config=Config(signature_version="s3v4"),
region_name="auto",
)

# Bucket name used by the upload helper; None when R2_BUCKET_NAME is unset.
R2_BUCKET_NAME = os.getenv("R2_BUCKET_NAME")


async def upload_to_r2(audio_url: str) -> str:
    """Copy a TTS-generated audio file into R2 and return a presigned URL.

    Downloads the file at ``audio_url``, stores it in the ``R2_BUCKET_NAME``
    bucket under a date-partitioned key inside a freshly generated UUID
    directory, and returns a presigned ``get_object`` URL for it.

    The HTTP download and both R2 calls are blocking, so they run in a worker
    thread via ``asyncio.to_thread``; calling them directly from this
    coroutine would stall the event loop for their whole duration.

    Args:
        audio_url: URL of the audio file returned by the TTS service.

    Returns:
        A presigned URL for the uploaded object, valid for 3600 seconds.

    Raises:
        Exception: Any error raised while downloading or uploading is logged
            and then re-raised unchanged.
    """
    import asyncio  # function-scope import keeps this block self-contained

    def _transfer() -> str:
        """Download the audio, upload it to R2, and presign the object."""
        # Bounded wait: without a timeout a stalled host hangs the call forever.
        response = requests.get(audio_url, timeout=30)
        response.raise_for_status()

        now = datetime.now()
        directory_uuid = str(uuid.uuid4())
        # Date-partitioned key; the UUID directory keeps each upload unique.
        file_key = (
            "anonymous-users/generated-audio-files/"
            f"year={now.year:04d}/month={now.month:02d}/date={now.day:02d}/"
            f"{directory_uuid}/audio.wav"
        )

        r2.put_object(
            Bucket=R2_BUCKET_NAME,
            Key=file_key,
            Body=response.content,
            ContentType="audio/wav",
        )

        # Presigned URL expires after one hour.
        url: str = r2.generate_presigned_url(
            "get_object",
            Params={"Bucket": R2_BUCKET_NAME, "Key": file_key},
            ExpiresIn=3600,
        )
        return url

    try:
        return await asyncio.to_thread(_transfer)
    except Exception as e:
        app_logger.logger.error(f"R2へのアップロード中にエラーが発生: {e}")
        # Bare raise re-raises the active exception with its traceback intact.
        raise
# Nijivoice "generate-encoded-voice" endpoint for one hard-coded voice-actor ID,
# and the API key read from the NIJIVOICE_API_KEY environment variable
# (None when unset).
# NOTE(review): the websocket handler in this file appears to read
# generatedVoice.base64Audio from the response — confirm against the API docs.
TTS_API_URL = "https://api.nijivoice.com/api/platform/v1/voice-actors/16e979a8-cd0f-49d4-a4c4-7a25aa42e184/generate-encoded-voice"
TTS_API_KEY = os.getenv("NIJIVOICE_API_KEY")


class SendEmailDto(TypedDict):
Expand Down Expand Up @@ -215,9 +165,6 @@ async def create_google_calendar_event(
"system_instruction": system_prompt,
}

TTS_API_URL = "https://api.nijivoice.com/api/platform/v1/voice-actors/16e979a8-cd0f-49d4-a4c4-7a25aa42e184/generate-voice"
TTS_API_KEY = os.getenv("NIJIVOICE_API_KEY")


@router.websocket("/realtime-apis/video-chat")
async def video_chat_websocket_endpoint(websocket: WebSocket) -> None:
Expand Down Expand Up @@ -443,23 +390,17 @@ async def receive_from_gemini() -> None:
tts_data = tts_response.json()
if (
"generatedVoice" in tts_data
and "audioFileUrl"
and "base64Audio"
in tts_data["generatedVoice"]
):
tts_audio_url = tts_data["generatedVoice"][
"audioFileUrl"
base64_audio = tts_data["generatedVoice"][
"base64Audio"
]

# R2にアップロードして署名付きURLを取得
r2_audio_url = await upload_to_r2(
tts_audio_url
)

await websocket.send_text(
json.dumps({"audio": r2_audio_url})
json.dumps({"audio": base64_audio})
)

combined_text = ""
combined_text = ""

# クライアント側にAI Assistantのターンが終わった事を知らせる
await websocket.send_text(
Expand Down
72 changes: 0 additions & 72 deletions backend/uv.lock

Some generated files are not rendered by default. Learn more about how customized files appear on GitHub.

14 changes: 8 additions & 6 deletions frontend/src/app/_components/InputPromptForm.tsx
Original file line number Diff line number Diff line change
Expand Up @@ -48,10 +48,9 @@ class Response {
type Message = {
role: 'user' | 'assistant';
message: string;
audioUrl?: string;
};

const log = logger.child({ module: 'src/app/_components/InputPromptForm.tsx' });
const log = logger.child({ module: 'InputPromptForm' });

export function InputPromptForm() {
const [prompt, setPrompt] = useState<string>('');
Expand Down Expand Up @@ -141,8 +140,13 @@ export function InputPromptForm() {
return;
}

const response = await fetch(audioUrl.current);
const arrayBuffer = await response.arrayBuffer();
// Base64データをデコードしてArrayBufferに変換
const binaryString = atob(audioUrl.current);
const bytes = new Uint8Array(binaryString.length);
for (let i = 0; i < binaryString.length; i++) {
bytes[i] = binaryString.charCodeAt(i);
}
const arrayBuffer = bytes.buffer;

const audioBuffer = await playAudioContextRef.current.decodeAudioData(arrayBuffer);

Expand Down Expand Up @@ -203,7 +207,6 @@ export function InputPromptForm() {
setMessages(prev => [...prev, {
role: 'assistant',
message: lastAssistantMessage,
audioUrl: audioUrl.current || undefined,
}]);
newResponseMessage = '';
setStreamingMessage('');
Expand Down Expand Up @@ -436,7 +439,6 @@ export function InputPromptForm() {
avatar="/omochi.png"
message={message.message}
showFeedback
audioUrl={message.audioUrl}
/>
);
})}
Expand Down
70 changes: 2 additions & 68 deletions frontend/src/app/_components/MessageCard.tsx
Original file line number Diff line number Diff line change
Expand Up @@ -3,7 +3,7 @@
import { Icon } from '@iconify/react';
import { Avatar, Badge, Button, cn, Link, Tooltip } from '@nextui-org/react';
import { useClipboard } from '@nextui-org/use-clipboard';
import { type HTMLAttributes, type ReactNode, type RefObject, useCallback, useEffect, useRef, useState } from 'react';
import { type HTMLAttributes, type ReactNode, type RefObject, useCallback, useRef, useState } from 'react';

type Props = HTMLAttributes<HTMLDivElement> & {
avatar?: string;
Expand All @@ -13,20 +13,17 @@ type Props = HTMLAttributes<HTMLDivElement> & {
status?: 'success' | 'failed';
attempts?: number;
messageClassName?: string;
audioUrl?: string;
onAttemptChange?: (attempt: number) => void;
onMessageCopy?: (content: string | string[]) => void;
onFeedback?: (feedback: 'like' | 'dislike') => void;
onAttemptFeedback?: (feedback: 'like' | 'dislike' | 'same') => void;
};

export function MessageCard({ ref, avatar, message, showFeedback, attempts = 1, currentAttempt = 1, status, onMessageCopy, onAttemptChange, onFeedback, onAttemptFeedback, className, messageClassName, audioUrl, ...props }: Props & { ref?: RefObject<HTMLDivElement> }) {
export function MessageCard({ ref, avatar, message, showFeedback, attempts = 1, currentAttempt = 1, status, onMessageCopy, onAttemptChange, onFeedback, onAttemptFeedback, className, messageClassName, ...props }: Props & { ref?: RefObject<HTMLDivElement> }) {
const [feedback, setFeedback] = useState<'like' | 'dislike'>();
const [attemptFeedback, setAttemptFeedback] = useState<'like' | 'dislike' | 'same'>();
const [isPlaying, setIsPlaying] = useState(false);

const messageRef = useRef<HTMLDivElement>(null);
const audioRef = useRef<HTMLAudioElement | null>(null);

const { copied, copy } = useClipboard();

Expand Down Expand Up @@ -86,55 +83,6 @@ export function MessageCard({ ref, avatar, message, showFeedback, attempts = 1,
[onAttemptFeedback],
);

const handlePlayAudio = async () => {
  // Nothing to play without an audio URL.
  if (!audioUrl) {
    return;
  }

  // Pressing the button while audio is playing pauses it instead of restarting.
  const activeAudio = audioRef.current;
  if (activeAudio && isPlaying) {
    activeAudio.pause();
    setIsPlaying(false);
    return;
  }

  // Shared handler: playback finished or failed, so clear the playing state
  // and drop the reference to the audio element.
  const resetPlayback = () => {
    setIsPlaying(false);
    audioRef.current = null;
  };

  try {
    const player = new Audio(audioUrl);
    audioRef.current = player;

    // Inline-playback flags added for iOS support.
    player.playsInline = true;
    player.webkitPlaysInline = true;

    player.addEventListener('ended', resetPlayback);
    player.addEventListener('error', resetPlayback);

    await player.play();
    setIsPlaying(true);
  }
  catch (error) {
    console.error('音声再生エラー:', error);
    setIsPlaying(false);
  }
};

// Cleanup when the component unmounts: pause any audio element still held in
// audioRef and drop the reference. The empty dependency array means the
// effect itself runs only once.
useEffect(() => {
return () => {
if (audioRef.current) {
audioRef.current.pause();
audioRef.current = null;
}
};
}, []);

return (
<div {...props} ref={ref} className={cn('flex gap-3', className)}>
<div className="relative flex-none">
Expand Down Expand Up @@ -162,20 +110,6 @@ export function MessageCard({ ref, avatar, message, showFeedback, attempts = 1,
</div>
{showFeedback && !hasFailed && (
<div className="absolute right-2 top-2 flex rounded-full bg-content2 shadow-small">
{audioUrl && (
<Button
isIconOnly
radius="full"
size="sm"
variant="light"
onPress={handlePlayAudio}
>
<Icon
className="text-lg text-default-600"
icon={isPlaying ? 'solar:pause-circle-linear' : 'solar:play-circle-linear'}
/>
</Button>
)}
<Button isIconOnly radius="full" size="sm" variant="light" onPress={handleCopy}>
{copied
? (
Expand Down

0 comments on commit 178c1d6

Please sign in to comment.