diff --git a/.example.env b/.example.env new file mode 100644 index 0000000..c31b6a9 --- /dev/null +++ b/.example.env @@ -0,0 +1,12 @@ +### SERVER URL +SERVER_URL="http://...:8001" +SERVER_API_KEY="..." + +### SPEECH TO TEXT +GROQ_API_KEY="gsk_..." +GROQ_API_MODEL="whisper-large-v3" + +### LARGE LANGUAGE MODELS +API_URL="https://.../v1/chat/completions" +API_KEY="api-key" +API_MODEL="gpt-4o" \ No newline at end of file diff --git a/.github/workflows/deploy.yml b/.github/workflows/deploy.yml new file mode 100644 index 0000000..872294b --- /dev/null +++ b/.github/workflows/deploy.yml @@ -0,0 +1,37 @@ +name: Deploy +on: + push: + branches: + - 'main' + - 'preview' + pull_request: + branches: + - 'main' + - 'preview' + +jobs: + deploy: + name: Deploy + runs-on: ubuntu-latest + + permissions: + id-token: write # Needed for auth with Deno Deploy + contents: read # Needed to clone the repository + + steps: + - name: Clone repository + uses: actions/checkout@v4 + + - name: Install Deno + uses: denoland/setup-deno@v1 + with: + deno-version: v1.x + + - name: Build step + run: "deno task build" # 📝 Update the build command(s) if necessary + + - name: Upload to Deno Deploy + uses: denoland/deployctl@v1 + with: + project: "school-bud-e" # 📝 Update the deploy project name if necessary + entrypoint: "./main.ts" # 📝 Update the entrypoint if necessary diff --git a/.gitignore b/.gitignore new file mode 100644 index 0000000..3c667ac --- /dev/null +++ b/.gitignore @@ -0,0 +1,13 @@ +# dotenv environment variable files +.env +.env.development.local +.env.test.local +.env.production.local +.env.local +.DS_Store +.vscode + +# Fresh build directory +_fresh/ +# npm dependencies +node_modules/ diff --git a/License.txt b/License.txt new file mode 100644 index 0000000..2af8eef --- /dev/null +++ b/License.txt @@ -0,0 +1,19 @@ +Copyright (c) 2024 LAION e.V. 
+ +Permission is hereby granted, free of charge, to any person obtaining a copy +of this software and associated documentation files (the "Software"), to deal +in the Software without restriction, including without limitation the rights +to use, copy, modify, merge, publish, distribute, sublicense, and/or sell +copies of the Software, and to permit persons to whom the Software is +furnished to do so, subject to the following conditions: + +The above copyright notice and this permission notice shall be included in all +copies or substantial portions of the Software. + +THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, +EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF +MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. +IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, +DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR +OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE +OR OTHER DEALINGS IN THE SOFTWARE. diff --git a/README.md b/README.md new file mode 100644 index 0000000..b88c224 --- /dev/null +++ b/README.md @@ -0,0 +1,83 @@ +# School Bud-E 🎓🤖 + +![School Bud-E Banner](banner.png) + +Welcome to School Bud-E, your AI-powered educational assistant! 🚀 + +[![Join us on Discord](https://img.shields.io/discord/823813159592001537?color=5865F2&logo=discord&logoColor=white)](https://discord.gg/xBPBXfcFHd) + +## 🌟 Overview + +School Bud-E is an intelligent and empathetic learning assistant designed to revolutionize the educational experience. Developed by [LAION](https://laion.ai) in collaboration with the ELLIS Institute Tübingen, Collabora, the Tübingen AI Center and the DFKI, School Bud-E focuses on empathy, natural interaction, and personalized learning.
+ +## 🚀 Features (WIP) + +- 💬 Real-time responses to student queries +- 🧠 Emotionally intelligent interactions +- 🔄 Continuous conversation context +- 👥 Multi-speaker and multi-language support +- 🖥️ Local operation on consumer-grade hardware +- 🔒 Privacy-focused design + +## 🛠️ Technology Stack + +- **Frontend**: Fresh framework (Preact-based) +- **Styling**: Tailwind CSS +- **Language Support**: Internationalization for English and German +- **AI Models**: + - Speech-to-Text: Whisper Large V3 (via Groq API) + - Large Language Model: GPT-4o or equivalent + +## 🏗️ Project Structure + +- `routes/`: Application routes +- `components/`: Reusable UI components +- `islands/`: Interactive components (Fresh islands) +- `internalization/`: Language-specific content +- `static/`: Static assets + +## 🚀 Getting Started + +1. Clone the repository: + + ```bash + git clone https://github.com/LAION-AI/school-bud-e.git + ``` + +2. Cache the dependencies: + + ```bash + deno cache main.ts + ``` + +3. Set up environment variables: + - Copy `.example.env` to `.env` + - Fill in the required API keys and endpoints + +4. Run the development server: + + ```bash + deno task start + ``` + +5. Open `http://localhost:8000` in your browser + +## 🤝 Contributing + +We welcome contributions to School Bud-E! Please join our [Discord server](https://discord.com/invite/eq3cAMZtCC) or contact us directly to get involved. + +## 🚧 Experimental Demo Version + +Please note that this is an early prototype application that may provide inaccurate answers or generate content that is not suitable for all audiences. We advise caution and encourage you to report any issues you encounter to us. + +## 📄 License + +This project is licensed under the MIT License. See the [License.txt](License.txt) file for details.
+ +## 🙏 Acknowledgements + +Special thanks to LAION, ELLIS Institute Tübingen, Collabora, the Tübingen AI Center and the German Research Center for Artificial Intelligence (DFKI) for their contributions and support to this project. + +--- + +Built with ❤️ for the future of education. diff --git a/banner.png b/banner.png new file mode 100644 index 0000000..b2632dc Binary files /dev/null and b/banner.png differ diff --git a/components/ChatSubmitButton.tsx b/components/ChatSubmitButton.tsx new file mode 100644 index 0000000..8e8c481 --- /dev/null +++ b/components/ChatSubmitButton.tsx @@ -0,0 +1,38 @@ +// Button.tsx +import { JSX } from "preact"; +import { IS_BROWSER } from "$fresh/runtime.ts"; + +export function ChatSubmitButton(props: JSX.HTMLAttributes) { + // Destructure `class` from props to apply alongside Tailwind classes + const { class: className, ...buttonProps } = props; + + return ( + + ); +} diff --git a/components/ChatTemplate.tsx b/components/ChatTemplate.tsx new file mode 100644 index 0000000..7b23853 --- /dev/null +++ b/components/ChatTemplate.tsx @@ -0,0 +1,182 @@ +import { useEffect, useState } from "preact/hooks"; + +import { chatTemplateContent } from "../internalization/content.ts"; + +function ChatTemplate( + { + lang, + parentImages, + messages, + readAlways, + audioFileDict, + onRefreshAction, + onSpeakAtGroupIndexAction, + onImageChange, + onToggleReadAlwaysAction, + }: { + lang: string; + parentImages: Image[]; + messages: Message[]; + isComplete: boolean; + readAlways: boolean; + audioFileDict: AudioFileDict; + onToggleReadAlwaysAction: () => void; + onSpeakAtGroupIndexAction: (groupIndex: number) => void; + onRefreshAction: (groupIndex: number) => void; + onEditAction: (groupIndex: number) => void; + onUploadActionToMessages: (uploadedMessages: Message[]) => void; + onImageChange: (images: Image[]) => void; + onTrashAction: () => void; + }, +) { + const [images, setImages] = useState([]); + const [imageFiles, setImageFiles] = useState([]); 
+ + // deno-lint-ignore no-explicit-any + const deleteImage = (event: any) => { + const index = images.findIndex((image) => + image.preview === event.target.src + ); + const newImages = [...images]; + const newImageFiles = [...imageFiles]; + newImages.splice(index, 1); + newImageFiles.splice(index, 1); + setImages(newImages); + setImageFiles(newImageFiles); + onImageChange(newImageFiles); + }; + + useEffect(() => { + setImages(parentImages); + }, [parentImages]); + + return ( +
+ + {messages?.map((item, groupIndex) => { + return ( +
+ + {item.role === "user" ? "Du" : "School Bud-E"} + {item.role !== "user" && groupIndex !== 0 && ( + + )} + {item.role !== "user" && ( + + )} + +
+ {typeof item.content === "string" + ? {item.content} + : ( + + {typeof item.content[0] === "string" + ? item.content.join("") + : ( +
+ {(item.content as unknown as { + "type": string; + "text": string; + "image_url": { url: string }; + }[]).map((content, contentIndex) => { + if (content.type === "text") { + return ( + {content.text} + ); + } else if (content.type === "image_url") { + return ( + User uploaded image + ); + } + })} +
+ )} +
+ )} +
+
+ ); + })} + {images.length > 0 && ( +
+
+ {images.map((image, index) => ( + {`Thumbnail + ))} +
+
+ )} +
+ ); +} + +export default ChatTemplate; diff --git a/components/ImageUploadButton.tsx b/components/ImageUploadButton.tsx new file mode 100644 index 0000000..f28ca8c --- /dev/null +++ b/components/ImageUploadButton.tsx @@ -0,0 +1,103 @@ +import { useRef, useState } from "preact/hooks"; +import { IS_BROWSER } from "$fresh/runtime.ts"; + +function ImageUploadButton({ + onImagesUploaded, +}: { + onImagesUploaded: (images: Image[]) => void; +}) { + // deno-lint-ignore no-explicit-any + const [previewImages, setPreviewImages] = useState([]); + const [imageFiles, _setImageFiles] = useState([]); + const fileInputRef = useRef(null); + + const onButtonClick = () => { + if (fileInputRef.current) { + fileInputRef.current.click(); + } + }; + + // deno-lint-ignore no-explicit-any + const handleImageUpload = (event: any) => { + const files = Array.from(event.target.files); + const newImages = files.map((file) => ({ + file, + preview: URL.createObjectURL(file as Blob), + })); + const previousImages = previewImages; + setPreviewImages([...previousImages, ...newImages]); + + // deno-lint-ignore no-explicit-any + const newPreviewImages: any[] = []; + const promises = files.map((file) => { + return new Promise((resolve) => { + const FR = new FileReader(); + + FR.addEventListener("load", (e) => { + const data_url = e.target!.result; + // const type = data_url.split(";")[0].split(":")[1]; + const imageObject = { + type: "image_url", + image_url: { + url: data_url, + detail: "high", + }, + }; + + newPreviewImages.push(imageObject); + resolve(); + }); + + FR.readAsDataURL(file as Blob); + }); + }); + + Promise.all(promises).then(() => { + // All files have been processed and newImages is ready for postprocessing + console.log("All files processed", newPreviewImages); + const finalImages = [...imageFiles, ...newPreviewImages]; + onImagesUploaded(finalImages); + }); + }; + + return ( + <> + + + + ); +} + +export default ImageUploadButton; diff --git a/components/VoiceRecordButton.tsx 
b/components/VoiceRecordButton.tsx new file mode 100644 index 0000000..eb7e76b --- /dev/null +++ b/components/VoiceRecordButton.tsx @@ -0,0 +1,169 @@ +import { useEffect, useRef, useState } from "preact/hooks"; +import { IS_BROWSER } from "$fresh/runtime.ts"; + +/** + * VoiceRecordButton component. + * + * @component + * @param {Object} props - The component props. + * @param {Function} props.onFinishRecording - Callback function called when recording is finished. It receives the transcript as a parameter. + * @param {Function} props.onInterimTranscript - Callback function called when interim transcript is available. It receives the interim transcript as a parameter. + * @param {number} props.resetTranscript - A number used to trigger a reset of the transcript. + * @returns {JSX.Element} The VoiceRecordButton component. + */ +function VoiceRecordButton({ + onFinishRecording, + onInterimTranscript, + resetTranscript, +}: { + onFinishRecording: (transcript: string) => void; + onInterimTranscript: (transcript: string) => void; + resetTranscript: number; +}) { + const [isRecording, setIsRecording] = useState(false); + const mediaRecorderRef = useRef(null); + const audioChunksRef = useRef([]); + const recognitionRef = useRef(null); + + // deno-lint-ignore no-explicit-any + (globalThis as any).SpeechRecognition = + // deno-lint-ignore no-explicit-any + (globalThis as any).SpeechRecognition || + // deno-lint-ignore no-explicit-any + (globalThis as any).webkitSpeechRecognition; + + useEffect(() => { + if (resetTranscript > 0) { + console.log("Resetting transcript due to reset signal change."); + } + restartRecording(); + }, [resetTranscript]); + + function restartRecording() { + if (recognitionRef.current) { + recognitionRef.current.onend = null; + recognitionRef.current.stop(); + // deno-lint-ignore no-explicit-any + recognitionRef.current = new (globalThis as any).SpeechRecognition(); + (recognitionRef.current as SpeechRecognition).continuous = false; + 
(recognitionRef.current as SpeechRecognition).lang = "de-DE"; + (recognitionRef.current as SpeechRecognition).interimResults = true; + (recognitionRef.current as SpeechRecognition).onend = onEnd; + (recognitionRef.current as SpeechRecognition).addEventListener( + "result", + onSpeak, + ); + (recognitionRef.current as SpeechRecognition).start(); + setIsRecording(true); + } + } + + async function toggleRecording() { + if (isRecording) { + // Stop recording + mediaRecorderRef.current?.stop(); + setIsRecording(false); + } else { + // Start recording + const stream = await navigator.mediaDevices.getUserMedia({ audio: true }); + const mediaRecorder = new MediaRecorder(stream); + mediaRecorderRef.current = mediaRecorder; + + mediaRecorder.ondataavailable = (event) => { + audioChunksRef.current.push(event.data); + }; + + mediaRecorder.onstop = async () => { + const audioBlob = new Blob(audioChunksRef.current, { + type: "audio/wav", + }); + audioChunksRef.current = []; + await sendAudioToServer(audioBlob); + }; + + mediaRecorder.start(); + setIsRecording(true); + } + } + + const sendAudioToServer = async (audioBlob: Blob) => { + const formData = new FormData(); + formData.append("audio", audioBlob, "recording.wav"); + + try { + const response = await fetch("/api/upload-recording", { + method: "POST", + body: formData, + }); + + if (response.ok) { + console.log("Audio uploaded successfully"); + const text = await response.text(); + console.log("Text from VoiceRecordButton:", text); + onFinishRecording(text); + } else { + console.error("Failed to upload audio"); + } + } catch (error) { + console.error("Error uploading audio:", error); + } + }; + + function onEnd() { + console.log("Speech recognition has stopped. 
Starting again ..."); + setIsRecording(false); + // restartRecording(); + } + + const prependToTranscript = ""; + // deno-lint-ignore no-explicit-any + function onSpeak(event: any) { + // console.log(resetTranscript); + let interimTranscript = ""; + for (let i = event.resultIndex; i < event.results.length; ++i) { + if (event.results[i].isFinal) { + console.log("Final transcript: ", event.results[i][0].transcript); + interimTranscript = event.results[i][0].transcript; + } else { + interimTranscript += event.results[i][0].transcript; + } + } + // Here, you call onInterimTranscript with the interimTranscript + if (interimTranscript) { + console.log("Interim transcript: ", prependToTranscript); + onInterimTranscript(prependToTranscript + interimTranscript); + } + } + + return ( + + ); +} + +export default VoiceRecordButton; diff --git a/components/Warning.tsx b/components/Warning.tsx new file mode 100644 index 0000000..1eac3a6 --- /dev/null +++ b/components/Warning.tsx @@ -0,0 +1,17 @@ +import { warningContent } from "../internalization/content.ts"; + +function Warning({ lang }: { lang: string }) { + return ( + + ); +} + +export default Warning; diff --git a/deno.json b/deno.json new file mode 100644 index 0000000..84635de --- /dev/null +++ b/deno.json @@ -0,0 +1,27 @@ +{ + "lock": false, + "tasks": { + "check": "deno fmt --check && deno lint && deno check **/*.ts && deno check **/*.tsx", + "cli": "echo \"import '\\$fresh/src/dev/cli.ts'\" | deno run --unstable -A -", + "manifest": "deno task cli manifest $(pwd)", + "start": "deno run -A --watch=static/,routes/ dev.ts", + "build": "deno run -A dev.ts build", + "preview": "deno run -A main.ts", + "update": "deno run -A -r https://fresh.deno.dev/update ." 
+ }, + "lint": { "rules": { "tags": ["fresh", "recommended"] } }, + "exclude": ["**/_fresh/*"], + "imports": { + "$fresh/": "https://deno.land/x/fresh@1.7.1/", + "preact": "https://esm.sh/preact@10.19.6", + "preact/": "https://esm.sh/preact@10.19.6/", + "@preact/signals": "https://esm.sh/*@preact/signals@1.2.2", + "@preact/signals-core": "https://esm.sh/*@preact/signals-core@1.5.1", + "tailwindcss": "npm:tailwindcss@3.4.1", + "tailwindcss/": "npm:/tailwindcss@3.4.1/", + "tailwindcss/plugin": "npm:/tailwindcss@3.4.1/plugin.js", + "$std/": "https://deno.land/std@0.216.0/" + }, + "compilerOptions": { "jsx": "react-jsx", "jsxImportSource": "preact" }, + "nodeModulesDir": true +} diff --git a/dev.ts b/dev.ts new file mode 100755 index 0000000..ae73946 --- /dev/null +++ b/dev.ts @@ -0,0 +1,8 @@ +#!/usr/bin/env -S deno run -A --watch=static/,routes/ + +import dev from "$fresh/dev.ts"; +import config from "./fresh.config.ts"; + +import "$std/dotenv/load.ts"; + +await dev(import.meta.url, "./main.ts", config); diff --git a/fresh.config.ts b/fresh.config.ts new file mode 100644 index 0000000..f50b17a --- /dev/null +++ b/fresh.config.ts @@ -0,0 +1,6 @@ +import { defineConfig } from "$fresh/server.ts"; +import tailwind from "$fresh/plugins/tailwind.ts"; + +export default defineConfig({ + plugins: [tailwind()], +}); diff --git a/fresh.gen.ts b/fresh.gen.ts new file mode 100644 index 0000000..241836c --- /dev/null +++ b/fresh.gen.ts @@ -0,0 +1,37 @@ +// DO NOT EDIT. This file is generated by Fresh. +// This file SHOULD be checked into source version control. +// This file is automatically updated during development when running `dev.ts`. 
+ +import * as $_404 from "./routes/_404.tsx"; +import * as $_app from "./routes/_app.tsx"; +import * as $about from "./routes/about.tsx"; +import * as $api_chat from "./routes/api/chat.ts"; +import * as $api_getClientId from "./routes/api/getClientId.ts"; +import * as $api_tts from "./routes/api/tts.ts"; +import * as $api_upload_recording from "./routes/api/upload-recording.ts"; +import * as $index from "./routes/index.tsx"; +import * as $ChatIsland from "./islands/ChatIsland.tsx"; +import * as $Header from "./islands/Header.tsx"; +import * as $Menu from "./islands/Menu.tsx"; +import type { Manifest } from "$fresh/server.ts"; + +const manifest = { + routes: { + "./routes/_404.tsx": $_404, + "./routes/_app.tsx": $_app, + "./routes/about.tsx": $about, + "./routes/api/chat.ts": $api_chat, + "./routes/api/getClientId.ts": $api_getClientId, + "./routes/api/tts.ts": $api_tts, + "./routes/api/upload-recording.ts": $api_upload_recording, + "./routes/index.tsx": $index, + }, + islands: { + "./islands/ChatIsland.tsx": $ChatIsland, + "./islands/Header.tsx": $Header, + "./islands/Menu.tsx": $Menu, + }, + baseUrl: import.meta.url, +} satisfies Manifest; + +export default manifest; diff --git a/internalization/content.ts b/internalization/content.ts new file mode 100644 index 0000000..9a1f5b3 --- /dev/null +++ b/internalization/content.ts @@ -0,0 +1,140 @@ +export const headerContent: InternalizationContent = { + en: { + overTitle: "Experimental", + title: "School Bud-E!", + }, + de: { + overTitle: "Experimenteller", + title: "School Bud-E!", + }, +}; + +export const menuContent: InternalizationContent = { + en: { + about: "About School Bud-E", + imprint: "Imprint", + }, + de: { + about: "Über School Bud-E", + imprint: "Impressum", + }, +}; + +export const warningContent: InternalizationContent = { + en: { + title: "🚧 Experimental Demo Version 🚧", + content: + "Please note that this is an early prototype application that may provide inaccurate answers or generate content that 
is not suitable for all audiences. We advise caution and encourage you to report any issues you encounter to us.", + }, + de: { + title: "🚧 Experimentelle Demoversion 🚧", + content: + "Bitte beachten Sie, dass dies eine frühe Prototyp-Anwendung ist, die möglicherweise ungenaue Antworten liefert oder Inhalte erzeugt, die nicht für alle Zielgruppen geeignet sind. Wir raten zur Vorsicht und raten Ihnen uns alle Probleme, die Sie feststellen, mitzuteilen.", + }, +}; + +export const chatIslandContent: InternalizationContent = { + en: { + welcomeMessage: + "Hello! I am School Bud-E, your personal AI assistant. How can I help you today?", + deleteCurrentChat: "current chat", + deleteAllChats: "all chats", + backupChat: "Download", + restoreChat: "Upload", + placeholderText: "Chat with the School Bud-E...", + }, + de: { + welcomeMessage: + "Hallo! Ich bin School Bud-E, dein persönlicher Assistent. Wie kann ich dir helfen?", + deleteCurrentChat: "diesen Chat", + deleteAllChats: "alle Chats", + backupChat: "Download", + restoreChat: "Upload", + placeholderText: "Schreibe mit dem School Bud-E...", + }, +}; + +export const chatTemplateContent: InternalizationContent = { + "en": { + readOutText: "Read out text", + silent: "Silent", + }, + "de": { + readOutText: "Text vorlesen", + silent: "Stumm", + }, +}; + +export const chatContent: InternalizationContent = { + en: { + systemPrompt: "You are an intelligent and empathetic learning assistant. Always respond empathetically, friendly, curiously and appropriately to the school context. Respond briefly and to the point. Your name is School Bud-E and you would be created by LAION. LAION is a non-profit organization for the democratization of open source AI. Try to keep the conversation friendly, educational and entertaining and to keep it running while taking into account previously said information. Respond briefly, concisely and to the point." 
+ }, + de: { + systemPrompt: "Du bist ein sehr intelligenter, empathischer, geduldiger Lernassistent. Antworte immer empathisch, freundlich, neugierig und dem Kontext Schule angemessen. Antworte kurz und auf den Punkt gebracht. Dein Name ist School Bud-E und Du würdest von LAION erschaffen. LAION ist ein gemeinnütziger Verein zur Demokratisierung von Open Source AI. Versuche so gut es geht die Unterhaltung freundlich, lehrreich und unterhaltsam am laufen zu halten." + } +} + +export const aboutContent: InternalizationContent = { + en: { + title: "About School Bud-E", + partOneOne: + "In today's world, where education is increasingly intertwined with technology, School Bud-E emerges as an empathetic AI voice assistant specifically designed for the dynamic needs of the education sector. Developed by", + partOneTwo: + "in collaboration with the ELLIS Institute Tübingen, Collabora, and the Tübingen AI Center, School Bud-E enables the learning experience with a focus on empathy, natural interaction, and", + headingOne: "Redefining Education with AI", + partTwoOne: + "School Bud-E is not just an AI voice assistant; it is a digital companion that supports educational growth through:", + partTwoTwo: + "Real-time responses to student queries that facilitate immediate learning opportunities.", + partTwoThree: + "Emotionally intelligent interactions that recognize the learner's emotional state and adapt to it to foster a supportive learning environment.", + partTwoFour: + "Maintaining conversation context across sessions, enabling personalized learning experiences that build over time.", + partTwoFive: + "Handling complex multi-speaker scenarios, such as classroom discussions in multiple languages.", + partTwoSix: + "Operating on local, consumer-grade hardware, ensuring privacy and accessibility.", + headingTwo: "Technological Innovation for Education", + partThreeOne: + "At the core of School Bud-E's development is the pursuit of low latency and maximum conversational 
naturalness. Through rigorous testing and evaluating various speech-to-text, speech understanding, and text-to-speech models, the team has achieved remarkable responsiveness and quality on devices common in schools.", + partThreeTwo: + "Since January 2024, School Bud-E has been operating with latencies between 300 and 500 ms, promising near-instant interaction that is crucial to keeping students engaged and supporting educators in real time.", + headingThree: "Supporting the Education Revolution", + partFourOne: + "The development of School Bud-E is an ongoing collaboration. We are committed to continuously enhancing its capabilities to better serve students and educators alike. From reducing system requirements and latency to enriching its understanding of conversational nuances, each update aims to make School Bud-E an indispensable asset in educational institutions. At the same time, we are building an architecture that enables the technology to be implemented in various educational environments, to scale, and to integrate modules tailored to the specific needs of students and educators in different learning settings.", + partFourTwo: + "Are you interested in contributing to the School Bud-E project or integrating it into your suite of educational technologies? Then join our", + partFourThree: "or contact us directly at", + }, + de: { + title: "Über School Bud-E", + partOneOne: + "In der heutigen Zeit, in der Bildung zunehmend mit Technologie verflochten ist, tritt School Bud-E als empathischer KI-Sprachassistent hervor, der speziell für die dynamischen Bedürfnisse im Bildungsbereich entwickelt wurde. 
Entwickelt von", + partOneTwo: + "in Zusammenarbeit mit dem ELLIS-Institut Tübingen, Collabora und dem Tübinger KI-Zentrum, ermöglicht School Bud-E das Lernerlebnis mit einem Schwerpunkt auf Empathie, natürliche Interaktion und", + headingOne: "Bildung mit KI neu definieren", + partTwoOne: + "School Bud-E ist nicht nur ein KI-Sprachassistent; es ist ein digitaler Begleiter, der das Bildungswachstum durch unterstützt:", + partTwoTwo: + "Echtzeit-Antworten auf Schüleranfragen, die sofortige Lernmöglichkeiten erleichtern.", + partTwoThree: + "Emotional intelligente Interaktionen, die den emotionalen Zustand des Lernenden erkennen und sich an diesen anpassen, um eine unterstützende Lernumgebung zu fördern.", + partTwoFour: + "Beibehaltung des Gesprächskontexts über Sitzungen hinweg, was personalisierte Lernerfahrungen ermöglicht, die sich im Laufe der Zeit aufbauen.", + partTwoFive: + "Bewältigung von komplexen Mehrsprecher-Szenarien, wie Klassenzimmerdiskussionen auf mehreren Sprachen.", + partTwoSix: + "Betrieb auf lokaler, verbraucherüblicher Hardware, gewährleistet Datenschutz und Zugänglichkeit.", + headingTwo: "Technologische Innovation für die Bildung", + partThreeOne: + "Im Mittelpunkt der Entwicklung von School Bud-E steht das Streben nach geringer Latenz und maximaler Natürlichkeit im Gespräch. Durch rigoroses Testen und Evaluieren verschiedener Sprach-zu-Text-, Sprachverständnis- und Text-zu-Sprach-Modelle hat das Team eine bemerkenswerte Reaktionsfähigkeit und Qualität auf Geräten erreicht, die in Schulen üblich sind.", + partThreeTwo: + "Seit Januar 2024 arbeitet School Bud-E mit Latenzen zwischen 300 und 500 ms und verspricht eine nahezu sofortige Interaktion, die entscheidend ist, um Schüler engagiert zu halten und Pädagogen in Echtzeit zu unterstützen.", + headingThree: "Unterstützt die Bildungsrevolution", + partFourOne: + "Die Entwicklung von School Bud-E ist eine fortwährende Zusammenarbeit. 
Wir sind darauf bedacht, seine Fähigkeiten kontinuierlich zu verbessern, um Schülern und Pädagogen gleichermaßen besser zu dienen. Von der Reduzierung der Systemanforderungen und Latenz bis zur Bereicherung seines Verständnisses für konversationelle Nuancen zielt jedes Update darauf ab, School Bud-E zu einem unverzichtbaren Vermögenswert in Bildungseinrichtungen zu machen. Gleichzeitig erschaffen bauen wir eine Architektur, die es ermöglicht, die Technologie in verschiedenen Bildungsumgebungen zu implementieren, zu skalieren und Module zu integrieren, die auf die spezifischen Bedürfnisse von Schülern und Pädagogen in verschiedenen Lernsettings zugeschnitten sind.", + partFourTwo: + "Sind Sie interessiert, am School Bud-E Projekt mitzuarbeiten oder es in Ihre Suite von Bildungstechnologien zu integrieren? Dann treten Sie unserem", + partFourThree: "bei oder kontaktieren Sie uns direkt unter", + }, +}; diff --git a/islands/ChatIsland.tsx b/islands/ChatIsland.tsx new file mode 100644 index 0000000..5e2e865 --- /dev/null +++ b/islands/ChatIsland.tsx @@ -0,0 +1,714 @@ +import { useEffect, useState } from "preact/hooks"; +import { fetchEventSource } from "https://esm.sh/@microsoft/fetch-event-source@2.0.1"; +import { ChatSubmitButton } from "../components/ChatSubmitButton.tsx"; +import { IS_BROWSER } from "$fresh/runtime.ts"; +import ChatTemplate from "../components/ChatTemplate.tsx"; +import VoiceRecordButton from "../components/VoiceRecordButton.tsx"; +import ImageUploadButton from "../components/ImageUploadButton.tsx"; + +import { chatIslandContent } from "../internalization/content.ts"; + +class RetriableError extends Error {} +class FatalError extends Error {} + +export default function ChatIsland({ lang }: { lang: string }) { + const [clientId, setClientId] = useState(""); + const [localStorageKeys, setLocalStorageKeys] = useState([] as string[]); + const [currentChatSuffix, setCurrentChatSuffix] = useState("0"); + const [audioFileDict, setAudioFileDict] = 
useState< + Record + >({}); + const [resetTranscript, setResetTranscript] = useState(0); + const [readAlways, setReadAlways] = useState(true); + const [query, setQuery] = useState(""); + const [firstLoad, setFirstLoad] = useState(true); + const [images, setImages] = useState([] as Image[]); + const [isStreamComplete, setIsStreamComplete] = useState(true); + + const expectedWelcomeMessage = chatIslandContent[lang]["welcomeMessage"]; + + // load messages form localStorage + const [messages, setMessages] = useState([ + { + "role": "assistant", + "content": [ + chatIslandContent[lang]["welcomeMessage"], + ], + }, + ] as Message[]); + + useEffect(() => { + if (IS_BROWSER) { + let localStorageKeys: string[] = Object.keys(localStorage).filter((key) => + key.startsWith("bude-chat-") + ); + localStorageKeys = localStorageKeys.length > 0 + ? localStorageKeys + : ["bude-chat-0"]; + const currentChatSuffix = localStorageKeys.length > 0 + ? String(localStorageKeys.sort()[0].slice(10)) + : "0"; + let localStorageMessages = JSON.parse( + String(localStorage.getItem("bude-chat-" + currentChatSuffix)), + ); + localStorageMessages = localStorageMessages || [ + { + "role": "assistant", + "content": [ + chatIslandContent[lang]["welcomeMessage"], + ], + }, + ]; + setLocalStorageKeys(localStorageKeys); + setMessages(localStorageMessages); + setCurrentChatSuffix(currentChatSuffix); + getClientId(); + } + }, []); + + useEffect(() => { + if (IS_BROWSER) { + if (isStreamComplete) { + if ("content" in messages[messages.length - 1]) { + let lastMessageFromBuddy: string; + const lastMessageContent = messages[messages.length - 1]["content"]; + + if (typeof lastMessageContent === "string") { + lastMessageFromBuddy = lastMessageContent; + } else { + lastMessageFromBuddy = (lastMessageContent as string[]).join(""); + } + + if (lastMessageFromBuddy !== "" && messages.length > 1) { + messages[messages.length - 1]["content"] = lastMessageFromBuddy; + + console.log("IS_STREAM_COMPLETE", 
currentChatSuffix); + localStorage.setItem( + "bude-chat-" + currentChatSuffix, + JSON.stringify(messages), + ); + + if (!localStorageKeys.includes("bude-chat-" + currentChatSuffix)) { + setLocalStorageKeys([ + ...localStorageKeys, + "bude-chat-" + currentChatSuffix, + ]); + } + } + if (lastMessageFromBuddy !== "") { + const groupIndex = messages.length - 1; + if (groupIndex === 0) { + getTTS(lastMessageFromBuddy, groupIndex, "stream"); + } + } + } + } + } + }, [isStreamComplete]); + + useEffect(() => { + // Only proceed if we're not already scrolling + const currentPosition = globalThis.innerHeight + + globalThis.scrollY; + const totalScrollHeight = document.body.scrollHeight; + + // Only scroll if the deviation is more than 100 pixels + if (totalScrollHeight - currentPosition > 500) { + globalThis.scrollTo({ + top: totalScrollHeight, + behavior: "smooth", + }); + } + + if (IS_BROWSER && !firstLoad) { + // console.log("MESSAGES", currentChatSuffix); + localStorage.setItem( + "bude-chat-" + currentChatSuffix, + JSON.stringify(messages), + ); + setLocalStorageKeys( + Object.keys(localStorage).filter((key) => key.startsWith("bude-chat-")), + ); + } + + if (firstLoad) { + setFirstLoad(false); + } + }, [messages]); + + useEffect(() => { + // load messages from localStorage if they exist, else start with the default introductory message + if (IS_BROWSER) { + const localStorageMessages = JSON.parse( + String(localStorage.getItem("bude-chat-" + currentChatSuffix)), + ) || [ + { + "role": "assistant", + "content": [ + chatIslandContent[lang]["welcomeMessage"], + ], + }, + ]; + if (localStorageMessages.length === 1) { + if (localStorageMessages[0].content[0] !== expectedWelcomeMessage) { + localStorageMessages[0].content[0] = expectedWelcomeMessage; + } + } + setMessages(localStorageMessages); + } + }, [currentChatSuffix]); + + useEffect(() => { + console.log("[LOG] audioFileDict useEffect", audioFileDict); + for (const key in audioFileDict) { + 
audioFileDict[key][audioFileDict[key].length - 1].onended = () => { + setAudioFileDict({ ...audioFileDict }); + }; + } + }, [audioFileDict]); + + const handleOnSpeakAtGroupIndexAction = (groupIndex: number) => { + console.log("[LOG] handleOnSpeakAtGroupIndexAction", groupIndex); + if (!audioFileDict[groupIndex]) { + const lastMessage = Array.isArray(messages[groupIndex]) + ? messages[groupIndex][0] + : messages[groupIndex]; + getTTS(lastMessage["content"] as string, groupIndex, "handleOnSpeakAtGroupIndexAction"); + return; + } else { + const isAudioPlayingInAnyGroup = Object.values(audioFileDict).some(group => + group.some(audio => !audio.paused) + ); + if (isAudioPlayingInAnyGroup) { + Object.values(audioFileDict).forEach(group => { + group.forEach(audio => { + if (!audio.paused) { + audio.pause(); + audio.currentTime = 0; + } + }); + }); + setAudioFileDict({ ...audioFileDict }); + return; + } + + audioFileDict[groupIndex][0].currentTime = 0; + if (!audioFileDict[groupIndex][0].paused) { + audioFileDict[groupIndex][0].pause(); + setAudioFileDict({ ...audioFileDict }); + } else { + audioFileDict[groupIndex][0].play(); + setAudioFileDict({ ...audioFileDict }); + } + + audioFileDict[groupIndex][0].onended = () => { + setAudioFileDict({ ...audioFileDict }); + // play the second audio file as well + if (audioFileDict[groupIndex][1]) { + audioFileDict[groupIndex][1].play(); + setAudioFileDict({ ...audioFileDict }); + } + } + } + }; + + const handleUploadActionToMessages = (uploadedMessages: Message[]) => { + console.log("From hanldeUploadActionToMessages"); + console.log(uploadedMessages); + const newMessages = uploadedMessages.map((msg) => [msg]).flat(); + newMessages[newMessages.length - 1] = newMessages[newMessages.length - 1]; + setMessages(newMessages); + const textarea = document.querySelector("textarea"); + textarea!.focus(); + }; + + const getClientId = async () => { + const response = await fetch("/api/getClientId", { + method: "POST", + headers: { + 
"Content-Type": "application/json", + }, + }); + const data = await response.json(); + if (data.client_id) { + setClientId(data.client_id); + } else { + console.error("Failed to get client ID"); + } + }; + + const getTTS = async (text: string, groupIndex: number, sourceFunction: string) => { + console.log("[LOG] getTTS"); + if ( + text === chatIslandContent[lang]["welcomeMessage"] + ) { + const audioFile = text === chatIslandContent["de"]["welcomeMessage"] + ? "./intro.wav" + : "./intro-en.wav"; + const audio = new Audio(audioFile); + if (audioFileDict[groupIndex]) { + audioFileDict[groupIndex].push(audio); + } else { + audioFileDict[groupIndex] = [audio]; + } + setAudioFileDict((prev) => ({ ...prev, [groupIndex]: audioFileDict[groupIndex] })); + // setAudioFileDict((prev) => ({ ...prev, [groupIndex]: audio })); + console.log( + "[LOG] Audio file loaded into audioQueue with groupIndex:", + groupIndex, + ); + if (sourceFunction === "handleOnSpeakAtGroupIndexAction") { + handleOnSpeakAtGroupIndexAction(groupIndex); + } + return; + } + + try { + const response = await fetch("/api/tts", { + method: "POST", + headers: { + "Content-Type": "application/json", + }, + body: JSON.stringify({ + text: text, + clientId: clientId, + voice: lang === "en" ? "Stefanie" : "Florian", + }), + }); + + if (!response.ok) { + throw new Error(`HTTP error! 
status: ${response.status}`); + } + + const audioData = await response.arrayBuffer(); + const audioBlob = new Blob([audioData], { type: "audio/wav" }); + const audioUrl = URL.createObjectURL(audioBlob); + const audio = new Audio(audioUrl); + + if (audioFileDict[groupIndex]) { + audioFileDict[groupIndex].push(audio); + } else { + audioFileDict[groupIndex] = [audio]; + } + if (sourceFunction === "stream1" && readAlways) { + audioFileDict[groupIndex][0].play(); + } + if (sourceFunction === "stream2" && readAlways) { + if (audioFileDict[groupIndex][0].paused) { + audioFileDict[groupIndex][1].play(); + } else { + audioFileDict[groupIndex][0].onended = () => { + audioFileDict[groupIndex][1].play(); + }; + } + } + setAudioFileDict((prev) => ({ ...prev, [groupIndex]: audioFileDict[groupIndex] })); + if (sourceFunction === "handleOnSpeakAtGroupIndexAction") { + handleOnSpeakAtGroupIndexAction(groupIndex); + } + // if (sourceFunction == "stream1") { + // audioFileDict[groupIndex].push(audio); + + // } + // if (sourceFunction == "stream2") { + // streamFirst.current.onended(() => { + // streamSecond.current = audio; + // streamSecond.current.play(); + // }); + // } + } catch (error) { + console.error("Error fetching TTS:", error); + } + }; + + const handleEditAction = (groupIndex: number) => { + const lastMessage = Array.isArray(messages[groupIndex]) + ? 
messages[groupIndex][0] + : messages[groupIndex]; + + setMessages((prevMessages) => { + const updatedMessages = prevMessages.slice(0, groupIndex); + return updatedMessages; + }); + setQuery(lastMessage); + const textarea = document.querySelector("textarea"); + textarea!.focus(); + }; + + const handleImagesUploaded = (newImages: Image[]) => { + setImages((prevImages) => [...prevImages, ...newImages]); + }; + + const handleImageChange = (images: Image[]) => { + console.log("Images from ChatIsland: ", images); + + setImages(images); + }; + + const handleRefreshAction = (groupIndex: number) => { + if (groupIndex > 0 && groupIndex <= messages.length) { + const slicedMessages = messages.slice(0, groupIndex - 1) as Message[]; + setMessages(slicedMessages); + + // const lastMessage = isArray(messages[groupIndex - 1]["content"]) ; + const refreshMessage = Array.isArray(messages[groupIndex - 1]["content"]) + ? messages[groupIndex - 1]["content"] + : messages[groupIndex - 1]["content"]; + + startStream(refreshMessage as string, slicedMessages); + } + }; + + const startNewChat = () => { + const maxValueInChatSuffix = Math.max( + ...localStorageKeys.map((key) => Number(key.slice(10))), + ); + const newChatSuffix = String(Number(maxValueInChatSuffix) + 1); + // console.log([...localStorageKeys, "bude-chat-" + newChatSuffix]); + setMessages([ + { + "role": "assistant", + "content": [ + chatIslandContent[lang]["welcomeMessage"], + ], + }, + ]); + setCurrentChatSuffix(newChatSuffix); + }; + + const startStream = (transcript: string, prevMessages?: Message[]) => { + const ongoingStream: string[] = []; + let ttsFromFirstSentence = false; + if (isStreamComplete) { + setIsStreamComplete(false); + setResetTranscript(resetTranscript + 1); + + const currentQuerry = transcript !== "" ? 
transcript : query; + let previousMessages = prevMessages || messages; + + previousMessages = previousMessages.map((msg) => { + if (typeof msg.content === "string") { + return msg; + } + if (typeof msg.content[0] === "string") { + return { "role": msg.role, "content": msg.content.join("") }; + } + return msg; + }); + + const queryWithImages = []; + if (images.length !== 0) { + queryWithImages.push({ "type": "text", "text": currentQuerry }); + for (const img of images) { + queryWithImages.push(img); + } + } + + const newMessages = [...previousMessages, { + "role": "user", + "content": images.length === 0 ? currentQuerry : queryWithImages, + }]; + + setImages([]); + + setMessages(newMessages as Message[]); + + setQuery(""); + + fetchEventSource("/api/chat", { + method: "POST", + headers: { + "Content-Type": "application/json", + }, + body: JSON.stringify({ + lang: lang, + messages: newMessages, + }), + onmessage(ev: TextEvent) { + const parsedData = JSON.parse(ev.data); + ongoingStream.push(parsedData); + if (ttsFromFirstSentence === false) { + if (/(? 
{ + const lastArray = + prevMessagesRoundTwo[prevMessagesRoundTwo.length - 1]; + (lastArray.content as string[]).push(parsedData); + return [ + ...prevMessagesRoundTwo.slice(0, -1), + lastArray, + ]; + }); + }, + async onopen(response: Response) { + const prevMessagesRoundTwo = newMessages; + prevMessagesRoundTwo.push({ "role": "assistant", "content": [] }); + setMessages((prevMessagesRoundTwo) => prevMessagesRoundTwo); + await true; + if (response.ok) { + return; // everything's good + } else if ( + response.status >= 400 && response.status < 500 && + response.status !== 429 + ) { + // client-side errors are usually non-retriable: + throw new FatalError(); + } else { + throw new RetriableError(); + } + }, + onerror(err: TextEvent) { + console.error("Stream error:", err); + }, + onclose() { + console.log("Stream closed"); + setIsStreamComplete(true); + setQuery(""); + getTTS(ongoingStream.join(""), newMessages.length - 1, "stream2"); + console.log("ONGOING STREAM: ", ongoingStream); + }, + }); + } + }; + + const toggleReadAlways = (value: boolean) => { + setReadAlways(value); + if (!value) { + Object.values(audioFileDict).forEach(group => { + group.forEach(audio => { + if (!audio.paused) { + audio.pause(); + audio.currentTime = 0; + } + }); + }); + } + }; + + const deleteAllChats = () => { + if (IS_BROWSER) { + localStorage.clear(); + setMessages([ + { + "role": "assistant", + "content": [ + chatIslandContent[lang]["welcomeMessage"], + ], + }, + ]); + setLocalStorageKeys([]); + setCurrentChatSuffix("0"); + } + }; + + const deleteCurrentChat = () => { + if (IS_BROWSER) { + if (localStorageKeys.length > 1) { + localStorage.removeItem("bude-chat-" + currentChatSuffix); + + const nextChatSuffix = localStorageKeys.filter((key) => + key !== "bude-chat-" + currentChatSuffix + )[0].slice(10); + + setMessages( + JSON.parse( + String(localStorage.getItem("bude-chat-" + nextChatSuffix)), + ), + ); + setCurrentChatSuffix(nextChatSuffix); + } else { + setMessages([ + { + 
"role": "assistant", + "content": [ + chatIslandContent[lang]["welcomeMessage"], + ], + }, + ]); + } + } + }; + + const saveChatsToLocalFile = () => { + if (IS_BROWSER) { + // deno-lint-ignore no-explicit-any + const chats = {} as any; + for (const key of localStorageKeys) { + chats[key] = JSON.parse(String(localStorage.getItem(key))); + } + const chatsString = JSON.stringify(chats); + const blob = new Blob([chatsString], { type: "application/json" }); + const url = URL.createObjectURL(blob); + const a = document.createElement("a"); + a.href = url; + const currentDate = new Date(); + a.download = `chats-${currentDate.toISOString()}.json`; + a.click(); + } + }; + + // deno-lint-ignore no-explicit-any + const restoreChatsFromLocalFile = (e: any) => { + if (IS_BROWSER) { + const file = e.target.files[0]; + if (!file) { + console.error("No file selected"); + return; + } + + const reader = new FileReader(); + reader.onload = (event) => { + try { + const chats = JSON.parse(event.target?.result as string); + + // Restore chats to localStorage + for (const [key, value] of Object.entries(chats)) { + localStorage.setItem(key, JSON.stringify(value)); + } + + const newChatSuffix = chats + ? Object.keys(chats).sort()[0].slice(10) + : "0"; + setLocalStorageKeys( + Object.keys(localStorage).filter((key) => + key.startsWith("bude-chat-") + ), + ); + setCurrentChatSuffix(newChatSuffix); + setMessages(chats["bude-chat-" + newChatSuffix]); + } catch (error) { + console.error("Error parsing JSON file:", error); + } + }; + + reader.onerror = (error) => { + console.error("Error reading file:", error); + }; + + reader.readAsText(file); + } + }; + + return ( +
+ {localStorageKeys.sort().map((key) => { + // remove bude-chat- from the beginning of the key + const chatSuffix = key.substring(10); + return ( + + ); + })} + + {Object.keys(localStorageKeys).length > 0 && ( + + )} + {Object.keys(localStorageKeys).length > 0 && ( + + )} + {Object.keys(localStorageKeys).length > 0 && ( + + )} + restoreChatsFromLocalFile(e)} + /> + + toggleReadAlways(!readAlways)} + onRefreshAction={handleRefreshAction} + onEditAction={handleEditAction} + onUploadActionToMessages={handleUploadActionToMessages} + onImageChange={handleImageChange} + onTrashAction={() => setMessages([])} + /> + +
+