# Commit f71ef18

Initial commit (0 parents), showing 41 changed files with 30,955 additions and 0 deletions.

## .gitignore

```
node_modules
/cache

/dist
/wasm
```

## .gitmodules

```ini
[submodule "llama.cpp"]
	path = llama.cpp
	url = https://github.com/ggerganov/llama.cpp
```
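
Since llama.cpp is vendored as a git submodule, a fresh checkout needs the submodule fetched before anything can be built. A minimal sketch using standard git commands (`<repo-url>` is a placeholder, not taken from this commit):

```shell
# Inside an existing checkout: fetch the llama.cpp submodule
git submodule update --init --recursive

# Or clone with submodules in one step (substitute the actual repository URL)
git clone --recurse-submodules <repo-url>
```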

## CMakeLists.txt

```cmake
cmake_minimum_required(VERSION 3.14)
project("wllama")
add_subdirectory(llama.cpp llamacpp)

# Pre-seed CMake's thread detection so it uses pthreads without probing
# (useful when cross-compiling, e.g. with Emscripten).
set(CMAKE_THREAD_LIBS_INIT "-lpthread")
set(CMAKE_HAVE_THREADS_LIBRARY 1)
set(CMAKE_USE_WIN32_THREADS_INIT 0)
set(CMAKE_USE_PTHREADS_INIT 1)
set(THREADS_PREFER_PTHREAD_FLAG ON)

set(COMMON_SRC actions.hpp
    json.hpp
    llama.cpp/llama.h)
include_directories(${CMAKE_CURRENT_SOURCE_DIR})
include_directories(${CMAKE_CURRENT_SOURCE_DIR}/llama.cpp/common)

add_executable(wllama wllama.cpp ${COMMON_SRC})
target_link_libraries(wllama PRIVATE ggml common ${CMAKE_THREAD_LIBS_INIT})
```
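
The supported build path is `./scripts/build_wasm.sh` (see the README below), which runs inside docker. Purely as a hedged sketch of what a manual Emscripten configure of this `CMakeLists.txt` might look like, assuming the Emscripten SDK is installed (`emcmake` is Emscripten's CMake wrapper; the flags the build script actually uses are not shown in this commit):

```shell
# Hypothetical manual build; the repo's actual build goes through docker
# via ./scripts/build_wasm.sh, so these flags are illustrative only.
emcmake cmake -B build \
  -DCMAKE_C_FLAGS="-pthread" \
  -DCMAKE_CXX_FLAGS="-pthread"  # -pthread enables Emscripten's pthreads support
cmake --build build -j
```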

## README.md

# wllama - WASM low-level binding for llama.cpp

Another WASM binding for llama.cpp. Inspired by [tangledgroup/llama-cpp-wasm](https://github.com/tangledgroup/llama-cpp-wasm), but unlike it, **wllama** aims to support a **low-level API**: (de)tokenization, embeddings, and more.

## Demo

TODO

## How to build

```shell
# Requires docker compose
# First, build llama.cpp into WASM
./scripts/build_wasm.sh
# (Optional) build the ES6 module
npm run build
```

## How to use

See the `examples` directory.

```javascript
import { Wllama } from '../../esm/index.js';

(async () => {
  // Automatically switches between the single-thread and multi-thread versions
  // based on browser support.
  // To force single-threaded mode, remove "wasmMultiThreadPath" and "workerMultiThreadPath".
  const wllama = new Wllama({
    wasmSingleThreadPath: '../../esm/single-thread/wllama.wasm',
    wasmMultiThreadPath: '../../esm/multi-thread/wllama.wasm',
    workerMultiThreadPath: '../../esm/multi-thread/wllama.worker.mjs',
  });
  await wllama.loadModel('https://huggingface.co/ggml-org/models/resolve/main/tinyllamas/stories260K.gguf', {});
  // The original example read the prompt from an <input> element (elemInput.value);
  // a string literal is used here so the snippet is self-contained.
  const outputText = await wllama.createCompletion('Once upon a time,', {
    nPredict: 50,
    sampling: {
      temp: 0.5,
      top_k: 40,
      top_p: 0.9,
    },
  });
  console.log(outputText);
})();
```
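
Browsers will not load ES modules or `.wasm` files from `file://` URLs, so the example has to be served over HTTP. One possible way, assuming Node.js is available (`http-server` is a generic npm package, not part of this repo); note also that the multi-threaded build relies on `SharedArrayBuffer`, which browsers only expose on cross-origin-isolated pages (COOP/COEP headers):

```shell
# Serve the repository root so the browser can fetch the example page,
# the ESM build, and the .wasm files.
npx http-server . -p 8080
# Then open http://localhost:8080/examples/ in a browser.
```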

## TODO

- Better documentation
- Support for multiple sequences: given the resource limitations of WASM, I don't think multi-sequence support is a good idea
- Multi-modal: waiting for the LLaVA implementation in llama.cpp to be refactored