sync to latest upstream source code (#129)
* main : fix formatChat

* sync with latest upstream source code

* v1.16.4
ngxson authored Oct 24, 2024
1 parent ffcd98a commit b727c3c
Showing 10 changed files with 33 additions and 27 deletions.
2 changes: 1 addition & 1 deletion .vscode/settings.json
@@ -1,5 +1,5 @@
{
"files.associations": {
"__locale": "c"
"typeinfo": "cpp"
}
}
6 changes: 4 additions & 2 deletions CMakeLists.txt
@@ -1,6 +1,7 @@
cmake_minimum_required(VERSION 3.14)
project("wllama")
add_subdirectory(llama.cpp llamacpp)
add_subdirectory(llama.cpp)
add_subdirectory(llama.cpp/common)

set(CMAKE_THREAD_LIBS_INIT "-lpthread")
set(CMAKE_HAVE_THREADS_LIBRARY 1)
@@ -12,7 +13,8 @@ set(COMMON_SRC actions.hpp
json.hpp
llama.cpp/include/llama.h)
include_directories(${CMAKE_CURRENT_SOURCE_DIR})
include_directories(${CMAKE_CURRENT_SOURCE_DIR}/llama.cpp/include)
include_directories(${CMAKE_CURRENT_SOURCE_DIR}/llama.cpp/common)

add_executable(wllama wllama.cpp ${COMMON_SRC})
target_link_libraries(wllama PRIVATE ggml common ${CMAKE_THREAD_LIBS_INIT})
target_link_libraries(wllama PRIVATE ggml llama common ${CMAKE_THREAD_LIBS_INIT})
36 changes: 18 additions & 18 deletions actions.hpp
@@ -35,7 +35,7 @@ struct app_t
{
llama_model *model;
llama_context *ctx;
struct gpt_sampler *ctx_sampling = nullptr;
common_sampler *ctx_sampling = nullptr;
llama_batch batch = llama_batch_init(512, 0, 1);
std::vector<llama_token> tokens;
int32_t seed = LLAMA_DEFAULT_SEED;
@@ -121,7 +121,7 @@ void free_all(app_t &app)
if (app.model != nullptr)
llama_free_model(app.model);
if (app.ctx_sampling != nullptr)
gpt_sampler_free(app.ctx_sampling);
common_sampler_free(app.ctx_sampling);
}

json dump_metadata(app_t &app)
@@ -285,7 +285,7 @@ json action_set_options(app_t &app, json &body)
json action_sampling_init(app_t &app, json &body)
{
// sampling
gpt_sampler_params sparams;
common_sampler_params sparams;
sparams.seed = app.seed;
if (sparams.seed == LLAMA_DEFAULT_SEED)
sparams.seed = time(NULL);
@@ -345,15 +345,15 @@ json action_sampling_init(app_t &app, json &body)
// maybe free before creating a new one
if (app.ctx_sampling != nullptr)
{
gpt_sampler_free(app.ctx_sampling);
common_sampler_free(app.ctx_sampling);
}
app.ctx_sampling = gpt_sampler_init(app.model, sparams);
app.ctx_sampling = common_sampler_init(app.model, sparams);
if (body.contains("tokens"))
{
std::vector<llama_token> tokens = body["tokens"];
for (auto id : tokens)
{
gpt_sampler_accept(app.ctx_sampling, id, false);
common_sampler_accept(app.ctx_sampling, id, false);
}
}
return json{{"success", true}};
@@ -366,7 +366,7 @@ json action_get_vocab(app_t &app, json &body)
std::vector<std::vector<unsigned int> > vocab(max_tokens);
for (int32_t id = 0; id < max_tokens; id++)
{
std::string token_as_str = llama_token_to_piece(app.ctx, id);
std::string token_as_str = common_token_to_piece(app.ctx, id);
vocab[id] = convert_string_to_int_arr(token_as_str);
}
return json{
@@ -382,7 +382,7 @@ json action_lookup_token(app_t &app, json &body)
int32_t max_tokens = llama_n_vocab(app.model);
for (int32_t id = 0; id < max_tokens; id++)
{
std::string token_as_str = llama_token_to_piece(app.ctx, id);
std::string token_as_str = common_token_to_piece(app.ctx, id);
if (token_as_str == piece)
{
return json{
@@ -401,7 +401,7 @@ json action_tokenize(app_t &app, json &body)
std::string text = body["text"];
bool special = body.contains("special");
std::vector<llama_token> tokens_list;
tokens_list = ::llama_tokenize(app.model, text, false, special);
tokens_list = common_tokenize(app.model, text, false, special);
return json{
{"success", true},
{"tokens", tokens_list},
@@ -415,7 +415,7 @@ json action_detokenize(app_t &app, json &body)
std::stringstream output;
for (auto id : tokens)
{
output << llama_token_to_piece(app.ctx, id);
output << common_token_to_piece(app.ctx, id);
}
std::string parsed_str = output.str();
return json{
@@ -430,12 +430,12 @@ json action_decode(app_t &app, json &body)
std::vector<llama_token> tokens_list = body["tokens"];
bool skip_logits = body.contains("skip_logits");
size_t i = 0;
llama_batch_clear(app.batch);
common_batch_clear(app.batch);
for (auto id : tokens_list)
{
bool grp_attn_enabled = false; // TODO: maybe remove grp_attn
int32_t n_past = app.tokens.size();
llama_batch_add(app.batch, id, n_past, {0}, false);
common_batch_add(app.batch, id, n_past, {0}, false);
app.tokens.push_back(id);
i++;
}
@@ -466,10 +466,10 @@ json action_encode(app_t &app, json &body)
return json{{"error", "this model does not have an encoder"}};
}
size_t n_past = 0;
llama_batch_clear(app.batch);
common_batch_clear(app.batch);
for (auto id : tokens_list)
{
llama_batch_add(app.batch, id, n_past, {0}, false);
common_batch_add(app.batch, id, n_past, {0}, false);
n_past++;
}
if (llama_encode(app.ctx, app.batch) != 0)
@@ -489,8 +489,8 @@ json action_encode(app_t &app, json &body)
json action_sampling_sample(app_t &app, json &body)
{
int32_t idx = app.batch.n_tokens - 1;
const llama_token new_token_id = gpt_sampler_sample(app.ctx_sampling, app.ctx, idx, false);
std::string piece = llama_token_to_piece(app.ctx, new_token_id);
const llama_token new_token_id = common_sampler_sample(app.ctx_sampling, app.ctx, idx, false);
std::string piece = common_token_to_piece(app.ctx, new_token_id);
return json{
{"success", true},
{"piece", convert_string_to_int_arr(piece)},
@@ -504,7 +504,7 @@ json action_sampling_accept(app_t &app, json &body)
std::vector<llama_token> tokens_list = body["tokens"];
for (auto id : tokens_list)
{
gpt_sampler_accept(app.ctx_sampling, id, false);
common_sampler_accept(app.ctx_sampling, id, false);
}
return json{{"success", true}};
}
@@ -578,7 +578,7 @@ json action_embeddings(app_t &app, json &body)
return json{{"error", "failed to get embeddings"}};
}
}
llama_embd_normalize(embd, out, n_embd);
common_embd_normalize(embd, out, n_embd);
return json{
{"success", true},
{"embeddings", embeddings},
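For readers skimming the hunks above: this sync follows the upstream llama.cpp refactor that moved the shared helpers behind a common_ prefix (gpt_sampler_* becomes common_sampler_*, llama_batch_clear/llama_batch_add become common_batch_clear/common_batch_add, llama_token_to_piece becomes common_token_to_piece, and so on). The sketch below is not part of this commit; it only illustrates how the renamed helpers fit together, with signatures inferred from the calls visible in this diff and assuming the llama.cpp revision pinned by the submodule.

#include <string>
#include <vector>
#include "llama.h"
#include "common.h"
#include "sampling.h"

// Sketch only: tokenize a prompt, decode it, sample one token, return its text piece.
static std::string sample_one(llama_model *model, llama_context *ctx, const std::string &prompt)
{
    // formerly ::llama_tokenize(...)
    std::vector<llama_token> tokens = common_tokenize(model, prompt, false, true);

    // formerly llama_batch_clear / llama_batch_add
    llama_batch batch = llama_batch_init(512, 0, 1);
    common_batch_clear(batch);
    for (size_t i = 0; i < tokens.size(); i++)
    {
        // request logits only for the last position
        common_batch_add(batch, tokens[i], (llama_pos)i, {0}, i + 1 == tokens.size());
    }
    llama_decode(ctx, batch);

    // formerly gpt_sampler_params / gpt_sampler_init / gpt_sampler_sample / gpt_sampler_accept
    common_sampler_params sparams;
    common_sampler *smpl = common_sampler_init(model, sparams);
    const llama_token new_token = common_sampler_sample(smpl, ctx, batch.n_tokens - 1, false);
    common_sampler_accept(smpl, new_token, false);

    // formerly llama_token_to_piece
    std::string piece = common_token_to_piece(ctx, new_token);

    common_sampler_free(smpl);
    llama_batch_free(batch);
    return piece;
}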
8 changes: 6 additions & 2 deletions examples/main/src/utils/utils.ts
@@ -4,6 +4,8 @@ import { Message, Screen } from './types';
import { Wllama } from '@wllama/wllama';
import { DEFAULT_CHAT_TEMPLATE } from '../config';

const textDecoder = new TextDecoder();

export const delay = (ms: number) =>
new Promise((resolve) => setTimeout(resolve, ms));

@@ -39,10 +41,12 @@ export const formatChat = async (
const template = new Template(
modelWllama.getChatTemplate() ?? DEFAULT_CHAT_TEMPLATE
);
const bos_token: string = textDecoder.decode(await modelWllama.detokenize([modelWllama.getBOS()]));
const eos_token: string = textDecoder.decode(await modelWllama.detokenize([modelWllama.getEOS()]));
return template.render({
messages,
bos_token: await modelWllama.detokenize([modelWllama.getBOS()]),
eos_token: await modelWllama.detokenize([modelWllama.getEOS()]),
bos_token,
eos_token,
add_generation_prompt: true,
});
};
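The formatChat fix above comes down to decoding the detokenized BOS/EOS bytes before they are rendered into the chat template; previously the undecoded return value of detokenize() was passed straight to template.render. A minimal sketch of that pattern, assuming (as the diff implies) that Wllama.detokenize() resolves to raw UTF-8 bytes, with getSpecialTokens as a hypothetical helper name:

import { Wllama } from '@wllama/wllama';

const textDecoder = new TextDecoder();

// Hypothetical helper: turn the model's BOS/EOS token ids back into printable strings.
async function getSpecialTokens(wllama: Wllama): Promise<{ bos_token: string; eos_token: string }> {
  const bosBytes = await wllama.detokenize([wllama.getBOS()]); // assumed to be a Uint8Array
  const eosBytes = await wllama.detokenize([wllama.getEOS()]); // assumed to be a Uint8Array
  return {
    bos_token: textDecoder.decode(bosBytes),
    eos_token: textDecoder.decode(eosBytes),
  };
}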
2 changes: 1 addition & 1 deletion llama.cpp
Submodule llama.cpp updated 129 files
2 changes: 1 addition & 1 deletion package.json
@@ -1,6 +1,6 @@
{
"name": "@wllama/wllama",
"version": "1.16.3",
"version": "1.16.4",
"description": "Low-level WASM binding for llama.cpp",
"main": "index.js",
"type": "module",
2 changes: 1 addition & 1 deletion src/multi-thread/wllama.js

Large diffs are not rendered by default.

Binary file modified src/multi-thread/wllama.wasm
2 changes: 1 addition & 1 deletion src/single-thread/wllama.js

Large diffs are not rendered by default.

Binary file modified src/single-thread/wllama.wasm