update llama.cpp to b2757 #101

Draft · wants to merge 1 commit into base: main
rllm/llama-cpp-low/llama.cpp (2 changes: 1 addition & 1 deletion)
Submodule llama.cpp updated 330 files

rllm/llama-cpp-low/src/lib.rs (10 changes: 5 additions & 5 deletions)
@@ -83,11 +83,11 @@ impl Default for ModelParams {
 }
 
 pub enum SplitMode {
-    None = llama_split_mode_LLAMA_SPLIT_NONE as isize,
+    None = llama_split_mode_LLAMA_SPLIT_MODE_NONE as isize,
     /// split layers and KV across GPUs
-    Layer = llama_split_mode_LLAMA_SPLIT_LAYER as isize,
+    Layer = llama_split_mode_LLAMA_SPLIT_MODE_LAYER as isize,
     /// split rows across GPUs
-    Row = llama_split_mode_LLAMA_SPLIT_ROW as isize,
+    Row = llama_split_mode_LLAMA_SPLIT_MODE_ROW as isize,
 }
 
 impl ModelParams {
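
Upstream llama.cpp renamed the C enum values from `LLAMA_SPLIT_*` to `LLAMA_SPLIT_MODE_*`, so the bindgen-generated constant names change while the Rust-side `SplitMode` variants stay the same. A minimal usage sketch follows; it assumes `ModelParams` is the raw bindgen `llama_model_params` struct (consistent with `mparams` being passed straight to `llama_load_model_from_file` below) and that its `split_mode` field matches the upstream C field:

```rust
// Sketch only, not part of this diff: `split_mode` is the field name in
// the upstream llama_model_params C struct; exposing it directly through
// ModelParams is an assumption. `as _` lets the cast follow whatever
// integer type bindgen generated for llama_split_mode.
let mut mparams = ModelParams::default();
mparams.split_mode = SplitMode::Layer as _; // split layers and KV across GPUs
let model = Model::from_file("model.gguf", mparams).unwrap();
```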
@@ -170,8 +170,7 @@ impl Model {
     pub fn from_file(file: &str, mparams: ModelParams) -> Result<Self> {
         unsafe {
             llama_log_set(Some(llama_log), std::ptr::null_mut());
-            let numa = false;
-            llama_backend_init(numa); // TODO: only call this once?
+            llama_backend_init();
             let c = CString::new(file).unwrap();
             let model = llama_load_model_from_file(c.as_ptr(), mparams);
             if model == std::ptr::null_mut() {
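
Upstream removed the `numa: bool` argument: `llama_backend_init()` now takes no parameters, and NUMA configuration moved into a separate `llama_numa_init(enum ggml_numa_strategy)` call. Since this code previously passed `numa = false`, dropping the argument keeps the old behavior. A sketch of the new init sequence, with the bindgen constant name assumed by analogy with the split-mode constants above:

```rust
unsafe {
    // Equivalent to the old llama_backend_init(false).
    llama_backend_init();
    // Opting into NUMA would now be a separate call, e.g. (binding name
    // assumed from bindgen's pattern; commented out because this crate
    // may not re-export it):
    // llama_numa_init(ggml_numa_strategy_GGML_NUMA_STRATEGY_DISTRIBUTE);
}
```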
@@ -242,6 +241,7 @@ impl Model {
                 token as i32,
                 res.as_mut_ptr() as *mut c_char,
                 res.len() as i32,
+                false,
             )
         });
         if ntok < 0 {
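
The added `false` is the new trailing `bool special` parameter that `llama_token_to_piece` gained upstream; it controls whether special/control tokens are rendered as text, and passing `false` preserves the previous detokenization behavior. The full call shape at this revision, sketched with a simplified buffer (`model` and `token` are assumed in scope; the real code above retries when the return value is negative):

```rust
use std::ffi::c_char;

let mut res = vec![0u8; 32]; // sketch: fixed-size output buffer for the piece
let ntok = unsafe {
    llama_token_to_piece(
        model,                           // *const llama_model (assumed in scope)
        token as i32,                    // llama_token to convert
        res.as_mut_ptr() as *mut c_char, // output buffer
        res.len() as i32,                // buffer capacity
        false,                           // special: don't render control tokens
    )
};
if ntok >= 0 {
    res.truncate(ntok as usize); // ntok bytes were written
}
```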