Skip to content

Commit

Permalink
Add numerical C value store
Browse files Browse the repository at this point in the history
  • Loading branch information
weiliw-amz committed Sep 28, 2023
1 parent c3bccd5 commit d4694fa
Show file tree
Hide file tree
Showing 5 changed files with 595 additions and 0 deletions.
85 changes: 85 additions & 0 deletions pecos/core/base.py
Original file line number Diff line number Diff line change
Expand Up @@ -533,6 +533,7 @@ def __init__(self, dirname, soname, forced_rebuild=False):
self.link_tfidf_vectorizer()
self.link_ann_hnsw_methods()
self.link_mmap_hashmap_methods()
self.link_mmap_valstore_methods()

def link_xlinear_methods(self):
"""
Expand Down Expand Up @@ -1799,5 +1800,89 @@ def mmap_hashmap_init(self, map_type):
raise NotImplementedError(f"map_type={map_type} is not implemented.")
return self.mmap_map_fn_dict[map_type]

def _get_num_f32_mmap_valstore_methods(self):
"""
Specify C-lib's numerical float32 Memory-mappable store methods arguments and return types.
"""
fn_prefix = "mmap_valstore"
map_type = "float32"

local_fn_dict = {}

fn_name = "new"
local_fn_dict[fn_name] = getattr(self.clib_float32, f"{fn_prefix}_{fn_name}_{map_type}")
corelib.fillprototype(local_fn_dict[fn_name], c_void_p, None)

fn_name = "destruct"
local_fn_dict[fn_name] = getattr(self.clib_float32, f"{fn_prefix}_{fn_name}_{map_type}")
corelib.fillprototype(local_fn_dict[fn_name], None, [c_void_p])

fn_name = "n_row"
local_fn_dict[fn_name] = getattr(self.clib_float32, f"{fn_prefix}_{fn_name}_{map_type}")
corelib.fillprototype(local_fn_dict[fn_name], c_uint64, None)

fn_name = "n_col"
local_fn_dict[fn_name] = getattr(self.clib_float32, f"{fn_prefix}_{fn_name}_{map_type}")
corelib.fillprototype(local_fn_dict[fn_name], c_uint32, None)

fn_name = "save"
local_fn_dict[fn_name] = getattr(self.clib_float32, f"{fn_prefix}_{fn_name}_{map_type}")
corelib.fillprototype(local_fn_dict[fn_name], None, [c_void_p, c_char_p])

fn_name = "load"
local_fn_dict[fn_name] = getattr(self.clib_float32, f"{fn_prefix}_{fn_name}_{map_type}")
corelib.fillprototype(local_fn_dict[fn_name], c_void_p, [c_char_p, c_bool])

fn_name = "from_vals"
local_fn_dict[fn_name] = getattr(self.clib_float32, f"{fn_prefix}_{fn_name}_{map_type}")
corelib.fillprototype(
local_fn_dict[fn_name], None, [c_void_p, c_uint64, c_uint32, POINTER(c_float)]
)

fn_name = "get_submatrix"
local_fn_dict[fn_name] = getattr(self.clib_float32, f"{fn_prefix}_{fn_name}_{map_type}")
corelib.fillprototype(
local_fn_dict[fn_name],
None,
[
c_void_p,
c_uint32,
c_uint32,
POINTER(c_uint64),
POINTER(c_uint32),
POINTER(c_float),
c_uint32,
],
)

return local_fn_dict

def _get_str_mmap_valstore_methods(self):
"""
Specify C-lib's numerical Memory-mappable value store methods arguments and return types.
"""
return {}

def link_mmap_valstore_methods(self):
    """
    Collect the C-lib Memory-mappable value store method tables, keyed by store type
    ("num_f32", "string"), and keep them on the instance as `mmap_valstore_fn_dict`.
    """
    fn_dict_by_store_type = {}
    fn_dict_by_store_type["num_f32"] = self._get_num_f32_mmap_valstore_methods()
    fn_dict_by_store_type["string"] = self._get_str_mmap_valstore_methods()

    self.mmap_valstore_fn_dict = fn_dict_by_store_type

def mmap_valstore_init(self, store_type):
    """Look up the C/C++ function table of a Memory-mappable value store.

    Args:
        store_type (string): Type of store; must be a key of `self.mmap_valstore_fn_dict`.
    Returns:
        mmap_valstore_fn_dict (dict): the clib C/C++ functions registered for `store_type`
    Raises:
        NotImplementedError: if `store_type` has no registered function table.
    """
    known_stores = self.mmap_valstore_fn_dict
    if store_type in known_stores:
        return known_stores[store_type]
    raise NotImplementedError(f"store_type={store_type} is not implemented.")


# Module-level `corelib` handle. The first argument is the "core" directory located next to
# the imported `pecos` package's `__file__`; the second is the library name "libpecos".
clib = corelib(os.path.join(os.path.dirname(os.path.abspath(pecos.__file__)), "core"), "libpecos")
68 changes: 68 additions & 0 deletions pecos/core/libpecos.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -14,6 +14,7 @@
#include "utils/clustering.hpp"
#include "utils/matrix.hpp"
#include "utils/mmap_hashmap.hpp"
#include "utils/mmap_valstore.hpp"
#include "utils/tfidf.hpp"
#include "utils/parallel.hpp"
#include "xmc/inference.hpp"
Expand Down Expand Up @@ -550,4 +551,71 @@ extern "C" {
return static_cast<mmap_hashmap_ ## SUFFIX *>(map_ptr)->contains(FUNC_CALL_KEY); }
MMAP_MAP_CONTAINS(str2int, KEY_SINGLE_ARG(const char* key, uint32_t key_len), KEY_SINGLE_ARG(key, key_len))
MMAP_MAP_CONTAINS(int2int, uint64_t key, key)


// ==== C Interface of Memory-mappable Value Store ====

// Short alias for the float32 store; the wrapper macros below paste the
// "float32" suffix onto "mmap_valstore_" to form this name.
using mmap_valstore_float32 = pecos::mmap_valstore::Float32Store;

// New: heap-allocate a default-constructed store and return it as an opaque handle.
#define MMAP_VALSTORE_NEW(SUFFIX) \
    void* mmap_valstore_new_ ## SUFFIX () { \
        auto* store = new mmap_valstore_ ## SUFFIX(); \
        return static_cast<void*>(store); }
MMAP_VALSTORE_NEW(float32)

// Destruct
#define MMAP_VALSTORE_DESTRUCT(SUFFIX) \
void mmap_valstore_destruct_ ## SUFFIX (void* map_ptr) { \
delete static_cast<mmap_valstore_ ## SUFFIX *>(map_ptr); }
MMAP_VALSTORE_DESTRUCT(float32)

// Number of rows
#define MMAP_MAP_N_ROW(SUFFIX) \
size_t mmap_valstore_n_row_ ## SUFFIX (void* map_ptr) { \
return static_cast<mmap_valstore_ ## SUFFIX *>(map_ptr)->n_row(); }
MMAP_MAP_N_ROW(float32)

// Number of columns
#define MMAP_MAP_N_COL(SUFFIX) \
size_t mmap_valstore_n_col_ ## SUFFIX (void* map_ptr) { \
return static_cast<mmap_valstore_ ## SUFFIX *>(map_ptr)->n_col(); }
MMAP_MAP_N_COL(float32)

// Save: forward the target directory to the store's save().
#define MMAP_VALSTORE_SAVE(SUFFIX) \
    void mmap_valstore_save_ ## SUFFIX (void* map_ptr, const char* map_dir) { \
        auto* store = static_cast<mmap_valstore_ ## SUFFIX *>(map_ptr); \
        store->save(map_dir); }
MMAP_VALSTORE_SAVE(float32)

// Load: allocate a fresh store, load it from map_dir (optionally lazily)
// and return it as an opaque handle.
#define MMAP_VALSTORE_LOAD(SUFFIX) \
    void* mmap_valstore_load_ ## SUFFIX (const char* map_dir, const bool lazy_load) { \
        auto* loaded = new mmap_valstore_ ## SUFFIX(); \
        loaded->load(map_dir, lazy_load); \
        void* handle = loaded; \
        return handle; }
MMAP_VALSTORE_LOAD(float32)

// Create view from external values pointer.
// Forwards the dimensions and the values pointer to Float32Store::from_vals().
void mmap_valstore_from_vals_float32 (
    void* map_ptr,
    const mmap_valstore_float32::row_type n_row,
    const mmap_valstore_float32::col_type n_col,
    const mmap_valstore_float32::value_type* vals
) {
    auto* store = static_cast<mmap_valstore_float32 *>(map_ptr);
    store->from_vals(n_row, n_col, vals);
}

// Get sub-matrix.
// Forwards the requested row/column indices and the output buffer to
// Float32Store::get_submatrix(); the gathered values are written into `ret`.
void mmap_valstore_get_submatrix_float32 (
    void* map_ptr,
    const uint32_t n_sub_row,
    const uint32_t n_sub_col,
    const mmap_valstore_float32::row_type* sub_rows,
    const mmap_valstore_float32::col_type* sub_cols,
    mmap_valstore_float32::value_type* ret,
    const int threads
) {
    auto* store = static_cast<mmap_valstore_float32 *>(map_ptr);
    store->get_submatrix(n_sub_row, n_sub_col, sub_rows, sub_cols, ret, threads);
}
}
100 changes: 100 additions & 0 deletions pecos/core/utils/mmap_valstore.hpp
Original file line number Diff line number Diff line change
@@ -0,0 +1,100 @@
/*
* Copyright 2021 Amazon.com, Inc. or its affiliates. All Rights Reserved.
*
* Licensed under the Apache License, Version 2.0 (the "License"). You may not use this file except in compliance
* with the License. A copy of the License is located at
*
* http://aws.amazon.com/apache2.0/
*
* or in the "license" file accompanying this file. This file is distributed on an "AS IS" BASIS, WITHOUT WARRANTIES
* OR CONDITIONS OF ANY KIND, either express or implied. See the License for the specific language governing permissions
* and limitations under the License.
*/

#ifndef __MMAP_VALSTORE_H__
#define __MMAP_VALSTORE_H__

#include <omp.h>
#include "mmap_util.hpp"


namespace pecos {
namespace mmap_valstore {


/*
 * Row-major 2-D store of float32 values.
 *
 * The values are either a view over a caller-provided pointer (from_vals) or
 * obtained from a file opened through the member MmapStore (load).
 */
class Float32Store {
    typedef float float32_t;

    public:
        typedef float32_t value_type;
        typedef uint32_t col_type;
        typedef uint64_t row_type;

        Float32Store():
            n_row_(0),
            n_col_(0),
            vals_(nullptr)
        {}

        // Number of rows of the store.
        row_type n_row() const {
            return n_row_;
        }

        // Number of columns of the store.
        col_type n_col() const {
            return n_col_;
        }

        // View from external values pointer, does not hold memory
        void from_vals(const row_type n_row, const col_type n_col, const value_type* vals) {
            n_row_ = n_row;
            n_col_ = n_col;
            vals_ = vals;
        }

        // Gather the values at (sub_rows[i], sub_cols[j]) into the row-major buffer `ret`,
        // which must hold n_sub_row * n_sub_col values.
        // Indices are not range-checked; rows are distributed over `threads` OpenMP threads.
        void get_submatrix(const uint32_t n_sub_row, const uint32_t n_sub_col, const row_type* sub_rows, const col_type* sub_cols, value_type* ret, const int threads=1) const {
            #pragma omp parallel for schedule(static, 1) num_threads(threads)
            for (uint32_t i=0; i<n_sub_row; ++i) {
                // Offsets are computed in 64 bits: i * n_sub_col in uint32_t would wrap
                // once the output holds more than 2^32 values.
                const value_type* src_row = vals_ + static_cast<uint64_t>(sub_rows[i]) * n_col_;
                value_type* dst_row = ret + static_cast<uint64_t>(i) * n_sub_col;
                for (uint32_t j=0; j<n_sub_col; ++j) {
                    dst_row[j] = src_row[sub_cols[j]];
                }
            }
        }

        // Write n_row, n_col and then all n_row * n_col values to the store file inside `folderpath`.
        void save(const std::string& folderpath) const {
            auto mmap_s = pecos::mmap_util::MmapStore();
            mmap_s.open(mmap_file_name(folderpath), "w");

            mmap_s.fput_one<row_type>(n_row_);
            mmap_s.fput_one<col_type>(n_col_);
            mmap_s.fput_multiple<value_type>(vals_, n_row_ * n_col_);

            mmap_s.close();
        }

        // Open the store file inside `folderpath` ("r_lazy" mode when lazy_load is set, "r" otherwise)
        // and read n_row, n_col and the values pointer back in the order save() wrote them.
        void load(const std::string& folderpath, const bool lazy_load) {
            mmap_store_.open(mmap_file_name(folderpath), lazy_load?"r_lazy":"r");

            n_row_ = mmap_store_.fget_one<row_type>();
            n_col_ = mmap_store_.fget_one<col_type>();
            vals_ = mmap_store_.fget_multiple<value_type>(n_row_ * n_col_);
        }


    private:
        row_type n_row_;
        col_type n_col_;
        const value_type* vals_;  // not owned by this class when set through from_vals()

        pecos::mmap_util::MmapStore mmap_store_;  // file handle used by load()

        // Path of the store file inside `folderpath`.
        inline std::string mmap_file_name(const std::string& folderpath) const {
            return folderpath + "/numerical_float32_2d.mmap_store";
        }
};



} // end namespace mmap_valstore
} // end namespace pecos

#endif // end of __MMAP_VALSTORE_H__
Loading

0 comments on commit d4694fa

Please sign in to comment.