From 95ec80cc9a48fb6d1ff3e58429ef86f417e7c50b Mon Sep 17 00:00:00 2001 From: Jayaram Kancherla Date: Mon, 11 Sep 2023 10:53:20 -0700 Subject: [PATCH 1/2] the beginning of a painful migration --- README.md | 4 ++ bindings.cpp | 58 +++++++++++++++++ bindings.py | 30 +++++++++ setup.py | 38 +++++------ src/rds2py/{lib/parser.pyx => _Parser.py} | 0 src/rds2py/_cpphelpers.py | 39 +++++++++++ src/rds2py/lib/bindings.cpp | 58 +++++++++++++++++ src/rds2py/lib/{rds_parser.cpp => parser.cpp} | 64 ++++++++++--------- src/rds2py/lib/parser.pxd | 29 --------- 9 files changed, 240 insertions(+), 80 deletions(-) create mode 100644 bindings.cpp create mode 100644 bindings.py rename src/rds2py/{lib/parser.pyx => _Parser.py} (100%) create mode 100644 src/rds2py/_cpphelpers.py create mode 100644 src/rds2py/lib/bindings.cpp rename src/rds2py/lib/{rds_parser.cpp => parser.cpp} (83%) delete mode 100644 src/rds2py/lib/parser.pxd diff --git a/README.md b/README.md index dbeb3d1..ffc3a58 100644 --- a/README.md +++ b/README.md @@ -51,6 +51,10 @@ For more examples converting `data.frame`, `dgCMatrix`, `dgRMatrix`, `dgTMatrix` This project uses Cython to provide bindings from C++ to Python. +```shell +cpptypes src/rds2py/lib --py src/rds2py/_cpphelpers.py --cpp src/rds2py/lib/bindings.cpp --dll _core +``` + Steps to setup dependencies - - git submodules is initialized in `extern/rds2cpp` diff --git a/bindings.cpp b/bindings.cpp new file mode 100644 index 0000000..6422873 --- /dev/null +++ b/bindings.cpp @@ -0,0 +1,58 @@ +/* DO NOT MODIFY: this is automatically generated by the cpptypes */ + +#include +#include +#include + +#ifdef _WIN32 +#define PYAPI __declspec(dllexport) +#else +#define PYAPI +#endif + +static char* copy_error_message(const char* original) { + auto n = std::strlen(original); + auto copy = new char[n + 1]; + std::strcpy(copy, original); + return copy; +} + +inline uintptr_t py_parser_extract_robject(uintptr_t); + +inline uintptr_t py_parser_rds_file(std::string); + +extern "C" { + +PYAPI void free_error_message(char** msg) { + delete [] *msg; +} + +PYAPI inline uintptr_t py_py_parser_extract_robject(uintptr_t ptr, int32_t* errcode, char** errmsg) { + inline uintptr_t output = 0; + try { + output = py_parser_extract_robject(ptr); + } catch(std::exception& e) { + *errcode = 1; + *errmsg = copy_error_message(e.what()); + } catch(...) { + *errcode = 1; + *errmsg = copy_error_message("unknown C++ exception"); + } + return output; +} + +PYAPI inline uintptr_t py_py_parser_rds_file(std::string file, int32_t* errcode, char** errmsg) { + inline uintptr_t output = 0; + try { + output = py_parser_rds_file(file); + } catch(std::exception& e) { + *errcode = 1; + *errmsg = copy_error_message(e.what()); + } catch(...) { + *errcode = 1; + *errmsg = copy_error_message("unknown C++ exception"); + } + return output; +} + +} diff --git a/bindings.py b/bindings.py new file mode 100644 index 0000000..08a78a5 --- /dev/null +++ b/bindings.py @@ -0,0 +1,30 @@ +# DO NOT MODIFY: this is automatically generated by the cpptypes + +import os +import ctypes as ct + +def _catch_errors(f): + def wrapper(*args): + errcode = ct.c_int32(0) + errmsg = ct.c_char_p(0) + output = f(*args, ct.byref(errcode), ct.byref(errmsg)) + if errcode.value != 0: + msg = errmsg.value.decode('ascii') + lib.free_error_message(errmsg) + raise RuntimeError(msg) + return output + return wrapper + +# TODO: surely there's a better way than whatever this is. +dirname = os.path.dirname(os.path.abspath(__file__)) +contents = os.listdir(dirname) +lib = None +for x in contents: + if x.startswith('core') and not x.endswith("py"): + lib = ct.CDLL(os.path.join(dirname, x)) + break + +if lib is None: + raise ImportError("failed to find the core.* module") + +lib.free_error_message.argtypes = [ ct.POINTER(ct.c_char_p) ] \ No newline at end of file diff --git a/setup.py b/setup.py index 36fd2c5..d43999d 100644 --- a/setup.py +++ b/setup.py @@ -5,7 +5,6 @@ Learn more under: https://pyscaffold.org/ """ import numpy -from Cython.Build import cythonize from setuptools import setup from setuptools.extension import Extension @@ -13,26 +12,23 @@ try: setup( use_scm_version={"version_scheme": "no-guess-dev"}, - ext_modules=cythonize( - [ - Extension( - "rds2py.core", - ["src/rds2py/lib/rds_parser.cpp", "src/rds2py/lib/parser.pyx"], - include_dirs=[ - "extern/rds2cpp/include", - "extern/rds2cpp/_deps/byteme-src/include", - numpy.get_include(), - ], - language="c++", - extra_compile_args=[ - "-std=c++17", - ], - extra_link_args=["-lz"], - ) - ], - compiler_directives={"language_level": "3"}, - ), - ) + ext_modules=[ + Extension( + "rds2py.core", + ["src/rds2py/lib/rds_parser.cpp", "src/rds2py/lib/parser.pyx"], + include_dirs=[ + "extern/rds2cpp/include", + "extern/rds2cpp/_deps/byteme-src/include", + numpy.get_include(), + ], + language="c++", + extra_compile_args=[ + "-std=c++17", + ], + extra_link_args=["-lz"], + ) + ], + ), except: # noqa print( "\n\nAn error occurred while building the project, " diff --git a/src/rds2py/lib/parser.pyx b/src/rds2py/_Parser.py similarity index 100% rename from src/rds2py/lib/parser.pyx rename to src/rds2py/_Parser.py diff --git a/src/rds2py/_cpphelpers.py b/src/rds2py/_cpphelpers.py new file mode 100644 index 0000000..24404b7 --- /dev/null +++ b/src/rds2py/_cpphelpers.py @@ -0,0 +1,39 @@ +# DO NOT MODIFY: this is automatically generated by the cpptypes + +import os +import ctypes as ct + +def _catch_errors(f): + def wrapper(*args): + errcode = ct.c_int32(0) + errmsg = ct.c_char_p(0) + output = f(*args, ct.byref(errcode), ct.byref(errmsg)) + if errcode.value != 0: + msg = errmsg.value.decode('ascii') + lib.free_error_message(errmsg) + raise RuntimeError(msg) + return output + return wrapper + +# TODO: surely there's a better way than whatever this is. +dirname = os.path.dirname(os.path.abspath(__file__)) +contents = os.listdir(dirname) +lib = None +for x in contents: + if x.startswith('_core') and not x.endswith("py"): + lib = ct.CDLL(os.path.join(dirname, x)) + break + +if lib is None: + raise ImportError("failed to find the _core.* module") + +lib.free_error_message.argtypes = [ ct.POINTER(ct.c_char_p) ] + +lib.py_py_parser_extract_robject.restype = ct.c_void_p +lib.py_py_parser_extract_robject.argtypes = [ + ct.c_void_p, + ct.POINTER(ct.c_int32), + ct.POINTER(ct.c_char_p) +] + +lib.py_py_parser_rds_file.restype = ct.c_void_p diff --git a/src/rds2py/lib/bindings.cpp b/src/rds2py/lib/bindings.cpp new file mode 100644 index 0000000..ac90a12 --- /dev/null +++ b/src/rds2py/lib/bindings.cpp @@ -0,0 +1,58 @@ +/* DO NOT MODIFY: this is automatically generated by the cpptypes */ + +#include +#include +#include + +#ifdef _WIN32 +#define PYAPI __declspec(dllexport) +#else +#define PYAPI +#endif + +static char* copy_error_message(const char* original) { + auto n = std::strlen(original); + auto copy = new char[n + 1]; + std::strcpy(copy, original); + return copy; +} + +uintptr_t py_parser_extract_robject(uintptr_t); + +uintptr_t py_parser_rds_file(std::string); + +extern "C" { + +PYAPI void free_error_message(char** msg) { + delete [] *msg; +} + +PYAPI uintptr_t py_py_parser_extract_robject(uintptr_t ptr, int32_t* errcode, char** errmsg) { + uintptr_t output = 0; + try { + output = py_parser_extract_robject(ptr); + } catch(std::exception& e) { + *errcode = 1; + *errmsg = copy_error_message(e.what()); + } catch(...) { + *errcode = 1; + *errmsg = copy_error_message("unknown C++ exception"); + } + return output; +} + +PYAPI uintptr_t py_py_parser_rds_file(std::string file, int32_t* errcode, char** errmsg) { + uintptr_t output = 0; + try { + output = py_parser_rds_file(file); + } catch(std::exception& e) { + *errcode = 1; + *errmsg = copy_error_message(e.what()); + } catch(...) { + *errcode = 1; + *errmsg = copy_error_message("unknown C++ exception"); + } + return output; +} + +} diff --git a/src/rds2py/lib/rds_parser.cpp b/src/rds2py/lib/parser.cpp similarity index 83% rename from src/rds2py/lib/rds_parser.cpp rename to src/rds2py/lib/parser.cpp index 7537d1a..9540b00 100644 --- a/src/rds2py/lib/rds_parser.cpp +++ b/src/rds2py/lib/parser.cpp @@ -1,27 +1,32 @@ #include "rds2cpp/rds2cpp.hpp" #include -// Interface methods to Parser Object +/** Interface methods to parser object **/ -inline uintptr_t py_parser_rds_file(std::string file) { +//[[export]] +uintptr_t py_parser_rds_file(std::string file) { rds2cpp::Parsed res = rds2cpp::parse_rds(file); return reinterpret_cast(new rds2cpp::Parsed(std::move(res))); } -inline uintptr_t py_parser_extract_robject(uintptr_t ptr) { +//[[export]] +uintptr_t py_parser_extract_robject(uintptr_t ptr /** void_p */) { auto parsed = reinterpret_cast(ptr); return reinterpret_cast(parsed->object.get()); } -// probably don't need this, mostly for testing -inline void py_read_parsed_ptr(uintptr_t ptr) { +/** probably don't need this, mostly for testing **/ + + +void py_read_parsed_ptr(uintptr_t ptr /** void_p */) { auto parsed = reinterpret_cast(ptr); } -// Interface Methods to RObject +/** Interface Methods to RObject **/ -inline std::string py_robject_extract_type(uintptr_t ptr) { +//[export] +std::string py_robject_extract_type(uintptr_t ptr /** void_p */) { auto parsed = reinterpret_cast(ptr); switch (parsed->type()) { case rds2cpp::SEXPType::INT: @@ -50,7 +55,8 @@ int _size_(const rds2cpp::RObject* ptr) { return xptr->data.size(); } -inline int py_robject_extract_size(uintptr_t ptr) { +//[export] +int py_robject_extract_size(uintptr_t ptr /** void_p */) { auto parsed = reinterpret_cast(ptr); switch (parsed->type()) { case rds2cpp::SEXPType::INT: @@ -75,7 +81,8 @@ uintptr_t _get_vector_ptr(const rds2cpp::RObject* ptr) { return reinterpret_cast(xptr->data.data()); } -inline uintptr_t parse_robject_int_vector(uintptr_t ptr) { +//[export] +uintptr_t parse_robject_int_vector(uintptr_t ptr /** void_p */) { auto parsed = reinterpret_cast(ptr); switch (parsed->type()) { case rds2cpp::SEXPType::INT: @@ -91,19 +98,8 @@ inline uintptr_t parse_robject_int_vector(uintptr_t ptr) { return _get_vector_ptr(parsed); // avoid compiler warning. } -// inline uintptr_t parse_robject_double_vector(uintptr_t ptr) { -// auto parsed = reinterpret_cast(ptr); -// switch (parsed->type()) { -// case rds2cpp::SEXPType::REAL: -// return _get_vector_ptr(parsed); -// default: -// break; -// } -// throw std::runtime_error("cannot obtain numeric values for non-numeric RObject type"); -// return _get_vector_ptr(parsed); // avoid compiler warning. -// } - -inline std::vector parse_robject_string_vector(uintptr_t ptr) { +//[export] +std::vector parse_robject_string_vector(uintptr_t ptr /** void_p */) { auto parsed = reinterpret_cast(ptr); if (parsed->type() != rds2cpp::SEXPType::STR) { throw std::runtime_error("cannot return string values for non-string RObject type"); @@ -119,7 +115,8 @@ const rds2cpp::Attributes& _get_attr_ptr(const rds2cpp::RObject* ptr) { return aptr->attributes; } -inline std::vector parse_robject_attribute_names(uintptr_t ptr) { +//[export] +std::vector parse_robject_attribute_names(uintptr_t ptr /** void_p */) { auto parsed = reinterpret_cast(ptr); switch (parsed->type()) { case rds2cpp::SEXPType::INT: @@ -157,7 +154,8 @@ int _contains_attr_(const rds2cpp::RObject* ptr, const std::string& name) { return -1; } -inline int parse_robject_find_attribute(uintptr_t ptr, std::string name) { +//[export] +int parse_robject_find_attribute(uintptr_t ptr /** void_p */, std::string name) { auto parsed = reinterpret_cast(ptr); switch (parsed->type()) { case rds2cpp::SEXPType::INT: @@ -188,7 +186,8 @@ uintptr_t _load_attr_idx_(const rds2cpp::RObject* ptr, int i) { return reinterpret_cast(chosen.get()); } -inline uintptr_t parse_robject_load_attribute_by_index(uintptr_t ptr, int i) { +//[export] +uintptr_t parse_robject_load_attribute_by_index(uintptr_t ptr /** void_p */, int i) { auto parsed = reinterpret_cast(ptr); switch (parsed->type()) { case rds2cpp::SEXPType::INT: @@ -211,7 +210,8 @@ inline uintptr_t parse_robject_load_attribute_by_index(uintptr_t ptr, int i) { return _load_attr_idx_(parsed, i); // avoid compiler warnings. } -inline uintptr_t parse_robject_load_attribute_by_name(uintptr_t ptr, std::string name) { +//[export] +uintptr_t parse_robject_load_attribute_by_name(uintptr_t ptr /** void_p */, std::string name) { auto parsed = reinterpret_cast(ptr); int idx = parse_robject_find_attribute(ptr, name); if (idx < 0) { @@ -220,7 +220,8 @@ inline uintptr_t parse_robject_load_attribute_by_name(uintptr_t ptr, std::string return parse_robject_load_attribute_by_index(ptr, idx); } -inline uintptr_t parse_robject_load_vec_element(uintptr_t ptr, int i) { +//[export] +uintptr_t parse_robject_load_vec_element(uintptr_t ptr /** void_p */, int i) { auto parsed = reinterpret_cast(ptr); if (parsed->type() != rds2cpp::SEXPType::VEC) { throw std::runtime_error("cannot return list element for non-list R object"); @@ -229,7 +230,8 @@ inline uintptr_t parse_robject_load_vec_element(uintptr_t ptr, int i) { return reinterpret_cast(lptr->data[i].get()); } -inline std::string parse_robject_class_name(uintptr_t ptr) { +//[export] +std::string parse_robject_class_name(uintptr_t ptr /** void_p */) { auto parsed = reinterpret_cast(ptr); if (parsed->type() != rds2cpp::SEXPType::S4) { throw std::runtime_error("cannot return class name for non-S4 R object"); @@ -238,7 +240,8 @@ inline std::string parse_robject_class_name(uintptr_t ptr) { return sptr->class_name; } -inline std::string parse_robject_package_name(uintptr_t ptr) { +//[export] +std::string parse_robject_package_name(uintptr_t ptr /** void_p */) { auto parsed = reinterpret_cast(ptr); if (parsed->type() != rds2cpp::SEXPType::S4) { throw std::runtime_error("cannot return class name for non-S4 R object"); @@ -247,7 +250,8 @@ inline std::string parse_robject_package_name(uintptr_t ptr) { return sptr->package_name; } -inline std::pair parse_robject_dimensions(uintptr_t ptr) { +//[export] +std::pair parse_robject_dimensions(uintptr_t ptr /** void_p */) { auto dimobj = reinterpret_cast(ptr); if (dimobj->type() != rds2cpp::SEXPType::INT) { throw std::runtime_error("expected matrix dimensions to be integer"); diff --git a/src/rds2py/lib/parser.pxd b/src/rds2py/lib/parser.pxd deleted file mode 100644 index 7ac6cba..0000000 --- a/src/rds2py/lib/parser.pxd +++ /dev/null @@ -1,29 +0,0 @@ -# pretty basic Cython wrapper - -from libcpp.string cimport string as string_c -from libc.stdint cimport uintptr_t -from libcpp.vector cimport vector -from libcpp.utility cimport pair - -cdef extern from "rds_parser.cpp": - uintptr_t py_parser_rds_file(string_c file) nogil except + - uintptr_t py_parser_extract_robject(int ptr) nogil except + - - void py_read_parsed_ptr(uintptr_t ptr) nogil except + - - string_c py_robject_extract_type(uintptr_t ptr) nogil except + - int py_robject_extract_size(uintptr_t ptr) nogil except + - - uintptr_t parse_robject_int_vector(uintptr_t ptr) nogil except + - vector[string_c] parse_robject_string_vector(uintptr_t ptr) nogil except + - vector[string_c] parse_robject_attribute_names(uintptr_t ptr) nogil except + - - int parse_robject_find_attribute(uintptr_t ptr, string_c name) nogil except + - uintptr_t parse_robject_load_attribute_by_index(uintptr_t ptr, int i) nogil except + - uintptr_t parse_robject_load_attribute_by_name(uintptr_t ptr, string_c name) nogil except + - uintptr_t parse_robject_load_vec_element(uintptr_t ptr, int i) except + - - string_c parse_robject_class_name(uintptr_t ptr) nogil except + - string_c parse_robject_package_name(uintptr_t ptr) nogil except + - - pair[int, int] parse_robject_dimensions(uintptr_t ptr) nogil except + From faf71e778946351a90e2703b78e8ea2dd30dfe32 Mon Sep 17 00:00:00 2001 From: "pre-commit-ci[bot]" <66853113+pre-commit-ci[bot]@users.noreply.github.com> Date: Mon, 11 Sep 2023 17:53:34 +0000 Subject: [PATCH 2/2] [pre-commit.ci] auto fixes from pre-commit.com hooks for more information, see https://pre-commit.ci --- bindings.py | 9 ++++++--- src/rds2py/_cpphelpers.py | 11 +++++++---- 2 files changed, 13 insertions(+), 7 deletions(-) diff --git a/bindings.py b/bindings.py index 08a78a5..f2f24cf 100644 --- a/bindings.py +++ b/bindings.py @@ -3,28 +3,31 @@ import os import ctypes as ct + def _catch_errors(f): def wrapper(*args): errcode = ct.c_int32(0) errmsg = ct.c_char_p(0) output = f(*args, ct.byref(errcode), ct.byref(errmsg)) if errcode.value != 0: - msg = errmsg.value.decode('ascii') + msg = errmsg.value.decode("ascii") lib.free_error_message(errmsg) raise RuntimeError(msg) return output + return wrapper + # TODO: surely there's a better way than whatever this is. dirname = os.path.dirname(os.path.abspath(__file__)) contents = os.listdir(dirname) lib = None for x in contents: - if x.startswith('core') and not x.endswith("py"): + if x.startswith("core") and not x.endswith("py"): lib = ct.CDLL(os.path.join(dirname, x)) break if lib is None: raise ImportError("failed to find the core.* module") -lib.free_error_message.argtypes = [ ct.POINTER(ct.c_char_p) ] \ No newline at end of file +lib.free_error_message.argtypes = [ct.POINTER(ct.c_char_p)] diff --git a/src/rds2py/_cpphelpers.py b/src/rds2py/_cpphelpers.py index 24404b7..f482f88 100644 --- a/src/rds2py/_cpphelpers.py +++ b/src/rds2py/_cpphelpers.py @@ -3,37 +3,40 @@ import os import ctypes as ct + def _catch_errors(f): def wrapper(*args): errcode = ct.c_int32(0) errmsg = ct.c_char_p(0) output = f(*args, ct.byref(errcode), ct.byref(errmsg)) if errcode.value != 0: - msg = errmsg.value.decode('ascii') + msg = errmsg.value.decode("ascii") lib.free_error_message(errmsg) raise RuntimeError(msg) return output + return wrapper + # TODO: surely there's a better way than whatever this is. dirname = os.path.dirname(os.path.abspath(__file__)) contents = os.listdir(dirname) lib = None for x in contents: - if x.startswith('_core') and not x.endswith("py"): + if x.startswith("_core") and not x.endswith("py"): lib = ct.CDLL(os.path.join(dirname, x)) break if lib is None: raise ImportError("failed to find the _core.* module") -lib.free_error_message.argtypes = [ ct.POINTER(ct.c_char_p) ] +lib.free_error_message.argtypes = [ct.POINTER(ct.c_char_p)] lib.py_py_parser_extract_robject.restype = ct.c_void_p lib.py_py_parser_extract_robject.argtypes = [ ct.c_void_p, ct.POINTER(ct.c_int32), - ct.POINTER(ct.c_char_p) + ct.POINTER(ct.c_char_p), ] lib.py_py_parser_rds_file.restype = ct.c_void_p