diff --git a/matlab/src/cpp/arrow/matlab/array/proxy/array.cc b/matlab/src/cpp/arrow/matlab/array/proxy/array.cc
index f003a11525076..fc1d66ae244a3 100644
--- a/matlab/src/cpp/arrow/matlab/array/proxy/array.cc
+++ b/matlab/src/cpp/arrow/matlab/array/proxy/array.cc
@@ -17,6 +17,8 @@
 
 #include "arrow/matlab/array/proxy/array.h"
 
+#include "arrow/matlab/bit/bit_unpack_arrow_buffer.h"
+
 namespace arrow::matlab::array::proxy {
 
     Array::Array(const libmexclass::proxy::FunctionArguments& constructor_arguments) {
@@ -25,6 +27,7 @@ namespace arrow::matlab::array::proxy {
         REGISTER_METHOD(Array, toString);
         REGISTER_METHOD(Array, toMATLAB);
         REGISTER_METHOD(Array, length);
+        REGISTER_METHOD(Array, valid);
     }
 
     void Array::toString(libmexclass::proxy::method::Context& context) {
@@ -40,4 +43,30 @@ namespace arrow::matlab::array::proxy {
         auto length_mda = factory.createScalar(array->length());
         context.outputs[0] = length_mda;
     }
+
+    // Writes the array's validity bitmap to context.outputs[0] as an N-by-1
+    // MATLAB logical array, where N is the Arrow array's length.
+    void Array::valid(libmexclass::proxy::method::Context& context) {
+        auto array_length = static_cast<size_t>(array->length());
+
+        // If the Arrow array has no null values, then return a MATLAB
+        // logical array that is all "true" for the validity bitmap.
+        if (array->null_count() == 0) {
+            ::matlab::data::ArrayFactory factory;
+            auto validity_buffer = factory.createBuffer<bool>(array_length);
+            auto validity_buffer_ptr = validity_buffer.get();
+            std::fill(validity_buffer_ptr, validity_buffer_ptr + array_length, true);
+            auto valid_elements_mda = factory.createArrayFromBuffer({array_length, 1}, std::move(validity_buffer));
+            context.outputs[0] = valid_elements_mda;
+            return;
+        }
+
+        // NOTE(review): bitUnpackArrowBuffer reads the bitmap starting at bit 0,
+        // so this presumably relies on array->offset() == 0 - confirm before
+        // exposing sliced arrays.
+        auto validity_bitmap = array->null_bitmap();
+        auto valid_elements_mda = arrow::matlab::bit::bitUnpackArrowBuffer(validity_bitmap, array_length);
+        context.outputs[0] = valid_elements_mda;
+    }
+
 }
diff --git a/matlab/src/cpp/arrow/matlab/array/proxy/array.h b/matlab/src/cpp/arrow/matlab/array/proxy/array.h
index a0ef0a94f38f3..0a69f6fcad900 100644
--- a/matlab/src/cpp/arrow/matlab/array/proxy/array.h
+++ b/matlab/src/cpp/arrow/matlab/array/proxy/array.h
@@ -35,6 +35,8 @@ class Array : public libmexclass::proxy::Proxy {
 
         void length(libmexclass::proxy::method::Context& context);
 
+        void valid(libmexclass::proxy::method::Context& context);
+
         virtual void toMATLAB(libmexclass::proxy::method::Context& context) = 0;
 
         std::shared_ptr<arrow::Array> array;
diff --git a/matlab/src/cpp/arrow/matlab/array/proxy/numeric_array.h b/matlab/src/cpp/arrow/matlab/array/proxy/numeric_array.h
index 2f2a9925c0c3f..ad2242a7559c2 100644
--- a/matlab/src/cpp/arrow/matlab/array/proxy/numeric_array.h
+++ b/matlab/src/cpp/arrow/matlab/array/proxy/numeric_array.h
@@ -26,11 +26,19 @@
 #include "arrow/type_traits.h"
 
 #include "arrow/matlab/array/proxy/array.h"
+#include "arrow/matlab/bit/bit_pack_matlab_logical_array.h"
 
 #include "libmexclass/proxy/Proxy.h"
 
 namespace arrow::matlab::array::proxy {
 
+// Returns a pointer to the first element of a MATLAB logical array, viewed as bytes.
+// Declared `inline` rather than placed in an anonymous namespace, which a header should not use.
+inline const uint8_t* getUnpackedValidityBitmap(const ::matlab::data::TypedArray<bool>& valid_elements) {
+    const auto valid_elements_iterator(valid_elements.cbegin());
+    return reinterpret_cast<const uint8_t*>(valid_elements_iterator.operator->());
+}
+
 template<typename CType>
 class NumericArray :
public arrow::matlab::array::proxy::Array {
     public:
@@ -43,6 +51,8 @@ class NumericArray : public arrow::matlab::array::proxy::Array {
             const ::matlab::data::TypedArray<CType> numeric_mda = constructor_arguments[0];
             const ::matlab::data::TypedArray<bool> make_copy = constructor_arguments[1];
 
+            const auto has_validity_bitmap = constructor_arguments.getNumberOfElements() > 2;
+
             // Get raw pointer of mxArray
             auto it(numeric_mda.cbegin());
             auto dt = it.operator->();
@@ -50,8 +60,11 @@ class NumericArray : public arrow::matlab::array::proxy::Array {
             const auto make_deep_copy = make_copy[0];
 
             if (make_deep_copy) {
+                // Get the unpacked validity bitmap (if it exists)
+                auto unpacked_validity_bitmap = has_validity_bitmap ? getUnpackedValidityBitmap(constructor_arguments[2]) : nullptr;
+
                 BuilderType builder;
-                auto st = builder.AppendValues(dt, numeric_mda.getNumberOfElements());
+                auto st = builder.AppendValues(dt, numeric_mda.getNumberOfElements(), unpacked_validity_bitmap);
 
                 // TODO: handle error case
                 if (st.ok()) {
@@ -68,12 +81,11 @@ class NumericArray : public arrow::matlab::array::proxy::Array {
                 auto data_buffer = std::make_shared<arrow::Buffer>(reinterpret_cast<const uint8_t*>(dt),
                                                           sizeof(CType) * numeric_mda.getNumberOfElements());
 
-                // TODO: Implement null support
-                std::shared_ptr<arrow::Buffer> null_buffer = nullptr;
+                // Pack the validity bitmap values.
+                auto packed_validity_bitmap = has_validity_bitmap ? arrow::matlab::bit::bitPackMatlabLogicalArray(constructor_arguments[2]).ValueOrDie() : nullptr;
 
-                auto array_data = arrow::ArrayData::Make(data_type, length, {null_buffer, data_buffer});
+                auto array_data = arrow::ArrayData::Make(data_type, length, {packed_validity_bitmap, data_buffer});
                 array = arrow::MakeArray(array_data);
-
             }
         }
 
diff --git a/matlab/src/cpp/arrow/matlab/bit/bit_pack_matlab_logical_array.cc b/matlab/src/cpp/arrow/matlab/bit/bit_pack_matlab_logical_array.cc
new file mode 100644
index 0000000000000..45c6e39347d9b
--- /dev/null
+++ b/matlab/src/cpp/arrow/matlab/bit/bit_pack_matlab_logical_array.cc
@@ -0,0 +1,63 @@
+// Licensed to the Apache Software Foundation (ASF) under one
+// or more contributor license agreements.  See the NOTICE file
+// distributed with this work for additional information
+// regarding copyright ownership.  The ASF licenses this file
+// to you under the Apache License, Version 2.0 (the
+// "License"); you may not use this file except in compliance
+// with the License.  You may obtain a copy of the License at
+//
+//   http://www.apache.org/licenses/LICENSE-2.0
+//
+// Unless required by applicable law or agreed to in writing,
+// software distributed under the License is distributed on an
+// "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+// KIND, either express or implied.  See the License for the
+// specific language governing permissions and limitations
+// under the License.
+
+#include <cmath>
+
+#include <arrow/util/bit_util.h>
+#include <arrow/util/bitmap_generate.h>
+
+#include "arrow/matlab/bit/bit_pack_matlab_logical_array.h"
+
+namespace arrow::matlab::bit {
+
+    // Calculate the number of bytes required in the bit-packed validity buffer.
+    int64_t bitPackedLength(int64_t num_elements) {
+        // Each MATLAB logical value occupies a full byte (8 bits), while the
+        // packed bitmap stores one bit per element, so the packed length is
+        // num_elements / 8 rounded up. Integer arithmetic keeps this exact for
+        // any non-negative int64_t (a double cannot represent every such value).
+        return num_elements / 8 + (num_elements % 8 != 0 ? 1 : 0);
+    }
+
+    // Pack an unpacked MATLAB logical array into a bit-packed arrow::Buffer.
+    // Returns the newly allocated buffer, or the error from the buffer allocation.
+    arrow::Result<std::shared_ptr<arrow::Buffer>> bitPackMatlabLogicalArray(const ::matlab::data::TypedArray<bool> matlab_logical_array) {
+        // Number of elements (one byte each) in the unpacked MATLAB logical array.
+        const auto unpacked_buffer_length = matlab_logical_array.getNumberOfElements();
+
+        // Compute the bit packed length from the unpacked length.
+        const auto packed_buffer_length = bitPackedLength(unpacked_buffer_length);
+
+        ARROW_ASSIGN_OR_RAISE(auto packed_validity_bitmap_buffer, arrow::AllocateResizableBuffer(packed_buffer_length));
+
+        // Get an iterator to the raw bool data behind the MATLAB logical array; the
+        // generator below advances it by one element per call.
+        auto unpacked_bool_data_iterator = matlab_logical_array.cbegin();
+
+        // Iterate over the mxLogical array and write bit-packed bools to the arrow::Buffer.
+        // Call into a loop-unrolled Arrow utility for better performance when bit-packing.
+        auto generator = [&]() -> bool { return *(unpacked_bool_data_iterator++); };
+        const int64_t start_offset = 0;
+
+        auto mutable_data = packed_validity_bitmap_buffer->mutable_data();
+
+        arrow::internal::GenerateBitsUnrolled(mutable_data, start_offset, unpacked_buffer_length, generator);
+
+        return packed_validity_bitmap_buffer;
+    }
+
+}
diff --git a/matlab/src/cpp/arrow/matlab/bit/bit_pack_matlab_logical_array.h b/matlab/src/cpp/arrow/matlab/bit/bit_pack_matlab_logical_array.h
new file mode 100644
index 0000000000000..cceb22a2f3139
--- /dev/null
+++ b/matlab/src/cpp/arrow/matlab/bit/bit_pack_matlab_logical_array.h
@@ -0,0 +1,30 @@
+// Licensed to the Apache Software Foundation (ASF) under one
+// or more contributor license agreements.
See the NOTICE file
+// distributed with this work for additional information
+// regarding copyright ownership.  The ASF licenses this file
+// to you under the Apache License, Version 2.0 (the
+// "License"); you may not use this file except in compliance
+// with the License.  You may obtain a copy of the License at
+//
+//   http://www.apache.org/licenses/LICENSE-2.0
+//
+// Unless required by applicable law or agreed to in writing,
+// software distributed under the License is distributed on an
+// "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+// KIND, either express or implied.  See the License for the
+// specific language governing permissions and limitations
+// under the License.
+
+#pragma once
+
+#include <arrow/buffer.h>
+#include <arrow/result.h>
+
+#include "MatlabDataArray.hpp"
+
+namespace arrow::matlab::bit {
+    // Calculate the number of bytes required in the bit-packed validity buffer.
+    int64_t bitPackedLength(int64_t num_elements);
+    // Pack an unpacked MATLAB logical array into a bit-packed arrow::Buffer.
+    arrow::Result<std::shared_ptr<arrow::Buffer>> bitPackMatlabLogicalArray(const ::matlab::data::TypedArray<bool> matlab_logical_array);
+}
diff --git a/matlab/src/cpp/arrow/matlab/bit/bit_unpack_arrow_buffer.cc b/matlab/src/cpp/arrow/matlab/bit/bit_unpack_arrow_buffer.cc
new file mode 100644
index 0000000000000..a83cda8aca62d
--- /dev/null
+++ b/matlab/src/cpp/arrow/matlab/bit/bit_unpack_arrow_buffer.cc
@@ -0,0 +1,47 @@
+// Licensed to the Apache Software Foundation (ASF) under one
+// or more contributor license agreements.  See the NOTICE file
+// distributed with this work for additional information
+// regarding copyright ownership.  The ASF licenses this file
+// to you under the Apache License, Version 2.0 (the
+// "License"); you may not use this file except in compliance
+// with the License.  You may obtain a copy of the License at
+//
+//   http://www.apache.org/licenses/LICENSE-2.0
+//
+// Unless required by applicable law or agreed to in writing,
+// software distributed under the License is distributed on an
+// "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+// KIND, either express or implied.  See the License for the
+// specific language governing permissions and limitations
+// under the License.
+
+#include "arrow/matlab/bit/bit_unpack_arrow_buffer.h"
+
+#include "arrow/util/bitmap_visit.h"
+
+namespace arrow::matlab::bit {
+    // Unpack the first `length` bits of a bit-packed arrow::Buffer into a
+    // length-by-1 MATLAB logical array (one element per bit).
+    // NOTE(review): packed_buffer is dereferenced unconditionally, so callers
+    // presumably guarantee it is non-null - confirm.
+    ::matlab::data::TypedArray<bool> bitUnpackArrowBuffer(const std::shared_ptr<arrow::Buffer>& packed_buffer, int64_t length) {
+        const auto packed_buffer_ptr = packed_buffer->data();
+
+        ::matlab::data::ArrayFactory factory;
+
+        const auto array_length = static_cast<size_t>(length);
+
+        auto unpacked_buffer = factory.createBuffer<bool>(array_length);
+        auto unpacked_buffer_ptr = unpacked_buffer.get();
+        // Each visited bit is written as one bool into the unpacked buffer.
+        auto visitFcn = [&](const bool is_valid) { *unpacked_buffer_ptr++ = is_valid; };
+
+        // Bits are read starting at bit 0 of the packed buffer.
+        const int64_t start_offset = 0;
+        arrow::internal::VisitBitsUnrolled(packed_buffer_ptr, start_offset, length, visitFcn);
+
+        ::matlab::data::TypedArray<bool> unpacked_matlab_logical_array = factory.createArrayFromBuffer({array_length, 1}, std::move(unpacked_buffer));
+
+        return unpacked_matlab_logical_array;
+    }
+}
diff --git a/matlab/src/cpp/arrow/matlab/bit/bit_unpack_arrow_buffer.h b/matlab/src/cpp/arrow/matlab/bit/bit_unpack_arrow_buffer.h
new file mode 100644
index 0000000000000..9b88cb16de6ca
--- /dev/null
+++ b/matlab/src/cpp/arrow/matlab/bit/bit_unpack_arrow_buffer.h
@@ -0,0 +1,28 @@
+// Licensed to the Apache Software Foundation (ASF) under one
+// or more contributor license agreements.  See the NOTICE file
+// distributed with this work for additional information
+// regarding copyright ownership.  The ASF licenses this file
+// to you under the Apache License, Version 2.0 (the
+// "License"); you may not use this file except in compliance
+// with the License.  You may obtain a copy of the License at
+//
+//   http://www.apache.org/licenses/LICENSE-2.0
+//
+// Unless required by applicable law or agreed to in writing,
+// software distributed under the License is distributed on an
+// "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+// KIND, either express or implied.  See the License for the
+// specific language governing permissions and limitations
+// under the License.
+
+#pragma once
+
+#include "arrow/buffer.h"
+
+#include "MatlabDataArray.hpp"
+
+namespace arrow::matlab::bit {
+    // Unpack the first `length` bits of a bit-packed arrow::Buffer into a
+    // length-by-1 MATLAB logical array (one element per bit).
+    ::matlab::data::TypedArray<bool> bitUnpackArrowBuffer(const std::shared_ptr<arrow::Buffer>& packed_buffer, int64_t length);
+}
diff --git a/matlab/src/matlab/+arrow/+array/Array.m b/matlab/src/matlab/+arrow/+array/Array.m
index a1778b17c2aa8..c13c85167a4a2 100644
--- a/matlab/src/matlab/+arrow/+array/Array.m
+++ b/matlab/src/matlab/+arrow/+array/Array.m
@@ -24,6 +24,7 @@
 
     properties (Dependent)
         Length
+        Valid % Validity bitmap
     end
 
     methods
@@ -35,6 +36,10 @@
             numElements = obj.Proxy.length();
         end
 
+        function validElements = get.Valid(obj)
+            validElements = obj.Proxy.valid();
+        end
+
         function matlabArray = toMATLAB(obj)
             matlabArray = obj.Proxy.toMATLAB();
         end
diff --git a/matlab/src/matlab/+arrow/+array/Float64Array.m b/matlab/src/matlab/+arrow/+array/Float64Array.m
index 0b74f5a45555f..841bbcc6e7950 100644
--- a/matlab/src/matlab/+arrow/+array/Float64Array.m
+++ b/matlab/src/matlab/+arrow/+array/Float64Array.m
@@ -18,6 +18,7 @@
 
     properties (Hidden, SetAccess=private)
         MatlabArray
+        NullSubstitionValue = NaN;
     end
 
     methods
@@ -29,13 +30,23 @@
 
             validateattributes(data, "double", ["2d", "nonsparse", "real"]);
             if ~isempty(data), validateattributes(data, "double", "vector"); end
-            obj@arrow.array.Array("Name", "arrow.array.proxy.Float64Array", "ConstructorArguments", {data, opts.DeepCopy});
+            % Extract missing (i.e. null) values.
+            % TODO: Determine a more robust approach to handling "detection" of null values.
+            % For example, add a name-value pair that lets clients choose which values
+            % should be considered null (if any). For now, every NaN is treated as null.
+            validElements = ~isnan(data);
+            obj@arrow.array.Array("Name", "arrow.array.proxy.Float64Array", "ConstructorArguments", {data, opts.DeepCopy, validElements});
             % Store a reference to the array if not doing a deep copy
             if (~opts.DeepCopy), obj.MatlabArray = data; end
         end
 
         function data = double(obj)
-            data = obj.Proxy.toMATLAB();
+            data = obj.toMATLAB();
+        end
+
+        function matlabArray = toMATLAB(obj)
+            matlabArray = obj.Proxy.toMATLAB();
+            matlabArray(~obj.Valid) = obj.NullSubstitionValue; % overwrite elements flagged invalid (null)
         end
     end
 end
diff --git a/matlab/test/arrow/array/tFloat64Array.m b/matlab/test/arrow/array/tFloat64Array.m
index 6bd84d8f67e1c..b166fd3195ec7 100755
--- a/matlab/test/arrow/array/tFloat64Array.m
+++ b/matlab/test/arrow/array/tFloat64Array.m
@@ -37,5 +37,48 @@ function ErrorIfSparse(testCase, MakeDeepCopy)
             fcn = @() arrow.array.Float64Array(sparse(ones([10 1])), DeepCopy=MakeDeepCopy);
             testCase.verifyError(fcn, "MATLAB:expectedNonsparse");
         end
+
+        function ValidBasic(testCase, MakeDeepCopy)
+            % One NaN among regular values: only that element should be reported as invalid (null).
+            matlabArray = [1, NaN, 3]';
+            arrowArray = arrow.array.Float64Array(matlabArray, DeepCopy=MakeDeepCopy);
+            expectedValid = [true, false, true]';
+            testCase.verifyEqual(arrowArray.Valid, expectedValid);
+        end
+
+        function ValidNoNulls(testCase, MakeDeepCopy)
+            % No NaNs (i.e. no null values): every element should be reported as valid.
+            matlabArray = [1, 2, 3]';
+            arrowArray = arrow.array.Float64Array(matlabArray, DeepCopy=MakeDeepCopy);
+            expectedValid = [true, true, true]';
+            testCase.verifyEqual(arrowArray.Valid, expectedValid);
+        end
+
+        function ValidAllNulls(testCase, MakeDeepCopy)
+            % All NaNs (i.e. all null values): every element should be reported as invalid.
+            matlabArray = [NaN, NaN, NaN]';
+            arrowArray = arrow.array.Float64Array(matlabArray, DeepCopy=MakeDeepCopy);
+            expectedValid = [false, false, false]';
+            testCase.verifyEqual(arrowArray.Valid, expectedValid);
+        end
+
+        function ValidEmpty(testCase, MakeDeepCopy)
+            % Create an empty 0x0 MATLAB array. Valid is expected to be a 0x1 logical for every empty shape below.
+            matlabArray = double.empty(0, 0);
+            arrowArray = arrow.array.Float64Array(matlabArray, DeepCopy=MakeDeepCopy);
+            expectedValid = logical.empty(0, 1);
+            testCase.verifyEqual(arrowArray.Valid, expectedValid);
+
+            % Create an empty 0x1 MATLAB array.
+            matlabArray = double.empty(0, 1);
+            arrowArray = arrow.array.Float64Array(matlabArray, DeepCopy=MakeDeepCopy);
+            testCase.verifyEqual(arrowArray.Valid, expectedValid);
+
+            % Create an empty 1x0 MATLAB array.
+            matlabArray = double.empty(1, 0);
+            arrowArray = arrow.array.Float64Array(matlabArray, DeepCopy=MakeDeepCopy);
+            testCase.verifyEqual(arrowArray.Valid, expectedValid);
+        end
+
     end
 end
diff --git a/matlab/tools/cmake/BuildMatlabArrowInterface.cmake b/matlab/tools/cmake/BuildMatlabArrowInterface.cmake
index 92ed955ed4e0d..0dda3fb770997 100644
--- a/matlab/tools/cmake/BuildMatlabArrowInterface.cmake
+++ b/matlab/tools/cmake/BuildMatlabArrowInterface.cmake
@@ -33,8 +33,11 @@ set(MATLAB_ARROW_LIBMEXCLASS_CLIENT_FETCH_CONTENT_SOURCE_SUBDIR "libmexclass/cpp
 
 set(MATLAB_ARROW_LIBMEXCLASS_CLIENT_PROXY_LIBRARY_NAME arrowproxy)
 set(MATLAB_ARROW_LIBMEXCLASS_CLIENT_PROXY_LIBRARY_ROOT_INCLUDE_DIR "${CMAKE_SOURCE_DIR}/src/cpp")
-set(MATLAB_ARROW_LIBMEXCLASS_CLIENT_PROXY_INCLUDE_DIR "${CMAKE_SOURCE_DIR}/src/cpp/arrow/matlab/array/proxy")
-set(MATLAB_ARROW_LIBMEXCLASS_CLIENT_PROXY_SOURCES "${CMAKE_SOURCE_DIR}/src/cpp/arrow/matlab/array/proxy/array.cc")
+set(MATLAB_ARROW_LIBMEXCLASS_CLIENT_PROXY_INCLUDE_DIR "${CMAKE_SOURCE_DIR}/src/cpp/arrow/matlab/array/proxy"
+                                                      "${CMAKE_SOURCE_DIR}/src/cpp/arrow/matlab/bit")
+set(MATLAB_ARROW_LIBMEXCLASS_CLIENT_PROXY_SOURCES "${CMAKE_SOURCE_DIR}/src/cpp/arrow/matlab/array/proxy/array.cc"
+                                                  "${CMAKE_SOURCE_DIR}/src/cpp/arrow/matlab/bit/bit_pack_matlab_logical_array.cc"
+                                                  "${CMAKE_SOURCE_DIR}/src/cpp/arrow/matlab/bit/bit_unpack_arrow_buffer.cc")
 set(MATLAB_ARROW_LIBMEXCLASS_CLIENT_PROXY_FACTORY_INCLUDE_DIR "${CMAKE_SOURCE_DIR}/src/cpp/arrow/matlab/proxy")
 set(MATLAB_ARROW_LIBMEXCLASS_CLIENT_PROXY_FACTORY_SOURCES "${CMAKE_SOURCE_DIR}/src/cpp/arrow/matlab/proxy/factory.cc")
 