From 97ef4faadfa3162000dcf108f0c9f40ba96b9b38 Mon Sep 17 00:00:00 2001
From: =?UTF-8?q?Ra=C3=BAl=20Cumplido?=
Date: Wed, 26 Jun 2024 18:47:25 +0200
Subject: [PATCH] Fix test collection and add corresponding marks for numpy

---
 ci/scripts/python_test.sh | 2 +-
 docker-compose.yml | 7 -
 .../tests/interchange/test_conversion.py | 6 +-
 .../interchange/test_interchange_spec.py | 7 +-
 python/pyarrow/tests/parquet/common.py | 5 +-
 python/pyarrow/tests/parquet/test_basic.py | 5 +-
 .../pyarrow/tests/parquet/test_data_types.py | 7 +-
 python/pyarrow/tests/parquet/test_dataset.py | 6 +-
 python/pyarrow/tests/parquet/test_datetime.py | 5 +-
 python/pyarrow/tests/parquet/test_metadata.py | 6 +-
 python/pyarrow/tests/parquet/test_pandas.py | 5 +-
 .../pyarrow/tests/test_adhoc_memory_leak.py | 5 +-
 python/pyarrow/tests/test_builder.py | 14 +-
 python/pyarrow/tests/test_cython.py | 2 +
 .../pyarrow/tests/test_dataset_encryption.py | 6 +-
 python/pyarrow/tests/test_dlpack.py | 5 +-
 python/pyarrow/tests/test_extension_type.py | 9 +-
 python/pyarrow/tests/test_feather.py | 9 +-
 python/pyarrow/tests/test_io.py | 23 ++-
 python/pyarrow/tests/test_ipc.py | 26 +++-
 python/pyarrow/tests/test_json.py | 8 +-
 python/pyarrow/tests/test_pandas.py | 7 +-
 python/pyarrow/tests/test_scalars.py | 5 +-
 python/pyarrow/tests/test_schema.py | 6 +-
 python/pyarrow/tests/test_sparse_tensor.py | 5 +-
 python/pyarrow/tests/test_substrait.py | 2 +
 python/pyarrow/tests/test_table.py | 132 ++++++++++--------
 python/pyarrow/tests/test_tensor.py | 5 +-
 python/pyarrow/tests/test_types.py | 8 +-
 python/pyarrow/tests/test_udf.py | 13 +-
 30 files changed, 248 insertions(+), 103 deletions(-)

diff --git a/ci/scripts/python_test.sh b/ci/scripts/python_test.sh
index d79121b0c4259..f6b9b0d7cabaf 100755
--- a/ci/scripts/python_test.sh
+++ b/ci/scripts/python_test.sh
@@ -69,4 +69,4 @@ export PYARROW_TEST_PARQUET_ENCRYPTION
 export PYARROW_TEST_S3

 # Testing PyArrow
-pytest -r s ${PYTEST_ARGS} --pyargs ${PYTEST_PYARGS:-'pyarrow'}
+pytest -r s ${PYTEST_ARGS} --pyargs pyarrow
diff --git a/docker-compose.yml b/docker-compose.yml
index 82915451306c9..2be002e716889 100644
--- a/docker-compose.yml
+++ b/docker-compose.yml
@@ -1243,13 +1243,6 @@ services:
     environment:
       <<: [*common, *ccache, *sccache]
       PARQUET_REQUIRE_ENCRYPTION: # inherit
-      # Some tests fail to be collected due to numpy fixtures.
-      # That is why we just collect tests from the following modules.
-      PYTEST_PYARGS: "pyarrow.tests.test_array
-        pyarrow.tests.test_compute
-        pyarrow.tests.test_dataset
-        pyarrow.tests.test_flight
-        pyarrow.tests.test_without_numpy"
       HYPOTHESIS_PROFILE: # inherit
       PYARROW_TEST_HYPOTHESIS: # inherit
     volumes: *conda-volumes
diff --git a/python/pyarrow/tests/interchange/test_conversion.py b/python/pyarrow/tests/interchange/test_conversion.py
index 6d91bad57cef4..a55b1fe3316ac 100644
--- a/python/pyarrow/tests/interchange/test_conversion.py
+++ b/python/pyarrow/tests/interchange/test_conversion.py
@@ -16,11 +16,15 @@
 # under the License.

 from datetime import datetime as dt

-import numpy as np
 import pyarrow as pa
 from pyarrow.vendored.version import Version
 import pytest

+try:
+    import numpy as np
+except ImportError:
+    pytest.skip(reason="numpy is not installed", allow_module_level=True)
+
 import pyarrow.interchange as pi
 from pyarrow.interchange.column import (
     _PyArrowColumn,
diff --git a/python/pyarrow/tests/interchange/test_interchange_spec.py b/python/pyarrow/tests/interchange/test_interchange_spec.py
index 826089652bca6..f79bf1426c910 100644
--- a/python/pyarrow/tests/interchange/test_interchange_spec.py
+++ b/python/pyarrow/tests/interchange/test_interchange_spec.py
@@ -19,10 +19,13 @@

 import hypothesis as h
 import hypothesis.strategies as st
-import numpy as np
+import pytest
+try:
+    import numpy as np
+except ImportError:
+    pytest.skip(reason="numpy is not installed", allow_module_level=True)
 import pyarrow as pa
 import pyarrow.tests.strategies as past
-import pytest


 all_types = st.deferred(
diff --git a/python/pyarrow/tests/parquet/common.py b/python/pyarrow/tests/parquet/common.py
index b4a57ba0b1556..fd6ad94fbd6d3 100644
--- a/python/pyarrow/tests/parquet/common.py
+++ b/python/pyarrow/tests/parquet/common.py
@@ -17,7 +17,10 @@

 import io

-import numpy as np
+try:
+    import numpy as np
+except ImportError:
+    np = None

 import pyarrow as pa
 from pyarrow.tests import util
diff --git a/python/pyarrow/tests/parquet/test_basic.py b/python/pyarrow/tests/parquet/test_basic.py
index 56b967a0595b8..f23ef8a82e8bb 100644
--- a/python/pyarrow/tests/parquet/test_basic.py
+++ b/python/pyarrow/tests/parquet/test_basic.py
@@ -20,7 +20,10 @@
 import warnings
 from shutil import copytree

-import numpy as np
+try:
+    import numpy as np
+except ImportError:
+    np = None
 import pytest

 import pyarrow as pa
diff --git a/python/pyarrow/tests/parquet/test_data_types.py b/python/pyarrow/tests/parquet/test_data_types.py
index e6b66b00428fb..18c244894a0a2 100644
--- a/python/pyarrow/tests/parquet/test_data_types.py
+++ b/python/pyarrow/tests/parquet/test_data_types.py
@@ -18,7 +18,10 @@
 import decimal
 import io

-import numpy as np
+try:
+    import numpy as np
+except ImportError:
+    np = None
 import pytest

 import pyarrow as pa
@@ -173,6 +176,7 @@ def test_direct_read_dictionary_subfield():
     assert result[0].num_chunks == 1


+@pytest.mark.numpy
 def test_dictionary_array_automatically_read():
     # ARROW-3246

@@ -331,6 +335,7 @@ def test_column_of_lists(tempdir):
     tm.assert_frame_equal(df, df_read)


+@pytest.mark.numpy
 def test_large_list_records():
     # This was fixed in PARQUET-1100

diff --git a/python/pyarrow/tests/parquet/test_dataset.py b/python/pyarrow/tests/parquet/test_dataset.py
index 47e608a1404ff..849f4180caed0 100644
--- a/python/pyarrow/tests/parquet/test_dataset.py
+++ b/python/pyarrow/tests/parquet/test_dataset.py
@@ -20,7 +20,10 @@
 import os
 import pathlib

-import numpy as np
+try:
+    import numpy as np
+except ImportError:
+    np = None
 import pytest
 import unittest.mock as mock

@@ -1153,6 +1156,7 @@ def test_partitioned_dataset(tempdir):
     pq.write_table(table, path / "output.parquet")


+@pytest.mark.numpy
 def test_dataset_read_dictionary(tempdir):
     path = tempdir / "ARROW-3325-dataset"
     t1 = pa.table([[util.rands(10) for i in range(5)] * 10], names=['f0'])
diff --git a/python/pyarrow/tests/parquet/test_datetime.py b/python/pyarrow/tests/parquet/test_datetime.py
index 08fb1098322be..b89fd97cb91e6 100644
--- a/python/pyarrow/tests/parquet/test_datetime.py
+++ b/python/pyarrow/tests/parquet/test_datetime.py
@@ -19,7 +19,10 @@
 import io
 import warnings

-import numpy as np
+try:
+    import numpy as np
+except ImportError:
+    np = None
 import pytest

 import pyarrow as pa
diff --git a/python/pyarrow/tests/parquet/test_metadata.py b/python/pyarrow/tests/parquet/test_metadata.py
index 52ab59a961b3e..b9aac3b9e3ef0 100644
--- a/python/pyarrow/tests/parquet/test_metadata.py
+++ b/python/pyarrow/tests/parquet/test_metadata.py
@@ -20,7 +20,10 @@
 from collections import OrderedDict
 import io

-import numpy as np
+try:
+    import numpy as np
+except ImportError:
+    np = None
 import pytest

 import pyarrow as pa
@@ -579,6 +582,7 @@ def test_write_metadata(tempdir):
     )


+@pytest.mark.numpy
 def test_table_large_metadata():
     # ARROW-8694
     my_schema = pa.schema([pa.field('f0', 'double')],
diff --git a/python/pyarrow/tests/parquet/test_pandas.py b/python/pyarrow/tests/parquet/test_pandas.py
index b5913bf5c6b6e..2ea2f46873aef 100644
--- a/python/pyarrow/tests/parquet/test_pandas.py
+++ b/python/pyarrow/tests/parquet/test_pandas.py
@@ -18,7 +18,10 @@
 import io
 import json

-import numpy as np
+try:
+    import numpy as np
+except ImportError:
+    np = None
 import pytest

 import pyarrow as pa
diff --git a/python/pyarrow/tests/test_adhoc_memory_leak.py b/python/pyarrow/tests/test_adhoc_memory_leak.py
index cd381cf427dc3..76a766984dab6 100644
--- a/python/pyarrow/tests/test_adhoc_memory_leak.py
+++ b/python/pyarrow/tests/test_adhoc_memory_leak.py
@@ -17,7 +17,10 @@

 import pytest

-import numpy as np
+try:
+    import numpy as np
+except ImportError:
+    np = None

 import pyarrow as pa
 import pyarrow.tests.util as test_util
diff --git a/python/pyarrow/tests/test_builder.py b/python/pyarrow/tests/test_builder.py
index aea4619a5363e..9187a19b5fc24 100644
--- a/python/pyarrow/tests/test_builder.py
+++ b/python/pyarrow/tests/test_builder.py
@@ -15,13 +15,9 @@
 # specific language governing permissions and limitations
 # under the License.

+import math
 import weakref

-try:
-    import numpy as np
-except ImportError:
-    np = None
-
 import pyarrow as pa
 from pyarrow.lib import StringBuilder, StringViewBuilder
@@ -38,7 +34,7 @@ def test_string_builder_append():
     sbuilder = StringBuilder()
     sbuilder.append(b"a byte string")
     sbuilder.append("a string")
-    sbuilder.append(np.nan)
+    sbuilder.append(math.nan)
     sbuilder.append(None)
     assert len(sbuilder) == 4
     assert sbuilder.null_count == 2
@@ -53,7 +49,7 @@
 def test_string_builder_append_values():
     sbuilder = StringBuilder()
-    sbuilder.append_values([np.nan, None, "text", None, "other text"])
+    sbuilder.append_values([math.nan, None, "text", None, "other text"])
     assert sbuilder.null_count == 3
     arr = sbuilder.finish()
     assert arr.null_count == 3
@@ -63,7 +59,7 @@
 def test_string_builder_append_after_finish():
     sbuilder = StringBuilder()
-    sbuilder.append_values([np.nan, None, "text", None, "other text"])
+    sbuilder.append_values([math.nan, None, "text", None, "other text"])
     arr = sbuilder.finish()
     sbuilder.append("No effect")
     expected = [None, None, "text", None, "other text"]
@@ -75,7 +71,7 @@ def test_string_view_builder():
     builder.append(b"a byte string")
     builder.append("a string")
     builder.append("a longer not-inlined string")
-    builder.append(np.nan)
+    builder.append(math.nan)
     builder.append_values([None, "text"])
     assert len(builder) == 6
     assert builder.null_count == 2
diff --git a/python/pyarrow/tests/test_cython.py b/python/pyarrow/tests/test_cython.py
index 0eeae5d65f7d5..483aed673d7ec 100644
--- a/python/pyarrow/tests/test_cython.py
+++ b/python/pyarrow/tests/test_cython.py
@@ -80,6 +80,7 @@ def check_cython_example_module(mod):
         mod.cast_scalar(scal, pa.list_(pa.int64()))


+@pytest.mark.numpy
 @pytest.mark.cython
 def test_cython_api(tmpdir):
     """
@@ -162,6 +163,7 @@ def test_cython_api(tmpdir):
             env=subprocess_env)


+@pytest.mark.numpy
 @pytest.mark.cython
 def test_visit_strings(tmpdir):
     with tmpdir.as_cwd():
diff --git a/python/pyarrow/tests/test_dataset_encryption.py b/python/pyarrow/tests/test_dataset_encryption.py
index 0d8b4a152ab9f..6f5eb8b15fb64 100644
--- a/python/pyarrow/tests/test_dataset_encryption.py
+++ b/python/pyarrow/tests/test_dataset_encryption.py
@@ -17,7 +17,10 @@
 import base64
 from datetime import timedelta

-import numpy as np
+try:
+    import numpy as np
+except ImportError:
+    np = None
 import pyarrow.fs as fs

 import pyarrow as pa
@@ -170,6 +173,7 @@ def test_write_dataset_parquet_without_encryption():
         _ = pformat.make_write_options(encryption_config="some value")


+@pytest.mark.numpy
 @pytest.mark.skipif(
     encryption_unavailable, reason="Parquet Encryption is not currently enabled"
 )
diff --git a/python/pyarrow/tests/test_dlpack.py b/python/pyarrow/tests/test_dlpack.py
index 7cf3f4acdbd40..468d51106b643 100644
--- a/python/pyarrow/tests/test_dlpack.py
+++ b/python/pyarrow/tests/test_dlpack.py
@@ -19,7 +19,10 @@
 from functools import wraps
 import pytest

-import numpy as np
+try:
+    import numpy as np
+except ImportError:
+    pytest.skip(reason="numpy is not installed", allow_module_level=True)

 import pyarrow as pa
 from pyarrow.vendored.version import Version
diff --git a/python/pyarrow/tests/test_extension_type.py b/python/pyarrow/tests/test_extension_type.py
index 1c4d0175a2d97..51e97e832ce2a 100644
--- a/python/pyarrow/tests/test_extension_type.py
+++ b/python/pyarrow/tests/test_extension_type.py
@@ -23,12 +23,15 @@
 from uuid import uuid4, UUID
 import sys

-import numpy as np
+import pytest
+try:
+    import numpy as np
+except ImportError:
+    pytest.skip(reason="numpy is not installed", allow_module_level=True)
+
 import pyarrow as pa
 from pyarrow.vendored.version import Version

-import pytest
-

 @contextlib.contextmanager
 def registered_extension_type(ext_type):
diff --git a/python/pyarrow/tests/test_feather.py b/python/pyarrow/tests/test_feather.py
index 0064006489088..9ed1cfc03e0d6 100644
--- a/python/pyarrow/tests/test_feather.py
+++ b/python/pyarrow/tests/test_feather.py
@@ -23,7 +23,10 @@

 import hypothesis as h
 import hypothesis.strategies as st
-import numpy as np
+try:
+    import numpy as np
+except ImportError:
+    np = None

 import pyarrow as pa
 import pyarrow.tests.strategies as past
@@ -135,6 +138,7 @@ def f():
     pytest.raises(exc, f)


+@pytest.mark.numpy
 def test_dataset(version):
     num_values = (100, 100)
     num_files = 5
@@ -354,6 +358,7 @@ def test_buffer_bounds_error(version):
     _check_arrow_roundtrip(table)


+@pytest.mark.numpy
 def test_boolean_object_nulls(version):
     repeats = 100
     table = pa.Table.from_arrays(
@@ -540,6 +545,7 @@ def test_read_columns(version):
                    columns=['boo', 'woo'])


+@pytest.mark.numpy
 def test_overwritten_file(version):
     path = random_path()
     TEST_FILES.append(path)
@@ -675,6 +681,7 @@ def test_v2_compression_options():
     write_feather(df, buf, compression='snappy')


+@pytest.mark.numpy
 def test_v2_lz4_default_compression():
     # ARROW-8750: Make sure that the compression=None option selects lz4 if
     # it's available
diff --git a/python/pyarrow/tests/test_io.py b/python/pyarrow/tests/test_io.py
index 2306014c4194a..669cd5cfc3e07 100644
--- a/python/pyarrow/tests/test_io.py
+++ b/python/pyarrow/tests/test_io.py
@@ -29,7 +29,10 @@
 import tempfile
 import weakref

-import numpy as np
+try:
+    import numpy as np
+except ImportError:
+    np = None

 from pyarrow.util import guid
 from pyarrow import Codec
@@ -464,6 +467,7 @@ def test_buffer_hex(val, expected_hex_buffer):
     assert buf.hex() == expected_hex_buffer


+@pytest.mark.numpy
 def test_buffer_to_numpy():
     # Make sure creating a numpy array from an arrow buffer works
     byte_array = bytearray(20)
@@ -476,6 +480,7 @@
     assert array.base == buf


+@pytest.mark.numpy
 def test_buffer_from_numpy():
     # C-contiguous
     arr = np.arange(12, dtype=np.int8).reshape((3, 4))
@@ -493,6 +498,7 @@
         buf = pa.py_buffer(arr.T[::2])


+@pytest.mark.numpy
 def test_buffer_address():
     b1 = b'some data!'
     b2 = bytearray(b1)
@@ -513,6 +519,7 @@
     assert buf.address == arr.ctypes.data


+@pytest.mark.numpy
 def test_buffer_equals():
     # Buffer.equals() returns true iff the buffers have the same contents
     def eq(a, b):
@@ -624,6 +631,7 @@ def test_buffer_hashing():
     hash(pa.py_buffer(b'123'))


+@pytest.mark.numpy
 def test_buffer_protocol_respects_immutability():
     # ARROW-3228; NumPy's frombuffer ctor determines whether a buffer-like
     # object is mutable by first attempting to get a mutable buffer using
@@ -635,6 +643,7 @@
     assert not numpy_ref.flags.writeable


+@pytest.mark.numpy
 def test_foreign_buffer():
     obj = np.array([1, 2], dtype=np.int32)
     addr = obj.__array_interface__["data"][0]
@@ -669,6 +678,7 @@
     assert buf.size == 200


+@pytest.mark.numpy
 def test_non_cpu_buffer(pickle_module):
     cuda = pytest.importorskip("pyarrow.cuda")
     ctx = cuda.Context(0)
@@ -798,6 +808,7 @@
     assert pickle_module.loads(pickle_module.dumps(option)) == option


+@pytest.mark.numpy
 @pytest.mark.parametrize("compression", [
     pytest.param(
         "bz2", marks=pytest.mark.xfail(raises=pa.lib.ArrowNotImplementedError)
@@ -838,6 +849,7 @@
         pa.decompress(compressed_bytes, codec=compression)


+@pytest.mark.numpy
 @pytest.mark.parametrize("compression", [
     pytest.param(
         "bz2", marks=pytest.mark.xfail(raises=pa.lib.ArrowNotImplementedError)
@@ -996,6 +1008,7 @@ def make_buffer(bytes_obj):
     assert refcount_before == sys.getrefcount(val)


+@pytest.mark.numpy
 def test_nativefile_write_memoryview():
     f = pa.BufferOutputStream()
     data = b'ok'
@@ -1106,11 +1119,13 @@ def _check_native_file_reader(FACTORY, sample_data,
     assert f.tell() == ex_length


+@pytest.mark.numpy
 def test_memory_map_reader(sample_disk_data):
     _check_native_file_reader(pa.memory_map, sample_disk_data,
                               allow_read_out_of_bounds=False)


+@pytest.mark.numpy
 def test_memory_map_retain_buffer_reference(sample_disk_data):
     path, data = sample_disk_data

@@ -1127,6 +1142,7 @@
     assert buf.to_pybytes() == expected


+@pytest.mark.numpy
 def test_os_file_reader(sample_disk_data):
     _check_native_file_reader(pa.OSFile, sample_disk_data)

@@ -1142,6 +1158,7 @@ def _try_delete(path):
         pass


+@pytest.mark.numpy
 def test_memory_map_writer(tmpdir):
     SIZE = 4096
     arr = np.random.randint(0, 256, size=SIZE).astype('u1')
@@ -1183,6 +1200,7 @@
     assert f.read(3) == b'foo'


+@pytest.mark.numpy
 def test_memory_map_resize(tmpdir):
     SIZE = 4096
     arr = np.random.randint(0, 256, size=SIZE).astype(np.uint8)
@@ -1237,6 +1255,7 @@ def test_memory_map_deref_remove(tmpdir):
     os.remove(path)  # Shouldn't fail


+@pytest.mark.numpy
 def test_os_file_writer(tmpdir):
     SIZE = 4096
     arr = np.random.randint(0, 256, size=SIZE).astype('u1')
@@ -1518,6 +1537,7 @@ def test_buffered_input_stream_detach_non_seekable():
         raw.seek(2)


+@pytest.mark.numpy
 def test_buffered_output_stream():
     np_buf = np.zeros(100, dtype=np.int8)  # zero-initialized buffer
     buf = pa.py_buffer(np_buf)
@@ -1535,6 +1555,7 @@
     assert np_buf[:10].tobytes() == b'123456789\0'


+@pytest.mark.numpy
 def test_buffered_output_stream_detach():
     np_buf = np.zeros(100, dtype=np.int8)  # zero-initialized buffer
     buf = pa.py_buffer(np_buf)
diff --git a/python/pyarrow/tests/test_ipc.py b/python/pyarrow/tests/test_ipc.py
index d8eb6e926e4c0..35028b7b4af5a 100644
--- a/python/pyarrow/tests/test_ipc.py
+++ b/python/pyarrow/tests/test_ipc.py
@@ -24,7 +24,10 @@
 import threading
 import weakref

-import numpy as np
+try:
+    import numpy as np
+except ImportError:
+    np = None

 import pyarrow as pa
 from pyarrow.tests.util import changed_environ, invoke_script
@@ -161,14 +164,17 @@ def test_empty_file():
         pa.ipc.open_file(pa.BufferReader(buf))


+@pytest.mark.numpy
 def test_file_simple_roundtrip(file_fixture):
     file_fixture._check_roundtrip(as_table=False)


+@pytest.mark.numpy
 def test_file_write_table(file_fixture):
     file_fixture._check_roundtrip(as_table=True)


+@pytest.mark.numpy
 @pytest.mark.parametrize("sink_factory", [
     lambda: io.BytesIO(),
     lambda: pa.BufferOutputStream()
@@ -186,6 +192,7 @@
     assert result.equals(expected)


+@pytest.mark.numpy
 def test_open_file_from_buffer(file_fixture):
     # ARROW-2859; APIs accept the buffer protocol
     file_fixture.write_batches()
@@ -221,6 +228,7 @@
     assert_frame_equal(result, expected)


+@pytest.mark.numpy
 def test_file_pathlib(file_fixture, tmpdir):
     file_fixture.write_batches()
     source = file_fixture.get_source()
@@ -277,6 +285,7 @@
     assert_frame_equal(table.to_pandas(), df)


+@pytest.mark.numpy
 def test_open_stream_from_buffer(stream_fixture):
     # ARROW-2859
     stream_fixture.write_batches()
@@ -302,6 +311,7 @@
     assert tuple(st1) == tuple(stream_fixture.write_stats)


+@pytest.mark.numpy
 @pytest.mark.parametrize('options', [
     pa.ipc.IpcReadOptions(),
     pa.ipc.IpcReadOptions(use_threads=False),
@@ -320,6 +330,7 @@
     assert tuple(st) == tuple(stream_fixture.write_stats)


+@pytest.mark.numpy
 def test_open_stream_with_wrong_options(stream_fixture):
     stream_fixture.write_batches()
     source = stream_fixture.get_source()
@@ -328,6 +339,7 @@
         pa.ipc.open_stream(source, options=True)


+@pytest.mark.numpy
 @pytest.mark.parametrize('options', [
     pa.ipc.IpcReadOptions(),
     pa.ipc.IpcReadOptions(use_threads=False),
@@ -345,6 +357,7 @@
     assert st.num_record_batches == 5


+@pytest.mark.numpy
 def test_open_file_with_wrong_options(file_fixture):
     file_fixture.write_batches()
     source = file_fixture.get_source()
@@ -398,6 +411,7 @@ def test_stream_write_table_batches(stream_fixture):
                              ignore_index=True))


+@pytest.mark.numpy
 @pytest.mark.parametrize('use_legacy_ipc_format', [False, True])
 def test_stream_simple_roundtrip(stream_fixture, use_legacy_ipc_format):
     stream_fixture.use_legacy_ipc_format = use_legacy_ipc_format
@@ -418,6 +432,7 @@
         reader.read_next_batch()


+@pytest.mark.numpy
 @pytest.mark.zstd
 def test_compression_roundtrip():
     sink = io.BytesIO()
@@ -507,6 +522,7 @@ def test_write_options_legacy_exclusive(stream_fixture):
         stream_fixture.write_batches()


+@pytest.mark.numpy
 @pytest.mark.parametrize('options', [
     pa.ipc.IpcWriteOptions(),
     pa.ipc.IpcWriteOptions(allow_64bit=True),
@@ -702,6 +718,7 @@ def test_envvar_set_legacy_ipc_format():
     assert writer._metadata_version == pa.ipc.MetadataVersion.V4


+@pytest.mark.numpy
 def test_stream_read_all(stream_fixture):
     batches = stream_fixture.write_batches()
     file_contents = pa.BufferReader(stream_fixture.get_source())
@@ -740,6 +757,7 @@ def test_message_ctors_no_segfault():
     repr(pa.MessageReader())


+@pytest.mark.numpy
 def test_message_reader(example_messages):
     _, messages = example_messages
@@ -756,6 +774,7 @@
     assert msg.metadata_version == pa.MetadataVersion.V5


+@pytest.mark.numpy
 def test_message_serialize_read_message(example_messages):
     _, messages = example_messages

@@ -780,6 +799,7 @@
         pa.ipc.read_message(reader)


+@pytest.mark.numpy
 @pytest.mark.gzip
 def test_message_read_from_compressed(example_messages):
     # Part of ARROW-5910
@@ -796,12 +816,14 @@
         assert result.equals(message)


+@pytest.mark.numpy
 def test_message_read_schema(example_messages):
     batches, messages = example_messages
     schema = pa.ipc.read_schema(messages[0])
     assert schema.equals(batches[1].schema)


+@pytest.mark.numpy
 def test_message_read_record_batch(example_messages):
     batches, messages = example_messages

@@ -895,6 +917,7 @@ def socket_fixture():
     return SocketStreamFixture()


+@pytest.mark.numpy
 def test_socket_simple_roundtrip(socket_fixture):
     socket_fixture.start_server(do_read_all=False)
     writer_batches = socket_fixture.write_batches()
@@ -906,6 +929,7 @@
         assert reader_batches[i].equals(batch)


+@pytest.mark.numpy
 def test_socket_read_all(socket_fixture):
     socket_fixture.start_server(do_read_all=True)
     writer_batches = socket_fixture.write_batches()
diff --git a/python/pyarrow/tests/test_json.py b/python/pyarrow/tests/test_json.py
index a0a6174266310..d378294ee03d2 100644
--- a/python/pyarrow/tests/test_json.py
+++ b/python/pyarrow/tests/test_json.py
@@ -23,7 +23,10 @@
 import string
 import unittest

-import numpy as np
+try:
+    import numpy as np
+except ImportError:
+    np = None
 import pytest

 import pyarrow as pa
@@ -297,6 +300,7 @@ def test_explicit_schema_with_unexpected_behaviour(self):
                        match="JSON parse error: unexpected field"):
             self.read_bytes(rows, parse_options=opts)

+    @pytest.mark.numpy
     def test_small_random_json(self):
         data, expected = make_random_json(num_cols=2, num_rows=10)
         table = self.read_bytes(data)
@@ -304,6 +308,7 @@
         assert table.equals(expected)
         assert table.to_pydict() == expected.to_pydict()

+    @pytest.mark.numpy
     def test_load_large_json(self):
         data, expected = make_random_json(num_cols=2, num_rows=100100)
         # set block size is 10MB
@@ -312,6 +317,7 @@
         assert table.num_rows == 100100
         assert expected.num_rows == 100100

+    @pytest.mark.numpy
     def test_stress_block_sizes(self):
         # Test a number of small block sizes to stress block stitching
         data_base, expected = make_random_json(num_cols=2, num_rows=100)
diff --git a/python/pyarrow/tests/test_pandas.py b/python/pyarrow/tests/test_pandas.py
index 7d74a60dcb921..3f7ae3854114a 100644
--- a/python/pyarrow/tests/test_pandas.py
+++ b/python/pyarrow/tests/test_pandas.py
@@ -27,9 +27,12 @@

 import hypothesis as h
 import hypothesis.strategies as st
-import numpy as np
-import numpy.testing as npt
 import pytest
+try:
+    import numpy as np
+    import numpy.testing as npt
+except ImportError:
+    pytest.skip(reason="numpy is not installed", allow_module_level=True)

 from pyarrow.pandas_compat import get_logical_type, _pandas_api
 from pyarrow.tests.util import invoke_script, random_ascii, rands
diff --git a/python/pyarrow/tests/test_scalars.py b/python/pyarrow/tests/test_scalars.py
index 6a814111898b7..a62057979c985 100644
--- a/python/pyarrow/tests/test_scalars.py
+++ b/python/pyarrow/tests/test_scalars.py
@@ -21,7 +21,10 @@
 import sys
 import weakref

-import numpy as np
+try:
+    import numpy as np
+except ImportError:
+    pytest.skip(reason="numpy is not installed", allow_module_level=True)

 import pyarrow as pa
 import pyarrow.compute as pc
diff --git a/python/pyarrow/tests/test_schema.py b/python/pyarrow/tests/test_schema.py
index 8793c9e773c1d..4d435daf5bd9d 100644
--- a/python/pyarrow/tests/test_schema.py
+++ b/python/pyarrow/tests/test_schema.py
@@ -20,7 +20,10 @@
 import weakref

 import pytest
-import numpy as np
+try:
+    import numpy as np
+except ImportError:
+    np = None

 import pyarrow as pa
 import pyarrow.tests.util as test_util
@@ -184,6 +187,7 @@ def test_time_types():
         pa.time64('s')


+@pytest.mark.numpy
 def test_from_numpy_dtype():
     cases = [
         (np.dtype('bool'), pa.bool_()),
diff --git a/python/pyarrow/tests/test_sparse_tensor.py b/python/pyarrow/tests/test_sparse_tensor.py
index aa7da0a742086..7ba9e2b3e13db 100644
--- a/python/pyarrow/tests/test_sparse_tensor.py
+++ b/python/pyarrow/tests/test_sparse_tensor.py
@@ -19,7 +19,10 @@
 import sys
 import weakref

-import numpy as np
+try:
+    import numpy as np
+except ImportError:
+    pytestmark = pytest.mark.numpy

 import pyarrow as pa

 try:
diff --git a/python/pyarrow/tests/test_substrait.py b/python/pyarrow/tests/test_substrait.py
index 40700e4741321..01d468cd9e9cc 100644
--- a/python/pyarrow/tests/test_substrait.py
+++ b/python/pyarrow/tests/test_substrait.py
@@ -608,6 +608,7 @@ def table_provider(names, schema):
     assert res_tb == expected


+@pytest.mark.numpy
 def test_scalar_aggregate_udf_basic(varargs_agg_func_fixture):

     test_table = pa.Table.from_pydict(
@@ -756,6 +757,7 @@ def table_provider(names, _):
     assert res_tb == expected_tb


+@pytest.mark.numpy
 def test_hash_aggregate_udf_basic(varargs_agg_func_fixture):

     test_table = pa.Table.from_pydict(
diff --git a/python/pyarrow/tests/test_table.py b/python/pyarrow/tests/test_table.py
index 30c687b0d94df..15eaffa9f8d4c 100644
--- a/python/pyarrow/tests/test_table.py
+++ b/python/pyarrow/tests/test_table.py
@@ -20,7 +20,10 @@
 import sys
 import weakref

-import numpy as np
+try:
+    import numpy as np
+except ImportError:
+    np = None
 import pytest
 import pyarrow as pa
 import pyarrow.compute as pc
@@ -125,6 +128,7 @@ def test_chunked_array_can_combine_chunks_with_no_chunks():
     ).combine_chunks() == pa.array([], type=pa.bool_())


+@pytest.mark.numpy
 def test_chunked_array_to_numpy():
     data = pa.chunked_array([
         [1, 2, 3],
@@ -173,6 +177,7 @@ def test_chunked_array_str():
     ]"""


+@pytest.mark.numpy
 def test_chunked_array_getitem():
     data = [
         pa.array([1, 2, 3]),
@@ -972,65 +977,67 @@ def check_tensors(tensor, expected_tensor, type, size):
     assert tensor.strides == expected_tensor.strides


-@pytest.mark.parametrize('typ', [
-    np.uint8, np.uint16, np.uint32, np.uint64,
-    np.int8, np.int16, np.int32, np.int64,
-    np.float32, np.float64,
-])
+@pytest.mark.numpy
-def test_recordbatch_to_tensor_uniform_type(typ):
+def test_recordbatch_to_tensor_uniform_type():
-    arr1 = [1, 2, 3, 4, 5, 6, 7, 8, 9]
-    arr2 = [10, 20, 30, 40, 50, 60, 70, 80, 90]
-    arr3 = [100, 100, 100, 100, 100, 100, 100, 100, 100]
-    batch = pa.RecordBatch.from_arrays(
-        [
-            pa.array(arr1, type=pa.from_numpy_dtype(typ)),
-            pa.array(arr2, type=pa.from_numpy_dtype(typ)),
-            pa.array(arr3, type=pa.from_numpy_dtype(typ)),
-        ], ["a", "b", "c"]
-    )
-
-    result = batch.to_tensor(row_major=False)
-    x = np.column_stack([arr1, arr2, arr3]).astype(typ, order="F")
-    expected = pa.Tensor.from_numpy(x)
-    check_tensors(result, expected, pa.from_numpy_dtype(typ), 27)
-
-    result = batch.to_tensor()
-    x = np.column_stack([arr1, arr2, arr3]).astype(typ, order="C")
-    expected = pa.Tensor.from_numpy(x)
-    check_tensors(result, expected, pa.from_numpy_dtype(typ), 27)
-
-    # Test offset
-    batch1 = batch.slice(1)
-    arr1 = [2, 3, 4, 5, 6, 7, 8, 9]
-    arr2 = [20, 30, 40, 50, 60, 70, 80, 90]
-    arr3 = [100, 100, 100, 100, 100, 100, 100, 100]
-
-    result = batch1.to_tensor(row_major=False)
-    x = np.column_stack([arr1, arr2, arr3]).astype(typ, order="F")
-    expected = pa.Tensor.from_numpy(x)
-    check_tensors(result, expected, pa.from_numpy_dtype(typ), 24)
-
-    result = batch1.to_tensor()
-    x = np.column_stack([arr1, arr2, arr3]).astype(typ, order="C")
-    expected = pa.Tensor.from_numpy(x)
-    check_tensors(result, expected, pa.from_numpy_dtype(typ), 24)
-
-    batch2 = batch.slice(1, 5)
-    arr1 = [2, 3, 4, 5, 6]
-    arr2 = [20, 30, 40, 50, 60]
-    arr3 = [100, 100, 100, 100, 100]
-
-    result = batch2.to_tensor(row_major=False)
-    x = np.column_stack([arr1, arr2, arr3]).astype(typ, order="F")
-    expected = pa.Tensor.from_numpy(x)
-    check_tensors(result, expected, pa.from_numpy_dtype(typ), 15)
-
-    result = batch2.to_tensor()
-    x = np.column_stack([arr1, arr2, arr3]).astype(typ, order="C")
-    expected = pa.Tensor.from_numpy(x)
-    check_tensors(result, expected, pa.from_numpy_dtype(typ), 15)
-
+    for typ in [
+        np.uint8, np.uint16, np.uint32, np.uint64,
+        np.int8, np.int16, np.int32, np.int64,
+        np.float32, np.float64,
+    ]:
+        arr1 = [1, 2, 3, 4, 5, 6, 7, 8, 9]
+        arr2 = [10, 20, 30, 40, 50, 60, 70, 80, 90]
+        arr3 = [100, 100, 100, 100, 100, 100, 100, 100, 100]
+        batch = pa.RecordBatch.from_arrays(
+            [
+                pa.array(arr1, type=pa.from_numpy_dtype(typ)),
+                pa.array(arr2, type=pa.from_numpy_dtype(typ)),
+                pa.array(arr3, type=pa.from_numpy_dtype(typ)),
+            ], ["a", "b", "c"]
+        )
+
+        result = batch.to_tensor(row_major=False)
+        x = np.column_stack([arr1, arr2, arr3]).astype(typ, order="F")
+        expected = pa.Tensor.from_numpy(x)
+        check_tensors(result, expected, pa.from_numpy_dtype(typ), 27)
+
+        result = batch.to_tensor()
+        x = np.column_stack([arr1, arr2, arr3]).astype(typ, order="C")
+        expected = pa.Tensor.from_numpy(x)
+        check_tensors(result, expected, pa.from_numpy_dtype(typ), 27)
+
+        # Test offset
+        batch1 = batch.slice(1)
+        arr1 = [2, 3, 4, 5, 6, 7, 8, 9]
+        arr2 = [20, 30, 40, 50, 60, 70, 80, 90]
+        arr3 = [100, 100, 100, 100, 100, 100, 100, 100]
+
+        result = batch1.to_tensor(row_major=False)
+        x = np.column_stack([arr1, arr2, arr3]).astype(typ, order="F")
+        expected = pa.Tensor.from_numpy(x)
+        check_tensors(result, expected, pa.from_numpy_dtype(typ), 24)
+
+        result = batch1.to_tensor()
+        x = np.column_stack([arr1, arr2, arr3]).astype(typ, order="C")
+        expected = pa.Tensor.from_numpy(x)
+        check_tensors(result, expected, pa.from_numpy_dtype(typ), 24)
+
+        batch2 = batch.slice(1, 5)
+        arr1 = [2, 3, 4, 5, 6]
+        arr2 = [20, 30, 40, 50, 60]
+        arr3 = [100, 100, 100, 100, 100]
+
+        result = batch2.to_tensor(row_major=False)
+        x = np.column_stack([arr1, arr2, arr3]).astype(typ, order="F")
+        expected = pa.Tensor.from_numpy(x)
+        check_tensors(result, expected, pa.from_numpy_dtype(typ), 15)
+
+        result = batch2.to_tensor()
+        x = np.column_stack([arr1, arr2, arr3]).astype(typ, order="C")
+        expected = pa.Tensor.from_numpy(x)
+        check_tensors(result, expected, pa.from_numpy_dtype(typ), 15)


+@pytest.mark.numpy
 def test_recordbatch_to_tensor_uniform_float_16():
     arr1 = [1, 2, 3, 4, 5, 6, 7, 8, 9]
     arr2 = [10, 20, 30, 40, 50, 60, 70, 80, 90]
@@ -1054,6 +1061,7 @@
     check_tensors(result, expected, pa.float16(), 27)


+@pytest.mark.numpy
 def test_recordbatch_to_tensor_mixed_type():
     # uint16 + int16 = int32
     arr1 = [1, 2, 3, 4, 5, 6, 7, 8, 9]
@@ -1105,6 +1113,7 @@
     assert result.strides == expected.strides


+@pytest.mark.numpy
 def test_recordbatch_to_tensor_unsupported_mixed_type_with_float16():
     arr1 = [1, 2, 3, 4, 5, 6, 7, 8, 9]
     arr2 = [10, 20, 30, 40, 50, 60, 70, 80, 90]
@@ -1124,6 +1133,7 @@
         batch.to_tensor()


+@pytest.mark.numpy
 def test_recordbatch_to_tensor_nan():
     arr1 = [1, 2, 3, 4, np.nan, 6, 7, 8, 9]
     arr2 = [10, 20, 30, 40, 50, 60, 70, np.nan, 90]
@@ -1144,6 +1154,7 @@
     assert result.strides == expected.strides


+@pytest.mark.numpy
 def test_recordbatch_to_tensor_null():
     arr1 = [1, 2, 3, 4, None, 6, 7, 8, 9]
     arr2 = [10, 20, 30, 40, 50, 60, 70, None, 90]
@@ -1204,6 +1215,7 @@
     assert result.strides == expected.strides


+@pytest.mark.numpy
 def test_recordbatch_to_tensor_empty():
     batch = pa.RecordBatch.from_arrays(
         [
@@ -1295,6 +1307,7 @@ def test_slice_zero_length_table():
         table.to_pandas()


+@pytest.mark.numpy
 def test_recordbatchlist_schema_equals():
     a1 = np.array([1], dtype='uint32')
     a2 = np.array([4.0, 5.0], dtype='float64')
@@ -2123,6 +2136,7 @@ def test_table_unsafe_casting(cls):
     assert casted_table.equals(expected_table)


+@pytest.mark.numpy
 def test_invalid_table_construct():
     array = np.array([0, 1], dtype=np.uint8)
     u8 = pa.uint8()
@@ -3280,6 +3294,7 @@ def test_table_sort_by(cls):
     assert sorted_tab_dict["b"] == ["foo", "car", "bar", "foobar"]


+@pytest.mark.numpy
 @pytest.mark.parametrize("constructor", [pa.table, pa.record_batch])
 def test_numpy_asarray(constructor):
     table = constructor([[1, 2, 3], [4.0, 5.0, 6.0]], names=["a", "b"])
@@ -3312,6 +3327,7 @@
     assert result.dtype == "int32"


+@pytest.mark.numpy
 @pytest.mark.parametrize("constructor", [pa.table, pa.record_batch])
 def test_numpy_array_protocol(constructor):
     table = constructor([[1, 2, 3], [4.0, 5.0, 6.0]], names=["a", "b"])
diff --git a/python/pyarrow/tests/test_tensor.py b/python/pyarrow/tests/test_tensor.py
index 3e6a4ca8ed222..cf45aceaf980a 100644
--- a/python/pyarrow/tests/test_tensor.py
+++ b/python/pyarrow/tests/test_tensor.py
@@ -21,7 +21,10 @@
 import warnings
 import weakref

-import numpy as np
+try:
+    import numpy as np
+except ImportError:
+    pytestmark = pytest.mark.numpy

 import pyarrow as pa

diff --git a/python/pyarrow/tests/test_types.py b/python/pyarrow/tests/test_types.py
index f7b6040f510af..13faeb48679f2 100644
--- a/python/pyarrow/tests/test_types.py
+++ b/python/pyarrow/tests/test_types.py
@@ -30,7 +30,10 @@
     tzst = None
 import weakref

-import numpy as np
+try:
+    import numpy as np
+except ImportError:
+    np = None
 import pyarrow as pa
 import pyarrow.types as types
 import pyarrow.tests.strategies as past
@@ -1259,12 +1262,14 @@ def test_field_modified_copies():
     assert f0.equals(f0_)


+@pytest.mark.numpy
 def test_is_integer_value():
     assert pa.types.is_integer_value(1)
     assert pa.types.is_integer_value(np.int64(1))
     assert not pa.types.is_integer_value('1')


+@pytest.mark.numpy
 def test_is_float_value():
     assert not pa.types.is_float_value(1)
     assert pa.types.is_float_value(1.)
@@ -1272,6 +1277,7 @@
     assert not pa.types.is_float_value('1.0')


+@pytest.mark.numpy
 def test_is_boolean_value():
     assert not pa.types.is_boolean_value(1)
     assert pa.types.is_boolean_value(True)
diff --git a/python/pyarrow/tests/test_udf.py b/python/pyarrow/tests/test_udf.py
index c8e376fefb3b8..281b770b638ab 100644
--- a/python/pyarrow/tests/test_udf.py
+++ b/python/pyarrow/tests/test_udf.py
@@ -18,7 +18,10 @@

 import pytest

-import numpy as np
+try:
+    import numpy as np
+except ImportError:
+    np = None

 import pyarrow as pa
 from pyarrow import compute as pc
@@ -718,6 +721,7 @@ def test_udt_datasource1_exception():
     _test_datasource1_udt(datasource1_exception)


+@pytest.mark.numpy
 def test_scalar_agg_basic(unary_agg_func_fixture):
     arr = pa.array([10.0, 20.0, 30.0, 40.0, 50.0], pa.float64())
     result = pc.call_function("mean_udf", [arr])
@@ -725,6 +729,7 @@
     assert result == expected


+@pytest.mark.numpy
 def test_scalar_agg_empty(unary_agg_func_fixture):
     empty = pa.array([], pa.float64())

@@ -744,6 +749,7 @@
         pc.call_function("y=wrong_output_type(x)", [arr])


+@pytest.mark.numpy
 def test_scalar_agg_varargs(varargs_agg_func_fixture):
     arr1 = pa.array([10, 20, 30, 40, 50], pa.int64())
     arr2 = pa.array([1.0, 2.0, 3.0, 4.0, 5.0], pa.float64())
@@ -755,6 +761,7 @@
     assert result == expected


+@pytest.mark.numpy
 def test_scalar_agg_exception(exception_agg_func_fixture):
     arr = pa.array([10, 20, 30, 40, 50, 60], pa.int64())

@@ -762,6 +769,7 @@
         pc.call_function("y=exception_len(x)", [arr])


+@pytest.mark.numpy
 def test_hash_agg_basic(unary_agg_func_fixture):
     arr1 = pa.array([10.0, 20.0, 30.0, 40.0, 50.0], pa.float64())
     arr2 = pa.array([4, 2, 1, 2, 1], pa.int32())
@@ -780,6 +788,7 @@
     assert result.sort_by('id') == expected.sort_by('id')


+@pytest.mark.numpy
 def test_hash_agg_empty(unary_agg_func_fixture):
     arr1 = pa.array([], pa.float64())
     arr2 = pa.array([], pa.int32())
@@ -810,6 +819,7 @@
         table.group_by("id").aggregate([("value", "y=wrong_output_type(x)")])


+@pytest.mark.numpy
 def test_hash_agg_exception(exception_agg_func_fixture):
     arr1 = pa.array([10, 20, 30, 40, 50], pa.int64())
     arr2 = pa.array([4, 2, 1, 2, 1], pa.int32())
@@ -819,6 +829,7 @@
         table.group_by("id").aggregate([("value", "y=exception_len(x)")])


+@pytest.mark.numpy
 def test_hash_agg_random(sum_agg_func_fixture):
     """Test hash aggregate udf with randomly sampled data"""
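
Notes on the skipping strategy used above. The patch combines two mechanisms. Test modules that need numpy at import time (for example to build parametrization lists of dtypes) call pytest.skip(..., allow_module_level=True) in the ImportError handler, so the whole module is skipped during collection instead of erroring out. Modules that only need numpy inside individual tests bind np = None at import time and tag those tests with @pytest.mark.numpy, leaving the rest of the module collectible and runnable. The snippet below is a minimal, self-contained sketch of how such a mark can be wired up to an automatic skip; it is an illustration under the assumption of a plain conftest.py hook, and is not pyarrow's actual conftest machinery, which manages its own enable/disable flags for test groups like "numpy".

    # conftest.py -- hypothetical sketch, not pyarrow's real conftest
    import pytest

    try:
        import numpy  # noqa: F401
        HAVE_NUMPY = True
    except ImportError:
        HAVE_NUMPY = False


    def pytest_configure(config):
        # Register the mark so `pytest --strict-markers` accepts it.
        config.addinivalue_line("markers", "numpy: test requires numpy")


    def pytest_collection_modifyitems(config, items):
        # When numpy is missing, turn every @pytest.mark.numpy test into a
        # skip at collection time; the module-level pytest.skip() calls above
        # handle the files that cannot even be imported without numpy.
        if HAVE_NUMPY:
            return
        skip_numpy = pytest.mark.skip(reason="numpy is not installed")
        for item in items:
            if "numpy" in item.keywords:
                item.add_marker(skip_numpy)

With a hook along these lines, no special module selection (such as the PYTEST_PYARGS list removed from docker-compose.yml above) is needed: collection succeeds everywhere, marked tests report as skipped, and only the modules that genuinely cannot import without numpy are skipped wholesale.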