Fix test collection and add corresponding marks for numpy

raulcd · Jun 28, 2024 · 97ef4fa · 97ef4fa
1 parent 6693e93
commit 97ef4fa
Show file tree

Hide file tree

Showing 30 changed files with 248 additions and 103 deletions.
diff --git a/ci/scripts/python_test.sh b/ci/scripts/python_test.sh
@@ -69,4 +69,4 @@ export PYARROW_TEST_PARQUET_ENCRYPTION
 export PYARROW_TEST_S3
 
 # Testing PyArrow
-pytest -r s ${PYTEST_ARGS} --pyargs ${PYTEST_PYARGS:-'pyarrow'}
+pytest -r s ${PYTEST_ARGS} --pyargs pyarrow
diff --git a/docker-compose.yml b/docker-compose.yml
@@ -1243,13 +1243,6 @@ services:
     environment:
       <<: [*common, *ccache, *sccache]
       PARQUET_REQUIRE_ENCRYPTION:  # inherit
-      # Some tests fail to be collected due to numpy fixtures.
-      # That is why we just collect tests from the following modules.
-      PYTEST_PYARGS: "pyarrow.tests.test_array
-                      pyarrow.tests.test_compute
-                      pyarrow.tests.test_dataset
-                      pyarrow.tests.test_flight
-                      pyarrow.tests.test_without_numpy"
       HYPOTHESIS_PROFILE:  # inherit
       PYARROW_TEST_HYPOTHESIS:  # inherit
     volumes: *conda-volumes

diff --git a/python/pyarrow/tests/interchange/test_conversion.py b/python/pyarrow/tests/interchange/test_conversion.py
@@ -16,11 +16,15 @@
 # under the License.
 
 from datetime import datetime as dt
-import numpy as np
 import pyarrow as pa
 from pyarrow.vendored.version import Version
 import pytest
 
+try:
+    import numpy as np
+except ImportError:
+    pytest.skip(reason="Failures on test collection due to numpy NOT enabled", allow_module_level=True)
+
 import pyarrow.interchange as pi
 from pyarrow.interchange.column import (
     _PyArrowColumn,

diff --git a/python/pyarrow/tests/interchange/test_interchange_spec.py b/python/pyarrow/tests/interchange/test_interchange_spec.py
@@ -19,10 +19,13 @@
 import hypothesis as h
 import hypothesis.strategies as st
 
-import numpy as np
+import pytest
+try:
+    import numpy as np
+except ImportError:
+    pytest.skip(reason="Failures on test collection due to numpy NOT enabled", allow_module_level=True)
 import pyarrow as pa
 import pyarrow.tests.strategies as past
-import pytest
 
 
 all_types = st.deferred(

diff --git a/python/pyarrow/tests/parquet/common.py b/python/pyarrow/tests/parquet/common.py
@@ -17,7 +17,10 @@
 
 import io
 
-import numpy as np
+try:
+    import numpy as np
+except ImportError:
+    np = None
 
 import pyarrow as pa
 from pyarrow.tests import util

diff --git a/python/pyarrow/tests/parquet/test_basic.py b/python/pyarrow/tests/parquet/test_basic.py
@@ -20,7 +20,10 @@
 import warnings
 from shutil import copytree
 
-import numpy as np
+try:
+    import numpy as np
+except ImportError:
+    np = None
 import pytest
 
 import pyarrow as pa

diff --git a/python/pyarrow/tests/parquet/test_data_types.py b/python/pyarrow/tests/parquet/test_data_types.py
@@ -18,7 +18,10 @@
 import decimal
 import io
 
-import numpy as np
+try:
+    import numpy as np
+except ImportError:
+    np = None
 import pytest
 
 import pyarrow as pa
@@ -173,6 +176,7 @@ def test_direct_read_dictionary_subfield():
     assert result[0].num_chunks == 1
 
 
+@pytest.mark.numpy
 def test_dictionary_array_automatically_read():
     # ARROW-3246
 
@@ -331,6 +335,7 @@ def test_column_of_lists(tempdir):
     tm.assert_frame_equal(df, df_read)
 
 
+@pytest.mark.numpy
 def test_large_list_records():
     # This was fixed in PARQUET-1100
 

diff --git a/python/pyarrow/tests/parquet/test_dataset.py b/python/pyarrow/tests/parquet/test_dataset.py
@@ -20,7 +20,10 @@
 import os
 import pathlib
 
-import numpy as np
+try:
+    import numpy as np
+except ImportError:
+    np = None
 import pytest
 import unittest.mock as mock
 
@@ -1153,6 +1156,7 @@ def test_partitioned_dataset(tempdir):
     pq.write_table(table, path / "output.parquet")
 
 
+@pytest.mark.numpy
 def test_dataset_read_dictionary(tempdir):
     path = tempdir / "ARROW-3325-dataset"
     t1 = pa.table([[util.rands(10) for i in range(5)] * 10], names=['f0'])

diff --git a/python/pyarrow/tests/parquet/test_datetime.py b/python/pyarrow/tests/parquet/test_datetime.py
@@ -19,7 +19,10 @@
 import io
 import warnings
 
-import numpy as np
+try:
+    import numpy as np
+except ImportError:
+    np = None
 import pytest
 
 import pyarrow as pa

diff --git a/python/pyarrow/tests/parquet/test_metadata.py b/python/pyarrow/tests/parquet/test_metadata.py
@@ -20,7 +20,10 @@
 from collections import OrderedDict
 import io
 
-import numpy as np
+try:
+    import numpy as np
+except ImportError:
+    np = None
 import pytest
 
 import pyarrow as pa
@@ -579,6 +582,7 @@ def test_write_metadata(tempdir):
         )
 
 
+@pytest.mark.numpy
 def test_table_large_metadata():
     # ARROW-8694
     my_schema = pa.schema([pa.field('f0', 'double')],

diff --git a/python/pyarrow/tests/parquet/test_pandas.py b/python/pyarrow/tests/parquet/test_pandas.py
@@ -18,7 +18,10 @@
 import io
 import json
 
-import numpy as np
+try:
+    import numpy as np
+except ImportError:
+    np = None
 import pytest
 
 import pyarrow as pa

diff --git a/python/pyarrow/tests/test_adhoc_memory_leak.py b/python/pyarrow/tests/test_adhoc_memory_leak.py
@@ -17,7 +17,10 @@
 
 import pytest
 
-import numpy as np
+try:
+    import numpy as np
+except ImportError:
+    np = None
 import pyarrow as pa
 
 import pyarrow.tests.util as test_util

diff --git a/python/pyarrow/tests/test_builder.py b/python/pyarrow/tests/test_builder.py
@@ -15,13 +15,9 @@
 # specific language governing permissions and limitations
 # under the License.
 
+import math
 import weakref
 
-try:
-    import numpy as np
-except ImportError:
-    np = None
-
 import pyarrow as pa
 from pyarrow.lib import StringBuilder, StringViewBuilder
 
@@ -38,7 +34,7 @@ def test_string_builder_append():
     sbuilder = StringBuilder()
     sbuilder.append(b"a byte string")
     sbuilder.append("a string")
-    sbuilder.append(np.nan)
+    sbuilder.append(math.nan)
     sbuilder.append(None)
     assert len(sbuilder) == 4
     assert sbuilder.null_count == 2
@@ -53,7 +49,7 @@ def test_string_builder_append():
 
 def test_string_builder_append_values():
     sbuilder = StringBuilder()
-    sbuilder.append_values([np.nan, None, "text", None, "other text"])
+    sbuilder.append_values([math.nan, None, "text", None, "other text"])
     assert sbuilder.null_count == 3
     arr = sbuilder.finish()
     assert arr.null_count == 3
@@ -63,7 +59,7 @@ def test_string_builder_append_values():
 
 def test_string_builder_append_after_finish():
     sbuilder = StringBuilder()
-    sbuilder.append_values([np.nan, None, "text", None, "other text"])
+    sbuilder.append_values([math.nan, None, "text", None, "other text"])
     arr = sbuilder.finish()
     sbuilder.append("No effect")
     expected = [None, None, "text", None, "other text"]
@@ -75,7 +71,7 @@ def test_string_view_builder():
     builder.append(b"a byte string")
     builder.append("a string")
     builder.append("a longer not-inlined string")
-    builder.append(np.nan)
+    builder.append(math.nan)
     builder.append_values([None, "text"])
     assert len(builder) == 6
     assert builder.null_count == 2

diff --git a/python/pyarrow/tests/test_cython.py b/python/pyarrow/tests/test_cython.py
@@ -80,6 +80,7 @@ def check_cython_example_module(mod):
         mod.cast_scalar(scal, pa.list_(pa.int64()))
 
 
+@pytest.mark.numpy
 @pytest.mark.cython
 def test_cython_api(tmpdir):
     """
@@ -162,6 +163,7 @@ def test_cython_api(tmpdir):
                               env=subprocess_env)
 
 
+@pytest.mark.numpy
 @pytest.mark.cython
 def test_visit_strings(tmpdir):
     with tmpdir.as_cwd():

diff --git a/python/pyarrow/tests/test_dataset_encryption.py b/python/pyarrow/tests/test_dataset_encryption.py
@@ -17,7 +17,10 @@
 
 import base64
 from datetime import timedelta
-import numpy as np
+try:
+    import numpy as np
+except ImportError:
+    np=None
 import pyarrow.fs as fs
 import pyarrow as pa
 
@@ -170,6 +173,7 @@ def test_write_dataset_parquet_without_encryption():
         _ = pformat.make_write_options(encryption_config="some value")
 
 
+@pytest.mark.numpy
 @pytest.mark.skipif(
     encryption_unavailable, reason="Parquet Encryption is not currently enabled"
 )

diff --git a/python/pyarrow/tests/test_dlpack.py b/python/pyarrow/tests/test_dlpack.py
@@ -19,7 +19,10 @@
 from functools import wraps
 import pytest
 
-import numpy as np
+try:
+    import numpy as np
+except ImportError:
+    pytest.skip(reason="Failures on test collection due to numpy NOT enabled", allow_module_level=True)
 
 import pyarrow as pa
 from pyarrow.vendored.version import Version

diff --git a/python/pyarrow/tests/test_extension_type.py b/python/pyarrow/tests/test_extension_type.py
@@ -23,12 +23,15 @@
 from uuid import uuid4, UUID
 import sys
 
-import numpy as np
+import pytest
+try:
+    import numpy as np
+except ImportError:
+    pytest.skip(reason="Failures on test collection due to numpy NOT enabled", allow_module_level=True)
+
 import pyarrow as pa
 from pyarrow.vendored.version import Version
 
-import pytest
-
 
 @contextlib.contextmanager
 def registered_extension_type(ext_type):

diff --git a/python/pyarrow/tests/test_feather.py b/python/pyarrow/tests/test_feather.py
@@ -23,7 +23,10 @@
 import hypothesis as h
 import hypothesis.strategies as st
 
-import numpy as np
+try:
+    import numpy as np
+except ImportError:
+    np = None
 
 import pyarrow as pa
 import pyarrow.tests.strategies as past
@@ -135,6 +138,7 @@ def f():
     pytest.raises(exc, f)
 
 
+@pytest.mark.numpy
 def test_dataset(version):
     num_values = (100, 100)
     num_files = 5
@@ -354,6 +358,7 @@ def test_buffer_bounds_error(version):
         _check_arrow_roundtrip(table)
 
 
+@pytest.mark.numpy
 def test_boolean_object_nulls(version):
     repeats = 100
     table = pa.Table.from_arrays(
@@ -540,6 +545,7 @@ def test_read_columns(version):
                             columns=['boo', 'woo'])
 
 
+@pytest.mark.numpy
 def test_overwritten_file(version):
     path = random_path()
     TEST_FILES.append(path)
@@ -675,6 +681,7 @@ def test_v2_compression_options():
         write_feather(df, buf, compression='snappy')
 
 
+@pytest.mark.numpy
 def test_v2_lz4_default_compression():
     # ARROW-8750: Make sure that the compression=None option selects lz4 if
     # it's available