From 827b03c69560d9c20dece4c06f2b327f7c21eb03 Mon Sep 17 00:00:00 2001 From: Dane Pitkin Date: Thu, 3 Aug 2023 18:22:20 -0400 Subject: [PATCH 1/5] GH-36642: [Python][CI] Configure warnings as errors during pytest --- dev/tasks/tasks.yml | 4 +++- docker-compose.yml | 1 + python/pyarrow/tests/parquet/test_dataset.py | 2 +- python/pyarrow/tests/test_pandas.py | 7 ++++++- python/pyarrow/tests/test_tensor.py | 7 +++++-- 5 files changed, 16 insertions(+), 5 deletions(-) diff --git a/dev/tasks/tasks.yml b/dev/tasks/tasks.yml index 73b793162d959..f87959afa5b11 100644 --- a/dev/tasks/tasks.yml +++ b/dev/tasks/tasks.yml @@ -1253,6 +1253,7 @@ tasks: params: env: PYTHON: "{{ python_version }}" + PYTEST_ARGS: "-W error" image: conda-python {% endfor %} @@ -1265,7 +1266,7 @@ tasks: HYPOTHESIS_PROFILE: ci PYARROW_TEST_HYPOTHESIS: ON # limit to execute hypothesis tests only - PYTEST_ARGS: "-m hypothesis" + PYTEST_ARGS: "-m hypothesis -W error" image: conda-python-pandas test-conda-python-3.10-substrait: @@ -1274,6 +1275,7 @@ tasks: params: env: PYTHON: "3.10" + PYTEST_ARGS: "-W error" image: conda-python-substrait test-debian-11-python-3: diff --git a/docker-compose.yml b/docker-compose.yml index fe98a30d0b92b..3bf346ef94173 100644 --- a/docker-compose.yml +++ b/docker-compose.yml @@ -835,6 +835,7 @@ services: shm_size: *shm-size environment: <<: [*common, *ccache, *sccache] + PYTEST_ARGS: # inherit volumes: *conda-volumes command: &python-conda-command [" diff --git a/python/pyarrow/tests/parquet/test_dataset.py b/python/pyarrow/tests/parquet/test_dataset.py index cd991617c9fa8..3e6ff49265c32 100644 --- a/python/pyarrow/tests/parquet/test_dataset.py +++ b/python/pyarrow/tests/parquet/test_dataset.py @@ -1316,7 +1316,7 @@ def _test_write_to_dataset_with_partitions(base_path, output_df[col] = output_df[col].astype('category') if schema: - expected_date_type = schema.field_by_name('date').type.to_pandas_dtype() + expected_date_type = schema.field('date').type.to_pandas_dtype() output_df["date"] = output_df["date"].astype(expected_date_type) tm.assert_frame_equal(output_df, input_df) diff --git a/python/pyarrow/tests/test_pandas.py b/python/pyarrow/tests/test_pandas.py index 8bdc7253a4837..4548679821f17 100644 --- a/python/pyarrow/tests/test_pandas.py +++ b/python/pyarrow/tests/test_pandas.py @@ -2913,7 +2913,12 @@ def test_strided_data_import(self): 'f4', 'f8'] for type_name in numeric_dtypes: - cases.append(random_numbers.astype(type_name)) + if type_name in ['u4', 'u8']: + # Casting np.float64 -> uint32 or uint64 throws a RuntimeWarning + with pytest.warns(RuntimeWarning): + cases.append(random_numbers.astype(type_name)) + else: + cases.append(random_numbers.astype(type_name)) # strings cases.append(np.array([random_ascii(10) for i in range(N * K)], diff --git a/python/pyarrow/tests/test_tensor.py b/python/pyarrow/tests/test_tensor.py index aee46bc93690c..6b2843ce00dc6 100644 --- a/python/pyarrow/tests/test_tensor.py +++ b/python/pyarrow/tests/test_tensor.py @@ -82,8 +82,11 @@ def test_tensor_base_object(): @pytest.mark.parametrize('dtype_str,arrow_type', tensor_type_pairs) def test_tensor_numpy_roundtrip(dtype_str, arrow_type): dtype = np.dtype(dtype_str) - data = (100 * np.random.randn(10, 4)).astype(dtype) - + if dtype in [np.uint32, np.uint64]: + # Casting np.float64 -> uint32 or uint64 throws a RuntimeWarning + data = (np.random.randint(0, 100, size=(10, 4))).astype(dtype) + else: + data = (100 * np.random.randn(10, 4)).astype(dtype) tensor = pa.Tensor.from_numpy(data) assert tensor.type == arrow_type From 91958264cc59f368d665d3ebfe1e9fb303e03cca Mon Sep 17 00:00:00 2001 From: Dane Pitkin Date: Thu, 3 Aug 2023 18:28:19 -0400 Subject: [PATCH 2/5] Remove pytest args for conda-python-substrait --- dev/tasks/tasks.yml | 1 - 1 file changed, 1 deletion(-) diff --git a/dev/tasks/tasks.yml b/dev/tasks/tasks.yml index f87959afa5b11..941506b9c2abc 100644 --- a/dev/tasks/tasks.yml +++ b/dev/tasks/tasks.yml @@ -1275,7 +1275,6 @@ tasks: params: env: PYTHON: "3.10" - PYTEST_ARGS: "-W error" image: conda-python-substrait test-debian-11-python-3: From 6db95f66460be3453eefca4ea9ac7444bf95bddd Mon Sep 17 00:00:00 2001 From: Dane Pitkin Date: Thu, 3 Aug 2023 19:04:36 -0400 Subject: [PATCH 3/5] Don't assume a warning is thrown, since it varies by package/dependency versions --- python/pyarrow/tests/test_pandas.py | 8 +++----- python/pyarrow/tests/test_tensor.py | 8 ++++---- 2 files changed, 7 insertions(+), 9 deletions(-) diff --git a/python/pyarrow/tests/test_pandas.py b/python/pyarrow/tests/test_pandas.py index 4548679821f17..ef6ddd09933c9 100644 --- a/python/pyarrow/tests/test_pandas.py +++ b/python/pyarrow/tests/test_pandas.py @@ -2913,11 +2913,9 @@ def test_strided_data_import(self): 'f4', 'f8'] for type_name in numeric_dtypes: - if type_name in ['u4', 'u8']: - # Casting np.float64 -> uint32 or uint64 throws a RuntimeWarning - with pytest.warns(RuntimeWarning): - cases.append(random_numbers.astype(type_name)) - else: + # Casting np.float64 -> uint32 or uint64 throws a RuntimeWarning + with warnings.catch_warnings(): + warnings.simplefilter("ignore") cases.append(random_numbers.astype(type_name)) # strings diff --git a/python/pyarrow/tests/test_tensor.py b/python/pyarrow/tests/test_tensor.py index 6b2843ce00dc6..3e6a4ca8ed222 100644 --- a/python/pyarrow/tests/test_tensor.py +++ b/python/pyarrow/tests/test_tensor.py @@ -18,6 +18,7 @@ import os import sys import pytest +import warnings import weakref import numpy as np @@ -82,10 +83,9 @@ def test_tensor_base_object(): @pytest.mark.parametrize('dtype_str,arrow_type', tensor_type_pairs) def test_tensor_numpy_roundtrip(dtype_str, arrow_type): dtype = np.dtype(dtype_str) - if dtype in [np.uint32, np.uint64]: - # Casting np.float64 -> uint32 or uint64 throws a RuntimeWarning - data = (np.random.randint(0, 100, size=(10, 4))).astype(dtype) - else: + # Casting np.float64 -> uint32 or uint64 throws a RuntimeWarning + with warnings.catch_warnings(): + warnings.simplefilter("ignore") data = (100 * np.random.randn(10, 4)).astype(dtype) tensor = pa.Tensor.from_numpy(data) assert tensor.type == arrow_type From fb569f8486c0b400ecfeb643c80703b34d03675e Mon Sep 17 00:00:00 2001 From: Dane Pitkin Date: Fri, 4 Aug 2023 12:12:44 -0400 Subject: [PATCH 4/5] Fix hypothesis warnings --- python/pyarrow/tests/strategies.py | 22 ++++++++++++---------- 1 file changed, 12 insertions(+), 10 deletions(-) diff --git a/python/pyarrow/tests/strategies.py b/python/pyarrow/tests/strategies.py index 48f7e5381724a..bb88a4dcb7b2a 100644 --- a/python/pyarrow/tests/strategies.py +++ b/python/pyarrow/tests/strategies.py @@ -182,15 +182,17 @@ def struct_types(draw, item_strategy=primitive_types): def dictionary_types(key_strategy=None, value_strategy=None): - key_strategy = key_strategy or signed_integer_types - value_strategy = value_strategy or st.one_of( - bool_type, - integer_types, - st.sampled_from([pa.float32(), pa.float64()]), - binary_type, - string_type, - fixed_size_binary_type, - ) + if key_strategy is None: + key_strategy = signed_integer_types + if value_strategy is None: + value_strategy = st.one_of( + bool_type, + integer_types, + st.sampled_from([pa.float32(), pa.float64()]), + binary_type, + string_type, + fixed_size_binary_type, + ) return st.builds(pa.dictionary, key_strategy, value_strategy) @@ -368,7 +370,7 @@ def record_batches(draw, type, rows=None, max_fields=None): children = [draw(arrays(field.type, size=rows)) for field in schema] # TODO(kszucs): the names and schema arguments are not consistent with # Table.from_array's arguments - return pa.RecordBatch.from_arrays(children, names=schema) + return pa.RecordBatch.from_arrays(children, schema=schema) @st.composite From fca76638ff5f6032646344e3409495ea2a99bd49 Mon Sep 17 00:00:00 2001 From: Dane Pitkin Date: Fri, 4 Aug 2023 15:05:21 -0400 Subject: [PATCH 5/5] Fix ResourceWarning --- python/pyarrow/tests/parquet/conftest.py | 15 +++++++++------ 1 file changed, 9 insertions(+), 6 deletions(-) diff --git a/python/pyarrow/tests/parquet/conftest.py b/python/pyarrow/tests/parquet/conftest.py index 1e75493cdae03..461c24af22aa9 100644 --- a/python/pyarrow/tests/parquet/conftest.py +++ b/python/pyarrow/tests/parquet/conftest.py @@ -29,9 +29,10 @@ def datadir(base_datadir): def s3_bucket(s3_server): boto3 = pytest.importorskip('boto3') botocore = pytest.importorskip('botocore') + s3_bucket_name = 'test-s3fs' host, port, access_key, secret_key = s3_server['connection'] - s3 = boto3.resource( + s3_client = boto3.client( 's3', endpoint_url='http://{}:{}'.format(host, port), aws_access_key_id=access_key, @@ -39,13 +40,15 @@ def s3_bucket(s3_server): config=botocore.client.Config(signature_version='s3v4'), region_name='us-east-1' ) - bucket = s3.Bucket('test-s3fs') + try: - bucket.create() + s3_client.create_bucket(Bucket=s3_bucket_name) except Exception: - # we get BucketAlreadyOwnedByYou error with fsspec handler - pass - return 'test-s3fs' + pass # we get BucketAlreadyOwnedByYou error with fsspec handler + finally: + s3_client.close() + + return s3_bucket_name @pytest.fixture