Skip to content

Commit

Permalink
Improve test
Browse files: browse the repository at this point in the history
  • Loading branch information
judahrand committed Sep 8, 2023
1 parent 616147a commit f86d7da
Showing 1 changed file with 29 additions and 27 deletions.
56 changes: 29 additions & 27 deletions python/pyarrow/tests/test_dataset.py
Original file line number Diff line number Diff line change
@@ -5292,33 +5292,35 @@ def test_write_dataset_preserve_field_metadata(tempdir):


def test_write_dataset_write_page_index(tempdir):
    """Check that ``write_page_index`` is honoured by ``ds.write_dataset``.

    A small two-column table is written once for every combination of
    ``write_statistics`` and ``write_page_index``.  The first column chunk
    of the first row group of every written file is then inspected:

    * ``has_offset_index`` is expected to equal ``write_page_index``;
    * ``has_column_index`` is expected only when both ``write_page_index``
      and ``write_statistics`` are enabled.

    Parameters
    ----------
    tempdir : pathlib.Path
        Temporary directory fixture; one sub-directory is created per
        option combination.
    """
    # The input does not depend on the options under test: build it once.
    schema = pa.schema([
        pa.field("x", pa.int64()),
        pa.field("y", pa.int64())])
    arrays = [[1, 2, 3], [None, 5, None]]
    table = pa.Table.from_arrays(arrays, schema=schema)
    file_format = ds.ParquetFileFormat()

    for write_statistics in [True, False]:
        for write_page_index in [True, False]:
            # Use a distinct directory per combination so the files read
            # back can only be the ones written with these exact options
            # (never leftovers from a previous iteration).
            base_dir = tempdir / (
                f"write_page_index_{write_page_index}"
                f"_statistics_{write_statistics}")
            ds.write_dataset(
                table,
                base_dir,
                format="parquet",
                file_options=file_format.make_write_options(
                    write_statistics=write_statistics,
                    write_page_index=write_page_index,
                ),
                existing_data_behavior='overwrite_or_ignore',
            )
            ds1 = ds.dataset(base_dir, format="parquet")

            # Guard against the per-file assertions passing vacuously.
            assert ds1.files

            for file in ds1.files:
                # Page index flags are exposed on the column chunk metadata
                metadata = pq.read_metadata(file)
                cc = metadata.row_group(0).column(0)
                assert cc.has_offset_index is write_page_index
                # Logical ``and`` (not bitwise ``&``): both operands are
                # plain bools and the result is compared by identity.
                assert cc.has_column_index is (
                    write_page_index and write_statistics)


@pytest.mark.parametrize('dstype', [
Expand Down

0 comments on commit f86d7da

Please sign in to comment.