From 0d563d5c10acdcd972ff4b4f5e8778679ed12203 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Jes=C3=BAs=20Arroyo=20Torrens?= Date: Mon, 24 Jun 2024 17:15:38 +0200 Subject: [PATCH] increase chunk-size to 10000 --- docs/source/user_guide/cli.rst | 6 +++--- raster_loader/cli/bigquery.py | 2 +- raster_loader/cli/snowflake.py | 2 +- raster_loader/io/datawarehouse.py | 4 ++-- raster_loader/io/snowflake.py | 2 +- raster_loader/tests/bigquery/test_io.py | 4 ++-- raster_loader/tests/snowflake/test_io.py | 4 ++-- 7 files changed, 12 insertions(+), 12 deletions(-) diff --git a/docs/source/user_guide/cli.rst b/docs/source/user_guide/cli.rst index 9002fec..e0d1ad1 100644 --- a/docs/source/user_guide/cli.rst +++ b/docs/source/user_guide/cli.rst @@ -163,10 +163,10 @@ due to excessive operations in the destination table: Exceeded rate limits: too many table update operations for this table. For more information, see https://cloud.google.com/bigquery/troubleshooting-errors ``` -The default chunk size is 1000 rows. +The default chunk size is 10000 rows. For example, the following command uploads the raster in chunks -of 2000 rows: +of 20000 rows: .. code-block:: bash @@ -175,7 +175,7 @@ of 2000 rows: --project my-gcp-project \ --dataset my-bigquery-dataset \ --table my-bigquery-table \ - --chunk_size 1000 + --chunk_size 20000 diff --git a/raster_loader/cli/bigquery.py b/raster_loader/cli/bigquery.py index 089ce46..caf90c7 100644 --- a/raster_loader/cli/bigquery.py +++ b/raster_loader/cli/bigquery.py @@ -57,7 +57,7 @@ def bigquery(args=None): multiple=True, ) @click.option( - "--chunk_size", help="The number of blocks to upload in each chunk.", default=1000 + "--chunk_size", help="The number of blocks to upload in each chunk.", default=10000 ) @click.option( "--overwrite", diff --git a/raster_loader/cli/snowflake.py b/raster_loader/cli/snowflake.py index 9897cf1..1574f37 100644 --- a/raster_loader/cli/snowflake.py +++ b/raster_loader/cli/snowflake.py @@ -66,7 +66,7 @@ def snowflake(args=None): multiple=True, ) @click.option( - "--chunk_size", help="The number of blocks to upload in each chunk.", default=1000 + "--chunk_size", help="The number of blocks to upload in each chunk.", default=10000 ) @click.option( "--overwrite", diff --git a/raster_loader/io/datawarehouse.py b/raster_loader/io/datawarehouse.py index 9107706..ee4e7f1 100644 --- a/raster_loader/io/datawarehouse.py +++ b/raster_loader/io/datawarehouse.py @@ -64,7 +64,7 @@ def upload_raster( fqn: str, band: int = 1, band_name: str = None, - chunk_size: int = 1000, + chunk_size: int = 10000, overwrite: bool = False, append: bool = False, ): @@ -80,7 +80,7 @@ def upload_raster( band_name : str, optional Name of the band chunk_size : int, optional - Number of blocks to upload in each chunk, by default 1000 + Number of blocks to upload in each chunk, by default 10000 overwrite : bool, optional Overwrite existing data in the table if it already exists, by default False append : bool, optional diff --git a/raster_loader/io/snowflake.py b/raster_loader/io/snowflake.py index 0660087..fcd10be 100644 --- a/raster_loader/io/snowflake.py +++ b/raster_loader/io/snowflake.py @@ -129,7 +129,7 @@ def upload_records( table_name=table, database=database, schema=schema, - chunk_size=1000, + chunk_size=10000, auto_create_table=True, overwrite=overwrite, )[0] diff --git a/raster_loader/tests/bigquery/test_io.py b/raster_loader/tests/bigquery/test_io.py index 2d25f7f..58f0be3 100644 --- a/raster_loader/tests/bigquery/test_io.py +++ b/raster_loader/tests/bigquery/test_io.py @@ -568,7 +568,7 @@ def test_rasterio_to_table_with_chunk_size(*args, **kwargs): success = connector.upload_raster( os.path.join(fixtures_dir, "mosaic_cog.tif"), f"{BQ_PROJECT_ID}.{BQ_DATASET_ID}.{table_name}", - chunk_size=1000, + chunk_size=10000, ) assert success @@ -604,7 +604,7 @@ def test_rasterio_to_table_invalid_raster(*args, **kwargs): connector.upload_raster( os.path.join(fixtures_dir, "mosaic.tif"), f"{BQ_PROJECT_ID}.{BQ_DATASET_ID}.{table_name}", - chunk_size=1000, + chunk_size=10000, ) diff --git a/raster_loader/tests/snowflake/test_io.py b/raster_loader/tests/snowflake/test_io.py index 005cb32..5e81245 100644 --- a/raster_loader/tests/snowflake/test_io.py +++ b/raster_loader/tests/snowflake/test_io.py @@ -546,7 +546,7 @@ def test_rasterio_to_table_with_chunk_size(*args, **kwargs): success = connector.upload_raster( os.path.join(fixtures_dir, "mosaic_cog.tif"), f"{SF_DATABASE}.{SF_SCHEMA}.{table_name}", - chunk_size=1000, + chunk_size=10000, ) assert success @@ -582,7 +582,7 @@ def test_rasterio_to_table_invalid_raster(*args, **kwargs): connector.upload_raster( os.path.join(fixtures_dir, "mosaic.tif"), f"{SF_DATABASE}.{SF_SCHEMA}.{table_name}", - chunk_size=1000, + chunk_size=10000, )