diff --git a/awswrangler/athena/_read.py b/awswrangler/athena/_read.py
index 7edd84ffe..5501f89e8 100644
--- a/awswrangler/athena/_read.py
+++ b/awswrangler/athena/_read.py
@@ -807,9 +807,11 @@ def read_sql_query(  # pylint: disable=too-many-arguments,too-many-locals

     There are two batching strategies:

-    - If **chunksize=True**, a new DataFrame will be returned for each file in the query result.
+    - If **chunksize=True**, depending on the size of the data, one or more DataFrames will be
+      returned for each file in the query result.
+      Unlike **chunksize=INTEGER**, rows from different files will not be mixed in the resulting DataFrames.

-    - If **chunksize=INTEGER**, awswrangler will iterate on the data by number of rows igual the received INTEGER.
+    - If **chunksize=INTEGER**, awswrangler will iterate on the data by number of rows equal to the received INTEGER.

     `P.S.` `chunksize=True` is faster and uses less memory while `chunksize=INTEGER` is more precise
     in number of rows for each Dataframe.
@@ -1110,9 +1112,11 @@ def read_sql_table(

     There are two batching strategies:

-    - If **chunksize=True**, a new DataFrame will be returned for each file in the query result.
+    - If **chunksize=True**, depending on the size of the data, one or more DataFrames will be
+      returned for each file in the query result.
+      Unlike **chunksize=INTEGER**, rows from different files will not be mixed in the resulting DataFrames.

-    - If **chunksize=INTEGER**, awswrangler will iterate on the data by number of rows igual the received INTEGER.
+    - If **chunksize=INTEGER**, awswrangler will iterate on the data by number of rows equal to the received INTEGER.

     `P.S.` `chunksize=True` is faster and uses less memory while `chunksize=INTEGER` is more precise
     in number of rows for each Dataframe.
diff --git a/awswrangler/redshift.py b/awswrangler/redshift.py
index d82444fdc..b28c68884 100644
--- a/awswrangler/redshift.py
+++ b/awswrangler/redshift.py
@@ -1122,7 +1122,9 @@ def unload(

     There are two batching strategies on awswrangler:

-    - If **chunked=True**, a new DataFrame will be returned for each file in your path/dataset.
+    - If **chunked=True**, depending on the size of the data, one or more DataFrames will be
+      returned for each file in the path/dataset.
+      Unlike **chunked=INTEGER**, rows from different files will not be mixed in the resulting DataFrames.

     - If **chunked=INTEGER**, awswrangler will iterate on the data by number of rows (equal to the received INTEGER).

diff --git a/awswrangler/s3/_read_parquet.py b/awswrangler/s3/_read_parquet.py
index 40a2263ce..e32c26d49 100644
--- a/awswrangler/s3/_read_parquet.py
+++ b/awswrangler/s3/_read_parquet.py
@@ -600,9 +600,11 @@ def read_parquet(

     There are two batching strategies on awswrangler:

-    - If **chunked=True**, a new DataFrame will be returned for each file in your path/dataset.
+    - If **chunked=True**, depending on the size of the data, one or more DataFrames will be
+      returned for each file in the path/dataset.
+      Unlike **chunked=INTEGER**, rows from different files will not be mixed in the resulting DataFrames.

-    - If **chunked=INTEGER**, awswrangler will iterate on the data by number of rows igual the received INTEGER.
+    - If **chunked=INTEGER**, awswrangler will iterate on the data by number of rows equal to the received INTEGER.

     `P.S.` `chunked=True` if faster and uses less memory while `chunked=INTEGER` is more precise
     in number of rows for each Dataframe.
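As a rough usage sketch of the two `chunksize` strategies documented in the Athena hunks above (the `my_database` / `my_table` names are placeholders, not part of this change):

```python
import awswrangler as wr

# chunksize=True: one or more DataFrames per query-result file;
# rows from different files are never mixed, but chunk sizes vary.
for df in wr.athena.read_sql_query(
    sql="SELECT * FROM my_table",  # placeholder query
    database="my_database",        # placeholder database
    chunksize=True,
):
    print(len(df))

# chunksize=INTEGER: awswrangler targets exactly 100_000 rows per
# DataFrame, at the cost of extra slicing and concatenation across files.
for df in wr.athena.read_sql_query(
    sql="SELECT * FROM my_table",
    database="my_database",
    chunksize=100_000,
):
    print(len(df))
```

In both cases the call returns an iterator of DataFrames rather than a single DataFrame, which is what keeps memory bounded.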
@@ -652,7 +654,7 @@ def read_parquet(
     chunked : Union[int, bool]
         If passed will split the data in a Iterable of DataFrames (Memory friendly).
         If `True` awswrangler iterates on the data by files in the most efficient way without guarantee of chunksize.
-        If an `INTEGER` is passed awswrangler will iterate on the data by number of rows igual the received INTEGER.
+        If an `INTEGER` is passed awswrangler will iterate on the data by number of rows equal to the received INTEGER.
     dataset: bool
         If `True` read a parquet dataset instead of simple file(s) loading all the related partitions as columns.
     categories: Optional[List[str]], optional
@@ -830,10 +832,12 @@ def read_parquet_table(

     There are two batching strategies on awswrangler:

-    - If **chunked=True**, a new DataFrame will be returned for each file in your path/dataset.
+    - If **chunked=True**, depending on the size of the data, one or more DataFrames will be
+      returned for each file in the path/dataset.
+      Unlike **chunked=INTEGER**, rows from different files will not be mixed in the resulting DataFrames.

     - If **chunked=INTEGER**, awswrangler will paginate through files slicing and concatenating
-      to return DataFrames with the number of row igual the received INTEGER.
+      to return DataFrames with the number of rows equal to the received INTEGER.

     `P.S.` `chunked=True` if faster and uses less memory while `chunked=INTEGER` is more precise
     in number of rows for each Dataframe.
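The same pattern applies to the `chunked` argument covered by the `s3.read_parquet` / `read_parquet_table` hunks; a minimal sketch, with a placeholder bucket path:

```python
import awswrangler as wr

# chunked=True: file-aligned chunks; fastest and most memory friendly,
# but with no guarantee on the number of rows per DataFrame.
for df in wr.s3.read_parquet(
    path="s3://my-bucket/my-dataset/",  # placeholder path
    dataset=True,
    chunked=True,
):
    print(len(df))

# chunked=INTEGER: files are sliced and concatenated so each DataFrame
# targets exactly 1_000_000 rows (the last chunk may be smaller).
for df in wr.s3.read_parquet(
    path="s3://my-bucket/my-dataset/",
    dataset=True,
    chunked=1_000_000,
):
    print(len(df))
```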