diff --git a/docs/user-guide/installation.md b/docs/user-guide/installation.md index 5d5c4bde25471..0642675dd6abd 100644 --- a/docs/user-guide/installation.md +++ b/docs/user-guide/installation.md @@ -67,9 +67,13 @@ To use the library import it into your project use polars::prelude::*; ``` -## Feature Flags +## Feature flags -By using the above command you install the core of Polars onto your system. However depending on your use case you might want to install the optional dependencies as well. These are made optional to minimize the footprint. The flags are different depending on the programming language. Throughout the user guide we will mention when a functionality is used that requires an additional dependency. +By using the above command you install the core of Polars onto your system. +However, depending on your use case, you might want to install the optional dependencies as well. +These are made optional to minimize the footprint. +The flags are different depending on the programming language. +Throughout the user guide, we will point out when a feature requires an additional dependency. ### Python @@ -78,19 +82,65 @@ By using the above command you install the core of Polars onto your system. 
Howe pip install 'polars[numpy,fsspec]' ``` -| Tag | Description | -| ---------- | ------------------------------------------------------------------------------------------------------------------------------------- | -| all | Install all optional dependencies (all of the following) | -| pandas | Install with Pandas for converting data to and from Pandas Dataframes/Series | -| numpy | Install with numpy for converting data to and from numpy arrays | -| pyarrow | Reading data formats using PyArrow | -| fsspec | Support for reading from remote file systems | -| connectorx | Support for reading from SQL databases | -| xlsx2csv | Support for reading from Excel files | -| deltalake | Support for reading from Delta Lake Tables | -| plot | Support for plotting Dataframes | -| style | Support for styling Dataframes | -| timezone | Timezone support, only needed if 1. you are on Python < 3.9 and/or 2. you are on Windows, otherwise no dependencies will be installed | +#### All + +| Tag | Description | +| --- | --------------------------------- | +| all | Install all optional dependencies | + +#### Interop + +| Tag | Description | +| -------- | ------------------------------------------------- | +| pandas | Convert data to and from pandas DataFrames/Series | +| numpy | Convert data to and from NumPy arrays | +| pyarrow | Convert data to and from PyArrow tables/arrays | +| pydantic | Convert data from Pydantic models to Polars | + +#### Excel + +| Tag | Description | +| ---------- | ----------------------------------------------- | +| calamine | Read from Excel files with the calamine engine | +| openpyxl | Read from Excel files with the openpyxl engine | +| xlsx2csv | Read from Excel files with the xlsx2csv engine | +| xlsxwriter | Write to Excel files with the XlsxWriter engine | +| excel | Install all supported Excel engines | + +#### Database + +| Tag | Description | +| ---------- | ----------------------------------------------------------------------------------- | +| 
adbc | Read from and write to databases with the Arrow Database Connectivity (ADBC) engine | +| connectorx | Read from databases with the ConnectorX engine | +| sqlalchemy | Write to databases with the SQLAlchemy engine | +| database | Install all supported database engines | + +#### Cloud + +| Tag | Description | +| ------ | ------------------------------------------ | +| fsspec | Read from and write to remote file systems | + +#### Other I/O + +| Tag | Description | +| --------- | ----------------------------------- | +| deltalake | Read from and write to Delta tables | +| iceberg | Read from Apache Iceberg tables | + +#### Other + +| Tag | Description | +| ----------- | ---------------------------------------------- | +| async | Collect LazyFrames asynchronously | +| cloudpickle | Serialize user-defined functions | +| graph | Visualize LazyFrames as a graph | +| plot | Plot DataFrames through the `plot` namespace | +| style | Style DataFrames through the `style` namespace | +| timezone | Timezone support* | + +_* Only needed if 1. you are on Python < 3.9 and/or 2. you are on Windows_ ### Rust diff --git a/py-polars/polars/dataframe/frame.py b/py-polars/polars/dataframe/frame.py index 5d3fd9c1c2ab1..bacc86fcfac0c 100644 --- a/py-polars/polars/dataframe/frame.py +++ b/py-polars/polars/dataframe/frame.py @@ -657,7 +657,6 @@ def style(self) -> GT: Format measure_b values to two decimal places: >>> df.style.fmt_number("measure_b", decimals=2) # doctest: +SKIP - """ if not _GREAT_TABLES_AVAILABLE: msg = "great_tables is required for `.style`" @@ -3604,7 +3603,7 @@ def write_database( Additional options to pass to the engine's associated insert method: * "sqlalchemy" - currently inserts using Pandas' `to_sql` method, though - this will eventually be phased out in favour of a native solution. + this will eventually be phased out in favor of a native solution. * "adbc" - inserts using the ADBC cursor's `adbc_ingest` method. 
Examples diff --git a/py-polars/pyproject.toml b/py-polars/pyproject.toml index 864b116947f26..adb7ff89f2158 100644 --- a/py-polars/pyproject.toml +++ b/py-polars/pyproject.toml @@ -39,31 +39,45 @@ Changelog = "https://github.com/pola-rs/polars/releases" [project.optional-dependencies] # NOTE: keep this list in sync with show_versions() and requirements-dev.txt -adbc = ["adbc_driver_manager", "adbc_driver_sqlite"] -async = ["nest_asyncio"] -cloudpickle = ["cloudpickle"] -connectorx = ["connectorx >= 0.3.2"] -deltalake = ["deltalake >= 0.15.0"] -fastexcel = ["fastexcel >= 0.9"] -fsspec = ["fsspec"] -gevent = ["gevent"] -iceberg = ["pyiceberg >= 0.5.0"] -matplotlib = ["matplotlib"] -# TODO: Remove upper bound when we support 2.0 +# Interop +# TODO: Remove NumPy upper bound once we support NumPy 2.0.0 # https://github.com/pola-rs/polars/issues/16998 numpy = ["numpy >= 1.16.0, < 2.0.0"] -openpyxl = ["openpyxl >= 3.0.0"] -pandas = ["pyarrow >= 7.0.0", "pandas"] -plot = ["hvplot >= 0.9.1"] +pandas = ["pandas", "polars[pyarrow]"] pyarrow = ["pyarrow >= 7.0.0"] pydantic = ["pydantic"] -sqlalchemy = ["sqlalchemy", "pandas"] -style = ["great-tables >= 0.8.0"] -timezone = ["backports.zoneinfo; python_version < '3.9'", "tzdata; platform_system == 'Windows'"] + +# Excel +calamine = ["fastexcel >= 0.9"] +openpyxl = ["openpyxl >= 3.0.0"] xlsx2csv = ["xlsx2csv >= 0.8.0"] xlsxwriter = ["xlsxwriter"] +excel = ["polars[calamine,openpyxl,xlsx2csv,xlsxwriter]"] + +# Database +adbc = ["adbc-driver-manager[dbapi]", "adbc-driver-sqlite[dbapi]"] +connectorx = ["connectorx >= 0.3.2"] +sqlalchemy = ["sqlalchemy", "polars[pandas]"] +database = ["polars[adbc,connectorx,sqlalchemy]", "nest-asyncio"] + +# Cloud +fsspec = ["fsspec"] + +# Other I/O +deltalake = ["deltalake >= 0.15.0"] +iceberg = ["pyiceberg >= 0.5.0"] + +# Other +async = ["gevent"] +cloudpickle = ["cloudpickle"] +graph = ["matplotlib"] +plot = ["hvplot >= 0.9.1", "polars[pandas]"] +style = ["great-tables >= 0.8.0"] +timezone = 
["backports.zoneinfo; python_version < '3.9'", "tzdata; platform_system == 'Windows'"] + +# All all = [ - "polars[adbc,async,cloudpickle,connectorx,deltalake,fastexcel,fsspec,gevent,style,numpy,pandas,plot,pyarrow,pydantic,iceberg,sqlalchemy,timezone,xlsx2csv,xlsxwriter]", + "polars[async,cloudpickle,database,deltalake,excel,fsspec,graph,iceberg,numpy,pandas,plot,pyarrow,pydantic,style,timezone]", ] [tool.maturin] diff --git a/py-polars/requirements-dev.txt b/py-polars/requirements-dev.txt index 49e661031b817..6bfc434d9b4f6 100644 --- a/py-polars/requirements-dev.txt +++ b/py-polars/requirements-dev.txt @@ -15,7 +15,7 @@ pip # DEPENDENCIES # ------------ -# Interoperability +# Interop # Unpin NumPy when support is implemented in numpy crate: # https://github.com/pola-rs/polars/issues/16998 numpy<2 @@ -28,37 +28,34 @@ numba backports.zoneinfo; python_version < '3.9' tzdata; platform_system == 'Windows' # Database -SQLAlchemy -adbc_driver_manager; python_version >= '3.9' and platform_system != 'Windows' -adbc_driver_sqlite; python_version >= '3.9' and platform_system != 'Windows' +sqlalchemy +adbc-driver-manager; python_version >= '3.9' and platform_system != 'Windows' +adbc-driver-sqlite; python_version >= '3.9' and platform_system != 'Windows' aiosqlite connectorx kuzu +nest-asyncio # Cloud cloudpickle fsspec s3fs[boto3] # Spreadsheet -lxml fastexcel>=0.9 openpyxl xlsx2csv -XlsxWriter -# Skip deltalake version 0.18.0 due to MacOS issues: -# https://github.com/delta-io/delta-rs/issues/2577 -deltalake>=0.15.0; platform_system != 'Darwin' -deltalake>=0.15.0,!=0.18.0; platform_system == 'Darwin' +xlsxwriter +# Other I/O +deltalake>=0.15.0 # Csv zstandard # Plotting hvplot>=0.9.1 -matplotlib # Styling great-tables>=0.8.0; python_version >= '3.9' -# Other -blake3 +# Async gevent -nest_asyncio +# Graph +matplotlib # ------- # TOOLING