Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

feat(python): Support passing Worksheet objects to the write_excel method #18031

Merged
merged 2 commits into from
Aug 4, 2024
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
48 changes: 40 additions & 8 deletions py-polars/polars/dataframe/frame.py
Original file line number Diff line number Diff line change
Expand Up @@ -124,7 +124,7 @@
import torch
from great_tables import GT
from hvplot.plotting.core import hvPlotTabularPolars
from xlsxwriter import Workbook
from xlsxwriter import Workbook, Worksheet

from polars import DataType, Expr, LazyFrame, Series
from polars._typing import (
Expand Down Expand Up @@ -2802,8 +2802,8 @@ def write_avro(

def write_excel(
self,
workbook: Workbook | IO[bytes] | Path | str | None = None,
worksheet: str | None = None,
workbook: str | Workbook | IO[bytes] | Path | None = None,
worksheet: str | Worksheet | None = None,
*,
position: tuple[int, int] | str = "A1",
table_style: str | dict[str, Any] | None = None,
Expand Down Expand Up @@ -2838,14 +2838,15 @@ def write_excel(

Parameters
----------
workbook : Workbook
workbook : {str, Workbook}
String name or path of the workbook to create, BytesIO object to write
into, or an open `xlsxwriter.Workbook` object that has not been closed.
If None, writes to a `dataframe.xlsx` workbook in the working directory.
worksheet : str
Name of target worksheet; if None, writes to "Sheet1" when creating a new
workbook (note that writing to an existing workbook requires a valid
existing -or new- worksheet name).
worksheet : {str, Worksheet}
Name of target worksheet or an `xlsxwriter.Worksheet` object (in which
case `workbook` must be the parent `xlsxwriter.Workbook` object); if None,
writes to "Sheet1" when creating a new workbook (note that writing to an
existing workbook requires a valid existing -or new- worksheet name).
position : {str, tuple}
Table position in Excel notation (eg: "A1"), or a (row,col) integer tuple.
table_style : {str, dict}
Expand Down Expand Up @@ -3154,6 +3155,37 @@ def write_excel(
... hide_gridlines=True,
... sheet_zoom=125,
... )

Create and reference a Worksheet object directly, adding a basic chart.
Taking advantage of structured references to set chart series values and
categories is strongly recommended so that you do not have to calculate
cell positions with respect to the frame data and worksheet:

>>> with Workbook("basic_chart.xlsx") as wb: # doctest: +SKIP
... # create worksheet object and write frame data to it
... ws = wb.add_worksheet("demo")
... df.write_excel(
... workbook=wb,
... worksheet=ws,
... table_name="DataTable",
... table_style="Table Style Medium 26",
... hide_gridlines=True,
... )
... # create chart object, point to the written table
... # data using structured references, and style it
... chart = wb.add_chart({"type": "column"})
... chart.set_title({"name": "Example Chart"})
... chart.set_legend({"none": True})
... chart.set_style(38)
... chart.add_series(
... { # note the use of structured references
... "values": "=DataTable[points]",
... "categories": "=DataTable[id]",
... "data_labels": {"value": True},
... }
... )
... # add chart to the worksheet
... ws.insert_chart("D1", chart)
""" # noqa: W505
from polars.io.spreadsheet._write_utils import (
_unpack_multi_column_dict,
Expand Down
30 changes: 27 additions & 3 deletions py-polars/polars/io/spreadsheet/_write_utils.py
Original file line number Diff line number Diff line change
Expand Up @@ -522,15 +522,36 @@ def _xl_setup_table_options(
return table_style, table_options


def _xl_worksheet_in_workbook(
    wb: Workbook, ws: Worksheet, *, return_worksheet: bool = False
) -> bool | Worksheet:
    """
    Confirm that a worksheet object belongs to the given workbook.

    Parameters
    ----------
    wb
        Workbook whose `worksheets()` are searched.
    ws
        Worksheet object to look for; matched by identity, not by name.
    return_worksheet
        If True, return `ws` itself on success instead of `True`.

    Raises
    ------
    ValueError
        If `ws` is not one of the sheets returned by `wb.worksheets()`.
    """
    for sheet in wb.worksheets():
        # identity comparison: a same-named sheet from another workbook must not match
        if sheet is ws:
            if return_worksheet:
                return ws
            return True

    msg = f"the given workbook object {wb.filename!r} is not the parent of worksheet {ws.name!r}"
    raise ValueError(msg)


def _xl_setup_workbook(
workbook: Workbook | BytesIO | Path | str | None, worksheet: str | None = None
workbook: Workbook | BytesIO | Path | str | None,
worksheet: str | Worksheet | None = None,
) -> tuple[Workbook, Worksheet, bool]:
"""Establish the target excel workbook and worksheet."""
from xlsxwriter import Workbook
from xlsxwriter.worksheet import Worksheet

if isinstance(workbook, Workbook):
wb, can_close = workbook, False
ws = wb.get_worksheet_by_name(name=worksheet)
ws = (
worksheet
if (
isinstance(worksheet, Worksheet)
and _xl_worksheet_in_workbook(wb, worksheet)
)
else wb.get_worksheet_by_name(name=worksheet)
)
elif isinstance(worksheet, Worksheet):
msg = f"worksheet object requires the parent workbook object; found workbook={workbook!r}"
raise TypeError(msg)
else:
workbook_options = {
"nan_inf_to_errors": True,
Expand All @@ -550,7 +571,10 @@ def _xl_setup_workbook(
ws, can_close = None, True

if ws is None:
ws = wb.add_worksheet(name=worksheet)
if isinstance(worksheet, Worksheet):
ws = _xl_worksheet_in_workbook(wb, worksheet, return_worksheet=True)
else:
ws = wb.add_worksheet(name=worksheet)
return wb, ws, can_close


Expand Down
44 changes: 35 additions & 9 deletions py-polars/tests/unit/io/test_spreadsheet.py
Original file line number Diff line number Diff line change
Expand Up @@ -775,21 +775,19 @@ def test_excel_sparklines(engine: ExcelSpreadsheetEngine) -> None:
def test_excel_write_multiple_tables() -> None:
from xlsxwriter import Workbook

# note: checks that empty tables don't error on write
df1 = pl.DataFrame(schema={"colx": pl.Date, "coly": pl.String, "colz": pl.Float64})
df2 = pl.DataFrame(schema={"colx": pl.Date, "coly": pl.String, "colz": pl.Float64})
df3 = pl.DataFrame(schema={"colx": pl.Date, "coly": pl.String, "colz": pl.Float64})
df4 = pl.DataFrame(schema={"colx": pl.Date, "coly": pl.String, "colz": pl.Float64})
# note: also checks that empty tables don't error on write
df = pl.DataFrame(schema={"colx": pl.Date, "coly": pl.String, "colz": pl.Float64})

# write multiple frames to multiple worksheets
xls = BytesIO()
with Workbook(xls) as wb:
df1.write_excel(workbook=wb, worksheet="sheet1", position="A1")
df2.write_excel(workbook=wb, worksheet="sheet1", position="A6")
df3.write_excel(workbook=wb, worksheet="sheet2", position="A1")
df.write_excel(workbook=wb, worksheet="sheet1", position="A1")
df.write_excel(workbook=wb, worksheet="sheet1", position="A6")
df.write_excel(workbook=wb, worksheet="sheet2", position="A1")

# validate integration of externally-added formats
fmt = wb.add_format({"bg_color": "#ffff00"})
df4.write_excel(
df.write_excel(
workbook=wb,
worksheet="sheet3",
position="A1",
Expand All @@ -811,6 +809,34 @@ def test_excel_write_multiple_tables() -> None:
assert pl.read_excel(xls, sheet_name="sheet3").rows() == []


def test_excel_write_worksheet_object() -> None:
    """Frames can be written to an `xlsxwriter.Worksheet` object directly."""
    from xlsxwriter import Workbook

    df = pl.DataFrame({"colx": ["aaa", "bbb", "ccc"], "coly": [-1234, 0, 5678]})

    # happy path: the worksheet is passed together with its parent workbook, and
    # the same worksheet object is customised further after the frame is written
    buffer = BytesIO()
    with Workbook(buffer) as parent_wb:
        sheet = parent_wb.add_worksheet("frame_data")
        df.write_excel(parent_wb, worksheet=sheet)
        sheet.hide_zero()

    assert_frame_equal(df, pl.read_excel(buffer, sheet_name="frame_data"))

    # a worksheet paired with a workbook that is not its parent is rejected
    wrong_parent = (
        "the given workbook object .* is not the parent of worksheet 'frame_data'"
    )
    with pytest.raises(ValueError, match=wrong_parent):  # noqa: SIM117
        with Workbook(BytesIO()) as other_wb:
            df.write_excel(other_wb, worksheet=sheet)

    # a worksheet object supplied without any workbook object is rejected
    missing_parent = (
        "worksheet object requires the parent workbook object; found workbook=None"
    )
    with pytest.raises(TypeError, match=missing_parent):  # noqa: SIM117
        with Workbook(BytesIO()):
            df.write_excel(None, worksheet=sheet)


def test_excel_freeze_panes() -> None:
from xlsxwriter import Workbook

Expand Down