Skip to content

Commit

Permalink
feat(python): Support passing Worksheet objects to the `write_excel` method (#18031)
Browse files Browse the repository at this point in the history
  • Loading branch information
alexander-beedie authored Aug 4, 2024
1 parent 8adadf6 commit b1cb91e
Show file tree
Hide file tree
Showing 3 changed files with 102 additions and 20 deletions.
48 changes: 40 additions & 8 deletions py-polars/polars/dataframe/frame.py
Original file line number Diff line number Diff line change
Expand Up @@ -124,7 +124,7 @@
import torch
from great_tables import GT
from hvplot.plotting.core import hvPlotTabularPolars
from xlsxwriter import Workbook
from xlsxwriter import Workbook, Worksheet

from polars import DataType, Expr, LazyFrame, Series
from polars._typing import (
Expand Down Expand Up @@ -2802,8 +2802,8 @@ def write_avro(

def write_excel(
self,
workbook: Workbook | IO[bytes] | Path | str | None = None,
worksheet: str | None = None,
workbook: str | Workbook | IO[bytes] | Path | None = None,
worksheet: str | Worksheet | None = None,
*,
position: tuple[int, int] | str = "A1",
table_style: str | dict[str, Any] | None = None,
Expand Down Expand Up @@ -2838,14 +2838,15 @@ def write_excel(
Parameters
----------
workbook : Workbook
workbook : {str, Workbook}
String name or path of the workbook to create, BytesIO object to write
into, or an open `xlsxwriter.Workbook` object that has not been closed.
If None, writes to a `dataframe.xlsx` workbook in the working directory.
worksheet : str
Name of target worksheet; if None, writes to "Sheet1" when creating a new
workbook (note that writing to an existing workbook requires a valid
existing -or new- worksheet name).
worksheet : {str, Worksheet}
Name of target worksheet or an `xlsxwriter.Worksheet` object (in which
case `workbook` must be the parent `xlsxwriter.Workbook` object); if None,
writes to "Sheet1" when creating a new workbook (note that writing to an
existing workbook requires a valid existing -or new- worksheet name).
position : {str, tuple}
Table position in Excel notation (eg: "A1"), or a (row,col) integer tuple.
table_style : {str, dict}
Expand Down Expand Up @@ -3154,6 +3155,37 @@ def write_excel(
... hide_gridlines=True,
... sheet_zoom=125,
... )
Create and reference a Worksheet object directly, adding a basic chart.
Taking advantage of structured references to set chart series values and
categories is strongly recommended so that you do not have to calculate
cell positions with respect to the frame data and worksheet:
>>> with Workbook("basic_chart.xlsx") as wb: # doctest: +SKIP
... # create worksheet object and write frame data to it
... ws = wb.add_worksheet("demo")
... df.write_excel(
... workbook=wb,
... worksheet=ws,
... table_name="DataTable",
... table_style="Table Style Medium 26",
... hide_gridlines=True,
... )
... # create chart object, point to the written table
... # data using structured references, and style it
... chart = wb.add_chart({"type": "column"})
... chart.set_title({"name": "Example Chart"})
... chart.set_legend({"none": True})
... chart.set_style(38)
... chart.add_series(
... { # note the use of structured references
... "values": "=DataTable[points]",
... "categories": "=DataTable[id]",
... "data_labels": {"value": True},
... }
... )
... # add chart to the worksheet
... ws.insert_chart("D1", chart)
""" # noqa: W505
from polars.io.spreadsheet._write_utils import (
_unpack_multi_column_dict,
Expand Down
30 changes: 27 additions & 3 deletions py-polars/polars/io/spreadsheet/_write_utils.py
Original file line number Diff line number Diff line change
Expand Up @@ -522,15 +522,36 @@ def _xl_setup_table_options(
return table_style, table_options


def _xl_worksheet_in_workbook(
    wb: Workbook, ws: Worksheet, *, return_worksheet: bool = False
) -> bool | Worksheet:
    """
    Confirm that `ws` is one of the worksheets held by `wb`.

    Membership is decided by object identity against `wb.worksheets()`.

    Parameters
    ----------
    wb
        Workbook object expected to be the parent of `ws`.
    ws
        Worksheet object to look for.
    return_worksheet
        If True, return `ws` itself on success instead of `True`.

    Raises
    ------
    ValueError
        If none of the worksheets of `wb` is `ws`.
    """
    for candidate in wb.worksheets():
        if candidate is ws:
            if return_worksheet:
                return ws
            return True

    msg = f"the given workbook object {wb.filename!r} is not the parent of worksheet {ws.name!r}"
    raise ValueError(msg)


def _xl_setup_workbook(
workbook: Workbook | BytesIO | Path | str | None, worksheet: str | None = None
workbook: Workbook | BytesIO | Path | str | None,
worksheet: str | Worksheet | None = None,
) -> tuple[Workbook, Worksheet, bool]:
"""Establish the target excel workbook and worksheet."""
from xlsxwriter import Workbook
from xlsxwriter.worksheet import Worksheet

if isinstance(workbook, Workbook):
wb, can_close = workbook, False
ws = wb.get_worksheet_by_name(name=worksheet)
ws = (
worksheet
if (
isinstance(worksheet, Worksheet)
and _xl_worksheet_in_workbook(wb, worksheet)
)
else wb.get_worksheet_by_name(name=worksheet)
)
elif isinstance(worksheet, Worksheet):
msg = f"worksheet object requires the parent workbook object; found workbook={workbook!r}"
raise TypeError(msg)
else:
workbook_options = {
"nan_inf_to_errors": True,
Expand All @@ -550,7 +571,10 @@ def _xl_setup_workbook(
ws, can_close = None, True

if ws is None:
ws = wb.add_worksheet(name=worksheet)
if isinstance(worksheet, Worksheet):
ws = _xl_worksheet_in_workbook(wb, worksheet, return_worksheet=True)
else:
ws = wb.add_worksheet(name=worksheet)
return wb, ws, can_close


Expand Down
44 changes: 35 additions & 9 deletions py-polars/tests/unit/io/test_spreadsheet.py
Original file line number Diff line number Diff line change
Expand Up @@ -775,21 +775,19 @@ def test_excel_sparklines(engine: ExcelSpreadsheetEngine) -> None:
def test_excel_write_multiple_tables() -> None:
from xlsxwriter import Workbook

# note: checks that empty tables don't error on write
df1 = pl.DataFrame(schema={"colx": pl.Date, "coly": pl.String, "colz": pl.Float64})
df2 = pl.DataFrame(schema={"colx": pl.Date, "coly": pl.String, "colz": pl.Float64})
df3 = pl.DataFrame(schema={"colx": pl.Date, "coly": pl.String, "colz": pl.Float64})
df4 = pl.DataFrame(schema={"colx": pl.Date, "coly": pl.String, "colz": pl.Float64})
# note: also checks that empty tables don't error on write
df = pl.DataFrame(schema={"colx": pl.Date, "coly": pl.String, "colz": pl.Float64})

# write multiple frames to multiple worksheets
xls = BytesIO()
with Workbook(xls) as wb:
df1.write_excel(workbook=wb, worksheet="sheet1", position="A1")
df2.write_excel(workbook=wb, worksheet="sheet1", position="A6")
df3.write_excel(workbook=wb, worksheet="sheet2", position="A1")
df.write_excel(workbook=wb, worksheet="sheet1", position="A1")
df.write_excel(workbook=wb, worksheet="sheet1", position="A6")
df.write_excel(workbook=wb, worksheet="sheet2", position="A1")

# validate integration of externally-added formats
fmt = wb.add_format({"bg_color": "#ffff00"})
df4.write_excel(
df.write_excel(
workbook=wb,
worksheet="sheet3",
position="A1",
Expand All @@ -811,6 +809,34 @@ def test_excel_write_multiple_tables() -> None:
assert pl.read_excel(xls, sheet_name="sheet3").rows() == []


def test_excel_write_worksheet_object() -> None:
    """Check that `write_excel` accepts an `xlsxwriter.Worksheet` object as target."""
    from xlsxwriter import Workbook

    df = pl.DataFrame({"colx": ["aaa", "bbb", "ccc"], "coly": [-1234, 0, 5678]})

    # write to a worksheet object together with its parent workbook
    with Workbook(xls := BytesIO()) as wb:
        ws = wb.add_worksheet("frame_data")
        df.write_excel(wb, worksheet=ws)
        # call a worksheet method after the frame has been written
        ws.hide_zero()

    assert_frame_equal(df, pl.read_excel(xls, sheet_name="frame_data"))

    # passing a workbook that is not the parent of the worksheet must fail
    with pytest.raises(
        ValueError,
        match="the given workbook object .* is not the parent of worksheet 'frame_data'",
    ), Workbook(BytesIO()) as unrelated_wb:
        df.write_excel(unrelated_wb, worksheet=ws)

    # passing a worksheet object with no workbook object must fail; no
    # workbook is constructed here because the call is given `None`
    with pytest.raises(
        TypeError,
        match="worksheet object requires the parent workbook object; found workbook=None",
    ):
        df.write_excel(None, worksheet=ws)


def test_excel_freeze_panes() -> None:
from xlsxwriter import Workbook

Expand Down

0 comments on commit b1cb91e

Please sign in to comment.