From 10ab0c45def93d5911185adb9f49a4512370dc1d Mon Sep 17 00:00:00 2001 From: Simon Lin Date: Tue, 23 Jul 2024 15:49:38 +1000 Subject: [PATCH] docs(python): Mention `read_*` functions in Hugging Face section in user-guide --- docs/src/python/user-guide/io/hugging-face.py | 70 +++++++++++++++++++ docs/user-guide/io/hugging-face.md | 21 +++--- 2 files changed, 82 insertions(+), 9 deletions(-) diff --git a/docs/src/python/user-guide/io/hugging-face.py b/docs/src/python/user-guide/io/hugging-face.py index 09e162863884..d18304a5b6b0 100644 --- a/docs/src/python/user-guide/io/hugging-face.py +++ b/docs/src/python/user-guide/io/hugging-face.py @@ -11,10 +11,80 @@ print(pl.scan_ndjson("hf://datasets/nameexhaustion/polars-docs/iris.jsonl").collect()) # --8<-- [end:scan_iris_ndjson] +# --8<-- [start:scan_iris_repr] +print("""\ +shape: (150, 5) +┌──────────────┬─────────────┬──────────────┬─────────────┬───────────┐ +│ sepal_length ┆ sepal_width ┆ petal_length ┆ petal_width ┆ species │ +│ --- ┆ --- ┆ --- ┆ --- ┆ --- │ +│ f64 ┆ f64 ┆ f64 ┆ f64 ┆ str │ +╞══════════════╪═════════════╪══════════════╪═════════════╪═══════════╡ +│ 5.1 ┆ 3.5 ┆ 1.4 ┆ 0.2 ┆ setosa │ +│ 4.9 ┆ 3.0 ┆ 1.4 ┆ 0.2 ┆ setosa │ +│ 4.7 ┆ 3.2 ┆ 1.3 ┆ 0.2 ┆ setosa │ +│ 4.6 ┆ 3.1 ┆ 1.5 ┆ 0.2 ┆ setosa │ +│ 5.0 ┆ 3.6 ┆ 1.4 ┆ 0.2 ┆ setosa │ +│ … ┆ … ┆ … ┆ … ┆ … │ +│ 6.7 ┆ 3.0 ┆ 5.2 ┆ 2.3 ┆ virginica │ +│ 6.3 ┆ 2.5 ┆ 5.0 ┆ 1.9 ┆ virginica │ +│ 6.5 ┆ 3.0 ┆ 5.2 ┆ 2.0 ┆ virginica │ +│ 6.2 ┆ 3.4 ┆ 5.4 ┆ 2.3 ┆ virginica │ +│ 5.9 ┆ 3.0 ┆ 5.1 ┆ 1.8 ┆ virginica │ +└──────────────┴─────────────┴──────────────┴─────────────┴───────────┘ +""") +# --8<-- [end:scan_iris_repr] + # --8<-- [start:scan_parquet_hive] print(pl.scan_parquet("hf://datasets/nameexhaustion/polars-docs/hive_dates/").collect()) # --8<-- [end:scan_parquet_hive] +# --8<-- [start:scan_parquet_hive_repr] +print("""\ +shape: (4, 3) +┌────────────┬────────────────────────────┬─────┐ +│ date1 ┆ date2 ┆ x │ +│ --- ┆ --- ┆ --- │ +│ date ┆ datetime[μs] ┆ i32 │ +╞════════════╪════════════════════════════╪═════╡ +│ 2024-01-01 ┆ 2023-01-01 00:00:00 ┆ 1 │ +│ 2024-02-01 ┆ 2023-02-01 00:00:00 ┆ 2 │ +│ 2024-03-01 ┆ null ┆ 3 │ +│ null ┆ 2023-03-01 01:01:01.000001 ┆ 4 │ +└────────────┴────────────────────────────┴─────┘ +""") +# --8<-- [end:scan_parquet_hive_repr] + # --8<-- [start:scan_ipc] print(pl.scan_ipc("hf://spaces/nameexhaustion/polars-docs/orders.feather").collect()) # --8<-- [end:scan_ipc] + +# --8<-- [start:scan_ipc_repr] +print("""\ +shape: (10, 9) +┌────────────┬───────────┬───────────────┬──────────────┬───┬─────────────────┬─────────────────┬────────────────┬─────────────────────────┐ +│ o_orderkey ┆ o_custkey ┆ o_orderstatus ┆ o_totalprice ┆ … ┆ o_orderpriority ┆ o_clerk ┆ o_shippriority ┆ o_comment │ +│ --- ┆ --- ┆ --- ┆ --- ┆ ┆ --- ┆ --- ┆ --- ┆ --- │ +│ i64 ┆ i64 ┆ str ┆ f64 ┆ ┆ str ┆ str ┆ i64 ┆ str │ +╞════════════╪═══════════╪═══════════════╪══════════════╪═══╪═════════════════╪═════════════════╪════════════════╪═════════════════════════╡ +│ 1 ┆ 36901 ┆ O ┆ 173665.47 ┆ … ┆ 5-LOW ┆ Clerk#000000951 ┆ 0 ┆ nstructions sleep │ +│ ┆ ┆ ┆ ┆ ┆ ┆ ┆ ┆ furiously am… │ +│ 2 ┆ 78002 ┆ O ┆ 46929.18 ┆ … ┆ 1-URGENT ┆ Clerk#000000880 ┆ 0 ┆ foxes. pending accounts │ +│ ┆ ┆ ┆ ┆ ┆ ┆ ┆ ┆ at th… │ +│ 3 ┆ 123314 ┆ F ┆ 193846.25 ┆ … ┆ 5-LOW ┆ Clerk#000000955 ┆ 0 ┆ sly final accounts │ +│ ┆ ┆ ┆ ┆ ┆ ┆ ┆ ┆ boost. care… │ +│ 4 ┆ 136777 ┆ O ┆ 32151.78 ┆ … ┆ 5-LOW ┆ Clerk#000000124 ┆ 0 ┆ sits. slyly regular │ +│ ┆ ┆ ┆ ┆ ┆ ┆ ┆ ┆ warthogs c… │ +│ 5 ┆ 44485 ┆ F ┆ 144659.2 ┆ … ┆ 5-LOW ┆ Clerk#000000925 ┆ 0 ┆ quickly. bold deposits │ +│ ┆ ┆ ┆ ┆ ┆ ┆ ┆ ┆ sleep s… │ +│ 6 ┆ 55624 ┆ F ┆ 58749.59 ┆ … ┆ 4-NOT SPECIFIED ┆ Clerk#000000058 ┆ 0 ┆ ggle. special, final │ +│ ┆ ┆ ┆ ┆ ┆ ┆ ┆ ┆ requests … │ +│ 7 ┆ 39136 ┆ O ┆ 252004.18 ┆ … ┆ 2-HIGH ┆ Clerk#000000470 ┆ 0 ┆ ly special requests │ +│ 32 ┆ 130057 ┆ O ┆ 208660.75 ┆ … ┆ 2-HIGH ┆ Clerk#000000616 ┆ 0 ┆ ise blithely bold, │ +│ ┆ ┆ ┆ ┆ ┆ ┆ ┆ ┆ regular req… │ +│ 33 ┆ 66958 ┆ F ┆ 163243.98 ┆ … ┆ 3-MEDIUM ┆ Clerk#000000409 ┆ 0 ┆ uriously. furiously │ +│ ┆ ┆ ┆ ┆ ┆ ┆ ┆ ┆ final requ… │ +│ 34 ┆ 61001 ┆ O ┆ 58949.67 ┆ … ┆ 3-MEDIUM ┆ Clerk#000000223 ┆ 0 ┆ ly final packages. │ +│ ┆ ┆ ┆ ┆ ┆ ┆ ┆ ┆ fluffily fi… │ +└────────────┴───────────┴───────────────┴──────────────┴───┴─────────────────┴─────────────────┴────────────────┴─────────────────────────┘ +""") +# --8<-- [end:scan_ipc_repr] diff --git a/docs/user-guide/io/hugging-face.md b/docs/user-guide/io/hugging-face.md index 7bb0b425d7e4..1a9f74ce9c28 100644 --- a/docs/user-guide/io/hugging-face.md +++ b/docs/user-guide/io/hugging-face.md @@ -2,12 +2,15 @@ ## Scanning datasets from Huggging Face -All cloud-enabled scan functions also transparently support scanning from Hugging Face: +All cloud-enabled scan functions, and their `read_` counterparts transparently support scanning from +Hugging Face: -- [scan_parquet](https://docs.pola.rs/api/python/stable/reference/api/polars.scan_parquet.html) -- [scan_csv](https://docs.pola.rs/api/python/stable/reference/api/polars.scan_csv.html) -- [scan_ipc](https://docs.pola.rs/api/python/stable/reference/api/polars.scan_ipc.html) -- [scan_ndjson](https://docs.pola.rs/api/python/stable/reference/api/polars.scan_ndjson.html) +| Scan | Read | +| --------------------------------------------------------------------------------------------- | --------------------------------------------------------------------------------------------- | +| [scan_parquet](https://docs.pola.rs/api/python/stable/reference/api/polars.scan_parquet.html) | [read_parquet](https://docs.pola.rs/api/python/stable/reference/api/polars.read_parquet.html) | +| [scan_csv](https://docs.pola.rs/api/python/stable/reference/api/polars.scan_csv.html) | [read_csv](https://docs.pola.rs/api/python/stable/reference/api/polars.read_csv.html) | +| [scan_ndjson](https://docs.pola.rs/api/python/stable/reference/api/polars.scan_ndjson.html) | [read_ndjson](https://docs.pola.rs/api/python/stable/reference/api/polars.read_ndjson.html) | +| [scan_ipc](https://docs.pola.rs/api/python/stable/reference/api/polars.scan_ipc.html) | [read_ipc](https://docs.pola.rs/api/python/stable/reference/api/polars.read_ipc.html) | ### Path format @@ -45,7 +48,7 @@ A Hugging Face API key can be passed to Polars to access private locations using {{code_block('user-guide/io/hugging-face','scan_iris_csv',['scan_csv'])}} ```python exec="on" result="text" session="user-guide/io/hugging-face" ---8<-- "python/user-guide/io/hugging-face.py:scan_iris_csv" +--8<-- "python/user-guide/io/hugging-face.py:scan_iris_repr" ``` See this file at [https://huggingface.co/datasets/nameexhaustion/polars-docs/blob/main/iris.csv](https://huggingface.co/datasets/nameexhaustion/polars-docs/blob/main/iris.csv) @@ -55,7 +58,7 @@ See this file at [https://huggingface.co/datasets/nameexhaustion/polars-docs/blo {{code_block('user-guide/io/hugging-face','scan_iris_ndjson',['scan_ndjson'])}} ```python exec="on" result="text" session="user-guide/io/hugging-face" ---8<-- "python/user-guide/io/hugging-face.py:scan_iris_ndjson" +--8<-- "python/user-guide/io/hugging-face.py:scan_iris_repr" ``` See this file at [https://huggingface.co/datasets/nameexhaustion/polars-docs/blob/main/iris.jsonl](https://huggingface.co/datasets/nameexhaustion/polars-docs/blob/main/iris.jsonl) @@ -65,7 +68,7 @@ See this file at [https://huggingface.co/datasets/nameexhaustion/polars-docs/blo {{code_block('user-guide/io/hugging-face','scan_parquet_hive',['scan_parquet'])}} ```python exec="on" result="text" session="user-guide/io/hugging-face" ---8<-- "python/user-guide/io/hugging-face.py:scan_parquet_hive" +--8<-- "python/user-guide/io/hugging-face.py:scan_parquet_hive_repr" ``` See this folder at [https://huggingface.co/datasets/nameexhaustion/polars-docs/tree/main/hive_dates/](https://huggingface.co/datasets/nameexhaustion/polars-docs/tree/main/hive_dates/) @@ -75,7 +78,7 @@ See this folder at [https://huggingface.co/datasets/nameexhaustion/polars-docs/t {{code_block('user-guide/io/hugging-face','scan_ipc',['scan_ipc'])}} ```python exec="on" result="text" session="user-guide/io/hugging-face" ---8<-- "python/user-guide/io/hugging-face.py:scan_ipc" +--8<-- "python/user-guide/io/hugging-face.py:scan_ipc_repr" ``` See this file at [https://huggingface.co/spaces/nameexhaustion/polars-docs/blob/main/orders.feather](https://huggingface.co/spaces/nameexhaustion/polars-docs/blob/main/orders.feather)