Skip to content

Commit

Permalink
Updating to rs-0.45 (#301)
Browse files Browse the repository at this point in the history
Updating to rs-0.45
  • Loading branch information
Bidek56 authored Dec 24, 2024
1 parent 8816b46 commit 2c0e091
Show file tree
Hide file tree
Showing 21 changed files with 1,245 additions and 1,537 deletions.
2 changes: 1 addition & 1 deletion .github/workflows/docs.yaml
Original file line number Diff line number Diff line change
Expand Up @@ -14,7 +14,7 @@ jobs:
- name: Install latest Rust nightly
uses: dtolnay/rust-toolchain@stable
with:
toolchain: nightly-2024-10-28
toolchain: nightly-2024-11-28
components: rustfmt, clippy
- name: Install ghp-import
uses: actions/setup-python@v5
Expand Down
4 changes: 2 additions & 2 deletions .github/workflows/test-js.yaml
Original file line number Diff line number Diff line change
Expand Up @@ -20,7 +20,7 @@ jobs:
- name: Install latest Rust nightly
uses: dtolnay/rust-toolchain@stable
with:
toolchain: nightly-2024-10-28
toolchain: nightly-2024-11-28
components: rustfmt, clippy
- name: Check yarn version
run: yarn --version
Expand All @@ -46,7 +46,7 @@ jobs:
- name: Install latest Rust nightly
uses: dtolnay/rust-toolchain@stable
with:
toolchain: nightly-2024-10-28
toolchain: nightly-2024-11-28
components: rustfmt, clippy
- name: Bun version
uses: oven-sh/setup-bun@v1
Expand Down
925 changes: 0 additions & 925 deletions .yarn/releases/yarn-4.5.0.cjs

This file was deleted.

934 changes: 934 additions & 0 deletions .yarn/releases/yarn-4.5.3.cjs

Large diffs are not rendered by default.

2 changes: 1 addition & 1 deletion .yarnrc.yml
Original file line number Diff line number Diff line change
Expand Up @@ -4,4 +4,4 @@ enableGlobalCache: false

nodeLinker: node-modules

yarnPath: .yarn/releases/yarn-4.5.0.cjs
yarnPath: .yarn/releases/yarn-4.5.3.cjs
12 changes: 6 additions & 6 deletions Cargo.toml
Original file line number Diff line number Diff line change
Expand Up @@ -19,15 +19,15 @@ napi = { version = "2.16.13", default-features = false, features = [
"napi8",
"serde-json",
] }
napi-derive = { version = "2.16.12", default-features = false }
polars-core = { git = "https://github.com/pola-rs/polars.git", rev = "2dce3d3b5c80ae7522a3435f844fac8fed9dc9e8", default-features = false }
polars-io = { git = "https://github.com/pola-rs/polars.git", rev = "2dce3d3b5c80ae7522a3435f844fac8fed9dc9e8", default-features = false }
polars-lazy = { git = "https://github.com/pola-rs/polars.git", rev = "2dce3d3b5c80ae7522a3435f844fac8fed9dc9e8", default-features = false }
napi-derive = { version = "2.16.13", default-features = false }
polars-core = { git = "https://github.com/pola-rs/polars.git", rev = "58a38af21dccaf3326514494a1db118601c8c2ca", default-features = false }
polars-io = { git = "https://github.com/pola-rs/polars.git", rev = "58a38af21dccaf3326514494a1db118601c8c2ca", default-features = false }
polars-lazy = { git = "https://github.com/pola-rs/polars.git", rev = "58a38af21dccaf3326514494a1db118601c8c2ca", default-features = false }
thiserror = "1"
smartstring = { version = "1" }
serde_json = { version = "1" }
either = "1.13.0"
hashbrown = { version = "0.15.0", features = ["rayon", "serde"] }
hashbrown = { version = "0.15.2", features = ["rayon", "serde"] }

[dependencies.polars]
features = [
Expand Down Expand Up @@ -162,7 +162,7 @@ features = [
"azure"
]
git = "https://github.com/pola-rs/polars.git"
rev = "2dce3d3b5c80ae7522a3435f844fac8fed9dc9e8"
rev = "58a38af21dccaf3326514494a1db118601c8c2ca"

[build-dependencies]
napi-build = "2.1.3"
Expand Down
6 changes: 3 additions & 3 deletions __tests__/dataframe.test.ts
Original file line number Diff line number Diff line change
Expand Up @@ -1680,7 +1680,7 @@ describe("io", () => {
expect(actual).toEqual(expected);
});
test("writeCSV:string:sep", () => {
const actual = df.clone().writeCSV({ sep: "X" }).toString();
const actual = df.clone().writeCSV({ separator: "X" }).toString();
const expected = "fooXbar\n1X6\n2X2\n9X8\n";
expect(actual).toEqual(expected);
});
Expand All @@ -1689,14 +1689,14 @@ describe("io", () => {
bar: ["a,b,c", "d,e,f", "g,h,i"],
foo: [1, 2, 3],
});
const actual = df.writeCSV({ quote: "^" }).toString();
const actual = df.writeCSV({ quoteChar: "^" }).toString();
const expected = "bar,foo\n^a,b,c^,1.0\n^d,e,f^,2.0\n^g,h,i^,3.0\n";
expect(actual).toEqual(expected);
});
test("writeCSV:string:header", () => {
const actual = df
.clone()
.writeCSV({ sep: "X", includeHeader: false, lineTerminator: "|" })
.writeCSV({ separator: "X", includeHeader: false, lineTerminator: "|" })
.toString();
const expected = "1X6|2X2|9X8|";
expect(actual).toEqual(expected);
Expand Down
15 changes: 15 additions & 0 deletions __tests__/io.test.ts
Original file line number Diff line number Diff line change
Expand Up @@ -52,6 +52,21 @@ describe("read:csv", () => {
csvString.slice(0, 22),
);
});
it("can read from a csv buffer with newline in the header", () => {
  // The first record's quoted fields contain embedded newlines, so that single
  // record is spread over several physical lines of the input.
  const rawCsv = '"name\na","height\nb"\n"John",172.23\n"Anna",1653.34';

  const records = pl
    .readCSV(Buffer.from(rawCsv), {
      quoteChar: '"',
      sep: ",",
      hasHeader: false,
      skipRows: 1,
    })
    .toRecords();

  // Only the two data rows are expected back, keyed by column_1 / column_2.
  const expectedRecords = [
    { column_1: "John", column_2: 172.23 },
    { column_1: "Anna", column_2: 1653.34 },
  ];
  expect(records).toEqual(expectedRecords);
});
it("can read from a csv buffer", () => {
const csvBuffer = Buffer.from("foo,bar,baz\n1,2,3\n4,5,6\n", "utf-8");
const df = pl.readCSV(csvBuffer);
Expand Down
8 changes: 4 additions & 4 deletions package.json
Original file line number Diff line number Diff line change
Expand Up @@ -58,16 +58,16 @@
"@napi-rs/cli": "^2.18.4",
"@types/chance": "^1.1.6",
"@types/jest": "^29.5.14",
"@types/node": "^22.8.6",
"@types/node": "^22.10.1",
"chance": "^1.1.12",
"jest": "^29.7.0",
"source-map-support": "^0.5.21",
"ts-jest": "^29.2.5",
"ts-node": "^10.9.2",
"typedoc": "^0.26.10",
"typescript": "5.6.3"
"typedoc": "^0.27.3",
"typescript": "5.7.2"
},
"packageManager": "[email protected].0",
"packageManager": "[email protected].3",
"workspaces": [
"benches"
]
Expand Down
10 changes: 5 additions & 5 deletions polars/dataframe.ts
Original file line number Diff line number Diff line change
Expand Up @@ -12,10 +12,10 @@ import { type LazyDataFrame, _LazyDataFrame } from "./lazy/dataframe";
import { Expr } from "./lazy/expr";
import { Series, _Series } from "./series";
import type {
CsvWriterOptions,
FillNullStrategy,
JoinOptions,
WriteAvroOptions,
WriteCsvOptions,
WriteIPCOptions,
WriteParquetOptions,
} from "./types";
Expand Down Expand Up @@ -61,8 +61,8 @@ interface WriteMethods {
* @param options.includeBom - Whether to include UTF-8 BOM in the CSV output.
* @param options.lineTerminator - String used to end each row.
* @param options.includeHeader - Whether or not to include header in the CSV output.
* @param options.sep - Separate CSV fields with this symbol. _defaults to `,`
* @param options.quote - Character to use for quoting. Default: \" Note: it will note be used when sep is used
* @param options.separator - Separate CSV fields with this symbol. Defaults to `,`
* @param options.quoteChar - Character to use for quoting. Default: \" Note: it will not be used when separator is used
* @param options.batchSize - Number of rows that will be processed per thread.
* @param options.datetimeFormat - A format string, with the specifiers defined by the
* `chrono <https://docs.rs/chrono/latest/chrono/format/strftime/index.html>`_
Expand Down Expand Up @@ -109,8 +109,8 @@ interface WriteMethods {
* @category IO
*/
writeCSV(): Buffer;
writeCSV(options: WriteCsvOptions): Buffer;
writeCSV(dest: string | Writable, options?: WriteCsvOptions): void;
writeCSV(options: CsvWriterOptions): Buffer;
writeCSV(dest: string | Writable, options?: CsvWriterOptions): void;
/**
* Write Dataframe to JSON string, file, or write stream
* @param destination file or write stream
Expand Down
4 changes: 2 additions & 2 deletions polars/io.ts
Original file line number Diff line number Diff line change
Expand Up @@ -503,7 +503,7 @@ export function readAvro(pathOrBody, options = {}) {
@param options.rechunk - In case of reading multiple files via a glob pattern rechunk the final DataFrame into contiguous memory chunks.
@param options.lowMemory - Reduce memory pressure at the expense of performance.
@param options.cache - Cache the result after reading.
@param options.storageOptions - Options that indicate how to connect to a cloud provider.
@param options.cloudOptions - Options that indicate how to connect to a cloud provider.
If the cloud provider is not supported by Polars, the storage options are passed to `fsspec.open()`.
The cloud providers currently supported are AWS, GCP, and Azure.
Expand All @@ -513,7 +513,7 @@ export function readAvro(pathOrBody, options = {}) {
* `gcp <https://docs.rs/object_store/latest/object_store/gcp/enum.GoogleConfigKey.html>`_
* `azure <https://docs.rs/object_store/latest/object_store/azure/enum.AzureConfigKey.html>`_
If `storage_options` is not provided, Polars will try to infer the information from environment variables.
If `cloudOptions` is not provided, Polars will try to infer the information from environment variables.
@param retries - Number of retries if accessing a cloud instance fails.
@param includeFilePaths - Include the path of the source file(s) as a column with this name.
*/
Expand Down
18 changes: 15 additions & 3 deletions polars/lazy/dataframe.ts
Original file line number Diff line number Diff line change
Expand Up @@ -3,9 +3,9 @@ import pli from "../internals/polars_internal";
import type { Series } from "../series";
import type { Deserialize, GroupByOps, Serialize } from "../shared_traits";
import type {
CsvWriterOptions,
LazyJoinOptions,
LazyOptions,
SinkCsvOptions,
SinkParquetOptions,
} from "../types";
import {
Expand Down Expand Up @@ -541,7 +541,7 @@ export interface LazyDataFrame extends Serialize, GroupByOps<LazyGroupBy> {
>>> lf.sinkCSV("out.csv")
*/

sinkCSV(path: string, options?: SinkCsvOptions): void;
sinkCSV(path: string, options?: CsvWriterOptions): void;

/***
*
Expand Down Expand Up @@ -580,6 +580,18 @@ export interface LazyDataFrame extends Serialize, GroupByOps<LazyGroupBy> {
@param simplifyExpression - Run simplify expressions optimization. Default -> true
@param slicePushdown - Slice pushdown optimization. Default -> true
@param noOptimization - Turn off (certain) optimizations. Default -> false
@param cloudOptions - Options that indicate how to connect to a cloud provider.
If the cloud provider is not supported by Polars, the storage options are passed to `fsspec.open()`.
The cloud providers currently supported are AWS, GCP, and Azure.
See supported keys here:
* `aws <https://docs.rs/object_store/latest/object_store/aws/enum.AmazonS3ConfigKey.html>`_
* `gcp <https://docs.rs/object_store/latest/object_store/gcp/enum.GoogleConfigKey.html>`_
* `azure <https://docs.rs/object_store/latest/object_store/azure/enum.AzureConfigKey.html>`_
If `cloudOptions` is not provided, Polars will try to infer the information from environment variables.
@param retries - Number of retries if accessing a cloud instance fails.
Examples
--------
Expand Down Expand Up @@ -1078,7 +1090,7 @@ export const _LazyDataFrame = (_ldf: any): LazyDataFrame => {
withRowCount(name = "row_nr") {
return _LazyDataFrame(_ldf.withRowCount(name));
},
sinkCSV(path, options: SinkCsvOptions = {}) {
sinkCSV(path, options: CsvWriterOptions = {}) {
options.maintainOrder = options.maintainOrder ?? false;
_ldf.sinkCsv(path, options);
},
Expand Down
4 changes: 2 additions & 2 deletions polars/lazy/functions.ts
Original file line number Diff line number Diff line change
Expand Up @@ -650,7 +650,7 @@ export function spearmanRankCorr(a: ExprOrString, b: ExprOrString): Expr {
a = exprToLitOrExpr(a, false);
b = exprToLitOrExpr(b, false);

return _Expr(pli.spearmanRankCorr(a, b, null, false));
return _Expr(pli.spearmanRankCorr(a, b, false));
}

/** Get the last n rows of an Expression. */
Expand Down Expand Up @@ -964,7 +964,7 @@ export function sumHorizontal(exprs: ExprOrString | ExprOrString[]): Expr {

exprs = selectionToExprList(exprs);

return _Expr(pli.sumHorizontal(exprs));
return _Expr(pli.sumHorizontal(exprs, true));
}

// // export function collect_all() {}
Expand Down
29 changes: 7 additions & 22 deletions polars/types.ts
Original file line number Diff line number Diff line change
Expand Up @@ -39,33 +39,16 @@ export interface ConcatOptions {
how?: "vertical" | "horizontal" | "diagonal";
}
/**
* Options for {@link DataFrame.writeCSV}
* @category Options
*/
export interface WriteCsvOptions {
includeBom?: boolean;
includeHeader?: boolean;
sep?: string;
quote?: string;
lineTerminator?: string;
batchSize?: number;
datetimeFormat?: string;
dateFormat?: string;
timeFormat?: string;
floatPrecision?: number;
nullValue?: string;
}
/**
* Options for @see {@link DataFrame.writeCSV}
* Options for @see {@link LazyDataFrame.sinkCSV}
* @category Options
*/
export interface SinkCsvOptions {
includeHeader?: boolean;
quote?: string;
export interface CsvWriterOptions {
includeBom?: boolean;
includeHeader?: boolean;
separator?: string;
lineTerminator?: string;
quoteChar?: string;
lineTerminator?: string;
batchSize?: number;
datetimeFormat?: string;
dateFormat?: string;
Expand All @@ -91,6 +74,8 @@ export interface SinkParquetOptions {
simplifyExpression?: boolean;
slicePushdown?: boolean;
noOptimization?: boolean;
cloudOptions?: Map<string, string>;
retries?: number;
}
/**
* Options for {@link DataFrame.writeJSON}
Expand Down Expand Up @@ -153,7 +138,7 @@ export interface ScanParquetOptions {
rechunk?: boolean;
lowMemory?: boolean;
useStatistics?: boolean;
cloudOptions?: unknown;
cloudOptions?: Map<string, string>;
retries?: number;
includeFilePaths?: string;
allowMissingColumns?: boolean;
Expand Down
2 changes: 1 addition & 1 deletion rust-toolchain
Original file line number Diff line number Diff line change
@@ -1 +1 @@
nightly-2024-10-28
nightly-2024-11-28
Loading

0 comments on commit 2c0e091

Please sign in to comment.