Skip to content

Commit

Permalink
Setting inferSchemaLength = 0 when null (#285)
Browse files Browse the repository at this point in the history
Setting inferSchemaLength = 0 when null to close #279
  • Loading branch information
Bidek56 authored Oct 16, 2024
1 parent 64a3d63 commit 8ac81c6
Show file tree
Hide file tree
Showing 4 changed files with 21 additions and 4 deletions.
10 changes: 8 additions & 2 deletions __tests__/io.test.ts
Original file line number Diff line number Diff line change
Expand Up @@ -24,7 +24,7 @@ describe("read:csv", () => {
expect(df.shape).toEqual({ height: 27, width: 4 });
});
it("can read from a csv file with inferSchemaLength = 0 option", () => {
const df = pl.readCSV(csvpath, { inferSchemaLength: 0 });
let df = pl.readCSV(csvpath, { inferSchemaLength: 0 });
const expected = `shape: (1, 4)
┌────────────┬──────────┬────────┬──────────┐
│ category ┆ calories ┆ fats_g ┆ sugars_g │
Expand All @@ -34,6 +34,8 @@ describe("read:csv", () => {
│ vegetables ┆ 45 ┆ 0.5 ┆ 2 │
└────────────┴──────────┴────────┴──────────┘`;
expect(df.head(1).toString()).toEqual(expected);
df = pl.readCSV(csvpath, { inferSchemaLength: null });
expect(df.head(1).toString()).toEqual(expected);
});
it("can read from a csv file with options", () => {
const df = pl.readCSV(csvpath, { hasHeader: false, skipRows: 1, nRows: 4 });
Expand Down Expand Up @@ -154,7 +156,11 @@ describe("read:json", () => {
expect(df.shape).toEqual({ height: 27, width: 4 });
});
it("can specify read options", () => {
const df = pl.readJSON(jsonpath, { batchSize: 10, inferSchemaLength: 100 });
let df = pl.readJSON(jsonpath, { batchSize: 10, inferSchemaLength: 100 });
expect(df.shape).toEqual({ height: 27, width: 4 });
df = pl.readJSON(jsonpath, { batchSize: 10, inferSchemaLength: null });
expect(df.shape).toEqual({ height: 27, width: 4 });
df = pl.readJSON(jsonpath, { batchSize: 10, inferSchemaLength: 0 });
expect(df.shape).toEqual({ height: 27, width: 4 });
});
it("can read from a json buffer", () => {
Expand Down
11 changes: 11 additions & 0 deletions polars/io.ts
Original file line number Diff line number Diff line change
Expand Up @@ -186,6 +186,9 @@ export function readCSV(pathOrBody, options?) {
options = { ...readCsvDefaultOptions, ...options };
const extensions = [".tsv", ".csv"];

// Treat a `null` (or missing) inferSchemaLength as 0
options.inferSchemaLength = options.inferSchemaLength ?? 0;

if (Buffer.isBuffer(pathOrBody)) {
return _DataFrame(pli.readCsv(pathOrBody, options));
}
Expand Down Expand Up @@ -275,6 +278,8 @@ export function scanCSV(
/**
 * Implementation signature for `scanCSV`.
 *
 * Layers the caller's options over `scanCsvDefaultOptions`, replaces a
 * `null`/`undefined` `inferSchemaLength` with `0`, hands the result to
 * `pli.scanCsv`, and wraps what comes back in `_LazyDataFrame`.
 *
 * @param path - Passed through unchanged to `pli.scanCsv`.
 * @param options - Optional overrides merged on top of `scanCsvDefaultOptions`.
 * @returns The value produced by `_LazyDataFrame` for the native scan result.
 */
export function scanCSV(path, options?) {
  const merged = { ...scanCsvDefaultOptions, ...options };

  // The native call is given a concrete number: a nullish
  // inferSchemaLength (explicit `null` included) becomes 0.
  merged.inferSchemaLength ??= 0;

  const lazyFrame = pli.scanCsv(path, merged);
  return _LazyDataFrame(lazyFrame);
}
/**
Expand Down Expand Up @@ -320,6 +325,10 @@ export function readJSON(
options = { ...readJsonDefaultOptions, ...options };
const method = options.format === "lines" ? pli.readJsonLines : pli.readJson;
const extensions = [".ndjson", ".json", ".jsonl"];

// Treat a `null` (or missing) inferSchemaLength as 0
options.inferSchemaLength = options.inferSchemaLength ?? 0;

if (Buffer.isBuffer(pathOrBody)) {
return _DataFrame(pli.readJson(pathOrBody, options));
}
Expand Down Expand Up @@ -382,6 +391,8 @@ export function scanJson(
/**
 * Implementation signature for `scanJson`.
 *
 * Layers the caller's options over `readJsonDefaultOptions`, replaces a
 * `null`/`undefined` `inferSchemaLength` with `0`, hands the result to
 * `pli.scanJson`, and wraps what comes back in `_LazyDataFrame`.
 *
 * @param path - Passed through unchanged to `pli.scanJson`.
 * @param options - Optional overrides merged on top of `readJsonDefaultOptions`.
 * @returns The value produced by `_LazyDataFrame` for the native scan result.
 */
export function scanJson(path: string, options?: Partial<ScanJsonOptions>) {
  const merged = { ...readJsonDefaultOptions, ...options };

  // The native call is given a concrete number: a nullish
  // inferSchemaLength (explicit `null` included) becomes 0.
  if (merged.inferSchemaLength == null) {
    merged.inferSchemaLength = 0;
  }

  const lazyFrame = pli.scanJson(path, merged);
  return _LazyDataFrame(lazyFrame);
}

Expand Down
2 changes: 1 addition & 1 deletion polars/lazy/expr/string.ts
Original file line number Diff line number Diff line change
Expand Up @@ -164,7 +164,7 @@ export interface StringNamespace extends StringFunctions<Expr> {
* @see https://goessner.net/articles/JsonPath/
* @param jsonPath - A valid JSON path query string
* @param dtype - The dtype to cast the extracted value to. If None, the dtype will be inferred from the JSON value.
* @param inferSchemaLength - How many rows to parse to determine the schema. If ``None`` all rows are used.
* @param inferSchemaLength - How many rows to parse to determine the schema. If `null` all rows are used.
* @returns Utf8 array. Contains null if the original value is null or the `jsonPath` query returns nothing.
* @example
* ```
Expand Down
2 changes: 1 addition & 1 deletion polars/series/string.ts
Original file line number Diff line number Diff line change
Expand Up @@ -132,7 +132,7 @@ export interface StringNamespace extends StringFunctions<Series> {
* @see https://goessner.net/articles/JsonPath/
* @param jsonPath - A valid JSON path query string
* @param dtype - The dtype to cast the extracted value to. If None, the dtype will be inferred from the JSON value.
* @param inferSchemaLength - How many rows to parse to determine the schema. If ``None`` all rows are used.
* @param inferSchemaLength - How many rows to parse to determine the schema. If `null` all rows are used.
* @returns Utf8 array. Contains null if the original value is null or the `jsonPath` query returns nothing.
* @example
* ```
Expand Down

0 comments on commit 8ac81c6

Please sign in to comment.