Skip to content

Commit

Permalink
create built in way to visualize dataframe in Jupyter (#132)
Browse files Browse the repository at this point in the history
  • Loading branch information
rgbkrk authored Oct 12, 2023
1 parent eb21fb5 commit 0387b7d
Show file tree
Hide file tree
Showing 4 changed files with 199 additions and 11 deletions.
37 changes: 37 additions & 0 deletions README.md
Original file line number Diff line number Diff line change
Expand Up @@ -115,6 +115,43 @@ Releases happen quite often (weekly / every few days) at the moment, so updating
- Node version `>=18`
- Rust version `>=1.59` - *Only needed for development*


## Deno

In Deno modules you can import polars straight from `npm`:

```typescript
import pl from "npm:nodejs-polars";
```

With Deno 1.37, you can use the `display` function to display a `DataFrame` in the notebook:

```typescript
import pl from "npm:nodejs-polars";
import { display } from "https://deno.land/x/display/mod.ts";

let response = await fetch(
"https://cdn.jsdelivr.net/npm/world-atlas@1/world/110m.tsv",
);
let data = await response.text();
let df = pl.readCSV(data, { sep: "\t" });
await display(df)
```

With Deno 1.38, you only need to make the `DataFrame` the last expression in the cell:

```typescript
import pl from "npm:nodejs-polars";
let response = await fetch(
"https://cdn.jsdelivr.net/npm/world-atlas@1/world/110m.tsv",
);
let data = await response.text();
let df = pl.readCSV(data, { sep: "\t" });
df
```

<img width="510" alt="image" src="https://github.com/pola-rs/nodejs-polars/assets/836375/90cf7bf4-7478-4919-b297-f8eb6a16196f">

___

## Documentation
Expand Down
27 changes: 27 additions & 0 deletions __tests__/dataframe.test.ts
Original file line number Diff line number Diff line change
Expand Up @@ -2250,6 +2250,33 @@ describe("meta", () => {
});
});

// Verifies the Jupyter display hook: the DataFrame must advertise the
// `Symbol.for("Jupyter.display")` method and return a media bundle containing
// both a tabular data resource (schema + records) and an HTML table.
test("Jupyter.display", () => {
  const displaySymbol = Symbol.for("Jupyter.display");
  const df = pl.DataFrame({
    os: ["apple", "linux"],
    version: [10.12, 18.04],
  });
  // The `in` operator must report the hook as present on the DataFrame.
  expect(displaySymbol in df).toBe(true);

  const actual = df[displaySymbol]();

  expect(actual).toBeInstanceOf(Object);

  const dataResource = actual["application/vnd.dataresource+json"];
  expect(dataResource).toBeInstanceOf(Object);
  // Check the schema contents, not just its presence, so the
  // polars dtype -> schema type mapping is actually exercised.
  expect(dataResource).toHaveProperty("schema", {
    fields: [
      { name: "os", type: "string" },
      { name: "version", type: "number" },
    ],
  });
  expect(dataResource).toHaveProperty("data", [
    { os: "apple", version: 10.12 },
    { os: "linux", version: 18.04 },
  ]);

  const html = actual["text/html"];
  for (const expectedCell of ["apple", "linux", "10.12", "18.04"]) {
    expect(html).toContain(expectedCell);
  }
});

describe("additional", () => {
test("partitionBy", () => {
const df = pl.DataFrame({
Expand Down
128 changes: 117 additions & 11 deletions polars/dataframe.ts
Original file line number Diff line number Diff line change
@@ -1,18 +1,18 @@
import pli from "./internals/polars_internal";
import { arrayToJsDataFrame } from "./internals/construction";
import { DynamicGroupBy, _GroupBy, GroupBy, RollingGroupBy } from "./groupby";
import { LazyDataFrame, _LazyDataFrame } from "./lazy/dataframe";
import { _GroupBy, DynamicGroupBy, GroupBy, RollingGroupBy } from "./groupby";
import { _LazyDataFrame, LazyDataFrame } from "./lazy/dataframe";
import { concat } from "./functions";
import { Expr } from "./lazy/expr";
import { Series, _Series } from "./series";
import { _Series, Series } from "./series";
import { Stream, Writable } from "stream";
import {
FillNullStrategy,
JoinOptions,
WriteAvroOptions,
WriteCsvOptions,
WriteIPCOptions,
WriteParquetOptions,
WriteAvroOptions,
FillNullStrategy,
JoinOptions,
} from "./types";

import { DataType } from "./datatypes";
Expand All @@ -21,10 +21,10 @@ import {
columnOrColumns,
columnOrColumnsStrict,
ColumnSelection,
isSeriesArray,
ColumnsOrExpr,
ValueOrArray,
ExprOrString,
isSeriesArray,
ValueOrArray,
} from "./utils";

import {
Expand All @@ -35,9 +35,12 @@ import {
Serialize,
} from "./shared_traits";

import { escapeHTML } from "./html";

import { col, element } from "./lazy/functions";

// Well-known symbol key registered under "nodejs.util.inspect.custom".
const inspect = Symbol.for("nodejs.util.inspect.custom");
// Global-registry symbol used as the key of the DataFrame method that
// returns the notebook media bundle (text/html + data resource).
const jupyterDisplay = Symbol.for("Jupyter.display");

/**
* Write methods for DataFrame
Expand Down Expand Up @@ -98,7 +101,6 @@ interface WriteMethods {
* ... bar: ['a','b','c']
* ... })
*
*
* > df.writeJSON({format:"json"})
* `[ {"foo":1.0,"bar":"a"}, {"foo":2.0,"bar":"b"}, {"foo":3.0,"bar":"c"}]`
*
Expand Down Expand Up @@ -984,7 +986,6 @@ export interface DataFrame
mapFn: (df: DataFrame) => T,
): T[];
/**
*
* Create a spreadsheet-style pivot table as a DataFrame.
*
* @param values Column values to aggregate. Can be multiple columns if the *columns* arguments contains multiple columns as well
Expand Down Expand Up @@ -1363,7 +1364,6 @@ export interface DataFrame
sum(axis: 1): Series;
sum(axis: 1, nullStrategy?: "ignore" | "propagate"): Series;
/**
*
* @example
* ```
* > df = pl.DataFrame({
Expand Down Expand Up @@ -1437,6 +1437,16 @@ export interface DataFrame
*/
toJSON(): string;

/**
* Converts dataframe object into a {@link TabularDataResource}
*/
toDataResource(): TabularDataResource;

/**
* Converts dataframe object into HTML
*/
toHTML(): string;

/**
* Converts dataframe object into column oriented javascript objects
* @example
Expand Down Expand Up @@ -1707,6 +1717,47 @@ function map(df: DataFrame, fn: (...args: any[]) => any[]) {
return df.rows().map(fn);
}

/**
 * A single column descriptor inside a {@link TabularDataResource} schema.
 */
type DataResourceField = {
// Column name.
name: string;
// Schema type name for the column (e.g. "integer", "number", "string").
type: string;
};

/**
 * Tabular Data Resource, see
 * https://specs.frictionlessdata.io/schemas/tabular-data-resource.json
 */
type TabularDataResource = {
// Row records of the table.
data: any[];
// Column descriptors for the table.
schema: {
fields: DataResourceField[];
};
};

/**
 * Translate a polars data type into the schema type name used in the
 * fields of a tabular data resource.
 *
 * The lookup is keyed on `colType.variant`; any variant without an entry
 * falls back to `"string"`.
 *
 * @param colType polars data type of a column
 * @returns the schema type name for that column
 */
function mapPolarsTypeToJSONSchema(colType: DataType): string {
  // Variants grouped by the schema type they map to.
  const variantsBySchemaType: [string, string[]][] = [
    ["null", ["Null"]],
    ["boolean", ["Bool"]],
    [
      "integer",
      [
        "Int8",
        "Int16",
        "Int32",
        "Int64",
        "UInt8",
        "UInt16",
        "UInt32",
        "UInt64",
      ],
    ],
    ["number", ["Float32", "Float64"]],
    ["string", ["Date", "Datetime", "Utf8", "Categorical"]],
    ["array", ["List"]],
    ["object", ["Struct"]],
  ];

  // Flatten the groups into a variant -> schema type lookup table.
  const schemaTypeByVariant: { [key: string]: string } = {};
  for (const [schemaType, variants] of variantsBySchemaType) {
    for (const variant of variants) {
      schemaTypeByVariant[variant] = schemaType;
    }
  }

  const resolved = schemaTypeByVariant[colType.variant];
  return resolved ? resolved : "string";
}

/**
* @ignore
*/
Expand Down Expand Up @@ -1755,6 +1806,25 @@ export const _DataFrame = (_df: any): DataFrame => {
set columns(names) {
_df.columns = names;
},
/**
 * Return back text/html and application/vnd.dataresource+json representations
 * of the DataFrame. This is intended to be a simple view of the DataFrame
 * inside of notebooks.
 *
 * At most 50 rows are rendered by default. The `POLARS_FMT_MAX_ROWS`
 * environment variable overrides that limit; it is parsed as a base-10
 * integer, and a value that does not parse as a number is ignored.
 *
 * @returns Media bundle / mimetype keys for Jupyter frontends
 */
[jupyterDisplay]() {
  const defaultRows = 50;
  // An unset or empty variable parses to NaN and therefore keeps the default.
  const parsedRows = Number.parseInt(
    process.env.POLARS_FMT_MAX_ROWS ?? "",
    10,
  );
  // Never hand NaN to `limit`; fall back to the default instead.
  const rows = Number.isNaN(parsedRows) ? defaultRows : parsedRows;

  const limited = this.limit(rows);
  return {
    "application/vnd.dataresource+json": limited.toDataResource(),
    "text/html": limited.toHTML(),
  };
},
get schema() {
return this.getColumns().reduce((acc, curr) => {
acc[curr.name] = curr.dtype;
Expand Down Expand Up @@ -2180,6 +2250,39 @@ export const _DataFrame = (_df: any): DataFrame => {

return _df.serialize("json").toString();
},
/**
 * Render the DataFrame as an HTML `<table>` string with one header row
 * of column names and one body row per record. Column names and cell
 * values are passed through `escapeHTML`; cell values are stringified
 * with `String(...)` first.
 *
 * @returns the HTML markup of the table
 */
toHTML(): string {
  // Resolve the column names once instead of once per row.
  const columnNames = this.getColumns().map((column) => column.name);

  const headerCells = columnNames
    .map((name) => `<th>${escapeHTML(name)}</th>`)
    .join("");

  const bodyRows = this.toRecords()
    .map((row) => {
      const cells = columnNames
        .map((name) => `<td>${escapeHTML(String(row[name]))}</td>`)
        .join("");
      return `<tr>${cells}</tr>`;
    })
    .join("");

  return `<table><thead><tr>${headerCells}</tr></thead><tbody>${bodyRows}</tbody></table>`;
},
/**
 * Build a tabular data resource from this DataFrame: the row records
 * from `toRecords()` plus a schema with one `{ name, type }` field per
 * column, where the type comes from `mapPolarsTypeToJSONSchema`.
 */
toDataResource(): TabularDataResource {
  const records = this.toRecords();

  const fieldList: TabularDataResource["schema"]["fields"] = [];
  for (const series of this.getColumns()) {
    fieldList.push({
      name: series.name,
      type: mapPolarsTypeToJSONSchema(series.dtype),
    });
  }

  return { data: records, schema: { fields: fieldList } };
},
toObject() {
return this.getColumns().reduce((acc, curr) => {
acc[curr.name] = curr.toArray();
Expand Down Expand Up @@ -2379,6 +2482,9 @@ export const _DataFrame = (_df: any): DataFrame => {
return true;
},
// `in` checks succeed for the Jupyter display symbol and for any column name.
has(target, p) {
  return p === jupyterDisplay || target.columns.includes(p as any);
},
ownKeys(target) {
Expand Down
18 changes: 18 additions & 0 deletions polars/html.ts
Original file line number Diff line number Diff line change
@@ -0,0 +1,18 @@
/**
 * HTML entity for each character that must be escaped.
 */
const rawToEntity: ReadonlyMap<string, string> = new Map([
  ["&", "&amp;"],
  ["<", "&lt;"],
  [">", "&gt;"],
  ['"', "&quot;"],
  ["'", "&#39;"],
]);

// Literal character class matching exactly the keys of `rawToEntity`.
// Written out by hand rather than joined from the keys at runtime, so adding
// a regex-significant key (`]`, `^`, `-`, `\`) cannot silently corrupt it.
const rawRe = /[&<>"']/g;

/**
 * Escapes text for safe interpolation into HTML text content and quoted attributes
 *
 * @param str raw text to escape
 * @returns `str` with `&`, `<`, `>`, `"` and `'` replaced by HTML entities
 */
export function escapeHTML(str: string): string {
  // `replace` with a global regex substitutes every match, same as `replaceAll`.
  return str.replace(rawRe, (m) => rawToEntity.get(m) ?? m);
}

0 comments on commit 0387b7d

Please sign in to comment.