Skip to content

Commit

Permalink
feat: add sqlContext (#185)
Browse files Browse the repository at this point in the history
* feat: add sqlContext

* pr feedback
  • Loading branch information
universalmind303 authored Mar 16, 2024
1 parent 137a2d9 commit 31e7741
Show file tree
Hide file tree
Showing 7 changed files with 512 additions and 67 deletions.
133 changes: 67 additions & 66 deletions Cargo.toml
Original file line number Diff line number Diff line change
Expand Up @@ -15,11 +15,11 @@ crate-type = ["cdylib", "lib"]
[dependencies]
ahash = "0.8.7"
bincode = "1.3.3"
napi = { version = "2.14.2", default-features = false, features = [
"napi8",
"serde-json",
napi = { version = "2.16.0", default-features = false, features = [
"napi8",
"serde-json",
] }
napi-derive = { version = "2.14.6", default-features = false }
napi-derive = { version = "2.16.0", default-features = false }
polars-core = { git = "https://github.com/pola-rs/polars.git", rev = "3cf4897e679b056d17a235d48867035265d43cdc", default-features = false }
polars-io = { git = "https://github.com/pola-rs/polars.git", rev = "3cf4897e679b056d17a235d48867035265d43cdc", default-features = false }
polars-lazy = { git = "https://github.com/pola-rs/polars.git", rev = "3cf4897e679b056d17a235d48867035265d43cdc", default-features = false }
Expand All @@ -30,68 +30,69 @@ either = "1.9"

[dependencies.polars]
features = [
"binary_encoding",
"rolling_window",
"json",
"dynamic_group_by",
"zip_with",
"simd",
"lazy",
"strings",
"temporal",
"random",
"object",
"fmt",
"performant",
"dtype-full",
"rows",
"round_series",
"is_unique",
"is_in",
"is_first_distinct",
"asof_join",
"cross_join",
"dot_product",
"concat_str",
"row_hash",
"reinterpret",
"mode",
"extract_jsonpath",
"cum_agg",
"rolling_window",
"repeat_by",
"interpolate",
"ewma",
"rank",
"propagate_nans",
"diff",
"pct_change",
"moment",
"diagonal_concat",
"abs",
"dot_diagram",
"dataframe_arithmetic",
"json",
"string_encoding",
"product",
"ndarray",
"unique_counts",
"log",
"serde-lazy",
"partition_by",
"pivot",
"semi_anti_join",
"parquet",
"to_dummies",
"ipc",
"avro",
"list_eval",
"arg_where",
"timezones",
"peaks",
"string_pad",
"cov",
"group_by_list",
"binary_encoding",
"rolling_window",
"json",
"dynamic_group_by",
"zip_with",
"simd",
"lazy",
"strings",
"temporal",
"random",
"object",
"fmt",
"performant",
"dtype-full",
"rows",
"round_series",
"is_unique",
"is_in",
"is_first_distinct",
"asof_join",
"cross_join",
"dot_product",
"concat_str",
"row_hash",
"reinterpret",
"mode",
"extract_jsonpath",
"cum_agg",
"rolling_window",
"repeat_by",
"interpolate",
"ewma",
"rank",
"propagate_nans",
"diff",
"pct_change",
"moment",
"diagonal_concat",
"abs",
"dot_diagram",
"dataframe_arithmetic",
"json",
"string_encoding",
"product",
"ndarray",
"unique_counts",
"log",
"serde-lazy",
"partition_by",
"pivot",
"semi_anti_join",
"parquet",
"to_dummies",
"ipc",
"avro",
"list_eval",
"arg_where",
"timezones",
"peaks",
"string_pad",
"cov",
"group_by_list",
"sql",
]
git = "https://github.com/pola-rs/polars.git"
rev = "3cf4897e679b056d17a235d48867035265d43cdc"
Expand Down
81 changes: 81 additions & 0 deletions __tests__/sql.test.ts
Original file line number Diff line number Diff line change
@@ -0,0 +1,81 @@
import pl from "@polars";
// Test suite for `pl.SQLContext`: construction, table registration and
// removal, query execution, and the custom inspect output.
describe("sql", () => {
  test("execute", () => {
    const source = pl.DataFrame({
      values: [
        ["aa", "bb"],
        [null, "cc"],
        ["dd", null],
      ],
    });
    const context = pl.SQLContext({ df: source });

    // Without options, the result of `execute` is collected explicitly
    // before being compared against the source frame.
    const collected = context.execute("SELECT * FROM df").collectSync();
    expect(collected).toFrameEqual(source);

    // With `eager: true`, the result is compared directly (no collectSync).
    const eagerResult = context.execute("SELECT * FROM df", { eager: true });
    expect(eagerResult).toFrameEqual(source);
  });

  test("register and query dataframe", () => {
    const frame = pl.DataFrame({ hello: ["world"] });
    const context = pl.SQLContext();
    context.register("frame_data", frame);

    const queried = context.execute("SELECT * FROM frame_data", {
      eager: true,
    });
    expect(queried).toFrameEqual(pl.DataFrame({ hello: ["world"] }));

    // A table registered as `null` is expected to come back as an empty frame.
    context.register("null_frame", null);
    const queriedNull = context.execute("SELECT * FROM null_frame", {
      eager: true,
    });
    expect(queriedNull).toFrameEqual(pl.DataFrame());
  });

  test("register many", () => {
    const first = pl.DataFrame({ a: [1, 2, 3], b: ["m", "n", "o"] });
    const second = pl.DataFrame({ a: [2, 3, 4], c: ["p", "q", "r"] });

    // Register multiple DataFrames at once; the value returned by
    // `registerMany` is what gets inspected afterwards.
    const emptyContext = pl.SQLContext();
    const context = emptyContext.registerMany({ tbl1: first, tbl2: second });

    const registeredNames = context.tables();
    expect(registeredNames).toEqual(expect.arrayContaining(["tbl1", "tbl2"]));
  });

  test("inspect", () => {
    const frame = pl.DataFrame({
      a: [1, 2, 3],
      b: ["m", "n", "o"],
    });
    const context = pl.SQLContext({ df: frame });

    // Invoke the method stored under Node's custom-inspect symbol key.
    const rendered = context[Symbol.for("nodejs.util.inspect.custom")]();

    expect(rendered).toEqual("SQLContext: {df}");
  });

  test("constructor with LazyFrames", () => {
    const lazyFirst = pl
      .DataFrame({ a: [1, 2, 3], b: ["m", "n", "o"] })
      .lazy();
    const lazySecond = pl
      .DataFrame({ a: [2, 3, 4], c: ["p", "q", "r"] })
      .lazy();

    const context = pl.SQLContext({ tbl1: lazyFirst, tbl2: lazySecond });

    const registeredNames = context.tables();
    expect(registeredNames).toEqual(expect.arrayContaining(["tbl1", "tbl2"]));
  });

  test("unregister", () => {
    const makeFrame = () => pl.DataFrame({ hello: ["world"] });
    const context = pl.SQLContext({
      df: makeFrame(),
      df2: makeFrame(),
      df3: makeFrame(),
    });

    // Remove a single table by name.
    context.unregister("df");
    const afterSingle = context.tables();
    expect(afterSingle).toEqual(["df2", "df3"]);

    // Remove the remaining tables by passing an array of names.
    context.unregister(["df2", "df3"]);
    const afterMany = context.tables();
    expect(afterMany).toEqual([]);
  });
});
16 changes: 16 additions & 0 deletions polars/index.ts
Original file line number Diff line number Diff line change
Expand Up @@ -17,6 +17,9 @@ export * from "./lazy/dataframe";
export * from "./lazy";
import * as lazy from "./lazy";
export * from "./types";
import * as sql from "./sql";
export type { SQLContext } from "./sql";

export type { GroupBy } from "./groupby";
export namespace pl {
export import Expr = lazy.Expr;
Expand Down Expand Up @@ -109,6 +112,19 @@ export namespace pl {
export import list = lazy.list;
export import when = lazy.when;
export const version = pli.version();

/**
 * Create a context for running SQL queries against DataFrame/LazyFrame data.
 *
 * @warning This functionality is considered **unstable**, although it is close to being
 * considered stable. It may be changed at any point without it being considered
 * a breaking change.
 *
 * @param frames - Optional record of frames keyed by name; it is forwarded
 * unchanged to the `sql.SQLContext` constructor.
 * @returns A newly constructed `sql.SQLContext`.
 */
export function SQLContext(
  frames?: Record<string, DataFrame | LazyDataFrame>,
): sql.SQLContext {
  const context = new sql.SQLContext(frames);
  return context;
}
}
// eslint-disable-next-line no-undef
export default pl;
Loading

0 comments on commit 31e7741

Please sign in to comment.