Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Use len for polars #83

Merged
merged 5 commits into from
Jun 27, 2024
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
2 changes: 0 additions & 2 deletions _report/report.R
Original file line number Diff line number Diff line change
Expand Up @@ -249,8 +249,6 @@ time_logs = function(path=getwd()) {
lt <- load_time(path=getwd())

ct = clean_time(lt)
# https://github.com/pola-rs/polars/issues/16937
ct = ct %>% filter(!(solution == 'polars' & question == 'sum v3 count by id1:id6'))
# remove duckdb-latest for now
ct = ct %>% filter(!(solution == 'duckdb-latest'))
d = model_time(ct)
Expand Down
2 changes: 1 addition & 1 deletion polars/VERSION
Original file line number Diff line number Diff line change
@@ -1 +1 @@
0.19.11
0.20.31
4 changes: 2 additions & 2 deletions polars/groupby-polars.py
Original file line number Diff line number Diff line change
Expand Up @@ -277,7 +277,7 @@
question = "sum v3 count by id1:id6" # q10
gc.collect()
t_start = timeit.default_timer()
ans = x.group_by(["id1","id2","id3","id4","id5","id6"]).agg([pl.sum("v3").alias("v3"), pl.count("v1").alias("count")]).collect()
ans = x.group_by(["id1","id2","id3","id4","id5","id6"]).agg([pl.sum("v3").alias("v3"), pl.len().alias("count")]).collect()
print(ans.shape, flush=True)
t = timeit.default_timer() - t_start
m = memory_usage()
Expand All @@ -288,7 +288,7 @@
del ans
gc.collect()
t_start = timeit.default_timer()
ans = x.group_by(["id1","id2","id3","id4","id5","id6"]).agg([pl.sum("v3").alias("v3"), pl.count("v1").alias("count")]).collect()
ans = x.group_by(["id1","id2","id3","id4","id5","id6"]).agg([pl.sum("v3").alias("v3"), pl.len().alias("count")]).collect()
print(ans.shape, flush=True)
t = timeit.default_timer() - t_start
m = memory_usage()
Expand Down
6 changes: 0 additions & 6 deletions time.csv
Original file line number Diff line number Diff line change
Expand Up @@ -10669,8 +10669,6 @@ ip-172-31-31-147,1717788597,1718048977.2284322,groupby,G1_1e7_1e2_5_0,10000000,l
ip-172-31-31-147,1717788597,1718048977.7352843,groupby,G1_1e7_1e2_5_0,10000000,largest two v3 by id6,190002,2,polars,0.20.31,,.groupby,2,0.5,1.83,TRUE,18700554.78,0.0,,FALSE
ip-172-31-31-147,1717788597,1718048978.163413,groupby,G1_1e7_1e2_5_0,10000000,regression v1 v2 by id2 id4,9216,3,polars,0.20.31,,.groupby,1,0.421,1.816,TRUE,9.941,0.0,,FALSE
ip-172-31-31-147,1717788597,1718048978.7440386,groupby,G1_1e7_1e2_5_0,10000000,regression v1 v2 by id2 id4,9216,3,polars,0.20.31,,.groupby,2,0.574,1.779,TRUE,9.941,0.0,,FALSE
ip-172-31-31-147,1717788597,1718048979.338154,groupby,G1_1e7_1e2_5_0,10000000,sum v3 count by id1:id6,9999993,8,polars,0.20.31,,.groupby,1,0.532,2.521,TRUE,474969574.048;9500000.0,0.055,,FALSE
ip-172-31-31-147,1717788597,1718048979.9407501,groupby,G1_1e7_1e2_5_0,10000000,sum v3 count by id1:id6,9999993,8,polars,0.20.31,,.groupby,2,0.518,2.572,TRUE,474969574.048;9500000.0,0.052,,FALSE
ip-172-31-31-147,1717788597,1718049045.2117703,groupby,G1_1e8_1e2_0_0,100000000,sum v1 by id1,100,2,polars,0.20.31,,.groupby,1,0.065,4.732,TRUE,299991302,0.0,,FALSE
ip-172-31-31-147,1717788597,1718049045.266078,groupby,G1_1e8_1e2_0_0,100000000,sum v1 by id1,100,2,polars,0.20.31,,.groupby,2,0.05,4.589,TRUE,299991302,0.0,,FALSE
ip-172-31-31-147,1717788597,1718049046.9703794,groupby,G1_1e8_1e2_0_0,100000000,sum v1 by id1:id2,10000,3,polars,0.20.31,,.groupby,1,1.7,5.417,TRUE,299991302,0.0,,FALSE
Expand Down Expand Up @@ -10769,8 +10767,6 @@ ip-172-31-31-147,1717788597,1718049971.9428356,groupby,G1_1e8_1e2_5_0,100000000,
ip-172-31-31-147,1717788597,1718049977.8404155,groupby,G1_1e8_1e2_5_0,100000000,largest two v3 by id6,1900002,2,polars,0.20.31,,.groupby,2,5.888,4.634,TRUE,186996833.999,0.002,,FALSE
ip-172-31-31-147,1717788597,1718049983.1203926,groupby,G1_1e8_1e2_5_0,100000000,regression v1 v2 by id2 id4,9216,3,polars,0.20.31,,.groupby,1,5.268,5.976,TRUE,1.029,0.0,,FALSE
ip-172-31-31-147,1717788597,1718049988.6490858,groupby,G1_1e8_1e2_5_0,100000000,regression v1 v2 by id2 id4,9216,3,polars,0.20.31,,.groupby,2,5.521,5.941,TRUE,1.029,0.0,,FALSE
ip-172-31-31-147,1717788597,1718049996.1272922,groupby,G1_1e8_1e2_5_0,100000000,sum v3 count by id1:id6,99999338,8,polars,0.20.31,,.groupby,1,6.931,12.352,TRUE,4750083909.4;95000000.0,0.54,,FALSE
ip-172-31-31-147,1717788597,1718050003.5831513,groupby,G1_1e8_1e2_5_0,100000000,sum v3 count by id1:id6,99999338,8,polars,0.20.31,,.groupby,2,6.683,11.868,TRUE,4750083909.4;95000000.0,0.589,,FALSE
ip-172-31-31-147,1717788597,1718050535.2330437,groupby,G1_1e9_1e2_0_0,1000000000,sum v1 by id1,100,2,polars,0.20.31,,.groupby,1,0.875,12.54,TRUE,2999924714,0.0,,FALSE
ip-172-31-31-147,1717788597,1718050535.8921537,groupby,G1_1e9_1e2_0_0,1000000000,sum v1 by id1,100,2,polars,0.20.31,,.groupby,2,0.655,13.182,TRUE,2999924714,0.0,,FALSE
ip-172-31-31-147,1717788597,1718050556.9025226,groupby,G1_1e9_1e2_0_0,1000000000,sum v1 by id1:id2,10000,3,polars,0.20.31,,.groupby,1,21.005,16.078,TRUE,2999924714,0.0,,FALSE
Expand Down Expand Up @@ -10849,8 +10845,6 @@ ip-172-31-31-147,1717788597,1718058836.9101326,groupby,G1_1e9_1e2_5_0,1000000000
ip-172-31-31-147,1717788597,1718058905.6577113,groupby,G1_1e9_1e2_5_0,1000000000,largest two v3 by id6,19000002,2,polars,0.20.31,,.groupby,2,68.713,44.959,TRUE,1870003947.143,0.012,,FALSE
ip-172-31-31-147,1717788597,1718058981.064196,groupby,G1_1e9_1e2_5_0,1000000000,regression v1 v2 by id2 id4,9216,3,polars,0.20.31,,.groupby,1,75.384,44.352,TRUE,0.099,0.0,,FALSE
ip-172-31-31-147,1717788597,1718059134.2026863,groupby,G1_1e9_1e2_5_0,1000000000,regression v1 v2 by id2 id4,9216,3,polars,0.20.31,,.groupby,2,153.129,43.601,TRUE,0.099,0.0,,FALSE
ip-172-31-31-147,1717788597,1718059426.282781,groupby,G1_1e9_1e2_5_0,1000000000,sum v3 count by id1:id6,999939563,8,polars,0.20.31,,.groupby,1,286.626,85.045,TRUE,47498842805.649;950000000.0,5.444,,FALSE
ip-172-31-31-147,1717788597,1718059618.637678,groupby,G1_1e9_1e2_5_0,1000000000,sum v3 count by id1:id6,999939563,8,polars,0.20.31,,.groupby,2,182.781,94.205,TRUE,47498842805.649;950000000.0,6.831,,FALSE
ip-172-31-31-147,1717788597,1718060293.4996655,join,J1_1e7_NA_0_0,10000000,small inner on int,8998860,9,polars,0.20.31,,.join,1,0.112,3.889,TRUE,450015153.577;347720187.395,0.013,,FALSE
ip-172-31-31-147,1717788597,1718060293.635809,join,J1_1e7_NA_0_0,10000000,small inner on int,8998860,9,polars,0.20.31,,.join,2,0.097,3.894,TRUE,450015153.577;347720187.395,0.01,,FALSE
ip-172-31-31-147,1717788597,1718060293.7767437,join,J1_1e7_NA_0_0,10000000,medium inner on int,8998412,11,polars,0.20.31,,.join,1,0.099,3.963,TRUE,449954076.026;449999844.937,0.015,,FALSE
Expand Down
Loading