Skip to content

Commit

Permalink
tests
Browse files Browse the repository at this point in the history
  • Loading branch information
andygrove committed Dec 14, 2024
1 parent 4e60563 commit ebe403a
Show file tree
Hide file tree
Showing 2 changed files with 21 additions and 2 deletions.
11 changes: 9 additions & 2 deletions src/query_stage.rs
Original file line number Diff line number Diff line change
Expand Up @@ -16,7 +16,7 @@
// under the License.

use crate::context::serialize_execution_plan;
use crate::shuffle::{ShuffleCodec, ShuffleReaderExec};
use crate::shuffle::{ShuffleCodec, ShuffleReaderExec, ShuffleWriterExec};
use datafusion::error::Result;
use datafusion::physical_plan::{ExecutionPlan, ExecutionPlanProperties, Partitioning};
use datafusion::prelude::SessionContext;
Expand Down Expand Up @@ -99,7 +99,14 @@ impl QueryStage {
/// Get the input partition count. This is the same as the number of concurrent tasks
/// when we schedule this query stage for execution.
///
/// When the stage's plan is a `ShuffleWriterExec`, the count is taken from the
/// output partitioning of the writer's first child rather than from the writer
/// itself. For any other plan, the plan's own output partitioning is used.
pub fn get_input_partition_count(&self) -> usize {
    if self.plan.as_any().is::<ShuffleWriterExec>() {
        // most query stages represent a shuffle write
        // NOTE(review): indexing assumes the shuffle writer always has at least
        // one child; this panics otherwise — confirm that invariant holds.
        self.plan.children()[0]
            .output_partitioning()
            .partition_count()
    } else {
        // probably the final query stage
        self.plan.output_partitioning().partition_count()
    }
}

pub fn get_output_partition_count(&self) -> usize {
Expand Down
12 changes: 12 additions & 0 deletions tests/test_context.py
Original file line number Diff line number Diff line change
Expand Up @@ -23,9 +23,21 @@ def test_basic_query_succeed():
df_ctx = SessionContext()
ctx = DatafusionRayContext(df_ctx)
df_ctx.register_csv("tips", "examples/tips.csv", has_header=True)
# TODO why does this return a single batch and not a list of batches?
record_batch = ctx.sql("SELECT * FROM tips")
assert record_batch.num_rows == 244

def test_aggregate():
    """Run a grouped aggregate over the tips CSV and check the total row count.

    The query result is expected to be a list of record batches whose row
    counts add up to 4.
    """
    session = SessionContext()
    ray_ctx = DatafusionRayContext(session)
    session.register_csv("tips", "examples/tips.csv", has_header=True)

    batches = ray_ctx.sql(
        "select sex, smoker, avg(tip/total_bill) as tip_pct from tips group by sex, smoker"
    )
    assert isinstance(batches, list)

    # TODO why does this return many empty batches?
    # Count rows across every batch so that empty batches do not affect the check.
    total_rows = sum(batch.num_rows for batch in batches)
    assert total_rows == 4

def test_no_result_query():
df_ctx = SessionContext()
Expand Down

0 comments on commit ebe403a

Please sign in to comment.