Skip to content

Commit

Permalink
refactor: Deprecate `how="outer"` join type in favour of more correct `how="full"` (left/right are *also* outer joins)
Browse files Browse the repository at this point in the history
  • Loading branch information
alexander-beedie committed May 22, 2024
1 parent ddaf393 commit 2d51973
Show file tree
Hide file tree
Showing 37 changed files with 271 additions and 237 deletions.
12 changes: 6 additions & 6 deletions crates/polars-lazy/src/frame/mod.rs
Original file line number Diff line number Diff line change
Expand Up @@ -1072,7 +1072,7 @@ impl LazyFrame {
self.join(other, vec![], vec![], JoinArgs::new(JoinType::Cross))
}

/// Left join this query with another lazy query.
/// Left outer join this query with another lazy query.
///
/// Matches on the values of the expressions `left_on` and `right_on`. For more
/// flexible join logic, see [`join`](LazyFrame::join) or
Expand Down Expand Up @@ -1122,7 +1122,7 @@ impl LazyFrame {
)
}

/// Outer join this query with another lazy query.
/// Full outer join this query with another lazy query.
///
/// Matches on the values of the expressions `left_on` and `right_on`. For more
/// flexible join logic, see [`join`](LazyFrame::join) or
Expand All @@ -1133,17 +1133,17 @@ impl LazyFrame {
/// ```rust
/// use polars_core::prelude::*;
/// use polars_lazy::prelude::*;
/// fn outer_join_dataframes(ldf: LazyFrame, other: LazyFrame) -> LazyFrame {
/// fn full_join_dataframes(ldf: LazyFrame, other: LazyFrame) -> LazyFrame {
/// ldf
/// .outer_join(other, col("foo"), col("bar"))
/// .full_join(other, col("foo"), col("bar"))
/// }
/// ```
pub fn outer_join<E: Into<Expr>>(self, other: LazyFrame, left_on: E, right_on: E) -> LazyFrame {
pub fn full_join<E: Into<Expr>>(self, other: LazyFrame, left_on: E, right_on: E) -> LazyFrame {
self.join(
other,
[left_on.into()],
[right_on.into()],
JoinArgs::new(JoinType::Outer),
JoinArgs::new(JoinType::Full),
)
}

Expand Down
2 changes: 1 addition & 1 deletion crates/polars-lazy/src/physical_plan/streaming/checks.rs
Original file line number Diff line number Diff line change
Expand Up @@ -35,7 +35,7 @@ pub(super) fn streamable_join(args: &JoinArgs) -> bool {
JoinCoalesce::JoinSpecific | JoinCoalesce::CoalesceColumns
)
},
JoinType::Outer { .. } => true,
JoinType::Full { .. } => true,
_ => false,
};
supported && !args.validation.needs_checks()
Expand Down
6 changes: 3 additions & 3 deletions crates/polars-lazy/src/tests/streaming.rs
Original file line number Diff line number Diff line change
Expand Up @@ -297,7 +297,7 @@ fn test_streaming_partial() -> PolarsResult<()> {
.left_on([col("a")])
.right_on([col("a")])
.suffix("_foo")
.how(JoinType::Outer)
.how(JoinType::Full)
.coalesce(JoinCoalesce::CoalesceColumns)
.finish();

Expand Down Expand Up @@ -400,7 +400,7 @@ fn test_sort_maintain_order_streaming() -> PolarsResult<()> {
}

#[test]
fn test_streaming_outer_join() -> PolarsResult<()> {
fn test_streaming_full_outer_join() -> PolarsResult<()> {
let lf_left = df![
"a"=> [0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16, 17, 18, 19],
"b"=> [0, 0, 0, 3, 0, 1, 3, 3, 3, 1, 4, 4, 2, 1, 1, 3, 1, 4, 2, 2],
Expand All @@ -414,7 +414,7 @@ fn test_streaming_outer_join() -> PolarsResult<()> {
.lazy();

let q = lf_left
.outer_join(lf_right, col("a"), col("a"))
.full_join(lf_right, col("a"), col("a"))
.sort_by_exprs([all()], SortMultipleOptions::default());

// Toggle so that the join order is swapped.
Expand Down
10 changes: 5 additions & 5 deletions crates/polars-ops/src/frame/join/args.rs
Original file line number Diff line number Diff line change
Expand Up @@ -46,7 +46,7 @@ impl JoinCoalesce {
Left | Inner => {
matches!(self, JoinSpecific | CoalesceColumns)
},
Outer { .. } => {
Full { .. } => {
matches!(self, CoalesceColumns)
},
#[cfg(feature = "asof_join")]
Expand Down Expand Up @@ -96,9 +96,9 @@ impl JoinArgs {
#[derive(Clone, PartialEq, Eq, Hash)]
#[cfg_attr(feature = "serde", derive(Serialize, Deserialize))]
pub enum JoinType {
Left,
Inner,
Outer,
Left,
Full,
#[cfg(feature = "asof_join")]
AsOf(AsOfOptions),
Cross,
Expand All @@ -120,7 +120,7 @@ impl Display for JoinType {
let val = match self {
Left => "LEFT",
Inner => "INNER",
Outer { .. } => "OUTER",
Full { .. } => "FULL",
#[cfg(feature = "asof_join")]
AsOf(_) => "ASOF",
Cross => "CROSS",
Expand Down Expand Up @@ -189,7 +189,7 @@ impl JoinValidation {
if !self.needs_checks() {
return Ok(());
}
polars_ensure!(matches!(join_type, JoinType::Inner | JoinType::Outer{..} | JoinType::Left),
polars_ensure!(matches!(join_type, JoinType::Inner | JoinType::Full{..} | JoinType::Left),
ComputeError: "{self} validation on a {join_type} join is not supported");
Ok(())
}
Expand Down
2 changes: 1 addition & 1 deletion crates/polars-ops/src/frame/join/general.rs
Original file line number Diff line number Diff line change
Expand Up @@ -46,7 +46,7 @@ pub fn _finish_join(
Ok(df_left)
}

pub fn _coalesce_outer_join(
pub fn _coalesce_full_join(
mut df: DataFrame,
keys_left: &[&str],
keys_right: &[&str],
Expand Down
6 changes: 3 additions & 3 deletions crates/polars-ops/src/frame/join/hash_join/mod.rs
Original file line number Diff line number Diff line change
Expand Up @@ -242,7 +242,7 @@ pub trait JoinDispatch: IntoDf {
// indices are in bounds
Ok(unsafe { ca_self._finish_anti_semi_join(&idx, slice) })
}
fn _outer_join_from_series(
fn _full_join_from_series(
&self,
other: &DataFrame,
s_left: &Series,
Expand Down Expand Up @@ -271,10 +271,10 @@ pub trait JoinDispatch: IntoDf {
|| unsafe { other.take_unchecked(&idx_ca_r) },
);

let coalesce = args.coalesce.coalesce(&JoinType::Outer);
let coalesce = args.coalesce.coalesce(&JoinType::Full);
let out = _finish_join(df_left, df_right, args.suffix.as_deref());
if coalesce {
Ok(_coalesce_outer_join(
Ok(_coalesce_full_join(
out?,
&[s_left.name()],
&[s_right.name()],
Expand Down
25 changes: 10 additions & 15 deletions crates/polars-ops/src/frame/join/mod.rs
Original file line number Diff line number Diff line change
Expand Up @@ -25,7 +25,7 @@ pub use cross_join::CrossJoin;
use either::Either;
#[cfg(feature = "chunked_ids")]
use general::create_chunked_index_mapping;
pub use general::{_coalesce_outer_join, _finish_join, _join_suffix_name};
pub use general::{_coalesce_full_join, _finish_join, _join_suffix_name};
pub use hash_join::*;
use hashbrown::hash_map::{Entry, RawEntryMut};
#[cfg(feature = "merge_sorted")]
Expand Down Expand Up @@ -199,7 +199,7 @@ pub trait DataFrameJoinOps: IntoDf {
._inner_join_from_series(other, s_left, s_right, args, _verbose, drop_names),
JoinType::Left => left_df
._left_join_from_series(other, s_left, s_right, args, _verbose, drop_names),
JoinType::Outer => left_df._outer_join_from_series(other, s_left, s_right, args),
JoinType::Full => left_df._full_join_from_series(other, s_left, s_right, args),
#[cfg(feature = "semi_anti_join")]
JoinType::Anti => left_df._semi_anti_join_from_series(
s_left,
Expand Down Expand Up @@ -271,14 +271,14 @@ pub trait DataFrameJoinOps: IntoDf {
JoinType::Cross => {
unreachable!()
},
JoinType::Outer => {
JoinType::Full => {
let names_left = selected_left.iter().map(|s| s.name()).collect::<Vec<_>>();
args.coalesce = JoinCoalesce::KeepColumns;
let suffix = args.suffix.clone();
let out = left_df._outer_join_from_series(other, &lhs_keys, &rhs_keys, args);
let out = left_df._full_join_from_series(other, &lhs_keys, &rhs_keys, args);

if should_coalesce {
Ok(_coalesce_outer_join(
Ok(_coalesce_full_join(
out?,
&names_left,
drop_names.as_ref().unwrap(),
Expand Down Expand Up @@ -341,7 +341,7 @@ pub trait DataFrameJoinOps: IntoDf {
self.join(other, left_on, right_on, JoinArgs::new(JoinType::Inner))
}

/// Perform a left join on two DataFrames
/// Perform a left outer join on two DataFrames
/// # Example
///
/// ```no_run
Expand Down Expand Up @@ -384,27 +384,22 @@ pub trait DataFrameJoinOps: IntoDf {
self.join(other, left_on, right_on, JoinArgs::new(JoinType::Left))
}

/// Perform an outer join on two DataFrames
/// Perform a full outer join on two DataFrames
/// # Example
///
/// ```
/// # use polars_core::prelude::*;
/// # use polars_ops::prelude::*;
/// fn join_dfs(left: &DataFrame, right: &DataFrame) -> PolarsResult<DataFrame> {
/// left.outer_join(right, ["join_column_left"], ["join_column_right"])
/// left.full_join(right, ["join_column_left"], ["join_column_right"])
/// }
/// ```
fn outer_join<I, S>(
&self,
other: &DataFrame,
left_on: I,
right_on: I,
) -> PolarsResult<DataFrame>
fn full_join<I, S>(&self, other: &DataFrame, left_on: I, right_on: I) -> PolarsResult<DataFrame>
where
I: IntoIterator<Item = S>,
S: AsRef<str>,
{
self.join(other, left_on, right_on, JoinArgs::new(JoinType::Outer))
self.join(other, left_on, right_on, JoinArgs::new(JoinType::Full))
}
}

Expand Down
8 changes: 4 additions & 4 deletions crates/polars-pipe/src/executors/sinks/joins/generic_build.rs
Original file line number Diff line number Diff line change
Expand Up @@ -14,7 +14,7 @@ use smartstring::alias::String as SmartString;
use super::*;
use crate::executors::operators::PlaceHolder;
use crate::executors::sinks::joins::generic_probe_inner_left::GenericJoinProbe;
use crate::executors::sinks::joins::generic_probe_outer::GenericOuterJoinProbe;
use crate::executors::sinks::joins::generic_probe_outer::GenericFullOuterJoinProbe;
use crate::executors::sinks::utils::{hash_rows, load_vec};
use crate::executors::sinks::HASHMAP_INIT_SIZE;
use crate::expressions::PhysicalPipedExpr;
Expand Down Expand Up @@ -337,9 +337,9 @@ impl<K: ExtraPayload> Sink for GenericBuild<K> {
self.placeholder.replace(Box::new(probe_operator));
Ok(FinalizedSink::Operator)
},
JoinType::Outer => {
let coalesce = self.join_args.coalesce.coalesce(&JoinType::Outer);
let probe_operator = GenericOuterJoinProbe::new(
JoinType::Full => {
let coalesce = self.join_args.coalesce.coalesce(&JoinType::Full);
let probe_operator = GenericFullOuterJoinProbe::new(
left_df,
materialized_join_cols,
suffix,
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -6,7 +6,7 @@ use polars_core::prelude::*;
use polars_core::series::IsSorted;
use polars_ops::chunked_array::DfTake;
use polars_ops::frame::join::_finish_join;
use polars_ops::prelude::_coalesce_outer_join;
use polars_ops::prelude::_coalesce_full_join;
use smartstring::alias::String as SmartString;

use crate::executors::sinks::joins::generic_build::*;
Expand All @@ -18,7 +18,7 @@ use crate::expressions::PhysicalPipedExpr;
use crate::operators::{DataChunk, Operator, OperatorResult, PExecutionContext};

#[derive(Clone)]
pub struct GenericOuterJoinProbe<K: ExtraPayload> {
pub struct GenericFullOuterJoinProbe<K: ExtraPayload> {
/// all chunks are stacked into a single dataframe
/// the dataframe is not rechunked.
df_a: Arc<DataFrame>,
Expand Down Expand Up @@ -58,7 +58,7 @@ pub struct GenericOuterJoinProbe<K: ExtraPayload> {
key_names_right: Arc<[SmartString]>,
}

impl<K: ExtraPayload> GenericOuterJoinProbe<K> {
impl<K: ExtraPayload> GenericFullOuterJoinProbe<K> {
#[allow(clippy::too_many_arguments)]
pub(super) fn new(
df_a: DataFrame,
Expand All @@ -75,7 +75,7 @@ impl<K: ExtraPayload> GenericOuterJoinProbe<K> {
key_names_left: Arc<[SmartString]>,
key_names_right: Arc<[SmartString]>,
) -> Self {
GenericOuterJoinProbe {
GenericFullOuterJoinProbe {
df_a: Arc::new(df_a),
df_b_dummy: None,
materialized_join_cols,
Expand Down Expand Up @@ -152,7 +152,7 @@ impl<K: ExtraPayload> GenericOuterJoinProbe<K> {
.iter()
.map(|s| s.as_str())
.collect::<Vec<_>>();
Ok(_coalesce_outer_join(
Ok(_coalesce_full_join(
out,
&l,
&r,
Expand Down Expand Up @@ -287,7 +287,7 @@ impl<K: ExtraPayload> GenericOuterJoinProbe<K> {
}
}

impl<K: ExtraPayload> Operator for GenericOuterJoinProbe<K> {
impl<K: ExtraPayload> Operator for GenericFullOuterJoinProbe<K> {
fn execute(
&mut self,
context: &PExecutionContext,
Expand All @@ -310,6 +310,6 @@ impl<K: ExtraPayload> Operator for GenericOuterJoinProbe<K> {
Box::new(new)
}
fn fmt(&self) -> &str {
"generic_outer_join_probe"
"generic_full_join_probe"
}
}
2 changes: 1 addition & 1 deletion crates/polars-pipe/src/pipeline/convert.rs
Original file line number Diff line number Diff line change
Expand Up @@ -299,7 +299,7 @@ where
placeholder,
)) as Box<dyn SinkTrait>
},
JoinType::Outer { .. } => {
JoinType::Full { .. } => {
// First get the names before we (potentially) swap.
let key_names_left = join_columns_left
.iter()
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -70,15 +70,15 @@ fn join_produces_null(how: &JoinType) -> LeftRight<bool> {
{
match how {
JoinType::Left => LeftRight(false, true),
JoinType::Outer { .. } | JoinType::Cross | JoinType::AsOf(_) => LeftRight(true, true),
JoinType::Full { .. } | JoinType::Cross | JoinType::AsOf(_) => LeftRight(true, true),
_ => LeftRight(false, false),
}
}
#[cfg(not(feature = "asof_join"))]
{
match how {
JoinType::Left => LeftRight(false, true),
JoinType::Outer { .. } | JoinType::Cross => LeftRight(true, true),
JoinType::Full { .. } | JoinType::Cross => LeftRight(true, true),
_ => LeftRight(false, false),
}
}
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -257,11 +257,11 @@ pub(super) fn process_join(
.unwrap();
already_added_local_to_local_projected.insert(local_name);
}
// In outer joins both columns remain. So `add_local=true` also for the right table
let add_local = matches!(options.args.how, JoinType::Outer)
// In full outer joins both columns remain. So `add_local=true` also for the right table
let add_local = matches!(options.args.how, JoinType::Full)
&& !options.args.coalesce.coalesce(&options.args.how);
for e in &right_on {
// In case of outer joins we also add the columns.
// In case of full outer joins we also add the columns.
// But before we do that we must check if the column wasn't already added by the lhs.
let add_local = if add_local {
!already_added_local_to_local_projected.contains(e.output_name())
Expand Down
2 changes: 1 addition & 1 deletion crates/polars-plan/src/logical_plan/schema.rs
Original file line number Diff line number Diff line change
Expand Up @@ -221,7 +221,7 @@ pub fn set_estimated_row_counts(
let (known_size, estimated_size) = options.rows_left;
(known_size, estimated_size, filter_count_left)
},
JoinType::Cross | JoinType::Outer { .. } => {
JoinType::Cross | JoinType::Full { .. } => {
let (known_size_left, estimated_size_left) = options.rows_left;
let (known_size_right, estimated_size_right) = options.rows_right;
match (known_size_left, known_size_right) {
Expand Down
2 changes: 1 addition & 1 deletion crates/polars-sql/src/context.rs
Original file line number Diff line number Diff line change
Expand Up @@ -319,7 +319,7 @@ impl SQLContext {
lf = match &tbl.join_operator {
JoinOperator::CrossJoin => lf.cross_join(rf),
JoinOperator::FullOuter(constraint) => {
process_join(lf, rf, constraint, &l_name, &r_name, JoinType::Outer)?
process_join(lf, rf, constraint, &l_name, &r_name, JoinType::Full)?
},
JoinOperator::Inner(constraint) => {
process_join(lf, rf, constraint, &l_name, &r_name, JoinType::Inner)?
Expand Down
2 changes: 1 addition & 1 deletion crates/polars/src/docs/eager.rs
Original file line number Diff line number Diff line change
Expand Up @@ -395,7 +395,7 @@
//! // join on a single column
//! temp.left_join(&rain, ["days"], ["days"]);
//! temp.inner_join(&rain, ["days"], ["days"]);
//! temp.outer_join(&rain, ["days"], ["days"]);
//! temp.full_join(&rain, ["days"], ["days"]);
//!
//! // join on multiple columns
//! temp.join(&rain, vec!["days", "other"], vec!["days", "other"], JoinArgs::new(JoinType::Left));
Expand Down
4 changes: 2 additions & 2 deletions crates/polars/src/docs/lazy.rs
Original file line number Diff line number Diff line change
Expand Up @@ -145,7 +145,7 @@
//! let lf_a = df_a.clone().lazy();
//! let lf_b = df_b.clone().lazy();
//!
//! let joined = lf_a.join(lf_b, vec![col("a")], vec![col("foo")], JoinArgs::new(JoinType::Outer)).collect()?;
//! let joined = lf_a.join(lf_b, vec![col("a")], vec![col("foo")], JoinArgs::new(JoinType::Full)).collect()?;
//! // joined:
//!
//! // ╭─────┬─────┬─────┬──────┬─────────╮
Expand All @@ -172,7 +172,7 @@
//!
//! # let lf_a = df_a.clone().lazy();
//! # let lf_b = df_b.clone().lazy();
//! let outer = lf_a.outer_join(lf_b, col("a"), col("foo")).collect()?;
//! let outer = lf_a.full_join(lf_b, col("a"), col("foo")).collect()?;
//!
//! # let lf_a = df_a.clone().lazy();
//! # let lf_b = df_b.clone().lazy();
Expand Down
Loading

0 comments on commit 2d51973

Please sign in to comment.