Skip to content

Commit

Permalink
feat: Support Struct field selection in the SQL engine, `RENAME` and `REPLACE` select wildcard options (#17109)
Browse files Browse the repository at this point in the history
  • Loading branch information
alexander-beedie authored Jun 24, 2024
1 parent b60788d commit f7ff2ef
Show file tree
Hide file tree
Showing 12 changed files with 654 additions and 254 deletions.
2 changes: 1 addition & 1 deletion crates/polars-mem-engine/src/executors/projection_utils.rs
Original file line number Diff line number Diff line change
Expand Up @@ -263,7 +263,7 @@ pub(super) fn check_expand_literals(

if duplicate_check && !names.insert(name) {
let msg = format!(
"the name: '{}' is duplicate\n\n\
"the name '{}' is duplicate\n\n\
It's possible that multiple expressions are returning the same default column \
name. If this is the case, try renaming the columns with \
`.alias(\"new_name\")` to avoid duplicate column names.",
Expand Down
2 changes: 1 addition & 1 deletion crates/polars-plan/src/dsl/function_expr/mod.rs
Original file line number Diff line number Diff line change
Expand Up @@ -103,7 +103,7 @@ pub(super) use self::rolling_by::RollingFunctionBy;
#[cfg(feature = "strings")]
pub use self::strings::StringFunction;
#[cfg(feature = "dtype-struct")]
pub(crate) use self::struct_::StructFunction;
pub use self::struct_::StructFunction;
#[cfg(feature = "trigonometry")]
pub(super) use self::trigonometry::TrigonometricFunction;
use super::*;
Expand Down
2 changes: 1 addition & 1 deletion crates/polars-plan/src/plans/conversion/dsl_to_ir.rs
Original file line number Diff line number Diff line change
Expand Up @@ -707,7 +707,7 @@ fn resolve_with_columns(

if !output_names.insert(field.name().clone()) {
let msg = format!(
"the name: '{}' passed to `LazyFrame.with_columns` is duplicate\n\n\
"the name '{}' passed to `LazyFrame.with_columns` is duplicate\n\n\
It's possible that multiple expressions are returning the same default column name. \
If this is the case, try renaming the columns with `.alias(\"new_name\")` to avoid \
duplicate column names.",
Expand Down
2 changes: 1 addition & 1 deletion crates/polars-sql/Cargo.toml
Original file line number Diff line number Diff line change
Expand Up @@ -12,7 +12,7 @@ description = "SQL transpiler for Polars. Converts SQL to Polars logical plans"
arrow = { workspace = true }
polars-core = { workspace = true, features = ["rows"] }
polars-error = { workspace = true }
polars-lazy = { workspace = true, features = ["abs", "binary_encoding", "concat_str", "cross_join", "cum_agg", "dtype-date", "dtype-decimal", "is_in", "list_eval", "log", "meta", "regex", "round_series", "sign", "string_reverse", "strings", "timezones", "trigonometry"] }
polars-lazy = { workspace = true, features = ["abs", "binary_encoding", "concat_str", "cross_join", "cum_agg", "dtype-date", "dtype-decimal", "dtype-struct", "is_in", "list_eval", "log", "meta", "regex", "round_series", "sign", "string_reverse", "strings", "timezones", "trigonometry"] }
polars-ops = { workspace = true }
polars-plan = { workspace = true }
polars-time = { workspace = true }
Expand Down
325 changes: 186 additions & 139 deletions crates/polars-sql/src/context.rs

Large diffs are not rendered by default.

117 changes: 82 additions & 35 deletions crates/polars-sql/src/sql_expr.rs
Original file line number Diff line number Diff line change
Expand Up @@ -375,42 +375,9 @@ impl SQLExprVisitor<'_> {

/// Visit a compound SQL identifier
///
/// e.g. df.column or "df"."column"
/// e.g. tbl.column, struct.field, tbl.struct.field (inc. nested struct fields)
fn visit_compound_identifier(&mut self, idents: &[Ident]) -> PolarsResult<Expr> {
match idents {
[tbl_name, column_name] => {
let mut lf = self
.ctx
.get_table_from_current_scope(&tbl_name.value)
.ok_or_else(|| {
polars_err!(
SQLInterface: "no table or alias named '{}' found",
tbl_name
)
})?;

let schema =
lf.schema_with_arenas(&mut self.ctx.lp_arena, &mut self.ctx.expr_arena)?;
if let Some((_, name, _)) = schema.get_full(&column_name.value) {
let resolved = &self.ctx.resolve_name(&tbl_name.value, &column_name.value);
Ok(if name != resolved {
col(resolved).alias(name)
} else {
col(name)
})
} else {
polars_bail!(
SQLInterface: "no column named '{}' found in table '{}'",
column_name,
tbl_name
)
}
},
_ => polars_bail!(
SQLInterface: "invalid identifier {:?}",
idents
),
}
Ok(resolve_compound_identifier(self.ctx, idents, self.active_schema)?[0].clone())
}

fn visit_interval(&self, interval: &Interval) -> PolarsResult<Expr> {
Expand Down Expand Up @@ -1240,3 +1207,83 @@ fn bitstring_to_bytes_literal(b: &String) -> PolarsResult<Expr> {
_ => u64::from_str_radix(s, 2).unwrap().to_be_bytes().to_vec(),
}))
}

/// Resolve a compound SQL identifier into one or more expressions.
///
/// Handles `tbl.column`, `struct.field` and `tbl.struct.field` forms (including
/// nested struct fields). The first identifier is tried as a table name via
/// `ctx.get_table_from_current_scope`; if no table is found, `active_schema`
/// (when supplied) is used for column lookup instead. Identifier levels that
/// remain once the column has been resolved are applied as struct field
/// lookups.
///
/// A `*` directly after a table name expands to every column of that table; a
/// `*` after a Struct-typed column expands to every field of that struct. In
/// both cases more than one expression is returned; otherwise the result holds
/// exactly one expression.
///
/// # Errors
///
/// Returns an `SQLInterface` error when `idents` is empty, when a table or
/// non-empty schema is in play but no second identifier follows the root, or
/// when the named column is not present in the resolved table. Returns an
/// `SQLSyntax` error when `*` is applied to a column whose known dtype is not
/// a Struct. Errors from resolving the table schema are propagated.
pub(crate) fn resolve_compound_identifier(
    ctx: &mut SQLContext,
    idents: &[Ident],
    active_schema: Option<&Schema>,
) -> PolarsResult<Vec<Expr>> {
    // inference priority: table > struct > column
    // An empty identifier list is reported as an error instead of panicking on `idents[0]`.
    let ident_root = match idents.first() {
        Some(ident) => ident,
        None => {
            polars_bail!(SQLInterface: "invalid identifier {:?}", idents)
        },
    };
    let mut remaining_idents = idents.iter().skip(1);
    let mut lf = ctx.get_table_from_current_scope(&ident_root.value);

    // Use the table's own schema when the root names a table; otherwise fall back
    // to the caller-provided schema (or an empty one).
    let schema = match lf {
        Some(ref mut lf) => lf.schema_with_arenas(&mut ctx.lp_arena, &mut ctx.expr_arena)?,
        None => Arc::new(match active_schema {
            Some(active_schema) => active_schema.clone(),
            None => Schema::new(),
        }),
    };

    let col_dtype: PolarsResult<(Expr, Option<&DataType>)> = if lf.is_none() && schema.is_empty() {
        // No table and no schema information: treat the root as a plain column
        // of unknown dtype and let later identifiers index into it.
        Ok((col(&ident_root.value), None))
    } else {
        // A second identifier is required from here on; report its absence as
        // an error rather than unwrapping.
        let name = match remaining_idents.next() {
            Some(ident) => &ident.value,
            None => {
                polars_bail!(SQLInterface: "invalid identifier {:?}", idents)
            },
        };
        if lf.is_some() && name == "*" {
            // `tbl.*` expands to every column of the table.
            return Ok(schema
                .iter_names()
                .map(|name| col(name))
                .collect::<Vec<_>>());
        } else if let Some((_, name, dtype)) = schema.get_full(name) {
            // NOTE(review): this branch is also taken when no table matched but the
            // second identifier happens to name a column in the active schema, even
            // if the root is itself a column there — confirm that this agrees with
            // the intended "struct > column" priority.
            let resolved = &ctx.resolve_name(&ident_root.value, name);
            Ok((
                if name != resolved {
                    col(resolved).alias(name)
                } else {
                    col(name)
                },
                Some(dtype),
            ))
        } else if lf.is_none() {
            // The root is not a table, so treat it as the column; rewind so that
            // the second identifier is applied as a struct field below.
            remaining_idents = idents.iter().skip(1);
            Ok((col(&ident_root.value), schema.get(&ident_root.value)))
        } else {
            polars_bail!(
                SQLInterface: "no column named '{}' found in table '{}'",
                name,
                ident_root
            )
        }
    };

    // additional ident levels index into struct fields
    let (mut column, mut dtype) = col_dtype?;
    for ident in remaining_idents {
        let name = ident.value.as_str();
        match dtype {
            Some(DataType::Struct(fields)) if name == "*" => {
                // `struct.*` expands to one expression per struct field.
                return Ok(fields
                    .iter()
                    .map(|fld| column.clone().struct_().field_by_name(&fld.name))
                    .collect())
            },
            Some(DataType::Struct(fields)) => {
                // Track the dtype of the selected field so deeper levels can be
                // checked; it becomes `None` if the field name is not found.
                dtype = fields
                    .iter()
                    .find(|fld| fld.name == name)
                    .map(|fld| &fld.dtype);
            },
            Some(dtype) if name == "*" => {
                polars_bail!(SQLSyntax: "cannot expand '*' on non-Struct dtype; found {:?}", dtype)
            },
            _ => {
                // Unknown or non-Struct dtype: nothing further can be tracked.
                dtype = None;
            },
        }
        column = column.struct_().field_by_name(name);
    }
    Ok(vec![column])
}
Loading

0 comments on commit f7ff2ef

Please sign in to comment.