diff --git a/crates/ruff_linter/resources/test/fixtures/ruff/for_variable_used_after_block.py b/crates/ruff_linter/resources/test/fixtures/ruff/for_variable_used_after_block.py new file mode 100644 index 0000000000000..e83ab1b2a1ad8 --- /dev/null +++ b/crates/ruff_linter/resources/test/fixtures/ruff/for_variable_used_after_block.py @@ -0,0 +1,81 @@ +for global_var in []: + _ = global_var + pass + +# ❌ +_ = global_var + +def foo(): + # For control var used outside block + for event in []: + _ = event + pass + + # ❌ + _ = event + + for x in []: + _ = x + pass + + # Using the same control variable name in a different loop is ok + for x in []: + _ = x + pass + + for y in []: + # ❌ x is used outside of the loop it was defined in (meant to use y) + if x == 5: + pass + + # Assign a variable before the loop + room_id = 3 + _ = room_id + # Use the same variable name in a loop + for room_id in []: + _ = room_id + pass + + # ❌ After the loop is not ok because the value is probably not what you expect + _ = room_id + + # ❌ Augmented assignment is not allowed because the value is probably not what you expect + room_id += 1 + + # Assigning again after the loop is ok + room_id = 5 + room_id += 1 + _ = room_id + + # Tuple destructuring + for a, b, c in []: + _ = a + _ = b + _ = c + pass + + # ❌ + _ = a + _ = b + _ = c + + # Array destructuring + for [d, e, f] in []: + _ = d + _ = e + _ = f + pass + + # ❌ + _ = d + _ = e + _ = f + + # Nested function and class definitions are fine + for potential_power in []: + def action(): + print(potential_power) + + class Animal: + power_level = potential_power + diff --git a/crates/ruff_linter/src/checkers/ast/analyze/deferred_scopes.rs b/crates/ruff_linter/src/checkers/ast/analyze/deferred_scopes.rs index 003db1d741782..37cb6f46d1aec 100644 --- a/crates/ruff_linter/src/checkers/ast/analyze/deferred_scopes.rs +++ b/crates/ruff_linter/src/checkers/ast/analyze/deferred_scopes.rs @@ -17,6 +17,7 @@ pub(crate) fn deferred_scopes(checker: &mut Checker) { Rule::AsyncioDanglingTask, Rule::BadStaticmethodArgument, Rule::BuiltinAttributeShadowing, + Rule::ForVariableUsedAfterBlock, Rule::GlobalVariableNotAssigned, Rule::ImportPrivateName, Rule::ImportShadowedByLoopVar, @@ -86,6 +87,10 @@ pub(crate) fn deferred_scopes(checker: &mut Checker) { for scope_id in checker.analyze.scopes.iter().rev().copied() { let scope = &checker.semantic.scopes[scope_id]; + if checker.enabled(Rule::ForVariableUsedAfterBlock) { + ruff::rules::for_variable_used_after_block(checker, scope, &mut diagnostics); + } + if checker.enabled(Rule::UndefinedLocal) { pyflakes::rules::undefined_local(checker, scope_id, scope, &mut diagnostics); } diff --git a/crates/ruff_linter/src/codes.rs b/crates/ruff_linter/src/codes.rs index 9faf8c8c373b2..d02e792c03a2c 100644 --- a/crates/ruff_linter/src/codes.rs +++ b/crates/ruff_linter/src/codes.rs @@ -958,6 +958,7 @@ pub fn code_to_rule(linter: Linter, code: &str) -> Option<(RuleGroup, Rule)> { (Ruff, "029") => (RuleGroup::Preview, rules::ruff::rules::UnusedAsync), (Ruff, "030") => (RuleGroup::Preview, rules::ruff::rules::AssertWithPrintMessage), (Ruff, "031") => (RuleGroup::Preview, rules::ruff::rules::IncorrectlyParenthesizedTupleInSubscript), + (Ruff, "032") => (RuleGroup::Preview, rules::ruff::rules::ForVariableUsedAfterBlock), (Ruff, "100") => (RuleGroup::Stable, rules::ruff::rules::UnusedNOQA), (Ruff, "101") => (RuleGroup::Preview, rules::ruff::rules::RedirectedNOQA), diff --git a/crates/ruff_linter/src/rules/ruff/mod.rs b/crates/ruff_linter/src/rules/ruff/mod.rs index 975122c9f68d2..2d447ce4a5295 100644 --- a/crates/ruff_linter/src/rules/ruff/mod.rs +++ b/crates/ruff_linter/src/rules/ruff/mod.rs @@ -26,6 +26,10 @@ mod tests { #[test_case(Rule::AsyncioDanglingTask, Path::new("RUF006.py"))] #[test_case(Rule::CollectionLiteralConcatenation, Path::new("RUF005.py"))] + #[test_case( + Rule::ForVariableUsedAfterBlock, + Path::new("for_variable_used_after_block.py") + )] #[test_case(Rule::ExplicitFStringTypeConversion, Path::new("RUF010.py"))] #[test_case(Rule::FunctionCallInDataclassDefaultArgument, Path::new("RUF009.py"))] #[test_case(Rule::ImplicitOptional, Path::new("RUF013_0.py"))] diff --git a/crates/ruff_linter/src/rules/ruff/rules/for_variable_used_after_block.rs b/crates/ruff_linter/src/rules/ruff/rules/for_variable_used_after_block.rs new file mode 100644 index 0000000000000..9b3ef859e8b46 --- /dev/null +++ b/crates/ruff_linter/src/rules/ruff/rules/for_variable_used_after_block.rs @@ -0,0 +1,97 @@ +use ruff_diagnostics::{Diagnostic, Violation}; +use ruff_macros::{derive_message_formats, violation}; +use ruff_python_semantic::Scope; +use ruff_text_size::Ranged; + +use crate::checkers::ast::Checker; + +/// ## What it does +/// Checks for variables defined in `for` loops that are used outside of their +/// respective blocks. +/// +/// ## Why is this bad? +/// Usage of of a control variable outside of the block they're defined in will probably +/// lead to flawed logic in a way that will likely cause bugs. The variable might not +/// contain what you expect. +/// +/// In Python, unlike many other languages, `for` loops don't define their own scopes. +/// Therefore, usage of the control variables outside of the block will be the the value +/// from the last iteration until re-assigned. +/// +/// While this mistake is easy to spot in small examples, it can be hidden in larger +/// blocks of code, where the the loop and downstream usage may not be visible at the +/// same time. +/// +/// ## Example +/// ```python +/// for x in range(10): +/// pass +/// +/// print(x) # prints 9 +/// ``` +#[violation] +pub struct ForVariableUsedAfterBlock { + control_var_name: String, +} + +impl Violation for ForVariableUsedAfterBlock { + #[derive_message_formats] + fn message(&self) -> String { + let ForVariableUsedAfterBlock { control_var_name } = self; + + format!("`for` loop variable {control_var_name} is used outside of block") + } +} + +/// Based on wemake-python-styleguide (WPS441) to forbid control variables after the block body. +pub(crate) fn for_variable_used_after_block( + checker: &Checker, + scope: &Scope, + diagnostics: &mut Vec, +) { + // Find for-loop variable bindings + let loop_var_bindings = scope + .all_bindings() + .map(|(name, binding_id)| (name, checker.semantic().binding(binding_id))) + .filter_map(|(name, binding)| binding.kind.is_loop_var().then_some((name, binding))); + + for (name, binding) in loop_var_bindings { + // This is safe to unwrap because we're only dealing with loop var bindings and + // this can only be `None` for built-ins. + let binding_source_node_id = binding.source.unwrap(); + // The node_id of the for-loop that contains the binding + let binding_statement_id = checker.semantic().statement_id(binding_source_node_id); + let binding_statement = checker.semantic().statement(binding_source_node_id); + + // Loop over the references of those bindings to see if they're in the same block-scope + 'references: for reference in binding.references() { + let reference = checker.semantic().reference(reference); + // This is safe to unwrap because a loop var binding can't be a built-in so + // any reference to the loop var won't be a built-in where this could be `None`. + let reference_node_id = reference.expression_id().unwrap(); + + // Skip any reference that come before the control var binding in the source + // order, skip it because people can assign and use the same variable name + // above the block. + if reference.range().end() < binding_statement.range().start() { + continue; + } + + // Traverse the hierarchy and look for a block match + let statement_hierarchy = checker.semantic().parent_statement_ids(reference_node_id); + for ancestor_node_id in statement_hierarchy { + if binding_statement_id == ancestor_node_id { + continue 'references; + } + } + + // The reference wasn't used in the same block, report a violation/diagnostic + diagnostics.push(Diagnostic::new( + ForVariableUsedAfterBlock { + control_var_name: name.to_owned(), + }, + reference.range(), + )); + } + } +} diff --git a/crates/ruff_linter/src/rules/ruff/rules/mod.rs b/crates/ruff_linter/src/rules/ruff/rules/mod.rs index 0f23812df8c93..d808232927773 100644 --- a/crates/ruff_linter/src/rules/ruff/rules/mod.rs +++ b/crates/ruff_linter/src/rules/ruff/rules/mod.rs @@ -5,6 +5,7 @@ pub(crate) use asyncio_dangling_task::*; pub(crate) use collection_literal_concatenation::*; pub(crate) use default_factory_kwarg::*; pub(crate) use explicit_f_string_type_conversion::*; +pub(crate) use for_variable_used_after_block::*; pub(crate) use function_call_in_dataclass_default::*; pub(crate) use implicit_optional::*; pub(crate) use incorrectly_parenthesized_tuple_in_subscript::*; @@ -39,6 +40,7 @@ mod collection_literal_concatenation; mod confusables; mod default_factory_kwarg; mod explicit_f_string_type_conversion; +mod for_variable_used_after_block; mod function_call_in_dataclass_default; mod helpers; mod implicit_optional; diff --git a/crates/ruff_linter/src/rules/ruff/snapshots/ruff_linter__rules__ruff__tests__RUF032_for_variable_used_after_block.py.snap b/crates/ruff_linter/src/rules/ruff/snapshots/ruff_linter__rules__ruff__tests__RUF032_for_variable_used_after_block.py.snap new file mode 100644 index 0000000000000..0456430e08e96 --- /dev/null +++ b/crates/ruff_linter/src/rules/ruff/snapshots/ruff_linter__rules__ruff__tests__RUF032_for_variable_used_after_block.py.snap @@ -0,0 +1,103 @@ +--- +source: crates/ruff_linter/src/rules/ruff/mod.rs +--- +for_variable_used_after_block.py:6:5: RUF032 `for` loop variable global_var is used outside of block + | +5 | # ❌ +6 | _ = global_var + | ^^^^^^^^^^ RUF032 +7 | +8 | def foo(): + | + +for_variable_used_after_block.py:15:9: RUF032 `for` loop variable event is used outside of block + | +14 | # ❌ +15 | _ = event + | ^^^^^ RUF032 +16 | +17 | for x in []: + | + +for_variable_used_after_block.py:28:12: RUF032 `for` loop variable x is used outside of block + | +26 | for y in []: +27 | # ❌ x is used outside of the loop it was defined in (meant to use y) +28 | if x == 5: + | ^ RUF032 +29 | pass + | + +for_variable_used_after_block.py:40:9: RUF032 `for` loop variable room_id is used outside of block + | +39 | # ❌ After the loop is not ok because the value is probably not what you expect +40 | _ = room_id + | ^^^^^^^ RUF032 +41 | +42 | # ❌ Augmented assignment is not allowed because the value is probably not what you expect + | + +for_variable_used_after_block.py:43:5: RUF032 `for` loop variable room_id is used outside of block + | +42 | # ❌ Augmented assignment is not allowed because the value is probably not what you expect +43 | room_id += 1 + | ^^^^^^^ RUF032 +44 | +45 | # Assigning again after the loop is ok + | + +for_variable_used_after_block.py:58:9: RUF032 `for` loop variable a is used outside of block + | +57 | # ❌ +58 | _ = a + | ^ RUF032 +59 | _ = b +60 | _ = c + | + +for_variable_used_after_block.py:59:9: RUF032 `for` loop variable b is used outside of block + | +57 | # ❌ +58 | _ = a +59 | _ = b + | ^ RUF032 +60 | _ = c + | + +for_variable_used_after_block.py:60:9: RUF032 `for` loop variable c is used outside of block + | +58 | _ = a +59 | _ = b +60 | _ = c + | ^ RUF032 +61 | +62 | # Array destructuring + | + +for_variable_used_after_block.py:70:9: RUF032 `for` loop variable d is used outside of block + | +69 | # ❌ +70 | _ = d + | ^ RUF032 +71 | _ = e +72 | _ = f + | + +for_variable_used_after_block.py:71:9: RUF032 `for` loop variable e is used outside of block + | +69 | # ❌ +70 | _ = d +71 | _ = e + | ^ RUF032 +72 | _ = f + | + +for_variable_used_after_block.py:72:9: RUF032 `for` loop variable f is used outside of block + | +70 | _ = d +71 | _ = e +72 | _ = f + | ^ RUF032 +73 | +74 | # Nested function and class definitions are fine + | diff --git a/crates/ruff_python_semantic/src/binding.rs b/crates/ruff_python_semantic/src/binding.rs index 9b9c74aa81fe2..80f69bca13557 100644 --- a/crates/ruff_python_semantic/src/binding.rs +++ b/crates/ruff_python_semantic/src/binding.rs @@ -25,7 +25,8 @@ pub struct Binding<'a> { pub scope: ScopeId, /// The context in which the [`Binding`] was created. pub context: ExecutionContext, - /// The statement in which the [`Binding`] was defined. + /// The statement in which the [`Binding`] was defined. `None` if the [`Binding`] + /// comes from a built-in. pub source: Option, /// The references to the [`Binding`]. pub references: Vec, @@ -232,7 +233,8 @@ impl<'a> Binding<'a> { locator.slice(self.range) } - /// Returns the statement in which the binding was defined. + /// Returns the statement in which the binding was defined. `None` if the binding + /// comes from a built-in. pub fn statement<'b>(&self, semantic: &SemanticModel<'b>) -> Option<&'b Stmt> { self.source .map(|statement_id| semantic.statement(statement_id)) diff --git a/crates/ruff_python_semantic/src/model.rs b/crates/ruff_python_semantic/src/model.rs index 184fb9496b0b0..2a5263c794578 100644 --- a/crates/ruff_python_semantic/src/model.rs +++ b/crates/ruff_python_semantic/src/model.rs @@ -1201,9 +1201,17 @@ impl<'a> SemanticModel<'a> { /// Return the [`Stmt`] corresponding to the given [`NodeId`]. #[inline] pub fn statement(&self, node_id: NodeId) -> &'a Stmt { + self.nodes[self.statement_id(node_id)] + .as_statement() + .expect("No statement found") + } + + /// Return the statement [`NodeId`] corresponding to the given [`NodeId`]. + #[inline] + pub fn statement_id(&self, node_id: NodeId) -> NodeId { self.nodes .ancestor_ids(node_id) - .find_map(|id| self.nodes[id].as_statement()) + .find(|id| self.nodes[*id].is_statement()) .expect("No statement found") } @@ -1232,6 +1240,13 @@ impl<'a> SemanticModel<'a> { .nth(1) } + /// Given a [`NodeId`], return the [`NodeId`] of the parent statement, if any. + pub fn parent_statement_ids(&self, node_id: NodeId) -> impl Iterator + '_ { + self.nodes + .ancestor_ids(node_id) + .filter(|id| self.nodes[*id].is_statement()) + } + /// Return the [`Expr`] corresponding to the given [`NodeId`]. #[inline] pub fn expression(&self, node_id: NodeId) -> Option<&'a Expr> { diff --git a/ruff.schema.json b/ruff.schema.json index 83b27a24f71cf..20c6aafb9d1e5 100644 --- a/ruff.schema.json +++ b/ruff.schema.json @@ -3736,6 +3736,7 @@ "RUF03", "RUF030", "RUF031", + "RUF032", "RUF1", "RUF10", "RUF100",