From 11fa74d9d5f8aad263f362ed6d543d11a081f197 Mon Sep 17 00:00:00 2001
From: Davis Vaughan <davis@posit.co>
Date: Thu, 5 Dec 2024 14:07:08 -0500
Subject: [PATCH] Implement "leading hole hugging" behavior (#82)

* Implement special handling of hole comments

* Add special leading hole behavior for calls

Particularly useful for things like data.table!

* Utilize `Cell()` and interior mutability to clear up borrowing issues

* Add a suite of tests for hole comments

* A few extra `[` tests for holes and line breaks
---
 Cargo.lock                                    |   1 +
 crates/air_r_formatter/Cargo.toml             |   1 +
 crates/air_r_formatter/src/comments.rs        | 185 ++++++++-
 .../src/r/auxiliary/call_arguments.rs         | 206 ++++++++--
 .../src/r/auxiliary/subset_2_arguments.rs     |   1 -
 .../src/r/auxiliary/subset_arguments.rs       |   1 -
 crates/air_r_formatter/tests/specs/r/call.R   | 188 ++++++++-
 .../air_r_formatter/tests/specs/r/call.R.snap | 383 +++++++++++++++++-
 crates/air_r_formatter/tests/specs/r/subset.R |  71 +++-
 .../tests/specs/r/subset.R.snap               | 153 ++++++-
 .../air_r_formatter/tests/specs/r/subset2.R   |  29 +-
 .../tests/specs/r/subset2.R.snap              |  63 ++-
 crates/air_r_syntax/src/argument_ext.rs       |  19 +
 crates/air_r_syntax/src/lib.rs                |   1 +
 14 files changed, 1216 insertions(+), 86 deletions(-)
 create mode 100644 crates/air_r_syntax/src/argument_ext.rs
diff --git a/Cargo.lock b/Cargo.lock
index e4266e97..3058eba0 100644
--- a/Cargo.lock
+++ b/Cargo.lock
@@ -72,6 +72,7 @@ dependencies = [
  "biome_formatter",
  "biome_parser",
  "biome_rowan",
+ "itertools",
  "tests_macros",
  "tracing",
 ]
diff --git a/crates/air_r_formatter/Cargo.toml b/crates/air_r_formatter/Cargo.toml
index b2a0c709..32d2fef8 100644
--- a/crates/air_r_formatter/Cargo.toml
+++ b/crates/air_r_formatter/Cargo.toml
@@ -16,6 +16,7 @@ version = "0.0.0"
 air_r_syntax = { workspace = true }
 biome_formatter = { workspace = true }
 biome_rowan = { workspace = true }
+itertools = { workspace = true }
 tracing = { workspace = true }
 
 [dev-dependencies]
diff --git a/crates/air_r_formatter/src/comments.rs b/crates/air_r_formatter/src/comments.rs
index e350cdbd..423d1923 100644
--- a/crates/air_r_formatter/src/comments.rs
+++ b/crates/air_r_formatter/src/comments.rs
@@ -6,6 +6,7 @@ use air_r_syntax::RIfStatement;
 use air_r_syntax::RLanguage;
 use air_r_syntax::RParenthesizedExpression;
 use air_r_syntax::RSyntaxKind;
+use air_r_syntax::RSyntaxToken;
 use air_r_syntax::RWhileStatement;
 use biome_formatter::comments::CommentKind;
 use biome_formatter::comments::CommentPlacement;
@@ -65,7 +66,8 @@ impl CommentStyle for RCommentStyle {
                 .or_else(handle_if_statement_comment)
                 .or_else(handle_parenthesized_expression_comment)
                 .or_else(handle_argument_name_clause_comment)
-                .or_else(handle_argument_comment),
+                .or_else(handle_argument_comment)
+                .or_else(handle_hole_argument_comment),
             CommentTextPosition::OwnLine => handle_for_comment(comment)
                 .or_else(handle_function_comment)
                 .or_else(handle_while_comment)
@@ -73,7 +75,8 @@ impl CommentStyle for RCommentStyle {
                 .or_else(handle_if_statement_comment)
                 .or_else(handle_parenthesized_expression_comment)
                 .or_else(handle_argument_name_clause_comment)
-                .or_else(handle_argument_comment),
+                .or_else(handle_argument_comment)
+                .or_else(handle_hole_argument_comment),
             CommentTextPosition::SameLine => {
                 // Not applicable for R, we don't have `/* */` comments
                 CommentPlacement::Default(comment)
@@ -351,6 +354,184 @@ fn handle_argument_comment(comment: DecoratedComment<RLanguage>) -> CommentPlace
     CommentPlacement::Default(comment)
 }
 
+/// Handle comments close to an argument hole
+///
+/// Hole comment handling is complicated by the fact that holes don't have any
+/// associated tokens. This has two important consequences that we exploit:
+///
+/// - Comments are never enclosed by holes, i.e. `enclosing_node()` never
+///   returns a hole.
+///
+/// - Comments will ALWAYS trail the hole by default, i.e. `preceding_node()`
+///   is how you access the hole node connected to the comment.
+///
+/// Here is the logic for placing hole comments, assuming that we've already
+/// determined that the `preceding_node()` is a hole. Note that this logic is
+/// symmetric, which we quite like:
+///
+/// - If the following token is `,`, `)`, `]`, or `]]`, the comment is
+///   "inside" the hole.
+///
+///   - If the previous sibling node before the hole exists and is not another
+///     hole, attach the comment as trailing trivia on that.
+///
+/// - Else the comment is "after" the hole.
+///
+///   - If the following sibling node after the hole exists and is not another
+///     hole, attach the comment as leading trivia on that.
+///
+/// - Otherwise attach the comment as leading on the hole itself. This happens
+///   when there is not another preceding/following node, or that node is
+///   another hole.
+///
+/// Note that the "following token" check skips following whitespace and
+/// comments, which is what we want.
+///
+/// ## Comments "inside" the hole
+///
+/// Comment trails `a`. Following token is `,` and previous-sibling of hole is a
+/// non-hole.
+///
+/// ```r
+/// fn(
+///   a,<hole> # comment
+///   ,
+///   b
+/// )
+/// ```
+///
+/// CommentB trails `b`. Following token is `)` and previous-sibling of hole is
+/// a non-hole.
+///
+/// This example is particularly important. We want comments on trailing
+/// `,` in `list2()` calls to get attached to the `b` non-hole node,
+/// otherwise it will get moved to the next line if it stays attached to
+/// the hole.
+///
+/// ```r
+/// list2(
+///   a, # commentA
+///   b,<hole> # commentB
+/// )
+/// ```
+///
+/// Comment1 leads hole. Following token is `,` and there is no previous-sibling
+/// of the hole. Note that `following_token()` skips `# comment2` here and jumps
+/// straight to `,`, which is what we want.
+///
+/// Comment2 leads hole. Following token is `,` and there is no previous-sibling
+/// of the hole.
+///
+/// ```r
+/// fn(<hole># comment1
+///  # comment2
+///  ,
+///  x
+/// )
+/// ```
+///
+/// Comment leads hole. Following token is `,` and the previous-sibling of
+/// the hole is another hole.
+///
+/// ```r
+/// fn(
+///  a,<another-hole>
+///  ,<hole># comment
+///  ,
+///  b
+/// )
+/// ```
+///
+/// Comment leads hole. Following token is `,` and the previous-sibling of
+/// the hole is another hole.
+///
+/// ```r
+/// fn(<another-hole>
+///  ,<hole># comment
+///  ,
+///  x
+/// )
+/// ```
+///
+/// ## Comment "after" the hole
+///
+/// Comment leads `x`. Following token is not `,`, `)`, `]`, or `]]`, and the
+/// following-sibling of the hole is a non-hole we can lead.
+///
+/// ```r
+/// fn(
+///  ,<hole>
+///  ,# comment
+///  x
+/// )
+/// ```
+///
+/// Comment1 leads `x`. Following token is not `,`, `)`, `]`, or `]]`, and the
+/// following-sibling of the hole is a non-hole we can lead. Note that
+/// `following_token()` skips `# comment2` here and jumps straight to `,`, which
+/// is what we want.
+///
+/// Comment2 leads `x`. Following token is not `,`, `)`, `]`, or `]]`, and the
+/// following-sibling of the hole is a non-hole we can lead.
+///
+/// ```r
+/// fn(
+///   ,<hole>
+///   ,# comment1
+///   # comment2
+///   x
+/// )
+/// ```
+///
+/// We can't think of any scenario where we have a comment "after" the hole,
+/// but we don't have a following-sibling to lead.
+fn handle_hole_argument_comment(
+    comment: DecoratedComment<RLanguage>,
+) -> CommentPlacement<RLanguage> {
+    let Some(hole) = comment
+        .preceding_node()
+        .and_then(RArgument::cast_ref)
+        .filter(RArgument::is_hole)
+        .map(RArgument::into_syntax)
+    else {
+        return CommentPlacement::Default(comment);
+    };
+
+    // Note that `following_token()` nicely skips over following comments
+    let is_comment_inside_hole = matches!(
+        comment.following_token().map(RSyntaxToken::kind),
+        Some(
+            RSyntaxKind::COMMA
+                | RSyntaxKind::R_PAREN
+                | RSyntaxKind::R_BRACK
+                | RSyntaxKind::R_BRACK2
+        )
+    );
+
+    #[allow(clippy::collapsible_else_if)]
+    if is_comment_inside_hole {
+        if let Some(previous) = hole
+            .prev_sibling()
+            .and_then(RArgument::cast)
+            .filter(|argument| !argument.is_hole())
+            .map(RArgument::into_syntax)
+        {
+            return CommentPlacement::trailing(previous, comment);
+        }
+    } else {
+        if let Some(following) = comment
+            .following_node()
+            .and_then(RArgument::cast_ref)
+            .filter(|argument| !argument.is_hole())
+            .map(RArgument::into_syntax)
+        {
+            return CommentPlacement::leading(following, comment);
+        }
+    }
+
+    CommentPlacement::leading(hole, comment)
+}
+
 /// Make line comments between a `)` token and a `body`:
 /// - Leading comments of the first expression within `{}` if `body` is a braced expression
 /// - Dangling comments of the `{}` if `body` is an empty braced expression
diff --git a/crates/air_r_formatter/src/r/auxiliary/call_arguments.rs b/crates/air_r_formatter/src/r/auxiliary/call_arguments.rs
index 289d8e68..6a43db61 100644
--- a/crates/air_r_formatter/src/r/auxiliary/call_arguments.rs
+++ b/crates/air_r_formatter/src/r/auxiliary/call_arguments.rs
@@ -1,5 +1,7 @@
 // TODO: (c) Biome
 
+use std::cell::Cell;
+
 use crate::comments::RComments;
 use crate::prelude::*;
 use crate::r::auxiliary::braced_expressions::as_curly_curly;
@@ -18,12 +20,12 @@ use air_r_syntax::RSyntaxToken;
 use biome_formatter::separated::TrailingSeparator;
 use biome_formatter::{format_args, format_element, write, VecBuffer};
 use biome_rowan::{AstSeparatedElement, AstSeparatedList, SyntaxResult};
+use itertools::Itertools;
 
 #[derive(Debug, Clone, Default)]
 pub(crate) struct FormatRCallArguments;
 impl FormatNodeRule<RCallArguments> for FormatRCallArguments {
     fn fmt_fields(&self, node: &RCallArguments, f: &mut RFormatter) -> FormatResult<()> {
-        // TODO: Special handling for comments? See `handle_array_holes` for JS.
         RCallLikeArguments::Call(node.clone()).fmt(f)
     }
 
@@ -122,29 +124,40 @@ impl Format<RFormatContext> for RCallLikeArguments {
             }
         }
 
-        let last_index = items.len().saturating_sub(1);
-        let mut has_empty_line = false;
+        let comments = f.comments();
+
+        let mut iter_elements = items.elements();
+
+        // Split leading holes out from the remainder of the arguments.
+        // Leading holes tightly hug the `l_token` no matter what.
+        // Because they are intended to tightly hug, a node is only considered
+        // a leading hole if there isn't a comment attached.
+        let leading_holes: Vec<_> = iter_elements
+            .take_while_ref(|element| {
+                element.node().map_or(false, |node| {
+                    node.is_hole() && !comments.has_comments(node.syntax())
+                })
+            })
+            .map(FormatCallArgumentHole::new)
+            .collect();
+
+        let last_index = (items.len() - leading_holes.len()).saturating_sub(1);
 
-        // Wrap `RArgumentList` elements in a `FormatCallArgument` type that
+        // Wrap remaining `RArgumentList` elements in a `FormatCallArgument` type that
         // knows how to cache itself when we use `will_break()` to check if
         // the argument breaks
-        let arguments: Vec<_> = items
-            .elements()
+        let arguments: Vec<_> = iter_elements
             .enumerate()
-            .map(|(index, element)| {
-                let leading_lines = element
-                    .node()
-                    .map_or(0, |node| get_lines_before(node.syntax()));
-                has_empty_line = has_empty_line || leading_lines > 1;
-
-                FormatCallArgument::Default {
-                    element,
-                    is_last: index == last_index,
-                    leading_lines,
-                }
-            })
+            .map(|(index, element)| FormatCallArgument::new(element, index == last_index))
             .collect();
 
+        let has_empty_line = leading_holes
+            .iter()
+            .any(|leading_hole| leading_hole.leading_lines() > 1)
+            || arguments
+                .iter()
+                .any(|argument| argument.leading_lines() > 1);
+
         // Special case where the user has requested a fully empty line between
         // some of their arguments. Let's respect that and use it as an
         // indicator to short circuit here.
@@ -153,6 +166,7 @@ impl Format<RFormatContext> for RCallLikeArguments {
                 f,
                 [FormatAllArgsBrokenOut {
                     l_token: &l_token.format(),
+                    leading_holes: &leading_holes,
                     args: &arguments,
                     r_token: &r_token.format(),
                     expand: true,
@@ -161,12 +175,13 @@ impl Format<RFormatContext> for RCallLikeArguments {
         }
 
         // Special case where a line break exists between the `l_token` and the
-        // first argument. Treat this as a user request to expand.
-        if needs_user_requested_expansion(&arguments) {
+        // first non-hole argument. Treat this as a user request to expand.
+        if needs_user_requested_expansion(&leading_holes, &arguments) {
             return write!(
                 f,
                 [FormatAllArgsBrokenOut {
                     l_token: &l_token.format(),
+                    leading_holes: &leading_holes,
                     args: &arguments,
                     r_token: &r_token.format(),
                     expand: true,
@@ -174,13 +189,14 @@ impl Format<RFormatContext> for RCallLikeArguments {
             );
         }
 
-        if let Some(group_layout) = arguments_grouped_layout(&items, f.comments()) {
-            write_grouped_arguments(&l_token, &r_token, arguments, group_layout, f)
+        if let Some(group_layout) = arguments_grouped_layout(&items, comments) {
+            write_grouped_arguments(l_token, leading_holes, arguments, r_token, group_layout, f)
         } else {
             write!(
                 f,
                 [FormatAllArgsBrokenOut {
                     l_token: &l_token.format(),
+                    leading_holes: &leading_holes,
                     args: &arguments,
                     r_token: &r_token.format(),
                     expand: false,
@@ -190,8 +206,9 @@ impl Format<RFormatContext> for RCallLikeArguments {
     }
 }
 
-/// Check if the user has inserted a leading newline before the very first `argument`.
-/// If so, we respect that and treat it as a request to break ALL of the arguments.
+/// Check if the user has inserted a leading newline before the very first
+/// non-hole `argument`. If so, we respect that and treat it as a request to
+/// break ALL of the arguments.
 /// Note this is a case of irreversible formatting!
 ///
 /// ```r
@@ -226,12 +243,123 @@ impl Format<RFormatContext> for RCallLikeArguments {
 /// # Output
 /// dictionary <- list(bob = "burger", dina = "dairy", john = "juice")
 /// ```
-fn needs_user_requested_expansion(arguments: &[FormatCallArgument]) -> bool {
+///
+/// The leading line check is done on the first non-hole argument, so this
+/// is considered a user requested expansion and stays as is because there
+/// is a leading newline before the `j` argument node.
+///
+/// ```r
+/// dt[,
+///   j = complex + things,
+///   by = col
+/// ]
+/// ```
+///
+/// This is also considered a user requested expansion. We treat holes as
+/// "invisible" for this check, so if you squint and remove the leading `,`
+/// and there are any leading lines before the first non-hole argument,
+/// that is still considered a user requested expansion, but the `,`s attached
+/// to the hole will get moved to hug the `[`.
+///
+/// ```r
+/// dt[
+///   , j = complex + things,
+///   by = col
+/// ]
+/// ```
+fn needs_user_requested_expansion(
+    leading_holes: &[FormatCallArgumentHole],
+    arguments: &[FormatCallArgument],
+) -> bool {
     // TODO: This should be configurable by an option, since it is a case of
     // irreversible formatting
-    arguments
+
+    // Do any leading holes have leading lines?
+    // We treat leading holes as "invisible" so a leading line in the hole
+    // implies a leading line in the first argument.
+    if leading_holes.iter().any(|hole| hole.leading_lines() > 0) {
+        return true;
+    }
+
+    // Does the first non-hole argument have leading lines?
+    if arguments
         .first()
         .map_or(false, |argument| argument.leading_lines() > 0)
+    {
+        return true;
+    }
+
+    false
+}
+
+/// Helper for formatting a call argument hole
+///
+/// We cache the result at `fmt()` time. This is necessary because
+/// using `BestFitting` will try and print the hole multiple times as it
+/// tries out the different variants, which would be an error if it wasn't
+/// cached.
+struct FormatCallArgumentHole {
+    /// The element to format
+    element: AstSeparatedElement<RLanguage, RArgument>,
+
+    /// The formatted element
+    ///
+    /// Cached using interior mutability the first time `fmt()` is called.
+    content: Cell<Option<FormatResult<Option<FormatElement>>>>,
+
+    /// The number of lines before this node
+    leading_lines: usize,
+}
+
+impl FormatCallArgumentHole {
+    fn new(element: AstSeparatedElement<RLanguage, RArgument>) -> Self {
+        // Note that holes by their very nature don't have any physical nodes
+        // to attach trivia to, so we can't use `get_lines_before()` on the
+        // node. Instead we look at the attached `,` token and look for lines
+        // before that!
+        let leading_lines = element
+            .trailing_separator()
+            .unwrap_or(None)
+            .map_or(0, get_lines_before_token);
+
+        Self {
+            element,
+            content: Cell::new(None),
+            leading_lines,
+        }
+    }
+
+    fn leading_lines(&self) -> usize {
+        self.leading_lines
+    }
+}
+
+impl Format<RFormatContext> for FormatCallArgumentHole {
+    fn fmt(&self, f: &mut Formatter<RFormatContext>) -> FormatResult<()> {
+        // If we've formatted this hole before, reuse the content.
+        // Otherwise `intern()` the hole and cache it.
+        let content = match self.content.take() {
+            Some(content) => content,
+            None => f.intern(&format_with(|f| {
+                write!(
+                    f,
+                    [
+                        self.element.node()?.format(),
+                        self.element.trailing_separator()?.format()
+                    ]
+                )
+            })),
+        };
+
+        // Set before writing in case there is an error at write time
+        self.content.set(Some(content.clone()));
+
+        if let Some(element) = content? {
+            f.write_element(element)?;
+        }
+
+        Ok(())
+    }
 }
 
 /// Helper for formatting a call argument
@@ -263,6 +391,18 @@ enum FormatCallArgument {
 }
 
 impl FormatCallArgument {
+    fn new(element: AstSeparatedElement<RLanguage, RArgument>, is_last: bool) -> Self {
+        let leading_lines = element
+            .node()
+            .map_or(0, |node| get_lines_before(node.syntax()));
+
+        FormatCallArgument::Default {
+            element,
+            is_last,
+            leading_lines,
+        }
+    }
+
     /// Returns `true` if this argument contains any content that forces a group to [`break`](FormatElements::will_break).
     ///
     /// Caches the formatted content after we check, so we can utilize it later
@@ -466,9 +606,10 @@ impl Format<RFormatContext> for FormatCallArgument {
 /// )
 /// ```
 fn write_grouped_arguments(
-    l_token: &RSyntaxToken,
-    r_token: &RSyntaxToken,
+    l_token: RSyntaxToken,
+    leading_holes: Vec<FormatCallArgumentHole>,
     mut arguments: Vec<FormatCallArgument>,
+    r_token: RSyntaxToken,
     group_layout: GroupedCallArgumentLayout,
     f: &mut RFormatter,
 ) -> FormatResult<()> {
@@ -494,6 +635,7 @@ fn write_grouped_arguments(
                 f,
                 [FormatAllArgsBrokenOut {
                     l_token: &l_token.format(),
+                    leading_holes: &leading_holes,
                     args: &arguments,
                     r_token: &r_token.format(),
                     expand: true,
@@ -521,6 +663,7 @@ fn write_grouped_arguments(
             buffer,
             [FormatAllArgsBrokenOut {
                 l_token: &l_token,
+                leading_holes: &leading_holes,
                 args: &arguments,
                 r_token: &r_token,
                 expand: true,
@@ -574,6 +717,8 @@ fn write_grouped_arguments(
             buffer,
             [
                 l_token,
+                format_with(|f| { f.join().entries(leading_holes.iter()).finish() }),
+                maybe_space(!leading_holes.is_empty() && !grouped.is_empty()),
                 format_with(|f| {
                     f.join_with(soft_line_break_or_space())
                         .entries(grouped.iter())
@@ -614,6 +759,8 @@ fn write_grouped_arguments(
             buffer,
             [
                 l_token,
+                format_with(|f| { f.join().entries(leading_holes.iter()).finish() }),
+                maybe_space(!leading_holes.is_empty() && !grouped.is_empty()),
                 format_with(|f| {
                     let mut joiner = f.join_with(soft_line_break_or_space());
 
@@ -770,6 +917,7 @@ impl Format<RFormatContext> for FormatGroupedArgument {
 
 struct FormatAllArgsBrokenOut<'a> {
     l_token: &'a dyn Format<RFormatContext>,
+    leading_holes: &'a [FormatCallArgumentHole],
     args: &'a [FormatCallArgument],
     r_token: &'a dyn Format<RFormatContext>,
     expand: bool,
@@ -798,6 +946,8 @@ impl<'a> Format<RFormatContext> for FormatAllArgsBrokenOut<'a> {
             f,
             [group(&format_args![
                 self.l_token,
+                format_with(|f| f.join().entries(self.leading_holes.iter()).finish()),
+                maybe_space(!self.leading_holes.is_empty() && !self.args.is_empty()),
                 soft_block_indent(&args),
                 self.r_token,
             ])
diff --git a/crates/air_r_formatter/src/r/auxiliary/subset_2_arguments.rs b/crates/air_r_formatter/src/r/auxiliary/subset_2_arguments.rs
index 6b918943..d2c3e97e 100644
--- a/crates/air_r_formatter/src/r/auxiliary/subset_2_arguments.rs
+++ b/crates/air_r_formatter/src/r/auxiliary/subset_2_arguments.rs
@@ -6,7 +6,6 @@ use air_r_syntax::RSubset2Arguments;
 pub(crate) struct FormatRSubset2Arguments;
 impl FormatNodeRule<RSubset2Arguments> for FormatRSubset2Arguments {
     fn fmt_fields(&self, node: &RSubset2Arguments, f: &mut RFormatter) -> FormatResult<()> {
-        // TODO: Special handling for comments? See `handle_array_holes` for JS.
         RCallLikeArguments::Subset2(node.clone()).fmt(f)
     }
 
diff --git a/crates/air_r_formatter/src/r/auxiliary/subset_arguments.rs b/crates/air_r_formatter/src/r/auxiliary/subset_arguments.rs
index 340e2c44..3d56239b 100644
--- a/crates/air_r_formatter/src/r/auxiliary/subset_arguments.rs
+++ b/crates/air_r_formatter/src/r/auxiliary/subset_arguments.rs
@@ -6,7 +6,6 @@ use air_r_syntax::RSubsetArguments;
 pub(crate) struct FormatRSubsetArguments;
 impl FormatNodeRule<RSubsetArguments> for FormatRSubsetArguments {
     fn fmt_fields(&self, node: &RSubsetArguments, f: &mut RFormatter) -> FormatResult<()> {
-        // TODO: Special handling for comments? See `handle_array_holes` for JS.
         RCallLikeArguments::Subset(node.clone()).fmt(f)
     }
 
diff --git a/crates/air_r_formatter/tests/specs/r/call.R b/crates/air_r_formatter/tests/specs/r/call.R
index dd73890f..28b7b57a 100644
--- a/crates/air_r_formatter/tests/specs/r/call.R
+++ b/crates/air_r_formatter/tests/specs/r/call.R
@@ -4,9 +4,16 @@ fn(a)
 # ------------------------------------------------------------------------
 # Holes
 
+# Leading holes should hug the `(` token
 fn(,)
 fn(,,)
+
+# Non-leading holes retain spaces because they are considered "weird"
+# and we want them to stand out
+fn(, a,)
+fn(, a, , )
 fn(a,,b,,)
+
 fn(a_really_long_argument_here,,another_really_really_long_argument_to_test_this_feature,,)
 
 # ------------------------------------------------------------------------
@@ -64,13 +71,169 @@ test_that(
 
 })
 
-# TODO: Holes currently don't force expansion. There is no token attached to
-# `RHoleArgument`, so we can't compute the "number of lines before the first token".
+# ------------------------------------------------------------------------
+# User requested line break and leading holes
+
+# Leading holes are "invisible" when determining user requested expansion
+# These all expand
+fn(,
+  x = 1
+)
+
+fn(
+  ,
+  x = 1
+)
+
+fn(
+  , x = 1
+)
+
+fn(
+  ,, x = 1
+)
+
+# A comment connected to a hole prevents it from being a "leading hole",
+# instead it just becomes part of the typical arguments list and expands
+fn(
+  # comment
+  ,
+  x = 1
+)
+
 fn(
+  ,
+  # comment
   ,
   x = 1
 )
 
+# ------------------------------------------------------------------------
+# Comments "inside" holes
+
+fn(# comment
+  ,
+)
+
+fn(, # comment
+)
+fn(,
+  # comment
+)
+fn(
+  , # comment
+)
+fn(
+  ,
+  # comment
+)
+
+fn(, # comment
+  ,
+)
+fn(,
+  # comment
+  ,
+)
+fn(
+  , # comment
+  ,
+)
+fn(
+  ,
+  # comment
+  ,
+)
+
+fn(
+  ,
+  , # comment1
+  # comment2
+  ,
+  x
+)
+
+# Trails `a`
+fn(
+  a, # comment
+  ,
+  b
+)
+# Trails `a` technically, but should stay on own line
+fn(
+  a,
+  # comment
+  ,
+  b
+)
+# Trails `a`
+fn(
+  a, # comment
+  # comment2
+  ,
+  b
+)
+
+# Special test - ensure this leads `b` rather than trails `a`
+fn(
+  ,
+  a,
+  , # comment
+  b
+)
+
+# Both comments lead the hole
+fn(# comment1
+  # comment2
+  ,
+  x
+)
+
+# Comment leads hole
+# Following token is `,`, preceding before hole is another hole
+fn(
+  a,
+  , # comment
+  ,
+  b
+)
+fn(
+  , # comment
+  ,
+  x
+)
+
+# Comment leads `{` but doesn't move inside it
+fn(
+  ,
+  , # comment
+  { 1 +  1 }
+)
+
+# A particular motivating case. Want trailing `,` commentB to stay on `b`.
+list2(
+  a, # commentA
+  b, # commentB
+)
+
+# ------------------------------------------------------------------------
+# Comments "after" holes
+
+# Both get attached to `x`
+# Following token isn't `,`, `)`, `]`, or `]]`, and following node is non-hole,
+# so we attach to it
+fn(
+  ,
+  , # comment
+  x
+)
+fn(
+  ,
+  , # comment1
+  # comment2
+  x
+)
+
 # ------------------------------------------------------------------------
 # Trailing braced expression
 
@@ -440,6 +603,27 @@ fn(
   c
 )
 
+# Due to holes not having tokens, we collapse full empty lines in them
+fn(
+
+  # comment1
+  ,
+
+  # comment2
+  ,
+
+  b
+)
+
+fn(
+  ,
+
+  # comment2
+  ,
+
+  b
+)
+
 # ------------------------------------------------------------------------
 # Comments
 
diff --git a/crates/air_r_formatter/tests/specs/r/call.R.snap b/crates/air_r_formatter/tests/specs/r/call.R.snap
index 3e298154..88937ff5 100644
--- a/crates/air_r_formatter/tests/specs/r/call.R.snap
+++ b/crates/air_r_formatter/tests/specs/r/call.R.snap
@@ -11,9 +11,16 @@ fn(a)
 # ------------------------------------------------------------------------
 # Holes
 
+# Leading holes should hug the `(` token
 fn(,)
 fn(,,)
+
+# Non-leading holes retain spaces because they are considered "weird"
+# and we want them to stand out
+fn(, a,)
+fn(, a, , )
 fn(a,,b,,)
+
 fn(a_really_long_argument_here,,another_really_really_long_argument_to_test_this_feature,,)
 
 # ------------------------------------------------------------------------
@@ -71,13 +78,169 @@ test_that(
 
 })
 
-# TODO: Holes currently don't force expansion. There is no token attached to
-# `RHoleArgument`, so we can't compute the "number of lines before the first token".
+# ------------------------------------------------------------------------
+# User requested line break and leading holes
+
+# Leading holes are "invisible" when determining user requested expansion
+# These all expand
+fn(,
+  x = 1
+)
+
+fn(
+  ,
+  x = 1
+)
+
+fn(
+  , x = 1
+)
+
 fn(
+  ,, x = 1
+)
+
+# A comment connected to a hole prevents it from being a "leading hole",
+# instead it just becomes part of the typical arguments list and expands
+fn(
+  # comment
+  ,
+  x = 1
+)
+
+fn(
+  ,
+  # comment
   ,
   x = 1
 )
 
+# ------------------------------------------------------------------------
+# Comments "inside" holes
+
+fn(# comment
+  ,
+)
+
+fn(, # comment
+)
+fn(,
+  # comment
+)
+fn(
+  , # comment
+)
+fn(
+  ,
+  # comment
+)
+
+fn(, # comment
+  ,
+)
+fn(,
+  # comment
+  ,
+)
+fn(
+  , # comment
+  ,
+)
+fn(
+  ,
+  # comment
+  ,
+)
+
+fn(
+  ,
+  , # comment1
+  # comment2
+  ,
+  x
+)
+
+# Trails `a`
+fn(
+  a, # comment
+  ,
+  b
+)
+# Trails `a` technically, but should stay on own line
+fn(
+  a,
+  # comment
+  ,
+  b
+)
+# Trails `a`
+fn(
+  a, # comment
+  # comment2
+  ,
+  b
+)
+
+# Special test - ensure this leads `b` rather than trails `a`
+fn(
+  ,
+  a,
+  , # comment
+  b
+)
+
+# Both comments lead the hole
+fn(# comment1
+  # comment2
+  ,
+  x
+)
+
+# Comment leads hole
+# Following token is `,`, preceding before hole is another hole
+fn(
+  a,
+  , # comment
+  ,
+  b
+)
+fn(
+  , # comment
+  ,
+  x
+)
+
+# Comment leads `{` but doesn't move inside it
+fn(
+  ,
+  , # comment
+  { 1 +  1 }
+)
+
+# A particular motivating case. Want trailing `,` commentB to stay on `b`.
+list2(
+  a, # commentA
+  b, # commentB
+)
+
+# ------------------------------------------------------------------------
+# Comments "after" holes
+
+# Both get attached to `x`
+# Following token isn't `,`, `)`, `]`, or `]]`, and following node is non-hole,
+# so we attach to it
+fn(
+  ,
+  , # comment
+  x
+)
+fn(
+  ,
+  , # comment1
+  # comment2
+  x
+)
+
 # ------------------------------------------------------------------------
 # Trailing braced expression
 
@@ -447,6 +610,27 @@ fn(
   c
 )
 
+# Due to holes not having tokens, we collapse full empty lines in them
+fn(
+
+  # comment1
+  ,
+
+  # comment2
+  ,
+
+  b
+)
+
+fn(
+  ,
+
+  # comment2
+  ,
+
+  b
+)
+
 # ------------------------------------------------------------------------
 # Comments
 
@@ -488,9 +672,16 @@ fn(a)
 # ------------------------------------------------------------------------
 # Holes
 
-fn(, )
-fn(, , )
+# Leading holes should hug the `(` token
+fn(,)
+fn(,,)
+
+# Non-leading holes retain spaces because they are considered "weird"
+# and we want them to stand out
+fn(, a, )
+fn(, a, , )
 fn(a, , b, , )
+
 fn(
 	a_really_long_argument_here,
 	,
@@ -565,9 +756,167 @@ df |>
 test_that("description", {
 })
 
-# TODO: Holes currently don't force expansion. There is no token attached to
-# `RHoleArgument`, so we can't compute the "number of lines before the first token".
-fn(, x = 1)
+# ------------------------------------------------------------------------
+# User requested line break and leading holes
+
+# Leading holes are "invisible" when determining user requested expansion
+# These all expand
+fn(,
+	x = 1
+)
+
+fn(,
+	x = 1
+)
+
+fn(,
+	x = 1
+)
+
+fn(,,
+	x = 1
+)
+
+# A comment connected to a hole prevents it from being a "leading hole",
+# instead it just becomes part of the typical arguments list and expands
+fn(
+	# comment
+	,
+	x = 1
+)
+
+fn(,
+	# comment
+	,
+	x = 1
+)
+
+# ------------------------------------------------------------------------
+# Comments "inside" holes
+
+fn(
+	# comment
+	,
+)
+
+fn(,
+	# comment
+)
+fn(,
+	# comment
+)
+fn(,
+	# comment
+)
+fn(,
+	# comment
+)
+
+fn(,
+	# comment
+	,
+)
+fn(,
+	# comment
+	,
+)
+fn(,
+	# comment
+	,
+)
+fn(,
+	# comment
+	,
+)
+
+fn(,,
+	# comment1
+	# comment2
+	,
+	x
+)
+
+# Trails `a`
+fn(
+	a, # comment
+	,
+	b
+)
+# Trails `a` technically, but should stay on own line
+fn(
+	a,
+	# comment
+	,
+	b
+)
+# Trails `a`
+fn(
+	a, # comment
+	# comment2
+	,
+	b
+)
+
+# Special test - ensure this leads `b` rather than trails `a`
+fn(,
+	a,
+	,
+	# comment
+	b
+)
+
+# Both comments lead the hole
+fn(
+	# comment1
+	# comment2
+	,
+	x
+)
+
+# Comment leads hole
+# Following token is `,`, preceding before hole is another hole
+fn(
+	a,
+	,
+	# comment
+	,
+	b
+)
+fn(,
+	# comment
+	,
+	x
+)
+
+# Comment leads `{` but doesn't move inside it
+fn(,,
+	# comment
+	{
+		1 + 1
+	}
+)
+
+# A particular motivating case. Want trailing `,` commentB to stay on `b`.
+list2(
+	a, # commentA
+	b, # commentB
+)
+
+# ------------------------------------------------------------------------
+# Comments "after" holes
+
+# Both get attached to `x`
+# Following token isn't `,`, `)`, `]`, or `]]`, and following node is non-hole,
+# so we attach to it
+fn(,,
+	# comment
+	x
+)
+fn(,,
+	# comment1
+	# comment2
+	x
+)
 
 # ------------------------------------------------------------------------
 # Trailing braced expression
@@ -958,6 +1307,23 @@ fn(
 	c
 )
 
+# Due to holes not having tokens, we collapse full empty lines in them
+fn(
+	# comment1
+	,
+	# comment2
+	,
+
+	b
+)
+
+fn(,
+	# comment2
+	,
+
+	b
+)
+
 # ------------------------------------------------------------------------
 # Comments
 
@@ -979,6 +1345,5 @@ fn(
 
 # Lines exceeding max width of 80 characters
 ```
-   85: # `RHoleArgument`, so we can't compute the "number of lines before the first token".
-  142: 	my_long_list_my_long_list_my_long_list_my_long_list_long_long_long_long_long_list,
+  307: 	my_long_list_my_long_list_my_long_list_my_long_list_long_long_long_long_long_list,
 ```
diff --git a/crates/air_r_formatter/tests/specs/r/subset.R b/crates/air_r_formatter/tests/specs/r/subset.R
index 6a8167a3..fffb1ddd 100644
--- a/crates/air_r_formatter/tests/specs/r/subset.R
+++ b/crates/air_r_formatter/tests/specs/r/subset.R
@@ -7,7 +7,7 @@ fn["description", {
   1 + 1
 }]
 
-# TODO: Think about data.table usage, like:
+# Leading hole hugs `[`
 DT[, {
   # write each group to a different file
   fwrite(.SD, "name")
@@ -22,11 +22,49 @@ DT[, by=x, {
 # ------------------------------------------------------------------------
 # Holes
 
+# Leading holes should hug the `[` token
 fn[,]
 fn[,,]
+
+# Trailing holes get a trailing space
+df[a,]
+
 fn[a,,b,,]
 fn[a_really_long_argument_here,,another_really_really_long_argument_to_test_this_feature,,]
 
+# Holes are "invisible" when determining user requested expansion
+# These all expand
+fn[,
+  x = 1
+]
+fn[
+  ,
+  x = 1
+]
+fn[
+  , x = 1
+]
+fn[
+  ,, x = 1
+]
+
+# ------------------------------------------------------------------------
+# Holes and trailing inline functions / braced expressions
+
+dt[, {
+  1 + 1
+}]
+dt[, , j, {
+  1 + 1
+}]
+
+dt[, function(x) {
+  1 + x
+}]
+dt[, , j, function(x) {
+  1 + x
+}]
+
 # ------------------------------------------------------------------------
 # Dots
 
@@ -73,9 +111,30 @@ df[df$col > 7, map[
   names(df)
 ]]
 
-# TODO: Holes currently don't force expansion. There is no token attached to
-# `RHoleArgument`, so we can't compute the "number of lines before the first token".
-df[
-  ,
-  x = 1
+# ------------------------------------------------------------------------
+# User requested line break and leading holes
+
+# Leading holes are "invisible" when computing user requested line breaks,
+# so you can break the line before or after the hole as long as you are
+# before the first argument.
+# Starting from:
+# dt[, j, by = col]
+
+dt[
+  , j, by = col]
+
+dt[,
+  j, by = col]
+
+# No longer user requested expansion
+dt[, j
+  , by = col]
+
+# ------------------------------------------------------------------------
+# Comments "after" holes
+
+# Common in data.table world
+dt[,
+  # comment
+  x
 ]
diff --git a/crates/air_r_formatter/tests/specs/r/subset.R.snap b/crates/air_r_formatter/tests/specs/r/subset.R.snap
index 9f2692f2..59568939 100644
--- a/crates/air_r_formatter/tests/specs/r/subset.R.snap
+++ b/crates/air_r_formatter/tests/specs/r/subset.R.snap
@@ -14,7 +14,7 @@ fn["description", {
   1 + 1
 }]
 
-# TODO: Think about data.table usage, like:
+# Leading hole hugs `[`
 DT[, {
   # write each group to a different file
   fwrite(.SD, "name")
@@ -29,11 +29,49 @@ DT[, by=x, {
 # ------------------------------------------------------------------------
 # Holes
 
+# Leading holes should hug the `[` token
 fn[,]
 fn[,,]
+
+# Trailing holes get a trailing space
+df[a,]
+
 fn[a,,b,,]
 fn[a_really_long_argument_here,,another_really_really_long_argument_to_test_this_feature,,]
 
+# Holes are "invisible" when determining user requested expansion
+# These all expand
+fn[,
+  x = 1
+]
+fn[
+  ,
+  x = 1
+]
+fn[
+  , x = 1
+]
+fn[
+  ,, x = 1
+]
+
+# ------------------------------------------------------------------------
+# Holes and trailing inline functions / braced expressions
+
+dt[, {
+  1 + 1
+}]
+dt[, , j, {
+  1 + 1
+}]
+
+dt[, function(x) {
+  1 + x
+}]
+dt[, , j, function(x) {
+  1 + x
+}]
+
 # ------------------------------------------------------------------------
 # Dots
 
@@ -80,11 +118,32 @@ df[df$col > 7, map[
   names(df)
 ]]
 
-# TODO: Holes currently don't force expansion. There is no token attached to
-# `RHoleArgument`, so we can't compute the "number of lines before the first token".
-df[
-  ,
-  x = 1
+# ------------------------------------------------------------------------
+# User requested line break and leading holes
+
+# Leading holes are "invisible" when computing user requested line breaks,
+# so you can break the line before or after the hole as long as you are
+# before the first argument.
+# Starting from:
+# dt[, j, by = col]
+
+dt[
+  , j, by = col]
+
+dt[,
+  j, by = col]
+
+# No longer user requested expansion
+dt[, j
+  , by = col]
+
+# ------------------------------------------------------------------------
+# Comments "after" holes
+
+# Common in data.table world
+dt[,
+  # comment
+  x
 ]
 
 ```
@@ -115,9 +174,8 @@ fn["description", {
 	1 + 1
 }]
 
-# TODO: Think about data.table usage, like:
-DT[
-	,
+# Leading hole hugs `[`
+DT[,
 	{
 		# write each group to a different file
 		fwrite(.SD, "name")
@@ -134,8 +192,13 @@ DT[, by = x, {
 # ------------------------------------------------------------------------
 # Holes
 
-fn[, ]
-fn[, , ]
+# Leading holes should hug the `[` token
+fn[,]
+fn[,,]
+
+# Trailing holes get a trailing space
+df[a, ]
+
 fn[a, , b, , ]
 fn[
 	a_really_long_argument_here,
@@ -144,6 +207,38 @@ fn[
 	,
 ]
 
+# Holes are "invisible" when determining user requested expansion
+# These all expand
+fn[,
+	x = 1
+]
+fn[,
+	x = 1
+]
+fn[,
+	x = 1
+]
+fn[,,
+	x = 1
+]
+
+# ------------------------------------------------------------------------
+# Holes and trailing inline functions / braced expressions
+
+dt[, {
+	1 + 1
+}]
+dt[,, j, {
+	1 + 1
+}]
+
+dt[, function(x) {
+	1 + x
+}]
+dt[,, j, function(x) {
+	1 + x
+}]
+
 # ------------------------------------------------------------------------
 # Dots
 
@@ -195,12 +290,34 @@ df[
 	]
 ]
 
-# TODO: Holes currently don't force expansion. There is no token attached to
-# `RHoleArgument`, so we can't compute the "number of lines before the first token".
-df[, x = 1]
-```
+# ------------------------------------------------------------------------
+# User requested line break and leading holes
 
-# Lines exceeding max width of 80 characters
-```
-   93: # `RHoleArgument`, so we can't compute the "number of lines before the first token".
+# Leading holes are "invisible" when computing user requested line breaks,
+# so you can break the line before or after the hole as long as you are
+# before the first argument.
+# Starting from:
+# dt[, j, by = col]
+
+dt[,
+	j,
+	by = col
+]
+
+dt[,
+	j,
+	by = col
+]
+
+# No longer user requested expansion
+dt[, j, by = col]
+
+# ------------------------------------------------------------------------
+# Comments "after" holes
+
+# Common in data.table world
+dt[,
+	# comment
+	x
+]
 ```
diff --git a/crates/air_r_formatter/tests/specs/r/subset2.R b/crates/air_r_formatter/tests/specs/r/subset2.R
index 31df85d6..6dbc7a08 100644
--- a/crates/air_r_formatter/tests/specs/r/subset2.R
+++ b/crates/air_r_formatter/tests/specs/r/subset2.R
@@ -10,11 +10,29 @@ fn[["description", {
 # ------------------------------------------------------------------------
 # Holes
 
+# Leading holes should hug the `[[` token
 fn[[,]]
 fn[[,,]]
+
 fn[[a,,b,,]]
 fn[[a_really_long_argument_here,,another_really_really_long_argument_to_test_this_feature,,]]
 
+# Holes are "invisible" when determining user requested expansion
+# These all expand
+fn[[,
+  x = 1
+]]
+fn[[
+  ,
+  x = 1
+]]
+fn[[
+  , x = 1
+]]
+fn[[
+  ,, x = 1
+]]
+
 # ------------------------------------------------------------------------
 # Dots
 
@@ -61,9 +79,10 @@ df[[df$col > 7, map[[
   names(df)
 ]]]]
 
-# TODO: Holes currently don't force expansion. There is no token attached to
-# `RHoleArgument`, so we can't compute the "number of lines before the first token".
-df[[
-  ,
-  x = 1
+# ------------------------------------------------------------------------
+# Comments "after" holes
+
+df[[,
+  # comment
+  x
 ]]
diff --git a/crates/air_r_formatter/tests/specs/r/subset2.R.snap b/crates/air_r_formatter/tests/specs/r/subset2.R.snap
index c44f769d..c01f01ba 100644
--- a/crates/air_r_formatter/tests/specs/r/subset2.R.snap
+++ b/crates/air_r_formatter/tests/specs/r/subset2.R.snap
@@ -17,11 +17,29 @@ fn[["description", {
 # ------------------------------------------------------------------------
 # Holes
 
+# Leading holes should hug the `[[` token
 fn[[,]]
 fn[[,,]]
+
 fn[[a,,b,,]]
 fn[[a_really_long_argument_here,,another_really_really_long_argument_to_test_this_feature,,]]
 
+# Holes are "invisible" when determining user requested expansion
+# These all expand
+fn[[,
+  x = 1
+]]
+fn[[
+  ,
+  x = 1
+]]
+fn[[
+  , x = 1
+]]
+fn[[
+  ,, x = 1
+]]
+
 # ------------------------------------------------------------------------
 # Dots
 
@@ -68,11 +86,12 @@ df[[df$col > 7, map[[
   names(df)
 ]]]]
 
-# TODO: Holes currently don't force expansion. There is no token attached to
-# `RHoleArgument`, so we can't compute the "number of lines before the first token".
-df[[
-  ,
-  x = 1
+# ------------------------------------------------------------------------
+# Comments "after" holes
+
+df[[,
+  # comment
+  x
 ]]
 
 ```
@@ -106,8 +125,10 @@ fn[["description", {
 # ------------------------------------------------------------------------
 # Holes
 
-fn[[, ]]
-fn[[, , ]]
+# Leading holes should hug the `[[` token
+fn[[,]]
+fn[[,,]]
+
 fn[[a, , b, , ]]
 fn[[
 	a_really_long_argument_here,
@@ -116,6 +137,21 @@ fn[[
 	,
 ]]
 
+# Holes are "invisible" when determining user requested expansion
+# These all expand
+fn[[,
+	x = 1
+]]
+fn[[,
+	x = 1
+]]
+fn[[,
+	x = 1
+]]
+fn[[,,
+	x = 1
+]]
+
 # ------------------------------------------------------------------------
 # Dots
 
@@ -167,12 +203,11 @@ df[[
 	]]
 ]]
 
-# TODO: Holes currently don't force expansion. There is no token attached to
-# `RHoleArgument`, so we can't compute the "number of lines before the first token".
-df[[, x = 1]]
-```
+# ------------------------------------------------------------------------
+# Comments "after" holes
 
-# Lines exceeding max width of 80 characters
-```
-   77: # `RHoleArgument`, so we can't compute the "number of lines before the first token".
+df[[,
+	# comment
+	x
+]]
 ```
diff --git a/crates/air_r_syntax/src/argument_ext.rs b/crates/air_r_syntax/src/argument_ext.rs
new file mode 100644
index 00000000..42fc6854
--- /dev/null
+++ b/crates/air_r_syntax/src/argument_ext.rs
@@ -0,0 +1,19 @@
+use crate::RArgument;
+
+impl RArgument {
+    /// Is this argument a "hole"?
+    ///
+    /// To be a hole, the argument must be missing both its `name =` clause
+    /// and its value.
+    ///
+    /// ```r
+    /// # First argument is a hole
+    /// fn( , x)
+    ///
+    /// # First argument is not a hole
+    /// fn(x = , x)
+    /// ```
+    pub fn is_hole(&self) -> bool {
+        self.name_clause().is_none() && self.value().is_none()
+    }
+}
diff --git a/crates/air_r_syntax/src/lib.rs b/crates/air_r_syntax/src/lib.rs
index 63f444c2..010e7dc0 100644
--- a/crates/air_r_syntax/src/lib.rs
+++ b/crates/air_r_syntax/src/lib.rs
@@ -1,5 +1,6 @@
 #[macro_use]
 mod generated;
+pub mod argument_ext;
 pub mod call_ext;
 mod file_source;
 pub mod string_ext;