[SPARK-3854][BUILD] Scala style: require spaces before {.
## What changes were proposed in this pull request?

As discussed in [SPARK-3854](https://issues.apache.org/jira/browse/SPARK-3854), the opening curly brace '{' has many usages, so this PR adds a ScalaStyle rule that forbids the `){` pattern (it requires a single space between ')' and '{') for the most common case shown below, and fixes the existing code accordingly. Enforcing this rule in ScalaStyle from now on will improve Scala code quality and reduce review time.
```
// Correct:
if (true) {
  println("Wow!")
}

// Incorrect:
if (true){
   println("Wow!")
}
```
IntelliJ IDEA also reports warnings for this pattern.
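
The same `){` pattern also appears wherever a function literal or block argument follows a closing parenthesis, and those call sites are fixed by this PR as well. A short, purely illustrative snippet (hypothetical code, not taken from the diff):
```
// Incorrect:
val sum1 = Seq(1, 2, 3).foldLeft(0){ (acc, x) => acc + x }
val arr1 = Array.fill(3){ scala.util.Random.nextDouble }

// Correct:
val sum2 = Seq(1, 2, 3).foldLeft(0) { (acc, x) => acc + x }
val arr2 = Array.fill(3) { scala.util.Random.nextDouble }
```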

## How was this patch tested?

Pass the Jenkins ScalaStyle test.

Author: Dongjoon Hyun <[email protected]>

Closes apache#11637 from dongjoon-hyun/SPARK-3854.
dongjoon-hyun authored and Andrew Or committed Mar 10, 2016
1 parent 81d4853 commit 91fed8e
Showing 42 changed files with 60 additions and 52 deletions.
2 changes: 1 addition & 1 deletion core/src/main/scala/org/apache/spark/rdd/RDD.scala
@@ -722,7 +722,7 @@ abstract class RDD[T: ClassTag](
* An example of pipe the RDD data of groupBy() in a streaming way,
* instead of constructing a huge String to concat all the elements:
* def printRDDElement(record:(String, Seq[String]), f:String=>Unit) =
* for (e <- record._2){f(e)}
* for (e <- record._2) {f(e)}
* @param separateWorkingDir Use separate working directories for each task.
* @return the result RDD
*/
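
For context, a minimal sketch of how such a `printRDDElement` function might be passed to `RDD.pipe` (the `command` and `printRDDElement` parameter names follow the Spark API of this era and should be treated as assumptions; the input RDD is left abstract):
```
import org.apache.spark.rdd.RDD

// Sketch only: stream each group's elements to the external process one line
// at a time, instead of concatenating them into one huge String per record.
def printRDDElement(record: (String, Seq[String]), f: String => Unit): Unit =
  for (e <- record._2) { f(e) }

val grouped: RDD[(String, Seq[String])] = ???  // e.g. the result of a groupBy
val piped: RDD[String] =
  grouped.pipe(command = Seq("cat"), printRDDElement = printRDDElement)
```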
2 changes: 1 addition & 1 deletion core/src/main/scala/org/apache/spark/rpc/RpcTimeout.scala
@@ -119,7 +119,7 @@ private[spark] object RpcTimeout {
// Find the first set property or use the default value with the first property
val itr = timeoutPropList.iterator
var foundProp: Option[(String, String)] = None
while (itr.hasNext && foundProp.isEmpty){
while (itr.hasNext && foundProp.isEmpty) {
val propKey = itr.next()
conf.getOption(propKey).foreach { prop => foundProp = Some(propKey, prop) }
}
@@ -173,7 +173,7 @@ object InputFormatInfo {
for (inputSplit <- formats) {
val splits = inputSplit.findPreferredLocations()

for (split <- splits){
for (split <- splits) {
val location = split.hostLocation
val set = nodeToSplit.getOrElseUpdate(location, new HashSet[SplitInfo])
set += split
@@ -623,7 +623,7 @@ private[spark] object TaskSchedulerImpl {
val containerList: ArrayBuffer[T] = map.getOrElse(key, null)
assert(containerList != null)
// Get the index'th entry for this host - if present
if (index < containerList.size){
if (index < containerList.size) {
retval += containerList.apply(index)
found = true
}
@@ -33,7 +33,7 @@ private[v1] class OneStageResource(ui: SparkUI) {
@GET
@Path("")
def stageData(@PathParam("stageId") stageId: Int): Seq[StageData] = {
withStage(stageId){ stageAttempts =>
withStage(stageId) { stageAttempts =>
stageAttempts.map { stage =>
AllStagesResource.stageUiToStageData(stage.status, stage.info, stage.ui,
includeDetails = true)
@@ -44,7 +44,7 @@ class BlockManagerId private (

def executorId: String = executorId_

if (null != host_){
if (null != host_) {
Utils.checkHost(host_, "Expected hostname")
assert (port_ > 0)
}
@@ -57,7 +57,7 @@ class AccumulatorSuite extends SparkFunSuite with Matchers with LocalSparkContex
}
}

test ("basic accumulation"){
test ("basic accumulation") {
sc = new SparkContext("local", "test")
val acc : Accumulator[Int] = sc.accumulator(0)

@@ -21,7 +21,7 @@ import org.apache.spark.rdd.RDD

class ImplicitOrderingSuite extends SparkFunSuite with LocalSparkContext {
// Tests that PairRDDFunctions grabs an implicit Ordering in various cases where it should.
test("basic inference of Orderings"){
test("basic inference of Orderings") {
sc = new SparkContext("local", "test")
val rdd = sc.parallelize(1 to 10)

@@ -476,7 +476,7 @@ class ApplicationCacheSuite extends SparkFunSuite with Logging with MockitoSugar
when(request.getRequestURI()).thenReturn("http://localhost:18080/history/local-123/jobs/job/")
when(request.getQueryString()).thenReturn("id=2")
val resp = mock[HttpServletResponse]
when(resp.encodeRedirectURL(any())).thenAnswer(new Answer[String](){
when(resp.encodeRedirectURL(any())).thenAnswer(new Answer[String]() {
override def answer(invocationOnMock: InvocationOnMock): String = {
invocationOnMock.getArguments()(0).asInstanceOf[String]
}
@@ -58,7 +58,7 @@ object LocalFileLR {
val ITERATIONS = args(1).toInt

// Initialize w to a random value
var w = DenseVector.fill(D){2 * rand.nextDouble - 1}
var w = DenseVector.fill(D) {2 * rand.nextDouble - 1}
println("Initial w: " + w)

for (i <- 1 to ITERATIONS) {
@@ -41,7 +41,7 @@ object LocalKMeans {

def generateData: Array[DenseVector[Double]] = {
def generatePoint(i: Int): DenseVector[Double] = {
DenseVector.fill(D){rand.nextDouble * R}
DenseVector.fill(D) {rand.nextDouble * R}
}
Array.tabulate(N)(generatePoint)
}
@@ -41,7 +41,7 @@ object LocalLR {
def generateData: Array[DataPoint] = {
def generatePoint(i: Int): DataPoint = {
val y = if (i % 2 == 0) -1 else 1
val x = DenseVector.fill(D){rand.nextGaussian + y * R}
val x = DenseVector.fill(D) {rand.nextGaussian + y * R}
DataPoint(x, y)
}
Array.tabulate(N)(generatePoint)
@@ -62,7 +62,7 @@ object LocalLR {

val data = generateData
// Initialize w to a random value
var w = DenseVector.fill(D){2 * rand.nextDouble - 1}
var w = DenseVector.fill(D) {2 * rand.nextDouble - 1}
println("Initial w: " + w)

for (i <- 1 to ITERATIONS) {
@@ -78,7 +78,7 @@ object SparkHdfsLR {
val ITERATIONS = args(1).toInt

// Initialize w to a random value
var w = DenseVector.fill(D){2 * rand.nextDouble - 1}
var w = DenseVector.fill(D) {2 * rand.nextDouble - 1}
println("Initial w: " + w)

for (i <- 1 to ITERATIONS) {
@@ -46,7 +46,7 @@ object SparkLR {
def generateData: Array[DataPoint] = {
def generatePoint(i: Int): DataPoint = {
val y = if (i % 2 == 0) -1 else 1
val x = DenseVector.fill(D){rand.nextGaussian + y * R}
val x = DenseVector.fill(D) {rand.nextGaussian + y * R}
DataPoint(x, y)
}
Array.tabulate(N)(generatePoint)
@@ -71,7 +71,7 @@ object SparkLR {
val points = sc.parallelize(generateData, numSlices).cache()

// Initialize w to a random value
var w = DenseVector.fill(D){2 * rand.nextDouble - 1}
var w = DenseVector.fill(D) {2 * rand.nextDouble - 1}
println("Initial w: " + w)

for (i <- 1 to ITERATIONS) {
@@ -100,7 +100,7 @@ class SampleActorReceiver[T](urlOfPublisher: String) extends ActorReceiver {
object FeederActor {

def main(args: Array[String]) {
if (args.length < 2){
if (args.length < 2) {
System.err.println("Usage: FeederActor <hostname> <port>\n")
System.exit(1)
}
8 changes: 4 additions & 4 deletions mllib/src/main/scala/org/apache/spark/ml/ann/Layer.scala
@@ -213,8 +213,8 @@ private[ann] object AffineLayerModel {
*/
def randomWeights(numIn: Int, numOut: Int, seed: Long = 11L): (BDM[Double], BDV[Double]) = {
val rand: XORShiftRandom = new XORShiftRandom(seed)
val weights = BDM.fill[Double](numOut, numIn){ (rand.nextDouble * 4.8 - 2.4) / numIn }
val bias = BDV.fill[Double](numOut){ (rand.nextDouble * 4.8 - 2.4) / numIn }
val weights = BDM.fill[Double](numOut, numIn) { (rand.nextDouble * 4.8 - 2.4) / numIn }
val bias = BDV.fill[Double](numOut) { (rand.nextDouble * 4.8 - 2.4) / numIn }
(weights, bias)
}
}
@@ -529,7 +529,7 @@ private[ml] object FeedForwardTopology {
*/
def multiLayerPerceptron(layerSizes: Array[Int], softmax: Boolean = true): FeedForwardTopology = {
val layers = new Array[Layer]((layerSizes.length - 1) * 2)
for(i <- 0 until layerSizes.length - 1){
for(i <- 0 until layerSizes.length - 1) {
layers(i * 2) = new AffineLayer(layerSizes(i), layerSizes(i + 1))
layers(i * 2 + 1) =
if (softmax && i == layerSizes.length - 2) {
@@ -655,7 +655,7 @@ private[ann] object FeedForwardModel {
val layers = topology.layers
val layerModels = new Array[LayerModel](layers.length)
var offset = 0
for(i <- 0 until layers.length){
for(i <- 0 until layers.length) {
layerModels(i) = layers(i).getInstance(seed)
offset += layerModels(i).size
}
@@ -145,7 +145,7 @@ class NaiveBayesModel private[ml] (
case Multinomial => (None, None)
case Bernoulli =>
val negTheta = theta.map(value => math.log(1.0 - math.exp(value)))
val ones = new DenseVector(Array.fill(theta.numCols){1.0})
val ones = new DenseVector(Array.fill(theta.numCols) {1.0})
val thetaMinusNegTheta = theta.map { value =>
value - math.log(1.0 - math.exp(value))
}
@@ -428,7 +428,7 @@ private[python] class PythonMLLibAPI extends Serializable {
val weight = wt.toArray
val mean = mu.map(_.asInstanceOf[DenseVector])
val sigma = si.map(_.asInstanceOf[DenseMatrix])
val gaussians = Array.tabulate(weight.length){
val gaussians = Array.tabulate(weight.length) {
i => new MultivariateGaussian(mean(i), sigma(i))
}
val model = new GaussianMixtureModel(weight, gaussians)
@@ -74,7 +74,7 @@ class NaiveBayesModel private[spark] (
case Multinomial => (None, None)
case Bernoulli =>
val negTheta = thetaMatrix.map(value => math.log(1.0 - math.exp(value)))
val ones = new DenseVector(Array.fill(thetaMatrix.numCols){1.0})
val ones = new DenseVector(Array.fill(thetaMatrix.numCols) {1.0})
val thetaMinusNegTheta = thetaMatrix.map { value =>
value - math.log(1.0 - math.exp(value))
}
@@ -152,7 +152,7 @@ class MultilabelMetrics @Since("1.2.0") (predictionAndLabels: RDD[(Array[Double]
*/
@Since("1.2.0")
lazy val microPrecision: Double = {
val sumFp = fpPerClass.foldLeft(0L){ case(cum, (_, fp)) => cum + fp}
val sumFp = fpPerClass.foldLeft(0L) { case(cum, (_, fp)) => cum + fp}
sumTp.toDouble / (sumTp + sumFp)
}

@@ -162,7 +162,7 @@ class MultilabelMetrics @Since("1.2.0") (predictionAndLabels: RDD[(Array[Double]
*/
@Since("1.2.0")
lazy val microRecall: Double = {
val sumFn = fnPerClass.foldLeft(0.0){ case(cum, (_, fn)) => cum + fn}
val sumFn = fnPerClass.foldLeft(0.0) { case(cum, (_, fn)) => cum + fn}
sumTp.toDouble / (sumTp + sumFn)
}

@@ -420,7 +420,7 @@ private[spark] object BLAS extends Serializable with Logging {
val AcolPtrs = A.colPtrs

// Slicing is easy in this case. This is the optimal multiplication setting for sparse matrices
if (A.isTransposed){
if (A.isTransposed) {
var colCounterForB = 0
if (!B.isTransposed) { // Expensive to put the check inside the loop
while (colCounterForB < nB) {
@@ -136,7 +136,7 @@ class IsotonicRegressionModel @Since("1.3.0") (
// higher than all values, in between two values or exact match.
if (insertIndex == 0) {
predictions.head
} else if (insertIndex == boundaries.length){
} else if (insertIndex == boundaries.length) {
predictions.last
} else if (foundIndex < 0) {
linearInterpolation(
@@ -98,7 +98,7 @@ private[mllib] object NumericParser {
}
} else if (token == ")") {
parsing = false
} else if (token.trim.isEmpty){
} else if (token.trim.isEmpty) {
// ignore whitespaces between delim chars, e.g. ", ["
} else {
// expecting a number
@@ -105,7 +105,7 @@ class RandomForestClassifierSuite extends SparkFunSuite with MLlibTestSparkConte
compareAPIs(rdd, rf, categoricalFeatures, numClasses)
}

test("subsampling rate in RandomForest"){
test("subsampling rate in RandomForest") {
val rdd = orderedLabeledPoints5_20
val categoricalFeatures = Map.empty[Int, Int]
val numClasses = 2
@@ -182,7 +182,7 @@ class GaussianMixtureSuite extends SparkFunSuite with MLlibTestSparkContext {
Vectors.dense( 4.5605), Vectors.dense( 5.2043), Vectors.dense( 6.2734)
)

val data2: Array[Vector] = Array.tabulate(25){ i: Int =>
val data2: Array[Vector] = Array.tabulate(25) { i: Int =>
Vectors.dense(Array.tabulate(50)(i + _.toDouble))
}

@@ -197,7 +197,7 @@ class RandomForestSuite extends SparkFunSuite with MLlibTestSparkContext {
featureSubsetStrategy = "sqrt", seed = 12345)
}

test("subsampling rate in RandomForest"){
test("subsampling rate in RandomForest") {
val arr = EnsembleTestHelper.generateOrderedLabeledPoints(5, 20)
val rdd = sc.parallelize(arr)
val strategy = new Strategy(algo = Classification, impurity = Gini, maxDepth = 2,
@@ -169,7 +169,7 @@ class SparkILoop(
}


private def sparkCleanUp(){
private def sparkCleanUp() {
echo("Stopping spark context.")
intp.beQuietDuring {
command("sc.stop()")
8 changes: 8 additions & 0 deletions scalastyle-config.xml
@@ -215,6 +215,14 @@ This file is divided into 3 sections:
</parameters>
</check>

<!-- SPARK-3854: Single Space between ')' and '{' -->
<check customId="SingleSpaceBetweenRParenAndLCurlyBrace" level="error" class="org.scalastyle.file.RegexChecker" enabled="true">
<parameters><parameter name="regex">\)\{</parameter></parameters>
<customMessage><![CDATA[
Single Space between ')' and `{`.
]]></customMessage>
</check>

<!-- ================================================================================ -->
<!-- rules we'd like to enforce, but haven't cleaned up the codebase yet -->
<!-- ================================================================================ -->
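
The new check is a plain regex over source lines; a quick, hypothetical Scala sketch (not part of the patch) showing what `\)\{` does and does not flag:
```
object RParenLCurlySketch {
  def main(args: Array[String]): Unit = {
    val pattern = """\)\{""".r
    val violation = "if (true){"
    val ok = "if (true) {"
    assert(pattern.findFirstIn(violation).isDefined)  // "){" is flagged as an error
    assert(pattern.findFirstIn(ok).isEmpty)           // ") {" passes the check
    println("only the missing-space form matches")
  }
}
```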
@@ -159,7 +159,7 @@ case class CreateNamedStruct(children: Seq[Expression]) extends Expression {
TypeCheckResult.TypeCheckFailure(
s"Only foldable StringType expressions are allowed to appear at odd position , got :" +
s" ${invalidNames.mkString(",")}")
} else if (!names.contains(null)){
} else if (!names.contains(null)) {
TypeCheckResult.TypeCheckSuccess
} else {
TypeCheckResult.TypeCheckFailure("Field name should not be null")
@@ -806,14 +806,14 @@ case class Round(child: Expression, scale: Expression)
case FloatType => // if child eval to NaN or Infinity, just return it.
if (_scale == 0) {
s"""
if (Float.isNaN(${ce.value}) || Float.isInfinite(${ce.value})){
if (Float.isNaN(${ce.value}) || Float.isInfinite(${ce.value})) {
${ev.value} = ${ce.value};
} else {
${ev.value} = Math.round(${ce.value});
}"""
} else {
s"""
if (Float.isNaN(${ce.value}) || Float.isInfinite(${ce.value})){
if (Float.isNaN(${ce.value}) || Float.isInfinite(${ce.value})) {
${ev.value} = ${ce.value};
} else {
${ev.value} = java.math.BigDecimal.valueOf(${ce.value}).
@@ -823,14 +823,14 @@ case class Round(child: Expression, scale: Expression)
case DoubleType => // if child eval to NaN or Infinity, just return it.
if (_scale == 0) {
s"""
if (Double.isNaN(${ce.value}) || Double.isInfinite(${ce.value})){
if (Double.isNaN(${ce.value}) || Double.isInfinite(${ce.value})) {
${ev.value} = ${ce.value};
} else {
${ev.value} = Math.round(${ce.value});
}"""
} else {
s"""
if (Double.isNaN(${ce.value}) || Double.isInfinite(${ce.value})){
if (Double.isNaN(${ce.value}) || Double.isInfinite(${ce.value})) {
${ev.value} = ${ce.value};
} else {
${ev.value} = java.math.BigDecimal.valueOf(${ce.value}).
@@ -86,7 +86,7 @@ class RowTest extends FunSpec with Matchers {
}
}

it("getAs() on type extending AnyVal does not throw exception when value is null"){
it("getAs() on type extending AnyVal does not throw exception when value is null") {
sampleRowWithoutCol3.getAs[String](sampleRowWithoutCol3.fieldIndex("col1")) shouldBe null
}
}
@@ -535,7 +535,7 @@ class FilterPushdownSuite extends PlanTest {
// Filter("c" > 6)
assertResult(classOf[Filter])(optimized.getClass)
assertResult(1)(optimized.asInstanceOf[Filter].condition.references.size)
assertResult("c"){
assertResult("c") {
optimized.asInstanceOf[Filter].condition.references.toSeq(0).name
}

@@ -1147,7 +1147,7 @@ class DataFrame private[sql](
* columns of the input row are implicitly joined with each value that is output by the function.
*
* {{{
* df.explode("words", "word"){words: String => words.split(" ")}
* df.explode("words", "word") {words: String => words.split(" ")}
* }}}
* @group dfops
* @since 1.3.0
2 changes: 1 addition & 1 deletion sql/core/src/main/scala/org/apache/spark/sql/Dataset.scala
@@ -605,7 +605,7 @@ class Dataset[T] private[sql](
* duplicate items. As such, it is analogous to `UNION ALL` in SQL.
* @since 1.6.0
*/
def union(other: Dataset[T]): Dataset[T] = withPlan[T](other){ (left, right) =>
def union(other: Dataset[T]): Dataset[T] = withPlan[T](other) { (left, right) =>
// This breaks caching, but it's usually ok because it addresses a very specific use case:
// using union to union many files or partitions.
CombineUnions(Union(left, right))
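A hedged usage sketch of `union` keeping duplicates (`UNION ALL` semantics), assuming a `SQLContext` named `sqlContext` with its implicits in scope, per the Dataset API of this version:
```
import sqlContext.implicits._

val left = sqlContext.createDataset(Seq(1, 2, 3))
val right = sqlContext.createDataset(Seq(3, 4))
// Duplicates are retained, so 3 appears twice -- analogous to UNION ALL in SQL.
left.union(right).show()
```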
@@ -57,7 +57,7 @@ private[datasources] object CompressionCodecs {
* `codec` should be a full class path
*/
def setCodecConfiguration(conf: Configuration, codec: String): Unit = {
if (codec != null){
if (codec != null) {
conf.set("mapreduce.output.fileoutputformat.compress", "true")
conf.set("mapreduce.output.fileoutputformat.compress.type", CompressionType.BLOCK.toString)
conf.set("mapreduce.output.fileoutputformat.compress.codec", codec)