Skip to content

Commit

Permalink
Add the dropColumns DataFrame extension
Browse files Browse the repository at this point in the history
  • Loading branch information
MrPowers committed Nov 30, 2018
1 parent e856a5e commit 1f846ba
Show file tree
Hide file tree
Showing 2 changed files with 45 additions and 0 deletions.
Original file line number Diff line number Diff line change
Expand Up @@ -290,6 +290,9 @@ object DataFrameExt {
)
)

/**
 * Drops every column whose name satisfies the given predicate.
 *
 * @param f predicate evaluated against each column name; a column is
 *          dropped when the predicate returns true
 * @return the DataFrame with the matching columns removed; when no column
 *         name matches, the underlying DataFrame is returned as is
 */
def dropColumns(f: String => Boolean): DataFrame =
  df.columns
    .filter(f)
    .foldLeft(df)((remaining, colName) => remaining.drop(colName))

}

}
Original file line number Diff line number Diff line change
Expand Up @@ -1153,6 +1153,48 @@ object DataFrameExtTest extends TestSuite with DataFrameComparer with SparkSessi

}

'dropColumns - {

  "drop columns which start with underscore" - {

    // Input data: the third column name begins with an underscore.
    val sourceDF = spark.createDF(
      List(
        ("John", 1, 101),
        ("Paul", 2, 102),
        ("Jane", 3, 103)
      ),
      List(
        ("name", StringType, true),
        ("id", IntegerType, true),
        ("_internal_id", IntegerType, true)
      )
    )

    // Apply the extension under test with a name-based predicate.
    val actualDF = sourceDF.dropColumns(_.startsWith("_"))

    // Same rows and schema as the input, minus the underscore-prefixed column.
    val expectedDF = spark.createDF(
      List(
        ("John", 1),
        ("Paul", 2),
        ("Jane", 3)
      ),
      List(
        ("name", StringType, true),
        ("id", IntegerType, true)
      )
    )

    // Compare the column names first, then the full DataFrame contents.
    assert(actualDF.columns.toSet == expectedDF.columns.toSet)
    assertSmallDataFrameEquality(actualDF, expectedDF, orderedComparison = false)

  }

}

}

}

2 comments on commit 1f846ba

@MrPowers
Copy link
Collaborator Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

@gorros - I added your dropColumns method in this commit. After thinking the code over, I decided it is a good addition to the library. Thank you for the valuable contributions - your PRs are always welcome here!

@snithish FYI

@gorros
Copy link
Collaborator

@gorros gorros commented on 1f846ba Nov 30, 2018

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

@MrPowers Thanks!

Please sign in to comment.