From 1f846ba7b5d27959c67b52df7ab2ef9529dd3956 Mon Sep 17 00:00:00 2001
From: MrPowers
Date: Thu, 29 Nov 2018 16:40:20 -0800
Subject: [PATCH] Add the dropColumns DataFrame extension

---
 .../spark/daria/sql/DataFrameExt.scala        | 12 ++++++
 .../spark/daria/sql/DataFrameExtTest.scala    | 42 +++++++++++++++++++
 2 files changed, 54 insertions(+)

diff --git a/src/main/scala/com/github/mrpowers/spark/daria/sql/DataFrameExt.scala b/src/main/scala/com/github/mrpowers/spark/daria/sql/DataFrameExt.scala
index 527e4ca9..499ee73c 100644
--- a/src/main/scala/com/github/mrpowers/spark/daria/sql/DataFrameExt.scala
+++ b/src/main/scala/com/github/mrpowers/spark/daria/sql/DataFrameExt.scala
@@ -290,6 +290,18 @@ object DataFrameExt {
         )
       )
 
+    /**
+     * Drops every column whose name satisfies the predicate `f`.
+     *
+     * @param f predicate applied to each column name; `true` marks the column for removal
+     * @return `df` itself when no column name matches, otherwise `df` without the matching columns
+     */
+    def dropColumns(f: String => Boolean): DataFrame = {
+      val namesToDrop = df.columns.filter(f)
+      // A single varargs drop avoids building one intermediate DataFrame per dropped column.
+      if (namesToDrop.isEmpty) df else df.drop(namesToDrop: _*)
+    }
+
   }
 
 }
diff --git a/src/test/scala/com/github/mrpowers/spark/daria/sql/DataFrameExtTest.scala b/src/test/scala/com/github/mrpowers/spark/daria/sql/DataFrameExtTest.scala
index 30c42d84..40ef21ad 100644
--- a/src/test/scala/com/github/mrpowers/spark/daria/sql/DataFrameExtTest.scala
+++ b/src/test/scala/com/github/mrpowers/spark/daria/sql/DataFrameExtTest.scala
@@ -1153,6 +1153,48 @@ object DataFrameExtTest extends TestSuite with DataFrameComparer with SparkSessi
 
     }
 
+    'dropColumns - {
+
+      "drop columns which start with underscore" - {
+
+        val df = spark
+          .createDF(
+            List(
+              ("John", 1, 101),
+              ("Paul", 2, 102),
+              ("Jane", 3, 103)
+            ),
+            List(
+              ("name", StringType, true),
+              ("id", IntegerType, true),
+              ("_internal_id", IntegerType, true)
+            )
+          )
+          .dropColumns(_.startsWith("_"))
+
+        val expectedDF = spark
+          .createDF(
+            List(
+              ("John", 1),
+              ("Paul", 2),
+              ("Jane", 3)
+            ),
+            List(
+              ("name", StringType, true),
+              ("id", IntegerType, true)
+            )
+          )
+        assert(df.columns.toSet == expectedDF.columns.toSet)
+        assertSmallDataFrameEquality(
+          df,
+          expectedDF,
+          orderedComparison = false
+        )
+
+      }
+
+    }
+
   }
 
 }