From 49b9ea439f57d0198f2052754abacff70acc3998 Mon Sep 17 00:00:00 2001
From: Philip Yang
Date: Sat, 14 Oct 2017 16:36:14 -0700
Subject: [PATCH] tools

---
 bin/totgen.sh                    |  3 ++
 build.sbt                        | 16 ++-----
 linter.sh                        | 14 ++++++
 project/GenClasspathPlugin.scala | 78 ++++++++++++++++++++++++++++++++
 project/LibDeps.scala            | 26 +++++++++++
 prospector.yaml                  | 39 ++++++++++++++++
 6 files changed, 163 insertions(+), 13 deletions(-)
 create mode 100755 linter.sh
 create mode 100644 project/GenClasspathPlugin.scala
 create mode 100644 project/LibDeps.scala
 create mode 100644 prospector.yaml

diff --git a/bin/totgen.sh b/bin/totgen.sh
index 14bbd81e..06c7b454 100755
--- a/bin/totgen.sh
+++ b/bin/totgen.sh
@@ -162,7 +162,10 @@ exec "${_spark_shell}" \
   --master "local[4]" \
   --conf spark.app.name="[drgscl]::spark-shell" \
   --conf spark.eventLog.enabled=false \
+  --conf spark.driver.memory=10g \
+  --conf spark.executor.memory=10g \
   --jars "${_submit_jars}" \
+  --verbose \
   $@
 _EXEC_SCRIPT_EOF_
 
diff --git a/build.sbt b/build.sbt
index 95bc6043..bc04b090 100644
--- a/build.sbt
+++ b/build.sbt
@@ -1,16 +1,6 @@
 // Your sbt build file. Guides on how to write one can be found at
 // http://www.scala-sbt.org/0.13/docs/index.html
-
-val sparkVer = sys.props.getOrElse("spark.version", "2.1.1")
-val sparkBranch = sparkVer.substring(0, 3)
-val defaultScalaVer = sparkBranch match {
-  case "2.0" => "2.11.8"
-  case "2.1" => "2.11.8"
-  case "2.2" => "2.11.8"
-  case _ => throw new IllegalArgumentException(s"Unsupported Spark version: $sparkVer.")
-}
-val scalaVer = sys.props.getOrElse("scala.version", defaultScalaVer)
-val scalaMajorVersion = scalaVer.substring(0, scalaVer.indexOf(".", scalaVer.indexOf(".") + 1))
+import libdeps.LibVers._
 
 sparkVersion := sparkVer
 
@@ -19,7 +9,7 @@ scalaVersion := scalaVer
 spName := "databricks/spark-deep-learning"
 
 // Don't forget to set the version
-version := s"0.1.0-spark$sparkBranch"
+version := s"0.2.0-spark$sparkBranch"
 
 // All Spark Packages need a license
 licenses := Seq("Apache-2.0" -> url("http://opensource.org/licenses/Apache-2.0"))
@@ -34,7 +24,7 @@ sparkComponents ++= Seq("mllib-local", "mllib", "sql")
 
 // add any Spark Package dependencies using spDependencies.
 // e.g. spDependencies += "databricks/spark-avro:0.1"
-spDependencies += s"databricks/tensorframes:0.2.9-s_${scalaMajorVersion}"
+spDependencies += s"databricks/tensorframes:0.2.9-s_${scalaMajorVer}"
 
 // These versions are ancient, but they cross-compile around scala 2.10 and 2.11.
 // Update them when dropping support for scala 2.10
diff --git a/linter.sh b/linter.sh
new file mode 100755
index 00000000..1e51a7c6
--- /dev/null
+++ b/linter.sh
@@ -0,0 +1,14 @@
+#!/bin/bash
+
+_bsd_="$(cd "$(dirname "${BASH_SOURCE[0]}")" && pwd)"
+
+if [[ $# -gt 1 ]]; then
+  target_files=(${@})
+else
+  target_files=($(git diff --name-only upstream/master HEAD))
+fi
+
+echo "${target_files[@]}"
+pushd "${_bsd_}"
+exec prospector --profile ${_bsd_}/prospector.yaml "${target_files[@]}"
+popd
diff --git a/project/GenClasspathPlugin.scala b/project/GenClasspathPlugin.scala
new file mode 100644
index 00000000..4f9ecb3d
--- /dev/null
+++ b/project/GenClasspathPlugin.scala
@@ -0,0 +1,78 @@
+package sbtgenclasspath
+
+import sbt._, Keys._
+import sbtsparkpackage.SparkPackagePlugin.autoImport._
+import libdeps.LibVers._
+
+object GenClasspathPlugin extends sbt.AutoPlugin {
+
+  object autoImport {
+
+    lazy val genClasspath = taskKey[Unit]("Build runnable script with classpath")
+    lazy val extraSparkSubmitModules = settingKey[Seq[ModuleID]]("Additional spark submit jar dependencies")
+
+    lazy val genClasspathSettings: Seq[Def.Setting[_]] = Seq(
+
+      extraSparkSubmitModules := Seq.empty[ModuleID],
+
+      genClasspath := {
+        import java.io.PrintWriter
+
+        val sbtPathRoot = baseDirectory.value / ".sbt.paths"
+        sbtPathRoot.mkdirs()
+
+        def writeClasspath(cpType: String)(R: => String): Unit = {
+          val fout = new PrintWriter((sbtPathRoot / s"SBT_${cpType}_CLASSPATH").toString)
+          println(s"Building ${cpType} classpath for current project")
+          try fout.write(R) finally fout.close()
+        }
+
+        writeClasspath("RUNTIME") {
+          (fullClasspath in Runtime).value.files.map(_.toString).mkString(":")
+        }
+
+        writeClasspath("SPARK_PACKAGE") {
+          import scala.util.matching.Regex
+          val patt = s"(.+?)/(.+?):(.+?)(-s_${scalaMajorVer})?".r
+          val pkgs = (spDependencies.value).map { _ match {
+            case patt(orgName, pkgName, pkgVer, stem, _*) =>
+              if (null != stem) {
+                println(s"org ${orgName}, pkg ${pkgName}, ver ${pkgVer}, ${stem}")
+                s"${pkgName}-${pkgVer}${stem}.jar"
+              } else {
+                println(s"org ${orgName}, pkg ${pkgName}, ver ${pkgVer}")
+                s"${pkgName}-${pkgVer}.jar"
+              }
+          }}.toSet
+
+          // TODO: not knowing the proper way, I just fall back to Regex
+          val extraSpModIds = (extraSparkSubmitModules in Compile).value.flatMap { mod =>
+            //"com.typesafe.scala-logging:scala-logging-api:2.1.2"
+            // scala-logging-api_2.11-2.1.2.jar
+            val patt = s"(.+?):(.+?):(.+?)".r
+            mod.toString match {
+              case patt(orgName, pkgName, pkgVer) =>
+                Seq(s"${pkgName}_${scalaMajorVer}-${pkgVer}.jar", s"${pkgName}-${pkgVer}.jar")
+            }
+          }.toSet
+
+          (fullClasspath in Compile).value.files.filter { cpFile =>
+            val cpName = cpFile.getName
+            println(cpName)
+            (pkgs contains cpName) || (extraSpModIds contains cpName)
+          }.map(_.toString).mkString(":")
+        }
+      }
+    )
+  }
+  import autoImport._
+
+  override def requires = sbt.plugins.JvmPlugin
+
+  // This plugin is automatically enabled for projects which are JvmPlugin.
+  override def trigger = allRequirements
+
+  // a group of settings that are automatically added to projects.
+  override val projectSettings =
+    inConfig(Compile)(genClasspathSettings) ++ inConfig(Test)(genClasspathSettings)
+}
diff --git a/project/LibDeps.scala b/project/LibDeps.scala
new file mode 100644
index 00000000..93b578c3
--- /dev/null
+++ b/project/LibDeps.scala
@@ -0,0 +1,26 @@
+package libdeps
+
+/**
+  ======================================================
+  * Build parameters
+  ======================================================
+ */
+object LibVers {
+
+  lazy val sparkVer = sys.props.getOrElse("spark.version", "2.2.0")
+  lazy val sparkBranch = sparkVer.substring(0, 3)
+  lazy val defaultScalaVer = sparkBranch match {
+    case "2.0" => "2.11.8"
+    case "2.1" => "2.11.8"
+    case "2.2" => "2.11.8"
+    case _ => throw new IllegalArgumentException(s"Unsupported Spark version: $sparkVer.")
+  }
+
+  lazy val scalaVer = sys.props.getOrElse("scala.version", defaultScalaVer)
+  lazy val scalaMajorVer = scalaVer.substring(0, scalaVer.indexOf(".", scalaVer.indexOf(".") + 1))
+
+  lazy val defaultScalaTestVer = scalaVer match {
+    case s if s.startsWith("2.10") => "2.0"
+    case s if s.startsWith("2.11") => "2.2.6" // scalatest_2.11 does not have 2.0 published
+  }
+}
diff --git a/prospector.yaml b/prospector.yaml
new file mode 100644
index 00000000..d8ffa3c7
--- /dev/null
+++ b/prospector.yaml
@@ -0,0 +1,39 @@
+strictness: high
+test-warnings: True
+doc-warnings: false
+
+ignore-paths:
+  - docs
+  - spark-warehouse
+  - cover
+
+max-line-length: 100
+
+pep8:
+  run: true
+  disable:
+    - N802
+    - N803
+    - N806
+    - E302
+
+pylint:
+  run: true
+  disable:
+    - too-many-instance-attributes
+    - cyclic-import
+    - len-as-condition
+    - invalid-name
+    - no-else-return
+    - no-self-use
+    - import-error
+    - protected-access
+    - reimported
+
+mccabe:
+  disable:
+    - MC0001
+
+pyroma:
+  run: true
+ 
\ No newline at end of file