I have a spark repo that I run on EMR. I use sbt-assembly to build a fat JAR for this. I'd like to start doing some testing on my local, but there is a mismatch in dependencies (hadoop dependency shades EMR's JARs when running in cluster mode), making it impossible to use the same JAR for both. I'm wondering if there's a way for me to produce multiple jars using the same build.sbt file. My first shot is below, but it doesn't generate multiple JARs in the output directory. I assume this is doable with SBT, but I'm not sure how.
ThisBuild / version := "0.1.0"
ThisBuild / scalaVersion := "2.12.16"
ThisBuild / javaOptions := Seq("-Xms512M", "-Xmx1024M", "-Xss1M", "-XX:+UseG1GC")
ThisBuild / assemblyMergeStrategy := {
case PathList("org", "aopalliance", xs@_*) => MergeStrategy.last
case PathList("javax", "inject", xs@_*) => MergeStrategy.last
case PathList("javax", "servlet", xs@_*) => MergeStrategy.last
case PathList("javax", "activation", xs@_*) => MergeStrategy.last
case PathList("org", "apache", xs@_*) => MergeStrategy.last
case PathList("com", "google", xs@_*) => MergeStrategy.last
case PathList("com", "esotericsoftware", xs@_*) => MergeStrategy.last
case PathList("com", "codahale", xs@_*) => MergeStrategy.last
case PathList("com", "yammer", xs@_*) => MergeStrategy.last
case PathList("META-INF", xs@_*) => MergeStrategy.discard
case "about.html" => MergeStrategy.rename
case "META-INF/ECLIPSEF.RSA" => MergeStrategy.last
case "META-INF/mailcap" => MergeStrategy.last
case "META-INF/mimetypes.default" => MergeStrategy.last
case "plugin.properties" => MergeStrategy.last
case "log4j.properties" => MergeStrategy.last
case x if x.endsWith(".properties") => MergeStrategy.last
case x => MergeStrategy.first
}
lazy val commonSettings = Seq(
libraryDependencies ++= Seq(
"org.apache.spark" %% "spark-sql" % "3.2.1" % Provided,
"com.amazonaws" % "aws-java-sdk" % "1.12.170",
"com.datastax.spark" %% "spark-cassandra-connector" % "3.2.0",
"org.postgresql" % "postgresql" % "42.5.0",
"com.typesafe.play" %% "play-json" % "2.8.2",
"com.github.jnr" % "jnr-posix" % "3.1.16",
"org.postgresql" % "postgresql" % "42.6.0",
"com.google.guava" % "guava" % "31.1-jre",
"org.apache.httpcomponents" % "httpcore" % "4.4.16",
"org.scalatest" %% "scalatest" % "3.2.15" % Test,
"org.mockito" %% "mockito-scala" % "1.17.12" % Test
)
)
lazy val root = (project in file("."))
.settings(commonSettings: _*)
.settings(
name := "admin-precompute",
libraryDependencies ++= Seq(
"org.apache.hadoop" % "hadoop-common" % "3.2.2" % Provided,
"org.apache.hadoop" % "hadoop-client" % "3.2.2" % Provided,
"org.apache.hadoop" % "hadoop-aws" % "3.2.2" % Provided
),
assembly / assemblyOption := (assembly / assemblyOption).value.withIncludeScala(false),
assembly / assemblyJarName := s"admin-precompute-assembly.jar",
)
lazy val local = (project in file("."))
.settings(commonSettings: _*)
.settings(
name := "admin-precompute-local",
libraryDependencies ++= Seq(
"org.apache.hadoop" % "hadoop-common" % "3.2.2",
"org.apache.hadoop" % "hadoop-client" % "3.2.2",
"org.apache.hadoop" % "hadoop-aws" % "3.2.2"
),
assembly / assemblyOption := (assembly / assemblyOption).value.withIncludeScala(true),
assembly / assemblyJarName := s"admin-precompute-assembly-local.jar",
)
Thanks!