SBT Assembly with Multiple Jars

45 Views Asked by At

I have a Spark repo that I run on EMR. I use sbt-assembly to build a fat JAR for this. I'd like to start doing some testing on my local machine, but there is a mismatch in dependencies (the hadoop dependency shades EMR's JARs when running in cluster mode), making it impossible to use the same JAR for both. I'm wondering if there's a way for me to produce multiple JARs using the same build.sbt file. My first attempt is below, but it doesn't generate multiple JARs in the output directory. I assume this is doable with SBT, but I'm not sure how.

// Build-wide version applied to every project in this build.
ThisBuild / version := "0.1.0"

// Scala version used to compile every project in this build.
ThisBuild / scalaVersion := "2.12.16"

// JVM flags (heap bounds, thread stack size, G1 collector) for JVMs that sbt forks.
// NOTE(review): sbt applies javaOptions only to forked processes; no `fork := true`
// is visible in this file, so confirm forking is enabled or these flags are ignored.
ThisBuild / javaOptions := Seq("-Xms512M", "-Xmx1024M", "-Xss1M", "-XX:+UseG1GC")

// Conflict resolution used by sbt-assembly when the same path appears in more than
// one dependency JAR. Cases are tried top to bottom and the first match wins, so
// specific META-INF entries must come before the META-INF catch-all.
ThisBuild / assemblyMergeStrategy := {
  case PathList("org", "aopalliance", xs@_*) => MergeStrategy.last
  case PathList("javax", "inject", xs@_*) => MergeStrategy.last
  case PathList("javax", "servlet", xs@_*) => MergeStrategy.last
  case PathList("javax", "activation", xs@_*) => MergeStrategy.last
  case PathList("org", "apache", xs@_*) => MergeStrategy.last
  case PathList("com", "google", xs@_*) => MergeStrategy.last
  case PathList("com", "esotericsoftware", xs@_*) => MergeStrategy.last
  case PathList("com", "codahale", xs@_*) => MergeStrategy.last
  case PathList("com", "yammer", xs@_*) => MergeStrategy.last
  // ServiceLoader registration files have to be merged, not dropped; otherwise
  // implementations registered by only one of the JARs disappear from the fat JAR.
  case PathList("META-INF", "services", xs@_*) => MergeStrategy.filterDistinctLines
  // These two were previously listed after the META-INF catch-all and therefore
  // could never match; they are now reachable.
  case PathList("META-INF", "mailcap") => MergeStrategy.last
  case PathList("META-INF", "mimetypes.default") => MergeStrategy.last
  // Everything else under META-INF is dropped. This includes signature files such as
  // ECLIPSEF.RSA, which were already being discarded by this rule in practice
  // because the dedicated case sat below it and was unreachable.
  case PathList("META-INF", xs@_*) => MergeStrategy.discard
  case "about.html" => MergeStrategy.rename
  // Covers plugin.properties and log4j.properties as well as any other *.properties.
  case x if x.endsWith(".properties") => MergeStrategy.last
  case _ => MergeStrategy.first
}

// Settings shared by every assembly variant: the application's library dependencies.
// Spark is Provided (expected on the runtime classpath, not bundled into the fat JAR);
// ScalaTest and Mockito are test-only.
lazy val commonSettings = Seq(
  libraryDependencies ++= Seq(
    "org.apache.spark" %% "spark-sql" % "3.2.1" % Provided,
    "com.amazonaws" % "aws-java-sdk" % "1.12.170",
    "com.datastax.spark" %% "spark-cassandra-connector" % "3.2.0",
    // Declared once: the list previously contained both 42.5.0 and 42.6.0 of this
    // artifact. The newer version is kept.
    "org.postgresql" % "postgresql" % "42.6.0",
    "com.typesafe.play" %% "play-json" % "2.8.2",
    "com.github.jnr" % "jnr-posix" % "3.1.16",
    "com.google.guava" % "guava" % "31.1-jre",
    "org.apache.httpcomponents" % "httpcore" % "4.4.16",
    "org.scalatest" %% "scalatest" % "3.2.15" % Test,
    "org.mockito" %% "mockito-scala" % "1.17.12" % Test
  )
)

// Cluster (EMR) variant: the Hadoop artifacts are Provided and the Scala library is
// left out of the assembly, so the JAR relies on the runtime environment for both.
lazy val root = (project in file("."))
  .settings(commonSettings: _*)
  .settings(
    name := "admin-precompute",
    assembly / assemblyJarName := "admin-precompute-assembly.jar",
    assembly / assemblyOption := (assembly / assemblyOption).value.withIncludeScala(false),
    libraryDependencies ++= Seq("hadoop-common", "hadoop-client", "hadoop-aws")
      .map(artifact => "org.apache.hadoop" % artifact % "3.2.2" % Provided)
  )

// Local-testing variant: bundles Hadoop and the Scala library into the fat JAR.
//
// The project gets its own base directory. Previously it used file("."), the same
// base as `root`, so both projects shared one target/ directory and could not
// produce two separate assemblies. The code itself is not duplicated: the source and
// resource directories are taken from `root`.
//
// Build it with `sbt local/assembly`; the JAR is written under local/target/.
// NOTE(review): spark-sql is still Provided via commonSettings, so this JAR does not
// contain Spark itself; confirm whether local runs need it bundled as well.
lazy val local = (project in file("local"))
  .settings(commonSettings: _*)
  .settings(
    name := "admin-precompute-local",
    Compile / unmanagedSourceDirectories := (root / Compile / unmanagedSourceDirectories).value,
    Compile / unmanagedResourceDirectories := (root / Compile / unmanagedResourceDirectories).value,
    Test / unmanagedSourceDirectories := (root / Test / unmanagedSourceDirectories).value,
    Test / unmanagedResourceDirectories := (root / Test / unmanagedResourceDirectories).value,
    libraryDependencies ++= Seq(
      "org.apache.hadoop" % "hadoop-common" % "3.2.2",
      "org.apache.hadoop" % "hadoop-client" % "3.2.2",
      "org.apache.hadoop" % "hadoop-aws" % "3.2.2"
    ),
    assembly / assemblyOption := (assembly / assemblyOption).value.withIncludeScala(true),
    assembly / assemblyJarName := "admin-precompute-assembly-local.jar"
  )

Thanks!

0

There are 0 best solutions below