I am trying to build an executable assembly JAR, but I am getting the following error:
Caused by: java.lang.ClassNotFoundException: csv.DefaultSource
The problem occurs when reading the CSV files. The code works fine when run from the IDE. Please help me.
The Scala code is below:
package extendedtable
import org.apache.log4j.{Level, Logger}
import org.apache.spark.SparkContext
import org.apache.spark.sql.{DataFrame, Row, SparkSession}
import scala.collection.mutable.ListBuffer
object mainObject {

  /**
   * Entry point: loads the atom and molecule CSV files, joins them on
   * `molecule_id`, collects the molecule ids, and registers/shows the
   * joined DataFrame.
   */
  def main(args: Array[String]): Unit = {
    // Local Spark session; cross joins are explicitly enabled for downstream queries.
    val spark: SparkSession = SparkSession.builder()
      .appName("generationobj")
      .master("local[*]")
      .config("spark.sql.crossJoin.enabled", value = true)
      .getOrCreate()
    val sc: SparkContext = spark.sparkContext
    import spark.implicits._

    // Read both CSV inputs with a header row; column types are inferred from the data.
    val atomData = spark.read.format("csv")
      .option("header", "true")
      .option("inferSchema", "true")
      .load("Resources/atom.csv")
    val moleculeData = spark.read.format("csv")
      .option("header", "true")
      .option("inferSchema", "true")
      .load("Resources/molecule.csv")

    // Join atoms onto molecules by their shared key column.
    // Fix: the original bound this to `df` but later referenced an undefined `newDF`,
    // which does not compile; the join result is what `newDF` was meant to be.
    val newDF: DataFrame = moleculeData.join(atomData, "molecule_id")

    // Collect the molecule ids to the driver.
    // NOTE(review): with inferSchema the `molecule_id` column may be inferred as an
    // integer, in which case getString(0) would throw a ClassCastException;
    // get(0).toString is safe for either inferred type — confirm against the data.
    val mid: List[Row] = moleculeData.select("molecule_id").collect.toList
    val listofmoleculeid: List[String] = mid.map(r => r.get(0).toString)
    // print(listofmoleculeid)

    newDF.createTempView("table")
    newDF.show()
  }
}
The following is the build file (build.sbt):
name := "ExtendedTable"

version := "0.1"

scalaVersion := "2.11.12"

// Spark 2.3.0 dependencies (compile scope: the fat JAR is run directly with
// java -jar; mark these "provided" instead if deploying via spark-submit).
libraryDependencies += "org.apache.spark" %% "spark-core" % "2.3.0"
libraryDependencies += "org.apache.spark" %% "spark-sql" % "2.3.0"
libraryDependencies += "org.apache.spark" %% "spark-mllib" % "2.3.0"

mainClass := Some("extendedtable.mainObject")

// Root cause of "ClassNotFoundException: csv.DefaultSource": discarding ALL of
// META-INF also discards META-INF/services/org.apache.spark.sql.sources.DataSourceRegister,
// the java.util.ServiceLoader registry Spark uses to resolve the short format
// name "csv" to its implementing class. Service registry files from all JARs
// must be merged (concatenated, deduplicated) — only the rest of META-INF
// (signatures, manifests) should be discarded.
assemblyMergeStrategy in assembly := {
  case PathList("META-INF", "services", _ @ _*) => MergeStrategy.filterDistinctLines
  case PathList("META-INF", _ @ _*)             => MergeStrategy.discard
  case _                                        => MergeStrategy.first
}