I'm trying to do a left-outer join in the code below. The code currently performs the outer join using only one column, 'ID'. Can you please help me alter the code so that two more columns, 'date' and 'location', are included in the join condition? My untested attempt is shown after the current code. Thank you.

import org.apache.spark.sql.DataFrame
import org.apache.spark.sql.SparkSession
val spark: SparkSession =
  SparkSession.builder.master("local").appName("testing").enableHiveSupport().getOrCreate()
import spark.implicits._

def getRelevantSegmentInfo(tableName: String, segmentName: String, pocs: Seq[String])(implicit
    spark: SparkSession
): DataFrame = {
  spark
    .table(tableName)
    .select(segmentName, "ID")
}

val firstDF: DataFrame = getRelevantSegmentInfo(
  result_as_sequence.head.tableName,
  result_as_sequence.head.segmentName,
  result_as_sequence.head.pocs
)(spark)

val finalDF = result_as_sequence.tail.foldLeft(firstDF) {
  case (leftDF, segmentStruct) =>
    leftDF.join(
      getRelevantSegmentInfo(
        segmentStruct.tableName,
        segmentStruct.segmentName,
        segmentStruct.pocs
      )(spark),
      Seq("ID"),
      "left_outer"
    )
}
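
For reference, here is my untested attempt, assuming every table in result_as_sequence also exposes 'date' and 'location' columns. Is this the right way to do it?

// Untested: also select the extra key columns so they are available to the join
def getRelevantSegmentInfo(tableName: String, segmentName: String, pocs: Seq[String])(implicit
    spark: SparkSession
): DataFrame = {
  spark
    .table(tableName)
    .select(segmentName, "ID", "date", "location")
}

// Untested: pass all three key columns to the join
val finalDF = result_as_sequence.tail.foldLeft(firstDF) {
  case (leftDF, segmentStruct) =>
    leftDF.join(
      getRelevantSegmentInfo(
        segmentStruct.tableName,
        segmentStruct.segmentName,
        segmentStruct.pocs
      )(spark),
      Seq("ID", "date", "location"),
      "left_outer"
    )
}

My understanding from the Spark API docs is that join(right, usingColumns, joinType) performs an equi-join on all of the listed columns and keeps a single copy of each join column in the result, so the output should not end up with duplicate ID, date, or location columns.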