tblA (contents of tblA.txt, including the header row):
Id,Name,Age
100,Raja,55
101,Ravi,44
102,Kumar,33
104,Siva,22
105,Malar,32
tblB (contents of tblB.txt, including the header row):
Id,Name,Age
100,Raja,55
102,Kumar,33
105,Malar,32
// Load tblA.txt as CSV: header=True takes the column names from the first line, inferSchema=True infers the column types.
scala> val dfA = spark.read.format("csv").option("inferSchema","True").option("header","True").load("D:\\Ex\\My\\tblA.txt")
dfA: org.apache.spark.sql.DataFrame = [Id: int, Name: string ... 1 more field]
scala> dfA.printSchema()
root
|-- Id: integer (nullable = true)
|-- Name: string (nullable = true)
|-- Age: integer (nullable = true)
scala> dfA.show()
+---+-----+---+
| Id| Name|Age|
+---+-----+---+
|100| Raja| 55|
|101| Ravi| 44|
|102|Kumar| 33|
|104| Siva| 22|
|105|Malar| 32|
+---+-----+---+
// Load tblB.txt with the same CSV options, so dfB gets the same column names and types as dfA.
scala> val dfB = spark.read.format("csv").option("inferSchema","True").option("header","True").load("D:\\Ex\\My\\tblB.txt")
dfB: org.apache.spark.sql.DataFrame = [Id: int, Name: string ... 1 more field]
scala> dfB.printSchema()
root
|-- Id: integer (nullable = true)
|-- Name: string (nullable = true)
|-- Age: integer (nullable = true)
scala> dfB.show()
+---+-----+---+
| Id| Name|Age|
+---+-----+---+
|100| Raja| 55|
|102|Kumar| 33|
|105|Malar| 32|
+---+-----+---+
// Answer: rows of dfA that do not appear in dfB (exceptAll preserves duplicates; available since Spark 2.4)
scala> dfA.exceptAll(dfB).show()
+---+----+---+
| Id|Name|Age|
+---+----+---+
|101|Ravi| 44|
|104|Siva| 22|
+---+----+---+
// Reverse direction: rows of dfB that are not in dfA. Empty here because every row of dfB also appears in dfA.
scala> dfB.exceptAll(dfA).show()
+---+----+---+
| Id|Name|Age|
+---+----+---+
+---+----+---+
// One more solution: rows that occur in exactly one of the two DataFrames
// (symmetric difference; except also removes duplicate rows from the result).
// It matches dfA.exceptAll(dfB) here only because every row of dfB is also in dfA.
// union is used instead of unionAll, which is deprecated in Spark 2.x and emits a deprecation warning.
scala> dfA.union(dfB).except(dfA.intersect(dfB)).show()
+---+----+---+
| Id|Name|Age|
+---+----+---+
|101|Ravi| 44|
|104|Siva| 22|
+---+----+---+