team.csv:
---------
id,fname,lname,designation,technology,salary
100,Sankara,Narayanan,Developer,Spark,50000
101,Ramesh,Kumar,Tester,Android,40000
102,Ganapathy,Govindan,Tester,Android,25000
103,Raja,Mani,Developer,Spark,50000
104,Pavithra,Lokesh,Developer,iOS,20000
105,Kamala,Kamesh,Developer,iOS,98000
106,Arun,Pandian,Tester,Spark,25000
107,Praveen,Mani,Analyst,iOS,50000
108,Anbu,Sudha,Tester,iOS,12000
109,Nagarethinam,Dhanukkodi,Tester,Node JS,20000
110,Veeraiah,Dhanukkodi,Analyst,iOS,40000
111,Arivu,Madhi,Developer,Spark,50000
112,Muthu,Kumar,Analyst,Node JS,45000
113,Sikkandar,Bhai,Developer,Spark,40000
114,Salman,Khan,Analyst,Spark,67000
115,Donald,Trump,Analyst,Android,45000
116,Anitha,Kuppusamy,Developer,Android,45000
117,Pushpavanam,Kuppusamy,Analyst,Android,38000
118,Latha,Rajinikanth,Tester,Node JS,45000
119,Aishwarya,Dhanush,Developer,Node JS,69000
120,Vijay,Kumar,Developer,Node JS,50000
/FileStore/tables/team.csv
val df = spark.read.format("csv").option("inferSchema","true").option("header",true).load("/FileStore/tables/team.csv")
df.printSchema()
root
|-- id: integer (nullable = true)
|-- fname: string (nullable = true)
|-- lname: string (nullable = true)
|-- designation: string (nullable = true)
|-- technology: string (nullable = true)
|-- salary: integer (nullable = true)
df.show()
+---+------------+-----------+-----------+----------+------+
| id| fname| lname|designation|technology|salary|
+---+------------+-----------+-----------+----------+------+
|100| Sankara| Narayanan| Developer| Spark| 50000|
|101| Ramesh| Kumar| Tester| Android| 40000|
|102| Ganapathy| Govindan| Tester| Android| 25000|
|103| Raja| Mani| Developer| Spark| 50000|
|104| Pavithra| Lokesh| Developer| iOS| 20000|
|105| Kamala| Kamesh| Developer| iOS| 98000|
|106| Arun| Pandian| Tester| Spark| 25000|
|107| Praveen| Mani| Analyst| iOS| 50000|
|108| Anbu| Sudha| Tester| iOS| 12000|
|109|Nagarethinam| Dhanukkodi| Tester| Node JS| 20000|
|110| Veeraiah| Dhanukkodi| Analyst| iOS| 40000|
|111| Arivu| Madhi| Developer| Spark| 50000|
|112| Muthu| Kumar| Analyst| Node JS| 45000|
|113| Sikkandar| Bhai| Developer| Spark| 40000|
|114| Salman| Khan| Analyst| Spark| 67000|
|115| Donald| Trump| Analyst| Android| 45000|
|116| Anitha| Kuppusamy| Developer| Android| 45000|
|117| Pushpavanam| Kuppusamy| Analyst| Android| 38000|
|118| Latha|Rajinikanth| Tester| Node JS| 45000|
|119| Aishwarya| Dhanush| Developer| Node JS| 69000|
|120| Vijay| Kumar| Developer| Node JS| 50000|
+---+------------+-----------+-----------+----------+------+
df.createOrReplaceTempView("teams")
spark.sql("select * from teams").show(21)
+---+------------+-----------+-----------+----------+------+
| id| fname| lname|designation|technology|salary|
+---+------------+-----------+-----------+----------+------+
|100| Sankara| Narayanan| Developer| Spark| 50000|
|101| Ramesh| Kumar| Tester| Android| 40000|
|102| Ganapathy| Govindan| Tester| Android| 25000|
|103| Raja| Mani| Developer| Spark| 50000|
|104| Pavithra| Lokesh| Developer| iOS| 20000|
|105| Kamala| Kamesh| Developer| iOS| 98000|
|106| Arun| Pandian| Tester| Spark| 25000|
|107| Praveen| Mani| Analyst| iOS| 50000|
|108| Anbu| Sudha| Tester| iOS| 12000|
|109|Nagarethinam| Dhanukkodi| Tester| Node JS| 20000|
|110| Veeraiah| Dhanukkodi| Analyst| iOS| 40000|
|111| Arivu| Madhi| Developer| Spark| 50000|
|112| Muthu| Kumar| Analyst| Node JS| 45000|
|113| Sikkandar| Bhai| Developer| Spark| 40000|
|114| Salman| Khan| Analyst| Spark| 67000|
|115| Donald| Trump| Analyst| Android| 45000|
|116| Anitha| Kuppusamy| Developer| Android| 45000|
|117| Pushpavanam| Kuppusamy| Analyst| Android| 38000|
|118| Latha|Rajinikanth| Tester| Node JS| 45000|
|119| Aishwarya| Dhanush| Developer| Node JS| 69000|
|120| Vijay| Kumar| Developer| Node JS| 50000|
+---+------------+-----------+-----------+----------+------+
// ROW_NUMBER -- to add row number sequence
spark.sql("SELECT id,fname,lname,technology,designation,salary, ROW_NUMBER() OVER (PARTITION BY designation ORDER BY salary desc) AS row_num FROM teams").show(21)
+---+------------+-----------+----------+-----------+------+-------+
| id| fname| lname|technology|designation|salary|row_num|
+---+------------+-----------+----------+-----------+------+-------+
|118| Latha|Rajinikanth| Node JS| Tester| 45000| 1|
|101| Ramesh| Kumar| Android| Tester| 40000| 2|
|102| Ganapathy| Govindan| Android| Tester| 25000| 3|
|106| Arun| Pandian| Spark| Tester| 25000| 4|
|109|Nagarethinam| Dhanukkodi| Node JS| Tester| 20000| 5|
|108| Anbu| Sudha| iOS| Tester| 12000| 6|
|105| Kamala| Kamesh| iOS| Developer| 98000| 1|
|119| Aishwarya| Dhanush| Node JS| Developer| 69000| 2|
|100| Sankara| Narayanan| Spark| Developer| 50000| 3|
|103| Raja| Mani| Spark| Developer| 50000| 4|
|111| Arivu| Madhi| Spark| Developer| 50000| 5|
|120| Vijay| Kumar| Node JS| Developer| 50000| 6|
|116| Anitha| Kuppusamy| Android| Developer| 45000| 7|
|113| Sikkandar| Bhai| Spark| Developer| 40000| 8|
|104| Pavithra| Lokesh| iOS| Developer| 20000| 9|
|114| Salman| Khan| Spark| Analyst| 67000| 1|
|107| Praveen| Mani| iOS| Analyst| 50000| 2|
|112| Muthu| Kumar| Node JS| Analyst| 45000| 3|
|115| Donald| Trump| Android| Analyst| 45000| 4|
|110| Veeraiah| Dhanukkodi| iOS| Analyst| 40000| 5|
|117| Pushpavanam| Kuppusamy| Android| Analyst| 38000| 6|
+---+------------+-----------+----------+-----------+------+-------+
// ROW_NUMBER()
spark.sql("SELECT id,fname,lname,designation,technology,salary, ROW_NUMBER() OVER (PARTITION BY technology ORDER BY salary desc) AS row_num FROM teams").show(21)
+---+------------+-----------+-----------+----------+------+-------+
| id| fname| lname|designation|technology|salary|row_num|
+---+------------+-----------+-----------+----------+------+-------+
|105| Kamala| Kamesh| Developer| iOS| 98000| 1|
|107| Praveen| Mani| Analyst| iOS| 50000| 2|
|110| Veeraiah| Dhanukkodi| Analyst| iOS| 40000| 3|
|104| Pavithra| Lokesh| Developer| iOS| 20000| 4|
|108| Anbu| Sudha| Tester| iOS| 12000| 5|
|119| Aishwarya| Dhanush| Developer| Node JS| 69000| 1|
|120| Vijay| Kumar| Developer| Node JS| 50000| 2|
|112| Muthu| Kumar| Analyst| Node JS| 45000| 3|
|118| Latha|Rajinikanth| Tester| Node JS| 45000| 4|
|109|Nagarethinam| Dhanukkodi| Tester| Node JS| 20000| 5|
|115| Donald| Trump| Analyst| Android| 45000| 1|
|116| Anitha| Kuppusamy| Developer| Android| 45000| 2|
|101| Ramesh| Kumar| Tester| Android| 40000| 3|
|117| Pushpavanam| Kuppusamy| Analyst| Android| 38000| 4|
|102| Ganapathy| Govindan| Tester| Android| 25000| 5|
|114| Salman| Khan| Analyst| Spark| 67000| 1|
|100| Sankara| Narayanan| Developer| Spark| 50000| 2|
|103| Raja| Mani| Developer| Spark| 50000| 3|
|111| Arivu| Madhi| Developer| Spark| 50000| 4|
|113| Sikkandar| Bhai| Developer| Spark| 40000| 5|
|106| Arun| Pandian| Tester| Spark| 25000| 6|
+---+------------+-----------+-----------+----------+------+-------+
// RANK()
spark.sql("SELECT id,fname,lname,technology,designation,salary, RANK() OVER (PARTITION BY designation ORDER BY salary desc) AS rank FROM teams").show(21)
+---+------------+-----------+----------+-----------+------+----+
| id| fname| lname|technology|designation|salary|rank|
+---+------------+-----------+----------+-----------+------+----+
|118| Latha|Rajinikanth| Node JS| Tester| 45000| 1|
|101| Ramesh| Kumar| Android| Tester| 40000| 2|
|102| Ganapathy| Govindan| Android| Tester| 25000| 3|
|106| Arun| Pandian| Spark| Tester| 25000| 3| -- 4 skipped
|109|Nagarethinam| Dhanukkodi| Node JS| Tester| 20000| 5|
|108| Anbu| Sudha| iOS| Tester| 12000| 6|
|105| Kamala| Kamesh| iOS| Developer| 98000| 1|
|119| Aishwarya| Dhanush| Node JS| Developer| 69000| 2|
|100| Sankara| Narayanan| Spark| Developer| 50000| 3|
|103| Raja| Mani| Spark| Developer| 50000| 3|
|111| Arivu| Madhi| Spark| Developer| 50000| 3|
|120| Vijay| Kumar| Node JS| Developer| 50000| 3|
|116| Anitha| Kuppusamy| Android| Developer| 45000| 7| -- 4,5,6 skipped
|113| Sikkandar| Bhai| Spark| Developer| 40000| 8|
|104| Pavithra| Lokesh| iOS| Developer| 20000| 9|
|114| Salman| Khan| Spark| Analyst| 67000| 1|
|107| Praveen| Mani| iOS| Analyst| 50000| 2|
|112| Muthu| Kumar| Node JS| Analyst| 45000| 3|
|115| Donald| Trump| Android| Analyst| 45000| 3| -- 4 skipped
|110| Veeraiah| Dhanukkodi| iOS| Analyst| 40000| 5|
|117| Pushpavanam| Kuppusamy| Android| Analyst| 38000| 6|
+---+------------+-----------+----------+-----------+------+----+
// RANK()
spark.sql("SELECT id,fname,lname,designation,technology,salary, RANK() OVER (PARTITION BY technology ORDER BY salary desc) AS rank FROM teams").show(21)
+---+------------+-----------+-----------+----------+------+----+
| id| fname| lname|designation|technology|salary|rank|
+---+------------+-----------+-----------+----------+------+----+
|105| Kamala| Kamesh| Developer| iOS| 98000| 1|
|107| Praveen| Mani| Analyst| iOS| 50000| 2|
|110| Veeraiah| Dhanukkodi| Analyst| iOS| 40000| 3|
|104| Pavithra| Lokesh| Developer| iOS| 20000| 4|
|108| Anbu| Sudha| Tester| iOS| 12000| 5|
|119| Aishwarya| Dhanush| Developer| Node JS| 69000| 1|
|120| Vijay| Kumar| Developer| Node JS| 50000| 2|
|112| Muthu| Kumar| Analyst| Node JS| 45000| 3|
|118| Latha|Rajinikanth| Tester| Node JS| 45000| 3| -- 4 is skipped
|109|Nagarethinam| Dhanukkodi| Tester| Node JS| 20000| 5|
|115| Donald| Trump| Analyst| Android| 45000| 1|
|116| Anitha| Kuppusamy| Developer| Android| 45000| 1| -- 2 is skipped
|101| Ramesh| Kumar| Tester| Android| 40000| 3|
|117| Pushpavanam| Kuppusamy| Analyst| Android| 38000| 4|
|102| Ganapathy| Govindan| Tester| Android| 25000| 5|
|114| Salman| Khan| Analyst| Spark| 67000| 1|
|100| Sankara| Narayanan| Developer| Spark| 50000| 2|
|103| Raja| Mani| Developer| Spark| 50000| 2| -- 3 is skipped
|111| Arivu| Madhi| Developer| Spark| 50000| 2|
|113| Sikkandar| Bhai| Developer| Spark| 40000| 5|
|106| Arun| Pandian| Tester| Spark| 25000| 6|
+---+------------+-----------+-----------+----------+------+----+
// DENSE_RANK()
spark.sql("SELECT id,fname,lname,technology,designation,salary, DENSE_RANK() OVER (PARTITION BY designation ORDER BY salary desc) AS dense_rank FROM teams").show(21)
+---+------------+-----------+----------+-----------+------+----------+
| id| fname| lname|technology|designation|salary|dense_rank|
+---+------------+-----------+----------+-----------+------+----------+
|118| Latha|Rajinikanth| Node JS| Tester| 45000| 1|
|101| Ramesh| Kumar| Android| Tester| 40000| 2|
|102| Ganapathy| Govindan| Android| Tester| 25000| 3|
|106| Arun| Pandian| Spark| Tester| 25000| 3|
|109|Nagarethinam| Dhanukkodi| Node JS| Tester| 20000| 4|
|108| Anbu| Sudha| iOS| Tester| 12000| 5|
|105| Kamala| Kamesh| iOS| Developer| 98000| 1|
|119| Aishwarya| Dhanush| Node JS| Developer| 69000| 2|
|100| Sankara| Narayanan| Spark| Developer| 50000| 3|
|103| Raja| Mani| Spark| Developer| 50000| 3|
|111| Arivu| Madhi| Spark| Developer| 50000| 3|
|120| Vijay| Kumar| Node JS| Developer| 50000| 3|
|116| Anitha| Kuppusamy| Android| Developer| 45000| 4|
|113| Sikkandar| Bhai| Spark| Developer| 40000| 5|
|104| Pavithra| Lokesh| iOS| Developer| 20000| 6|
|114| Salman| Khan| Spark| Analyst| 67000| 1|
|107| Praveen| Mani| iOS| Analyst| 50000| 2|
|112| Muthu| Kumar| Node JS| Analyst| 45000| 3|
|115| Donald| Trump| Android| Analyst| 45000| 3|
|110| Veeraiah| Dhanukkodi| iOS| Analyst| 40000| 4|
|117| Pushpavanam| Kuppusamy| Android| Analyst| 38000| 5|
+---+------------+-----------+----------+-----------+------+----------+
// dense_rank
spark.sql("SELECT id,fname,lname,designation,technology,salary, DENSE_RANK() OVER (PARTITION BY technology ORDER BY salary desc) AS dense_rank FROM teams").show(21)
+---+------------+-----------+-----------+----------+------+----------+
| id| fname| lname|designation|technology|salary|dense_rank|
+---+------------+-----------+-----------+----------+------+----------+
|105| Kamala| Kamesh| Developer| iOS| 98000| 1|
|107| Praveen| Mani| Analyst| iOS| 50000| 2|
|110| Veeraiah| Dhanukkodi| Analyst| iOS| 40000| 3|
|104| Pavithra| Lokesh| Developer| iOS| 20000| 4|
|108| Anbu| Sudha| Tester| iOS| 12000| 5|
|119| Aishwarya| Dhanush| Developer| Node JS| 69000| 1|
|120| Vijay| Kumar| Developer| Node JS| 50000| 2|
|112| Muthu| Kumar| Analyst| Node JS| 45000| 3|
|118| Latha|Rajinikanth| Tester| Node JS| 45000| 3|
|109|Nagarethinam| Dhanukkodi| Tester| Node JS| 20000| 4|
|115| Donald| Trump| Analyst| Android| 45000| 1|
|116| Anitha| Kuppusamy| Developer| Android| 45000| 1|
|101| Ramesh| Kumar| Tester| Android| 40000| 2|
|117| Pushpavanam| Kuppusamy| Analyst| Android| 38000| 3|
|102| Ganapathy| Govindan| Tester| Android| 25000| 4|
|114| Salman| Khan| Analyst| Spark| 67000| 1|
|100| Sankara| Narayanan| Developer| Spark| 50000| 2|
|103| Raja| Mani| Developer| Spark| 50000| 2|
|111| Arivu| Madhi| Developer| Spark| 50000| 2|
|113| Sikkandar| Bhai| Developer| Spark| 40000| 3|
|106| Arun| Pandian| Tester| Spark| 25000| 4|
+---+------------+-----------+-----------+----------+------+----------+
// ROW_NUMBER, RANK, DENSE_RANK all together
spark.sql("SELECT id,fname,lname,designation,technology,salary, ROW_NUMBER() OVER (PARTITION BY technology ORDER BY salary desc) AS row_num,RANK() OVER (PARTITION BY technology ORDER BY salary desc) AS rank,DENSE_RANK() OVER (PARTITION BY technology ORDER BY salary desc) AS dense_rank FROM teams").show(21)
+---+------------+-----------+-----------+----------+------+-------+----+----------+
| id| fname| lname|designation|technology|salary|row_num|rank|dense_rank|
+---+------------+-----------+-----------+----------+------+-------+----+----------+
|105| Kamala| Kamesh| Developer| iOS| 98000| 1| 1| 1|
|107| Praveen| Mani| Analyst| iOS| 50000| 2| 2| 2|
|110| Veeraiah| Dhanukkodi| Analyst| iOS| 40000| 3| 3| 3|
|104| Pavithra| Lokesh| Developer| iOS| 20000| 4| 4| 4|
|108| Anbu| Sudha| Tester| iOS| 12000| 5| 5| 5|
|119| Aishwarya| Dhanush| Developer| Node JS| 69000| 1| 1| 1|
|120| Vijay| Kumar| Developer| Node JS| 50000| 2| 2| 2|
|112| Muthu| Kumar| Analyst| Node JS| 45000| 3| 3| 3|
|118| Latha|Rajinikanth| Tester| Node JS| 45000| 4| 3| 3|
|109|Nagarethinam| Dhanukkodi| Tester| Node JS| 20000| 5| 5| 4|
|115| Donald| Trump| Analyst| Android| 45000| 1| 1| 1|
|116| Anitha| Kuppusamy| Developer| Android| 45000| 2| 1| 1|
|101| Ramesh| Kumar| Tester| Android| 40000| 3| 3| 2|
|117| Pushpavanam| Kuppusamy| Analyst| Android| 38000| 4| 4| 3|
|102| Ganapathy| Govindan| Tester| Android| 25000| 5| 5| 4|
|114| Salman| Khan| Analyst| Spark| 67000| 1| 1| 1|
|100| Sankara| Narayanan| Developer| Spark| 50000| 2| 2| 2|
|103| Raja| Mani| Developer| Spark| 50000| 3| 2| 2|
|111| Arivu| Madhi| Developer| Spark| 50000| 4| 2| 2|
|113| Sikkandar| Bhai| Developer| Spark| 40000| 5| 5| 3|
|106| Arun| Pandian| Tester| Spark| 25000| 6| 6| 4|
+---+------------+-----------+-----------+----------+------+-------+----+----------+