// Ranking Calculation using Spark with Scala with the help of Windowing Function
Ranking.csv:
------------
RollNo,TotalMarks
100,495
101,494
102,494
103,492
104,494
105,493
106,492
107,494
108,495
109,493
110,492
111,491
112,490
113,494
114,491
115,490
116,495
116,493
117,490
118,493
119,492
120,491
scala> val df = spark.read.format("csv").option("inferSchema","True").option("header","True").load("D:\\Ex\\Ranking.csv")
df: org.apache.spark.sql.DataFrame = [RollNo: int, TotalMarks: int]
scala> df.printSchema()
root
|-- RollNo: integer (nullable = true)
|-- TotalMarks: integer (nullable = true)
scala> df.show(200)
+------+----------+
|RollNo|TotalMarks|
+------+----------+
| 100| 495|
| 101| 494|
| 102| 494|
| 103| 492|
| 104| 494|
| 105| 493|
| 106| 492|
| 107| 494|
| 108| 495|
| 109| 493|
| 110| 492|
| 111| 491|
| 112| 490|
| 113| 494|
| 114| 491|
| 115| 490|
| 116| 495|
| 116| 493|
| 117| 490|
| 118| 493|
| 119| 492|
| 120| 491|
+------+----------+
import org.apache.spark.sql.expressions.Window
// `desc` must be imported alongside `rank`; the original import omitted it,
// so this snippet only compiled in a shell that already had functions._ in scope.
import org.apache.spark.sql.functions.{rank, desc}

// Assign a competition rank over the whole dataset, highest TotalMarks first.
// rank() leaves gaps after ties (e.g. three students at 495 -> next rank is 4);
// use dense_rank() instead if gap-free ranking is desired.
// NOTE: a global (unpartitioned) Window moves all rows to a single partition —
// fine for small data like this, but it does not scale.
val rank_result = df.withColumn("Rank", rank().over(Window.orderBy(desc("TotalMarks"))))
scala> rank_result.show(200)
+------+----------+----+
|RollNo|TotalMarks|Rank|
+------+----------+----+
| 100| 495| 1|
| 108| 495| 1|
| 116| 495| 1|
| 101| 494| 4|
| 102| 494| 4|
| 104| 494| 4|
| 107| 494| 4|
| 113| 494| 4|
| 105| 493| 9|
| 109| 493| 9|
| 116| 493| 9|
| 118| 493| 9|
| 103| 492| 13|
| 106| 492| 13|
| 110| 492| 13|
| 119| 492| 13|
| 111| 491| 17|
| 114| 491| 17|
| 120| 491| 17|
| 112| 490| 20|
| 115| 490| 20|
| 117| 490| 20|
+------+----------+----+
No comments:
Post a Comment