marks.csv has seven columns: StudentNo, Mark1, Mark2, Mark3, Mark4, Mark5, and Mark6.
We are going to compute the total and the average of the six marks for each student using Spark with Scala. For example, student 1001 scores 76 + 72 + 85 + 75 + 84 + 75 = 467 in total, for an average of 467 / 6 ≈ 77.83.
Input file
marks.csv:
------------
1001,76,72,85,75,84,75
1002,77,51,90,61,76,69
1003,91,86,52,81,64,87
1004,71,82,59,96,82,73
1005,97,52,72,49,85,64
1006,82,80,71,82,83,69
1007,67,93,93,54,61,56
1008,81,73,82,70,85,74
1009,63,89,81,73,88,60
1010,73,66,70,69,77,68
1011,70,84,55,86,66,72
1012,73,85,85,53,65,56
1013,93,94,61,55,62,58
1014,69,95,73,58,72,89
1015,87,71,86,85,65,60
1016,90,59,77,90,66,55
1017,72,51,84,80,71,77
1018,94,86,59,66,83,56
1019,79,90,56,83,76,56
1020,68,81,89,73,59,91
1021,93,94,90,63,81,67
1022,79,61,85,93,65,84
1023,50,52,52,90,83,70
1024,58,78,79,88,87,61
1025,89,80,70,70,64,56
1026,85,90,76,87,60,85
1027,74,86,68,71,65,69
1028,88,79,65,64,77,65
1029,56,91,74,67,69,55
1030,90,80,54,81,70,73
1031,90,69,55,58,72,55
1032,58,55,87,87,80,88
1033,76,68,60,97,76,62
1034,61,83,76,50,75,65
1035,94,58,59,70,68,60
1036,54,77,91,93,71,57
1037,74,55,83,69,84,85
1038,68,65,91,77,83,57
1039,88,91,85,77,64,68
1040,55,62,92,76,75,82
1041,69,77,55,76,85,62
1042,97,66,77,62,70,65
1043,63,82,59,52,83,55
1044,83,62,88,83,75,70
1045,55,85,93,89,63,59
1046,75,92,81,76,79,87
1047,90,54,60,49,62,84
1048,52,92,73,59,66,81
1049,70,91,54,64,72,77
1050,89,81,62,61,81,61
1051,79,57,75,56,58,60
1052,91,92,78,92,88,71
1053,91,77,60,54,60,85
1054,67,67,85,86,62,88
1055,67,55,54,71,63,55
1056,72,62,87,90,61,66
1057,93,56,84,83,58,66
1058,56,86,80,97,88,85
1059,95,69,59,92,85,55
1060,76,82,54,66,60,85
1061,77,74,61,68,88,76
1062,80,52,58,93,72,60
1063,96,69,60,86,61,56
1064,74,91,82,85,64,76
1065,88,95,94,72,70,84
1066,77,73,70,62,69,70
1067,92,80,68,66,65,61
1068,83,88,55,67,69,60
1069,92,88,75,68,72,75
1070,96,54,91,77,64,63
1071,72,63,65,92,67,90
1072,95,58,82,84,62,57
1073,65,56,82,92,58,63
1074,52,87,70,62,78,81
1075,74,92,83,83,64,64
1076,65,78,57,51,63,90
1077,62,85,69,54,63,57
1078,91,65,94,87,84,55
1079,64,79,79,51,76,61
1080,64,90,56,77,67,61
1081,72,54,60,87,66,70
1082,65,68,90,60,73,55
1083,72,55,94,53,87,59
1084,88,55,54,49,83,66
1085,59,89,93,61,63,82
1086,68,94,81,67,64,85
1087,76,56,88,53,66,84
1088,84,72,62,56,59,58
1089,73,61,52,86,85,81
1090,92,55,58,95,67,85
1091,60,55,72,85,81,88
1092,95,61,81,63,87,57
1093,56,86,83,55,69,59
1094,71,60,73,63,65,85
1095,91,57,71,59,76,56
1096,79,91,83,96,87,75
1097,88,67,56,66,74,56
1098,90,92,62,74,61,79
1099,72,79,59,76,84,57
1100,87,51,66,92,75,61
Spark shell session:
---------------------
scala> val marks = sc.textFile("E:\\pocs\\marks.csv")
marks: org.apache.spark.rdd.RDD[String] = E:\pocs\marks.csv MapPartitionsRDD[59] at textFile at <console>:24
scala> marks.take(5).foreach(println)
1001,76,72,85,75,84,75
1002,77,51,90,61,76,69
1003,91,86,52,81,64,87
1004,71,82,59,96,82,73
1005,97,52,72,49,85,64
scala> val lstMarks = marks.map ( x => {
| val line = x.split(",")
| val studentno = line(0)
| val m1 = line(1).toFloat
| val m2 = line(2).toFloat
| val m3 = line(3).toFloat
| val m4 = line(4).toFloat
| val m5 = line(5).toFloat
| val m6 = line(6).toFloat
| val total = m1 + m2 + m3 + m4 + m5 + m6
| val avg = total / 6
| (studentno,m1,m2,m3,m4,m5,m6,total,avg)
| })
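The same parsing can be written more compactly by treating every field after the first as a mark, which avoids indexing each column by hand. This is only a sketch equivalent to the map above; it jumps straight to the (StudentNo, Total, Average) shape, so the second map further below becomes unnecessary:

val lstMarksCompact = marks.map { x =>
  val fields = x.split(",")
  val scores = fields.tail.map(_.toFloat)   // the six mark columns
  val total = scores.sum
  (fields(0), total, total / scores.length) // (StudentNo, Total, Average)
}

Note that the marks are parsed with toFloat, which is why the averages below print with limited precision, such as 77.833336; using toDouble instead would give full double precision.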
scala> lstMarks.take(5).foreach(println)
(1001,76.0,72.0,85.0,75.0,84.0,75.0,467.0,77.833336)
(1002,77.0,51.0,90.0,61.0,76.0,69.0,424.0,70.666664)
(1003,91.0,86.0,52.0,81.0,64.0,87.0,461.0,76.833336)
(1004,71.0,82.0,59.0,96.0,82.0,73.0,463.0,77.166664)
(1005,97.0,52.0,72.0,49.0,85.0,64.0,419.0,69.833336)
scala> val result = lstMarks.map (line => {
| val studentno = line._1.toString
| val total = line._8.toString
| val avg = line._9.toString
| (studentno,total,avg)
| })
result: org.apache.spark.rdd.RDD[(String, String, String)] = MapPartitionsRDD[62] at map at <console>:25
scala> result.take(5).foreach(println)
(1001,467.0,77.833336)
(1002,424.0,70.666664)
(1003,461.0,76.833336)
(1004,463.0,77.166664)
(1005,419.0,69.833336)
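Because the map above converts total and avg with toString, the resulting DataFrame columns come out as strings (visible in the toDF output below). If numeric columns are preferred, the conversion can simply be dropped, as in this sketch (with the spark-shell implicits in scope, toDF then infers float columns):

val resultNumeric = lstMarks.map(line => (line._1, line._8, line._9))  // keep the Float types
val resultNumericDF = resultNumeric.toDF("StudentNo", "Total", "Average")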
scala> val resultDF = result.toDF("StudentNo","Total","Average")
resultDF: org.apache.spark.sql.DataFrame = [StudentNo: string, Total: string ... 1 more field]
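To inspect or persist the final DataFrame, show and write can be used. A minimal sketch follows; the output directory is chosen purely for illustration:

resultDF.show(5)                                                     // prints the first five rows as a formatted table
resultDF.write.option("header", "true").csv("E:\\pocs\\marks_out")   // writes the result as one or more CSV part files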
Output:
----------
resultDF holds one row per student with the columns StudentNo, Total, and Average; its first five rows correspond to the tuples printed above.
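For comparison, the same result can be obtained without hand-parsing by reading the file through the DataFrame API. This is a minimal sketch assuming Spark 2.x; the column names match the ones given at the top of the post:

import org.apache.spark.sql.functions._

// Read the CSV with inferred numeric types (the file has no header row)
val df = spark.read.option("inferSchema", "true").csv("E:\\pocs\\marks.csv")
  .toDF("StudentNo", "Mark1", "Mark2", "Mark3", "Mark4", "Mark5", "Mark6")

// Sum the six mark columns, then derive the average
val markCols = (1 to 6).map(i => col(s"Mark$i"))
val resultDF2 = df
  .withColumn("Total", markCols.reduce(_ + _))
  .withColumn("Average", col("Total") / 6)
  .select("StudentNo", "Total", "Average")

resultDF2.show(5)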