Saturday, 2 February 2019

Airports data analysis using SparkSQL

Input file
airports.csv:
---------------

AirportID,Name,City,Country,FAA,ICAO,Latitude,Longitude,Altitude,Timezone,DST,Tz
1,"Goroka","Goroka","Papua New Guinea","GKA","AYGA",-6.081689,145.391881,5282,10,"U","Pacific/Port_Moresby"
2,"Madang","Madang","Papua New Guinea","MAG","AYMD",-5.207083,145.7887,20,10,"U","Pacific/Port_Moresby"
3,"Mount Hagen","Mount Hagen","Papua New Guinea","HGU","AYMH",-5.826789,144.295861,5388,10,"U","Pacific/Port_Moresby"
4,"Nadzab","Nadzab","Papua New Guinea","LAE","AYNZ",-6.569828,146.726242,239,10,"U","Pacific/Port_Moresby"
5,"Port Moresby Jacksons Intl","Port Moresby","Papua New Guinea","POM","AYPY",-9.443383,147.22005,146,10,"U","Pacific/Port_Moresby"
6,"Wewak Intl","Wewak","Papua New Guinea","WWK","AYWK",-3.583828,143.669186,19,10,"U","Pacific/Port_Moresby"
7,"Narsarsuaq","Narssarssuaq","Greenland","UAK","BGBW",61.160517,-45.425978,112,-3,"E","America/Godthab"
8,"Nuuk","Godthaab","Greenland","GOH","BGGH",64.190922,-51.678064,283,-3,"E","America/Godthab"
9,"Sondre Stromfjord","Sondrestrom","Greenland","SFJ","BGSF",67.016969,-50.689325,165,-3,"E","America/Godthab"
10,"Thule Air Base","Thule","Greenland","THU","BGTL",76.531203,-68.703161,251,-4,"E","America/Thule"
11,"Akureyri","Akureyri","Iceland","AEY","BIAR",65.659994,-18.072703,6,0,"N","Atlantic/Reykjavik"
12,"Egilsstadir","Egilsstadir","Iceland","EGS","BIEG",65.283333,-14.401389,76,0,"N","Atlantic/Reykjavik"
13,"Hornafjordur","Hofn","Iceland","HFN","BIHN",64.295556,-15.227222,24,0,"N","Atlantic/Reykjavik"
14,"Husavik","Husavik","Iceland","HZK","BIHU",65.952328,-17.425978,48,0,"N","Atlantic/Reykjavik"
15,"Isafjordur","Isafjordur","Iceland","IFJ","BIIS",66.058056,-23.135278,8,0,"N","Atlantic/Reykjavik"
16,"Keflavik International Airport","Keflavik","Iceland","KEF","BIKF",63.985,-22.605556,171,0,"N","Atlantic/Reykjavik"
17,"Patreksfjordur","Patreksfjordur","Iceland","PFJ","BIPA",65.555833,-23.965,11,0,"N","Atlantic/Reykjavik"
18,"Reykjavik","Reykjavik","Iceland","RKV","BIRK",64.13,-21.940556,48,0,"N","Atlantic/Reykjavik"
19,"Siglufjordur","Siglufjordur","Iceland","SIJ","BISI",66.133333,-18.916667,10,0,"N","Atlantic/Reykjavik"
20,"Vestmannaeyjar","Vestmannaeyjar","Iceland","VEY","BIVM",63.424303,-20.278875,326,0,"N","Atlantic/Reykjavik"
21,"Sault Ste Marie","Sault Sainte Marie","Canada","YAM","CYAM",46.485001,-84.509445,630,-5,"A","America/Toronto"
22,"Winnipeg St Andrews","Winnipeg","Canada","YAV","CYAV",50.056389,-97.0325,760,-6,"A","America/Winnipeg"
23,"Shearwater","Halifax","Canada","YAW","CYAW",44.639721,-63.499444,167,-4,"A","America/Halifax"
24,"St Anthony","St. Anthony","Canada","YAY","CYAY",51.391944,-56.083056,108,-3.5,"A","America/St_Johns"
25,"Tofino","Tofino","Canada","YAZ","CYAZ",49.082222,-125.7725,80,-8,"A","America/Vancouver"
26,"Kugaaruk","Pelly Bay","Canada","YBB","CYBB",68.534444,-89.808056,56,-7,"A","America/Edmonton"
27,"Baie Comeau","Baie Comeau","Canada","YBC","CYBC",49.1325,-68.204444,71,-5,"A","America/Toronto"
28,"Bagotville","Bagotville","Canada","YBG","CYBG",48.330555,-70.996391,522,-5,"A","America/Toronto"
29,"Baker Lake","Baker Lake","Canada","YBK","CYBK",64.298889,-96.077778,59,-6,"A","America/Winnipeg"
30,"Campbell River","Campbell River","Canada","YBL","CYBL",49.950832,-125.270833,346,-8,"A","America/Vancouver"
31,"Brandon Muni","Brandon","Canada","YBR","CYBR",49.91,-99.951944,1343,-6,"A","America/Winnipeg"
32,"Cambridge Bay","Cambridge Bay","Canada","YCB","CYCB",69.108055,-105.138333,90,-7,"A","America/Edmonton"
33,"Nanaimo","Nanaimo","Canada","YCD","CYCD",49.052333,-123.870167,93,-8,"A","America/Vancouver"
34,"Castlegar","Castlegar","Canada","YCG","CYCG",49.296389,-117.6325,1624,-8,"A","America/Vancouver"
35,"Miramichi","Chatham","Canada","YCH","CYCH",47.007778,-65.449167,108,-4,"A","America/Halifax"
36,"Charlo","Charlo","Canada","YCL","CYCL",47.990833,-66.330278,132,-4,"A","America/Halifax"
37,"Kugluktuk","Coppermine","Canada","YCO","CYCO",67.816667,-115.143889,74,-7,"A","America/Edmonton"
38,"Coronation","Coronation","Canada","YCT","CYCT",52.075001,-111.445278,2595,-7,"A","America/Edmonton"
39,"Chilliwack","Chilliwack","Canada","YCW","CYCW",49.152779,-121.93889,32,-8,"A","America/Vancouver"
40,"Clyde River","Clyde River","Canada","YCY","CYCY",70.486111,-68.516667,87,-5,"A","America/Toronto"
41,"Fairmont Hot Springs","Coral Harbour","Canada","YZS","CYCZ",64.193333,-83.359444,2661,-5,"A","America/Coral_Harbour"
42,"Dawson City","Dawson","Canada","YDA","CYDA",64.043056,-139.127778,1215,-8,"A","America/Vancouver"
43,"Burwash","Burwash","Canada","YDB","CYDB",61.371111,-139.040556,2647,-8,"A","America/Vancouver"
44,"Princeton","Princeton","Canada","YDC","CYDC",49.468056,-120.511389,2298,-8,"A","America/Vancouver"
45,"Deer Lake","Deer Lake","Canada","YDF","CYDF",49.210833,-57.391388,72,-3.5,"A","America/St_Johns"
46,"Dease Lake","Dease Lake","Canada","YDL","CYDL",58.422222,-130.032222,2600,-8,"A","America/Vancouver"
47,"Dauphin Barker","Dauphin","Canada","YDN","CYDN",51.100834,-100.0525,999,-6,"A","America/Winnipeg"
48,"Dawson Creek","Dawson Creek","Canada","YDQ","CYDQ",55.742333,-120.183,2148,-7,"A","America/Dawson_Creek"
49,"Edmonton Intl","Edmonton","Canada","YEG","CYEG",53.309723,-113.579722,2373,-7,"A","America/Edmonton"
50,"Arviat","Eskimo Point","Canada","YEK","CYEK",61.094166,-94.070833,32,-6,"A","America/Winnipeg"
51,"Estevan","Estevan","Canada","YEN","CYEN",49.210278,-102.965833,1905,-6,"N","America/Regina"
52,"Edson","Edson","Canada","YET","CYET",53.578888,-116.465,3041,-7,"A","America/Edmonton"
53,"Eureka","Eureka","Canada","YEU","CYEU",79.994722,-85.814167,256,-6,"A","America/Winnipeg"
54,"Inuvik Mike Zubko","Inuvik","Canada","YEV","CYEV",68.304167,-133.482778,224,-7,"A","America/Edmonton"
55,"Iqaluit","Iqaluit","Canada","YFB","CYFB",63.75639,-68.555832,110,-5,"A","America/Toronto"
56,"Fredericton","Fredericton","Canada","YFC","CYFC",45.868889,-66.537222,68,-4,"A","America/Halifax"
57,"Forestville","Forestville","Canada","","CYFE",48.746111,-69.097222,293,-5,"A","America/Toronto"
58,"Flin Flon","Flin Flon","Canada","YFO","CYFO",54.678055,-101.681667,997,-6,"A","America/Winnipeg"
59,"Fort Resolution","Fort Resolution","Canada","YFR","CYFR",61.180832,-113.689722,526,-7,"A","America/Edmonton"
60,"Fort Simpson","Fort Simpson","Canada","YFS","CYFS",61.760153,-121.236525,555,-7,"A","America/Edmonton"
61,"Kingston","Kingston","Canada","YGK","CYGK",44.225277,-76.596944,305,-5,"A","America/Toronto"
62,"La Grande Riviere","La Grande Riviere","Canada","YGL","CYGL",53.625278,-77.704167,639,-5,"A","America/Toronto"
63,"Gaspe","Gaspe","Canada","YGP","CYGP",48.775278,-64.478611,108,-5,"A","America/Toronto"
64,"Geraldton Greenstone Regional","Geraldton","Canada","YGQ","CYGQ",49.778332,-86.939445,1144,-5,"A","America/Toronto"
65,"Iles De La Madeleine","Iles De La Madeleine","Canada","YGR","CYGR",47.424721,-61.778056,35,-5,"A","America/Toronto"
66,"Hudson Bay","Hudson Bay","Canada","YHB","CYHB",52.816666,-102.31139,1175,-6,"N","America/Regina"
67,"Dryden Rgnl","Dryden","Canada","YHD","CYHD",49.831667,-92.744167,1354,-6,"A","America/Winnipeg"
68,"Ulukhaktok Holman","Holman Island","Canada","YHI","CYHI",70.762778,-117.806111,117,-7,"A","America/Edmonton"
69,"Gjoa Haven","Gjoa Haven","Canada","YHK","CYHK",68.635556,-95.849722,152,-7,"A","America/Edmonton"
70,"Hamilton","Hamilton","Canada","YHM","CYHM",43.173611,-79.935,780,-5,"A","America/Toronto"
71,"St Hubert","Montreal","Canada","YHU","CYHU",45.5175,-73.416944,90,-5,"A","America/Toronto"
72,"Hay River","Hay River","Canada","YHY","CYHY",60.839722,-115.782778,543,-7,"A","America/Edmonton"
73,"Halifax Intl","Halifax","Canada","YHZ","CYHZ",44.880833,-63.50861,477,-4,"A","America/Halifax"
74,"Atikokan Muni","Atikokan","Canada","YIB","CYIB",48.773888,-91.638611,1408,-5,"A","America/Coral_Harbour"
75,"Pond Inlet","Pond Inlet","Canada","YIO","CYIO",72.683334,-77.966667,181,-5,"A","America/Toronto"
76,"St Jean","St. Jean","Canada","YJN","CYJN",45.294445,-73.281111,136,-5,"A","America/Toronto"
77,"Stephenville","Stephenville","Canada","YJT","CYJT",48.544167,-58.549999,80,-3.5,"A","America/St_Johns"
78,"Kamloops","Kamloops","Canada","YKA","CYKA",50.702222,-120.444444,1133,-8,"A","America/Vancouver"
79,"Waterloo","Waterloo","Canada","YKF","CYKF",43.460833,-80.378611,1054,-5,"A","America/Toronto"
80,"Schefferville","Schefferville","Canada","YKL","CYKL",54.805278,-66.805278,1709,-5,"A","America/Toronto"
81,"Kindersley","Kindersley","Canada","YKY","CYKY",51.5175,-109.180833,2277,-6,"N","America/Regina"
82,"Buttonville Muni","Toronto","Canada","YKZ","CYKZ",43.862221,-79.37,650,-5,"A","America/Toronto"
83,"Chapleau","Chapleau","Canada","YLD","CYLD",47.82,-83.346667,1470,-5,"A","America/Toronto"
84,"Meadow Lake","Meadow Lake","Canada","YLJ","CYLJ",54.125278,-108.522778,1576,-6,"N","America/Regina"
85,"Lloydminster","Lloydminster","Canada","YLL","CYLL",53.309166,-110.0725,2193,-7,"A","America/Edmonton"
86,"Alert","Alert","Canada","YLT","CYLT",82.517778,-62.280556,100,-5,"A","America/Toronto"
87,"Kelowna","Kelowna","Canada","YLW","CYLW",49.956112,-119.377778,1409,-8,"A","America/Vancouver"
88,"Mayo","Mayo","Canada","YMA","CYMA",63.616389,-135.868333,1653,-8,"A","America/Vancouver"
89,"Moose Jaw Air Vice Marshal C M Mcewen","Moose Jaw","Canada","YMJ","CYMJ",50.330278,-105.559167,1892,-6,"N","America/Regina"
90,"Fort Mcmurray","Fort Mcmurray","Canada","YMM","CYMM",56.653333,-111.221944,1211,-7,"A","America/Edmonton"
91,"Moosonee","Moosonee","Canada","YMO","CYMO",51.291111,-80.607778,30,-5,"A","America/Toronto"
92,"Maniwaki","Maniwaki","Canada","YMW","CYMW",46.272778,-75.990556,656,-5,"A","America/Toronto"
93,"Montreal Intl Mirabel","Montreal","Canada","YMX","CYMX",45.681944,-74.005278,270,-5,"A","America/Toronto"
94,"Natashquan","Natashquan","Canada","YNA","CYNA",50.19,-61.789167,39,-5,"A","America/Toronto"
95,"Gatineau","Gatineau","Canada","YND","CYND",45.521694,-75.563589,211,-5,"A","America/Toronto"
96,"Matagami","Matagami","Canada","YNM","CYNM",49.761667,-77.802778,918,-5,"A","America/Toronto"
97,"Old Crow","Old Crow","Canada","YOC","CYOC",67.570556,-139.839167,824,-8,"A","America/Vancouver"
98,"Cold Lake","Cold Lake","Canada","YOD","CYOD",54.404999,-110.279444,1775,-7,"A","America/Edmonton"
99,"High Level","High Level","Canada","YOJ","CYOJ",58.621389,-117.164722,1110,-7,"A","America/Edmonton"
100,"Ottawa Macdonald Cartier Intl","Ottawa","Canada","YOW","CYOW",45.3225,-75.669167,374,-5,"A","America/Toronto"
101,"Prince Albert Glass Field","Prince Albert","Canada","YPA","CYPA",53.214167,-105.672778,1405,-6,"N","America/Regina"
102,"Peace River","Peace River","Canada","YPE","CYPE",56.226944,-117.447222,1873,-7,"A","America/Edmonton"
103,"Southport","Portage-la-prairie","Canada","YPG","CYPG",49.903056,-98.273889,885,-6,"A","America/Winnipeg"
104,"Pitt Meadows","Pitt Meadows","Canada","","CYPK",49.21611,-122.71,11,-8,"A","America/Vancouver"
105,"Pickle Lake","Pickle Lake","Canada","YPL","CYPL",51.446388,-90.214167,1267,-5,"A","America/Coral_Harbour"
106,"Port Menier","Port Menier","Canada","YPN","CYPN",49.836389,-64.288611,167,-5,"A","America/Toronto"
107,"Peterborough","Peterborough","Canada","YPQ","CYPQ",44.23,-78.363333,628,-5,"A","America/Toronto"
108,"Prince Rupert","Prince Pupert","Canada","YPR","CYPR",54.28611,-130.444722,116,-8,"A","America/Vancouver"
109,"Fort Chipewyan","Fort Chipewyan","Canada","YPY","CYPY",58.767223,-111.117222,761,-7,"A","America/Edmonton"
110,"Muskoka","Muskoka","Canada","YQA","CYQA",44.974722,-79.303333,925,-5,"A","America/Toronto"
111,"Quebec Jean Lesage Intl","Quebec","Canada","YQB","CYQB",46.791111,-71.393333,244,-5,"A","America/Toronto"
112,"Red Deer Regional","Red Deer Industrial","Canada","YQF","CYQF",52.182222,-113.894444,2968,-7,"A","America/Edmonton"
113,"Windsor","Windsor","Canada","YQG","CYQG",42.275556,-82.955556,622,-5,"A","America/Toronto"
114,"Watson Lake","Watson Lake","Canada","YQH","CYQH",60.116389,-128.8225,2255,-8,"A","America/Vancouver"
115,"Kenora","Kenora","Canada","YQK","CYQK",49.788334,-94.363056,1332,-6,"A","America/Winnipeg"
116,"Lethbridge","Lethbridge","Canada","YQL","CYQL",49.630278,-112.799722,3047,-7,"A","America/Edmonton"
117,"Greater Moncton Intl","Moncton","Canada","YQM","CYQM",46.112221,-64.678611,232,-4,"A","America/Halifax"
119,"Comox","Comox","Canada","YQQ","CYQQ",49.710833,-124.886667,84,-8,"A","America/Vancouver"
120,"Regina Intl","Regina","Canada","YQR","CYQR",50.431944,-104.665833,1894,-6,"N","America/Regina"
121,"Thunder Bay","Thunder Bay","Canada","YQT","CYQT",48.371944,-89.323889,653,-5,"A","America/Toronto"
122,"Grande Prairie","Grande Prairie","Canada","YQU","CYQU",55.179722,-118.885,2195,-7,"A","America/Edmonton"
123,"Yorkton Muni","Yorkton","Canada","YQV","CYQV",51.264721,-102.461667,1635,-6,"N","America/Regina"
124,"North Battleford","North Battleford","Canada","YQW","CYQW",52.769167,-108.24361,1799,-6,"N","America/Regina"
125,"Gander Intl","Gander","Canada","YQX","CYQX",48.936944,-54.568056,496,-3.5,"A","America/St_Johns"
126,"Sydney","Sydney","Canada","YQY","CYQY",46.161388,-60.047779,203,-4,"A","America/Halifax"
127,"Quesnel","Quesnel","Canada","YQZ","CYQZ",53.026112,-122.510278,1789,-8,"A","America/Vancouver"
128,"Resolute Bay","Resolute","Canada","YRB","CYRB",74.716944,-94.969444,215,-6,"A","America/Winnipeg"
129,"Riviere Du Loup","Riviere Du Loup","Canada","YRI","CYRI",47.764444,-69.584722,427,-5,"A","America/Toronto"
130,"Roberval","Roberval","Canada","YRJ","CYRJ",48.52,-72.265556,586,-5,"A","America/Toronto"
131,"Rocky Mountain House","Rocky Mountain House","Canada","YRM","CYRM",52.429722,-114.904167,3244,-7,"A","America/Edmonton"
132,"Rankin Inlet","Rankin Inlet","Canada","YRT","CYRT",62.81139,-92.115833,94,-6,"A","America/Winnipeg"
133,"Sudbury","Sudbury","Canada","YSB","CYSB",46.625,-80.798889,1141,-5,"A","America/Toronto"
134,"Sherbrooke","Sherbrooke","Canada","YSC","CYSC",45.438611,-71.691389,792,-5,"A","America/Toronto"
135,"Saint John","St. John","Canada","YSJ","CYSJ",45.316111,-65.890278,357,-4,"A","America/Halifax"
136,"Fort Smith","Fort Smith","Canada","YSM","CYSM",60.020278,-111.961944,671,-7,"A","America/Edmonton"
137,"Nanisivik","Nanisivik","Canada","YSR","CYSR",72.982222,-84.613611,2106,-5,"A","America/Toronto"
138,"Summerside","Summerside","Canada","YSU","CYSU",46.440556,-63.833611,56,-4,"A","America/Halifax"
139,"Sachs Harbour","Sachs Harbour","Canada","YSY","CYSY",71.993889,-125.2425,282,-7,"A","America/Edmonton"
140,"Cape Dorset","Cape Dorset","Canada","YTE","CYTE",64.23,-76.526667,164,-5,"A","America/Toronto"
141,"Thompson","Thompson","Canada","YTH","CYTH",55.801111,-97.864166,729,-6,"A","America/Winnipeg"
142,"Trenton","Trenton","Canada","YTR","CYTR",44.118889,-77.528056,283,-5,"A","America/Toronto"
143,"Timmins","Timmins","Canada","YTS","CYTS",48.569721,-81.376667,967,-5,"A","America/Toronto"
144,"City Centre","Toronto","Canada","YTZ","CYTZ",43.627499,-79.396167,251,-5,"A","America/Toronto"
145,"Tuktoyaktuk","Tuktoyaktuk","Canada","YUB","CYUB",69.433334,-133.026389,15,-7,"A","America/Edmonton"
146,"Pierre Elliott Trudeau Intl","Montreal","Canada","YUL","CYUL",45.470556,-73.740833,118,-5,"A","America/Toronto"
147,"Repulse Bay","Repulse Bay","Canada","YUT","CYUT",66.521389,-86.224722,80,-6,"A","America/Winnipeg"
148,"Hall Beach","Hall Beach","Canada","YUX","CYUX",68.776111,-81.243611,27,-5,"A","America/Toronto"
149,"Rouyn Noranda","Rouyn","Canada","YUY","CYUY",48.206111,-78.835556,988,-5,"A","America/Toronto"
150,"La Ronge","La Ronge","Canada","YVC","CYVC",55.15139,-105.261944,1242,-6,"N","America/Regina"
151,"Vermilion","Vermillion","Canada","YVG","CYVG",53.355833,-110.82389,2025,-7,"A","America/Edmonton"
152,"Qikiqtarjuaq","Broughton Island","Canada","YVM","CYVM",67.545833,-64.031389,21,-5,"A","America/Toronto"
153,"Val D Or","Val D'or","Canada","YVO","CYVO",48.053333,-77.782778,1107,-5,"A","America/Toronto"
154,"Kuujjuaq","Quujjuaq","Canada","YVP","CYVP",58.096111,-68.426944,129,-5,"A","America/Toronto"
155,"Norman Wells","Norman Wells","Canada","YVQ","CYVQ",65.281617,-126.798219,238,-7,"A","America/Edmonton"
156,"Vancouver Intl","Vancouver","Canada","YVR","CYVR",49.193889,-123.184444,14,-8,"A","America/Vancouver"


hdfs dfs -copyFromLocal airports.csv /user/

scala> val airportDF = spark.read.format("csv").option("header","true").load("hdfs://localhost:9000/user/airports.csv")
airportDF: org.apache.spark.sql.DataFrame = [AirportID: string, Name: string ... 10 more fields]

scala> airportDF.printSchema
root
 |-- AirportID: string (nullable = true)
 |-- Name: string (nullable = true)
 |-- City: string (nullable = true)
 |-- Country: string (nullable = true)
 |-- FAA: string (nullable = true)
 |-- ICAO: string (nullable = true)
 |-- Latitude: string (nullable = true)
 |-- Longitude: string (nullable = true)
 |-- Altitude: string (nullable = true)
 |-- Timezone: string (nullable = true)
 |-- DST: string (nullable = true)
 |-- Tz: string (nullable = true)


scala> airportDF.show(3)
+---------+-----------+-----------+----------------+---+----+---------+----------+--------+--------+---+--------------------+
|AirportID|       Name|       City|         Country|FAA|ICAO| Latitude| Longitude|Altitude|Timezone|DST|                  Tz|
+---------+-----------+-----------+----------------+---+----+---------+----------+--------+--------+---+--------------------+
|        1|     Goroka|     Goroka|Papua New Guinea|GKA|AYGA|-6.081689|145.391881|    5282|      10|  U|Pacific/Port_Moresby|
|        2|     Madang|     Madang|Papua New Guinea|MAG|AYMD|-5.207083|  145.7887|      20|      10|  U|Pacific/Port_Moresby|
|        3|Mount Hagen|Mount Hagen|Papua New Guinea|HGU|AYMH|-5.826789|144.295861|    5388|      10|  U|Pacific/Port_Moresby|
+---------+-----------+-----------+----------------+---+----+---------+----------+--------+--------+---+--------------------+
only showing top 3 rows

scala> airportDF.createOrReplaceTempView("airports")

scala> val southEast = spark.sql("select AirportID, Name, Latitude, Longitude from airports where Latitude<0 and Longitude>0")

scala> southEast.show
+---------+--------------------+---------+----------+
|AirportID|                Name| Latitude| Longitude|
+---------+--------------------+---------+----------+
|        1|              Goroka|-6.081689|145.391881|
|        2|              Madang|-5.207083|  145.7887|
|        3|         Mount Hagen|-5.826789|144.295861|
|        4|              Nadzab|-6.569828|146.726242|
|        5|Port Moresby Jack...|-9.443383| 147.22005|
|        6|          Wewak Intl|-3.583828|143.669186|
+---------+--------------------+---------+----------+


scala> val uniqueCitiesInEachCountry = spark.sql("select Country, count(distinct(City)) from airports group by Country")
uniqueCitiesInEachCountry: org.apache.spark.sql.DataFrame = [Country: string, count(DISTINCT City): bigint]

scala> uniqueCitiesInEachCountry.show
+----------------+--------------------+                                       
|         Country|count(DISTINCT City)|
+----------------+--------------------+
|         Iceland|                  10|
|          Canada|                 131|
|       Greenland|                   4|
|Papua New Guinea|                   6|
+----------------+--------------------+


scala> val averageAltitude = spark.sql("select Country , avg(Altitude) from airports group by Country")
averageAltitude: org.apache.spark.sql.DataFrame = [Country: string, avg(CAST(Altitude AS DOUBLE)): double]

scala> averageAltitude.show
+----------------+-----------------------------+                             
|         Country|avg(CAST(Altitude AS DOUBLE))|
+----------------+-----------------------------+
|         Iceland|                         72.8|
|          Canada|            852.6666666666666|
|       Greenland|                       202.75|
|Papua New Guinea|                       1849.0|
+----------------+-----------------------------+

val airportsCountForEachTimeZones = spark.sql("select Tz , count(Tz) from airports group by Tz")

scala> airportsCountForEachTimeZones.show
+--------------------+---------+
|                  Tz|count(Tz)|
+--------------------+---------+
|  Atlantic/Reykjavik|       10|
|   America/Vancouver|       19|
|     America/Toronto|       48|
|America/Coral_Har...|        3|
|     America/Halifax|        9|
|    America/Edmonton|       27|
|     America/Godthab|        3|
|      America/Regina|       10|
|       America/Thule|        1|
|America/Dawson_Creek|        1|
|    America/St_Johns|        4|
|Pacific/Port_Moresby|        6|
|    America/Winnipeg|       14|
+--------------------+---------+


scala> val averageLatLongForEachCountry = spark.sql("select Country, avg(Latitude), avg(Longitude) from airports group by Country")
averageLatLongForEachCountry: org.apache.spark.sql.DataFrame = [Country: string, avg(CAST(Latitude AS DOUBLE)): double ... 1 more field]

scala> averageLatLongForEachCountry.show
+----------------+-----------------------------+------------------------------+
|         Country|avg(CAST(Latitude AS DOUBLE))|avg(CAST(Longitude AS DOUBLE))|
+----------------+-----------------------------+------------------------------+
|         Iceland|                   65.0477736|                   -19.5969224|
|          Canada|            53.94868565185185|            -93.95003623703704|
|       Greenland|                  67.22490275|           -54.124131999999996|
|Papua New Guinea|           -6.118766666666666|                     145.51532|
+----------------+-----------------------------+------------------------------+

scala> val countOfDSTs = spark.sql("select count(distinct(DST)) from airports")
countOfDSTs: org.apache.spark.sql.DataFrame = [count(DISTINCT DST): bigint]

scala> countOfDSTs.show
+-------------------+                                                         
|count(DISTINCT DST)|
+-------------------+
|                  4|
+-------------------+

scala> val NorthWestAirports = spark.sql("select AirportID, Name, Latitude, Longitude from airports where Latitude>0 and Longitude<0")
NorthWestAirports: org.apache.spark.sql.DataFrame = [AirportID: string, Name: string ... 2 more fields]

scala> NorthWestAirports.write.format("csv").save("nwOutput")

scala> NorthWestAirports.write.format("csv").option("header","true").save("nwOutputwithHeader")

No comments:

Post a Comment

Flume - Simple Demo

// create a folder in hdfs : $ hdfs dfs -mkdir /user/flumeExa // Create a shell script which generates : Hadoop in real world <n>...