Tuesday, 11 August 2020

Read a JSON file using SQLContext

customers.json:
---------------

{"userId":1,"firstName":"Krish","lastName":"Lee","phoneNumber":"123456","emailAddress":"krish.lee@learningcontainer.com"},
{"userId":2,"firstName":"racks","lastName":"jacson","phoneNumber":"123456","emailAddress":"racks.jacson@learningcontainer.com"},
{"userId":3,"firstName":"denial","lastName":"roast","phoneNumber":"33333333","emailAddress":"denial.roast@learningcontainer.com"},
{"userId":4,"firstName":"devid","lastName":"neo","phoneNumber":"222222222","emailAddress":"devid.neo@learningcontainer.com"},
{"userId":5,"firstName":"jone","lastName":"mac","phoneNumber":"111111111","emailAddress":"jone.mac@learningcontainer.com"}


// Read the JSON file into a DataFrame
scala> val jsonDF = sqlContext.read.json("/user/data/customers.json")
 


scala> jsonDF.show(5)
+--------------------+---------+--------+-----------+------+
|        emailAddress|firstName|lastName|phoneNumber|userId|
+--------------------+---------+--------+-----------+------+
|krish.lee@learnin...|    Krish|     Lee|     123456|     1|
|racks.jacson@lear...|    racks|  jacson|     123456|     2|
|denial.roast@lear...|   denial|   roast|   33333333|     3|
|devid.neo@learnin...|    devid|     neo|  222222222|     4|
|jone.mac@learning...|     jone|     mac|  111111111|     5|
+--------------------+---------+--------+-----------+------+

scala> jsonDF.show(false)
+----------------------------------+---------+--------+-----------+------+
|emailAddress                      |firstName|lastName|phoneNumber|userId|
+----------------------------------+---------+--------+-----------+------+
|krish.lee@learningcontainer.com   |Krish    |Lee     |123456     |1     |
|racks.jacson@learningcontainer.com|racks    |jacson  |123456     |2     |
|denial.roast@learningcontainer.com|denial   |roast   |33333333   |3     |
|devid.neo@learningcontainer.com   |devid    |neo     |222222222  |4     |
|jone.mac@learningcontainer.com    |jone     |mac     |111111111  |5     |
+----------------------------------+---------+--------+-----------+------+


scala> jsonDF.printSchema()
root
 |-- emailAddress: string (nullable = true)
 |-- firstName: string (nullable = true)
 |-- lastName: string (nullable = true)
 |-- phoneNumber: string (nullable = true)
 |-- userId: long (nullable = true)
 
 
scala> jsonDF.select(col("lastName"), col("firstName")).show()
+--------+---------+
|lastName|firstName|
+--------+---------+
|     Lee|    Krish|
|  jacson|    racks|
|   roast|   denial|
|     neo|    devid|
|     mac|     jone|
+--------+---------+

// Write the DataFrame out to HDFS as JSON files
scala> jsonDF.write.format("json").save("/user/data/customer_exported")


$ hdfs dfs -ls /user/data/customer_exported
Found 2 items
-rw-r--r--   1 cloudera supergroup          0 2020-08-11 04:06 /user/data/customer_exported/_SUCCESS
-rw-r--r--   1 cloudera supergroup        628 2020-08-11 04:06 /user/data/customer_exported/part-r-00000-f30f4e83-a78e-43f1-8e31-320c2d9a868f


$ hdfs dfs -cat /user/data/customer_exported/part-r-00000-f30f4e83-a78e-43f1-8e31-320c2d9a868f
{"emailAddress":"krish.lee@learningcontainer.com","firstName":"Krish","lastName":"Lee","phoneNumber":"123456","userId":1}
{"emailAddress":"racks.jacson@learningcontainer.com","firstName":"racks","lastName":"jacson","phoneNumber":"123456","userId":2}
{"emailAddress":"denial.roast@learningcontainer.com","firstName":"denial","lastName":"roast","phoneNumber":"33333333","userId":3}
{"emailAddress":"devid.neo@learningcontainer.com","firstName":"devid","lastName":"neo","phoneNumber":"222222222","userId":4}
{"emailAddress":"jone.mac@learningcontainer.com","firstName":"jone","lastName":"mac","phoneNumber":"111111111","userId":5}

No comments:

Post a Comment

Flume - Simple Demo

// create a folder in hdfs : $ hdfs dfs -mkdir /user/flumeExa // Create a shell script which generates : Hadoop in real world <n>...