customers.json:
---------------
{"userId":1,"firstName":"Krish","lastName":"Lee","phoneNumber":"123456","emailAddress":"krish.lee@learningcontainer.com"}
{"userId":2,"firstName":"racks","lastName":"jacson","phoneNumber":"123456","emailAddress":"racks.jacson@learningcontainer.com"}
{"userId":3,"firstName":"denial","lastName":"roast","phoneNumber":"33333333","emailAddress":"denial.roast@learningcontainer.com"}
{"userId":4,"firstName":"devid","lastName":"neo","phoneNumber":"222222222","emailAddress":"devid.neo@learningcontainer.com"}
{"userId":5,"firstName":"jone","lastName":"mac","phoneNumber":"111111111","emailAddress":"jone.mac@learningcontainer.com"}
// Read the JSON file into a DataFrame
scala> val jsonDF = sqlContext.read.json("/user/data/customers.json")
scala> jsonDF.show(5)
+--------------------+---------+--------+-----------+------+
|        emailAddress|firstName|lastName|phoneNumber|userId|
+--------------------+---------+--------+-----------+------+
|krish.lee@learnin...|    Krish|     Lee|     123456|     1|
|racks.jacson@lear...|    racks|  jacson|     123456|     2|
|denial.roast@lear...|   denial|   roast|   33333333|     3|
|devid.neo@learnin...|    devid|     neo|  222222222|     4|
|jone.mac@learning...|     jone|     mac|  111111111|     5|
+--------------------+---------+--------+-----------+------+
scala> jsonDF.show(false)
+----------------------------------+---------+--------+-----------+------+
|emailAddress                      |firstName|lastName|phoneNumber|userId|
+----------------------------------+---------+--------+-----------+------+
|krish.lee@learningcontainer.com   |Krish    |Lee     |123456     |1     |
|racks.jacson@learningcontainer.com|racks    |jacson  |123456     |2     |
|denial.roast@learningcontainer.com|denial   |roast   |33333333   |3     |
|devid.neo@learningcontainer.com   |devid    |neo     |222222222  |4     |
|jone.mac@learningcontainer.com    |jone     |mac     |111111111  |5     |
+----------------------------------+---------+--------+-----------+------+
scala> jsonDF.printSchema()
root
|-- emailAddress: string (nullable = true)
|-- firstName: string (nullable = true)
|-- lastName: string (nullable = true)
|-- phoneNumber: string (nullable = true)
|-- userId: long (nullable = true)
scala> jsonDF.select(col("lastName"), col("firstName") ).show()
+--------+---------+
|lastName|firstName|
+--------+---------+
|     Lee|    Krish|
|  jacson|    racks|
|   roast|   denial|
|     neo|    devid|
|     mac|     jone|
+--------+---------+
// Write the DataFrame out as JSON (the target path becomes a directory of part files)
scala> jsonDF.write.format("json").save("/user/data/customer_exported")
$ hdfs dfs -ls /user/data/customer_exported
Found 2 items
-rw-r--r-- 1 cloudera supergroup 0 2020-08-11 04:06 /user/data/customer_exported/_SUCCESS
-rw-r--r-- 1 cloudera supergroup 628 2020-08-11 04:06 /user/data/customer_exported/part-r-00000-f30f4e83-a78e-43f1-8e31-320c2d9a868f
$ hdfs dfs -cat /user/data/customer_exported/part-r-00000-f30f4e83-a78e-43f1-8e31-320c2d9a868f
{"emailAddress":"krish.lee@learningcontainer.com","firstName":"Krish","lastName":"Lee","phoneNumber":"123456","userId":1}
{"emailAddress":"racks.jacson@learningcontainer.com","firstName":"racks","lastName":"jacson","phoneNumber":"123456","userId":2}
{"emailAddress":"denial.roast@learningcontainer.com","firstName":"denial","lastName":"roast","phoneNumber":"33333333","userId":3}
{"emailAddress":"devid.neo@learningcontainer.com","firstName":"devid","lastName":"neo","phoneNumber":"222222222","userId":4}
{"emailAddress":"jone.mac@learningcontainer.com","firstName":"jone","lastName":"mac","phoneNumber":"111111111","userId":5}
No comments:
Post a Comment