Sunday, 2 August 2020

Create Avro, Parquet, and Sequence files using Sqoop import

Import data from MySQL with Sqoop and save the output in each of three formats: Avro, Parquet, and Sequence files.

// Save as Avro file
$ sqoop import \
-connect jdbc:mysql://localhost/retail_db \
-username root \
-password cloudera \
-table customers \
-target-dir /user/cloudera/cust_avro \
-as-avrodatafile \
-delete-target-dir
 

hdfs dfs -ls /user/cloudera/cust_avro
Found 5 items
-rw-r--r--   1 cloudera cloudera          0 2020-08-02 22:37 /user/cloudera/cust_avro/_SUCCESS
-rw-r--r--   1 cloudera cloudera     258086 2020-08-02 22:37 /user/cloudera/cust_avro/part-m-00000.avro
-rw-r--r--   1 cloudera cloudera     257862 2020-08-02 22:37 /user/cloudera/cust_avro/part-m-00001.avro
-rw-r--r--   1 cloudera cloudera     259118 2020-08-02 22:37 /user/cloudera/cust_avro/part-m-00002.avro
-rw-r--r--   1 cloudera cloudera     260893 2020-08-02 22:37 /user/cloudera/cust_avro/part-m-00003.avro


//Save as parquet file
$ sqoop import \
-connect jdbc:mysql://localhost/retail_db \
-username root \
-password cloudera \
-table customers \
-target-dir /user/cloudera/cust_parquet \
-as-parquetfile \
-delete-target-dir


hdfs dfs -ls /user/cloudera/cust_parquet
Found 6 items
drwxr-xr-x   - cloudera cloudera          0 2020-08-02 22:38 /user/cloudera/cust_parquet/.metadata
drwxr-xr-x   - cloudera cloudera          0 2020-08-02 22:39 /user/cloudera/cust_parquet/.signals
-rw-r--r--   1 cloudera cloudera      88944 2020-08-02 22:39 /user/cloudera/cust_parquet/0395a40c-1223-45e3-bff7-2bc061edfb5c.parquet
-rw-r--r--   1 cloudera cloudera      89047 2020-08-02 22:39 /user/cloudera/cust_parquet/10c94461-e193-49fc-9f4e-c0c3be93c7e3.parquet
-rw-r--r--   1 cloudera cloudera      88762 2020-08-02 22:39 /user/cloudera/cust_parquet/1ac81c8a-7422-402f-ade5-01173f580a1a.parquet
-rw-r--r--   1 cloudera cloudera      89163 2020-08-02 22:39 /user/cloudera/cust_parquet/505b1ef1-fdde-4e25-9acb-ead834a6d4af.parquet



//Save as sequence file
$ sqoop import \
-connect jdbc:mysql://localhost/retail_db \
-username root \
-password cloudera \
-table customers \
-target-dir /user/cloudera/cust_sequence \
-as-sequencefile \
-delete-target-dir


hdfs dfs -ls /user/cloudera/cust_sequence
Found 5 items
-rw-r--r--   1 cloudera cloudera          0 2020-08-02 22:41 /user/cloudera/cust_sequence/_SUCCESS
-rw-r--r--   1 cloudera cloudera     315998 2020-08-02 22:41 /user/cloudera/cust_sequence/part-m-00000
-rw-r--r--   1 cloudera cloudera     315711 2020-08-02 22:41 /user/cloudera/cust_sequence/part-m-00001
-rw-r--r--   1 cloudera cloudera     315814 2020-08-02 22:41 /user/cloudera/cust_sequence/part-m-00002
-rw-r--r--   1 cloudera cloudera     315633 2020-08-02 22:41 /user/cloudera/cust_sequence/part-m-00003

No comments:

Post a Comment

Flume - Simple Demo

// create a folder in hdfs : $ hdfs dfs -mkdir /user/flumeExa // Create a shell script which generates : Hadoop in real world <n>...