// Import Data from MySQL to HDFS using SQOOP
//mapper option not specified - so default mapper is 4
// target-dir is not specified - so it will be imported in user/cloudera folder
sqoop import \
-connect jdbc:mysql://localhost:3306/retail_db \
-username root \
-password cloudera \
-table customers;
SELECT t.* FROM `customers` AS t LIMIT 1
Num splits: 4 from: 1 to: 12435
Retrieved 12435 records.
URL to check the log : http://quickstart.cloudera:8088/proxy/application_1596428369104_0001/
Result in hdfs:
hdfs dfs -ls /user/cloudera/customers
Found 5 items
-rw-r--r-- 1 cloudera cloudera 0 2020-08-02 21:42 /user/cloudera/customers/_SUCCESS
-rw-r--r-- 1 cloudera cloudera 237145 2020-08-02 21:42 /user/cloudera/customers/part-m-00000
-rw-r--r-- 1 cloudera cloudera 237965 2020-08-02 21:42 /user/cloudera/customers/part-m-00001
-rw-r--r-- 1 cloudera cloudera 238092 2020-08-02 21:42 /user/cloudera/customers/part-m-00002
-rw-r--r-- 1 cloudera cloudera 240323 2020-08-02 21:42 /user/cloudera/customers/part-m-00003
mysql> select count(1) from customers;
+----------+
| count(1) |
+----------+
| 12435 |
+----------+
1 row in set (0.02 sec)
// Line count checking in hdfs files
hdfs dfs -cat /user/cloudera/customers/* | wc -l
12435
// get 1st 10 lines..
[cloudera@quickstart ~]$ hdfs dfs -cat /user/cloudera/customers/part-m-00000 | head
1,Richard,Hernandez,XXXXXXXXX,XXXXXXXXX,6303 Heather Plaza,Brownsville,TX,78521
2,Mary,Barrett,XXXXXXXXX,XXXXXXXXX,9526 Noble Embers Ridge,Littleton,CO,80126
3,Ann,Smith,XXXXXXXXX,XXXXXXXXX,3422 Blue Pioneer Bend,Caguas,PR,00725
4,Mary,Jones,XXXXXXXXX,XXXXXXXXX,8324 Little Common,San Marcos,CA,92069
5,Robert,Hudson,XXXXXXXXX,XXXXXXXXX,10 Crystal River Mall ,Caguas,PR,00725
6,Mary,Smith,XXXXXXXXX,XXXXXXXXX,3151 Sleepy Quail Promenade,Passaic,NJ,07055
7,Melissa,Wilcox,XXXXXXXXX,XXXXXXXXX,9453 High Concession,Caguas,PR,00725
8,Megan,Smith,XXXXXXXXX,XXXXXXXXX,3047 Foggy Forest Plaza,Lawrence,MA,01841
9,Mary,Perez,XXXXXXXXX,XXXXXXXXX,3616 Quaking Street,Caguas,PR,00725
10,Melissa,Smith,XXXXXXXXX,XXXXXXXXX,8598 Harvest Beacon Plaza,Stafford,VA,22554
// increase the number of mappers to 8:
sqoop import \
-connect jdbc:mysql://localhost:3306/retail_db \
-username root \
-password cloudera \
-table customers
-m 8
// We get the below error
Output directory hdfs://quickstart.cloudera:8020/user/cloudera/customers already exists
// delete the output folder
$ hdfs dfs -rm -r hdfs://quickstart.cloudera:8020/user/cloudera/customers
Deleted hdfs://quickstart.cloudera:8020/user/cloudera/customers
$ hdfs dfs -ls hdfs://quickstart.cloudera:8020/user/cloudera/customers
Found 9 items
-rw-r--r-- 1 cloudera cloudera 0 2020-08-02 21:58 hdfs://quickstart.cloudera:8020/user/cloudera/customers/_SUCCESS
-rw-r--r-- 1 cloudera cloudera 118147 2020-08-02 21:57 hdfs://quickstart.cloudera:8020/user/cloudera/customers/part-m-00000
-rw-r--r-- 1 cloudera cloudera 119075 2020-08-02 21:57 hdfs://quickstart.cloudera:8020/user/cloudera/customers/part-m-00001
-rw-r--r-- 1 cloudera cloudera 119057 2020-08-02 21:57 hdfs://quickstart.cloudera:8020/user/cloudera/customers/part-m-00002
-rw-r--r-- 1 cloudera cloudera 118831 2020-08-02 21:57 hdfs://quickstart.cloudera:8020/user/cloudera/customers/part-m-00003
-rw-r--r-- 1 cloudera cloudera 118798 2020-08-02 21:57 hdfs://quickstart.cloudera:8020/user/cloudera/customers/part-m-00004
-rw-r--r-- 1 cloudera cloudera 119294 2020-08-02 21:57 hdfs://quickstart.cloudera:8020/user/cloudera/customers/part-m-00005
-rw-r--r-- 1 cloudera cloudera 119892 2020-08-02 21:58 hdfs://quickstart.cloudera:8020/user/cloudera/customers/part-m-00006
-rw-r--r-- 1 cloudera cloudera 120431 2020-08-02 21:58 hdfs://quickstart.cloudera:8020/user/cloudera/customers/part-m-00007
// reduced number of mappers as 2
//delete target directory if already exists
sqoop import \
-connect jdbc:mysql://localhost:3306/retail_db \
-username root \
-password cloudera \
-table customers \
-m 2 \
-target-dir /user/cloudera/customer_new \
-delete-target-dir
// only 2 mappers
$ hdfs dfs -ls /user/cloudera/customer_new
Found 3 items
-rw-r--r-- 1 cloudera cloudera 0 2020-08-02 22:12 /user/cloudera/customer_new/_SUCCESS
-rw-r--r-- 1 cloudera cloudera 475038 2020-08-02 22:12 /user/cloudera/customer_new/part-m-00000
-rw-r--r-- 1 cloudera cloudera 478487 2020-08-02 22:12 /user/cloudera/customer_new/part-m-00001
//re-run the same sqoop command again we wont get any error
//as target directory already exists