//Handling nulls while doing SQOOP import
use retail_db;
CREATE TABLE suppliers
( id int NOT NULL,
name char(50),
age int
);
mysql> describe suppliers;
+-------+----------+------+-----+---------+-------+
| Field | Type | Null | Key | Default | Extra |
+-------+----------+------+-----+---------+-------+
| id | int(11) | NO | | NULL | |
| name | char(50) | YES | | NULL | |
| age | int(11) | YES | | NULL | |
+-------+----------+------+-----+---------+-------+
insert into retail_db.suppliers (id,name,age) values (100,NULL,NULL),(101,'Raja',NULL),(103,NULL,NULL),(104,'Manoj',NULL), (105,NULL,30), (106,NULL,NULL);
mysql> select * from retail_db.suppliers;
+-----+-------+------+
| id | name | age |
+-----+-------+------+
| 100 | NULL | NULL |
| 101 | Raja | NULL |
| 103 | NULL | NULL |
| 104 | Manoj | NULL |
| 105 | NULL | 30 |
| 106 | NULL | NULL |
+-----+-------+------+
// String column which is having NULL will be replaced with 'xxxx'
// Non String column which has NULL will be replaced with 'yyy'
sqoop import \
-connect jdbc:mysql://localhost/retail_db \
-username root \
-password cloudera \
-table suppliers \
-target-dir /user/cloudera/suppliers_null_tr \
-delete-target-dir \
-null-string "xxxx" \
-null-non-string 'yyy' \
-split-by id
Retrieved 6 records.
hdfs dfs -ls /user/cloudera/suppliers_null_tr
Found 5 items
-rw-r--r-- 1 cloudera cloudera 0 2020-08-03 02:02 /user/cloudera/suppliers_null_tr/_SUCCESS
-rw-r--r-- 1 cloudera cloudera 28 2020-08-03 02:02 /user/cloudera/suppliers_null_tr/part-m-00000
-rw-r--r-- 1 cloudera cloudera 14 2020-08-03 02:02 /user/cloudera/suppliers_null_tr/part-m-00001
-rw-r--r-- 1 cloudera cloudera 15 2020-08-03 02:02 /user/cloudera/suppliers_null_tr/part-m-00002
-rw-r--r-- 1 cloudera cloudera 26 2020-08-03 02:02 /user/cloudera/suppliers_null_tr/part-m-00003
$ hdfs dfs -cat /user/cloudera/suppliers_null_tr/*
100,xxxx,yyy
101,Raja,yyy
103,xxxx,yyy
104,Manoj,yyy
105,xxxx,30
106,xxxx,yyy
No comments:
Post a Comment