Monday, 3 August 2020

NULL Handling while SQOOP import

//Handling nulls while doing SQOOP import

use retail_db;

CREATE TABLE suppliers
( id int NOT NULL,
  name char(50),
  age int
);

mysql> describe suppliers;
+-------+----------+------+-----+---------+-------+
| Field | Type     | Null | Key | Default | Extra |
+-------+----------+------+-----+---------+-------+
| id    | int(11)  | NO   |     | NULL    |       |
| name  | char(50) | YES  |     | NULL    |       |
| age   | int(11)  | YES  |     | NULL    |       |
+-------+----------+------+-----+---------+-------+

insert into retail_db.suppliers (id,name,age) values (100,NULL,NULL),(101,'Raja',NULL),(103,NULL,NULL),(104,'Manoj',NULL), (105,NULL,30), (106,NULL,NULL);
 
mysql>  select * from retail_db.suppliers;
+-----+-------+------+
| id  | name  | age  |
+-----+-------+------+
| 100 | NULL  | NULL |
| 101 | Raja  | NULL |
| 103 | NULL  | NULL |
| 104 | Manoj | NULL |
| 105 | NULL  |   30 |
| 106 | NULL  | NULL |
+-----+-------+------+

// String column which is having NULL will be replaced with 'xxxx'
// Non String column which has NULL will be replaced with 'yyy'

sqoop import \
-connect jdbc:mysql://localhost/retail_db \
-username root \
-password cloudera \
-table suppliers \
-target-dir /user/cloudera/suppliers_null_tr \
-delete-target-dir \
-null-string "xxxx" \
-null-non-string 'yyy' \
-split-by id

Retrieved 6 records.


 hdfs dfs -ls /user/cloudera/suppliers_null_tr 
Found 5 items
-rw-r--r--   1 cloudera cloudera          0 2020-08-03 02:02 /user/cloudera/suppliers_null_tr/_SUCCESS
-rw-r--r--   1 cloudera cloudera         28 2020-08-03 02:02 /user/cloudera/suppliers_null_tr/part-m-00000
-rw-r--r--   1 cloudera cloudera         14 2020-08-03 02:02 /user/cloudera/suppliers_null_tr/part-m-00001
-rw-r--r--   1 cloudera cloudera         15 2020-08-03 02:02 /user/cloudera/suppliers_null_tr/part-m-00002
-rw-r--r--   1 cloudera cloudera         26 2020-08-03 02:02 /user/cloudera/suppliers_null_tr/part-m-00003

$ hdfs dfs -cat /user/cloudera/suppliers_null_tr/* 
100,xxxx,yyy
101,Raja,yyy
103,xxxx,yyy
104,Manoj,yyy
105,xxxx,30
106,xxxx,yyy


No comments:

Post a Comment

Flume - Simple Demo

// create a folder in hdfs : $ hdfs dfs -mkdir /user/flumeExa // Create a shell script which generates : Hadoop in real world <n>...