Friday, 28 December 2018

Incremental append in SQOOP import

[cloudera@quickstart ~]$ mysql -uroot -pcloudera

mysql> show databases;
+--------------------+
| Database           |
+--------------------+
| information_schema |
| TRVL_DB            |
| cm                 |
| firehose           |
| hue                |
| metastore          |
| mysql              |
| nav                |
| navms              |
| oozie              |
| retail_db          |
| rman               |
| sentry             |
| world              |
+--------------------+
14 rows in set (0.00 sec)

mysql> use world
Database changed
mysql> show tables;
+-----------------+
| Tables_in_world |
+-----------------+
| City            |
| Country         |
| CountryLanguage |
+-----------------+
3 rows in set (0.00 sec)

mysql> select * from City;


| 4075 | Khan Yunis                         | PSE         | Khan Yunis             |     123175 |
| 4076 | Hebron                             | PSE         | Hebron                 |     119401 |
| 4077 | Jabaliya                           | PSE         | North Gaza             |     113901 |
| 4078 | Nablus                             | PSE         | Nablus                 |     100231 |
| 4079 | Rafah                              | PSE         | Rafah                  |      92020 |
+------+------------------------------------+-------------+------------------------+------------+
4079 rows in set (0.00 sec)

// here we have 4079 Rows in City table. We are going to do SQOOP import (full load)

// doing folder cleanup
[cloudera@quickstart ~]$ hdfs dfs -ls
Found 8 items
drwxr-xr-x   - cloudera cloudera          0 2018-12-27 10:17 City
drwxr-xr-x   - cloudera cloudera          0 2018-12-27 10:17 Country
drwxr-xr-x   - cloudera cloudera          0 2018-12-27 10:17 CountryLanguage
drwxr-xr-x   - cloudera cloudera          0 2018-12-27 10:34 city2018
drwxr-xr-x   - cloudera cloudera          0 2018-12-22 01:11 flume
drwxr-xr-x   - cloudera cloudera          0 2018-12-20 02:50 log
drwxr-xr-x   - cloudera cloudera          0 2018-12-26 09:18 person
drwxr-xr-x   - cloudera cloudera          0 2018-12-26 17:48 result123
[cloudera@quickstart ~]$ hdfs dfs -rm -r City;
Deleted City
[cloudera@quickstart ~]$ hdfs dfs -rm -r Country
hdfs dfDeleted Country
[cloudera@quickstart ~]$ hdfs dfs -rm -r CountryLanguage
Deleted CountryLanguage
[cloudera@quickstart ~]$ hdfs dfs -rm -r city2018
Deleted city2018


[cloudera@quickstart ~]$ sqoop list-databases --connect jdbc:mysql://localhost/ -username root -password cloudera

TRVL_DB
cm
firehose
hue
metastore
mysql
nav
navms
oozie
retail_db
rman
sentry
world


[cloudera@quickstart ~]$ sqoop list-tables --connect jdbc:mysql://localhost/world -username root -password cloudera
City
Country
CountryLanguage

// import City table into hdfs
[cloudera@quickstart ~]$ sqoop import --connect jdbc:mysql://localhost/world --username root -password cloudera --table City;

//BoundingValsQuery: SELECT MIN(`ID`), MAX(`ID`) FROM `City`
/*
Map-Reduce Framework
Map input records=4079
Map output records=4079
Input split bytes=414
*/

[cloudera@quickstart ~]$ hdfs dfs -ls /user/cloudera/
Found 5 items
drwxr-xr-x   - cloudera cloudera          0 2018-12-28 06:00 /user/cloudera/City
drwxr-xr-x   - cloudera cloudera          0 2018-12-22 01:11 /user/cloudera/flume
drwxr-xr-x   - cloudera cloudera          0 2018-12-20 02:50 /user/cloudera/log
drwxr-xr-x   - cloudera cloudera          0 2018-12-26 09:18 /user/cloudera/person
drwxr-xr-x   - cloudera cloudera          0 2018-12-26 17:48 /user/cloudera/result123


[cloudera@quickstart ~]$ hdfs dfs -ls /user/cloudera/City
Found 5 items
-rw-r--r--   1 cloudera cloudera          0 2018-12-28 06:00 /user/cloudera/City/_SUCCESS
-rw-r--r--   1 cloudera cloudera      37088 2018-12-28 06:00 /user/cloudera/City/part-m-00000
-rw-r--r--   1 cloudera cloudera      35361 2018-12-28 06:00 /user/cloudera/City/part-m-00001
-rw-r--r--   1 cloudera cloudera      35884 2018-12-28 06:00 /user/cloudera/City/part-m-00002
-rw-r--r--   1 cloudera cloudera      36148 2018-12-28 06:00 /user/cloudera/City/part-m-00003


[cloudera@quickstart ~]$ hdfs dfs -tail /user/cloudera/City/part-m-00003

4075,Khan Yunis,PSE,Khan Yunis,123175
4076,Hebron,PSE,Hebron,119401
4077,Jabaliya,PSE,North Gaza,113901
4078,Nablus,PSE,Nablus,100231
4079,Rafah,PSE,Rafah,92020

// full load import is successfully completed ( total #rows = 4079)


// insert brand new rows in City tables manually..

mysql> insert into City (ID, Name, CountryCode, District,Population) VALUES (4080,"Trichy","IND","Trichy",100000);
Query OK, 1 row affected (0.00 sec)

mysql> insert into City (ID, Name, CountryCode, District,Population) VALUES (4081,"CBE","IND","CBE",100000);
Query OK, 1 row affected (0.00 sec)

mysql> insert into City (ID, Name, CountryCode, District,Population) VALUES (4082,"MDR","IND","MDR",100001);
Query OK, 1 row affected (0.00 sec)

// new row IDs 4080,4081,4082

// incremental load
[cloudera@quickstart ~]$ sqoop import --connect jdbc:mysql://localhost/world --table City --username root --password cloudera --append --check-column "ID" --incremental append --last-value 4079

Map-Reduce Framework
Map input records=3
Map output records=3
Input split bytes=313


8/12/28 06:14:40 INFO tool.ImportTool:  --incremental append
18/12/28 06:14:40 INFO tool.ImportTool:   --check-column ID
18/12/28 06:14:40 INFO tool.ImportTool:   --last-value 4082
18/12/28 06:14:40 INFO tool.ImportTool: (Consider saving this with 'sqoop job --create')

[cloudera@quickstart ~]$ hdfs dfs -ls /user/cloudera/City/
Found 8 items
-rw-r--r--   1 cloudera cloudera          0 2018-12-28 06:00 /user/cloudera/City/_SUCCESS
-rw-r--r--   1 cloudera cloudera      37088 2018-12-28 06:00 /user/cloudera/City/part-m-00000
-rw-r--r--   1 cloudera cloudera      35361 2018-12-28 06:00 /user/cloudera/City/part-m-00001
-rw-r--r--   1 cloudera cloudera      35884 2018-12-28 06:00 /user/cloudera/City/part-m-00002
-rw-r--r--   1 cloudera cloudera      36148 2018-12-28 06:00 /user/cloudera/City/part-m-00003
-rw-r--r--   1 cloudera cloudera         32 2018-12-28 06:14 /user/cloudera/City/part-m-00004
-rw-r--r--   1 cloudera cloudera         24 2018-12-28 06:14 /user/cloudera/City/part-m-00005
-rw-r--r--   1 cloudera cloudera         24 2018-12-28 06:14 /user/cloudera/City/part-m-00006

[cloudera@quickstart ~]$ hdfs dfs -cat /user/cloudera/City/part-m-00004

4080,Trichy,IND,[ATrichy,100000
[cloudera@quickstart ~]$
[cloudera@quickstart ~]$ hdfs dfs -cat /user/cloudera/City/part-m-00005
4081,CBE,IND,CBE,100000
[cloudera@quickstart ~]$ hdfs dfs -cat /user/cloudera/City/part-m-00006
4082,MDR,IND,MDR,100001


No comments:

Post a Comment

Flume - Simple Demo

// create a folder in hdfs : $ hdfs dfs -mkdir /user/flumeExa // Create a shell script which generates : Hadoop in real world <n>...