Monday, 3 August 2020

Incremental append : MySQL to HDFS

#1. In the very first phase, we are doing FULL LOAD (init version)
#2. In the 2nd phase, we are doing INCREMENTAL APPEND 
$ mysql -uroot -pcloudera -hlocalhost

mysql>use ohm; 

mysql>create table person (id int, name varchar(50));

mysql>insert into person (id,name) values (100,'Raja'),(101,'Siva'),('102','Lal');

mysql> select * from ohm.person;
+------+------+
| id   | name |
+------+------+
|  100 | Raja |
|  101 | Siva |
|  102 | Lal  |
+------+------+
3 rows in set (0.00 sec)


// Initial full load
sqoop import \
-connect jdbc:mysql://localhost:3306/ohm \
-username root \
-password cloudera \
-table person \
-target-dir  user/cloudera/person_incremental \
-split-by id


hdfs dfs -ls user/cloudera/person_incremental
Found 4 items
-rw-r--r--   1 cloudera cloudera          0 2020-08-03 04:49 user/cloudera/person_incremental/_SUCCESS
-rw-r--r--   1 cloudera cloudera          9 2020-08-03 04:49 user/cloudera/person_incremental/part-m-00000
-rw-r--r--   1 cloudera cloudera          9 2020-08-03 04:49 user/cloudera/person_incremental/part-m-00001
-rw-r--r--   1 cloudera cloudera          8 2020-08-03 04:49 user/cloudera/person_incremental/part-m-00002

$ hdfs dfs -cat user/cloudera/person_incremental/*
100,Raja
101,Siva
102,Lal


//Adding 4 more records in ohm.person
mysql>insert into ohm.person (id,name) values (103,'Nila'),(104,'Kalai'),('105','Bharani'),('106','Sara');
Query OK, 4 rows affected (0.00 sec)
Records: 4  Duplicates: 0  Warnings: 0

mysql> select * from ohm.person;
+------+---------+
| id   | name    |
+------+---------+
|  100 | Raja    | $ Existing Row
|  101 | Siva    | $ Existing Row
|  102 | Lal     | $ Existing Row
|  103 | Nila    | * Newly inserted row
|  104 | Kalai   | * Newly inserted row
|  105 | Bharani | * Newly inserted row
|  106 | Sara    | * Newly inserted row
+------+---------+



// incremental append using SQOOP Import
// feeding last-value as 102 and check-column as id
sqoop import \
-connect jdbc:mysql://localhost:3306/ohm \
-username root \
-password cloudera \
-table person \
-incremental append \
-check-column id \
-last-value 102 \
-target-dir  user/cloudera/person_incremental \
-split-by id


$ hdfs dfs -ls  user/cloudera/person_incremental
Found 8 items
-rw-r--r--   1 cloudera cloudera          0 2020-08-03 04:49 user/cloudera/person_incremental/_SUCCESS
-rw-r--r--   1 cloudera cloudera          9 2020-08-03 04:49 user/cloudera/person_incremental/part-m-00000
-rw-r--r--   1 cloudera cloudera          9 2020-08-03 04:49 user/cloudera/person_incremental/part-m-00001
-rw-r--r--   1 cloudera cloudera          8 2020-08-03 04:49 user/cloudera/person_incremental/part-m-00002
-rw-r--r--   1 cloudera cloudera          9 2020-08-03 05:10 user/cloudera/person_incremental/part-m-00003
-rw-r--r--   1 cloudera cloudera         10 2020-08-03 05:10 user/cloudera/person_incremental/part-m-00004
-rw-r--r--   1 cloudera cloudera         12 2020-08-03 05:10 user/cloudera/person_incremental/part-m-00005
-rw-r--r--   1 cloudera cloudera          9 2020-08-03 05:10 user/cloudera/person_incremental/part-m-00006


$ hdfs dfs -cat user/cloudera/person_incremental/*
100,Raja
101,Siva
102,Lal
103,Nila  -- Newly appended row
104,Kalai  -- Newly appended row
105,Bharani  -- Newly appended row
106,Sara  -- Newly appended row

No comments:

Post a Comment

Flume - Simple Demo

// create a folder in hdfs : $ hdfs dfs -mkdir /user/flumeExa // Create a shell script which generates : Hadoop in real world <n>...