// I have a ohm.person table in MySQL with the following records
mysql> select * from ohm.person;
+----+-----------------+---------------------+
| id | name | last_mod_dt |
+----+-----------------+---------------------+
| 1 | Raja | 2020-08-07 01:17:17 |
| 2 | Ravi | 2020-08-07 01:17:30 |
| 3 | Kalai | 2020-08-07 01:17:34 |
| 4 | Sudha | 2020-08-07 01:17:39 |
| 5 | Priya | 2020-08-07 01:31:28 |
| 6 | Vanitha | 2020-08-07 01:31:34 |
| 7 | Kasturi | 2020-08-07 01:31:40 |
| 8 | Lakshmi | 2020-08-07 01:31:45 |
| 9 | Suriya Devi | 2020-08-07 01:31:51 |
| 10 | Nanjil Vijayan | 2020-08-07 01:40:53 |
| 11 | Elizabeth Helen | 2020-08-07 01:41:14 |
| 12 | Peter Paul | 2020-08-07 01:41:20 |
| 13 | Ravindran | 2020-08-07 01:41:35 |
+----+-----------------+---------------------+
// Create a new Sqoop Job:
$ sqoop job \
-create person_inc_job \
-- import \
-connect jdbc:mysql://localhost:3306/ohm \
-table person \
-username root \
-password cloudera \
-check-column last_mod_dt \
-incremental append \
-last-value '01:17:17' \
-target-dir /user/cloudera/person_test \
-m 1
// Display all job names already created
$ sqoop job --list
Warning: /usr/lib/sqoop/../accumulo does not exist! Accumulo imports will fail.
Please set $ACCUMULO_HOME to the root of your Accumulo installation.
20/08/07 02:05:54 INFO sqoop.Sqoop: Running Sqoop version: 1.4.6-cdh5.13.0
Available jobs:
person_inc_job
// delete the target folder if exists
$ hdfs dfs -rm -r /user/cloudera/person_test
rm: `/user/cloudera/person_test': No such file or directory
// run the sqoop job for the first time
$ sqoop job --exec person_inc_job
Retrieved 13 records.
$ hdfs dfs -cat /user/cloudera/person_test/*
1,Raja,2020-08-07 01:17:17.0
2,Ravi,2020-08-07 01:17:30.0
3,Kalai,2020-08-07 01:17:34.0
4,Sudha,2020-08-07 01:17:39.0
5,Priya,2020-08-07 01:31:28.0
6,Vanitha,2020-08-07 01:31:34.0
7,Kasturi,2020-08-07 01:31:40.0
8,Lakshmi,2020-08-07 01:31:45.0
9,Suriya Devi,2020-08-07 01:31:51.0
10,Nanjil Vijayan,2020-08-07 01:40:53.0
11,Elizabeth Helen,2020-08-07 01:41:14.0
12,Peter Paul,2020-08-07 01:41:20.0
13,Ravindran,2020-08-07 01:41:35.0
// Now again add some more new records in MySQL
mysql> insert into ohm.person(name) values ('Anbu');
Query OK, 1 row affected (0.00 sec)
mysql> insert into ohm.person(name) values ('Sudha');
Query OK, 1 row affected (0.01 sec)
mysql> insert into ohm.person(name) values ('Aish');
Query OK, 1 row affected (0.00 sec)
mysql> insert into ohm.person(name) values ('Vijay');
Query OK, 1 row affected (0.00 sec)
mysql> insert into ohm.person(name) values ('Balaji');
Query OK, 1 row affected (0.01 sec)
mysql> select * from ohm.person; -- MySQL console
+----+-----------------+---------------------+
| id | name | last_mod_dt |
+----+-----------------+---------------------+
| 1 | Raja | 2020-08-07 01:17:17 |
| 2 | Ravi | 2020-08-07 01:17:30 |
| 3 | Kalai | 2020-08-07 01:17:34 |
| 4 | Sudha | 2020-08-07 01:17:39 |
| 5 | Priya | 2020-08-07 01:31:28 |
| 6 | Vanitha | 2020-08-07 01:31:34 |
| 7 | Kasturi | 2020-08-07 01:31:40 |
| 8 | Lakshmi | 2020-08-07 01:31:45 |
| 9 | Suriya Devi | 2020-08-07 01:31:51 |
| 10 | Nanjil Vijayan | 2020-08-07 01:40:53 |
| 11 | Elizabeth Helen | 2020-08-07 01:41:14 |
| 12 | Peter Paul | 2020-08-07 01:41:20 |
| 13 | Ravindran | 2020-08-07 01:41:35 | -- last value
| 14 | Anbu | 2020-08-07 02:23:25 | -- newly added records
| 15 | Sudha | 2020-08-07 02:23:29 |
| 16 | Aish | 2020-08-07 02:23:36 |
| 17 | Vijay | 2020-08-07 02:23:44 |
| 18 | Balaji | 2020-08-07 02:23:47 |
+----+-----------------+---------------------+
18 rows in set (0.00 sec)
// run the sqoop job again to fetch incremental records
$ sqoop job --exec person_inc_job
Retrieved 5 records.
// Display the imported data available in HDFS
$ hdfs dfs -cat /user/cloudera/person_test/*
1,Raja,2020-08-07 01:17:17.0
2,Ravi,2020-08-07 01:17:30.0
3,Kalai,2020-08-07 01:17:34.0
4,Sudha,2020-08-07 01:17:39.0
5,Priya,2020-08-07 01:31:28.0
6,Vanitha,2020-08-07 01:31:34.0
7,Kasturi,2020-08-07 01:31:40.0
8,Lakshmi,2020-08-07 01:31:45.0
9,Suriya Devi,2020-08-07 01:31:51.0
10,Nanjil Vijayan,2020-08-07 01:40:53.0
11,Elizabeth Helen,2020-08-07 01:41:14.0
12,Peter Paul,2020-08-07 01:41:20.0
13,Ravindran,2020-08-07 01:41:35.0 -- loaded from FULL LOAD
14,Anbu,2020-08-07 02:23:25.0 -- incremental records
15,Sudha,2020-08-07 02:23:29.0
16,Aish,2020-08-07 02:23:36.0
17,Vijay,2020-08-07 02:23:44.0
18,Balaji,2020-08-07 02:23:47.0
// To delete the sqoop job :
$ sqoop job --delete person_inc_job;
No comments:
Post a Comment