Import data from MySQL to HDFS using SQOOP with conditional data importing
//Conditional import using Where
sqoop import \
-connect jdbc:mysql://localhost/retail_db \
-username root \
-password cloudera \
-table customers \
-target-dir /user/cloudera/cust_nm_mary \
-delete-target-dir \
-where 'customer_fname="Mary"'
Retrieved 4741 records.
hdfs dfs -ls /user/cloudera/cust_nm_mary
Found 5 items
-rw-r--r-- 1 cloudera cloudera 0 2020-08-03 00:23 /user/cloudera/cust_nm_mary/_SUCCESS
-rw-r--r-- 1 cloudera cloudera 89617 2020-08-03 00:23 /user/cloudera/cust_nm_mary/part-m-00000
-rw-r--r-- 1 cloudera cloudera 88116 2020-08-03 00:23 /user/cloudera/cust_nm_mary/part-m-00001
-rw-r--r-- 1 cloudera cloudera 89590 2020-08-03 00:23 /user/cloudera/cust_nm_mary/part-m-00002
-rw-r--r-- 1 cloudera cloudera 90219 2020-08-03 00:23 /user/cloudera/cust_nm_mary/part-m-00003
// Look here : All the records have Mary but, here we have selected all the columns
hdfs dfs -cat /user/cloudera/cust_nm_mary/part-m-00000 | head
2,Mary,Barrett,XXXXXXXXX,XXXXXXXXX,9526 Noble Embers Ridge,Littleton,CO,80126
4,Mary,Jones,XXXXXXXXX,XXXXXXXXX,8324 Little Common,San Marcos,CA,92069
6,Mary,Smith,XXXXXXXXX,XXXXXXXXX,3151 Sleepy Quail Promenade,Passaic,NJ,07055
9,Mary,Perez,XXXXXXXXX,XXXXXXXXX,3616 Quaking Street,Caguas,PR,00725
11,Mary,Huffman,XXXXXXXXX,XXXXXXXXX,3169 Stony Woods,Caguas,PR,00725
13,Mary,Baldwin,XXXXXXXXX,XXXXXXXXX,7922 Iron Oak Gardens,Caguas,PR,00725
17,Mary,Robinson,XXXXXXXXX,XXXXXXXXX,1325 Noble Pike,Taylor,MI,48180
20,Mary,Ellis,XXXXXXXXX,XXXXXXXXX,4703 Old Route,West New York,NJ,07093
24,Mary,Smith,XXXXXXXXX,XXXXXXXXX,9417 Emerald Towers,Caguas,PR,00725
27,Mary,Vincent,XXXXXXXXX,XXXXXXXXX,1768 Sleepy Zephyr Place,Caguas,PR,00725
// Retrieve data of specific columns only along with where condition
sqoop import \
-connect jdbc:mysql://localhost/retail_db \
-username root \
-password cloudera \
-table customers \
-target-dir /user/cloudera/cust_col_sel \
-delete-target-dir \
-columns "customer_fname, customer_lname, customer_city" \
-where 'customer_fname="Mary"'
hdfs dfs -ls /user/cloudera/cust_col_sel
Found 5 items
-rw-r--r-- 1 cloudera cloudera 0 2020-08-03 00:43 /user/cloudera/cust_col_sel/_SUCCESS
-rw-r--r-- 1 cloudera cloudera 24361 2020-08-03 00:43 /user/cloudera/cust_col_sel/part-m-00000
-rw-r--r-- 1 cloudera cloudera 23816 2020-08-03 00:43 /user/cloudera/cust_col_sel/part-m-00001
-rw-r--r-- 1 cloudera cloudera 24280 2020-08-03 00:43 /user/cloudera/cust_col_sel/part-m-00002
-rw-r--r-- 1 cloudera cloudera 24254 2020-08-03 00:43 /user/cloudera/cust_col_sel/part-m-00003
// Retrieved First Name, Last name, city only
$ hdfs dfs -cat /user/cloudera/cust_col_sel/part-m-00000 | head
Mary,Barrett,Littleton
Mary,Jones,San Marcos
Mary,Smith,Passaic
Mary,Perez,Caguas
Mary,Huffman,Caguas
Mary,Baldwin,Caguas
Mary,Robinson,Taylor
Mary,Ellis,West New York
Mary,Smith,Caguas
Mary,Vincent,Caguas
mysql> select customer_fname, customer_lname, customer_city from retail_db.customers where customer_id between 100 and 110;
+----------------+----------------+-------------------+
| customer_fname | customer_lname | customer_city |
+----------------+----------------+-------------------+
| George | Barrett | Caguas |
| Mary | Briggs | Dorchester Center |
| Gregory | Smith | Caguas |
| Mary | Williams | Broomfield |
| Debra | Wood | Saint Paul |
| Mary | Smith | Caguas |
| Lauren | Freeman | Napa |
| Mary | Weaver | Caguas |
| Joshua | Smith | Bronx |
| Mary | Thompson | Caguas |
| David | Miranda | Beaverton |
+----------------+----------------+-------------------+
// Cannot specify --query and --table together.
//AND \$CONDITIONS must be included as part of the SELECT query
// Retrieve data based on the select query
sqoop import \
-connect jdbc:mysql://localhost/retail_db \
-username root \
-password cloudera \
-target-dir /user/cloudera/cust_qry_rstl \
-delete-target-dir \
-query "select customer_id,customer_fname, customer_lname, customer_city from retail_db.customers where customer_id between 100 and 110 AND \$CONDITIONS" \
-split-by customer_id
hdfs dfs -ls /user/cloudera/cust_qry_rstl
Found 5 items
-rw-r--r-- 1 cloudera cloudera 0 2020-08-03 01:11 /user/cloudera/cust_qry_rstl/_SUCCESS
-rw-r--r-- 1 cloudera cloudera 85 2020-08-03 01:11 /user/cloudera/cust_qry_rstl/part-m-00000
-rw-r--r-- 1 cloudera cloudera 77 2020-08-03 01:11 /user/cloudera/cust_qry_rstl/part-m-00001
-rw-r--r-- 1 cloudera cloudera 47 2020-08-03 01:11 /user/cloudera/cust_qry_rstl/part-m-00002
-rw-r--r-- 1 cloudera cloudera 76 2020-08-03 01:11 /user/cloudera/cust_qry_rstl/part-m-00003
// display the data written in HDFS
$ hdfs dfs -cat /user/cloudera/cust_qry_rstl/*
100,George,Barrett,Caguas
101,Mary,Briggs,Dorchester Center
102,Gregory,Smith,Caguas
103,Mary,Williams,Broomfield
104,Debra,Wood,Saint Paul
105,Mary,Smith,Caguas
106,Lauren,Freeman,Napa
107,Mary,Weaver,Caguas
108,Joshua,Smith,Bronx
109,Mary,Thompson,Caguas
110,David,Miranda,Beaverton
No comments:
Post a Comment