// Here we are going to create a bucketed table and populate the data into it using orders table in Hive
CREATE EXTERNAL TABLE ohm.orders_bucketed
(
orderid int,
date string,
custid int,
status string
)
clustered by (custid) into 10 buckets
row format delimited
fields terminated by ',';
Location: hdfs://quickstart.cloudera:8020/user/hive/warehouse/ohm.db/orders_bucketed
// while doing partition we wont add the partition column in the structure
// But in bucketing we will be adding bucket column in the table structure
//enable the bucketing property
set hive.enforce.bucketing = true;
// populate the data taken from orders table
hive> insert overwrite table ohm.orders_bucketed select order_id, order_date, order_customer_id, order_status from ohm.orders;
number of mappers: 1; number of reducers: 10
Loading data to table ohm.orders_bucketed
Table ohm.orders_bucketed stats: [numFiles=10, numRows=68883, totalSize=2448880, rawDataSize=2379997]
hdfs dfs -ls hdfs://quickstart.cloudera:8020/user/hive/warehouse/ohm.db/orders_bucketed
Found 10 items
-rwxrwxrwx 1 cloudera supergroup 246834 2020-08-08 02:12 hdfs://quickstart.cloudera:8020/user/hive/warehouse/ohm.db/orders_bucketed/000000_0
-rwxrwxrwx 1 cloudera supergroup 242162 2020-08-08 02:12 hdfs://quickstart.cloudera:8020/user/hive/warehouse/ohm.db/orders_bucketed/000001_0
-rwxrwxrwx 1 cloudera supergroup 246462 2020-08-08 02:12 hdfs://quickstart.cloudera:8020/user/hive/warehouse/ohm.db/orders_bucketed/000002_0
-rwxrwxrwx 1 cloudera supergroup 244385 2020-08-08 02:12 hdfs://quickstart.cloudera:8020/user/hive/warehouse/ohm.db/orders_bucketed/000003_0
-rwxrwxrwx 1 cloudera supergroup 243974 2020-08-08 02:12 hdfs://quickstart.cloudera:8020/user/hive/warehouse/ohm.db/orders_bucketed/000004_0
-rwxrwxrwx 1 cloudera supergroup 242629 2020-08-08 02:12 hdfs://quickstart.cloudera:8020/user/hive/warehouse/ohm.db/orders_bucketed/000005_0
-rwxrwxrwx 1 cloudera supergroup 241932 2020-08-08 02:12 hdfs://quickstart.cloudera:8020/user/hive/warehouse/ohm.db/orders_bucketed/000006_0
-rwxrwxrwx 1 cloudera supergroup 247100 2020-08-08 02:12 hdfs://quickstart.cloudera:8020/user/hive/warehouse/ohm.db/orders_bucketed/000007_0
-rwxrwxrwx 1 cloudera supergroup 248135 2020-08-08 02:12 hdfs://quickstart.cloudera:8020/user/hive/warehouse/ohm.db/orders_bucketed/000008_0
-rwxrwxrwx 1 cloudera supergroup 245267 2020-08-08 02:12 hdfs://quickstart.cloudera:8020/user/hive/warehouse/ohm.db/orders_bucketed/000009_0
No comments:
Post a Comment