GZIP, Snappy, and Uncompressed compression formats are supported in Hive.
Set the compression properties in the Hive environment:
hive> SET hive.exec.compress.output=true;
hive> SET parquet.compression=GZIP;
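-- Parquet format, intended to use Snappy compression.
-- NOTE: the session above sets parquet.compression=GZIP, so the files written here are
-- GZIP-compressed (the parquet-tools meta output below shows GZIP for every column).
-- Run `SET parquet.compression=SNAPPY;` before the CTAS to actually get Snappy.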
-- CTAS: materialize every row and column of `customers` as Parquet files
-- under an explicit HDFS directory. No codec is named in this statement;
-- presumably the session-level parquet.compression setting decides it.
CREATE TABLE customer_parquet_snappy
    STORED AS PARQUET
    LOCATION '/user/cloudera/cust_parquet_snappy'
AS
SELECT *
FROM customers;
$ hdfs dfs -ls '/user/cloudera/cust_parquet_snappy';
Found 1 items
-rwxr-xr-x 1 cloudera cloudera 166435 2020-08-06 19:18 hdfs://localhost:8020/user/cloudera/cust_parquet_snappy/000000_0
$ parquet-tools head -n1 hdfs://localhost:8020/user/cloudera/cust_parquet_snappy/000000_0
id = 1
fname = Richard
lname = Hernandez
email = XXXXXXXXX
password = XXXXXXXXX
street = 6303 Heather Plaza
city = Brownsville
state = TX
zipcode = 78521
$ parquet-tools meta hdfs://localhost:8020/user/cloudera/cust_parquet_snappy/000000_0;
creator: parquet-mr version 1.5.0-cdh5.13.0 (build ${buildNumber})
file schema: hive_schema
-------------------------------------------------------------------------------------------------------------------------------------------------------------
id: OPTIONAL INT32 R:0 D:1
fname: OPTIONAL BINARY O:UTF8 R:0 D:1
lname: OPTIONAL BINARY O:UTF8 R:0 D:1
email: OPTIONAL BINARY O:UTF8 R:0 D:1
password: OPTIONAL BINARY O:UTF8 R:0 D:1
street: OPTIONAL BINARY O:UTF8 R:0 D:1
city: OPTIONAL BINARY O:UTF8 R:0 D:1
state: OPTIONAL BINARY O:UTF8 R:0 D:1
zipcode: OPTIONAL BINARY O:UTF8 R:0 D:1
row group 1: RC:12435 TS:333854
-------------------------------------------------------------------------------------------------------------------------------------------------------------
id: INT32 GZIP DO:0 FPO:4 SZ:17317/49787/2.88 VC:12435 ENC:PLAIN,RLE,BIT_PACKED
fname: BINARY GZIP DO:0 FPO:17321 SZ:10531/14512/1.38 VC:12435 ENC:PLAIN_DICTIONARY,RLE,BIT_PACKED
lname: BINARY GZIP DO:0 FPO:27852 SZ:18157/25572/1.41 VC:12435 ENC:PLAIN_DICTIONARY,RLE,BIT_PACKED
email: BINARY GZIP DO:0 FPO:46009 SZ:114/83/0.73 VC:12435 ENC:PLAIN_DICTIONARY,RLE,BIT_PACKED
password: BINARY GZIP DO:0 FPO:46123 SZ:114/83/0.73 VC:12435 ENC:PLAIN_DICTIONARY,RLE,BIT_PACKED
street: BINARY GZIP DO:0 FPO:46237 SZ:78850/186626/2.37 VC:12435 ENC:PLAIN_DICTIONARY,RLE,BIT_PACKED
city: BINARY GZIP DO:0 FPO:125087 SZ:16495/22872/1.39 VC:12435 ENC:PLAIN_DICTIONARY,RLE,BIT_PACKED
state: BINARY GZIP DO:0 FPO:141582 SZ:7213/9677/1.34 VC:12435 ENC:PLAIN_DICTIONARY,RLE,BIT_PACKED
zipcode: BINARY GZIP DO:0 FPO:148795 SZ:16847/24642/1.46 VC:12435 ENC:PLAIN_DICTIONARY,RLE,BIT_PACKED
No comments:
Post a Comment