// Create a new database in Hive
hive> create database retail_db;
// create a new table with the data taken from some other table of some other database
hive> create table retail_db.customer_temp as select customer_id as id, customer_fname as fname, customer_lname as lname from ohm.customers;
hive> use retail_db;
OK
Time taken: 0.014 seconds
hive> show tables;
OK
customer_temp
Time taken: 0.01 seconds, Fetched: 1 row(s)
hive> describe customer_temp;
OK
id int
fname string
lname string
Time taken: 0.05 seconds, Fetched: 3 row(s)
hive> describe formatted customer_temp;
OK
# col_name data_type comment
id int
fname string
lname string
# Detailed Table Information
Database: retail_db
Owner: cloudera
CreateTime: Fri Aug 07 05:44:14 PDT 2020
LastAccessTime: UNKNOWN
Protect Mode: None
Retention: 0
Location: hdfs://quickstart.cloudera:8020/user/hive/warehouse/retail_db.db/customer_temp
Table Type: MANAGED_TABLE
Table Parameters:
COLUMN_STATS_ACCURATE true
numFiles 1
numRows 12435
rawDataSize 211716
totalSize 224151
transient_lastDdlTime 1596804254
# Storage Information
SerDe Library: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe
InputFormat: org.apache.hadoop.mapred.TextInputFormat
OutputFormat: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat
Compressed: No
Num Buckets: -1
Bucket Columns: []
Sort Columns: []
Storage Desc Params:
serialization.format 1
Time taken: 0.066 seconds, Fetched: 33 row(s)
// Renaming the database is not applicable in Hive (But work around is there)
ALTER DATABASE retail_db RENAME TO retail_new; // not possible in CDH 5.13
// Rename the table
hive> alter table customer_temp rename to cust;
OK
Time taken: 0.114 seconds
hive> show tables;
OK
cust
Time taken: 0.018 seconds, Fetched: 1 row(s)
hive> describe cust;
OK
id int
fname string
lname string
Time taken: 0.057 seconds, Fetched: 3 row(s)
// How to rename the column?
hive> alter table cust change id custid int;
OK
Time taken: 0.128 seconds
hive> describe cust;
OK
custid int
fname string
lname string
Time taken: 0.051 seconds, Fetched: 3 row(s)
// How to replace column names for set of columns?
hive> alter table cust replace columns (id int, firstname string, lastname string);
OK
Time taken: 0.088 seconds
hive> describe cust;
OK
id int
firstname string
lastname string
Time taken: 0.048 seconds, Fetched: 3 row(s)
// How to add new columns to the existing table?
hive> alter table cust add columns (age int, email string);
OK
Time taken: 0.069 seconds
hive> describe cust;
OK
id int
firstname string
lastname string
age int -------> New column
email string -------> New column
Time taken: 0.047 seconds, Fetched: 5 row(s)
// Newly created columns will be having NULL values
hive> select * from cust limit 10;
1 Richard Hernandez NULL NULL
2 Mary Barrett NULL NULL
3 Ann Smith NULL NULL
4 Mary Jones NULL NULL
5 Robert Hudson NULL NULL
6 Mary Smith NULL NULL
7 Melissa Wilcox NULL NULL
8 Megan Smith NULL NULL
9 Mary Perez NULL NULL
10 Melissa Smith NULL NULL
Time taken: 0.063 seconds, Fetched: 10 row(s)
// How to change the file format of existing table?
hive> alter table cust set fileformat parquet;
OK
Time taken: 0.089 seconds
SerDe Library: org.apache.hadoop.hive.ql.io.parquet.serde.ParquetHiveSerDe
InputFormat: org.apache.hadoop.hive.ql.io.parquet.MapredParquetInputFormat
OutputFormat: org.apache.hadoop.hive.ql.io.parquet.MapredParquetOutputFormat
hive> alter table cust set fileformat avro;
OK
Time taken: 0.108 seconds
hive> describe formatted cust;
SerDe Library: org.apache.hadoop.hive.serde2.avro.AvroSerDe
InputFormat: org.apache.hadoop.hive.ql.io.avro.AvroContainerInputFormat
OutputFormat: org.apache.hadoop.hive.ql.io.avro.AvroContainerOutputFormat
Location: hdfs://quickstart.cloudera:8020/user/hive/warehouse/retail_db.db/cust
// How to change the location of a table?
hive> alter table cust set location '/user/cloudera/customer';
OK
hive> describe formatted cust;
Location: hdfs://quickstart.cloudera:8020/user/cloudera/customer
No comments:
Post a Comment