Tuesday, 4 August 2020

Internal or Managed Tables in Hive

$ hive

// Display all the databases available in Hive
hive> show databases;
OK
default
ohm


hive>create database if not exists testdb;

hive>create database if not exists moviedb comment "All about Movies"

hive>create database accountsdb location '/user/cloudera'


hive> show databases;
OK
accountsdb
default
moviedb
ohm
testdb


hive> show databases like 'm*';
OK
moviedb
Time taken: 0.015 seconds, Fetched: 1 row(s)


hive> describe database ohm;
OK
ohm hdfs://quickstart.cloudera:8020/user/hive/warehouse/ohm.db cloudera USER
Time taken: 0.009 seconds, Fetched: 1 row(s)

hive> describe database extended ohm;
OK
ohm hdfs://quickstart.cloudera:8020/user/hive/warehouse/ohm.db cloudera USER
Time taken: 0.01 seconds, Fetched: 1 row(s)

hive> describe database accountsdb;
OK
accountsdb hdfs://quickstart.cloudera:8020/user/cloudera cloudera USER


hive> use ohm;
OK
Time taken: 0.016 seconds

hive> show tables;
OK
cust_from_mysql
employee
sample_1
sare
tbl_avro
tbl_header_removed
tbl_parquet
tblfirstmiddlelastnames
tblgreatest
tmptbl_fullname
Time taken: 0.066 seconds, Fetched: 10 row(s)


Create managed table (Internal table)

CREATE TABLE ohm.customers (id int,fname string,lname string,city string) row format delimited fields terminated by "|" stored as textfile;


hive> describe ohm.customers;
OK
id                  int                                      
fname                string                                  
lname                string                                  
city                string                                  
Time taken: 0.215 seconds, Fetched: 4 row(s)


// get additional details about a table
hive> describe extended ohm.customers;
OK
id                  int                                      
fname                string                                  
lname                string                                  
city                string                                  
 
Detailed Table Information Table(tableName:customers, dbName:ohm, owner:cloudera, createTime:1596530419, lastAccessTime:0, retention:0, sd:StorageDescriptor(cols:[FieldSchema(name:id, type:int, comment:null), FieldSchema(name:fname, type:string, comment:null), FieldSchema(name:lname, type:string, comment:null), FieldSchema(name:city, type:string, comment:null)], location:hdfs://quickstart.cloudera:8020/user/hive/warehouse/ohm.db/customers, inputFormat:org.apache.hadoop.mapred.TextInputFormat, outputFormat:org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat, compressed:false, numBuckets:-1, serdeInfo:SerDeInfo(name:null, serializationLib:org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe, parameters:{serialization.format=|, field.delim=|}), bucketCols:[], sortCols:[], parameters:{}, skewedInfo:SkewedInfo(skewedColNames:[], skewedColValues:[], skewedColValueLocationMaps:{}), storedAsSubDirectories:false), partitionKeys:[], parameters:{transient_lastDdlTime=1596530419}, viewOriginalText:null, viewExpandedText:null, tableType:MANAGED_TABLE)
Time taken: 0.307 seconds, Fetched: 6 row(s)




// format the output
hive> describe formatted ohm.customers;
OK
# col_name            data_type            comment             
 
id                  int                                      
fname                string                                  
lname                string                                  
city                string                                  
 
# Detailed Table Information  
Database:            ohm                   
Owner:              cloudera             
CreateTime:          Tue Aug 04 01:40:19 PDT 2020  
LastAccessTime:      UNKNOWN               
Protect Mode:        None                 
Retention:          0                     
Location:            hdfs://quickstart.cloudera:8020/user/hive/warehouse/ohm.db/customers  
Table Type:          MANAGED_TABLE         
Table Parameters:  
transient_lastDdlTime 1596530419          
 
# Storage Information  
SerDe Library:      org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe  
InputFormat:        org.apache.hadoop.mapred.TextInputFormat  
OutputFormat:        org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat  
Compressed:          No                   
Num Buckets:        -1                   
Bucket Columns:      []                   
Sort Columns:        []                   
Storage Desc Params:  
field.delim          |                   
serialization.format |                   
Time taken: 0.36 seconds, Fetched: 30 row(s)


// Create a text file with the following contents
$ cat /home/cloudera/customers/customers.txt
id|fname|lname|city
100|Raja|Ravindar|Chennai
101|Suresh|Rahul|Trichy
102|Arun|Murali|Madurai
103|Kali|Das|Peravurani
104|Kalai|Selvi|Pattukottai

// load local linux file into hive internal table: 
// Don't specify the file name here but specify the folder where the data file is located.

hive> load data local inpath "/home/cloudera/customers/" overwrite into table ohm.customers;
Loading data to table ohm.customers
Table ohm.customers stats: [numFiles=1, numRows=0, totalSize=146, rawDataSize=0]
OK
Time taken: 1.796 seconds

// Display the records after data load
hive> select * from ohm.customers;
OK
NULL fname lname city
100 Raja Ravindar Chennai
101 Suresh Rahul Trichy
102 Arun Murali Madurai
103 Kali Das Peravurani
104 Kalai Selvi Pattukottai
Time taken: 0.184 seconds, Fetched: 6 row(s)



hdfs dfs -ls hdfs://quickstart.cloudera:8020/user/hive/warehouse/ohm.db/customers
Found 1 items
-rwxrwxrwx   1 cloudera supergroup        146 2020-08-04 01:50 hdfs://quickstart.cloudera:8020/user/hive/warehouse/ohm.db/customers/customers.txt


// Removing the internal table
hive> drop table customers;
OK
Time taken: 2.061 seconds

// Try to list it in warehouse folder. Here the internal table data file also deleted
$ hdfs dfs -ls hdfs://quickstart.cloudera:8020/user/hive/warehouse/ohm.db/customers
ls: `hdfs://quickstart.cloudera:8020/user/hive/warehouse/ohm.db/customers': No such file or directory


No comments:

Post a Comment

Flume - Simple Demo

// create a folder in hdfs : $ hdfs dfs -mkdir /user/flumeExa // Create a shell script which generates : Hadoop in real world <n>...