Tuesday, 4 August 2020

External table creation in Hive

// Have a data file in local linux
$ cat /home/cloudera/customers/customers.txt
id|fname|lname|city
100|Raja|Ravindar|Chennai
101|Suresh|Rahul|Trichy
102|Arun|Murali|Madurai
103|Kali|Das|Peravurani
104|Kalai|Selvi|Pattukottai

$ hdfs dfs -mkdir /user/cloudera/customer-ext/

// copy the local file into hdfs
$ hdfs dfs -copyFromLocal /home/cloudera/customers/customers.txt /user/cloudera/customer-ext/

// display the hdfs file
hdfs dfs -cat /user/cloudera/customer-ext/customers.txt
id|fname|lname|city
100|Raja|Ravindar|Chennai
101|Suresh|Rahul|Trichy
102|Arun|Murali|Madurai
103|Kali|Das|Peravurani
104|Kalai|Selvi|Pattukottai

hive> use ohm;
OK
Time taken: 0.022 seconds

// create external table with location specified for the data file in hdfs
// exclude header line
create external table ohm.customers
(
id int,
fname string,
lname string,
city string
)
row format delimited
fields terminated by '|'
stored as textfile
location '/user/cloudera/customer-ext/'
tblproperties ("skip.header.line.count"="1");


hive> select * from ohm.customers;
OK
100 Raja Ravindar Chennai
101 Suresh Rahul Trichy
102 Arun Murali Madurai
103 Kali Das Peravurani
104 Kalai Selvi Pattukottai


hive> describe formatted ohm.customers;
OK
# col_name            data_type            comment             
 
id                  int                                      
fname                string                                  
lname                string                                  
city                string                                  
 
# Detailed Table Information  
Database:            ohm                   
Owner:              cloudera             
CreateTime:          Tue Aug 04 04:07:35 PDT 2020  
LastAccessTime:      UNKNOWN               
Protect Mode:        None                 
Retention:          0                     
Location:            hdfs://quickstart.cloudera:8020/user/cloudera/customer-ext  
Table Type:          EXTERNAL_TABLE       
Table Parameters:  
COLUMN_STATS_ACCURATE false               
EXTERNAL            TRUE                
numFiles            1                   
numRows              -1                  
rawDataSize          -1                  
skip.header.line.count 1                   
totalSize            146                 
transient_lastDdlTime 1596539255          
 
# Storage Information  
SerDe Library:      org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe  
InputFormat:        org.apache.hadoop.mapred.TextInputFormat  
OutputFormat:        org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat  
Compressed:          No                   
Num Buckets:        -1                   
Bucket Columns:      []                   
Sort Columns:        []                   
Storage Desc Params:  
field.delim          |                   
serialization.format |                   
Time taken: 0.051 seconds, Fetched: 37 row(s)


// Removing the table in Hive (External table)

hive> drop table ohm.customers;
OK
Time taken: 0.661 seconds


// Even if we drop the table in Hive, only the metastore will be deleted
// Data file will not be deleted
$ hdfs dfs -ls /user/cloudera/customer-ext
Found 1 items
-rw-r--r--   1 cloudera supergroup        146 2020-08-04 04:05 /user/cloudera/customer-ext/customers.txt


No comments:

Post a Comment

Flume - Simple Demo

// create a folder in hdfs : $ hdfs dfs -mkdir /user/flumeExa // Create a shell script which generates : Hadoop in real world <n>...