// Have a data file in local linux
$ cat /home/cloudera/customers/customers.txt
id|fname|lname|city
100|Raja|Ravindar|Chennai
101|Suresh|Rahul|Trichy
102|Arun|Murali|Madurai
103|Kali|Das|Peravurani
104|Kalai|Selvi|Pattukottai
$ hdfs dfs -mkdir /user/cloudera/customer-ext/
// copy the local file into hdfs
$ hdfs dfs -copyFromLocal /home/cloudera/customers/customers.txt /user/cloudera/customer-ext/
// display the hdfs file
hdfs dfs -cat /user/cloudera/customer-ext/customers.txt
id|fname|lname|city
100|Raja|Ravindar|Chennai
101|Suresh|Rahul|Trichy
102|Arun|Murali|Madurai
103|Kali|Das|Peravurani
104|Kalai|Selvi|Pattukottai
hive> use ohm;
OK
Time taken: 0.022 seconds
// create external table with location specified for the data file in hdfs
// exclude header line
create external table ohm.customers
(
id int,
fname string,
lname string,
city string
)
row format delimited
fields terminated by '|'
stored as textfile
location '/user/cloudera/customer-ext/'
tblproperties ("skip.header.line.count"="1");
hive> select * from ohm.customers;
OK
100 Raja Ravindar Chennai
101 Suresh Rahul Trichy
102 Arun Murali Madurai
103 Kali Das Peravurani
104 Kalai Selvi Pattukottai
hive> describe formatted ohm.customers;
OK
# col_name data_type comment
id int
fname string
lname string
city string
# Detailed Table Information
Database: ohm
Owner: cloudera
CreateTime: Tue Aug 04 04:07:35 PDT 2020
LastAccessTime: UNKNOWN
Protect Mode: None
Retention: 0
Location: hdfs://quickstart.cloudera:8020/user/cloudera/customer-ext
Table Type: EXTERNAL_TABLE
Table Parameters:
COLUMN_STATS_ACCURATE false
EXTERNAL TRUE
numFiles 1
numRows -1
rawDataSize -1
skip.header.line.count 1
totalSize 146
transient_lastDdlTime 1596539255
# Storage Information
SerDe Library: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe
InputFormat: org.apache.hadoop.mapred.TextInputFormat
OutputFormat: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat
Compressed: No
Num Buckets: -1
Bucket Columns: []
Sort Columns: []
Storage Desc Params:
field.delim |
serialization.format |
Time taken: 0.051 seconds, Fetched: 37 row(s)
// Removing the table in Hive (External table)
hive> drop table ohm.customers;
OK
Time taken: 0.661 seconds
// Even if we drop the table in Hive, only the metastore will be deleted
// Data file will not be deleted
$ hdfs dfs -ls /user/cloudera/customer-ext
Found 1 items
-rw-r--r-- 1 cloudera supergroup 146 2020-08-04 04:05 /user/cloudera/customer-ext/customers.txt
No comments:
Post a Comment