External table creation in Hive
$ cat > sample_1.csv
18282782,NW
1929SEGH2,BSTN
172u8562,PLA
121232,JHK
3443453,AG
$ hdfs dfs -put sample_1.csv /sankara/
// Display the content of a file which is already available in hdfs
hdfs dfs -cat hdfs://localhost:8020/sankara/sample_1.csv
18282782,NW
1929SEGH2,BSTN
172u8562,PLA
121232,JHK
3443453,AG
// Create a schema in hive
create schema if not exists ohm;
use ohm;
// do not specify csv file in the location. just specify only the folder name
// make a table in ohm.
create external table if not exists sample_1
(
id string,
code string
)
ROW FORMAT DELIMITED
FIELDS TERMINATED BY ','
STORED AS TEXTFILE
LOCATION 'hdfs://localhost:8020/sankara/'
// display the structure
hive> describe ohm.sample_1;
OK
id string
code string
Time taken: 0.122 seconds, Fetched: 2 row(s)
hive> select * from sample_1;
OK
18282782 NW
1929SEGH2 BSTN
172u8562 PLA
121232 JHK
3443453 AG
Time taken: 1.009 seconds, Fetched: 5 row(s)
hive> describe formatted ohm.sample_1;
OK
# col_name data_type comment
id string
code string
# Detailed Table Information
Database: ohm
Owner: cloudera
CreateTime: Sat Aug 01 00:07:36 PDT 2020
LastAccessTime: UNKNOWN
Protect Mode: None
Retention: 0
Location: hdfs://localhost:8020/sankara
Table Type: EXTERNAL_TABLE
Table Parameters:
COLUMN_STATS_ACCURATE false
EXTERNAL TRUE
numFiles 1
numRows -1
rawDataSize -1
totalSize 62
transient_lastDdlTime 1596265656
# Storage Information
SerDe Library: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe
InputFormat: org.apache.hadoop.mapred.TextInputFormat
OutputFormat: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat
Compressed: No
Num Buckets: -1
Bucket Columns: []
Sort Columns: []
Storage Desc Params:
field.delim ,
serialization.format ,
Time taken: 0.099 seconds, Fetched: 34 row(s)
hive> select * from sample_1 where code like '%A%';
OK
172u8562 PLA
3443453 AG
No comments:
Post a Comment