// we have single column : FullName with multiple commas (,) inside
// FirstName, MiddleName, LastName
input file : col1tocol3.txt:
------------------------------
id|fullname
100|Sankara,Narayanan,Ramaiah
101|Anbu,Sudha,Dhanukkodi
102|Naga,Rathinam,Dhanukkodi
103|Bala,Subramanian,Ramaiah
105|Seetha,Raman,Balasubramanian
106|Subramanian,Nagappan,Pallathur
107|Swaroop,Venkatesh,Bangalore
hdfs dfs -cat hdfs://localhost:8020/sankara/fullnameexa/fullnamefile.txt
id|fullname
100|Sankara,Narayanan,Ramaiah
101|Anbu,Sudha,Dhanukkodi
102|Naga,Rathinam,Dhanukkodi
103|Bala,Subramanian,Ramaiah
105|Seetha,Raman,Balasubramanian
106|Subramanian,Nagappan,Pallathur
107|Swaroop,Venkatesh,Bangalore
CREATE EXTERNAL TABLE IF NOT EXISTS ohm.tmptbl_fullname
(
id int,
fullname string
)
ROW FORMAT DELIMITED
FIELDS TERMINATED BY '|'
STORED AS TEXTFILE
LOCATION 'hdfs://localhost:8020/sankara/fullnameexa/'
TBLPROPERTIES("skip.header.line.count"="1");
hive> select * from ohm.tmptbl_fullname;
OK
100 Sankara,Narayanan,Ramaiah
101 Anbu,Sudha,Dhanukkodi
102 Naga,Rathinam,Dhanukkodi
103 Bala,Subramanian,Ramaiah
105 Seetha,Raman,Balasubramanian
106 Subramanian,Nagappan,Pallathur
107 Swaroop,Venkatesh,Bangalore
Time taken: 0.689 seconds, Fetched: 7 row(s)
//select query with split logic
hive> select split(fullname,',') as fmlname from ohm.tmptbl_fullname;
OK
["Sankara","Narayanan","Ramaiah"]
["Anbu","Sudha","Dhanukkodi"]
["Naga","Rathinam","Dhanukkodi"]
["Bala","Subramanian","Ramaiah"]
["Seetha","Raman","Balasubramanian"]
["Subramanian","Nagappan","Pallathur"]
["Swaroop","Venkatesh","Bangalore"]
hive> create table ohm.tblfirstmiddlelastnames (id int, first string, middle string, last string);
OK
Time taken: 0.252 seconds
hive> show tables;
OK
employee
sample_1
tbl_avro
tbl_header_removed
tbl_parquet
tblfirstmiddlelastnames
tmptbl_fullname
Time taken: 0.022 seconds, Fetched: 7 row(s)
hive> select tblfull.id as id, tblfull.fmlname[0] as first, tblfull.fmlname[1] as middle, tblfull.fmlname[2] as last from (select id,split(fullname,',') as fmlname from ohm.tmptbl_fullname) as tblfull;
OK
100 Sankara Narayanan Ramaiah
101 Anbu Sudha Dhanukkodi
102 Naga Rathinam Dhanukkodi
103 Bala Subramanian Ramaiah
105 Seetha Raman Balasubramanian
106 Subramanian Nagappan Pallathur
107 Swaroop Venkatesh Bangalore
set mapred.job.queue.name=long_running;
insert into ohm.tblfirstmiddlelastnames select tblfull.id as id, tblfull.fmlname[0] as first, tblfull.fmlname[1] as middle, tblfull.fmlname[2] as last from (select id,split(fullname,',') as fmlname from ohm.tmptbl_fullname) as tblfull;
hive> select * from ohm.tblfirstmiddlelastnames;
OK
100 Sankara Narayanan Ramaiah
101 Anbu Sudha Dhanukkodi
102 Naga Rathinam Dhanukkodi
103 Bala Subramanian Ramaiah
105 Seetha Raman Balasubramanian
106 Subramanian Nagappan Pallathur
107 Swaroop Venkatesh Bangalore
Time taken: 0.085 seconds, Fetched: 7 row(s)
No comments:
Post a Comment