How to parse fixed format text file using Regular Expression SerDe in Hive?
// We have a text file in which every field has a fixed column width.
// The columns are id, name, and age, with widths of 5, 10, and 2 characters respectively.
$ hdfs dfs -cat /user/cloudera/person-fixed/p1.txt
00001Amar      28
00002Biswal    32
00003AnilKumble36
00004Bilal     53
00005KalaiSelvi32
// Here the regular expression captures the first 5 characters as id,
// the next 10 characters as name, and the last 2 characters as age: "(.{5})(.{10})(.{2})"
// Specify only the HDFS folder location; do not include the file name.
-- External table over fixed-width text records parsed by RegexSerDe.
-- Each capture group in input.regex feeds one column, in declaration order:
--   group 1 = 5 characters  -> id
--   group 2 = 10 characters -> name
--   group 3 = 2 characters  -> age
-- LOCATION names the HDFS directory that holds the data, not a single file.
CREATE EXTERNAL TABLE person_fixed (
    id   INT,
    name STRING,
    age  INT
)
ROW FORMAT SERDE 'org.apache.hadoop.hive.serde2.RegexSerDe'
WITH SERDEPROPERTIES (
    "input.regex" = "(.{5})(.{10})(.{2})"
)
LOCATION '/user/cloudera/person-fixed';
hive> select * from person_fixed;
OK
1 Amar 28
2 Biswal 32
3 AnilKumble 36
4 Bilal 53
5 KalaiSelvi 32
Time taken: 0.045 seconds, Fetched: 5 row(s)
No comments:
Post a Comment