Hive Tables:
Managed (internal) tables: data is managed by Hive and stored under the warehouse directory.
If you drop a managed table in Hive, the underlying files are also deleted from the file system.
External tables: data is not fully managed by Hive and lives outside the warehouse directory.
If you drop an external table in Hive, the files are not deleted from the file system.
External tables:
- share the data with other technologies
- Hadoop, Pig, HBase, and similar tools can all access and edit the same files
- deleting an external table deletes only the metadata
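A quick way to check whether an existing table is managed or external is DESCRIBE FORMATTED, which reports the table type (the table name below is just an example):

DESCRIBE FORMATTED salary;
-- look for the "Table Type:" row in the output:
--   MANAGED_TABLE  -> data files are deleted on DROP TABLE
--   EXTERNAL_TABLE -> only metadata is deleted on DROP TABLE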
External table:
create external table salary
(
salary_id string,
employee_id string,
payment double,
`date` string
)
ROW FORMAT DELIMITED FIELDS TERMINATED BY ','
LOCATION '/user/sample_data/salary';

LOAD DATA INPATH '/user/sample_data/salary/salary_201501.txt' INTO TABLE salary;
SELECT * FROM salary;
DROP TABLE salary;
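Because the table's LOCATION already points at /user/sample_data/salary, any files placed there are readable without a LOAD DATA step; and because the table is external, the files also survive the DROP. A quick check from the Hive shell (dfs runs HDFS commands without leaving Hive):

dfs -ls /user/sample_data/salary;
-- the data files are still listed even after DROP TABLE salary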
Internal (managed) table:
- data is owned by Hive; dropping the table also deletes the files from the warehouse directory
create table salary
(
salary_id string,
employee_id string,
payment double,
`date` string
)
ROW FORMAT DELIMITED FIELDS TERMINATED BY ',';

LOAD DATA INPATH '/user/sample_data/salary/salary_201501.txt' INTO TABLE salary;
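With the managed version, LOAD DATA moves the file under Hive's warehouse directory (the path below assumes the default hive.metastore.warehouse.dir and the default database):

dfs -ls /user/hive/warehouse/salary;
-- DROP TABLE salary; would remove both the metadata and these files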
create database retail;
show databases;
use retail;
create table txnrecords (txnno INT, txndate STRING, custno INT, amount DOUBLE, category STRING,
product STRING, city STRING, state STRING, spendby STRING) ROW FORMAT DELIMITED FIELDS TERMINATED BY ',' stored as textfile;
DESCRIBE txnrecords;
LOAD DATA INPATH '/txns1.txt' OVERWRITE INTO TABLE txnrecords;
create table customer(custno STRING, firstname STRING, lastname STRING, age INT, profession STRING) ROW FORMAT DELIMITED FIELDS TERMINATED BY ',';
LOAD DATA LOCAL INPATH '/home/cloudera/desktop/blog/custs' into table customer;
select count(*) from txnrecords;
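Once the transactions are loaded, simple aggregations are the usual next step; for example, total spend per category (a sketch using only the columns defined above):

SELECT category, SUM(amount) AS total_amount, COUNT(*) AS txn_count
FROM txnrecords
GROUP BY category;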
create external table example_customer (custno string, firstname string, lastname string, age int, profession string)
ROW FORMAT DELIMITED FIELDS TERMINATED BY ',' LOCATION '/user/external';
from customer c INSERT OVERWRITE TABLE example_customer SELECT c.custno, c.firstname, c.lastname, c.age, c.profession;
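The FROM ... INSERT OVERWRITE form copies the customer rows into the external table, so the data now also lives under /user/external where other tools can read it; a quick sanity check:

SELECT COUNT(*) FROM example_customer;
dfs -ls /user/external;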
create table txnrecsByCat (txnno INT, txndate STRING, custno INT, amount DOUBLE, product STRING, city STRING, state STRING, spendby STRING)
partitioned by (category STRING) clustered by (state) INTO 10 buckets ROW FORMAT DELIMITED FIELDS TERMINATED BY ',' stored as textfile;
set hive.exec.dynamic.partition=true;
set hive.exec.dynamic.partition.mode=nonstrict;
from txnrecords txn INSERT OVERWRITE TABLE txnrecsByCat PARTITION (category) SELECT txn.txnno, txn.txndate, txn.custno, txn.amount, txn.product, txn.city, txn.state, txn.spendby, txn.category;
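After the dynamic-partition insert, each distinct category value becomes its own partition; on older Hive releases you may also need set hive.enforce.bucketing=true; before the insert so the 10 buckets are actually written. To verify the partitions:

SHOW PARTITIONS txnrecsByCat;
-- lists one partition per distinct category value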
drop table customer;
load data local inpath 'aru.txt' into table employee1;
select count(*) from employee1;
select count(distinct f1) from cate;
create table result as select * from cate;
create table maildid(name string, email string) ROW FORMAT DELIMITED FIELDS TERMINATED BY ',';
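As with the earlier tables, maildid can be populated from a local comma-separated file and queried; the file name below is only illustrative:

LOAD DATA LOCAL INPATH 'mail_ids.csv' INTO TABLE maildid;
SELECT name, email FROM maildid LIMIT 10;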