Hive | 基本操作

作者: icebreakeros | 来源:发表于2019-07-04 07:56 被阅读0次

HIVE
Hive之同比环比的计算
Hive sql常见操作
Hive 基本操作
Hive 基本操作
Hive基本操作
Hive基本操作
Hive基本操作
hive 基本操作
Hive基本操作

基本操作

数据类型

tinyint/smallint/bigint/float/double/boolean/string
Array/Map/Struct
Date/Timestamp

表操作

create table t(pid int, pname string, married boolean, salary double);
create table t(sid int, sname string, grade array<float>);
create table t(sid int, sname string, grade map<string, float>);
create table t(sid int, sname string, grade array<map<string, float>>);
create table t(sid int, info struct<name:string,age:int,sex:string>);
select unix_timestamp();

create table t(id int, name string, age int);
create table t(id int, name string, age int) location '/data/hive/t';
create table t(id int, name string, age int) \
row format delimited fields terminated by ',';

create table t as select * from sample_data;
create table t row format delimited fields terminated by ',' \
as select * from sample_data;

alter table t add columns (english int);
drop table t;

creata table p_t(sid int, sname string) \
partitioned by (gender string) \
row format delimited fields terminated by ',';

create external table e_t(sid int, sname string, age int) \
row format delimited fields terminated by ',' location '/input';

create table b_t(sid int, sname string, age int) \
clustered by (sname) into 5 buckets;

select [all|distinct] select_expr from table_reference \
[where where_condition] \
[group by col_list]
[cluster by col_list | \
distribute by col_list | sort by col_list | order by col_list]
[limit number]

一个实例

create table userinfo(id int, name string) \
row format delimited fields terminated by '\t';

create table classinfo(teacher string, classname string) \
row format delimited fields terminated by '\t';

create table choice(userid int, classname string) \
row format delimited fields terminated by '\t';

load data local inpath '/opt/userinfo.data' overwrite into table userinfo;

# 分区
create table ptest(userid int) partitioned by (name string) \
row format delimited fields terminated by '\t';

# 桶
# 分桶比分区查询效率更高，并且便于进行数据采样
set hive.enforce.bucketing=true;

create table btest(id int, name string) \
clustered by(id) into 3 buckets \
row format delimited fields terminated by '\t';

insert overwrite table btest select * from userinfo;
dfs ls /data/hive/warehouse/btest
dfs -cat /data/hive/warehouse/btest/000000_0

视图

create view empinfo as \
select e.empno, e.ename, e.sal, d.dname from emp e, dept d \
where e.deptno = d.deptno;

select * from empinfo;

连接

内连接：select userinfo.*, choice.* from userinfo join choice on (userinfo.id = choice.userid);
左外连接：select userinfo.*, choice.* from userinfo left outer join choice on (userinfo.id = choice.userid);
右外连接：select userinfo.*, choice.* from userinfo right outer join choice on (userinfo.id = choice.userid);
全外连接：select userinfo.* from userinfo full outer join choice on (userinfo.id = choice.userid);
半连接：select userinfo.* from userinfo left semi join choice on (userinfo.id = choice.userid);

导入数据

load data [local] inpath 'filepath' \
[overwrite] into table tablename \
[partition (partcol1=val1)] # local表示本地

load data inpath '/data/hive/data/t.txt' into table t;
load data inpath '/data/hive/data/' overwrite into table t;

load data inpath '/data/hive/partition/data-01.txt' \
into table p_t partition (gender='M');

load data inpath '/data/hive/partition/data-02.txt' \
into table p_t partition (gender='F');

hive程序

vim pom.xml
<dependency>
    <groupId>org.apache.hive</groupId>
    <artifactId>hive-jdbc</artifactId>
    <version>2.3.3</version>
</dependency>

网友评论

工作生活

本文标题：Hive | 基本操作

本文链接：https://www.haomeiwen.com/subject/qnsvcctx.html

延伸阅读

深度阅读

您也可以注册成为美文阅读网的作者，发表您的原创作品、分享您的心情！

Hive | 基本操作

基本操作

数据类型

表操作

一个实例

视图

连接

导入数据

hive程序

相关文章

HIVE

Hive之同比环比的计算

Hive sql常见操作

Hive 基本操作

Hive 基本操作

Hive基本操作

Hive基本操作

Hive基本操作

hive 基本操作

Hive基本操作

网友评论

延伸阅读

深度阅读

栏目导航

热点阅读

工作生活