基本操作
数据类型
tinyint/smallint/bigint/float/double/boolean/string
Array/Map/Struct
Date/Timestamp
表操作
create table t(pid int, pname string, married boolean, salary double);
create table t(sid int, sname string, grade array<float>);
create table t(sid int, sname string, grade map<string, float>);
create table t(sid int, sname string, grade array<map<string, float>>);
create table t(sid int, info struct<name:string,age:int,sex:string>);
select unix_timestamp();
create table t(id int, name string, age int);
create table t(id int, name string, age int) location '/data/hive/t';
create table t(id int, name string, age int) \
row format delimited fields terminated by ',';
create table t as select * from sample_data;
create table t row format delimited fields terminated by ',' \
as select * from sample_data;
alter table t add columns (english int);
drop table t;
creata table p_t(sid int, sname string) \
partitioned by (gender string) \
row format delimited fields terminated by ',';
create external table e_t(sid int, sname string, age int) \
row format delimited fields terminated by ',' location '/input';
create table b_t(sid int, sname string, age int) \
clustered by (sname) into 5 buckets;
select [all|distinct] select_expr from table_reference \
[where where_condition] \
[group by col_list]
[cluster by col_list | \
distribute by col_list | sort by col_list | order by col_list]
[limit number]
一个实例
create table userinfo(id int, name string) \
row format delimited fields terminated by '\t';
create table classinfo(teacher string, classname string) \
row format delimited fields terminated by '\t';
create table choice(userid int, classname string) \
row format delimited fields terminated by '\t';
load data local inpath '/opt/userinfo.data' overwrite into table userinfo;
# 分区
create table ptest(userid int) partitioned by (name string) \
row format delimited fields terminated by '\t';
# 桶
# 分桶比分区查询效率更高,并且便于进行数据采样
set hive.enforce.bucketing=true;
create table btest(id int, name string) \
clustered by(id) into 3 buckets \
row format delimited fields terminated by '\t';
insert overwrite table btest select * from userinfo;
dfs ls /data/hive/warehouse/btest
dfs -cat /data/hive/warehouse/btest/000000_0
视图
create view empinfo as \
select e.empno, e.ename, e.sal, d.dname from emp e, dept d \
where e.deptno = d.deptno;
select * from empinfo;
连接
内连接:select userinfo.*, choice.* from userinfo join choice on (userinfo.id = choice.userid);
左外连接:select userinfo.*, choice.* from userinfo left outer join choice on (userinfo.id = choice.userid);
右外连接:select userinfo.*, choice.* from userinfo right outer join choice on (userinfo.id = choice.userid);
全外连接:select userinfo.* from userinfo full outer join choice on (userinfo.id = choice.userid);
半连接:select userinfo.* from userinfo left semi join choice on (userinfo.id = choice.userid);
导入数据
load data [local] inpath 'filepath' \
[overwrite] into table tablename \
[partition (partcol1=val1)] # local表示本地
load data inpath '/data/hive/data/t.txt' into table t;
load data inpath '/data/hive/data/' overwrite into table t;
load data inpath '/data/hive/partition/data-01.txt' \
into table p_t partition (gender='M');
load data inpath '/data/hive/partition/data-02.txt' \
into table p_t partition (gender='F');
hive程序
vim pom.xml
<dependency>
<groupId>org.apache.hive</groupId>
<artifactId>hive-jdbc</artifactId>
<version>2.3.3</version>
</dependency>
网友评论