生成表语法:
CREATE [EXTERNAL] TABLE [IF NOT EXISTS] table_name
[(col_name data_type [COMMENT col_comment], ...)]
[COMMENT table_comment]
[PARTITIONED BY (col_name data_type [COMMENT col_comment], ...)]
[CLUSTERED BY (col_name, col_name, ...)
[SORTED BY (col_name [ASC|DESC], ...)] INTO num_buckets BUCKETS]
[ROW FORMAT row_format]
[STORED AS file_format]
[LOCATION hdfs_path]
内部表(管理表)
1)普通创建表
create table if not exists student2(
id int, name string)
row format delimited
fields terminated by '\t'
stored as textfile
location '/user/hive/warehouse/student2';
2)根据查询结果创建表(查询的结果会添加到新创建的表中)
create table if not exists student3 as select id, name from student;
3)根据已经存在的表结构创建表
create table if not exists student4 like student;
4)查询表的类型
hive (default)> desc formatted student2;
Table Type: MANAGED_TABLE
外部表:
hive> create external table if not exists dept(deptid int, dname string, location int)
row format delimited fields terminated by '\t';
OK
hive> create external table if not exists default.emp(
> empno int,
> ename string,
> job string,
> mgr int,
> hiredate string,
> sal double,
> comm double,
> deptno int)
> row format delimited fields terminated by '\t';
OK
Time taken: 0.3 seconds
hive> load data local inpath '/data/hive/dept.txt' into table dept;
Loading data to table default.dept
OK
Time taken: 0.69 seconds
hive> load data local inpath '/data/hive/emp.txt' into table emp;
Loading data to table default.emp
OK
hive> alter table student set tblproperties('EXTERNAL=TRUE');
hive> alter table student set tblproperties('EXTERNAL'='FLASE');
hive> desc formatted student;
建立分区
hive> create table stu_partition(id int,name string)
> partitioned by(month string)
> row format delimited fields terminated by '\t';
hive> dfs -ls /user/hive/warehouse;
drwxr-xr-x - root supergroup 0 2019-06-13 14:23 /user/hive/warehouse/stu_partition
加载数据到分区表中
hive> load data local inpath '/data/hive/student.txt' into table stu_partition partition(month="20200623");
hive> load data local inpath '/data/hive/student.txt' into table stu_partition partition(month="20200624");
hive> select * from stu_partition where month="20200624";
hive> select * from stu_partition where month="20200624" or month="20200623";
hive > select * from stu_partition where month='20200624'
union
select * from stu_partition where month='20200623'
union
select * from stu_partition where month='20200622';
增加分区
hive> alter table stu_partition add partition(month="20200625");
hive> alter table stu_partition add partition(month="20200627") partition(month="20200628");
hive> alter table stu_partition drop partition(month="20200626");
hive> alter table stu_partition drop partition(month="20200626"),partition(month="20200627") ;
建立二级分区:
hive> create table stu2(id int,name string) partitioned by(month string,day string)
row format delimited fields terminated by '\t';
hive> load data local inpath '/data/hive/student.txt'
> into table stu2
> partition(month="202006",day="23");
hive> dfs -ls /user/hive/warehouse/stu2;
hive> select * from stu2;
10 lily 202006 23
20 tom 202006 23
Time taken: 0.298 seconds, Fetched: 2 row(s)
把数据直接上传到分区目录上,让分区表和数据产生关联的三种方式
1. 创建文件夹后load数据到分区
hive> dfs -mkdir -p /user/hive/warehouse/stu_partition/month=20200702
hive> load data local inpath '/data/hive/student.txt' into table stu_partition partition(month=20200702);
hive> select * from stu_partition where month=20200702;
2.上传数据后添加分区
hive> dfs -mkdir -p /user/hive/warehouse/stu_partition/month=20200701;
hive> dfs -put /data/hive/student.txt /user/hive/warehouse/stu_partition/month=20200701;
hive> alter table stu_partition add partition(month=20200701);
hive> select * from stu_partition where month=20200701;
3.上传数据后修复
hive> dfs -mkdir -p /user/hive/warehouse/stu_partition/month=20200630;
hive> dfs -put /data/hive/student /user/hive/warehouse/stu_partition/month=20200630;
hive> msck repair table stu_partition;
hive> select *from stu_partition where month=20200626;
修改表结构
hive> alter table student change column id stu_id int;
hive> alter table dept_partition replace columns(deptno string, dname
string, loc string);
``
网友评论