美文网首页
hive 定义表

hive 定义表

作者: 哈斯勒 | 来源:发表于2019-06-13 12:41 被阅读0次
生成表语法:
CREATE [EXTERNAL] TABLE [IF NOT EXISTS] table_name 

[(col_name data_type [COMMENT col_comment], ...)] 

[COMMENT table_comment] 

[PARTITIONED BY (col_name data_type [COMMENT col_comment], ...)] 

[CLUSTERED BY (col_name, col_name, ...) 

[SORTED BY (col_name [ASC|DESC], ...)] INTO num_buckets BUCKETS] 

[ROW FORMAT row_format] 

[STORED AS file_format] 

[LOCATION hdfs_path]
内部表(管理表)
1)普通创建表
create table if not exists student2(
id int, name string)
row format delimited 
fields terminated by '\t'
stored as textfile
location '/user/hive/warehouse/student2';
2)根据查询结果创建表(查询的结果会添加到新创建的表中)
create table if not exists student3 as select id, name from student;
3)根据已经存在的表结构创建表
create table if not exists student4 like student;
4)查询表的类型
hive (default)> desc formatted student2;
Table Type:             MANAGED_TABLE  
外部表:
hive> create external table if not exists dept(deptid int, dname string, location int)
 row format delimited fields terminated by '\t';
OK
hive> create external table if not exists default.emp(
    > empno int,
    > ename string,
    > job string,
    > mgr int,
    > hiredate string, 
    > sal double, 
    > comm double,
    > deptno int)
    > row format delimited fields terminated by '\t';
OK
Time taken: 0.3 seconds
hive> load data local inpath '/data/hive/dept.txt' into table dept;
Loading data to table default.dept
OK
Time taken: 0.69 seconds
hive> load data local inpath '/data/hive/emp.txt' into table emp;
Loading data to table default.emp
OK

hive> alter table student set tblproperties('EXTERNAL=TRUE');
hive> alter table student set tblproperties('EXTERNAL'='FLASE');

hive> desc formatted student;
建立分区
hive> create table stu_partition(id int,name string)
       > partitioned by(month string)
       > row format delimited fields terminated by '\t';
hive> dfs -ls /user/hive/warehouse;
drwxr-xr-x   - root supergroup          0 2019-06-13 14:23 /user/hive/warehouse/stu_partition

加载数据到分区表中
hive> load data local inpath '/data/hive/student.txt' into table stu_partition partition(month="20200623");
hive> load data local inpath '/data/hive/student.txt' into table stu_partition partition(month="20200624");

hive> select * from stu_partition where month="20200624";
hive> select * from stu_partition where month="20200624" or month="20200623";
hive > select * from stu_partition where month='20200624'
              union
              select * from stu_partition where month='20200623'
              union
              select * from stu_partition where month='20200622';

增加分区
hive> alter table stu_partition add partition(month="20200625");
hive> alter table stu_partition add partition(month="20200627") partition(month="20200628");

hive> alter table stu_partition drop partition(month="20200626");
hive> alter table stu_partition drop partition(month="20200626"),partition(month="20200627") ;


建立二级分区:
hive> create table stu2(id int,name string) partitioned by(month string,day string)
 row format delimited fields terminated by '\t';

hive> load data local inpath '/data/hive/student.txt'
    > into table stu2
    > partition(month="202006",day="23");

hive> dfs -ls /user/hive/warehouse/stu2;
hive> select * from stu2;
10      lily    202006  23
20      tom     202006  23
Time taken: 0.298 seconds, Fetched: 2 row(s)

把数据直接上传到分区目录上,让分区表和数据产生关联的三种方式
1. 创建文件夹后load数据到分区
hive> dfs -mkdir -p /user/hive/warehouse/stu_partition/month=20200702
hive> load data local inpath '/data/hive/student.txt' into table stu_partition partition(month=20200702);
hive> select * from stu_partition where month=20200702;

2.上传数据后添加分区
hive> dfs -mkdir -p /user/hive/warehouse/stu_partition/month=20200701;
hive> dfs -put /data/hive/student.txt /user/hive/warehouse/stu_partition/month=20200701;
hive> alter table stu_partition add partition(month=20200701);
hive> select * from stu_partition where month=20200701;

3.上传数据后修复
hive> dfs -mkdir -p /user/hive/warehouse/stu_partition/month=20200630;
hive> dfs -put /data/hive/student  /user/hive/warehouse/stu_partition/month=20200630;
hive> msck repair table stu_partition;
hive> select *from stu_partition where month=20200626;

修改表结构
hive> alter table student change column id stu_id int;

hive> alter table dept_partition replace columns(deptno string, dname
string, loc string);

``

相关文章

网友评论

      本文标题:hive 定义表

      本文链接:https://www.haomeiwen.com/subject/xlpjfctx.html