美文网首页
hive 定义表

hive 定义表

作者: 哈斯勒 | 来源:发表于2019-06-13 12:41 被阅读0次
    生成表语法:
    CREATE [EXTERNAL] TABLE [IF NOT EXISTS] table_name 
    
    [(col_name data_type [COMMENT col_comment], ...)] 
    
    [COMMENT table_comment] 
    
    [PARTITIONED BY (col_name data_type [COMMENT col_comment], ...)] 
    
    [CLUSTERED BY (col_name, col_name, ...) 
    
    [SORTED BY (col_name [ASC|DESC], ...)] INTO num_buckets BUCKETS] 
    
    [ROW FORMAT row_format] 
    
    [STORED AS file_format] 
    
    [LOCATION hdfs_path]
    
    内部表(管理表)
    1)普通创建表
    create table if not exists student2(
    id int, name string)
    row format delimited 
    fields terminated by '\t'
    stored as textfile
    location '/user/hive/warehouse/student2';
    2)根据查询结果创建表(查询的结果会添加到新创建的表中)
    create table if not exists student3 as select id, name from student;
    3)根据已经存在的表结构创建表
    create table if not exists student4 like student;
    4)查询表的类型
    hive (default)> desc formatted student2;
    Table Type:             MANAGED_TABLE  
    
    外部表:
    hive> create external table if not exists dept(deptid int, dname string, location int)
     row format delimited fields terminated by '\t';
    OK
    hive> create external table if not exists default.emp(
        > empno int,
        > ename string,
        > job string,
        > mgr int,
        > hiredate string, 
        > sal double, 
        > comm double,
        > deptno int)
        > row format delimited fields terminated by '\t';
    OK
    Time taken: 0.3 seconds
    hive> load data local inpath '/data/hive/dept.txt' into table dept;
    Loading data to table default.dept
    OK
    Time taken: 0.69 seconds
    hive> load data local inpath '/data/hive/emp.txt' into table emp;
    Loading data to table default.emp
    OK
    
    hive> alter table student set tblproperties('EXTERNAL=TRUE');
    hive> alter table student set tblproperties('EXTERNAL'='FLASE');
    
    hive> desc formatted student;
    
    建立分区
    hive> create table stu_partition(id int,name string)
           > partitioned by(month string)
           > row format delimited fields terminated by '\t';
    hive> dfs -ls /user/hive/warehouse;
    drwxr-xr-x   - root supergroup          0 2019-06-13 14:23 /user/hive/warehouse/stu_partition
    
    加载数据到分区表中
    hive> load data local inpath '/data/hive/student.txt' into table stu_partition partition(month="20200623");
    hive> load data local inpath '/data/hive/student.txt' into table stu_partition partition(month="20200624");
    
    hive> select * from stu_partition where month="20200624";
    hive> select * from stu_partition where month="20200624" or month="20200623";
    hive > select * from stu_partition where month='20200624'
                  union
                  select * from stu_partition where month='20200623'
                  union
                  select * from stu_partition where month='20200622';
    
    增加分区
    hive> alter table stu_partition add partition(month="20200625");
    hive> alter table stu_partition add partition(month="20200627") partition(month="20200628");
    
    hive> alter table stu_partition drop partition(month="20200626");
    hive> alter table stu_partition drop partition(month="20200626"),partition(month="20200627") ;
    
    
    建立二级分区:
    hive> create table stu2(id int,name string) partitioned by(month string,day string)
     row format delimited fields terminated by '\t';
    
    hive> load data local inpath '/data/hive/student.txt'
        > into table stu2
        > partition(month="202006",day="23");
    
    hive> dfs -ls /user/hive/warehouse/stu2;
    hive> select * from stu2;
    10      lily    202006  23
    20      tom     202006  23
    Time taken: 0.298 seconds, Fetched: 2 row(s)
    
    
    把数据直接上传到分区目录上,让分区表和数据产生关联的三种方式
    1. 创建文件夹后load数据到分区
    hive> dfs -mkdir -p /user/hive/warehouse/stu_partition/month=20200702
    hive> load data local inpath '/data/hive/student.txt' into table stu_partition partition(month=20200702);
    hive> select * from stu_partition where month=20200702;
    
    2.上传数据后添加分区
    hive> dfs -mkdir -p /user/hive/warehouse/stu_partition/month=20200701;
    hive> dfs -put /data/hive/student.txt /user/hive/warehouse/stu_partition/month=20200701;
    hive> alter table stu_partition add partition(month=20200701);
    hive> select * from stu_partition where month=20200701;
    
    3.上传数据后修复
    hive> dfs -mkdir -p /user/hive/warehouse/stu_partition/month=20200630;
    hive> dfs -put /data/hive/student  /user/hive/warehouse/stu_partition/month=20200630;
    hive> msck repair table stu_partition;
    hive> select *from stu_partition where month=20200626;
    
    
    修改表结构
    hive> alter table student change column id stu_id int;
    
    hive> alter table dept_partition replace columns(deptno string, dname
    string, loc string);
    
    ``
    

    相关文章

      网友评论

          本文标题:hive 定义表

          本文链接:https://www.haomeiwen.com/subject/xlpjfctx.html