HIVE-DDL

作者: ZhangShiWen | 来源:发表于2021-01-05 10:36 被阅读0次

    DATABASE

    CREATE (DATABASE|SCHEMA) [IF NOT EXISTS] database_name

      [COMMENT database_comment]

      [LOCATION hdfs_path]   //外部部表默认路径

      [MANAGEDLOCATION hdfs_path] //管理表表默认路劲  4.0.0后出现的配置

      [WITH DBPROPERTIES (property_name=property_value, ...)];

    DROP (DATABASE|SCHEMA) [IF EXISTS] datebase_name [RESTRICT|CASCADE]

    //默认是RESTRICT 当database不为空,需要删除时,加上CASCADE

    ALTER (DATABASE|SCHEMA) database_name SET DBPROPERTIES (property_name=property_value, ...); 

    ALTER (DATABASE|SCHEMA) database_name SET OWNER [USER|ROLE] user_or_role; 

    ALTER (DATABASE|SCHEMA) database_name SET LOCATION hdfs_path;

    ALTER (DATABASE|SCHEMA) database_name SET MANAGEDLOCATION hdfs_path;

    TABLE

    CREATE [TEMPORARY] [EXTERNAL] TABLE [IF NOT EXISTS] [db_name.]table_name  //默认管理表

      [(col_name data_type [column_constraint_specification] [COMMENT col_comment], ... [constraint_specification])]

      [COMMENT table_comment]

      [PARTITIONED BY (col_name data_type [COMMENT col_comment], ...)]

    /*

    The CLUSTERED BY and SORTED BY creation commands do not affect how data is inserted into a table – only

    how it is read. This means that users must be careful to insert data correctly by specifying the number of

    reducers to be equal to the number of buckets, and using CLUSTER BY and SORT BY commands in their query.

    */

      [CLUSTERED BY (col_name, col_name, ...)  ////分桶表排序

      [SORTED BY (col_name [ASC|DESC], ...)] INTO num_buckets BUCKETS]////分桶表

      [SKEWED BY (col_name, col_name, ...)               

        ON ((col_value, col_value, ...), (col_value, col_value, ...), ...)

        [STORED AS DIRECTORIES]

      [

      [ROW FORMAT row_format] //表中数据每行的格式,定义数据字段的分隔符,集合元素的分隔符等

      [STORED AS file_format]

        | STORED BY 'storage.handler.class.name' [WITH SERDEPROPERTIES (...)] 

      ]

      [LOCATION hdfs_path]

      [TBLPROPERTIES (property_name=property_value, ...)] 

      [AS select_statement]; 

    //构建类似已有表的结构

    CREATE [TEMPORARY] [EXTERNAL] TABLE [IF NOT EXISTS] [db_name.]table_name

      LIKE existing_table_or_view_name

      [LOCATION hdfs_path];

      例子:

      create external table IF NOT EXIT table_name(col_name data_type COMMENT....)

      partition by (col_name data_type)

      ROW FORMAT DELIMITED

      FIELDS TERMINATED BY '\001'

      COLLECTION ITEMS TERMINATED BY '\002'  //数据类型:collection

      MAP KEYS TERMINATED BY '\003'  //数据类型:MAP

      LOCATION hdfs_path

      STORED AS

    注意:发现其实桶的概念就是MapReduce的分区的概念,两者完全相同。物理上每个桶就是目录里的一个文件,一个作业产生的桶(输出文件)数量和reduce任务个数相同。

    而分区表的概念,则是新的概念。分区代表了数据的仓库,也就是文件夹目录。每个文件夹下面可以放不同的数据文件。通过文件夹可以查询里面存放的文件。但文件夹本身和数据的内容毫无关系。

    桶则是按照数据内容的某个值进行分桶,把一个大文件散列称为一个个小文件。

    DROP TABLE [IF EXISTS] table_name [PURGE];

    TRUNCATE [TABLE] table_name [PARTITION partition_spec]

    ALTER TABLE table_name RENAME TO new_table_name

    ALTER TABLE table_name SET TBLPROPERTIES table_properties;

    table_properties:

      : (property_name = property_value, property_name = property_value, ... )

    ALTER TABLE table_name [PARTITION partition_spec] SET SERDE serde_class_name [WITH SERDEPROPERTIES serde_properties];

    ALTER TABLE table_name [PARTITION partition_spec] SET SERDEPROPERTIES serde_properties;

    serde_properties:

      : (property_name = property_value, property_name = property_value, ... )

    ALTER TABLE table_name SET SERDEPROPERTIES ('field.delim' = ',');

    ALTER TABLE table_name ADD [IF NOT EXISTS] PARTITION partition_spec [LOCATION 'location'][, PARTITION partition_spec [LOCATION 'location'], ...];

    partition_spec:

      : (partition_column = partition_col_value, partition_column = partition_col_value, ...)

      eg.

      ALTER TABLE page_view ADD PARTITION (dt='2008-08-08', country='us') location '/path/to/us/part080808'

                              PARTITION (dt='2008-08-09', country='us') location '/path/to/us/part080809';

    ALTER TABLE table_name DROP [IF EXISTS] PARTITION partition_spec[, PARTITION partition_spec, ...]

      [IGNORE PROTECTION] [PURGE];

    ALTER TABLE page_view DROP PARTITION (dt='2008-08-08', country='us');

    ALTER TABLE table_name [PARTITION partition_spec] CHANGE [COLUMN] col_old_name col_new_name column_type

      [COMMENT col_comment] [FIRST|AFTER column_name] [CASCADE|RESTRICT];

      eg.

      CREATE TABLE test_change (a int, b int, c int);

    // First change column a's name to a1.

    ALTER TABLE test_change CHANGE a a1 INT;

    ALTER TABLE test_change CHANGE a1 a2 STRING AFTER b;

    CREATE VIEW [IF NOT EXISTS] [db_name.]view_name [(column_name [COMMENT column_comment], ...) ]

      [COMMENT view_comment]

      [TBLPROPERTIES (property_name = property_value, ...)]

      AS SELECT ...;

      CREATE VIEW onion_referrers(url COMMENT 'URL of Referring page')

      COMMENT 'Referrers to The Onion website'

      AS

      SELECT DISTINCT referrer_url

      FROM page_view

      WHERE page_url='http://www.theonion.com';

    DROP VIEW [IF EXISTS] [db_name.]view_name;

    ALTER VIEW [db_name.]view_name SET TBLPROPERTIES table_properties;

    table_properties:

      : (property_name = property_value, property_name = property_value, ...)

      CREATE INDEX index_name

      ON TABLE base_table_name (col_name, ...)

      AS index_type

      [WITH DEFERRED REBUILD]

      [IDXPROPERTIES (property_name=property_value, ...)]

      [IN TABLE index_table_name]

      [

        [ ROW FORMAT ...] STORED AS ...

        | STORED BY ...

      ]

      [LOCATION hdfs_path]

      [TBLPROPERTIES (...)]

      [COMMENT "index comment"];

      DROP INDEX [IF EXISTS] index_name ON table_name;

    ALTER INDEX index_name ON table_name [PARTITION partition_spec] REBUILD;

    相关文章

      网友评论

          本文标题:HIVE-DDL

          本文链接:https://www.haomeiwen.com/subject/icrioktx.html