美文网首页
hive-复合数据类型

hive-复合数据类型

作者: 平头哥2 | 来源:发表于2019-06-13 13:31 被阅读0次

    数组

    需求:

    加入有一个表,其中的字段为array类型
    表数据:
    1,zhangsan,数学:语文:英语:生物
    2,lisi,数学:语文
    3,wangwu,化学:计算机:java编程
    

    建表:

    create table t_xuanxiu(uid string,name string,kc array<string>)
    row format delimited
    fields terminated by ','
    collection items terminated by ':';
    

    查看表信息:

    0: jdbc:hive2://slave01:10000> desc t_xuanxiu;
    +-----------+----------------+----------+
    | col_name  |   data_type    | comment  |
    +-----------+----------------+----------+
    | uid       | string         |          |
    | name      | string         |          |
    | kc        | array<string>  |          |
    +-----------+----------------+----------+
    3 rows selected (0.191 seconds)
    

    导入数据并查看:

    0: jdbc:hive2://slave01:10000> load data local inpath '/home/hadoop/test/xuanxiu.dat' into table t_xuanxiu;
    No rows affected (1.257 seconds)
    0: jdbc:hive2://slave01:10000> select * from t_xuanxiu;
    +----------------+-----------------+------------------------+
    | t_xuanxiu.uid  | t_xuanxiu.name  |      t_xuanxiu.kc      |
    +----------------+-----------------+------------------------+
    | 1              | zhangsan        | ["数学","语文","英语","生物"]  |
    | 2              | lisi            | ["数学","语文"]            |
    | 3              | wangwu          | ["化学","计算机","java编程"]  |
    +----------------+-----------------+------------------------+
    3 rows selected (0.302 seconds)
    -- 根据索引获取数组中的元素
    0: jdbc:hive2://slave01:10000> select kc[1] from t_xuanxiu;
    +------+
    | _c0  |
    +------+
    | 语文   |
    | 语文   |
    | 计算机  |
    +------+
    3 rows selected (0.306 seconds)
    -- 根据索引超出数组的范围,返回NULL
    0: jdbc:hive2://slave01:10000> select kc[2] from t_xuanxiu;
    +---------+
    |   _c0   |
    +---------+
    | 英语      |
    | NULL    |
    | java编程  |
    +---------+
    3 rows selected (0.302 seconds)
    
    -- 获取数组的大小
    0: jdbc:hive2://slave01:10000> select size(kc) from t_xuanxiu;
    +------+
    | _c0  |
    +------+
    | 4    |
    | 2    |
    | 3    |
    +------+
    3 rows selected (0.435 seconds)
    -- 判断数组中是否包含某个元素
    0: jdbc:hive2://slave01:10000> select array_contains(kc,'数学') from t_xuanxiu;
    +--------+
    |  _c0   |
    +--------+
    | true   |
    | true   |
    | false  |
    +--------+
    3 rows selected (0.309 seconds)
    

    Map类型

    需求:

    -- 有如下数据,希望存入到表结构中
    1,zhangsan,father:xiaoming#mother:xiaohuang#brother:xiaoxu,28
    2,lisi,father:mayun#mother:huangyi#brother:guanyu,22
    3,wangwu,father:wangjianlin#mother:ruhua#sister:jingtian,29
    4,mayun,father:mayongzhen#mother:angelababy,26
    

    建表映射上述数据

    create table t_family(id int,name string,family_members map<string,string>,age int)
    row format delimited fields terminated by ',' -- 字段属性分隔符
    collection items terminated by '#' -- 集合元素的分隔符
    map keys terminated by ':'; -- map中key和value的分隔符
    
    -- 查看表结构
    0: jdbc:hive2://slave01:10000> desc t_family;
    +-----------------+---------------------+----------+
    |    col_name     |      data_type      | comment  |
    +-----------------+---------------------+----------+
    | id              | int                 |          |
    | name            | string              |          |
    | family_members  | map<string,string>  |          |
    | age             | int                 |          |
    +-----------------+---------------------+----------+
    4 rows selected (0.263 seconds)
    
    --导入数据并查询
    0: jdbc:hive2://slave01:10000> load data local inpath '/home/hadoop/test/map.dat' into table t_family;
    No rows affected (0.703 seconds)
    0: jdbc:hive2://slave01:10000> select * from t_family;
    +--------------+----------------+----------------------------------------------------+---------------+
    | t_family.id  | t_family.name  |              t_family.family_members               | t_family.age  |
    +--------------+----------------+----------------------------------------------------+---------------+
    | 1            | zhangsan       | {"father":"xiaoming","mother":"xiaohuang","brother":"xiaoxu"} | 28            |
    | 2            | lisi           | {"father":"mayun","mother":"huangyi","brother":"guanyu"} | 22            |
    | 3            | wangwu         | {"father":"wangjianlin","mother":"ruhua","sister":"jingtian"} | 29            |
    | 4            | mayun          | {"father":"mayongzhen","mother":"angelababy"}      | 26            |
    +--------------+----------------+----------------------------------------------------+---------------+
    4 rows selected (0.293 seconds)
    
    

    sql示例:

    -- 查出每个人的 爸爸、姐妹
    select id,name,family_members["father"] as father,family_members["sister"] as sister,age
    from t_family;
    
    -- 查出每个人有哪些亲属关系
    select id,name,map_keys(family_members) as relations,age
    from  t_family;
    
    -- 查出每个人的亲人名字
    select id,name,map_values(family_members) as relations,age
    from  t_family;
    
    -- 查出每个人的亲人数量
    select id,name,size(family_members) as relations,age
    from  t_family;
    
    -- 查出所有拥有兄弟的人及他的兄弟是谁
    -- 方案1:一句话写完
    select id,name,age,family_members['brother']
    from t_family  where array_contains(map_keys(family_members),'brother');
    -- 方案2:子查询
    select id,name,age,family_members['brother']
    from
    (select id,name,age,map_keys(family_members) as relations,family_members 
    from t_family) tmp 
    where array_contains(relations,'brother');
    

    结构体struct

    需求

    假如有以下数据:
    1,zhangsan,18:male:深圳
    2,lisi,28:female:北京
    3,wangwu,38:male:广州
    4,赵六,26:female:上海
    5,钱琪,35:male:杭州
    6,王八,48:female:南京
    

    建表映射上述数据

    create table t_user(id int,name string,info struct<age:int,sex:string,addr:string>)
    row format delimited fields terminated by ','
    collection items terminated by ':';
    

    查看结构:

    0: jdbc:hive2://slave01:10000> desc t_user;
    +-----------+-----------------------------------------+----------+
    | col_name  |                data_type                | comment  |
    +-----------+-----------------------------------------+----------+
    | id        | int                                     |          |
    | name      | string                                  |          |
    | info      | struct<age:int,sex:string,addr:string>  |          |
    +-----------+-----------------------------------------+----------+
    3 rows selected (0.219 seconds)
    

    导入数据并查看

    0: jdbc:hive2://slave01:10000> load data local inpath '/home/hadoop/test/struct.dat' into table t_user;
    0: jdbc:hive2://slave01:10000> select * from t_user;
    +------------+--------------+----------------------------------------+
    | t_user.id  | t_user.name  |              t_user.info               |
    +------------+--------------+----------------------------------------+
    | 1          | zhangsan     | {"age":18,"sex":"male","addr":"深圳"}    |
    | 2          | lisi         | {"age":28,"sex":"female","addr":"北京"}  |
    | 3          | wangwu       | {"age":38,"sex":"male","addr":"广州"}    |
    | 4          | 赵六           | {"age":26,"sex":"female","addr":"上海"}  |
    | 5          | 钱琪           | {"age":35,"sex":"male","addr":"杭州"}    |
    | 6          | 王八           | {"age":48,"sex":"female","addr":"南京"}  |
    +------------+--------------+----------------------------------------+
    6 rows selected (0.427 seconds)
    
    -- 查询每个人的id name和地址
    0: jdbc:hive2://slave01:10000> select id,name,info.addr
    . . . . . . . . . . . . . . .> from t_user;
    

    相关文章

      网友评论

          本文标题:hive-复合数据类型

          本文链接:https://www.haomeiwen.com/subject/rtzjfctx.html