美文网首页
hive的练习

hive的练习

作者: 薛落花随泪绽放 | 来源:发表于2017-10-20 21:32 被阅读10次
    #cd
    #vi relations
    
    Zhangsan Wangwu 01:01:01
    Zhangsan Zhaoliu 00:11:21
    Zhangsan Yuqi 00:19:01
    Zhangsan Jingba 00:21:01
    Zhangsan Wuxi 01:31:17
    Wangwu Zhaoliu 00:51:01
    Wangwu Zhaoliu 01:11:19
    Wangwu Yuqi 00:00:21
    Wangwu Yuqi 00:37:21
    Yuqi Zhaoliu 00:23:01
    Yuqi Wuxi 00:18:00
    Jingba Wangwu 00:01:01
    Jingba Wangwu 00:00:06
    Jingba Wangwu 00:02:03
    Jingba Wangwu 00:02:54
    Wangwu Yuqi 01:00:19
    
    #hadoop fs -put relations /
    #vi test_mapper.py
    
    import sys
    for line in sys.stdin:
            line = line.strip()
            fromstr,tostr,time =line.split('\t')
            hours,minutes,secondes = time.split(':')
            newtime = int(hours)*60*60+int(minutes)*60+int(secondes)
            if cmp(fromstr,tostr) == -1:
              #print(cmp(fromstr,tostr))
              fromstr,tostr = tostr,fromstr
            print ' ' .join([fromstr+tostr,str(newtime)])
    
    
    #vi test.hive
    
    CREATE TABLE relations_new (
    fromtostr STRING,
       duration INT)
    ROW FORMAT DELIMITED
    FIELDS TERMINATED BY ' ';
    
    add FILE test_mapper.py;
    
    INSERT OVERWRITE TABLE relations_new
    SELECT
      TRANSFORM (fromstr,tostr,duration)
      USING 'python test_mapper.py'
      AS (fromtostr,duration)
    FROM relations;
    
    #hive
    create table relations (
    fromstr string,
    tostr string,
    duration string)
    row format delimited fields terminated by ' ';
    
    load data inpath '/relations' overwrite into table relations;
    exit;
    #hive -f test.hive
    #hive
    
    select 
        fromtostr,
        duration,
        rank() over(order by duration desc) rank,
    dense_rank() over(order by duration) dense_rank,
    row_number() over(order by duration) row_number
    from relations_new;
    
    OK
    WangwuJingba    6   1   1   1
    YuqiWangwu  21  2   2   2
    WangwuJingba    61  3   3   3
    WangwuJingba    123 4   4   4
    WangwuJingba    174 5   5   5
    ZhaoliuZhangsan 681 6   6   6
    YuqiWuxi    1080    7   7   7
    YuqiWangwu  1109    8   8   8
    ZhangsanYuqi    1141    9   9   9
    ZhangsanJingba  1261    10  10  10
    ZhaoliuYuqi 1381    11  11  11
    YuqiWangwu  2241    12  12  12
    ZhaoliuWangwu   3061    13  13  13
    YuqiWangwu  3619    14  14  14
    ZhangsanWangwu  3661    15  15  15
    ZhaoliuWangwu   4279    16  16  16
    ZhangsanWangwu  4713    17  17  17
    ZhangsanWuxi    5477    18  18  18
    Time taken: 27.345 seconds, Fetched: 18 row(s)
    
    
    
    

    相关文章

      网友评论

          本文标题:hive的练习

          本文链接:https://www.haomeiwen.com/subject/cscbuxtx.html