美文网首页
一个关于hive的官网示例

一个关于hive的官网示例

作者: tonyemail_st | 来源:发表于2017-09-30 15:33 被阅读0次

    创建表格

    -- Staging table for the raw MovieLens ratings file (u.data).
    -- Stored as plain text, one record per line, fields separated by tabs.
    -- unixtime stays a STRING here and is parsed later by weekday_mapper.py.
    CREATE TABLE u_data (
        userid   INT,
        movieid  INT,
        rating   INT,
        unixtime STRING
    )
    ROW FORMAT DELIMITED
        FIELDS TERMINATED BY '\t'
    STORED AS TEXTFILE;
    

    下载并解压数据

    # wget http://files.grouplens.org/datasets/movielens/ml-100k.zip
    # unzip ml-100k.zip
    

    And load u.data into the table that was just created:

    -- LOCAL: the path refers to the filesystem of the machine running the Hive client.
    -- OVERWRITE: rows already present in u_data are replaced by this load.
    hive> LOAD DATA LOCAL INPATH './ml-100k/u.data' OVERWRITE INTO TABLE u_data;
    

    Create weekday_mapper.py:

    [root@master hive]# cat weekday_mapper.py 
    """Hive TRANSFORM mapper: swap each rating's unix timestamp for its ISO weekday.

    Reads tab-separated (userid, movieid, rating, unixtime) records from stdin
    and writes (userid, movieid, rating, weekday) records to stdout.
    Runs unchanged on both Python 2 and Python 3.
    """
    import sys
    import datetime


    def to_weekday_row(line):
        """Convert one input record into its output record.

        line -- a tab-separated "userid, movieid, rating, unixtime" record;
                surrounding whitespace (including the newline) is ignored.

        Returns the tab-separated "userid, movieid, rating, weekday" record,
        without a trailing newline. Raises ValueError if the record does not
        have exactly four fields or unixtime is not numeric.
        """
        userid, movieid, rating, unixtime = line.strip().split('\t')
        # fromtimestamp() without a tz argument converts to the local time zone
        # of the machine running the script; isoweekday() is 1=Monday .. 7=Sunday.
        weekday = datetime.datetime.fromtimestamp(float(unixtime)).isoweekday()
        return '\t'.join([userid, movieid, rating, str(weekday)])


    if __name__ == '__main__':
        for line in sys.stdin:
            # print(...) with one parenthesised argument is valid on Python 2 and 3,
            # unlike the Python 2-only print statement.
            print(to_weekday_row(line))
    

    创建hive脚本如下
    Use the mapper script:

    [root@master hive]# cat offical_new_sample.hive 
    -- Target table: the same ratings as u_data, but the last column holds
    -- a weekday number instead of the unix timestamp.
    CREATE TABLE u_data_new (
        userid  INT,
        movieid INT,
        rating  INT,
        weekday INT
    )
    ROW FORMAT DELIMITED
        FIELDS TERMINATED BY '\t';
    
    ADD FILE weekday_mapper.py;

    -- Pipe every row of u_data through weekday_mapper.py (registered by the
    -- ADD FILE statement) and replace the contents of u_data_new with the
    -- rows the script prints.
    INSERT OVERWRITE TABLE u_data_new
    SELECT TRANSFORM (userid, movieid, rating, unixtime)
        USING 'python weekday_mapper.py'
        AS (userid, movieid, rating, weekday)
    FROM u_data;
    
    -- Number of ratings that fall on each weekday.
    SELECT
        weekday,
        COUNT(*)
    FROM u_data_new
    GROUP BY
        weekday;
    

    执行脚本

    [root@master hive]# hive -f offical_new_sample.hive 
    
    

    相关文章

      网友评论

          本文标题:一个关于hive的官网示例

          本文链接:https://www.haomeiwen.com/subject/zozgextx.html