Hive-SQL

作者: HouserLin | 来源:发表于2016-10-23 10:16 被阅读101次

    每天有多少用户首次流失

    -- Daily count of users whose FIRST churn fell on that day, where "churn"
    -- means a gap of more than 7 days between consecutive login days.
    -- FIXES vs. original: lag() had no ORDER BY (row order is undefined in
    -- Hive without one), datediff() args are now (current, previous) so the
    -- gap is positive, and the unused row_number layer is removed.
    select t4.first_churn_date, count(*)
    from
    (
        -- earliest day on which each role shows a >7-day gap
        select t2.role_id, min(t2.date1) as first_churn_date
        from
        (
            -- pair each login day with the role's previous login day
            select t1.role_id,
                   t1.date date1,
                   lag(t1.date, 1) over (partition by t1.role_id order by t1.date asc) date2
            from
            (
                select distinct role_id, split(time, " ")[0] date
                from g37.logoutrole
                where date between 20160902 and 20161219
            ) t1
        ) t2
        -- date2 is NULL on a role's first login day; those rows drop out here
        where int(datediff(t2.date1, t2.date2)) > 7
        group by t2.role_id
    ) t4
    group by t4.first_churn_date
    

    分位数

    -- Quartiles (min / 25% / median / 75% / max) of per-role daily totals,
    -- one row per date.
    -- FIXES vs. original: the statement had no outer SELECT clause at all
    -- (it began with a bare "from("), and percentile(...) OVER (PARTITION BY
    -- date) would have emitted one duplicate row per role; a GROUP BY yields
    -- exactly one row per date.
    select t1.date,
           percentile(t1.a, array(0, 0.25, 0.5, 0.75, 1)) as b
    from
    (
        -- total num per role per day for the op-code of interest
        select date,
               role_id,
               sum(cast(int(fn.json(source, "$.num")) as bigint)) a
        from g37.currency_xunzhang
        where date between 20161108 and 20161115
          and int(fn.json(source, "$.code_op")) = 1
        group by date, role_id
    ) t1
    group by t1.date
    

    每天和每个月的去重登录id量
    -- Distinct login-id counts per day AND per month in a single table scan.
    -- GROUPING SETS produces one result set per listed grouping: one grouped
    -- by month(time) and one grouped by date; the column not in the active
    -- grouping comes back NULL.
    -- GROUPING__ID (double underscore — Hive-specific virtual column) tells
    -- which grouping set each output row belongs to.
    SELECT
    month(time),
    date,
    COUNT(DISTINCT role_id) AS uv,
    GROUPING__ID
    FROM g17_loginrole_utf8_log
    where date >= 20161001
    GROUP BY month(time), date
    GROUPING SETS (month(time), date)

    正则提取

    -- Frequency of each "level=<n>" value extracted from source.
    -- Computing the extraction once in a subquery avoids repeating the same
    -- regex three times (SELECT, COUNT and GROUP BY in the original).
    -- NOTE(review): count(*) counts all rows per group; the original's
    -- count(regexp_extract(...)) only differs for a NULL extraction group.
    select t1.level, count(*)
    from
    (
        select regexp_extract(source, "level=(\\d+)", 1) as level
        from xxx
        where date between 20160902 and 20160925
          and key rlike "xxx"
    ) t1
    group by t1.level
    

    转换时间格式

    -- Total review count (pj) and favorable sum (hp) over all
    -- (hour, server, uid) groups where pj - hp >= 10.
    -- FIXES vs. original: the aggregate filter moves from an outer WHERE into
    -- HAVING on the grouped query (equivalent, one less layer), and the three
    -- unnamed expression columns get aliases.
    select sum(t1.pj), sum(t1.hp)
    from
    (
        select fn.change_date_format(time, "yyyy-MM-dd HH:mm:ss", "yyyy-MM-dd HH") as hr,
               regexp_extract(source, "game_server=(\\d+)", 1) as server,
               regexp_extract(source, "game_uid=(\\d+)", 1) as uid,
               count(pingjia) as pj,
               sum(pingjia) as hp
        from xxx
        where date between 20160825 and 20160926
        group by fn.change_date_format(time, "yyyy-MM-dd HH:mm:ss", "yyyy-MM-dd HH"),
                 regexp_extract(source, "game_server=(\\d+)", 1),
                 regexp_extract(source, "game_uid=(\\d+)", 1)
        having count(pingjia) - sum(pingjia) >= 10
    ) t1
    

    连接字符串

    -- For each role, the left_count from its latest event: build "time|count",
    -- take the max (time strings in yyyy-MM-dd order sort chronologically),
    -- then split the count back out.
    -- BUG FIX: Hive's split() takes a REGEX, so the pipe must be escaped as
    -- "\\|" — an unescaped "|" is the alternation operator and splits the
    -- string between every single character.
    select role_id,
           split(max(concat(time, "|", get_json_object(source, "$.left_count"))), "\\|")[1]
    from xxx as aa
    where date between 20160925 and 20160927
    group by role_id
    

    最值

    -- Max left_count per role for the matching item.
    -- BUG FIX: get_json_object returns a STRING, so max() was lexicographic
    -- ("9" > "10"). Cast to int first — the same int(...) cast this file
    -- already uses for left_count in the query below (L101).
    select role_id, max(int(get_json_object(source, "$.left_count")))
    from xxx
    where get_json_object(source, "$.item_name") rlike "aaa"
    and date between 20160925 and 20160926
    group by role_id
    

    时间排序,取最后一条

    -- Compare, per (server, role_id), the max left_count observed over
    -- 0915-0926 (t3) against the left_count of the most recent event over
    -- 0925-0927 (t2), keeping only pairs where the latest equals the max.
    -- NOTE(review): the two date windows differ — confirm that is intentional.
    select t3.server, t3.role_id, t3.cou, t2.cou
    from
    (
    -- max left_count per server/role
    select server,
           role_id,
           max(int(get_json_object(source,"$.left_count"))) as cou
    from xxx
    where date between 20160915 and 20160926
      and get_json_object(source,"$.item_name") rlike "aaa"
    group by server,
             role_id
             ) as t3
    join
    
    (
    -- latest event per server/role: rank by time desc and keep rn = 1
    select t1.server, t1.role_id, int(get_json_object(t1.source,"$.left_count")) as cou from 
    (
    select * , ROW_NUMBER() over (partition by server, role_id ORDER BY time desc) as rn
    from xxx
    where date between 20160925 and 20160927
    and get_json_object(source,"$.item_name") rlike "aaa"
      ) as t1 where t1.rn = 1  ) as t2
    on t3.server = t2.server and t3.role_id = t2.role_id
    
    -- keep only roles whose latest value is also their window max
    where t3.cou = t2.cou
    
    

    rlike正则(或)

    -- Players whose name matches any of the listed keywords; rlike "a|b|c"
    -- is regex alternation, i.e. an OR over the patterns.
    SELECT player
    FROM xxx
    WHERE date BETWEEN 20161001 AND 20161008
      AND player RLIKE "重叠|叠加|冲突|触发"
    

    创建外部表

    -- External table over tab-delimited text files already sitting at
    -- /path/file/. EXTERNAL means dropping the table leaves the files intact.
    -- IMPROVEMENT: "if not exists" makes the DDL safe to re-run.
    create external table if not exists table_name (
      name string,
      urs string,
      ccid int,
      uid int,
      hn int,
      role_id int
      )
      row format delimited fields terminated by "\t"
      stored as textfile
      location '/path/file/'
    

    文件引用为表

    -- Export level-up events (levels 20 and 60, April 2016) for a set of
    -- role_ids to an HDFS directory.
    -- FIXES vs. original: a stray "===" separator and a duplicated
    -- "and a.role_id in (...)" clause (copy/paste artifact) made the
    -- statement invalid SQL; the duplicate is removed.
    insert overwrite directory 'hdfs:///path/path/sjsj'
    select a.role_id, time, fn.json(source, "$.role_level")
    from xxx as a
    where date >= 20160401 and date <= 20160430
      and fn.json(source, "$.reason") = "player-upgrade"
      and fn.json(source, "$.type") = "lv"
      and fn.json(source, "$.role_level") in ('20', '60')
      -- NOTE(review): comparing role_id against a `server` column looks
      -- wrong — confirm the subquery column against the real source table.
      and a.role_id in
      (select server from xxx)
    

    流失天数

    -- Per-account days-since-last-login ("liushi") and max total spend within
    -- the window, joined to the account's creation info.
    select t1.server, t1.account_id, t1.liushi, t1.cost, t2.role_id, t2.udid
    from
    (
        select server,
               account_id,
               -- BUG FIX: the original subtracted yyyymmdd integers directly
               -- (20161013 - max(date)), which is wrong across month
               -- boundaries (20161013 - 20160930 = 83, not 13 days).
               -- Convert to a real date and use datediff instead.
               datediff('2016-10-13',
                        from_unixtime(unix_timestamp(cast(max(date) as string), 'yyyyMMdd'),
                                      'yyyy-MM-dd')) as liushi,
               max(int(fn.json(source, "$.total_cost"))) as cost
        from loginrole
        where date between 20161006 and 20161013
        group by server, account_id
    ) t1
    join
    (
        select fn.json(source, "$.account_id") account_id,
               fn.json(source, "$.role_id") role_id,
               fn.json(source, "$.udid") udid,
               fn.json(source, "$.server") server
        from createrole
        where date between 20161006 and 20161012
    ) t2
    -- both equality keys belong in the join condition, not a post-join WHERE
    on t1.server = t2.server and t1.account_id = t2.account_id
    

    实际登录天数

    -- Number of distinct login DAYS per (server, account) in the window.
    -- SIMPLIFICATION: the original pre-deduplicated days in an inner group-by
    -- and counted rows outside; count(distinct ...) does both in one pass
    -- with the same result.
    select server,
           account_id,
           count(distinct split(time, " ")[0]) as login_days
    from loginrole
    where date between 20161006 and 20161015
    group by server, account_id
    

    key是数组

    -- Event count per role where the first element of the JSON cost array
    -- is one of the two item ids of interest.
    SELECT role_id,
           COUNT(*)
    FROM xxx
    WHERE date BETWEEN 20160902 AND 20160916
      AND fn.to_array(fn.json(source, "$.cost[0]"))[0] IN ("490001", "490003")
    GROUP BY role_id
    

    SUBSTRING ( expression, start, length )

    -- Accounts (on low-numbered servers, level > 5) whose login date is
    -- exactly 3 calendar days after another qualifying day for the same
    -- account: substring() reassembles the yyyymmdd integer into yyyy-MM-dd
    -- so date_add() can do calendar arithmetic, and "account|date" strings
    -- are matched against "account|date+3" keys built from every row.
    -- NOTE(review): Hive only supports IN (subquery) from 0.13 on — older
    -- versions need a LEFT SEMI JOIN instead.
    select distinct aa.account_id
    from xxx aa
    where date between 20161006 and 20161017
    and int(aa.server)<=20000
    and int(fn.json(source,"$.level"))>5
    -- "account|yyyy-MM-dd" key for the current row
    and concat(aa.account_id,"|",concat(substring(aa.date,1,4),"-",substring(aa.date,5,2),"-",substring(aa.date,7,2)))
    in (
    -- "account|date+3" keys built from every qualifying row
    select concat(account_id,"|",date_add(concat(substring(date,1,4),"-",substring(date,5,2),"-",substring(date,7,2)),3)) host_iid_seven
    from xxx
    where date between 20161006 and 20161017
    and int(server)<=20000
    )
    

    相关文章

      网友评论

          本文标题:Hive-SQL

          本文链接:https://www.haomeiwen.com/subject/xfjcuttx.html