美文网首页
四、Hive使用

四、Hive使用

作者: 人间桑 | 来源:发表于2020-07-01 16:19 被阅读0次

    一、每个用户截止到每月为止的最大交易金额和累计到该月的总交易金额

    over(partition by a order by b) 最后处理的窗口函数,partition by分组,order by排序,然后前面加聚合函数,如有排序,则聚合函数只处理b之前的项 row_number() over()顺序

    select

        customer_key,

        substr(create_date,0,7) umonth,

        count(distinct sales_order_key) ucount,

        sum(unit_price) as usum

    from ods_sales_orders

    group by customer_key,substr(create_date,0,7)

    limit 10;

    select

        customer_key,umonth,ucount,max(usum) over(partition by customer_key order by umonth) current_max,

        sum(usum) over(partition by customer_key order by umonth) current_sum

    from(select

    customer_key,

     substr(create_date,0,7) umonth,

    count(distinct sales_order_key) ucount,

        sum(unit_price) as usum

    from ods_sales_orders

    group by customer_key,substr(create_date,0,7)) a

    limit 10;

    二、计算用户的回购率和复购率

    复购率:

    select

        customer_key,

        substr(create_date,0,7) umonth,

        count(distinct sales_order_key) ucount

    from ods_sales_orders

    group by customer_key,substr(create_date,0,7)

    limit 10;

    select

        umonth,

        count(distinct customer_key) customer_num,

        sum(if(ucount>1,1,0)) Rep_num,

        sum(if(ucount>1,1,0))/count(distinct customer_key) rep_rate

    from(select

        customer_key,

        substr(create_date,0,7) umonth,

        count(distinct sales_order_key) ucount

    from ods_sales_orders

    group by customer_key,substr(create_date,0,7)) a

    group by umonth;

    回购率:

    select

        a_umonth,

        count(customer_key) as ct,

        count(b_umonth) as reb_num, 

        concat(round(count(b_umonth)/count(distinct customer_key)*100,2),'%') as reb_rate

    from(select

        a.customer_key,a.umonth a_umonth,b.umonth b_umonth

    from(select

        customer_key,

        substr(create_date,0,7) umonth

    from ods_sales_orders

    group by customer_key,substr(create_date,0,7)) a

    left join (select

        customer_key,

        substr(create_date,0,7) umonth

    from ods_sales_orders

    group by customer_key,substr(create_date,0,7)) b

    on a.customer_key=b.customer_key and

    ((substr(a.umonth,6,2)=substr(b.umonth,6,2)-1 and substr(b.umonth,1,4)=substr(a.umonth,1,4)) or (substr(a.umonth,6,2)='12' and substr(b.umonth,6,2)='01' and substr(b.umonth,1,4)=substr(a.umonth,1,4)+1))) c

    group by a_umonth;

    左连接连上月 hive不能用中文

    三、求用户号对应不同的产品

    用户好-产品1-产品2 按购买时间排序,只输出两个

    select

    concat(customer_key,'-',cpzl_zw,'-',if(isnull(cpzl_zw1),'null',cpzl_zw1))

    from(select

    *,

    row_number() over(partition by customer_key order by create_date asc) as rn

    from(select

        *

    from(select

        customer_key,cpzl_zw,create_date,

        lead(cpzl_zw) over(partition by customer_key order by create_date asc) cpzl_zw1

    from ods_sales_orders) a

    where cpzl_zw!=cpzl_zw1 or cpzl_zw1 is null) b) c

    where rn=1;

    四、统计各个省份所属城市下最受欢迎的Top 3产品和其销量(不能出现有null)

    select

        *

    from(select

        chinese_city,english_product_name, ucount,

        row_number() over(partition by chinese_city order by ucount desc) rn

    from(select

        chinese_city,english_product_name,count(*) ucount

    from ods_sales_orders a

    left join ods_customer b on a.customer_key=b.customer_key

    group by chinese_city,english_product_name) c) d

    where rn<=3 and chinese_city!='null';

    五、商品的销售数量top10,排名需考虑并列排名的情况

    select

        *

    from(select

        english_product_name,sale_amount,

        dense_rank() over(order by sale_amount desc) rank

    from (select

        english_product_name,count(*) sale_amount

    from ods_sales_orders

    group by english_product_name) a) b

    where rank<=10;

    六、计算累计和(统计2019年1-12月的累积销量,即1月为1月份的值,2月为1、2月份值的和,3月为1、2、3月份的和,12月为1-12月份值的和)

    select

        *,

        sum(sale_amount) over(order by umonth asc) sum_amount

    from(select

        substr(create_date,0,7) umonth,count(*) sale_amount

    from ods_sales_orders

    where substr(create_date,0,4)='2019'

    group by substr(create_date,0,7)) a;

    七、计算客户平均购买一次商品的间隔时间

    select

        customer_key,avg(date_diff) avg_date_diff

    from(select

        customer_key,create_date date0,

        lead(create_date,1,null) over(partition by customer_key order by create_date asc) date1,

        datediff(lead(create_date,1,null) over(partition by customer_key order by create_date asc),create_date) date_diff

    from ods_sales_orders) a

    where date_diff is not null

    group by customer_key;

    八、查询最近前20%时间的订单信息

    select

        *

    from(select 

        *,

        ntile(5) over(sort by create_date desc) sorted

    from ods_sales_orders) a

    where sorted=1;

    相关文章

      网友评论

          本文标题:四、Hive使用

          本文链接:https://www.haomeiwen.com/subject/jicfqktx.html