美文网首页
2022-03-07 开窗函数

2022-03-07 开窗函数

作者: Denholm | 来源:发表于2022-03-08 08:14 被阅读0次
---------开窗函数(窗口函数)-分组排序开窗函数------------------------------
-- 序号函数:row_number,rank,dense_rank

-- 1.1准备数据test1.txt
/*
cookie1,2018-04-10,1
cookie1,2018-04-11,5
cookie1,2018-04-12,7
cookie1,2018-04-13,3
cookie1,2018-04-14,2
cookie1,2018-04-15,4
cookie1,2018-04-16,4
cookie2,2018-04-10,2
cookie2,2018-04-11,3
cookie2,2018-04-12,5
cookie2,2018-04-13,6
cookie2,2018-04-14,3
cookie2,2018-04-15,9
cookie2,2018-04-16,7
*/

create table test_ordinal_func
(
    cookie_id   string comment 'cookie id',
    create_time string comment '创建时间',
    pv          int comment '访问次数'
) row format delimited fields terminated by ',';

load data local inpath '/export/data/test_window.txt' into table test_ordinal_func;

select *
from test_ordinal_func;

-- 使用开窗函数来实现分组并组内排序
-- 需求1:按照cookie_id进行分组,并且在每一组中按照pv进行降序排序
-- partition by:分组
select *,
       -- 1 2 3 4 5
       row_number() over (partition by cookie_id order by pv desc ) row_number,
       -- 1 2 3 3 5
       rank() over (partition by cookie_id order by pv desc )       rank,
       -- 1 2 3 3 4
       dense_rank() over (partition by cookie_id order by pv desc ) dense_rank
from test_ordinal_func;

-- 需求:按照cookie_id进行分组,并且在每一组中按照pv进行降序排序,选出每一组排名前三的信息(TopN问题)

-- 以下写法报错,因为SQL的执行顺序:from where select,where的时候,别名还没有出现
/*
select *,
       dense_rank() over (partition by cookie_id order by pv desc ) as rk3 -- 1 2 3 3 4
from test_ordinal_func
where rk3 <= 3;
*/

-- 通过子查询解决
select *
from (
         select *,
                dense_rank() over (partition by cookie_id order by pv desc ) as rk3 -- 1 2 3 3 4
         from test_ordinal_func
     ) as dense_rank
where dense_rank.rk3 <= 3;


-- 需求:查询新冠疫情数据表中每一个州确诊人数最多的县TopN
select *
from (
         select *,
                -- partition by:后面可跟多个字段(同州同县分一组)
                -- order by:后面可跟多个字段(主要条件相同比较次要条件。按确诊病例降序,确证病例相同则按死亡病例降序)
                dense_rank() over (partition by state, county order by cases desc, deaths desc) as rk
         from covid2
     ) as dense_rank
where dense_rank.rk = 1;

-- 分组和排序的后边都可以指定多个字段
/*
select *,
       dense_rank() over (partition by cookie_id,pv order by cookie_id,create_time desc ) as rk3 -- 1 2 3 3 4
from test_ordinal_func;
*/


---------开窗函数(窗口函数)-指定区间进行统计开窗函数(聚合开窗函数)------------------------------

-- 3、使用聚合开窗函数进行统计
-- 累加区间:从第1行累加到当前行(默认)
select cookie_id,
       create_time,
       pv,
       sum(pv) over (partition by cookie_id order by create_time) as pv1
from test_ordinal_func;

-- 等价上边的写法
select cookie_id,
       create_time,
       pv,
       sum(pv) over (partition by cookie_id order by create_time
           rows between unbounded preceding and current row) as pv2
from test_ordinal_func;

-- 累加区间:从前3行累加到当前行
select cookie_id,
       create_time,
       pv,
       sum(pv) over (partition by cookie_id order by create_time
           rows between 3 preceding and current row)
from test_ordinal_func;

-- 累加区间:从前3行累加到下1行
select cookie_id,
       create_time,
       pv,
       sum(pv) over (partition by cookie_id order by create_time
           rows between 3 preceding and 1 following) as pv5
from test_ordinal_func;

-- 累加区间:从当前行加到组的最后
select cookie_id,
       create_time,
       pv,
       sum(pv) over (partition by cookie_id order by create_time
           rows between current row and unbounded following) as pv6
from test_ordinal_func;

-- 以上的sum可以替换为avg,max,min
select cookie_id,
       create_time,
       pv,
       max(pv) over (partition by cookie_id order by create_time) as pv1
from test_ordinal_func;

select cookie_id,
       create_time,
       pv,
       min(pv) over (partition by cookie_id order by create_time) as pv1
from test_ordinal_func;

select cookie_id,
       create_time,
       pv,
       avg(pv) over (partition by cookie_id order by create_time) as pv1
from test_ordinal_func;


-- 前后函数:lag lead
-- lag
-- 将上1行数据放在当前行
select cookie_id,
       create_time,
       pv,
       lag(create_time, 1) over (partition by cookie_id order by create_time)
from test_ordinal_func;

-- 将上2行数据放在当前行
select cookie_id,
       create_time,
       pv,
       lag(create_time, 2) over (partition by cookie_id order by create_time)
from test_ordinal_func;

-- lead
-- 将下1行数据放在当前行
select cookie_id,
       create_time,
       pv,
       lead(create_time, 1) over (partition by cookie_id order by create_time)
from test_ordinal_func;

-- 将下2数据放在当前行
select cookie_id,
       create_time,
       pv,
       lead(create_time, 2) over (partition by cookie_id order by create_time)
from test_ordinal_func;

相关文章

  • 2022-03-07 开窗函数

  • 开窗函数

    over在聚合函数中的使用:一般格式:聚合函数名(列) over(选项)over必须与聚合函数或排序函数一起使用...

  • 开窗函数

    印象笔记 1、开窗函数https://www.douban.com/group/topic/155112949/[...

  • 开窗函数

  • 开窗函数

    排序同一个年级中年纪的排序开窗操作:rank() over (partition by)有点想group by只不...

  • 开窗函数

    开窗函数定义 一组行被称为一个窗口,开窗函数是指可以用于「分区」或「分组」计算的函数。这些函数结合 OVER 子句...

  • Oracle分析函数用法详解

    OVER(PARTITION BY)开窗函数用法 开窗函数,Oracle从8.1.6开始提供开窗函数,开窗函数用于...

  • SQL总结-开窗函数

    一 . 开窗函数 分组函数 开窗函数 分组与开窗的区别分组函数每组只返回一行,而开窗函数每组返回多行。如下: 分组...

  • 开窗函数实践

    https://mp.weixin.qq.com/s/uJ6KY_yfH90kmhKAUd7HLw

  • Hive开窗函数

    1. 介绍 普通聚合函数聚合的行集是组,开窗函数聚合的行集是窗口。因此,普通聚合函数每组(Group by)只有一...

网友评论

      本文标题:2022-03-07 开窗函数

      本文链接:https://www.haomeiwen.com/subject/ciynrrtx.html