美文网首页
sql面试题(二)

sql面试题(二)

作者: Yobhel | 来源:发表于2023-05-04 11:02 被阅读0次
    image.png
    id      dt              lowcarbon
    1001    2021-12-12      123
    1002    2021-12-12      45
    1001    2021-12-13      43
    1001    2021-12-13      45
    1001    2021-12-13      23
    1002    2021-12-14      45
    1001    2021-12-14      230
    1002    2021-12-15      45
    1001    2021-12-15      23
    找出连续3天及以上减少碳排放量在100以上的用户
    
    1)按照用户ID及时间字段分组,计算每个用户单日减少的碳排放量
    select
        id,
        dt,
        sum(lowcarbon) lowcarbon
    from test1
    group by id,dt
    having lowcarbon>100;t1
    1001    2021-12-12      123
    1001    2021-12-13      111
    1001    2021-12-14      230
    
    等差数列法:两个等差数列如果等差相同,则相同位置的数据相减等到的结果相同
    2)按照用户分组,同时按照时间排序,计算每条数据的Rank值
    select
        id,
        dt,
        lowcarbon,
        rank() over(partition by id order by dt) rk
    from t1;t2
    
    3)将每行数据中的日期减去Rank值
    select
        id,
        dt,
        lowcarbon,
        date_sub(dt,rk) flag
    from t2;t3
    
    4)按照用户及Flag分组,求每个组有多少条数据,并找出大于等于3条的数据
    select
        id,
        flag,
        count(*) ct
    from t3
    group by id,flag
    having ct>=3;
    
    5)最终HQL
    select
        id,
        flag,
        count(*) ct
    from 
    (select
        id,
        dt,
        lowcarbon,
        date_sub(dt,rk) flag
    from 
    (select
        id,
        dt,
        lowcarbon,
        rank() over(partition by id order by dt) rk
    from 
    (select
        id,
        dt,
        sum(lowcarbon) lowcarbon
    from test1
    group by id,dt
    having lowcarbon>100)t1)t2)t3
    group by id,flag
    having ct>=3;
    
    image.png
    id      ts
    1001    17523641234
    1001    17523641256
    1002    17523641278
    1001    17523641334
    1002    17523641434
    1001    17523641534
    1001    17523641544
    1002    17523641634
    1001    17523641638
    1001    17523641654
    时间间隔小于60秒,则分为同一个组
    1001    17523641234     1
    1001    17523641256     1
    1001    17523641334     2
    1001    17523641534     3
    1001    17523641544     3
    1001    17523641638     4
    1001    17523641654     4
    1002    17523641278     1
    1002    17523641434     2
    1002    17523641634     3
    
    1)将上一行时间数据下移
    lead:领导
    lag:延迟
    select
        id,
        ts,
        lag(ts,1,0) over(partition by id order by ts) lagts
    from
        test2;t1
    1001    17523641234 0
    1001    17523641256 17523641234
    1001    17523641334 17523641256
    1001    17523641534 17523641334
    1001    17523641544 17523641534
    1001    17523641638 17523641544
    1001    17523641654 17523641638
    1002    17523641278 0
    1002    17523641434 17523641278
    1002    17523641634 17523641434
    
    2)将当前行时间数据减去上一行时间数据
    select
        id,
        ts,
        ts-lagts tsdiff
    from
        t1;t2
    
    select
        id,
        ts,
        ts-lagts tsdiff
    from
        (select
        id,
        ts,
        lag(ts,1,0) over(partition by id order by ts) lagts
    from
        test2)t1;t2
    1001    17523641234 17523641234
    1001    17523641256 22
    1001    17523641334 78
    1001    17523641534 200
    1001    17523641544 10
    1001    17523641638 94
    1001    17523641654 16
    1002    17523641278 17523641278
    1002    17523641434 156
    1002    17523641634 200
    
    3)计算每个用户范围内从第一行到当前行tsdiff大于等于60的总个数(分组号)
    select
        id,
        ts,
        sum(if(tsdiff>=60,1,0)) over(partition by id order by ts) groupid
    from
        t2;
    
    4)最终HQL
    select
        id,
        ts,
        sum(if(tsdiff>=60,1,0)) over(partition by id order by ts) groupid
    from
        (select
        id,
        ts,
        ts-lagts tsdiff
    from
        (select
        id,
        ts,
        lag(ts,1,0) over(partition by id order by ts) lagts
    from
        test2)t1)t2;
    
    image.png
    id      dt
    1001    2021-12-12
    1001    2021-12-13
    1001    2021-12-14
    1001    2021-12-16
    1001    2021-12-19
    1001    2021-12-20
    
    1002    2021-12-12
    1002    2021-12-16
    1002    2021-12-17
    
    结果
    1001    5
    1002    2
    
    思路一:等差数列
    1001    2021-12-12  1
    1001    2021-12-13  2
    1001    2021-12-14  3
    1001    2021-12-16  4
    1001    2021-12-19  5
    1001    2021-12-20  6
    
    1001    2021-12-12  1   2021-12-11
    1001    2021-12-13  2   2021-12-11
    1001    2021-12-14  3   2021-12-11
    1001    2021-12-16  4   2021-12-12
    1001    2021-12-19  5   2021-12-14
    1001    2021-12-20  6   2021-12-14
    
    1001    2021-12-11  3
    1001    2021-12-12  1
    1001    2021-12-14  1
    
    1001    2021-12-11  3   1
    1001    2021-12-12  1   2
    1001    2021-12-14  1   3
    
    1001    2021-12-11  3   1   2021-12-10
    1001    2021-12-12  1   2   2021-12-10
    1001    2021-12-14  1   3   2021-12-11
    
    思路二:分组
    1001    2021-12-12
    1001    2021-12-13
    1001    2021-12-14
    1001    2021-12-16
    1001    2021-12-19
    1001    2021-12-20
    
    1)将上一行时间数据下移
    1001    2021-12-12  1970-01-01
    1001    2021-12-13  2021-12-12
    1001    2021-12-14  2021-12-13
    1001    2021-12-16  2021-12-14
    1001    2021-12-19  2021-12-16
    1001    2021-12-20  2021-12-19
    select
        id,
        dt,
        lag(dt,1,'1970-01-01') over(partition by id order by dt) lagdt
    from
        test3;t1
    
    2)将当前行时间减去上一行时间数据(datediff(dt1,dt2))
    1001    2021-12-12  564564
    1001    2021-12-13  1
    1001    2021-12-14  1
    1001    2021-12-16  2
    1001    2021-12-19  3
    1001    2021-12-20  1
    select
        id,
        dt,
        datediff(dt,lagdt) flag
    from
        t1;t2
    
    3)按照用户分组,同时按照时间排序,计算从第一行到当前行大于2的数据的总条数(sum(if(flag>2,1,0)))
    1001    2021-12-12  1
    1001    2021-12-13  1
    1001    2021-12-14  1
    1001    2021-12-16  1
    1001    2021-12-19  2
    1001    2021-12-20  2
    select
        id,
        dt,
        sum(if(flag>2,1,0)) over(partition by id order by dt) flag
    from
        t2;t3
    
    4)按照用户和flag分组,求最大时间减去最小时间并加上1
    select
        id,
        flag,
        datediff(max(dt),min(dt)) days
    from
        t3
    group by id,flag;t4
    
    5)取连续登录天数的最大值
    select
        id,
        max(days)+1
    from
        t4
    group by id;
    
    6)最终HQL
    select
        id,
        max(days)+1
    from
        (select
        id,
        flag,
        datediff(max(dt),min(dt)) days
    from
        (select
        id,
        dt,
        sum(if(flag>2,1,0)) over(partition by id order by dt) flag
    from
        (select
        id,
        dt,
        datediff(dt,lagdt) flag
    from
        (select
        id,
        dt,
        lag(dt,1,'1970-01-01') over(partition by id order by dt) lagdt
    from
        test3)t1)t2)t3
    group by id,flag)t4
    group by id;
    
    image.png
    id      stt         edt
    oppo    2021-06-05  2021-06-09
    oppo    2021-06-11  2021-06-21
    
    vivo    2021-06-05  2021-06-15
    vivo    2021-06-09  2021-06-21
    
    redmi   2021-06-05  2021-06-21
    redmi   2021-06-09  2021-06-15
    redmi   2021-06-17  2021-06-26
    
    huawei  2021-06-05  2021-06-26
    huawei  2021-06-09  2021-06-15
    huawei  2021-06-17  2021-06-21
    
    1)将当前行以前的数据中最大的edt放置当前行
    select
        id,
        stt,
        edt,
        max(edt) over(partition by id order by stt rows between UNBOUNDED PRECEDING and 1 PRECEDING) maxEdt
    from test4;t1
    redmi   2021-06-05  2021-06-21  null
    redmi   2021-06-09  2021-06-15  2021-06-21
    redmi   2021-06-17  2021-06-26  2021-06-21
    
    2)比较开始时间与移动下来的数据,如果开始时间大,则不需要操作,
    反之则需要将移动下来的数据加一替换当前行的开始时间
    如果是第一行数据,maxEDT为null,则不需要操作
    select
        id,
        if(maxEdt is null,stt,if(stt>maxEdt,stt,date_add(maxEdt,1))) stt,
        edt
    from t1;t2
    redmi   2021-06-05  2021-06-21
    redmi   2021-06-22  2021-06-15
    redmi   2021-06-22  2021-06-26
    
    3)将每行数据中的结束日期减去开始日期
    select
        id,
        datediff(edt,stt) days
    from
        t2;t3
    redmi   16
    redmi   -4
    redmi   4
    
    4)按照品牌分组,计算每条数据加一的总和
    select
        id,
        sum(if(days>=0,days+1,0)) days
    from
        t3
    group by id;
    redmi   22
    
    5)最终HQL
    select
        id,
        sum(if(days>=0,days+1,0)) days
    from
        (select
        id,
        datediff(edt,stt) days
    from
        (select
        id,
        if(maxEdt is null,stt,if(stt>maxEdt,stt,date_add(maxEdt,1))) stt,
        edt
    from 
        (select
        id,
        stt,
        edt,
        max(edt) over(partition by id order by stt rows between UNBOUNDED PRECEDING and 1 PRECEDING) maxEdt
    from test4)t1)t2)t3
    group by id;
    
    image.png
    image.png

    相关文章

      网友评论

          本文标题:sql面试题(二)

          本文链接:https://www.haomeiwen.com/subject/zxsxsdtx.html