1.找出所有科目成绩都大于某一学科平均成绩的学生
表结构:uid,subject_id,score
select
uid
from(
select
uid,
if(score>avg_score,0,1) as flag
from(
select
uid,
score,
avg(score) over(partition by subject_id) as avg_score
from
score
) t1
) tt1
group by uid having sum(flag)=0
2.统计出每个用户按月的累积访问次数
表结构:uid,visit_date,visit_count
select
uid,
mm,
sum(mm_count) over(partition by uid order by mm) as sum_count
from(
select
uid,
mm,
sum(visit_count) as mm_count
from(
select
uid,
visit_count,
date_format(regexp_replace(visit_date,'/','-'),'yyyy-MM') as mm
from
action
) t1
group by uid,mm
) tt1
group by uid,mm
3.连续问题
找出连续3天及以上减少碳排放量在100以上的用户
表结构:id,dt,lowcarbon
select
id,
flag,
count(1) as num
from(
select
id,
dt,
date_sub(dt,rk) as flag
from(
select
id,
dt,
row_number() over(partition by id order by dt) as rk
from(
select
id,
dt,
sum(lowcarbon) as lowcarbon
from
test
having lowcarbon>100
) t1
) tt1
) ttt1
group by id,flag
having num>=3
4.分组问题(lag or lead)
某个用户连续的访问记录如果时间间隔小于60秒,则分为同一个组
表结构:id,ts
select
id,
ts,
sum(if(diff<60,1,0)) over(partition by id order by ts) as groupid
from(
select
id,
ts,
last,
ts-last as diff
from(
select
id,
ts,
lag(ts,1,0) over(partition by id order by ts) as last
from
test
) t1
) tt1
5.间隔连续问题
计算每个用户最大的连续登录天数,可以间隔一天。解释:如果一个用户在1,3,5,6登录游戏,则视为连续6天登录
表结构: id,dt
select
id,
max(diff) + 1 as days
from(
selet
id,
flag,
datediff(max(dt),min(dt)) as diff
from(
select
id,
dt,
sum(if(flag<=2,0,1)) over(partition by id order by dt) as flag
from(
select
id,
dt,
datediff(dt,lag_dt) as flag
from(
select
id,
dt,
lag(dt,1,'1970-01-01') over(partition by id order by dt) as lag_dt
from
test
) t1
) tt1
) ttt1
group by id,flag
) tttt1
group by id
6.打折日期交叉问题
计算每个品牌总的打折销售天数,注意其中的交叉日期,
比如vivo品牌,第一次活动时间为2021-06-05到2021-06-15,
第二次活动时间为2021-06-09到2021-06-21其中9号到15号为重复天数,
只统计一次,即vivo总打折天数为2021-06-05到2021-06-21共计17天
表结构: id,start_dt,end_dt
select
id,
sum(id(days>0,days+1,0)) as days
from(
select
id,
datediff(end_dt,start_dt) as days
from(
select
id,
if(maxEdt is null,start_dt,if(start_dt>maxEdt,start_dt,date_add(maxEdt,1))) as start_dt,
end_dt
from(
select
id,
start_dt,
end_dt,
max(end_dt) over(partition by id order by start_dt rows between UNBOUNDED PRECEDING and 1 PRECEDING) as maxEdt
from
test
) t1
) t2
) t3
group by id
7.同时在线人数
如下为某直播平台主播开播及关播时间,根据该数据计算出平台最高峰同时在线的主播人数
表结构:id,stt,edt
select
max(sum_p) as max_uv
from(
select
id,
dt,
sum(p) over(order by dt) as sum_p
from(
select id,stt as dt,1 as p from test
union all
select id,edt as dt,-1 as p from test
) t1
) tt1
8.求每日新增用户的1,3,5,7日留存
select
first_dt,
count(distinct user_id) as '新增用户数',
round(count(distinct if(datediff(dt,first_dt)=1,user_id,null))/count(distinct user_id),2) as '次日留存率',
round(count(distinct if(datediff(dt,first_dt)=3,user_id,null))/count(distinct user_id),2) as '3日留存率',
round(count(distinct if(datediff(dt,first_dt)=5,user_id,null))/count(distinct user_id),2) as '5日留存率',
round(count(distinct if(datediff(dt,first_dt)=7,user_id,null))/count(distinct user_id),2) as '7日留存率'
from(
select
user_id,
dt,
first_dt
from
test t1
left join(
select
user_id,
min(dt) as first_dt
from
test
group by user_id
) t2
on t1.user_id = t2.user_id
) t3
group by first_dt
9.不用窗口函数求出每个部门消费排名前二的游戏
表结构: department game cash
-- 测试数据:
a game1 100
a game2 100
a game3 200
a game4 400
b game1 300
b game2 300
b game3 200
b game4 300
-- 缺陷:如果cash相同,排名会相同
select
department,
game,
count(distinct b_cash) as rk
from(
select
a.department,
a.game,
a.cash as a_cash,
b.cash as b_cash
from
game_cash a
inner join
game_cash b
on a.department=b.department and a.cash<=b.cash
order by a.department,a.game
) t
group by department,game
having rk<=2
10.求每个部门除去第一名和最后一名的平均薪资
表结构: dep uid cash
select
dep,
avg(cash) as avg_fee
from(
select
dep,
uid,
cash,
row_number() over(partition by dep order by cash) as rk1,
row_number() over(partition by dep order by cash desc) as rk2
from
test
) t
where rk1>1 and rk2>1
group by dep
网友评论