drop table default.salary;
create table default.salary (
userid string,
dateid string,
salary int
);
/*
insert into default.salary values('001','2019-03-01',70);
insert into default.salary values('001','2019-03-02',70);
insert into default.salary values('001','2019-03-03',70);
insert into default.salary values('002','2019-03-01',80);
insert into default.salary values('002','2019-03-02',80);
insert into default.salary values('002','2019-03-03',80);
insert into default.salary values('001','2019-04-01',70);
insert into default.salary values('001','2019-04-02',70);
insert into default.salary values('001','2019-05-03',70);
*/
select * from default.salary;
--写法一
SELECT userid
,first1
,last1
,datediff(last1, first1)+1 days
,salary * (datediff(last1, first1)+1) as salary
FROM
( SELECT userid
,num
,salary
,min(dateid) as first1
,max(dateid) as last1
FROM
( SELECT a.userid
,a.dateid
,date_sub(dateid, rn - 1) num
,salary
FROM
( SELECT userid
,dateid
,salary
,row_number() over(PARTITION BY userid ORDER BY dateid) rn
FROM default.salary
GROUP BY userid,dateid,salary
) a
) b
group by userid,num,salary
) a
--写法二
SELECT a.userid
,b.starttime
,b.endtime
,datediff(b.endtime,b.starttime) + 1 days
,sum(case when a.dateid>=b.starttime and a.dateid<=b.endtime then a.salary else 0 end) as total_salary
FROM default.salary a
LEFT JOIN
( SELECT userid
,days
,qujian[0] starttime
,case when days=1 then qujian[0]
when days=2 then qujian[1]
when days=3 then qujian[2]
when days=4 then qujian[3] end as endtime --此处如有其它时间差,需要进行枚举
FROM --取出连续时间范围
( SELECT userid
,datediff(max(end_date),min(end_date)) + 1 days
,COLLECT_set(end_date) qujian
FROM --求出连续时间之差
( SELECT userid
,dateid end_date
,salary
,date_sub(dateid,rn-1) as start_date
FROM --日期与排名之间的差值
( SELECT userid
,dateid
,salary
,row_number () over (partition by userid order by dateid) rn
FROM default.salary --按name分组后对date进行排序
) a
) a
GROUP BY userid,start_date
) a
) b on a.userid=b.userid
GROUP BY a.userid,b.starttime,b.endtime
网友评论