name |
seq_num |
a |
1 |
a |
2 |
b |
4 |
b |
5 |
c |
1 |
c |
3 |
c |
4 |
create table if not exists tmp.rows_preced_test_20210526
(
name string comment '用户名'
,seq_num int comment '使用产品编码'
)partitioned by (pt string comment 'YYMMDD数据入库时间')
ROW FORMAT DELIMITED FIELDS TERMINATED BY '\t' STORED AS orcfile
select name, max(l_cnt) as m_cnt from
(
select name ,continue ,count(1) as l_cnt
from
(
select name
,seq_num
-- 伪列排序与原始值差值的绝对值
,abs(row_number() over(partition by name order by name,seq_num ) - seq_num ) as continue
from
( -- 对原始数据初步处理, 求次数无需group by , 重复数据算一次需groupby 去重
select name,seq_num
from normal_stg_tmp.rows_preced_test_20210526
group by name,seq_num
) a
) a group by name,continue
) b group by name
name |
seq_num |
伪列值 |
伪列与原始值差值 |
a |
1 |
1 |
0 |
a |
2 |
2 |
0 |
b |
4 |
1 |
3 |
b |
5 |
2 |
3 |
c |
1 |
1 |
0 |
c |
3 |
2 |
1 |
c |
4 |
3 |
1 |
对 伪列与原始值差值
计算每个name 下的count(1) 最大值可得连续最大值
可得
![](https://img.haomeiwen.com/i13553937/a234e999779f3da7.png)
连续最大值.png
网友评论