随机采样
--获取10%的随机样本数据
SELECT t.*
FROM <t> t
WHERE RAND() < 0.1
分层采样
hash 版
select
bins, score, rank1, st_count
from
(
select
round(score, 3) as bins,
score,
count(*) over (partition by hash(level) ) as st_count,
rank() over (partition by hash(level) order by rand()) as rank1
FROM
t_table
) A
where rank1 <= 0.1 * st_count;
非hash 版
select
bins, score, rank1, st_count
from
(
select
round(score, 3) as bins,
score,
count(*) over (partition by level ) as st_count,
rank() over (partition by level order by rand()) as rank1
FROM
t_table
) A
where rank1 <= 0.1 * st_count;
网友评论