美文网首页
【SQL】抽样

【SQL】抽样

作者: 7ccc099f4608 | 来源:发表于2020-11-24 17:02 被阅读0次

    随机采样

    
    -- Random sample: keep a row of <t> only when its RAND() draw is below 0.1,
    -- i.e. roughly 10% of rows (assumes RAND() is uniform on [0, 1) -- confirm for the target engine).
    -- <t> is a placeholder for the source table name.
    SELECT
        src.*
    FROM <t> AS src
    WHERE 0.1 > RAND()
    
    

    分层采样

    hash 版

    -- Stratified sample of t_table: keeps up to 10% of the rows of each stratum,
    -- where a stratum is the set of rows sharing the same hash(level) value.
    --
    -- Inner query: tags every row with its stratum size (st_count) and a random
    -- position inside the stratum (rank1).
    -- Outer query: keeps the rows whose position is within 10% of the stratum size.
    --
    -- row_number() is used instead of rank() so that equal rand() values can never
    -- share a position and push a stratum over its quota.
    -- NOTE(review): distinct level values whose hashes collide end up in one stratum.
    -- NOTE(review): a stratum with fewer than 10 rows contributes no rows, because
    -- positions start at 1 and 0.1 * st_count is then below 1 -- confirm this is intended.
    select
        bins,
        score,
        rank1,
        st_count
    from
    (
        select
            round(score, 3) as bins,  -- score rounded to 3 decimal places
            score,
            count(*) over (partition by hash(level)) as st_count,
            row_number() over (partition by hash(level) order by rand()) as rank1
        from
            t_table
    ) ranked
    where rank1 <= 0.1 * st_count;
    

    非 hash 版

    -- Stratified sample of t_table: keeps up to 10% of the rows of each stratum,
    -- where a stratum is the set of rows sharing the same level value.
    --
    -- Inner query: tags every row with its stratum size (st_count) and a random
    -- position inside the stratum (rank1).
    -- Outer query: keeps the rows whose position is within 10% of the stratum size.
    --
    -- row_number() is used instead of rank() so that equal rand() values can never
    -- share a position and push a stratum over its quota.
    -- NOTE(review): a stratum with fewer than 10 rows contributes no rows, because
    -- positions start at 1 and 0.1 * st_count is then below 1 -- confirm this is intended.
    select
        bins,
        score,
        rank1,
        st_count
    from
    (
        select
            round(score, 3) as bins,  -- score rounded to 3 decimal places
            score,
            count(*) over (partition by level) as st_count,
            row_number() over (partition by level order by rand()) as rank1
        from
            t_table
    ) ranked
    where rank1 <= 0.1 * st_count;
    

    相关文章

      网友评论

          本文标题:【SQL】抽样

          本文链接:https://www.haomeiwen.com/subject/tooiiktx.html