美文网首页
R-数据分箱(make bins)-cut_by_element

R-数据分箱(make bins)-cut_by_element

作者: 倪桦 | 来源:发表于2022-12-21 19:19 被阅读0次

1. Prepare test data

Heart.dat <- function(npt = 1e4){
    npt = npt *2;x = runif(npt) * 3 + (-1.5);y = runif(npt) * 3 + (-1.5)
    lapply(seq(npt), function(i){
        if( (x[i]**2 + y[i]**2 - 1)**3 - x[i]**2 * y[i]**3 <= 0 ) return( data.frame(x =x[i],y=y[i]) )
    }) %>% data.table::rbindlist() %>% tidyr::drop_na() -> ._ ; return(._)
    cat("...Heart point sets generated")
}
data.use <- Heart.dat(2e4)
data.use %>% ggplot( aes(x,y)) +
    geom_point(color = "red",size = .01) +
    coord_equal() +
    theme_classic()

2. Pack points.method

pack_bs <- function(vec,nbs = 1){
    names(vec) = paste0("x.",seq(length(vec)))
    tmp <- rep(1:ceiling(length(vec)/nbs), each=nbs, length.out=length(vec))
    names(tmp) <- names(sort(vec))
    return(paste0("b.",unname(tmp[paste0("x.",seq(length(vec)))])))
}

3. Make Bins

animation:::saveGIF(movie.name= "bins-change.gif",{
    for (bins.size in seq(1,10000,50) ) {
        p <- data.use %>%
            mutate(bins.x =pack_bs(x,nbs = !!(bins.size)),
                   bins.y =pack_bs(y,nbs = !!(bins.size))) %>%
            group_by(bins.x,bins.y) %>%
            summarise(rx_bin = mean(x),ry_bin = mean(y),.groups = 'drop') %>% 
            ggplot(aes(rx_bin,ry_bin)) +
            geom_point(color = "red",size = 10) +
            coord_equal(xlim = c(-1.5,1.5),ylim = c(-1.5,1.5)) +
            theme_classic()+
            labs(title = paste0("BINS::",bins.size),x = "",y = "")
        print(p)
    }
}, interval = .1, ani.width = 1000, ani.height = 1000)
bins-change.gif

相关文章

网友评论

      本文标题:R-数据分箱(make bins)-cut_by_element

      本文链接:https://www.haomeiwen.com/subject/kpbmqdtx.html