美文网首页R语言Cook Rggplot2绘图
使用Tabplot可视化大型数据集

使用Tabplot可视化大型数据集

作者: 热衷组培的二货潜 | 来源:发表于2019-03-01 16:54 被阅读4次

    我是被这张图吸引才收藏(mark)的,先记录下来。(原文配图:image.png)

    # Attach ggplot2, which provides the `diamonds` dataset. library() stops
    # with an error when the package is missing, whereas require() merely
    # returns FALSE and lets the script fail later at data(diamonds).
    library(ggplot2)
    # devtools::install_github("mtennekes/tabplot")
    library(tabplot)
    data(diamonds)
    ## add some NA's
    # Set price to NA for every row whose cut is "Ideal". This must run before
    # cut itself receives NAs on the next line.
    is.na(diamonds$price) <- diamonds$cut == "Ideal"
    # Set cut to NA wherever a uniform random draw exceeds 0.8 (about 20% of
    # rows). No seed is set, so the affected rows differ between runs.
    is.na(diamonds$cut) <- (runif(nrow(diamonds)) > 0.8)
    # Table plot of the whole data frame with default settings.
    tableplot(diamonds)
    
    # The table plot below consists of five columns, with the data sorted by
    # price. The added missing values are placed at the bottom and (by default)
    # are shown in bright red.
    tableplot(diamonds, select = c(carat, price, cut, color, clarity), sortCol = price)
    
    
    # Same five columns and sort column, restricted via from = 0 and to = 5
    # (presumably percentages of the sorted rows -- confirm in ?tableplot).
    tableplot(diamonds, select = c(carat, price, cut, color, clarity), sortCol = price, 
              from = 0, to = 5)
    
    
    # Plot only the rows matching the subset expression.
    tableplot(diamonds, subset = price < 5000 & cut == "Premium")
    
    # tablePalettes() is called before choosing palettes by name below
    # (presumably it displays the available palettes -- confirm).
    tablePalettes()
    # Per-column palettes: two given by name, one as an explicit colour vector.
    tableplot(diamonds, pals = list(cut="Set1(6)", color="Set5", clarity=rainbow(8)))
    
    
    # Derive factor versions of two numeric columns with num2fac(), using
    # n = 20 for carat and n = 100 for price, and store them as new columns.
    diamonds$carat_class <- num2fac(diamonds$carat, n=20)
    diamonds$price_class <- num2fac(diamonds$price, n=100)
    
    # Show the numeric columns next to their factor counterparts.
    tableplot(diamonds, select=c(carat, price, carat_class, price_class))
    
    
    
    # create large dataset
    # Repeat every row index ten times. seq_len() is used instead of seq.int()
    # because seq.int(0) yields c(1, 0) rather than an empty index vector.
    large_diamonds <- diamonds[rep(seq_len(nrow(diamonds)), 10),]
    
    # Time the preparation step; its result `p` is reused by the calls below.
    system.time({
      p <- tablePrepare(large_diamonds)
    })
    #   user  system elapsed
    #   0.78    0.27    1.06
    
    # Time building a table plot from the prepared object without drawing it.
    system.time({
      tableplot(p, plot=FALSE)
    })
    #   user  system elapsed
    #   0.14    0.09    0.23
    
    # Same, sorted by price and with 200 bins.
    system.time({
      tableplot(p, sortCol=price, nBins=200, plot=FALSE)
    })
    #   user  system elapsed
    #   0.11    0.12    0.24
    
    # Although the first step takes a few seconds on a moderate desktop
    # computer, the processing time for creating table plots from the
    # intermediate result (object p) is very short compared with the direct
    # approach.
    
    # Time a table plot drawn with sample = TRUE.
    system.time({
      tableplot(p, sample=TRUE)
    })
    #   user  system elapsed
    #   0.31    0.25    0.56
    
    # Sampling weights: each carat divided by the largest carat in the data.
    # Direct column access replaces with(diamonds, ... diamonds$carat), which
    # mixed two ways of reaching the same column.
    carat.norm <- diamonds$carat / max(diamonds$carat)
    
    # draw samples
    # Two samples of 10000 rows, drawn with replacement. exp.diamonds weights
    # rows by carat.norm, chp.diamonds by 1 - carat.norm (the names presumably
    # stand for "expensive" and "cheap"). seq_len() avoids the 1:0 pitfall of
    # 1:nrow() when the data frame is empty.
    row_ids <- seq_len(nrow(diamonds))
    exp.diamonds <- diamonds[sample(row_ids, size=10000, prob=carat.norm, replace=TRUE),]
    chp.diamonds <- diamonds[sample(row_ids, size=10000, prob=1-carat.norm, replace=TRUE),]
    
    # Build both table plot objects without drawing them.
    tp1 <- tableplot(exp.diamonds, plot=FALSE)
    tp2 <- tableplot(chp.diamonds, plot=FALSE)
    
    # Plot the difference between the two table plot objects.
    plot(tp2 - tp1)
    
    # Build a table plot object for the full data frame without drawing it,
    # then print its summary.
    tab <- tableplot(diamonds, plot = FALSE)
    summary(tab)
    
    # Plot the first seven columns with custom font sizes, legend lines and title.
    tableplot(diamonds, select = 1:7, fontsize = 14, legend.lines = 8, title = "Shine on you crazy Diamond", fontsize.title = 18)
    
    # Derive a modified object from `tab` with a column selection (given as
    # strings) and a palette for cut, then plot it.
    tab2 <- tableChange(tab, select_string = c("carat", "price", "cut", "color", "clarity"), pals = list(cut="Set1(2)"))
    plot(tab2)
    
    # Save `tab` to diamonds.png with the given size and font settings
    # (width/height units are not shown here -- presumably inches; confirm
    # in ?tableSave).
    tableSave(tab, filename = "diamonds.png", width = 5, height = 3, fontsize = 6, legend.lines = 6)
    

    希望以后的数据分析能用到这个包

    相关文章

      网友评论

        本文标题:使用Tabplot可视化大型数据集

        本文链接:https://www.haomeiwen.com/subject/pftjuqtx.html