DatistEQ之ggplot2两个变量绘图

作者: 了无_数据科学 | 来源:发表于2021-03-12 16:20 被阅读0次

    1、两个变量:x,y皆连续

    使用数据集mtcars

    mtcars数据集包含从1974年《美国汽车趋势》杂志中提取的数据,该数据描述了32辆汽车(1973–74年型号)的油耗以及汽车设计和性能的10个其他属性。

    mtcars数据集
    #先创建一个ggplot图层
    library(ggplot2)
    
    b <- ggplot(data = mtcars, aes(x=wt, y=mpg))
    
    #DatistEQ回收数据,后续代码省略此行
    ggsave(gettempfile(), width = 15, height = 10, dpi=300,units = "cm") 
    

    可能添加的图层有:

    • geom_point():散点图
    • geom_smooth():平滑线
    • geom_quantile():分位线
    • geom_rug():边际地毯线
    • geom_jitter():避免重叠
    • geom_text():添加文本注释
    x,y皆连续

    1.1 散点图

    b+geom_point()
    
    散点图

    将变量cyl映射给点的颜色和形状

    b + geom_point(aes(color = factor(cyl), shape = factor(cyl)))
    
    散点图

    自定义颜色

    b+geom_point(aes(color=factor(cyl), shape=factor(cyl)))+
      scale_color_manual(values=c("#999999", "#E69F00", "#56B4E9"))+theme_classic()
    
    自定义颜色的散点图

    1.2 平滑线

    可以添加回归曲线

    b+geom_smooth()
    
    平滑线

    散点图+回归线

    b+geom_point()+
      geom_smooth(method = "lm", se=FALSE)#去掉置信区间
    
    散点图+回归线

    使用loess方法

    b+geom_point()+
      geom_smooth(method = "loess")
    
    loess方法

    将变量映射给颜色和形状

    # Colour and shape both encode cyl on the points. The per-group regression
    # lines only map colour: geom_smooth() has no shape aesthetic, so mapping
    # one there is ignored with an "unknown aesthetics" warning.
    b +
      geom_point(aes(color = factor(cyl), shape = factor(cyl))) +
      geom_smooth(
        aes(color = factor(cyl)),
        method = "lm", se = FALSE, fullrange = TRUE
      )
    
    image.png

    1.3 分位线

    # Package `quantreg` is required for `stat_quantile`. It only has to be
    # installed, not attached, so check the namespace instead of calling
    # require(), which would put quantreg on the search path as a side effect.
    if (!requireNamespace("quantreg", quietly = TRUE)) {
      install.packages("quantreg")
    }
    ggplot(data = mpg, aes(x = cty, y = hwy)) +
      geom_point() +
      geom_quantile() +
      theme_minimal()
    
    image.png

    1.4 边际地毯线

    使用数据集faithful

    ggplot(data = faithful, aes(x=eruptions, y=waiting))+
      geom_point()+geom_rug()
    
    image.png

    避免重叠
    实际上geom_jitter()是geom_point(position="jitter")的简称,下面使用数据集mpg

    p <- ggplot(data = mpg, aes(displ, hwy))
    p+geom_point()
    
    image.png

    1.5 增加抖动防止重叠

    p+geom_jitter(width = 0.5, height = 0.5)
    
    image.png

    其中两个参数:
    width:x轴方向的抖动幅度
    height:y轴方向的抖动幅度

    1.6 文本注释

    参数label用来指定注释标签 (ggrepel可以避免标签重叠)

    b+geom_text(aes(label=rownames(mtcars)))
    
    image.png

    完整代码如下:

    header1("两个变量:x,y皆连续")
    header2("散点图")
    library(ggplot2)
    b <- ggplot(data = mtcars, aes(x=wt, y=mpg))
    b+geom_point()
    ggsave(gettempfile(), width = 15, height = 10, dpi=300,units = "cm")
    
    #将变量cyl映射给点的颜色和形状
    b + geom_point(aes(color = factor(cyl), shape = factor(cyl)))
    ggsave(gettempfile(), width = 15, height = 10, dpi=300,units = "cm")
    
    #自定义颜色
    b+geom_point(aes(color=factor(cyl), shape=factor(cyl)))+
      scale_color_manual(values=c("#999999", "#E69F00", "#56B4E9"))+theme_classic()
    ggsave(gettempfile(), width = 15, height = 10, dpi=300,units = "cm")
      
    header2("平滑线")
    b+geom_smooth()
    ggsave(gettempfile(), width = 15, height = 10, dpi=300,units = "cm")
    
    #散点图+回归线
    b+geom_point()+
      geom_smooth(method = "lm", se=FALSE)#去掉置信区间
    ggsave(gettempfile(), width = 15, height = 10, dpi=300,units = "cm")
    
    #使用loess方法
    b+geom_point()+
      geom_smooth(method = "loess")
    ggsave(gettempfile(), width = 15, height = 10, dpi=300,units = "cm")  
      
    # Map cyl to colour and shape on the points. The per-group regression lines
    # only map colour: geom_smooth() has no shape aesthetic, so mapping one
    # there is ignored with an "unknown aesthetics" warning.
    b +
      geom_point(aes(color = factor(cyl), shape = factor(cyl))) +
      geom_smooth(
        aes(color = factor(cyl)),
        method = "lm", se = FALSE, fullrange = TRUE
      )
    ggsave(gettempfile(), width = 15, height = 10, dpi=300,units = "cm")  
    
    # Quantile regression lines.
    # Package `quantreg` is required for `stat_quantile`. It only has to be
    # installed, not attached, so check the namespace instead of calling
    # require(), which would put quantreg on the search path as a side effect.
    if (!requireNamespace("quantreg", quietly = TRUE)) {
      install.packages("quantreg")
    }
    ggplot(data = mpg, aes(x = cty, y = hwy)) +
      geom_point() +
      geom_quantile() +
      theme_minimal()
    ggsave(gettempfile(), width = 15, height = 10, dpi=300,units = "cm")  
    
    ggplot(mtcars, aes(hp, disp))  + 
      geom_point() + 
      geom_quantile(quantiles = 0.5, aes(group = factor(gear), colour = factor(gear)),
                    xseq = min(mtcars$hp):max(mtcars$hp))
    ggsave(gettempfile(), width = 15, height = 10, dpi=300,units = "cm")  
    
    #边际地毯线
    #使用数据集faithful
    ggplot(data = faithful, aes(x=eruptions, y=waiting))+
      geom_point()+geom_rug()
    ggsave(gettempfile(), width = 15, height = 10, dpi=300,units = "cm")  
    
    #避免重叠
    #实际上geom_jitter()是geom_point(position="jitter")的简称,下面使用数据集mpg
    p <- ggplot(data = mpg, aes(displ, hwy))
    p+geom_point()
    ggsave(gettempfile(), width = 15, height = 10, dpi=300,units = "cm")  
    
    #增加抖动防止重叠
    p+geom_jitter(width = 0.5, height = 0.5)
    ggsave(gettempfile(), width = 15, height = 10, dpi=300,units = "cm")  
    
    #文本注释
    #参数label用来指定注释标签 (ggrepel可以避免标签重叠)
    b <- ggplot(data = mtcars, aes(x=wt, y=mpg))
    b+geom_text(aes(label=rownames(mtcars)))
    ggsave(gettempfile(), width = 15, height = 10, dpi=300,units = "cm")  
    

    2、两个变量:连续二元分布

    使用数据集diamonds,该数据集收集了约54000颗钻石的价格和质量信息。每条记录由十个变量构成,其中三个是有序分类变量,分别描述钻石的切工、颜色和净度。
    注:数据集diamonds来源于ggplot2包。钻石的价格取决于重量、颜色、切工等因素,分析这些因素与价格的关系,可以为公司合理定价、抢占市场提供依据。

    image.png

    carat:克拉重量
    cut:切工
    color:颜色
    clarity:净度
    depth:深度
    table:钻石宽度
    以及X,Y,Z

    image.png

    创建ggplot图层,后面再逐步添加图层

    c <- ggplot(data=diamonds, aes(carat, price))
    

    可添加的图层有:

    geom_bin2d(): 二维封箱热图
    geom_hex(): 六边形封箱图
    geom_density_2d(): 二维等高线密度图

    image.png

    2.1 二维封箱热图

    geom_bin2d()将点的数量用矩形封装起来,通过颜色深浅来反映点密度

    c+geom_bin2d()
    
    二维封箱热图

    设置bin的数量

    c+geom_bin2d(bins=150)
    
    二维封箱热图

    2.2 六边形封箱图

    geom_hex()依赖于另一个R包hexbin,所以没安装的先安装:

    if (!require("hexbin")) install.packages("hexbin")
    library(hexbin)
    c+geom_hex()
    
    六边形封箱图

    修改bin的数目

    c+geom_hex(bins=10)
    
    六边形封箱图

    2.3 二维等高线密度图

    sp <- ggplot(faithful, aes(x=eruptions, y=waiting))
    sp+geom_point()+ geom_density_2d()
    
    等高线密度图

    完整代码如下:

    library(ggplot2)
    
    #diamonds有54000颗钻石的数据,太大只显示头部的几行记录
    output(head(diamonds))
    
    #二维封箱热图
    header1("二维封箱热图")
    c <- ggplot(data=diamonds, aes(carat, price))
    c+geom_bin2d()
    ggsave(gettempfile(), width = 15, height = 10, dpi=300,units = "cm")
    
    #设置bin的数量
    c+geom_bin2d(bins=150)
    ggsave(gettempfile(), width = 15, height = 10, dpi=300,units = "cm")
    
    #六边形封箱图
    header1("六边形封箱图")
    if (!require("hexbin")) install.packages("hexbin")
    library(hexbin)
    c+geom_hex()
    ggsave(gettempfile(), width = 15, height = 10, dpi=300,units = "cm")
    
    #修改bin的数目
    c+geom_hex(bins=10)
    ggsave(gettempfile(), width = 15, height = 10, dpi=300,units = "cm")
    
    header1("二维等高线密度图")
    sp <- ggplot(faithful, aes(x=eruptions, y=waiting))
    sp+geom_point()+ geom_density_2d()
    ggsave(gettempfile(), width = 15, height = 10, dpi=300,units = "cm")
    

    3、两个变量:连续函数

    主要是如何通过线来连接两个变量,使用数据集economics。

    head(economics)
    
    economics数据集

    先创建一个ggplot图层,后面逐步添加图层

    d <- ggplot(data = economics, aes(x=date, y=unemploy))
    

    可添加的图层有:

    geom_area():面积图
    geom_line():折线图
    geom_step(): 阶梯图

    3.1 面积图

    d+geom_area()
    
    面积图

    3.2 线图

    d+geom_line()
    
    线图

    3.3 阶梯图

    set.seed(1111)
    ss <- economics[sample(1:nrow(economics), 20),]
    ggplot(ss, aes(x=date, y=unemploy))+geom_step()
    
    阶梯图

    完整代码如下:

    library(ggplot2)
    
    output(head(economics))
    
    #面积图
    header1("面积图")
    d <- ggplot(data = economics, aes(x=date, y=unemploy))
    d+geom_area()
    ggsave(gettempfile(), width = 15, height = 10, dpi=300,units = "cm")
     
    header1("线图")
    d+geom_line()
    ggsave(gettempfile(), width = 15, height = 10, dpi=300,units = "cm")
    
    header1("阶梯图")
    set.seed(1111)
    ss <- economics[sample(1:nrow(economics), 20),]
    ggplot(ss, aes(x=date, y=unemploy))+ geom_step()
    ggsave(gettempfile(), width = 15, height = 10, dpi=300,units = "cm")
    

    4、 两个变量:x离散,y连续

    使用数据集ToothGrowth,其中的变量len(Tooth length)是连续变量,dose是离散变量。

    ToothGrowth$dose <- as.factor(ToothGrowth$dose)
    head(ToothGrowth)
    

    创建图层

    e <- ggplot(data = ToothGrowth, aes(x=dose, y=len))
    

    可添加的图层有:
    geom_boxplot(): 箱线图
    geom_violin():小提琴图
    geom_dotplot():点图
    geom_jitter(): 带状图
    geom_line(): 线图
    geom_bar(): 条形图

    image.png

    4.1 箱线图

    e+geom_boxplot()
    
    image.png

    添加有缺口的箱线图

    e+geom_boxplot(notch = TRUE)
    
    image.png

    将dose映射给填充颜色

    e+geom_boxplot(aes(fill=dose))
    
    image.png

    按supp进行分类并映射给填充颜色

    ggplot(ToothGrowth, aes(x=dose, y=len))+ geom_boxplot(aes(fill=supp))
    
    image.png

    4.2 小提琴图

    e+geom_violin(trim = FALSE)
    
    image.png

    添加均值点(红点为均值,线段为均值±1倍标准差)

    e+geom_violin(trim = FALSE)+
      stat_summary(fun.data = mean_sdl, fun.args = list(mult=1), 
                   geom="pointrange", color="red")
    
    image.png

    与箱线图结合

    e+geom_violin(trim = FALSE)+
      geom_boxplot(width=0.2)
    
    image.png

    将dose映射给颜色进行分组

    e+geom_violin(aes(color=dose), trim = FALSE)
    
    image.png

    4.3 点图

    e+geom_dotplot(binaxis = "y", stackdir = "center")
    
    image.png

    添加均值点(红点为均值,线段为均值±1倍标准差)

    e + geom_dotplot(binaxis = "y", stackdir = "center") + 
      stat_summary(fun.data=mean_sdl, color = "red",geom = "pointrange",fun.args=list(mult=1))
    
    image.png

    与箱线图结合

    e + geom_boxplot() + 
      geom_dotplot(binaxis = "y", stackdir = "center")
    
    image.png

    添加小提琴图

    e + geom_violin(trim = FALSE) +
      geom_dotplot(binaxis='y', stackdir='center')
    
    image.png

    将dose映射给颜色以及填充色

    e + geom_dotplot(aes(color = dose, fill = dose), 
                     binaxis = "y", stackdir = "center")
    
    image.png

    4.5 带状图

    带状图是一种一维散点图,当样本量很小时,与箱线图相当

    e + geom_jitter(position=position_jitter(0.2))
    
    image.png

    添加均值点(红点为均值,线段为均值±1倍标准差)

    e + geom_jitter(position=position_jitter(0.2)) + 
      stat_summary(fun.data="mean_sdl",  fun.args = list(mult=1), 
                   geom="pointrange", color = "red")
    
    image.png

    与点图结合

    e + geom_jitter(position=position_jitter(0.2)) + 
      geom_dotplot(binaxis = "y", stackdir = "center")
    
    image.png

    与小提琴图结合

    e + geom_violin(trim = FALSE) +
      geom_jitter(position=position_jitter(0.2))
    
    image.png

    将dose映射给颜色和形状

    e +  geom_jitter(aes(color = dose, shape = dose),
                     position=position_jitter(0.2))
    
    image.png

    4.6 线图

    #构造数据集
    df <- data.frame(supp=rep(c("VC", "OJ"), each=3),
                    dose=rep(c("D0.5", "D1", "D2"),2),
                    len=c(6.8, 15, 33, 4.2, 10, 29.5))
    head(df)
    
    image.png

    将supp映射线型

    ggplot(df, aes(x=dose, y=len, group=supp)) +
      geom_line(aes(linetype=supp))+
      geom_point()
    
    image.png

    修改线型、点的形状以及颜色

    ggplot(df, aes(x=dose, y=len, group=supp)) +
      geom_line(aes(linetype=supp, color = supp))+
      geom_point(aes(shape=supp, color = supp))
    
    image.png

    4.7 条形图

    #构造数据集
    df <- data.frame(dose=c("D0.5", "D1", "D2"),
                    len=c(4.2, 10, 29.5))
    head(df)
    ##   dose  len
    ## 1 D0.5  4.2
    ## 2   D1 10.0
    ## 3   D2 29.5
    
    df2 <- data.frame(supp=rep(c("VC", "OJ"), each=3),
                    dose=rep(c("D0.5", "D1", "D2"),2),
                    len=c(6.8, 15, 33, 4.2, 10, 29.5))
    
    head(df2)
    ##   supp dose  len
    ## 1   VC D0.5  6.8
    ## 2   VC   D1 15.0
    ## 3   VC   D2 33.0
    ## 4   OJ D0.5  4.2
    ## 5   OJ   D1 10.0
    ## 6   OJ   D2 29.5
    

    创建图层

    f <- ggplot(df, aes(x = dose, y = len))
    f + geom_bar(stat = "identity")
    
    image.png

    修改填充色以及添加标签

    f + geom_bar(stat="identity", fill="steelblue")+
      geom_text(aes(label=len), vjust=-0.3, size=3.5)+
      theme_minimal()
    
    image.png

    将dose映射给条形图颜色

    f + geom_bar(aes(color = dose),
                 stat="identity", fill="white")
    
    image.png

    修改填充色

    f + geom_bar(aes(fill = dose), stat="identity")
    
    image.png

    将变量supp映射给填充色,从而达到分组效果

    g <- ggplot(data=df2, aes(x=dose, y=len, fill=supp)) 
    g + geom_bar(stat = "identity")#position默认为stack
    
    image.png

    修改position为dodge

    g + geom_bar(stat="identity", position=position_dodge())
    
    image.png

    完整代码如下:

    library(ggplot2)
    
    #output(head(ToothGrowth))
    ToothGrowth$dose <- as.factor(ToothGrowth$dose)
    
    output(head(ToothGrowth))
    
    #箱线图
    header1("箱线图")
    e <- ggplot(data = ToothGrowth, aes(x=dose, y=len))
    e+geom_boxplot()
    ggsave(gettempfile(), width = 15, height = 10, dpi=300,units = "cm")
    
    #添加有缺口的箱线图
    e+geom_boxplot(notch = TRUE)
    ggsave(gettempfile(), width = 15, height = 10, dpi=300,units = "cm")
    
    #将dose映射给填充颜色
    e+geom_boxplot(aes(fill=dose))
    ggsave(gettempfile(), width = 15, height = 10, dpi=300,units = "cm")
    
    #按supp进行分类并映射给填充颜色
    ggplot(ToothGrowth, aes(x=dose, y=len))+ geom_boxplot(aes(fill=supp))
    ggsave(gettempfile(), width = 15, height = 10, dpi=300,units = "cm")
    
    #小提琴图
    header1("小提琴图")
    e+geom_violin(trim = FALSE)
    ggsave(gettempfile(), width = 15, height = 10, dpi=300,units = "cm")
    
    # Overlay the group mean (point) and mean +/- 1 SD (line range); note that
    # mean_sdl summarises with the mean, not the median.
    # mean_sdl() needs Hmisc to be installed but not attached, so check the
    # namespace instead of loading the whole package with require().
    if (!requireNamespace("Hmisc", quietly = TRUE)) {
      install.packages("Hmisc")
    }
    e +
      geom_violin(trim = FALSE) +
      stat_summary(
        fun.data = mean_sdl, fun.args = list(mult = 1),
        geom = "pointrange", color = "red"
      )
    ggsave(gettempfile(), width = 15, height = 10, dpi=300,units = "cm")
    
    #与箱线图结合
    e+geom_violin(trim = FALSE)+geom_boxplot(width=0.2)
    ggsave(gettempfile(), width = 15, height = 10, dpi=300,units = "cm")
    
    
    #将dose映射给颜色进行分组
    e+geom_violin(aes(color=dose), trim = FALSE)
    ggsave(gettempfile(), width = 15, height = 10, dpi=300,units = "cm")
    
    #点图
    header1("点图")
    e+geom_dotplot(binaxis = "y", stackdir = "center")
    ggsave(gettempfile(), width = 15, height = 10, dpi=300,units = "cm")
    
    #添加中值点
    e + geom_dotplot(binaxis = "y", stackdir = "center") + 
      stat_summary(fun.data=mean_sdl, color = "red",geom = "pointrange",fun.args=list(mult=1))
    ggsave(gettempfile(), width = 15, height = 10, dpi=300,units = "cm")
    
    
    #与箱线图结合
    e + geom_boxplot() + 
      geom_dotplot(binaxis = "y", stackdir = "center")
    ggsave(gettempfile(), width = 15, height = 10, dpi=300,units = "cm")
    
    #添加小提琴图
    e + geom_violin(trim = FALSE) +
      geom_dotplot(binaxis='y', stackdir='center')
    ggsave(gettempfile(), width = 15, height = 10, dpi=300,units = "cm")
    
    
    #将dose映射给颜色以及填充色
    e + geom_dotplot(aes(color = dose, fill = dose), 
                     binaxis = "y", stackdir = "center")
    ggsave(gettempfile(), width = 15, height = 10, dpi=300,units = "cm")
    
    #带状图
    header1("带状图")
    e + geom_jitter(position=position_jitter(0.2))
    ggsave(gettempfile(), width = 15, height = 10, dpi=300,units = "cm")
    
    #添加中值点
    e + geom_jitter(position=position_jitter(0.2)) + 
      stat_summary(fun.data="mean_sdl",  fun.args = list(mult=1), 
                   geom="pointrange", color = "red")
    ggsave(gettempfile(), width = 15, height = 10, dpi=300,units = "cm")
    
    #与点图结合
    e + geom_jitter(position=position_jitter(0.2)) + 
      geom_dotplot(binaxis = "y", stackdir = "center")
    ggsave(gettempfile(), width = 15, height = 10, dpi=300,units = "cm")
    
    
    #与小提琴图结合
    e + geom_violin(trim = FALSE) +
      geom_jitter(position=position_jitter(0.2))
    ggsave(gettempfile(), width = 15, height = 10, dpi=300,units = "cm")
    
    #将dose映射给颜色和形状
    e + geom_jitter(aes(color = dose, shape = dose),
                     position=position_jitter(0.2))
    ggsave(gettempfile(), width = 15, height = 10, dpi=300,units = "cm")
    
    #线图
    header1("线图")
    df <- data.frame(supp=rep(c("VC", "OJ"), each=3),
                    dose=rep(c("D0.5", "D1", "D2"),2),
                    len=c(6.8, 15, 33, 4.2, 10, 29.5))
    output( head(df))                
      
    ggplot(df, aes(x=dose, y=len, group=supp)) +
      geom_line(aes(linetype=supp))+
      geom_point()
    ggsave(gettempfile(), width = 15, height = 10, dpi=300,units = "cm")
    
    #修改线型、点的形状以及颜色
    ggplot(df, aes(x=dose, y=len, group=supp)) +
      geom_line(aes(linetype=supp, color = supp))+
      geom_point(aes(shape=supp, color = supp)) 
    ggsave(gettempfile(), width = 15, height = 10, dpi=300,units = "cm")
    
    header1("条形图")
    df <- data.frame(dose=c("D0.5", "D1", "D2"),
                    len=c(4.2, 10, 29.5))
    output(head(df))
    
    df2 <- data.frame(supp=rep(c("VC", "OJ"), each=3),
                    dose=rep(c("D0.5", "D1", "D2"),2),
                    len=c(6.8, 15, 33, 4.2, 10, 29.5))
    output(head(df2))
    
    f <- ggplot(df, aes(x = dose, y = len))
    f + geom_bar(stat = "identity")
    ggsave(gettempfile(), width = 15, height = 10, dpi=300,units = "cm")
    
    #修改填充色以及添加标签
    f + geom_bar(stat="identity", fill="steelblue")+
      geom_text(aes(label=len), vjust=-0.3, size=3.5)+
      theme_minimal()
    ggsave(gettempfile(), width = 15, height = 10, dpi=300,units = "cm")
      
     
    #将dose映射给条形图颜色    
    f + geom_bar(aes(color = dose),
                 stat="identity", fill="white")  
    ggsave(gettempfile(), width = 15, height = 10, dpi=300,units = "cm")
         
    #修改填充色
    f + geom_bar(aes(fill = dose), stat="identity")  
    ggsave(gettempfile(), width = 15, height = 10, dpi=300,units = "cm")
    
    #将变量supp映射给填充色,从而达到分组效果
    g <- ggplot(data=df2, aes(x=dose, y=len, fill=supp)) 
    g + geom_bar(stat = "identity")#position默认为stack
    ggsave(gettempfile(), width = 15, height = 10, dpi=300,units = "cm")
    
    #修改position为dodge
    g + geom_bar(stat="identity", position=position_dodge())   
    ggsave(gettempfile(), width = 15, height = 10, dpi=300,units = "cm")
    

    5、 两个变量:x、y皆离散

    使用数据集diamonds中的两个离散变量color以及cut

    ggplot(diamonds, aes(cut, color)) +
      geom_jitter(aes(color = cut), size = 0.5)
    
    image.png

    6、 两个变量:绘制误差图

    df <- ToothGrowth
    df$dose <- as.factor(df$dose)
    head(df)
    ##    len supp dose
    ## 1  4.2   VC  0.5
    ## 2 11.5   VC  0.5
    ## 3  7.3   VC  0.5
    ## 4  5.8   VC  0.5
    ## 5  6.4   VC  0.5
    ## 6 10.0   VC  0.5
    

    绘制误差图需要知道每组的均值以及离散程度,下面这个函数用来计算每组的均值以及标准差(sd;注意这里是标准差,而不是标准误)。

    # Summarise one numeric column per group.
    #
    # Arguments:
    #   data    - data frame holding the measurements.
    #   varname - name (string) of the column to summarise.
    #   grps    - character vector naming the grouping column(s).
    #
    # Returns a data frame with one row per group: the grouping column(s), the
    # group mean stored under the original `varname`, and `sd`, the group
    # standard deviation (not the standard error). NAs are dropped in both.
    data_summary <- function(data, varname, grps) {
      # Use plyr through its namespace rather than require(): attaching a
      # package inside a function alters the caller's search path, and
      # require() merely returns FALSE when the package is missing.
      if (!requireNamespace("plyr", quietly = TRUE)) {
        stop("Package 'plyr' is required by data_summary()", call. = FALSE)
      }
      summary_func <- function(x, col) {
        c(mean = mean(x[[col]], na.rm = TRUE),
          sd = sd(x[[col]], na.rm = TRUE))
      }
      data_sum <- plyr::ddply(data, grps, .fun = summary_func, varname)
      # summary_func names the first statistic "mean"; rename it back to the
      # summarised column so plots can keep mapping y to `varname`.
      data_sum <- plyr::rename(data_sum, c("mean" = varname))
      data_sum
    }
    

    计算均值以及标准差

    df2 <- data_summary(df, varname="len", grps= "dose")
    # Convert dose to a factor variable
    df2$dose=as.factor(df2$dose)
    head(df2)
    ##   dose    len       sd
    ## 1  0.5 10.605 4.499763
    ## 2    1 19.735 4.415436
    ## 3    2 26.100 3.774150
    

    创建图层

    f <- ggplot(df2, aes(x = dose, y = len, 
                         ymin = len-sd, ymax = len+sd))
    

    可添加的图层有:
    geom_crossbar(): 空心柱,上中下三线分别代表ymax、mean、ymin
    geom_errorbar(): 误差棒
    geom_errorbarh(): 水平误差棒
    geom_linerange():竖直误差线
    geom_pointrange():中间为一点的误差线

    image.png

    6.1 空心柱

    geom_crossbar()
    f+geom_crossbar()
    
    image.png

    将dose映射给颜色

    f+geom_crossbar(aes(color=dose))
    
    image.png

    自定义颜色

    f+geom_crossbar(aes(color=dose))+
      scale_color_manual(values = c("#999999", "#E69F00", "#56B4E9"))+theme_classic()
    
    image.png

    修改填充色

    f+geom_crossbar(aes(fill=dose))+
      scale_fill_manual(values = c("#999999", "#E69F00", "#56B4E9"))+
      theme_classic()
    
    image.png

    通过将supp映射给颜色实现分组,可以利用函数stat_summary()来计算mean和sd

    f <- ggplot(df, aes(x=dose, y=len, color=supp))
    f+stat_summary(fun.data = mean_sdl, fun.args = list(mult=1), geom="crossbar", width=0.6, position = position_dodge(0.8))
    
    image.png

    6.2 误差棒

    f <- ggplot(df2, aes(x=dose, y=len, ymin=len-sd, ymax=len+sd))
    

    将dose映射给颜色

    f+geom_errorbar(aes(color=dose), width=0.2)
    
    image.png

    与线图结合

    f+geom_line(aes(group=1))+
      geom_errorbar(width=0.15)
    
    image.png

    与条形图结合,并将变量dose映射给颜色

    f+geom_bar(aes(color=dose), stat = "identity", fill="white")+
      geom_errorbar(aes(color=dose), width=0.1)
    
    image.png

    6.3 水平误差棒

    #构造数据集
    df2 <- data_summary(ToothGrowth, varname="len", grps = "dose")
    df2$dose <- as.factor(df2$dose)
    head(df2)
    ##   dose    len       sd
    ## 1  0.5 10.605 4.499763
    ## 2    1 19.735 4.415436
    ## 3    2 26.100 3.774150
    

    创建图层

    f <- ggplot(data = df2, aes(x=len, y=dose,xmin=len-sd, xmax=len+sd))
    

    参数xmin与xmax用来设置水平误差棒

    f+geom_errorbarh()
    
    image.png

    通过映射实现分组

    f+geom_errorbarh(aes(color=dose))
    
    image.png

    6.4 竖直误差线

    geom_linerange()与geom_pointrange()

    f <- ggplot(df2, aes(x=dose, y=len, ymin=len-sd, ymax=len+sd))
    

    line range

    f+geom_linerange()
    
    image.png

    6.5 中间为一点的误差线

    point range

    f+geom_pointrange()
    
    image.png

    6.6 点图+误差棒

    g <- ggplot(df, aes(x=dose, y=len))+
      geom_dotplot(binaxis = "y", stackdir = "center")
    

    添加geom_crossbar()

    g+stat_summary(fun.data = mean_sdl, fun.args = list(mult=1), geom="crossbar", color="red", width=0.1)
    
    image.png

    添加geom_errorbar()

    # Error bars span mean +/- 1 SD; the second layer marks the mean itself.
    # `fun` replaces the `fun.y` argument, deprecated since ggplot2 3.3.0.
    g +
      stat_summary(
        fun.data = mean_sdl, fun.args = list(mult = 1),
        geom = "errorbar", color = "red", width = 0.2
      ) +
      stat_summary(fun = mean, geom = "point", color = "red")
    
    image.png

    添加geom_pointrange()

    g + stat_summary(fun.data=mean_sdl, fun.args = list(mult=1), 
                     geom="pointrange", color="red")
    
    image.png

    完整代码如下:

    library(ggplot2)
    df <- ToothGrowth
    df$dose <- as.factor(df$dose)
    head(df)
    
    # Summarise one numeric column per group.
    #
    # Arguments:
    #   data    - data frame holding the measurements.
    #   varname - name (string) of the column to summarise.
    #   grps    - character vector naming the grouping column(s).
    #
    # Returns a data frame with one row per group: the grouping column(s), the
    # group mean stored under the original `varname`, and `sd`, the group
    # standard deviation (not the standard error). NAs are dropped in both.
    data_summary <- function(data, varname, grps) {
      # Use plyr through its namespace rather than require(): attaching a
      # package inside a function alters the caller's search path, and
      # require() merely returns FALSE when the package is missing.
      if (!requireNamespace("plyr", quietly = TRUE)) {
        stop("Package 'plyr' is required by data_summary()", call. = FALSE)
      }
      summary_func <- function(x, col) {
        c(mean = mean(x[[col]], na.rm = TRUE),
          sd = sd(x[[col]], na.rm = TRUE))
      }
      data_sum <- plyr::ddply(data, grps, .fun = summary_func, varname)
      # summary_func names the first statistic "mean"; rename it back to the
      # summarised column so plots can keep mapping y to `varname`.
      data_sum <- plyr::rename(data_sum, c("mean" = varname))
      data_sum
    }
    
    df2 <- data_summary(df, varname="len", grps= "dose")
    # Convert dose to a factor variable
    df2$dose=as.factor(df2$dose)
    head(df2)
    
    f <- ggplot(df2, aes(x = dose, y = len, 
                         ymin = len-sd, ymax = len+sd))                  
    
    f+geom_crossbar()
    ggsave(gettempfile(), width = 15, height = 10, dpi=300,units = "cm")                     
    #将dose映射给颜色                  
    f+geom_crossbar(aes(color=dose))
    ggsave(gettempfile(), width = 15, height = 10, dpi=300,units = "cm")
    
    #自定义颜色
    f+geom_crossbar(aes(color=dose))+
      scale_color_manual(values = c("#999999", "#E69F00", "#56B4E9"))+theme_classic()
    ggsave(gettempfile(), width = 15, height = 10, dpi=300,units = "cm") 
     
    #修改填充色
    f+geom_crossbar(aes(fill=dose))+
      scale_fill_manual(values = c("#999999", "#E69F00", "#56B4E9"))+
      theme_classic()
    ggsave(gettempfile(), width = 15, height = 10, dpi=300,units = "cm")
    
    f <- ggplot(df, aes(x=dose, y=len, color=supp))
    f+stat_summary(fun.data = mean_sdl, fun.args = list(mult=1), geom="crossbar", width=0.6, position = position_dodge(0.8))
    ggsave(gettempfile(), width = 15, height = 10, dpi=300,units = "cm")
    
    header1("误差棒")
    f <- ggplot(df2, aes(x=dose, y=len, ymin=len-sd, ymax=len+sd))
    
    #将dose映射给颜色
    f+geom_errorbar(aes(color=dose), width=0.2)
    ggsave(gettempfile(), width = 15, height = 10, dpi=300,units = "cm")
    
    #与线图结合
    f+geom_line(aes(group=1))+
      geom_errorbar(width=0.15)
    ggsave(gettempfile(), width = 15, height = 10, dpi=300,units = "cm")
    
    
    f+geom_bar(aes(color=dose), stat = "identity", fill="white")+
      geom_errorbar(aes(color=dose), width=0.1)
    ggsave(gettempfile(), width = 15, height = 10, dpi=300,units = "cm")
    
    header1("水平误差棒") 
    df2 <- data_summary(ToothGrowth, varname="len", grps = "dose")
    df2$dose <- as.factor(df2$dose)
    output(head(df2))
    
    #创建图层
    f <- ggplot(data = df2, aes(x=len, y=dose,xmin=len-sd, xmax=len+sd))
    
    f+geom_errorbarh()
    ggsave(gettempfile(), width = 15, height = 10, dpi=300,units = "cm")
    
    f+geom_errorbarh(aes(color=dose))
    ggsave(gettempfile(), width = 15, height = 10, dpi=300,units = "cm")
    
    #line range
    f <- ggplot(df2, aes(x=dose, y=len, ymin=len-sd, ymax=len+sd))
    f+geom_linerange()
    ggsave(gettempfile(), width = 15, height = 10, dpi=300,units = "cm")
    
    #point range
    f+geom_pointrange()
    ggsave(gettempfile(), width = 15, height = 10, dpi=300,units = "cm")
    
    
    #点图+误差棒
    g <- ggplot(df, aes(x=dose, y=len))+
      geom_dotplot(binaxis = "y", stackdir = "center")
    
    
    #添加geom_crossbar()
    g+stat_summary(fun.data = mean_sdl, fun.args = list(mult=1), geom="crossbar", color="red", width=0.1)
    ggsave(gettempfile(), width = 15, height = 10, dpi=300,units = "cm")
    
    #添加geom_errorbar()
    # Error bars span mean +/- 1 SD; the second layer marks the mean itself.
    # `fun` replaces the `fun.y` argument, deprecated since ggplot2 3.3.0.
    g +
      stat_summary(
        fun.data = mean_sdl, fun.args = list(mult = 1),
        geom = "errorbar", color = "red", width = 0.2
      ) +
      stat_summary(fun = mean, geom = "point", color = "red")
    ggsave(gettempfile(), width = 15, height = 10, dpi=300,units = "cm")
    
    #添加geom_pointrange()
    g + stat_summary(fun.data=mean_sdl, fun.args = list(mult=1), 
                     geom="pointrange", color="red")
    ggsave(gettempfile(), width = 15, height = 10, dpi=300,units = "cm")
    

    7、 两个变量:地图绘制

    ggplot2提供了绘制地图的函数geom_map(),依赖于包maps提供地理信息。
    安装maps包

    if (!require("maps")) install.packages("maps")
    

    下面将绘制美国地图,数据集采用USArrests

    library(maps)
    head(USArrests)
    ##            Murder Assault UrbanPop Rape
    ## Alabama      13.2     236       58 21.2
    ## Alaska       10.0     263       48 44.5
    ## Arizona       8.1     294       80 31.0
    ## Arkansas      8.8     190       50 19.5
    ## California    9.0     276       91 40.6
    ## Colorado      7.9     204       78 38.7
    

    对数据进行整理一下,添加一列state

    crimes <- data.frame(state=tolower(rownames(USArrests)), USArrests)
    head(crimes)
    ##                 state Murder Assault UrbanPop Rape
    ## Alabama       alabama   13.2     236       58 21.2
    ## Alaska         alaska   10.0     263       48 44.5
    ## Arizona       arizona    8.1     294       80 31.0
    ## Arkansas     arkansas    8.8     190       50 19.5
    ## California california    9.0     276       91 40.6
    ## Colorado     colorado    7.9     204       78 38.7
    #数据重铸
    library(reshape2)
    crimesm <- melt(crimes, id=1)
    head(crimesm)
    ##        state variable value
    ## 1    alabama   Murder  13.2
    ## 2     alaska   Murder  10.0
    ## 3    arizona   Murder   8.1
    ## 4   arkansas   Murder   8.8
    ## 5 california   Murder   9.0
    ## 6   colorado   Murder   7.9
    # State outlines. The result gets its own name so it does not shadow the
    # ggplot2 function map_data().
    states_map <- map_data("state")
    # Draw the map, filling each state by its Murder value; map_id links the
    # lower-case state names in `crimes` to the regions of the outline data.
    ggplot(crimes, aes(map_id = state)) +
      geom_map(aes(fill = Murder), map = states_map) +
      expand_limits(x = states_map$long, y = states_map$lat)
    
    image.png

    完整代码如下:

    library(ggplot2)
    
    if (!require("maps")) install.packages("maps")
    library(maps)
    output(head(USArrests))
    
    crimes <- data.frame(state=tolower(rownames(USArrests)), USArrests)
    output(head(crimes))
    
    if (!require("reshape2")) install.packages("reshape2")
    library(reshape2)
    crimesm <- melt(crimes, id=1)
    output(head(crimesm))
    
    # State outlines. The result gets its own name so it does not shadow the
    # ggplot2 function map_data().
    states_map <- map_data("state")
    # Draw the map, filling each state by its Murder value; map_id links the
    # lower-case state names in `crimes` to the regions of the outline data.
    ggplot(crimes, aes(map_id = state)) +
      geom_map(aes(fill = Murder), map = states_map) +
      expand_limits(x = states_map$long, y = states_map$lat)
    ggsave(gettempfile(), width = 15, height = 10, dpi=300,units = "cm")
    

    相关文章

      网友评论

        本文标题:DatistEQ之ggplot2两个变量绘图

        本文链接:https://www.haomeiwen.com/subject/ojywqltx.html