美文网首页
数据挖掘20210111学习笔记

数据挖掘20210111学习笔记

作者: 爱吃甜品的鱼 | 来源:发表于2021-02-22 15:50 被阅读0次

    R语言作图

    图片.png
    画图所需的包

    低级绘图函数建立在高级绘图函数基础上,不能单独使用

    图片.png

    ggplot2语法

    1.入门级绘图模板
    2.映射-颜色、大小、透明度、形状
    3.分面
    4.几何对象
    5.统计变换
    6.位置调整
    7.坐标系

    # Three ways to draw a plot in R
    # 1. Base graphics: a high-level call draws the plot, low-level calls add to it
    plot(iris[, 1], iris[, 3], col = iris[, 5])
    text(6.5, 4, labels = "hello")

    boxplot(iris[, 1] ~ iris[, 5])

    dev.off()  # close the graphics device and clear the canvas

    # 2. ggplot2: the workhorse
    library(ggplot2)
    test <- iris
    ggplot(data = test) +
      geom_point(
        mapping = aes(
          x = Sepal.Length,
          y = Petal.Length,
          color = Species
        )
      )

    # 3. ggpubr: a quick shortcut when you need a plot fast
    library(ggpubr)
    ggscatter(
      iris,
      x = "Sepal.Length",
      y = "Petal.Length",
      color = "Species"
    )
    

    1.入门级模板

    ggplot(data = <DATA>)+
    <GEOM_FUNCTION>(mapping = aes(<MAPPINGS>))
    
    # Minimal instance of the template: a scatter plot of iris with
    # Sepal.Length mapped to x and Petal.Length mapped to y.
    > ggplot(data = iris)+
    +   geom_point(mapping = aes(x = Sepal.Length,
    +                            y = Petal.Length))
    

    2.映射:按照数据框的某一列来定义图的某个属性;手动设置

    映射 手动设置

    例1

    # Example 1: color is mapped to a column (inside aes), while shape is set
    # manually to a fixed value (outside aes).
    > ggplot(data = test)+
    +   geom_point(mapping = aes(x = Sepal.Length,
    +                            y = Petal.Length,
    +                            color = Species),    # arguments inside aes() are column names: this is a mapping
    +              shape =8)  # shape is an argument of geom_point(), not of aes(); arguments passed directly to geom_point() are fixed values (a specific color, shape, ...)
    

    例2

    # Example 2: color is set manually to the constant "blue" outside aes(),
    # so it is not tied to any column of the data.
    > ggplot(data = test)+
    +   geom_point(mapping = aes(x = Sepal.Length,
    +                            y = Petal.Length),
    +              color="blue")
    

    3.分面

    # Faceting on a single variable with facet_wrap().
    > ggplot(data = test) + 
    +   geom_point(mapping = aes(x = Sepal.Length, y = Petal.Length)) + 
    +   facet_wrap(~ Species) # one sub-plot per distinct value of the Species column
    

    双分面

    # Faceting on two variables: add a random grouping column, then lay the
    # panels out as a grid with Group on the rows and Species on the columns.
    # One label from a-e is drawn per row (with replacement); no seed is set,
    # so the assignment differs between runs.
    > test$Group <- sample(letters[1:5], nrow(test), replace = TRUE)
    > ggplot(data = test) + 
    +   geom_point(mapping = aes(x = Sepal.Length, y = Petal.Length)) + 
    +   facet_grid(Group ~ Species) 
    

    4.几何对象

    分组

    # Grouping with geom_smooth(): the same x/y mapping drawn three ways.
    # (a) No grouping: a single smooth over all rows.
    > ggplot(data = test) + 
    +   geom_smooth(mapping = aes(x = Sepal.Length, 
    +                             y = Petal.Length)) 
    
    
    # (b) group = Species: one smooth per Species.
    > ggplot(data = test) + 
    +   geom_smooth(mapping = aes(x = Sepal.Length, 
    +                             y = Petal.Length,
    +                             group = Species))    # splits the single line into three
    
    
    # (c) color = Species: grouped as in (b), and each line gets its own color.
    > ggplot(data = test) + 
    +   geom_smooth(mapping = aes(x = Sepal.Length, 
    +                             y = Petal.Length,
    +                             color = Species))    # three lines, one color each
    

    几何对象可以叠加:局部映射VS全局映射

    # Local mapping: each geom carries its own aes(), so the mapping is repeated per layer
    > ggplot(data = test) + 
    +   geom_smooth(mapping = aes(x = Sepal.Length,y = Petal.Length))+
    +   geom_point(mapping = aes(x = Sepal.Length,y = Petal.Length))
    
    
    # Global mapping: aes() is given once in ggplot() and both layers use it
    > ggplot(data = test,mapping = aes(x = Sepal.Length, y = Petal.Length))+
    +   geom_smooth()+
    +   geom_point()
    
    
    图片.png
    # Exercise 6-2
    # 1. Try to write the code for the figure below
    # The data is iris
    # The x axis is Species
    # The y axis is Sepal.Width
    # The plot is a box plot
    > ggplot(data = iris,mapping = aes(x = Species, y = Sepal.Width))+
    +   geom_boxplot()
    
    # 2. Try to overlay a point layer on this plot.
    # What problem do you notice?
    > ggplot(data = iris,mapping = aes(x = Species, y = Sepal.Width))+
    +   geom_boxplot()+
    +   geom_point()     # problem: the plot does not show that each subset contains 50 observations
    
    # 3. Draw the plot with the code below and observe the result
    > ggplot(test,aes(x = Sepal.Length,y = Petal.Length,color = Species)) +
    +   geom_point()+
    +   geom_smooth(color = "black")    # the layer-level color = "black" conflicts with the global color = Species; when local and global settings conflict, the local one wins
    

    5.统计变换-条形图

    # Inspect the diamonds data and tabulate the cut column, then draw the
    # counts per cut twice: once with geom_bar() and once with stat_count().
    # geom_bar() uses the count stat by default, so both calls draw the same chart.
    > View(diamonds)
    > table(diamonds$cut)
    > ggplot(data = diamonds) + 
    +   geom_bar(mapping = aes(x = cut))
    > ggplot(data = diamonds) + 
    +   stat_count(mapping = aes(x = cut))
    

    统计变换使用场景
    (1)使用表中数据直接作图,而不统计

    # Plot pre-computed values directly: fre is the frequency table of
    # diamonds$cut converted to a data frame (the mapping below uses its Var1
    # and Freq columns); stat = "identity" draws Freq as-is instead of counting rows.
    > fre = as.data.frame(table(diamonds$cut))
    > ggplot(data = fre) +
    +   geom_bar(mapping = aes(x = Var1, y = Freq), stat = "identity")
    

    (2)不统计count,统计prop(比例),count改为prop

    # Plot proportions instead of counts. after_stat(prop) refers to the prop
    # variable computed by the count stat; it replaces the ..prop.. notation,
    # which is deprecated since ggplot2 3.4.0. group = 1 puts all rows in one
    # group so each bar is a proportion of the whole data set.
    > ggplot(data = diamonds) + 
    +   geom_bar(mapping = aes(x = cut, y = after_stat(prop), group = 1))
    

    6.位置调整

    抖动的点图

    # Box plot of hwy per class from the mpg data, with the raw points drawn
    # on top using geom_point() (no position adjustment).
    > ggplot(data = mpg,mapping = aes(x = class, 
    +                                 y = hwy,
    +                                 group = class)) + 
    +   geom_boxplot()+
    +   geom_point()
    
    图片.png
    # Same box plot, but the point layer is geom_jitter(), which adds random
    # noise to the point positions so overlapping points become visible.
    > ggplot(data = mpg,mapping = aes(x = class, 
    +                                 y = hwy,
    +                                 group = class)) + 
    +   geom_boxplot()+
    +   geom_jitter()
    
    图片.png

    堆叠条形图

    # Stacked bars: fill is mapped to clarity and no position is given, so
    # geom_bar() uses its default position and stacks the clarity levels within each cut.
    > ggplot(data = diamonds) + 
    +   geom_bar(mapping = aes(x = cut,fill=clarity))
    
    图片.png

    并列条形图

    # Side-by-side bars: position = "dodge" places the clarity levels next to
    # each other within each cut instead of stacking them.
    > ggplot(data = diamonds) + 
    +   geom_bar(mapping = aes(x = cut, fill = clarity), position = "dodge")
    
    图片.png

    7.坐标系

    翻转coord_flip()

    # Box plot of hwy per class with the axes swapped by coord_flip().
    > ggplot(data = mpg, mapping = aes(x = class, y = hwy)) + 
    +   geom_boxplot() +
    +   coord_flip()
    
    图片.png
    # Build a reusable bar chart object: counts per cut, filled by cut, legend
    # hidden, bar width 1, aspect ratio 1, and both axis titles removed.
    > bar <- ggplot(data = diamonds) + 
    +   geom_bar(
    +     mapping = aes(x = cut, fill = cut), 
    +     show.legend = FALSE,
    +     width = 1
    +   ) + 
    +   theme(aspect.ratio = 1) +
    +   labs(x = NULL, y = NULL)
    
    # Draw the stored chart with the axes swapped.
    > bar + coord_flip()
    
    图片.png

    极坐标系coord_polar()

    # Draw the stored bar chart object (bar, built earlier in these notes) in polar coordinates.
    > bar + coord_polar()
    
    图片.png
    图片.png 图片.png
    # Layered plot of Sepal.Width per Species: violins filled by Species, box
    # plots, and jittered points whose shape is mapped to Species; the axes
    # are flipped and the classic theme is applied.
    > ggplot(iris, aes(x = Species, y = Sepal.Width))+
    +   geom_violin(aes(fill = Species))+
    +   geom_boxplot()+
    +   geom_jitter(aes(shape = Species))+
    +   coord_flip()+
    +   theme_classic()
    

    ggpubr

    # ggpubr scatter plot: columns are passed as strings rather than through aes().
    ggscatter(iris,x="Sepal.Length",y="Petal.Length",color="Species")
    
    图片.png
    # ggpubr box plot of Sepal.Length per Species, stored in p; color and
    # shape follow Species, and add = "jitter" overlays jittered points.
    > p <- ggboxplot(iris, x = "Species", y = "Sepal.Length",
    +                color = "Species", shape = "Species",
    +                add = "jitter")
    
    图片.png
    # The three pairs of Species to compare. The plot below is printed but not
    # assigned, so p itself is left unchanged.
    # NOTE(review): the second stat_compare_means() call presumably adds the
    # overall (global) test p-value at y = 9 - confirm against the ggpubr docs.
    > my_comparisons <- list( c("setosa", "versicolor"), c("setosa", "virginica"), c("versicolor", "virginica") )
    > p + stat_compare_means(comparisons = my_comparisons)+ # Add pairwise comparisons p-value
    +   stat_compare_means(label.y = 9) 
    
    图片.png
    图片.png
    eoffice包 导出为ppt,全部元素都是可编辑模式
    # Export the plot object p to a PowerPoint file with eoffice::topptx().
    # p is the box plot as assigned earlier; the comparison layers were added
    # in an expression that was not assigned back to p, so they are not exported.
    library(eoffice)
    topptx(p,"iris_box_ggpubr.pptx")
    

    小洁老师的画图合集 https://www.jianshu.com/nb/35523479

    相关文章

      网友评论

          本文标题:数据挖掘20210111学习笔记

          本文链接:https://www.haomeiwen.com/subject/euckfltx.html