美文网首页
dplyr包常用函数操作

dplyr包常用函数操作

作者: 多啦A梦詹 | 来源:发表于2020-02-22 11:18 被阅读0次

    R 语言中有一个功能强大的数据处理包,它就是 dplyr 包。dplyr 包让你可以像操作数据库一样操作 R 中的数据表,方便、轻松、快捷。

    主要内容

    1、选择数据表的列: select, rename
    2、select 只会选择你指定的列
    3、rename 则只改变指定列的列名, 并保留其他所有的列
    4、选择数据表的行: filter
    5、改变数据表的列: mutate, transmute
    6、mutate 会保留改变前和改变后的列
    7、transmute 则只会保留改变后的列, 而扔掉改变前的列
    8、通过 group_by 和 summarize 函数可以对数据进行分组汇总分析


    library(dplyr)
    knitr::kable(dplyr::filter(iris, Sepal.Length > 7))
    
    Sepal.Length Sepal.Width Petal.Length Petal.Width Species
    7.1 3.0 5.9 2.1 virginica
    7.6 3.0 6.6 2.1 virginica
    7.3 2.9 6.3 1.8 virginica
    7.2 3.6 6.1 2.5 virginica
    7.7 3.8 6.7 2.2 virginica
    7.7 2.6 6.9 2.3 virginica
    7.7 2.8 6.7 2.0 virginica
    7.2 3.2 6.0 1.8 virginica
    7.2 3.0 5.8 1.6 virginica
    7.4 2.8 6.1 1.9 virginica
    7.9 3.8 6.4 2.0 virginica
    7.7 3.0 6.1 2.3 virginica
    knitr::kable(dplyr::distinct(rbind(iris[1:10, ], iris[1:15, ])))  #unique
    
    Sepal.Length Sepal.Width Petal.Length Petal.Width Species
    5.1 3.5 1.4 0.2 setosa
    4.9 3.0 1.4 0.2 setosa
    4.7 3.2 1.3 0.2 setosa
    4.6 3.1 1.5 0.2 setosa
    5.0 3.6 1.4 0.2 setosa
    5.4 3.9 1.7 0.4 setosa
    4.6 3.4 1.4 0.3 setosa
    5.0 3.4 1.5 0.2 setosa
    4.4 2.9 1.4 0.2 setosa
    4.9 3.1 1.5 0.1 setosa
    5.4 3.7 1.5 0.2 setosa
    4.8 3.4 1.6 0.2 setosa
    4.8 3.0 1.4 0.1 setosa
    4.3 3.0 1.1 0.1 setosa
    5.8 4.0 1.2 0.2 setosa
    knitr::kable(iris %>% select(Species, starts_with("Petal")) %>% filter(Species == 
        "setosa"))
    
    Species Petal.Length Petal.Width
    setosa 1.4 0.2
    setosa 1.4 0.2
    setosa 1.3 0.2
    setosa 1.5 0.2
    setosa 1.4 0.2
    setosa 1.7 0.4
    setosa 1.4 0.3
    setosa 1.5 0.2
    setosa 1.4 0.2
    setosa 1.5 0.1
    setosa 1.5 0.2
    setosa 1.6 0.2
    setosa 1.4 0.1
    setosa 1.1 0.1
    setosa 1.2 0.2
    setosa 1.5 0.4
    setosa 1.3 0.4
    setosa 1.4 0.3
    setosa 1.7 0.3
    setosa 1.5 0.3
    setosa 1.7 0.2
    setosa 1.5 0.4
    setosa 1.0 0.2
    setosa 1.7 0.5
    setosa 1.9 0.2
    setosa 1.6 0.2
    setosa 1.6 0.4
    setosa 1.5 0.2
    setosa 1.4 0.2
    setosa 1.6 0.2
    setosa 1.6 0.2
    setosa 1.5 0.4
    setosa 1.5 0.1
    setosa 1.4 0.2
    setosa 1.5 0.2
    setosa 1.2 0.2
    setosa 1.3 0.2
    setosa 1.4 0.1
    setosa 1.3 0.2
    setosa 1.5 0.2
    setosa 1.3 0.3
    setosa 1.3 0.3
    setosa 1.3 0.2
    setosa 1.6 0.6
    setosa 1.9 0.4
    setosa 1.4 0.3
    setosa 1.6 0.2
    setosa 1.4 0.2
    setosa 1.5 0.2
    setosa 1.4 0.2
    knitr::kable(dplyr::slice(iris, 10:15))  #切片,行名不显示
    
    Sepal.Length Sepal.Width Petal.Length Petal.Width Species
    4.9 3.1 1.5 0.1 setosa
    5.4 3.7 1.5 0.2 setosa
    4.8 3.4 1.6 0.2 setosa
    4.8 3.0 1.4 0.1 setosa
    4.3 3.0 1.1 0.1 setosa
    5.8 4.0 1.2 0.2 setosa
    knitr::kable(dplyr::sample_n(iris, 10))  #随机抽取10个
    
    Sepal.Length Sepal.Width Petal.Length Petal.Width Species
    6.7 3.3 5.7 2.5 virginica
    5.4 3.4 1.5 0.4 setosa
    5.1 3.3 1.7 0.5 setosa
    6.0 3.4 4.5 1.6 versicolor
    6.7 2.5 5.8 1.8 virginica
    5.7 2.8 4.1 1.3 versicolor
    5.1 3.5 1.4 0.3 setosa
    6.4 2.8 5.6 2.2 virginica
    6.3 2.9 5.6 1.8 virginica
    5.2 3.5 1.5 0.2 setosa
    knitr::kable(dplyr::sample_frac(iris, 0.1))  #百分比抽样
    
    Sepal.Length Sepal.Width Petal.Length Petal.Width Species
    7.2 3.6 6.1 2.5 virginica
    6.9 3.1 5.1 2.3 virginica
    6.2 3.4 5.4 2.3 virginica
    7.7 2.8 6.7 2.0 virginica
    4.6 3.2 1.4 0.2 setosa
    6.3 2.5 4.9 1.5 versicolor
    5.7 2.6 3.5 1.0 versicolor
    5.7 2.8 4.1 1.3 versicolor
    4.7 3.2 1.6 0.2 setosa
    6.3 2.8 5.1 1.5 virginica
    6.9 3.1 4.9 1.5 versicolor
    5.7 2.5 5.0 2.0 virginica
    5.1 3.3 1.7 0.5 setosa
    7.2 3.0 5.8 1.6 virginica
    5.0 3.2 1.2 0.2 setosa
    knitr::kable(head(dplyr::arrange(iris, Sepal.Length)))  #从小到大排序
    
    Sepal.Length Sepal.Width Petal.Length Petal.Width Species
    4.3 3.0 1.1 0.1 setosa
    4.4 2.9 1.4 0.2 setosa
    4.4 3.0 1.3 0.2 setosa
    4.4 3.2 1.3 0.2 setosa
    4.5 2.3 1.3 0.3 setosa
    4.6 3.1 1.5 0.2 setosa
    knitr::kable(head(dplyr::arrange(iris, desc(Sepal.Length))))
    
    Sepal.Length Sepal.Width Petal.Length Petal.Width Species
    7.9 3.8 6.4 2.0 virginica
    7.7 3.8 6.7 2.2 virginica
    7.7 2.6 6.9 2.3 virginica
    7.7 2.8 6.7 2.0 virginica
    7.7 3.0 6.1 2.3 virginica
    7.6 3.0 6.6 2.1 virginica
    knitr::kable(summarise(iris, avg = mean(Sepal.Length)))
    
    avg
    5.843333
    knitr::kable(summarise(iris, sum = sum(Sepal.Length)))
    
    sum
    876.5
    # %>% 管道符
    knitr::kable(head(mtcars, 20) %>% tail())
    
    mpg cyl disp hp drat wt qsec vs am gear carb Model
    15 10.4 8 472.0 205 2.93 5.250 17.98 0 0 3 4 Cadillac Fleetwood
    16 10.4 8 460.0 215 3.00 5.424 17.82 0 0 3 4 Lincoln Continental
    17 14.7 8 440.0 230 3.23 5.345 17.42 0 0 3 4 Chrysler Imperial
    18 32.4 4 78.7 66 4.08 2.200 19.47 1 1 4 1 Fiat 128
    19 30.4 4 75.7 52 4.93 1.615 18.52 1 1 4 2 Honda Civic
    20 33.9 4 71.1 65 4.22 1.835 19.90 1 1 4 1 Toyota Corolla
    knitr::kable(head(dplyr::group_by(iris, Species)))
    
    Sepal.Length Sepal.Width Petal.Length Petal.Width Species
    5.1 3.5 1.4 0.2 setosa
    4.9 3.0 1.4 0.2 setosa
    4.7 3.2 1.3 0.2 setosa
    4.6 3.1 1.5 0.2 setosa
    5.0 3.6 1.4 0.2 setosa
    5.4 3.9 1.7 0.4 setosa
    knitr::kable(iris %>% group_by(Species) %>% summarise(avg = mean(Sepal.Width)) %>% 
        arrange(avg))
    
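    Species avg
    versicolor 2.770
    virginica 2.974
    setosa 3.428
    (注:若在 dplyr 之后又加载了 plyr 包,summarise 会被 plyr::summarise 覆盖,分组失效,结果只会得到一行总体均值 avg = 3.057333;此时请显式写成 dplyr::summarise。)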
    knitr::kable(head(dplyr::mutate(iris, new = Sepal.Length + Petal.Length)))  #增加列
    
    Sepal.Length Sepal.Width Petal.Length Petal.Width Species new
    5.1 3.5 1.4 0.2 setosa 6.5
    4.9 3.0 1.4 0.2 setosa 6.3
    4.7 3.2 1.3 0.2 setosa 6.0
    4.6 3.1 1.5 0.2 setosa 6.1
    5.0 3.6 1.4 0.2 setosa 6.4
    5.4 3.9 1.7 0.4 setosa 7.1
    # Connect
    a = data.frame(x1 = c("A", "B", "C"), x2 = c(1, 2, 3))
    b = data.frame(x1 = c("A", "B", "D"), x3 = c(T, F, T))
    knitr::kable(dplyr::left_join(a, b, by = "x1"))
    
    x1 x2 x3
    A 1 TRUE
    B 2 FALSE
    C 3 NA
    knitr::kable(dplyr::full_join(a, b, by = "x1"))
    
    x1 x2 x3
    A 1 TRUE
    B 2 FALSE
    C 3 NA
    D NA TRUE
    knitr::kable(dplyr::semi_join(a, b, by = "x1"))
    
    x1 x2
    A 1
    B 2
    knitr::kable(dplyr::anti_join(a, b, by = "x1"))
    
    x1 x2
    C 3
    first <- slice(mtcars, 1:5)
    mtcars <- mutate(mtcars, Model = rownames(mtcars))
    first <- slice(mtcars, 1:5)
    second <- slice(mtcars, 4:8)
    knitr::kable(intersect(first, second))
    
    mpg cyl disp hp drat wt qsec vs am gear carb Model
    21.4 6 258 110 3.08 3.215 19.44 1 0 3 1 4
    18.7 8 360 175 3.15 3.440 17.02 0 0 3 2 5
    knitr::kable(union_all(first, second))
    
    mpg cyl disp hp drat wt qsec vs am gear carb Model
    21.0 6 160.0 110 3.90 2.620 16.46 0 1 4 4 1
    21.0 6 160.0 110 3.90 2.875 17.02 0 1 4 4 2
    22.8 4 108.0 93 3.85 2.320 18.61 1 1 4 1 3
    21.4 6 258.0 110 3.08 3.215 19.44 1 0 3 1 4
    18.7 8 360.0 175 3.15 3.440 17.02 0 0 3 2 5
    21.4 6 258.0 110 3.08 3.215 19.44 1 0 3 1 4
    18.7 8 360.0 175 3.15 3.440 17.02 0 0 3 2 5
    18.1 6 225.0 105 2.76 3.460 20.22 1 0 3 1 6
    14.3 8 360.0 245 3.21 3.570 15.84 0 0 3 4 7
    24.4 4 146.7 62 3.69 3.190 20.00 1 0 4 2 8
    knitr::kable(union(first, second))
    
    mpg cyl disp hp drat wt qsec vs am gear carb Model
    21.0 6 160.0 110 3.90 2.620 16.46 0 1 4 4 1
    21.0 6 160.0 110 3.90 2.875 17.02 0 1 4 4 2
    22.8 4 108.0 93 3.85 2.320 18.61 1 1 4 1 3
    21.4 6 258.0 110 3.08 3.215 19.44 1 0 3 1 4
    18.7 8 360.0 175 3.15 3.440 17.02 0 0 3 2 5
    18.1 6 225.0 105 2.76 3.460 20.22 1 0 3 1 6
    14.3 8 360.0 245 3.21 3.570 15.84 0 0 3 4 7
    24.4 4 146.7 62 3.69 3.190 20.00 1 0 4 2 8
    knitr::kable(setdiff(first, second))
    
    mpg cyl disp hp drat wt qsec vs am gear carb Model
    21.0 6 160 110 3.90 2.620 16.46 0 1 4 4 1
    21.0 6 160 110 3.90 2.875 17.02 0 1 4 4 2
    22.8 4 108 93 3.85 2.320 18.61 1 1 4 1 3
    knitr::kable(setdiff(second, first))
    
    mpg cyl disp hp drat wt qsec vs am gear carb Model
    18.1 6 225.0 105 2.76 3.46 20.22 1 0 3 1 6
    14.3 8 360.0 245 3.21 3.57 15.84 0 0 3 4 7
    24.4 4 146.7 62 3.69 3.19 20.00 1 0 4 2 8

    相关文章

      网友评论

          本文标题:dplyr包常用函数操作

          本文链接:https://www.haomeiwen.com/subject/hankqhtx.html