R 语言中有一个功能强大的数据处理包,它就是 dplyr。使用 dplyr 可以像操作数据库一样操作 R 中的数据,方便、轻松、快捷。
主要内容
1、选择数据表的列: select, rename
2、select 只会选择你指定的列
3、rename 则会改变列名, 并选择其他所有的列
4、选择数据表的行: filter
5、改变数据表的列: mutate, transmute
6、mutate 会保留改变前和改变后的列
7、transmute 则只会保留改变后的列, 而扔掉改变前的列
8、通过 group_by 和 summarize 函数可以对数据进行分组汇总分析
library(dplyr)
# Keep only the iris rows whose sepal length exceeds 7 and render a table.
iris %>%
  dplyr::filter(Sepal.Length > 7) %>%
  knitr::kable()
Sepal.Length |
Sepal.Width |
Petal.Length |
Petal.Width |
Species |
7.1 |
3.0 |
5.9 |
2.1 |
virginica |
7.6 |
3.0 |
6.6 |
2.1 |
virginica |
7.3 |
2.9 |
6.3 |
1.8 |
virginica |
7.2 |
3.6 |
6.1 |
2.5 |
virginica |
7.7 |
3.8 |
6.7 |
2.2 |
virginica |
7.7 |
2.6 |
6.9 |
2.3 |
virginica |
7.7 |
2.8 |
6.7 |
2.0 |
virginica |
7.2 |
3.2 |
6.0 |
1.8 |
virginica |
7.2 |
3.0 |
5.8 |
1.6 |
virginica |
7.4 |
2.8 |
6.1 |
1.9 |
virginica |
7.9 |
3.8 |
6.4 |
2.0 |
virginica |
7.7 |
3.0 |
6.1 |
2.3 |
virginica |
# Stack two overlapping row ranges, then drop the duplicated rows
# (the data-frame counterpart of unique()).
rbind(iris[1:10, ], iris[1:15, ]) %>%
  dplyr::distinct() %>%
  knitr::kable()
Sepal.Length |
Sepal.Width |
Petal.Length |
Petal.Width |
Species |
5.1 |
3.5 |
1.4 |
0.2 |
setosa |
4.9 |
3.0 |
1.4 |
0.2 |
setosa |
4.7 |
3.2 |
1.3 |
0.2 |
setosa |
4.6 |
3.1 |
1.5 |
0.2 |
setosa |
5.0 |
3.6 |
1.4 |
0.2 |
setosa |
5.4 |
3.9 |
1.7 |
0.4 |
setosa |
4.6 |
3.4 |
1.4 |
0.3 |
setosa |
5.0 |
3.4 |
1.5 |
0.2 |
setosa |
4.4 |
2.9 |
1.4 |
0.2 |
setosa |
4.9 |
3.1 |
1.5 |
0.1 |
setosa |
5.4 |
3.7 |
1.5 |
0.2 |
setosa |
4.8 |
3.4 |
1.6 |
0.2 |
setosa |
4.8 |
3.0 |
1.4 |
0.1 |
setosa |
4.3 |
3.0 |
1.1 |
0.1 |
setosa |
5.8 |
4.0 |
1.2 |
0.2 |
setosa |
# Keep Species plus every column whose name starts with "Petal",
# then restrict the result to the setosa rows.
iris %>%
  select(Species, starts_with("Petal")) %>%
  filter(Species == "setosa") %>%
  knitr::kable()
Species |
Petal.Length |
Petal.Width |
setosa |
1.4 |
0.2 |
setosa |
1.4 |
0.2 |
setosa |
1.3 |
0.2 |
setosa |
1.5 |
0.2 |
setosa |
1.4 |
0.2 |
setosa |
1.7 |
0.4 |
setosa |
1.4 |
0.3 |
setosa |
1.5 |
0.2 |
setosa |
1.4 |
0.2 |
setosa |
1.5 |
0.1 |
setosa |
1.5 |
0.2 |
setosa |
1.6 |
0.2 |
setosa |
1.4 |
0.1 |
setosa |
1.1 |
0.1 |
setosa |
1.2 |
0.2 |
setosa |
1.5 |
0.4 |
setosa |
1.3 |
0.4 |
setosa |
1.4 |
0.3 |
setosa |
1.7 |
0.3 |
setosa |
1.5 |
0.3 |
setosa |
1.7 |
0.2 |
setosa |
1.5 |
0.4 |
setosa |
1.0 |
0.2 |
setosa |
1.7 |
0.5 |
setosa |
1.9 |
0.2 |
setosa |
1.6 |
0.2 |
setosa |
1.6 |
0.4 |
setosa |
1.5 |
0.2 |
setosa |
1.4 |
0.2 |
setosa |
1.6 |
0.2 |
setosa |
1.6 |
0.2 |
setosa |
1.5 |
0.4 |
setosa |
1.5 |
0.1 |
setosa |
1.4 |
0.2 |
setosa |
1.5 |
0.2 |
setosa |
1.2 |
0.2 |
setosa |
1.3 |
0.2 |
setosa |
1.4 |
0.1 |
setosa |
1.3 |
0.2 |
setosa |
1.5 |
0.2 |
setosa |
1.3 |
0.3 |
setosa |
1.3 |
0.3 |
setosa |
1.3 |
0.2 |
setosa |
1.6 |
0.6 |
setosa |
1.9 |
0.4 |
setosa |
1.4 |
0.3 |
setosa |
1.6 |
0.2 |
setosa |
1.4 |
0.2 |
setosa |
1.5 |
0.2 |
setosa |
1.4 |
0.2 |
# Slice rows 10 through 15 by position; row names are not displayed.
iris %>%
  dplyr::slice(10:15) %>%
  knitr::kable()
Sepal.Length |
Sepal.Width |
Petal.Length |
Petal.Width |
Species |
4.9 |
3.1 |
1.5 |
0.1 |
setosa |
5.4 |
3.7 |
1.5 |
0.2 |
setosa |
4.8 |
3.4 |
1.6 |
0.2 |
setosa |
4.8 |
3.0 |
1.4 |
0.1 |
setosa |
4.3 |
3.0 |
1.1 |
0.1 |
setosa |
5.8 |
4.0 |
1.2 |
0.2 |
setosa |
# Draw 10 rows at random.
iris %>%
  dplyr::sample_n(10) %>%
  knitr::kable()
Sepal.Length |
Sepal.Width |
Petal.Length |
Petal.Width |
Species |
6.7 |
3.3 |
5.7 |
2.5 |
virginica |
5.4 |
3.4 |
1.5 |
0.4 |
setosa |
5.1 |
3.3 |
1.7 |
0.5 |
setosa |
6.0 |
3.4 |
4.5 |
1.6 |
versicolor |
6.7 |
2.5 |
5.8 |
1.8 |
virginica |
5.7 |
2.8 |
4.1 |
1.3 |
versicolor |
5.1 |
3.5 |
1.4 |
0.3 |
setosa |
6.4 |
2.8 |
5.6 |
2.2 |
virginica |
6.3 |
2.9 |
5.6 |
1.8 |
virginica |
5.2 |
3.5 |
1.5 |
0.2 |
setosa |
# Percentage sampling: draw a random fraction (0.1) of the rows.
iris %>%
  dplyr::sample_frac(0.1) %>%
  knitr::kable()
Sepal.Length |
Sepal.Width |
Petal.Length |
Petal.Width |
Species |
7.2 |
3.6 |
6.1 |
2.5 |
virginica |
6.9 |
3.1 |
5.1 |
2.3 |
virginica |
6.2 |
3.4 |
5.4 |
2.3 |
virginica |
7.7 |
2.8 |
6.7 |
2.0 |
virginica |
4.6 |
3.2 |
1.4 |
0.2 |
setosa |
6.3 |
2.5 |
4.9 |
1.5 |
versicolor |
5.7 |
2.6 |
3.5 |
1.0 |
versicolor |
5.7 |
2.8 |
4.1 |
1.3 |
versicolor |
4.7 |
3.2 |
1.6 |
0.2 |
setosa |
6.3 |
2.8 |
5.1 |
1.5 |
virginica |
6.9 |
3.1 |
4.9 |
1.5 |
versicolor |
5.7 |
2.5 |
5.0 |
2.0 |
virginica |
5.1 |
3.3 |
1.7 |
0.5 |
setosa |
7.2 |
3.0 |
5.8 |
1.6 |
virginica |
5.0 |
3.2 |
1.2 |
0.2 |
setosa |
# Sort by sepal length from smallest to largest and show the first rows.
iris %>%
  dplyr::arrange(Sepal.Length) %>%
  head() %>%
  knitr::kable()
Sepal.Length |
Sepal.Width |
Petal.Length |
Petal.Width |
Species |
4.3 |
3.0 |
1.1 |
0.1 |
setosa |
4.4 |
2.9 |
1.4 |
0.2 |
setosa |
4.4 |
3.0 |
1.3 |
0.2 |
setosa |
4.4 |
3.2 |
1.3 |
0.2 |
setosa |
4.5 |
2.3 |
1.3 |
0.3 |
setosa |
4.6 |
3.1 |
1.5 |
0.2 |
setosa |
# Sort by sepal length from largest to smallest and show the first rows.
iris %>%
  dplyr::arrange(desc(Sepal.Length)) %>%
  head() %>%
  knitr::kable()
Sepal.Length |
Sepal.Width |
Petal.Length |
Petal.Width |
Species |
7.9 |
3.8 |
6.4 |
2.0 |
virginica |
7.7 |
3.8 |
6.7 |
2.2 |
virginica |
7.7 |
2.6 |
6.9 |
2.3 |
virginica |
7.7 |
2.8 |
6.7 |
2.0 |
virginica |
7.7 |
3.0 |
6.1 |
2.3 |
virginica |
7.6 |
3.0 |
6.6 |
2.1 |
virginica |
# Collapse the whole table to one value: first the mean, then the total,
# of sepal length. Each result is rendered as its own table.
iris %>%
  summarise(avg = mean(Sepal.Length)) %>%
  knitr::kable()
iris %>%
  summarise(sum = sum(Sepal.Length)) %>%
  knitr::kable()
# The %>% pipe operator: feed the left-hand result into the next call.
# Here: take the first 20 rows of mtcars, then the tail of those rows.
mtcars %>%
  head(20) %>%
  tail() %>%
  knitr::kable()
|
mpg |
cyl |
disp |
hp |
drat |
wt |
qsec |
vs |
am |
gear |
carb |
Model |
15 |
10.4 |
8 |
472.0 |
205 |
2.93 |
5.250 |
17.98 |
0 |
0 |
3 |
4 |
Cadillac Fleetwood |
16 |
10.4 |
8 |
460.0 |
215 |
3.00 |
5.424 |
17.82 |
0 |
0 |
3 |
4 |
Lincoln Continental |
17 |
14.7 |
8 |
440.0 |
230 |
3.23 |
5.345 |
17.42 |
0 |
0 |
3 |
4 |
Chrysler Imperial |
18 |
32.4 |
4 |
78.7 |
66 |
4.08 |
2.200 |
19.47 |
1 |
1 |
4 |
1 |
Fiat 128 |
19 |
30.4 |
4 |
75.7 |
52 |
4.93 |
1.615 |
18.52 |
1 |
1 |
4 |
2 |
Honda Civic |
20 |
33.9 |
4 |
71.1 |
65 |
4.22 |
1.835 |
19.90 |
1 |
1 |
4 |
1 |
Toyota Corolla |
# Attach a grouping by Species and show the first rows.
iris %>%
  dplyr::group_by(Species) %>%
  head() %>%
  knitr::kable()
Sepal.Length |
Sepal.Width |
Petal.Length |
Petal.Width |
Species |
5.1 |
3.5 |
1.4 |
0.2 |
setosa |
4.9 |
3.0 |
1.4 |
0.2 |
setosa |
4.7 |
3.2 |
1.3 |
0.2 |
setosa |
4.6 |
3.1 |
1.5 |
0.2 |
setosa |
5.0 |
3.6 |
1.4 |
0.2 |
setosa |
5.4 |
3.9 |
1.7 |
0.4 |
setosa |
# Mean sepal width per species, ordered from the smallest mean upward.
iris %>%
  group_by(Species) %>%
  summarise(avg = mean(Sepal.Width)) %>%
  arrange(avg) %>%
  knitr::kable()
# Add a column `new` holding sepal length plus petal length, keeping the
# existing columns, and show the first rows.
iris %>%
  dplyr::mutate(new = Sepal.Length + Petal.Length) %>%
  head() %>%
  knitr::kable()
Sepal.Length |
Sepal.Width |
Petal.Length |
Petal.Width |
Species |
new |
5.1 |
3.5 |
1.4 |
0.2 |
setosa |
6.5 |
4.9 |
3.0 |
1.4 |
0.2 |
setosa |
6.3 |
4.7 |
3.2 |
1.3 |
0.2 |
setosa |
6.0 |
4.6 |
3.1 |
1.5 |
0.2 |
setosa |
6.1 |
5.0 |
3.6 |
1.4 |
0.2 |
setosa |
6.4 |
5.4 |
3.9 |
1.7 |
0.4 |
setosa |
7.1 |
# Joins: two small tables that share the key column `x1`.
# "A" and "B" occur in both tables; "C" only in `a`, "D" only in `b`.
a <- data.frame(x1 = c("A", "B", "C"), x2 = c(1, 2, 3))
# Spell out TRUE/FALSE: T and F are ordinary variables that can be reassigned.
b <- data.frame(x1 = c("A", "B", "D"), x3 = c(TRUE, FALSE, TRUE))
# Left join on x1: every row of `a` is kept; x3 is NA where `b` has no match.
a %>%
  dplyr::left_join(b, by = "x1") %>%
  knitr::kable()
| x1 | x2 | x3    |
|:---|---:|:------|
| A  |  1 | TRUE  |
| B  |  2 | FALSE |
| C  |  3 | NA    |
# Full join on x1: rows from both tables are kept, with NA where the other
# table has no matching key.
a %>%
  dplyr::full_join(b, by = "x1") %>%
  knitr::kable()
| x1 | x2 | x3    |
|:---|---:|:------|
| A  |  1 | TRUE  |
| B  |  2 | FALSE |
| C  |  3 | NA    |
| D  | NA | TRUE  |
# Filtering joins on x1: semi_join keeps the rows of `a` that have a match
# in `b`; anti_join keeps the rows of `a` that have none.
a %>%
  dplyr::semi_join(b, by = "x1") %>%
  knitr::kable()
a %>%
  dplyr::anti_join(b, by = "x1") %>%
  knitr::kable()
# Store the row names in a `Model` column. This rebinds `mtcars` in the
# current environment.
mtcars <- mutate(mtcars, Model = rownames(mtcars))
# Two overlapping windows of rows: positions 4 and 5 fall in both.
# (An earlier `first <- slice(mtcars, 1:5)` taken before the mutate was
# dropped: it was overwritten here before ever being used.)
first <- slice(mtcars, 1:5)
second <- slice(mtcars, 4:8)
# Rows that appear in both `first` and `second`.
knitr::kable(intersect(first, second))
mpg |
cyl |
disp |
hp |
drat |
wt |
qsec |
vs |
am |
gear |
carb |
Model |
21.4 |
6 |
258 |
110 |
3.08 |
3.215 |
19.44 |
1 |
0 |
3 |
1 |
4 |
18.7 |
8 |
360 |
175 |
3.15 |
3.440 |
17.02 |
0 |
0 |
3 |
2 |
5 |
# All rows of `first` followed by all rows of `second`, duplicates kept.
dplyr::union_all(first, second) %>%
  knitr::kable()
mpg |
cyl |
disp |
hp |
drat |
wt |
qsec |
vs |
am |
gear |
carb |
Model |
21.0 |
6 |
160.0 |
110 |
3.90 |
2.620 |
16.46 |
0 |
1 |
4 |
4 |
1 |
21.0 |
6 |
160.0 |
110 |
3.90 |
2.875 |
17.02 |
0 |
1 |
4 |
4 |
2 |
22.8 |
4 |
108.0 |
93 |
3.85 |
2.320 |
18.61 |
1 |
1 |
4 |
1 |
3 |
21.4 |
6 |
258.0 |
110 |
3.08 |
3.215 |
19.44 |
1 |
0 |
3 |
1 |
4 |
18.7 |
8 |
360.0 |
175 |
3.15 |
3.440 |
17.02 |
0 |
0 |
3 |
2 |
5 |
21.4 |
6 |
258.0 |
110 |
3.08 |
3.215 |
19.44 |
1 |
0 |
3 |
1 |
4 |
18.7 |
8 |
360.0 |
175 |
3.15 |
3.440 |
17.02 |
0 |
0 |
3 |
2 |
5 |
18.1 |
6 |
225.0 |
105 |
2.76 |
3.460 |
20.22 |
1 |
0 |
3 |
1 |
6 |
14.3 |
8 |
360.0 |
245 |
3.21 |
3.570 |
15.84 |
0 |
0 |
3 |
4 |
7 |
24.4 |
4 |
146.7 |
62 |
3.69 |
3.190 |
20.00 |
1 |
0 |
4 |
2 |
8 |
# Rows that appear in `first` or `second`, with duplicates removed.
dplyr::union(first, second) %>%
  knitr::kable()
mpg |
cyl |
disp |
hp |
drat |
wt |
qsec |
vs |
am |
gear |
carb |
Model |
21.0 |
6 |
160.0 |
110 |
3.90 |
2.620 |
16.46 |
0 |
1 |
4 |
4 |
1 |
21.0 |
6 |
160.0 |
110 |
3.90 |
2.875 |
17.02 |
0 |
1 |
4 |
4 |
2 |
22.8 |
4 |
108.0 |
93 |
3.85 |
2.320 |
18.61 |
1 |
1 |
4 |
1 |
3 |
21.4 |
6 |
258.0 |
110 |
3.08 |
3.215 |
19.44 |
1 |
0 |
3 |
1 |
4 |
18.7 |
8 |
360.0 |
175 |
3.15 |
3.440 |
17.02 |
0 |
0 |
3 |
2 |
5 |
18.1 |
6 |
225.0 |
105 |
2.76 |
3.460 |
20.22 |
1 |
0 |
3 |
1 |
6 |
14.3 |
8 |
360.0 |
245 |
3.21 |
3.570 |
15.84 |
0 |
0 |
3 |
4 |
7 |
24.4 |
4 |
146.7 |
62 |
3.69 |
3.190 |
20.00 |
1 |
0 |
4 |
2 |
8 |
# Rows that appear in `first` but not in `second`.
dplyr::setdiff(first, second) %>%
  knitr::kable()
mpg |
cyl |
disp |
hp |
drat |
wt |
qsec |
vs |
am |
gear |
carb |
Model |
21.0 |
6 |
160 |
110 |
3.90 |
2.620 |
16.46 |
0 |
1 |
4 |
4 |
1 |
21.0 |
6 |
160 |
110 |
3.90 |
2.875 |
17.02 |
0 |
1 |
4 |
4 |
2 |
22.8 |
4 |
108 |
93 |
3.85 |
2.320 |
18.61 |
1 |
1 |
4 |
1 |
3 |
# Rows that appear in `second` but not in `first`.
dplyr::setdiff(second, first) %>%
  knitr::kable()
mpg |
cyl |
disp |
hp |
drat |
wt |
qsec |
vs |
am |
gear |
carb |
Model |
18.1 |
6 |
225.0 |
105 |
2.76 |
3.46 |
20.22 |
1 |
0 |
3 |
1 |
6 |
14.3 |
8 |
360.0 |
245 |
3.21 |
3.57 |
15.84 |
0 |
0 |
3 |
4 |
7 |
24.4 |
4 |
146.7 |
62 |
3.69 |
3.19 |
20.00 |
1 |
0 |
4 |
2 |
8 |
网友评论