# dplyr包的运用 (Using the dplyr package)
# NOTE(review): rm(list = ls()) wipes every object in the calling workspace.
# Kept so the script's side effects are unchanged, but running the script in a
# fresh R session is the safer way to get a clean environment.
rm(list = ls())
# Keep strings as character vectors. Spelled FALSE rather than F because F is
# an ordinary variable that can be reassigned.
options(stringsAsFactors = FALSE)
# Small demo table: rows 1-2, 51-52 and 101-102 of the built-in iris data set.
test <- iris[c(1:2, 51:52, 101:102), ]
library(dplyr)
# mutate(): add a new column computed from existing ones
names(test)
mutate(test, new.space = Sepal.Length * Sepal.Width)
# select(): keep columns, here chosen by position (columns 2 to 4)
select(test, 2:4)
# filter(): keep the rows that satisfy a condition
filter(test, Species == "virginica")
# Columns are referenced by bare name (data masking) rather than `test$...`,
# so the condition is evaluated on the data that filter() actually receives.
# Conditions separated by commas are combined with AND.
filter(test, Sepal.Length >= 5, Sepal.Length < 6)
filter(test, Species %in% c("versicolor", "virginica"))
# arrange(): sort the whole table by one or more columns
arrange(test, Sepal.Length)        # ascending order
arrange(test, desc(Sepal.Length))  # descending order
# summarise(): collapse the table into summary statistics
summarise(
  test,
  mean(Sepal.Width),
  sd(Sepal.Width)
)
# Group by Species first, then compute the mean and standard deviation of
# Sepal.Length within each group.
group_by(test, Species)
summarise(
  group_by(test, Species),
  mean(Sepal.Length),
  sd(Sepal.Length)
)
# Pipe operator %>% (keyboard shortcut: Cmd/Ctrl + Shift + M)
library(tidyverse)
# Mean Sepal.Width per Species, written as a left-to-right pipeline.
test %>%
  group_by(Species) %>%
  summarise(mean(Sepal.Width))
# Two small tables sharing the key column `x`, used as inputs for the joins.
# stringsAsFactors = FALSE keeps the string columns as character vectors; it is
# spelled FALSE rather than F because F is a reassignable variable.
test1 <- data.frame(
  x = c("b", "e", "f", "x"),
  z = c("A", "B", "C", "D"),
  stringsAsFactors = FALSE
)
test2 <- data.frame(
  x = c("a", "b", "c", "d", "e", "f"),
  y = c(1, 2, 3, 4, 5, 6),
  stringsAsFactors = FALSE
)
# Joins on the shared key column "x"
inner_join(test1, test2, by = "x")  # only keys present in both tables
left_join(test1, test2, by = "x")   # every row of test1
left_join(test2, test1, by = "x")   # every row of test2
right_join(test1, test2, by = "x")  # every row of test2
full_join(test1, test2, by = "x")   # all keys from either table
# 网友评论 (Reader comments)