1.寻找众数
# We want the most frequent string, which is "a" here.
mystring <- c("a", "a", "b", "c")
# table() counts each distinct value, which.max() picks the first highest
# count, and names() recovers the value itself. On ties the first value in
# table()'s sorted order wins.
mode_string <- names(which.max(table(mystring)))
2.按照一组数的大小选择第n位的数据
mydata <- c(1, 2, 3, 4, 5)
# After sorting in descending order, position k holds the k-th largest value.
# Index the sorted vector directly: indexing by length(mydata) first would
# collapse it to a single element (the minimum) and every later index > 1
# would then yield NA.
mydata_max <- sort(mydata, decreasing = TRUE)[1] # largest value
mydata_2nd <- sort(mydata, decreasing = TRUE)[2] # 2nd largest, if no duplicates
# In ascending order the first element is the smallest; this also stays
# correct when sort() drops NA values and the result is shorter than mydata.
mydata_min <- sort(mydata)[1] # smallest value
3.按照顺序添加序号
# Assumes `mydata` is a data.frame with a `name` column and that dplyr is
# loaded.
mydata %>%
  group_by(name) %>%
  # row_number() numbers the rows 1..n within each group, in their current
  # order; unlike 1:length(name) it cannot produce c(1, 0) for an empty input.
  mutate(testing = row_number()) %>%
  # Drop the grouping so later steps do not silently operate per group.
  ungroup()
# 效果如下所示
name | score | testing |
---|---|---|
ERIC | 100 | 1 |
ERIC | 99 | 2 |
ERIC | 98 | 3 |
SALLY | 100 | 1 |
SALLY | 99 | 2 |
SALLY | 98 | 3 |
4.group_by() 后需要 ungroup(),否则分组信息会保留在结果中,影响后续操作
# dplyr package
# Schematic only: the verbs are shown without arguments to illustrate the
# order of a grouped pipeline, so this snippet is not runnable as written.
# The chain starts with group_by() and ends with ungroup(), which removes
# the grouping from the result.
group_by() %>%
mutate() %>%
summarise() %>%
ungroup()
5.group_by + filter:直接筛选符合条件的分组
# Assumes `mydata` is a data.frame with name, score and testing columns
# (see section 3) and that dplyr is loaded.
mydata %>%
  group_by(name) %>%
  # Keep the row(s) holding the highest score within each name; ties are all
  # kept. max() avoids sorting the whole group just to read its first
  # element, and na.rm = TRUE matches sort(), which drops NA values.
  filter(score == max(score, na.rm = TRUE)) %>%
  ungroup()
6.建立空数据框
# Build a zero-row data frame by giving every column an empty vector of the
# desired type; the column types are kept even though there are no rows.
df <- data.frame(
  Doubles = numeric(0),
  Ints = integer(0),
  Factors = factor(character(0)),
  Logicals = logical(0),
  Characters = character(0),
  stringsAsFactors = FALSE
)
# Print the structure to confirm the column types.
str(df)
> str(df)
'data.frame': 0 obs. of 5 variables:
$ Doubles : num
$ Ints : int
$ Factors : Factor w/ 0 levels:
$ Logicals : logi
$ Characters: chr
网友评论