有一个整洁(tidy)的数据集,其中第一列是待处理的数值变量,其余各列都是分组变量;目的是分别以其余每一列作为分组,对第一列求均值。
1. 遇事不决,for 循环
# Data preparation: keep the measured column (carat) plus the grouping
# columns, then draw a random sample of 100 rows.
# library() is used rather than require() so that a missing package stops the
# script here instead of failing later with an unrelated error.
library(tidyverse)
data <- subset(diamonds,
               select = c(carat, cut, color, clarity)) %>%
  sample_n(100)
# Every column after the first one is a grouping variable; their names drive
# the loop below. A character vector can be iterated directly.
group_cols <- colnames(data)[-1]
# Summarise with a for loop: one tapply() result per grouping column, stored
# under the grouping column's name.
# The result list is named `mean` and therefore masks base::mean in this
# workspace, so the summary function is referenced explicitly as base::mean.
mean <- list()
for (i in group_cols) {
  mean[[i]] <- tapply(data$carat, data[i], base::mean)
}
2. apply 家族
# Data preparation: same sample as in the for-loop section, built as a single
# pipeline. library() is used rather than require() so that a missing package
# stops the script immediately.
library(tidyverse)
data <- diamonds %>%
  subset(select = c(carat, cut, color, clarity)) %>%
  sample_n(100)
# Nested apply-family approach.

#' Mean of one column, grouped separately by every other column
#'
#' @param measure_col Position (single number) or name (single string) of the
#'   column whose mean is computed.
#' @param data A data frame; every column other than `measure_col` is used as
#'   a grouping variable.
#' @return A named list with one element per grouping column. Each element is
#'   the `tapply()` result holding the group means, with groups in factor-level
#'   order for factor columns and unused levels dropped.
mean_f <- function(measure_col = NULL, data = NULL) {
  stopifnot(
    is.data.frame(data),
    length(measure_col) == 1L,
    is.numeric(measure_col) || is.character(measure_col)
  )
  # Resolve a column name to its position so the same index can be used both
  # to extract the measured column and to drop it from the grouping columns.
  col_idx <- if (is.character(measure_col)) {
    match(measure_col, names(data))
  } else {
    as.integer(measure_col)
  }
  if (is.na(col_idx) || col_idx < 1L || col_idx > ncol(data)) {
    stop("`measure_col` does not identify a column of `data`.", call. = FALSE)
  }

  measure_values <- data[[col_idx]]
  group_data <- data[-col_idx]

  # lapply() walks the columns without coercing the data frame to a character
  # matrix (which apply() would do) and always returns a list, even when all
  # grouping columns happen to have the same number of groups.
  # base::mean is spelled out so a variable called `mean` in the calling
  # environment cannot interfere.
  lapply(group_data, function(group) {
    tapply(measure_values, droplevels(as.factor(group)), base::mean)
  })
}
3. 结果处理
结果是一个 list,其中每个元素是一组分组均值,各元素长度不一定相同;需要先把各元素补齐到相同长度,再按列合并成一个矩阵。
# Step 1: unlist() every element of the result list `mean`.
# Step 2: extend each element to the length of the longest one (the extra
#         positions become NA) so the elements can be bound column-wise.
# local() keeps the intermediate values out of the workspace; its value is
# the bound matrix.
local({
  target_len <- max(lengths(mean))
  flattened <- lapply(mean, unlist)
  padded <- lapply(flattened, function(v) {
    length(v) <- target_len
    v
  })
  do.call(cbind, padded)
})
4. 美化一下
# Group means: each element of `mean` padded with NA to a common length and
# bound column-wise.
value <- local({
  target_len <- max(lengths(mean))
  flattened <- lapply(mean, unlist)
  padded <- lapply(flattened, function(v) {
    length(v) <- target_len
    v
  })
  do.call(cbind, padded)
})
# Group labels: the row names of each element, padded and bound the same way.
key <- local({
  target_len <- max(lengths(mean))
  labels <- lapply(mean, rownames)
  padded <- lapply(labels, function(v) {
    length(v) <- target_len
    v
  })
  do.call(cbind, padded)
})
# Label columns first, then the mean columns.
cbind(key, value)
网友评论