题: 给定一个数据框,根据某个分组对另一个列进行求值。
举例: 按照年份和月份进行分组,对温度求均值,其中温度列中存在NA(缺失值)
数据框实现这种操作有很多方法,按照代码长度,列出我想到的解题思路
1. for循环
# Approach 1: explicit for loop over the distinct Year-Month keys.
# Adds a combined grouping key to `df`, computes the mean Temperature of each
# group (ignoring NA values) and collects the results in `res_tmp`, a data
# frame with one row per group (columns: date, temp_mean).
df$Year_Month <- factor(paste(df$Year, df$Month, sep = "-"))

# A `for` loop over a factor yields its values as character strings, so
# convert once up front; the same vector doubles as the result's date column.
temp_date <- as.character(unique(df$Year_Month))

# Preallocate the output instead of growing it with c() on every iteration.
temp_mean <- numeric(length(temp_date))

for (k in seq_along(temp_date)) {
  tmp_df <- df[df$Year_Month == temp_date[k], ]
  temp_mean[k] <- mean(tmp_df$Temperature, na.rm = TRUE)
}

res_tmp <- data.frame(date = temp_date, temp_mean = temp_mean)
2. split-lapply-do.call
# Approach 2: split the data frame by group, summarise each piece with
# lapply(), then bind the per-group results together.
df$Year_Month <- factor(paste(df$Year, df$Month, sep = "-"))
df_list <- split(df, f = df$Year_Month)

# One mean per group; NA temperatures are dropped before averaging.
temp_mean_list <- lapply(
  df_list,
  function(x) mean(x$Temperature, na.rm = TRUE)
)

# rbind() stacks the scalar means into a one-column matrix whose row names
# are the Year-Month keys. The result is not assigned, so it is printed when
# run at top level.
do.call(rbind, temp_mean_list)
3. split-sapply
# Approach 3: split the data frame by group and let sapply() simplify the
# per-group means into a named numeric vector (names are the Year-Month keys).
df$Year_Month <- factor(paste(df$Year, df$Month, sep = "-"))
df_list <- split(df, f = df$Year_Month)

# Despite the `_list` suffix, the result is a vector when `df` has rows.
# NOTE: sapply() returns an empty list for empty input; in non-interactive
# code vapply(df_list, ..., numeric(1)) gives a type-stable result.
temp_mean_list <- sapply(
  df_list,
  function(x) mean(x$Temperature, na.rm = TRUE)
)
4. dplyr
library(dplyr)
# Approach 4: dplyr pipeline - group by Year and Month, average Temperature
# (ignoring NA values) and show the first rows of the result.
df %>%
  group_by(Year, Month) %>%
  summarise(temp_mean = mean(Temperature, na.rm = TRUE)) %>%
  # summarise() only peels off the last grouping level (Month); drop the
  # remaining grouping so the result is a plain, ungrouped table.
  ungroup() %>%
  head()
5. SQL
library(sqldf)
# Approach 5: SQL via sqldf. The grouping columns are selected alongside the
# average so every row of the result can be matched to its Year/Month.
# SQL avg() skips NULL values; this relies on sqldf passing R's NA to the
# database as NULL.
sqldf(
  "select Year, Month, avg(Temperature) as temp_mean
   from df
   group by Year, Month"
)
6. aggregate
# Approach 6: base R aggregate(). Computes the mean Temperature for every
# Year/Month combination; `na.rm = TRUE` is forwarded to mean() so missing
# temperatures are ignored. Because the `by` list is unnamed, the grouping
# columns of `out` get the default names Group.1 and Group.2, and the
# aggregated value is stored in column x.
out <- aggregate(
  x = df$Temperature,
  by = list(df$Year, df$Month),
  FUN = mean,
  na.rm = TRUE
)
网友评论