- 模拟数据
# Simulate a small wide-format data set: ids 1-3 crossed with times 1-3,
# carrying two rounded, normally distributed measurements (PaO2 and PcvO2).
set.seed(123)  # fixed seed so the printed values are reproducible
id <- rep(seq_len(3), each = 3)
time <- rep(seq_len(3), times = 3)
# PaO2 is drawn before PcvO2; both consume the same random-number stream.
PaO2 <- round(rnorm(n = 9, mean = 70, sd = 10))
PcvO2 <- round(rnorm(n = 9, mean = 40, sd = 8))
data <- data.frame(id = id, time = time, PaO2 = PaO2, PcvO2 = PcvO2)
head(data, n = 10)
# id time PaO2 PcvO2
# 1 1 1 64 36
# 2 1 2 68 50
# 3 1 3 86 43
# 4 2 1 71 43
# 5 2 2 71 41
# 6 2 3 87 36
# 7 3 1 75 54
# 8 3 2 57 44
# 9 3 3 63 24
- `melt` 函数(将宽数据转化为长数据)
library(reshape)
# Wide -> long: keep id and time as identifier columns and stack the PaO2 and
# PcvO2 columns into a single `value` column; the name of the source column
# is recorded in a new column called PO2.
# `id.vars` is spelled out in full so the call does not rely on R's partial
# argument matching.
data.melt <- melt(
  data,
  id.vars = c("id", "time"),
  measure.vars = c("PaO2", "PcvO2"),
  variable_name = "PO2"
)
head(data.melt, 20)
# id time PO2 value
# 1 1 1 PaO2 64
# 2 1 2 PaO2 68
# 3 1 3 PaO2 86
# 4 2 1 PaO2 71
# 5 2 2 PaO2 71
# 6 2 3 PaO2 87
# 7 3 1 PaO2 75
# 8 3 2 PaO2 57
# 9 3 3 PaO2 63
# 10 1 1 PcvO2 36
# 11 1 2 PcvO2 50
# 12 1 3 PcvO2 43
# 13 2 1 PcvO2 43
# 14 2 2 PcvO2 41
# 15 2 3 PcvO2 36
# 16 3 1 PcvO2 54
# 17 3 2 PcvO2 44
# 18 3 3 PcvO2 24
- `cast` 函数:在 `melt` 函数处理后的数据基础上,对数据进行各种转换
# Mean of `value` within each id; the PO2 levels are spread into columns.
cast(data.melt, id ~ PO2, fun.aggregate = mean)
# id PaO2 PcvO2
# 1 1 72.66667 43.00000
# 2 2 76.33333 40.00000
# 3 3 65.00000 40.66667
# Mean of `value` within each measurement occasion (time), one column per PO2.
cast(data.melt, time ~ PO2, fun.aggregate = mean)
# time PaO2 PcvO2
# 1 1 70.00000 44.33333
# 2 2 65.33333 45.00000
# 3 3 78.66667 34.33333
# Spread PO2 back into columns keyed by id and time: returns the wide layout.
cast(data.melt, id + time ~ PO2)
# id time PaO2 PcvO2
# 1 1 1 64 36
# 2 1 2 68 50
# 3 1 3 86 43
# 4 2 1 71 43
# 5 2 2 71 41
# 6 2 3 87 36
# 7 3 1 75 54
# 8 3 2 57 44
# 9 3 3 63 24
# Restrict to time < 3 and id < 3, then spread every time/PO2 combination
# into its own column (one row per id).
cast(data.melt, id ~ time + PO2, subset = time < 3 & id < 3)
# id 1_PaO2 1_PcvO2 2_PaO2 2_PcvO2
# 1 1 64 36 68 50
# 2 2 71 43 71 41
利用 `cast` 进行复杂的运算
# Two subgroups (the PO2 levels), each laid out as its own id-by-time table.
cast(data.melt, id ~ time ~ PO2)
# , , PO2 = PaO2
#
# time
# id 1 2 3
# 1 64 68 86
# 2 71 71 87
# 3 75 57 63
#
# , , PO2 = PcvO2
#
# time
# id 1 2 3
# 1 36 50 43
# 2 43 41 36
# 3 54 44 24
# Condition on PO2 with `|` so the result is split by PO2 level, then keep
# only the id-by-time table for PaO2.
cast(data.melt, id ~ time | PO2)$PaO2
# id 1 2 3
# 1 1 64 68 86
# 2 2 71 71 87
# 3 3 75 57 63
利用 `cast` 计算边际值
# Time-by-PO2 means plus an extra "(all)" row and "(all)" column holding the
# marginal means.
cast(
  data.melt,
  time ~ PO2,
  fun.aggregate = mean,
  margins = c("grand_row", "grand_col")
)
# time PaO2 PcvO2 (all)
# 1 1 70.00000 44.33333 57.16667
# 2 2 65.33333 45.00000 55.16667
# 3 3 78.66667 34.33333 56.50000
# 4 (all) 71.33333 41.22222 56.27778
- 拆分字符向量为多个列
# One-row wide table whose column names combine a lab name and a day number
# as <lab>_<day> (lac, wbc and hb on days 1-3).
data.split <- data.frame(
  lac_1 = 2.3, lac_2 = 3.4, lac_3 = 4.5,
  wbc_1 = 12, wbc_2 = 11, wbc_3 = 6,
  hb_1 = 60, hb_2 = 77, hb_3 = 89
)
data.split
# lac_1 lac_2 lac_3 wbc_1 wbc_2 wbc_3 hb_1 hb_2 hb_3
# 1 2.3 3.4 4.5 12 11 6 60 77 89
将以上数据框data.split转换为长数据
# Split every column name at "_" into a lab part and a day part, then bind
# the transposed measurements (one row per original column) next to them.
variable.name <- colsplit(
  names(data.split),
  split = "_",
  names = c("lab", "days")
)
data.reshape <- cbind(variable.name, t(data.split))
rownames(data.reshape) <- NULL  # reset to default 1..n row names
colnames(data.reshape)[3] <- "value"
data.reshape
# lab days value
# 1 lac 1 2.3
# 2 lac 2 3.4
# 3 lac 3 4.5
# 4 wbc 1 12.0
# 5 wbc 2 11.0
# 6 wbc 3 6.0
# 7 hb 1 60.0
# 8 hb 2 77.0
# 9 hb 3 89.0
- 自动生成研究队列的基线特征,简化数据处理
# Combine five summary statistics into one function, apply it to PaO2 and
# round the named result to one decimal place.
round(funstofun(mean, median, min, max, sd)(data[["PaO2"]]), digits = 1)
# mean median min max sd
# 71.3 71.0 57.0 87.0 10.1
# Combine five summary statistics into one function, apply it to PcvO2 and
# round the named result to one decimal place.
round(funstofun(mean, median, min, max, sd)(data[["PcvO2"]]), digits = 1)
# mean median min max sd
# 41.2 43.0 24.0 54.0 8.7
学习视频来源:
章仲恒教授丁香园课程:Reshape程序包的数据处理
网友评论