Reshape程序包的数据处理

作者: 北欧森林 | 来源:发表于2021-05-06 23:10 被阅读0次

Reshape程序包的数据处理
R-Purrr的使用，加速数据处理
Pandas 数据处理(一) —— 几个简单函数掌握！
Data Science with R in 4 Weeks -
numpy或pandas中reshape(-1)等用法
Pytorch中的view()和reshape()有何不同？
Leetcode PHP题解--D53 566. Reshape
R语言依据某个字段拉横
tf.reshape
Tensorflow 之RNN

模拟数据

# Simulated long-format data set: 3 subjects (id), each measured at
# 3 time points, with two oxygen measurements per row.
set.seed(123)  # fixed seed so the random draws below are reproducible

id <- rep(seq_len(3), each = 3)
time <- rep(seq_len(3), times = 3)

# PaO2 is drawn before PcvO2; the order matters for reproducing the values.
PaO2 <- round(rnorm(n = 9, mean = 70, sd = 10), digits = 0)
PcvO2 <- round(rnorm(n = 9, mean = 40, sd = 8), digits = 0)

data <- data.frame(id = id, time = time, PaO2 = PaO2, PcvO2 = PcvO2)

# Only 9 rows exist, so asking for 10 prints the whole data frame.
head(data, n = 10)

#   id time PaO2 PcvO2
# 1  1    1   64    36
# 2  1    2   68    50
# 3  1    3   86    43
# 4  2    1   71    43
# 5  2    2   71    41
# 6  2    3   87    36
# 7  3    1   75    54
# 8  3    2   57    44
# 9  3    3   63    24

melt函数 (宽数据转化为长数据)

library(reshape)

# Wide -> long: keep `id` and `time` as identifier columns and stack the two
# measurement columns into a single `value` column. The column that records
# which measurement each row came from is named "PO2" via `variable_name`.
# `id.vars` is spelled out in full instead of relying on partial argument
# matching of `id`, so the call does not depend on R's partial-matching rules.
data.melt <- melt(
  data,
  id.vars = c("id", "time"),
  measure.vars = c("PaO2", "PcvO2"),
  variable_name = "PO2"
)

# 18 rows in total (9 rows x 2 measures), so n = 20 prints everything.
head(data.melt, n = 20)

#    id time   PO2 value
# 1   1    1  PaO2    64
# 2   1    2  PaO2    68
# 3   1    3  PaO2    86
# 4   2    1  PaO2    71
# 5   2    2  PaO2    71
# 6   2    3  PaO2    87
# 7   3    1  PaO2    75
# 8   3    2  PaO2    57
# 9   3    3  PaO2    63
# 10  1    1 PcvO2    36
# 11  1    2 PcvO2    50
# 12  1    3 PcvO2    43
# 13  2    1 PcvO2    43
# 14  2    2 PcvO2    41
# 15  2    3 PcvO2    36
# 16  3    1 PcvO2    54
# 17  3    2 PcvO2    44
# 18  3    3 PcvO2    24

cast函数在melt函数处理的数据基础上，对数据进行各种转换

# Mean per subject (id); the levels of "PO2" are spread out into columns.
cast(data.melt, formula = id ~ PO2, fun.aggregate = mean)
#   id     PaO2    PcvO2
# 1  1 72.66667 43.00000
# 2  2 76.33333 40.00000
# 3  3 65.00000 40.66667

# Mean per measurement occasion (time).
cast(data.melt, formula = time ~ PO2, fun.aggregate = mean)
#   time     PaO2    PcvO2
# 1    1 70.00000 44.33333
# 2    2 65.33333 45.00000
# 3    3 78.66667 34.33333

# Spread "PO2" with no aggregation: this returns the original wide layout.
cast(data.melt, formula = id + time ~ PO2)
#   id time PaO2 PcvO2
# 1  1    1   64    36
# 2  1    2   68    50
# 3  1    3   86    43
# 4  2    1   71    43
# 5  2    2   71    41
# 6  2    3   87    36
# 7  3    1   75    54
# 8  3    2   57    44
# 9  3    3   63    24

# One column per time/PO2 combination, restricted to time < 3 and id < 3.
cast(data.melt, formula = id ~ time + PO2, subset = time < 3 & id < 3)
#   id 1_PaO2 1_PcvO2 2_PaO2 2_PcvO2
# 1  1     64      36     68      50
# 2  2     71      43     71      41

利用cast进行复杂的运算

# Two `~` in the formula: the result is printed as one id-by-time table
# per level of PO2.
cast(data.melt, formula = id ~ time ~ PO2)
# , , PO2 = PaO2
#
#    time
# id   1  2  3
#   1 64 68 86
#   2 71 71 87
#   3 75 57 63
#
# , , PO2 = PcvO2
#
#    time
# id   1  2  3
#   1 36 50 43
#   2 43 41 36
#   3 54 44 24

# With `|` the result is split by PO2 instead; pick the PaO2 piece by name.
cast(data.melt, formula = id ~ time | PO2)[["PaO2"]]
#   id  1  2  3
# 1  1 64 68 86
# 2  2 71 71 87
# 3  3 75 57 63

利用cast计算边际值

# Mean per time point, plus marginal means: an extra "(all)" row and an
# extra "(all)" column computed with the same aggregation function.
cast(
  data.melt,
  formula = time ~ PO2,
  fun.aggregate = mean,
  margins = c("grand_row", "grand_col")
)
#    time     PaO2    PcvO2    (all)
# 1     1 70.00000 44.33333 57.16667
# 2     2 65.33333 45.00000 55.16667
# 3     3 78.66667 34.33333 56.50000
# 4 (all) 71.33333 41.22222 56.27778

拆分字符向量为多个列

# One-row wide data frame; each column name encodes "<lab>_<day>".
data.split <- data.frame(
  lac_1 = 2.3, lac_2 = 3.4, lac_3 = 4.5,
  wbc_1 = 12, wbc_2 = 11, wbc_3 = 6,
  hb_1 = 60, hb_2 = 77, hb_3 = 89
)
data.split
#   lac_1 lac_2 lac_3 wbc_1 wbc_2 wbc_3 hb_1 hb_2 hb_3
# 1   2.3   3.4   4.5    12    11     6   60   77   89

将以上数据框data.split转换为长数据

# Split each column name at "_" into a lab name and a day number.
variable.name <- colsplit(
  names(data.split),
  split = "_",
  names = c("lab", "days")
)

# Transposing the one-row data frame gives one value per original column,
# in the same order as the split names, so the two can be bound side by side.
data.reshape <- cbind(variable.name, t(data.split))

# Reset the row names to the default numbering and give the value column
# (third column) a meaningful name.
rownames(data.reshape) <- NULL
colnames(data.reshape)[3L] <- "value"
data.reshape

#   lab days value
# 1 lac    1   2.3
# 2 lac    2   3.4
# 3 lac    3   4.5
# 4 wbc    1  12.0
# 5 wbc    2  11.0
# 6 wbc    3   6.0
# 7  hb    1  60.0
# 8  hb    2  77.0
# 9  hb    3  89.0

自动生成研究队列的基线特征，简化数据处理

# Combine five summary functions into a single function that returns a
# named vector, then apply it to each measurement column.
baseline_stats <- funstofun(mean, median, min, max, sd)

round(baseline_stats(data$PaO2), digits = 1)
#   mean median    min    max     sd
#   71.3   71.0   57.0   87.0   10.1
round(baseline_stats(data$PcvO2), digits = 1)
#   mean median    min    max     sd
#   41.2   43.0   24.0   54.0    8.7

学习视频来源：
章仲恒教授丁香园课程：Reshape程序包的数据处理