美文网首页
Reshape程序包的数据处理

Reshape程序包的数据处理

作者: 北欧森林 | 来源:发表于2021-05-06 23:10 被阅读0次
    1. 模拟数据
    # Simulate a small repeated-measures data set: 3 subjects (id), each
    # measured at 3 time points. The seed is fixed so the draws are reproducible.
    set.seed(123)
    id <- rep(seq_len(3), each = 3)
    time <- rep(seq_len(3), times = 3)
    # PaO2 is drawn before PcvO2; the draw order determines the values below.
    PaO2 <- round(rnorm(length(id), mean = 70, sd = 10))
    PcvO2 <- round(rnorm(length(id), mean = 40, sd = 8))
    data <- data.frame(id = id, time = time, PaO2 = PaO2, PcvO2 = PcvO2)

    # Only 9 rows exist, so asking for 10 prints the whole data frame.
    head(data, n = 10)

    #   id time PaO2 PcvO2
    # 1  1    1   64    36
    # 2  1    2   68    50
    # 3  1    3   86    43
    # 4  2    1   71    43
    # 5  2    2   71    41
    # 6  2    3   87    36
    # 7  3    1   75    54
    # 8  3    2   57    44
    # 9  3    3   63    24
    
    2. melt函数 (宽数据转化为长数据)
    # Load the reshape package, which supplies melt() used below.
    library(reshape)
    # Convert the wide data to long format: `id` and `time` identify each
    # observation, PaO2 and PcvO2 are stacked into a single `value` column,
    # and the column recording which measurement a row came from is named
    # "PO2". `id.vars` is written out in full instead of relying on partial
    # matching of the abbreviated `id`.
    data.melt <- melt(
      data,
      id.vars = c("id", "time"),
      measure.vars = c("PaO2", "PcvO2"),
      variable_name = "PO2"
    )
    # Only 18 rows exist, so asking for 20 prints the whole long table.
    head(data.melt, n = 20)
    
    #    id time   PO2 value
    # 1   1    1  PaO2    64
    # 2   1    2  PaO2    68
    # 3   1    3  PaO2    86
    # 4   2    1  PaO2    71
    # 5   2    2  PaO2    71
    # 6   2    3  PaO2    87
    # 7   3    1  PaO2    75
    # 8   3    2  PaO2    57
    # 9   3    3  PaO2    63
    # 10  1    1 PcvO2    36
    # 11  1    2 PcvO2    50
    # 12  1    3 PcvO2    43
    # 13  2    1 PcvO2    43
    # 14  2    2 PcvO2    41
    # 15  2    3 PcvO2    36
    # 16  3    1 PcvO2    54
    # 17  3    2 PcvO2    44
    # 18  3    3 PcvO2    24
    
    3. cast函数在melt函数处理的数据基础上,对数据进行各种转换
    # Mean of each measurement per subject: one row per id, one column per
    # level of PO2.
    cast(data.melt, formula = id ~ PO2, fun.aggregate = mean)
    #   id     PaO2    PcvO2
    # 1  1 72.66667 43.00000
    # 2  2 76.33333 40.00000
    # 3  3 65.00000 40.66667
    
    # Mean of each measurement per time point.
    cast(data.melt, formula = time ~ PO2, fun.aggregate = mean)
    #   time     PaO2    PcvO2
    # 1    1 70.00000 44.33333
    # 2    2 65.33333 45.00000
    # 3    3 78.66667 34.33333
    
    # No aggregation: spread PO2 back into columns, which returns the
    # original wide layout.
    cast(data.melt, formula = id + time ~ PO2)
    #   id time PaO2 PcvO2
    # 1  1    1   64    36
    # 2  1    2   68    50
    # 3  1    3   86    43
    # 4  2    1   71    43
    # 5  2    2   71    41
    # 6  2    3   87    36
    # 7  3    1   75    54
    # 8  3    2   57    44
    # 9  3    3   63    24
    
    # One column per time/PO2 combination, restricted to the rows with
    # time < 3 and id < 3.
    cast(data.melt, formula = id ~ time + PO2, subset = time < 3 & id < 3)
    #   id 1_PaO2 1_PcvO2 2_PaO2 2_PcvO2
    # 1  1     64      36     68      50
    # 2  2     71      43     71      41
    

    利用cast进行复杂的运算

    # Two `~` in the formula give a 3-dimensional array: an id-by-time table
    # for each level of PO2.
    cast(data.melt, formula = id ~ time ~ PO2)
    # , , PO2 = PaO2
    #
    #    time
    # id   1  2  3
    #   1 64 68 86
    #   2 71 71 87
    #   3 75 57 63
    #
    # , , PO2 = PcvO2
    #
    #    time
    # id   1  2  3
    #   1 36 50 43
    #   2 43 41 36
    #   3 54 44 24
     
    # With `|` the result is split by PO2 into separately named pieces; pick
    # out the PaO2 table by name.
    cast(data.melt, formula = id ~ time | PO2)[["PaO2"]]
    #   id  1  2  3
    # 1  1 64 68 86
    # 2  2 71 71 87
    # 3  3 75 57 63
    

    利用cast计算边际值

    # Mean per time point with marginal means: the two margins requested add
    # the "(all)" row and the "(all)" column shown below.
    cast(
      data.melt,
      formula = time ~ PO2,
      fun.aggregate = mean,
      margins = c("grand_row", "grand_col")
    )
    #    time     PaO2    PcvO2    (all)
    # 1     1 70.00000 44.33333 57.16667
    # 2     2 65.33333 45.00000 55.16667
    # 3     3 78.66667 34.33333 56.50000
    # 4 (all) 71.33333 41.22222 56.27778
    
    4. 拆分字符向量为多个列
    # One-row wide table whose column names follow the pattern <lab>_<day>:
    # three labs (lac, wbc, hb), each with values for days 1 to 3.
    data.split <- data.frame(
      lac_1 = 2.3, lac_2 = 3.4, lac_3 = 4.5,
      wbc_1 = 12, wbc_2 = 11, wbc_3 = 6,
      hb_1 = 60, hb_2 = 77, hb_3 = 89
    )
    data.split
    #   lac_1 lac_2 lac_3 wbc_1 wbc_2 wbc_3 hb_1 hb_2 hb_3
    # 1   2.3   3.4   4.5    12    11     6   60   77   89
    

    将以上数据框data.split转换为长数据

    # Split every column name at "_" into two new columns, "lab" and "days".
    variable.name <- colsplit(
      names(data.split),
      split = "_",
      names = c("lab", "days")
    )
    # t() turns the single row of values into a single column, so each
    # lab/day pair lines up with its value.
    data.reshape <- cbind(variable.name, t(data.split))
    # Drop the old column names that were carried over as row names.
    rownames(data.reshape) <- NULL
    # The transposed values arrive as the third column; call it "value".
    colnames(data.reshape)[3] <- "value"
    data.reshape
    
    #   lab days value
    # 1 lac    1   2.3
    # 2 lac    2   3.4
    # 3 lac    3   4.5
    # 4 wbc    1  12.0
    # 5 wbc    2  11.0
    # 6 wbc    3   6.0
    # 7  hb    1  60.0
    # 8  hb    2  77.0
    # 9  hb    3  89.0
    
    5. 自动生成研究队列的基线特征,简化数据处理
    # funstofun() bundles several summary functions into one function that
    # returns a named vector; the results are rounded to one decimal place.
    round(
      funstofun(mean, median, min, max, sd)(data[["PaO2"]]),
      digits = 1
    )
    # mean median    min    max     sd 
    # 71.3   71.0   57.0   87.0   10.1 
    round(
      funstofun(mean, median, min, max, sd)(data[["PcvO2"]]),
      digits = 1
    )
    # mean median    min    max     sd 
    # 41.2   43.0   24.0   54.0    8.7 
    

    学习视频来源:
    章仲恒教授丁香园课程:Reshape程序包的数据处理

    相关文章

      网友评论

          本文标题:Reshape程序包的数据处理

          本文链接:https://www.haomeiwen.com/subject/ozwddltx.html