美文网首页
【生信技能树】2019-12-22作业:自定义函数实现melt和

【生信技能树】2019-12-22作业:自定义函数实现melt和dcast

作者: 猫叽先森 | 来源:发表于2019-12-23 00:47 被阅读0次
    ## Packages first, so a missing dependency fails before any work is done.
    library(reshape2)
    ## Keep strings as character vectors (only the default from R 4.0 onwards).
    options(stringsAsFactors = FALSE)
    ## Load the test data; the file is expected to provide the wide table `a`
    ## shown below.
    load("example.Rdata")
    a
    #          GSM3781295 GSM3781296 GSM3781297 GSM3781298
    #NM_214711   1.972502   1.939664   2.047662   2.208423
    #NM_214710   2.688776   3.037042   5.280936   3.070719
    #NM_214462   6.311992   1.704874   7.960758   1.901060
    #NM_214461   6.102595   3.608155   6.625782   3.861742
    ## melt() needs the probe IDs as an ordinary column, so copy the row names
    ## into `probe_id` on a working copy and leave `a` untouched.
    aa <- a
    aa$probe_id <- rownames(aa)
    b <- melt(data = aa,
              id.vars = c("probe_id"),
              variable.name = "samples",
              value.name = "exprs")
    ## Store the sample labels as plain character strings so the result can be
    ## compared with the hand-written version later in the script.
    b$samples <- as.character(b$samples)
    b
    #    probe_id    samples    exprs
    #1  NM_214711 GSM3781295 1.972502
    #2  NM_214710 GSM3781295 2.688776
    #3  NM_214462 GSM3781295 6.311992
    #4  NM_214461 GSM3781295 6.102595
    #5  NM_214711 GSM3781296 1.939664
    #6  NM_214710 GSM3781296 3.037042
    #7  NM_214462 GSM3781296 1.704874
    #8  NM_214461 GSM3781296 3.608155
    #9  NM_214711 GSM3781297 2.047662
    #10 NM_214710 GSM3781297 5.280936
    #11 NM_214462 GSM3781297 7.960758
    #12 NM_214461 GSM3781297 6.625782
    #13 NM_214711 GSM3781298 2.208423
    #14 NM_214710 GSM3781298 3.070719
    #15 NM_214462 GSM3781298 1.901060
    #16 NM_214461 GSM3781298 3.861742
    ## Persist the long table for later use.
    save(b, file = "b.Rdata")
    
    #' Reshape a wide table into long format (hand-written equivalent of melt()).
    #'
    #' Every cell of `df` becomes one output row. Rows are emitted column by
    #' column: all rows of the first column, then all rows of the second, etc.
    #'
    #' @param df A data frame (or matrix) whose row names identify the probes
    #'   and whose column names identify the samples.
    #' @return A data frame with automatic row names and three columns:
    #'   `probe_id` (row name of the cell), `samples` (column name of the cell)
    #'   and `exprs` (the cell value). It has `nrow(df) * ncol(df)` rows; an
    #'   input with no rows or no columns yields a zero-row result.
    melt_by_cat <- function(df) {
      n_row <- nrow(df)
      n_col <- ncol(df)
      # Concatenate the columns in order; seq_len() keeps this safe when the
      # input has no columns at all (1:0 would iterate over c(1, 0)).
      values <- unlist(
        lapply(seq_len(n_col), function(j) df[, j]),
        use.names = FALSE
      )
      if (is.null(values)) {
        # unlist() of an empty list is NULL, which data.frame() would drop.
        values <- numeric(0)
      }
      # rep(times = ) cycles the probe IDs once per column, rep(each = ) holds
      # each sample name for a whole column, matching the order of `values`.
      data.frame(
        probe_id = rep(rownames(df), times = n_col),
        samples = rep(colnames(df), each = n_row),
        exprs = values,
        stringsAsFactors = FALSE
      )
    }
    re1 <- melt_by_cat(a)
    ## Check whether re1 and b are identical
    identical(b, re1)
    #[1] TRUE

    ## Name the value column explicitly so dcast() does not have to guess it
    ## (and does not print a "Using exprs as value column" message).
    d <- dcast(data = b, formula = probe_id ~ samples, value.var = "exprs")
    d
    #   probe_id GSM3781295 GSM3781296 GSM3781297 GSM3781298
    #1 NM_214461   6.102595   3.608155   6.625782   3.861742
    #2 NM_214462   6.311992   1.704874   7.960758   1.901060
    #3 NM_214710   2.688776   3.037042   5.280936   3.070719
    #4 NM_214711   1.972502   1.939664   2.047662   2.208423
    
    #' Reshape a long table back to wide format (hand-written dcast()).
    #'
    #' The input is sliced per sample, each slice's value column is renamed to
    #' the sample name, and the slices are joined with merge() on the columns
    #' they share.
    #'
    #' @param df0 A long data frame laid out as: identifier column first, a
    #'   column named `samples` second, value column third (the layout produced
    #'   by the melt step in this script). The second column is dropped by
    #'   position, so this order matters.
    #' @return A data frame with the identifier column followed by one column
    #'   per distinct value of `df0$samples`. With a single sample the slice is
    #'   returned as is; with no rows a zero-row data frame holding only the
    #'   identifier column is returned.
    dcast_by_cat <- function(df0) {
      sample_ids <- unique(df0$samples)
      # One two-column table per sample: identifier + that sample's values.
      pieces <- lapply(sample_ids, function(sample_id) {
        piece <- df0[df0$samples == sample_id, -2]
        colnames(piece)[2] <- sample_id
        piece
      })
      if (length(pieces) == 0L) {
        # Nothing to spread: keep just the (empty) identifier column.
        return(df0[0, 1, drop = FALSE])
      }
      # Left fold over the pieces. Unlike a `2:length(...)` loop this is also
      # correct for a single sample, where no merge must happen at all.
      Reduce(merge, pieces)
    }
    ## Rebuild the wide table from the long table b with the hand-written
    ## function.
    re2 <- dcast_by_cat(b)
    ## Check whether re2 and d (the dcast() result) are identical
    identical(d,re2)
    #[1] TRUE
    

    收获:

    1. 学习了melt()和dcast()最基本的用法
      1)melt()得到的数据框,总行数=原数据框行数*列数,
      每行3个数据,分别对应原数据框每个单元格的行名,列名,数值
      b <- melt(data = aa,id.vars = c("probe_id"),variable.name = "samples",value.name = "exprs")

      2)dcast()得到的数据框,
      d <- dcast(data = b,probe_id ~samples)
      以~号前的列(probe_id)作为标识列,~号后的列(samples)的取值展开为新的列名,重排数据

    存在问题:

    1. 还是未能掌握apply函数的使用方法:尝试使用apply()改写melt()时,得到的结果是一个List而不是数据框。原因是apply()会对每一列分别调用一次函数,并把每次返回的数据框收集成列表;而函数内部对id和df_new的赋值只作用于局部变量,不会修改外层的同名变量
    #' Reshape a wide table into long format using an apply-family function.
    #'
    #' Builds one long piece per column with lapply() and stacks the pieces
    #' with a single rbind() call. Iterating over column indices (rather than
    #' calling apply() on the table itself) keeps the column name available
    #' inside the function without a counter. It also avoids apply()'s
    #' behaviour of returning the per-column data frames as a list.
    #'
    #' @param df A data frame (or matrix) whose row names identify the probes
    #'   and whose column names identify the samples.
    #' @return A data frame with columns `probe_id`, `samples` and `exprs`,
    #'   one row per cell of `df`, ordered column by column. An input with no
    #'   columns yields a zero-row result.
    melt_by_cat_2 <- function(df) {
      pieces <- lapply(seq_len(ncol(df)), function(j) {
        data.frame(
          probe_id = rownames(df),
          # rep() keeps the lengths consistent even when df has zero rows.
          samples = rep(colnames(df)[j], times = nrow(df)),
          # unname() stops a matrix column's names from becoming row names.
          exprs = unname(df[, j]),
          stringsAsFactors = FALSE
        )
      })
      if (length(pieces) == 0L) {
        # do.call(rbind, list()) would return NULL; return an empty table.
        return(data.frame(
          probe_id = character(0),
          samples = character(0),
          exprs = numeric(0),
          stringsAsFactors = FALSE
        ))
      }
      do.call(rbind, pieces)
    }
    ## Run the apply-family variant on the example data `a` loaded earlier.
    re <- melt_by_cat_2(a)
    

    相关文章

      网友评论

          本文标题:【生信技能树】2019-12-22作业:自定义函数实现melt和

          本文链接:https://www.haomeiwen.com/subject/acianctx.html