美文网首页常用工具
使用ImpulseDE2处理转录组时间序列数据

使用ImpulseDE2处理转录组时间序列数据

作者: FengSL | 来源:发表于2020-09-17 23:56 被阅读0次

    ImpulseDE2 是一个处理时序数据的R包,用于查找时序数据的差异基因,它可以用来解决两类问题。

    Case-only differential expression analysis tests, whether the expression level of a gene changes over time.
    Case-control differential expression analysis tests, whether the expression trajectory of a gene over time differs between samples from a case and samples from a control condition.

    1. Case-only

    做差异分析的时候有4个主要的参数:

    matCountData:输入数据,基因表达的 counts 矩阵
    dfAnnotation:样品注释信息(数据框)
    boolCaseCtrl:布尔值,是否执行 Case-control 分析,默认为 FALSE
    vecConfounders:字符向量,dfAnnotation 中需要校正的批次效应所在的列名

    生成模拟数据

    library(ImpulseDE2)
    # Simulate a case-only data set: time points 1..8, each observed three
    # times, with no control condition and no batch structure.
    lsSimulatedData <- simulateDataSetImpulseDE2(
      # Sampling design
      vecTimePointsA = rep(seq_len(8), times = 3),
      vecTimePointsB = NULL,
      vecBatchesA = NULL,
      vecBatchesB = NULL,
      # Number of genes per expression model
      # (constant / impulse / linear / sigmoid)
      scaNConst = 30,
      scaNImp = 10,
      scaNLin = 10,
      scaNSig = 10,
      # Batch effects are not simulated; nothing is written to disk
      scaMuBatchEffect = NULL,
      scaSDBatchEffect = NULL,
      dirOutSimulation = NULL
    )
    

    其中

    lsSimulatedData$matObservedCounts 是 counts 数据矩阵,可以用自己的数据代替
    lsSimulatedData$dfAnnotation 是数据的注释信息,可以自行构建
    格式如下:
    ##            Sample Condition Time  Batch
    ## A_1_Rep1 A_1_Rep1      case    1 B_NULL
    ## A_2_Rep1 A_2_Rep1      case    2 B_NULL
    ## A_3_Rep1 A_3_Rep1      case    3 B_NULL
    ## A_4_Rep1 A_4_Rep1      case    4 B_NULL
    ## A_5_Rep1 A_5_Rep1      case    5 B_NULL
    ## A_6_Rep1 A_6_Rep1      case    6 B_NULL
    ## A_7_Rep1 A_7_Rep1      case    7 B_NULL
    ## A_8_Rep1 A_8_Rep1      case    8 B_NULL
    ## A_1_Rep2 A_1_Rep2      case    1 B_NULL
    ## A_2_Rep2 A_2_Rep2      case    2 B_NULL
    ## A_3_Rep2 A_3_Rep2      case    3 B_NULL
    ## A_4_Rep2 A_4_Rep2      case    4 B_NULL
    ## A_5_Rep2 A_5_Rep2      case    5 B_NULL
    ## A_6_Rep2 A_6_Rep2      case    6 B_NULL
    ## A_7_Rep2 A_7_Rep2      case    7 B_NULL
    ## A_8_Rep2 A_8_Rep2      case    8 B_NULL
    ## A_1_Rep3 A_1_Rep3      case    1 B_NULL
    ## A_2_Rep3 A_2_Rep3      case    2 B_NULL
    ## A_3_Rep3 A_3_Rep3      case    3 B_NULL
    ## A_4_Rep3 A_4_Rep3      case    4 B_NULL
    ## A_5_Rep3 A_5_Rep3      case    5 B_NULL
    ## A_6_Rep3 A_6_Rep3      case    6 B_NULL
    ## A_7_Rep3 A_7_Rep3      case    7 B_NULL
    ## A_8_Rep3 A_8_Rep3      case    8 B_NULL
    

    构建方式如下:

    # Build the sample annotation for a case-only design.
    # Replace the simulated counts below with your own count matrix
    # (samples in columns; column names are used as sample IDs).
    data <- lsSimulatedData$matObservedCounts
    Sample <- colnames(data)
    # One entry per sample, sized from the data rather than a literal 24.
    Condition <- rep("case", length(Sample))
    # Time point of each sample: time points 1..8, repeated for 3 replicates.
    Time <- rep(seq(1, 8), 3)
    # Placeholder batch label: every sample belongs to the same batch.
    Batch <- rep("B_NULL", length(Sample))
    # Fail early if the design does not line up with the columns of `data`.
    stopifnot(length(Time) == length(Sample))
    dfAnnotation <- data.frame(
      Sample, Condition, Time, Batch,
      row.names = Sample
    )
    

    运行:

    # Case-only differential expression analysis.
    # Coerce to a matrix in case `data` was supplied as a data frame.
    data <- as.matrix(data)
    objectImpulseDE2 <- runImpulseDE2(
      matCountData   = data,
      dfAnnotation   = dfAnnotation,
      boolCaseCtrl   = FALSE, # case-only: no control condition
      vecConfounders = NULL,  # no batch correction
      scaNProc       = 1      # number of processes
    )
    # Inspect the results
    head(objectImpulseDE2$dfImpulseDE2Results)
    

    2.修正批次效应

    假设三个实验重复分别来自三个批次,构建注释信息如下:

    # Build the sample annotation when each of the three replicates comes
    # from its own batch (B1, B2, B3).
    # `data` is the count matrix used above (the original referenced an
    # undefined object `da`).
    Sample <- colnames(data)
    Condition <- rep("case", length(Sample))
    # Time points 1..8, repeated once per replicate.
    Time <- rep(seq(1, 8), 3)
    # Samples are ordered replicate by replicate, so each batch label covers
    # 8 consecutive samples.
    Batch <- rep(c("B1", "B2", "B3"), each = 8)
    # Fail early if the design does not line up with the columns of `data`.
    stopifnot(
      length(Time) == length(Sample),
      length(Batch) == length(Sample)
    )
    dfAnnotation <- data.frame(
      Sample, Condition, Time, Batch,
      row.names = Sample
    )
    
    # Case-only analysis with batch correction.
    objectImpulseDE2 <- runImpulseDE2(
      matCountData = data,
      dfAnnotation = dfAnnotation,
      boolCaseCtrl = FALSE,
      # Name of the dfAnnotation column that holds the batch labels
      vecConfounders = "Batch",
      scaNProc = 1
    )
    

    3. Case-control

    用上面类似的方法构建如下注释信息:

    ##            Sample Condition Time Batch
    ## A_1_Rep1 A_1_Rep1      case    1    B1
    ## A_2_Rep1 A_2_Rep1      case    2    B1
    ## A_3_Rep1 A_3_Rep1      case    3    B1
    ## A_4_Rep1 A_4_Rep1      case    4    B1
    ## A_5_Rep1 A_5_Rep1      case    5    B1
    ## A_6_Rep1 A_6_Rep1      case    6    B1
    ## A_7_Rep1 A_7_Rep1      case    7    B1
    ## A_8_Rep1 A_8_Rep1      case    8    B1
    ## A_1_Rep2 A_1_Rep2      case    1    B2
    ## A_2_Rep2 A_2_Rep2      case    2    B2
    ## A_3_Rep2 A_3_Rep2      case    3    B2
    ## A_4_Rep2 A_4_Rep2      case    4    B2
    ## A_5_Rep2 A_5_Rep2      case    5    B2
    ## A_6_Rep2 A_6_Rep2      case    6    B2
    ## A_7_Rep2 A_7_Rep2      case    7    B2
    ## A_8_Rep2 A_8_Rep2      case    8    B2
    ## A_1_Rep3 A_1_Rep3      case    1    B3
    ## A_2_Rep3 A_2_Rep3      case    2    B3
    ## A_3_Rep3 A_3_Rep3      case    3    B3
    ## A_4_Rep3 A_4_Rep3      case    4    B3
    ## A_5_Rep3 A_5_Rep3      case    5    B3
    ## A_6_Rep3 A_6_Rep3      case    6    B3
    ## A_7_Rep3 A_7_Rep3      case    7    B3
    ## A_8_Rep3 A_8_Rep3      case    8    B3
    ## B_1_Rep1 B_1_Rep1   control    1    C1
    ## B_2_Rep1 B_2_Rep1   control    2    C1
    ## B_3_Rep1 B_3_Rep1   control    3    C1
    ## B_4_Rep1 B_4_Rep1   control    4    C1
    ## B_5_Rep1 B_5_Rep1   control    5    C1
    ## B_6_Rep1 B_6_Rep1   control    6    C1
    ## B_7_Rep1 B_7_Rep1   control    7    C1
    ## B_8_Rep1 B_8_Rep1   control    8    C1
    ## B_1_Rep2 B_1_Rep2   control    1    C2
    ## B_2_Rep2 B_2_Rep2   control    2    C2
    ## B_3_Rep2 B_3_Rep2   control    3    C2
    ## B_4_Rep2 B_4_Rep2   control    4    C2
    ## B_5_Rep2 B_5_Rep2   control    5    C2
    ## B_6_Rep2 B_6_Rep2   control    6    C2
    ## B_7_Rep2 B_7_Rep2   control    7    C2
    ## B_8_Rep2 B_8_Rep2   control    8    C2
    ## B_1_Rep3 B_1_Rep3   control    1    C3
    ## B_2_Rep3 B_2_Rep3   control    2    C3
    ## B_3_Rep3 B_3_Rep3   control    3    C3
    ## B_4_Rep3 B_4_Rep3   control    4    C3
    ## B_5_Rep3 B_5_Rep3   control    5    C3
    ## B_6_Rep3 B_6_Rep3   control    6    C3
    ## B_7_Rep3 B_7_Rep3   control    7    C3
    ## B_8_Rep3 B_8_Rep3   control    8    C3
    

    运行代码进行计算

    # Case-control analysis with batch correction; `dfAnnotation` must be
    # the case + control annotation shown above.
    objectImpulseDE2 <- runImpulseDE2(
      matCountData = data,
      dfAnnotation = dfAnnotation,
      # Compare the case trajectory against the control trajectory
      boolCaseCtrl = TRUE,
      vecConfounders = "Batch",
      scaNProc = 1
    )
    
    

    4. 绘制单基因轨迹图

    library(ggplot2)
    # Plot per-gene expression trajectories.
    # Genes are selected either by ID (vecGeneIDs) or by rank (scaNTopIDs);
    # supply one of the two and leave the other NULL.
    lsgplotsGenes <- plotGenes(
      # Gene IDs to plot: here the first genes of the result table
      vecGeneIDs       = head(objectImpulseDE2$dfImpulseDE2Results$Gene),
      # NULL because genes are given by ID; to plot the 10 most significant
      # genes instead, set vecGeneIDs = NULL and scaNTopIDs = 10
      scaNTopIDs       = NULL,
      objectImpulseDE2 = objectImpulseDE2,
      # TRUE matches the case-control fit above; use FALSE for case-only
      boolCaseCtrl     = TRUE,
      dirOut           = NULL,
      strFileName      = NULL,
      vecRefPval       = NULL,
      strNameRefMethod = NULL
    )

    # Show the plot of the second gene in the list
    print(lsgplotsGenes[[2]])
    

    5. 绘制表达热图

    
    library(ComplexHeatmap)
    # Heatmap of the differentially expressed genes of the "case" condition.
    lsHeatmaps <- plotHeatmap(
      objectImpulseDE2       = objectImpulseDE2,
      strCondition           = "case",
      # FALSE spelled out: `F` is an ordinary variable that can be reassigned
      boolIdentifyTransients = FALSE,
      # Significance threshold applied to the genes shown
      scaQThres              = 0.01
    )
    # Draw the heatmap stored under complexHeatmapRaw
    draw(lsHeatmaps$complexHeatmapRaw)
    

    相关文章

      网友评论

        本文标题:使用ImpulseDE2处理转录组时间序列数据

        本文链接:https://www.haomeiwen.com/subject/ujqdyktx.html