使用ImpulseDE2处理转录组时间序列数据

作者: FengSL | 来源:发表于2020-09-17 23:56 被阅读0次

使用ImpulseDE2处理转录组时间序列数据
scanpy的空间转录组学数据的分析和可视化
转录组时间序列数据处理
转录组时间序列数据处理
maSigPro 处理时间序列转录组数据
时间序列单细胞转录组数据分析
第07周-时间序列单细胞转录组数据分析
STEM软件分析基因表达模式
转录组入门学习（四）
转录组数据库的基本使用（一）-GO数据库

ImpulseDE2 是一个处理时序数据的R包，用于查找时序数据的差异基因，它可以用来解决两类问题。

Case-only differential expression analysis tests, whether the expression level of a gene changes over time.
Case-control differential expression analysis tests, whether the expression trajectory of a gene over time differs between samples from a case and samples from a control condition.

1. Case-only

做差异分析的时候有4个主要的参数：

matCountData ：输入数据，为基因表达的counts数
dfAnnotation ：构建的样品信息
boolCaseCtrl ：布尔值，是否执行Case-control分析，默认是FALSE
vecConfounders：字符向量，给出 dfAnnotation 中需要修正的混杂因素（如批次）所在列的列名，例如 c("Batch")；不需要修正时设为 NULL

生成模拟数据

# Simulate a case-only count data set with ImpulseDE2.
# Design: 8 time points, each sampled in 3 replicates; no control condition
# (all "B" arguments are NULL) and no batch structure.
library(ImpulseDE2)
lsSimulatedData <- simulateDataSetImpulseDE2(
  # Sampling design
  vecTimePointsA = rep(seq_len(8), times = 3),
  vecTimePointsB = NULL,
  vecBatchesA = NULL,
  vecBatchesB = NULL,
  # Number of simulated genes per expression model (per the package docs:
  # constant, impulse, linear and sigmoid trajectories respectively)
  scaNConst = 30,
  scaNImp = 10,
  scaNLin = 10,
  scaNSig = 10,
  # No simulated batch effect, and nothing written to disk
  scaMuBatchEffect = NULL,
  scaSDBatchEffect = NULL,
  dirOutSimulation = NULL
)

其中

lsSimulatedData$matObservedCounts  是Counts数据矩阵，可以用自己的数据代替
lsSimulatedData$dfAnnotation               是 数据的注释信息，可以自行构建
格式如下:
##            Sample Condition Time  Batch
## A_1_Rep1 A_1_Rep1      case    1 B_NULL
## A_2_Rep1 A_2_Rep1      case    2 B_NULL
## A_3_Rep1 A_3_Rep1      case    3 B_NULL
## A_4_Rep1 A_4_Rep1      case    4 B_NULL
## A_5_Rep1 A_5_Rep1      case    5 B_NULL
## A_6_Rep1 A_6_Rep1      case    6 B_NULL
## A_7_Rep1 A_7_Rep1      case    7 B_NULL
## A_8_Rep1 A_8_Rep1      case    8 B_NULL
## A_1_Rep2 A_1_Rep2      case    1 B_NULL
## A_2_Rep2 A_2_Rep2      case    2 B_NULL
## A_3_Rep2 A_3_Rep2      case    3 B_NULL
## A_4_Rep2 A_4_Rep2      case    4 B_NULL
## A_5_Rep2 A_5_Rep2      case    5 B_NULL
## A_6_Rep2 A_6_Rep2      case    6 B_NULL
## A_7_Rep2 A_7_Rep2      case    7 B_NULL
## A_8_Rep2 A_8_Rep2      case    8 B_NULL
## A_1_Rep3 A_1_Rep3      case    1 B_NULL
## A_2_Rep3 A_2_Rep3      case    2 B_NULL
## A_3_Rep3 A_3_Rep3      case    3 B_NULL
## A_4_Rep3 A_4_Rep3      case    4 B_NULL
## A_5_Rep3 A_5_Rep3      case    5 B_NULL
## A_6_Rep3 A_6_Rep3      case    6 B_NULL
## A_7_Rep3 A_7_Rep3      case    7 B_NULL
## A_8_Rep3 A_8_Rep3      case    8 B_NULL

构建方式如下:

# Count matrix whose columns are samples; replace this with your own data.
data <- lsSimulatedData$matObservedCounts

# Build one annotation row per sample (i.e. per column of the count matrix).
Sample <- colnames(data)
Condition <- rep("case", length(Sample))
# Sampling time of every sample: 8 time points repeated for 3 replicates.
# Adapt this to your own design when using your own data.
Time <- rep(seq(1, 8), 3)
# Placeholder batch label: all samples belong to the same (null) batch.
Batch <- rep("B_NULL", length(Sample))

# Fail early instead of letting data.frame() silently recycle a short vector.
stopifnot(length(Time) == length(Sample))

dfAnnotation <- data.frame(Sample, Condition, Time, Batch, row.names = Sample)

运行：

# Coerce the counts to a matrix before passing them to runImpulseDE2.
data <- as.matrix(data)

# Case-only differential expression analysis without batch correction.
objectImpulseDE2 <- runImpulseDE2(
  matCountData   = data,
  dfAnnotation   = dfAnnotation,
  boolCaseCtrl   = FALSE,
  vecConfounders = NULL,
  scaNProc       = 1
)

# Inspect the results
head(objectImpulseDE2$dfImpulseDE2Results)

2.修正批次效应

假设三个实验重复分别来自三个批次，构建注释信息如下：

# Annotation for a case-only design in which each of the three replicates
# comes from its own batch (B1, B2, B3).
# Fixed: the sample names are taken from `data`; the original referenced an
# undefined object `da`.
Sample <- colnames(data)
Condition <- rep("case", 24)
# 8 time points repeated for 3 replicates.
Time <- rep(seq(1, 8), 3)
# Samples are ordered replicate by replicate, so each batch label covers
# 8 consecutive samples.
Batch <- rep(c("B1", "B2", "B3"), each = 8)
dfAnnotation <- data.frame(Sample, Condition, Time, Batch, row.names = Sample)

# Case-only analysis with batch correction: the "Batch" column of
# dfAnnotation is passed as the confounder to correct for.
objectImpulseDE2 <- runImpulseDE2(
  matCountData = data,
  dfAnnotation = dfAnnotation,
  boolCaseCtrl = FALSE,
  vecConfounders = "Batch", # column of dfAnnotation holding the batch labels
  scaNProc = 1
)

3. Case-control

用与上面类似的方法构建如下注释信息（注意：此时 counts 矩阵 data 也必须同时包含 case 和 control 的全部 48 个样品，且列名与 Sample 一一对应）：

##            Sample Condition Time Batch
## A_1_Rep1 A_1_Rep1      case    1    B1
## A_2_Rep1 A_2_Rep1      case    2    B1
## A_3_Rep1 A_3_Rep1      case    3    B1
## A_4_Rep1 A_4_Rep1      case    4    B1
## A_5_Rep1 A_5_Rep1      case    5    B1
## A_6_Rep1 A_6_Rep1      case    6    B1
## A_7_Rep1 A_7_Rep1      case    7    B1
## A_8_Rep1 A_8_Rep1      case    8    B1
## A_1_Rep2 A_1_Rep2      case    1    B2
## A_2_Rep2 A_2_Rep2      case    2    B2
## A_3_Rep2 A_3_Rep2      case    3    B2
## A_4_Rep2 A_4_Rep2      case    4    B2
## A_5_Rep2 A_5_Rep2      case    5    B2
## A_6_Rep2 A_6_Rep2      case    6    B2
## A_7_Rep2 A_7_Rep2      case    7    B2
## A_8_Rep2 A_8_Rep2      case    8    B2
## A_1_Rep3 A_1_Rep3      case    1    B3
## A_2_Rep3 A_2_Rep3      case    2    B3
## A_3_Rep3 A_3_Rep3      case    3    B3
## A_4_Rep3 A_4_Rep3      case    4    B3
## A_5_Rep3 A_5_Rep3      case    5    B3
## A_6_Rep3 A_6_Rep3      case    6    B3
## A_7_Rep3 A_7_Rep3      case    7    B3
## A_8_Rep3 A_8_Rep3      case    8    B3
## B_1_Rep1 B_1_Rep1   control    1    C1
## B_2_Rep1 B_2_Rep1   control    2    C1
## B_3_Rep1 B_3_Rep1   control    3    C1
## B_4_Rep1 B_4_Rep1   control    4    C1
## B_5_Rep1 B_5_Rep1   control    5    C1
## B_6_Rep1 B_6_Rep1   control    6    C1
## B_7_Rep1 B_7_Rep1   control    7    C1
## B_8_Rep1 B_8_Rep1   control    8    C1
## B_1_Rep2 B_1_Rep2   control    1    C2
## B_2_Rep2 B_2_Rep2   control    2    C2
## B_3_Rep2 B_3_Rep2   control    3    C2
## B_4_Rep2 B_4_Rep2   control    4    C2
## B_5_Rep2 B_5_Rep2   control    5    C2
## B_6_Rep2 B_6_Rep2   control    6    C2
## B_7_Rep2 B_7_Rep2   control    7    C2
## B_8_Rep2 B_8_Rep2   control    8    C2
## B_1_Rep3 B_1_Rep3   control    1    C3
## B_2_Rep3 B_2_Rep3   control    2    C3
## B_3_Rep3 B_3_Rep3   control    3    C3
## B_4_Rep3 B_4_Rep3   control    4    C3
## B_5_Rep3 B_5_Rep3   control    5    C3
## B_6_Rep3 B_6_Rep3   control    6    C3
## B_7_Rep3 B_7_Rep3   control    7    C3
## B_8_Rep3 B_8_Rep3   control    8    C3

运行代码进行计算

# Case-control analysis with batch correction: boolCaseCtrl = TRUE switches
# runImpulseDE2 to case-control mode, and the "Batch" column of dfAnnotation
# is passed as the confounder.
objectImpulseDE2 <- runImpulseDE2(
  matCountData = data,
  dfAnnotation = dfAnnotation,
  boolCaseCtrl = TRUE,
  vecConfounders = "Batch",
  scaNProc = 1
)

4. 绘制单基因轨迹图

# Plot fitted expression trajectories for individual genes.
library(ggplot2)
lsgplotsGenes <- plotGenes(
  # Genes to plot: here the first rows of the ImpulseDE2 result table.
  # Fixed: the original used an undefined object `result`.
  vecGeneIDs       = head(objectImpulseDE2$dfImpulseDE2Results$Gene),
  # NULL because explicit gene IDs are supplied above. To plot the N most
  # significant genes instead, set vecGeneIDs = NULL and scaNTopIDs = N.
  scaNTopIDs       = NULL,
  objectImpulseDE2 = objectImpulseDE2,
  # Must match the boolCaseCtrl value used in the runImpulseDE2 call that
  # produced objectImpulseDE2 (TRUE for the case-control run).
  boolCaseCtrl     = TRUE,
  dirOut           = NULL,
  strFileName      = NULL,
  vecRefPval       = NULL,
  strNameRefMethod = NULL
)

# Show the plot of the second gene in the list.
print(lsgplotsGenes[[2]])

5. 绘制表达热图


# Draw an expression heatmap from the ImpulseDE2 results, using a 0.01
# threshold (scaQThres) to select the genes shown.
library(ComplexHeatmap)
lsHeatmaps <- plotHeatmap(
  objectImpulseDE2       = objectImpulseDE2,
  strCondition           = "case",
  # Spelled out as FALSE: `F` is an ordinary variable that can be reassigned.
  boolIdentifyTransients = FALSE,
  scaQThres              = 0.01
)
# Render the "raw" heatmap object from the returned list.
draw(lsHeatmaps$complexHeatmapRaw)