单细胞转录组测序分析--再说Seurat

作者: HOLLYxiao | 来源:发表于2019-02-26 22:04 被阅读88次

    上期我们讲了使用Seurat分析单个样本,那么2个或多个样本的分析会有哪些不同呢?单细胞转录组数据中一个重要的问题就在于批次效应,任何一个试验步骤都容易带来批次效应,多个样本的比较分析更甚。Seurat专门开发了一种能够比较多个样本的分析方法,我们一起来看看。
    假设有2个样本,分别对2个样本进行数据的预处理及标准化,类似于单个样本的分析

    #样本1
    matrix <- Read10X_h5("filtered_gene_bc_matrices_h5.h5")
    filter_10x_object <-CreateSeuratObject(raw.data = matrix,min.cells = 3)
    filter_10x_object@meta.data$group <- "sample_1"
    mito.genes <- grep(pattern = "^MT-", x = rownames(x = filter_10x_object@data), 
    value = TRUE)
    percent.mito <- Matrix::colSums(filter_10x_object@raw.data[mito.genes, ])/
    Matrix::colSums(filter_10x_object@raw.data)
    filter_10x_object <- AddMetaData(object = filter_10x_object, metadata 
    = percent.mito,col.name = "percent.mito")
    filter_10x_object <- FilterCells(object = filter_10x_object, subset.names =
     c("nGene", "percent.mito"), low.thresholds = c(200, -Inf), 
    high.thresholds = c(2500, 0.05))
    filter_10x_object <- NormalizeData(object = filter_10x_object, 
    normalization.method = "LogNormalize", scale.factor = 10000)
    filter_10x_object<- ScaleData(filter_10x_object, vars.to.regress
     = c("nUMI", "percent.mito"), display.progress = F)
    filter_10x_object <- FindVariableGenes(filter_10x_object, do.plot = F)
    g.1 <- head(rownames(filter_10x_object@hvg.info), 1000)
    
    #样本2
    matrix2 <- Read10X_h5("filtered_gene_bc_matrices_h5.h5")
    filter_10x_object_2<-CreateSeuratObject(raw.data = matrix2,min.cells = 3)
    filter_10x_object_2@meta.data$group <- "sample_2"
    mito.genes <- grep(pattern = "^MT-", x = rownames(x = filter_10x_object_2@data), 
    value = TRUE)
    percent.mito <- Matrix::colSums(filter_10x_object_2@raw.data[mito.genes, ])/
    Matrix::colSums(filter_10x_object_2@raw.data)
    filter_10x_object_2 <- AddMetaData(object = filter_10x_object_2, metadata 
    = percent.mito,col.name = "percent.mito")
    filter_10x_object_2<- FilterCells(object = filter_10x_object_2, subset.names =
     c("nGene", "percent.mito"), low.thresholds = c(200, -Inf), 
    high.thresholds = c(2500, 0.05))
    filter_10x_object_2<- NormalizeData(object = filter_10x_object_2, 
    normalization.method = "LogNormalize", scale.factor = 10000)
    filter_10x_object_2<- ScaleData(filter_10x_object_2, vars.to.regress
     = c("nUMI", "percent.mito"), display.progress = F)
    filter_10x_object_2<- FindVariableGenes(filter_10x_object_2, do.plot = F)
    g.2 <- head(rownames(filter_10x_object_2@hvg.info), 1000)
    
    #选取2个样本高可变基因的交集作为后续分析的基因
    genes.use <- unique(c(g.1, g.2))
    genes.use <- intersect(genes.use, rownames(filter_10x_object@scale.data))
    genes.use <- intersect(genes.use, rownames(filter_10x_object_2@scale.data))
    #执行RunCCA函数,主要目的是鉴定2个数据集共有的变异源,同时也会将2个对象合并成一个对象,
    #并进行数据的标准化和归一化(scale)
    combined <- RunCCA(filter_10x_object, filter_10x_object_2, 
    genes.use = genes.use, num.cc = 30,
    add.cell.id1="sample_1",add.cell.id2="sample_2")
    #如果是2个以上的样本,则需要使用RunMultiCCA函数
    
    #类似于PCA分析的PC选择,执行RunCCA后也需要选择合适的CC个数进行后续分析
    p3 <- MetageneBicorPlot(combined, grouping.var = "group", dims.eval = 1:30, 
    display.progress = FALSE)
    
    8.jpg

    执行完上述步骤后进行align the CCA subspaces

    combined <- AlignSubspace(combined, reduction.type = "cca", 
    grouping.var = "group", dims.align = 1:20)
    #执行完此步骤后会返回一个新的降维数据用于后续聚类分析
    combined <- RunTSNE(combined, reduction.use = "cca.aligned", 
    dims.use = 1:20, do.fast = T)
    combined <- FindClusters(combined, reduction.type = "cca.aligned", 
    resolution = 0.6, dims.use = 1:20)
    #接下去就根据自己的分析需求进行多组数据比较分析了,这里就不介绍了
    
    注:Seurat的多个样本合并分析,与10x官网的cellranger aggr分析相差很大。
    

    相关文章

      网友评论

        本文标题:单细胞转录组测序分析--再说Seurat

        本文链接:https://www.haomeiwen.com/subject/sonzyqtx.html