美文网首页单细胞转录组单细胞
利用SCTransform和RunHarmony做普通的单细胞批

利用SCTransform和RunHarmony做普通的单细胞批

作者: 碌碌无为的杰少 | 来源:发表于2021-01-11 23:46 被阅读0次
    这篇文章从GSE132465下载,https://www.jianshu.com/p/44d30d3194e4这篇文章详细的解释了SCTransform是干什么的,所以我用这个方法试试如何去除批次效应

    常规方法跑一遍

    本文数据经过过滤过的,所以不需要过滤

    library(Seurat)
    library(ggplot2)
    library(cowplot)
    library(Matrix)
    library(dplyr)
    library(ggsci)
    
    library(tidyverse)
    library(SingleR)
    library(harmony)
    library(patchwork)
    
    exp1 <- data.table::fread('GSE132465_GEO_processed_CRC_10X_raw_UMI_count_matrix.txt',data.table = F)
    test <- exp1[1:10,1:10]
    rownames(exp1) <- exp1$Index
    exp1 <-exp1[,-1]
    experiment.aggregate <- CreateSeuratObject(
      exp1,
      project = "multi", 
      min.cells = 10,
      min.features = 200,
      names.field = 1,
      names.delim = "_")
    View(experiment.aggregate@meta.data)
    
    experiment.aggregate[["percent.mt"]] <- PercentageFeatureSet(experiment.aggregate, 
                                                                 pattern = "^MT-")
    #save(experiment.aggregate, file = "experiment.aggregate.Rdata")
    experiment.aggregate <- NormalizeData(experiment.aggregate, 
                                          normalization.method = "LogNormalize",
                                          scale.factor = 10000)
    
    #计算表达量变化显著的基因FindVariableFeatures
    experiment.aggregate <- FindVariableFeatures(experiment.aggregate, 
                                                 selection.method = "vst",
                                                 nfeatures = 1000)
    all.genes <- rownames(experiment.aggregate)
    experiment.aggregate <- ScaleData(experiment.aggregate,
                                      features = all.genes, verbose = FALSE
    )
    experiment.aggregate <- RunPCA(object = experiment.aggregate, 
                                   features = VariableFeatures(experiment.aggregate),
                                   verbose = F,npcs = 50)
    ElbowPlot(experiment.aggregate, ndims = 50)
    dim.use <- 1:20
    experiment.aggregate <- FindNeighbors(experiment.aggregate, dims = dim.use)
    #基于细胞之间的相似性计算具体的cluster
    experiment.aggregate <- FindClusters(experiment.aggregate, resolution = 0.6)
    #TSNE算法聚类(降维)
    experiment.aggregate <- RunTSNE(experiment.aggregate, dims = dim.use, do.fast = TRUE)
    experiment.aggregate <- RunUMAP(experiment.aggregate, dims = dim.use, do.fast = TRUE)
    dir.create("Overview")
    source("./function.R")
    refdata <- get(load("Resource/SingleR_ref/ref_BE_259s.RData"))
    library(SingleR)
    experiment.aggregate <- cell_identify(experiment.aggregate, refdata, output = "Overview")
    
    ##tSNE图展示结果
    p4 <- DimPlot(experiment.aggregate, reduction = "tsne", label = T) + NoLegend()
    
    p5 <- DimPlot(experiment.aggregate, reduction = "tsne", label = T,
                  group.by = "SingleR") + NoLegend()
    pc1 = p4|p5
    pc1
    
    p6 <- DimPlot(experiment.aggregate, reduction = "tsne", group.by = "orig.ident", 
                  split.by = "orig.ident", ncol = 4) + NoLegend()
    ggsave("tsne.png", p6, width = 9, height = 12)
    p1 <- DimPlot(experiment.aggregate, reduction = "umap", label = T) + NoLegend()
    
    p2 <- DimPlot(experiment.aggregate, reduction = "umap", label = T,
                  group.by = "SingleR") + NoLegend()
    pc = p1|p2
    pc
    
    p3 <- DimPlot(experiment.aggregate, reduction = "umap", group.by = "orig.ident", 
                  split.by = "orig.ident", ncol = 4) + NoLegend()
    ggsave("umap.png", p3, width = 9, height = 12)
    
    图片.png
    图片.png
    图片.png
    图片.png
    感觉没啥批次效应

    导入数据入seraut对象,常规操作一下

    library(Seurat)
    library(ggplot2)
    library(cowplot)
    library(Matrix)
    library(dplyr)
    library(ggsci)
    library(tidyverse)
    library(SingleR)
    library(harmony)
    library(patchwork)
    
    exp1 <- data.table::fread('GSE132465_GEO_processed_CRC_10X_raw_UMI_count_matrix.txt',data.table = F)
    test <- exp1[1:10,1:10]
    rownames(exp1) <- exp1$Index
    exp1 <-exp1[,-1]
    experiment.aggregate <- CreateSeuratObject(
      exp1,
      project = "multi", 
      min.cells = 10,
      min.features = 200,
      names.field = 1,
      names.delim = "_")
    View(experiment.aggregate@meta.data)
    
    experiment.aggregate[["percent.mt"]] <- PercentageFeatureSet(experiment.aggregate, 
                                                                 pattern = "^MT-")
    save(experiment.aggregate, file = "experiment.aggregate.Rdata")
    experiment.aggregate <- NormalizeData(experiment.aggregate, 
                                          normalization.method = "LogNormalize",
                                          scale.factor = 10000)
    
    #计算表达量变化显著的基因FindVariableFeatures
    experiment.aggregate <- FindVariableFeatures(experiment.aggregate, 
                                                 selection.method = "vst",
                                                 nfeatures = 1000)
    all.genes <- rownames(experiment.aggregate)
    experiment.aggregate <- ScaleData(experiment.aggregate,
                                      features = all.genes, verbose = FALSE
    )
    

    细胞周期基因

    if(T){
      g2m_genes <- cc.genes$g2m.genes
      g2m_genes <- CaseMatch(search=g2m_genes, match=rownames(experiment.aggregate))
      s_genes <- cc.genes$s.genes    
      s_genes <- CaseMatch(search=s_genes, match=rownames(experiment.aggregate))
      experiment.aggregate <- CellCycleScoring(experiment.aggregate, g2m.features=g2m_genes, s.features=s_genes)
      tmp <- RunPCA(experiment.aggregate, features = c(g2m_genes, s_genes), verbose = F)
      p <- DimPlot(tmp, reduction = "pca", group.by = "orig.ident")
      ggsave("QC/CellCycle_pca.png", p, width = 8, height = 6)
      rm(tmp)
    }
    

    去除批次

    experiment.aggregate <- SCTransform(experiment.aggregate, return.only.var.genes = F, 
                         vars.to.regress = c( vars.to.regress = c("S.Score", "G2M.Score")))
    memory.limit()
    ##使用harmony整合数据
    experiment.aggregate <- RunPCA(experiment.aggregate, npcs=50, verbose=FALSE)
    experiment.aggregate <- RunHarmony(experiment.aggregate, group.by.vars="orig.ident", assay.use="SCT")
    
    ElbowPlot(experiment.aggregate, ndims = 40)
    pc.num=1:20
    scRNA <- RunTSNE(experiment.aggregate, reduction="harmony", dims=pc.num) %>% 
      RunUMAP(reduction="harmony", dims=pc.num) %>%
      FindNeighbors(reduction="harmony", dims=pc.num) %>% 
      FindClusters(resolution=0.6)
    

    细胞注释

    dir.create("Overview")
    source("./function.R")
    refdata <- get(load("Resource/SingleR_ref/ref_BE_259s.RData"))
    library(SingleR)
    scRNA <- cell_identify(scRNA, refdata, output = "Overview")
    

    作Umap图

    p1 <- DimPlot(scRNA, reduction = "umap", label = T) + NoLegend()
    
    p2 <- DimPlot(scRNA, reduction = "umap", label = T,
                  group.by = "SingleR") + NoLegend()
    pc = p1|p2
    pc
    
    图片.png

    查看批次效应

    p3 <- DimPlot(scRNA, reduction = "umap", group.by = "orig.ident", 
                  split.by = "orig.ident", ncol = 4) + NoLegend()
    ggsave("umap.png", p3, width = 9, height = 12)
    
    图片.png

    作tsne图

    p4 <- DimPlot(scRNA, reduction = "tsne", label = T) + NoLegend()
    
    p5 <- DimPlot(scRNA, reduction = "tsne", label = T,
                  group.by = "SingleR") + NoLegend()
    pc1 = p4|p5
    pc1
    
    图片.png

    批次效应

    p6 <- DimPlot(scRNA, reduction = "tsne", group.by = "orig.ident", 
                  split.by = "orig.ident", ncol = 4) + NoLegend()
    ggsave("tsne.png", p6, width = 9, height = 12)
    
    图片.png

    相关文章

      网友评论

        本文标题:利用SCTransform和RunHarmony做普通的单细胞批

        本文链接:https://www.haomeiwen.com/subject/jaghaktx.html