美文网首页单细胞学习
Seurat之整合分析(2)---跨物种

Seurat之整合分析(2)---跨物种

作者: jjjscuedu | 来源:发表于2022-12-16 11:36 被阅读0次

    上面那个帖子介绍了FindIntegrationAnchors和IntegrateData,说也适合整合两个datasets只有部分重叠。那么,从这个角度出发,也适合整合两个跨物种的数据集,利用重叠的homolog基因来实现重叠的部分,所以我们拿arabidopsis和rice的单细胞数据来试试看(随便从paper里面选了一组拟南芥根和水稻根的数据作为测试而已)。

    注:我是用cellranger获得了比对矩阵文件。然后根据arabidopsis和rice的ortholog基因,把rice ortholog的基因ID换成了arabidopsis,方便后面构造arabidopsis和rice的可比文件。

    library(Seurat)

    library(SeuratData)

    library(patchwork)

    library(harmony)

    #library(rliger)

    library(RColorBrewer)

    library(tidyverse)

    library(reshape2)

    library(ggsci)

    library(ggstatsplot)

    #常规的构建seurat对象

    arabidopsis.data <- Read10X(data.dir = "arabidopsis/filtered_feature_bc_matrix/")

    rice.data <- Read10X(data.dir = "rice/filtered_feature_bc_matrix/")

    arabidopsis <- CreateSeuratObject(counts = arabidopsis.data, project = "arabidopsis", min.cells = 3, min.features = 200)

    rice <- CreateSeuratObject(counts = rice.data, project = "rice", min.cells = 3, min.features = 200)

    #做了一些简单的过滤

    arabidopsis[["percent.mg"]] <- PercentageFeatureSet(arabidopsis, pattern ="^ATMG")

    arabidopsis[["percent.cg"]] <- PercentageFeatureSet(arabidopsis, pattern ="^ATCG")

    arabidopsis <- subset(arabidopsis, subset = nFeature_RNA >500& nFeature_RNA <5000& percent.mg <1 & percent.cg <3)

    rice<- subset(rice, subset = nFeature_RNA >500& nFeature_RNA <7000)

    #获得拟南芥和水稻共有基因的

    common_genes <- rownames(arabidopsis)[rownames(arabidopsis) %in% rownames(rice)]

    > length(common_genes)

    [1] 9042

    #提取共有的部分进行对比

    arabidopsis <- arabidopsis[rownames(arabidopsis) %in% common_genes,]

    rice <- rice[rownames(rice) %in% common_genes,]

    #下面的和上面一个帖子一样了

    wb_list <- list()

    wb_list[["arabidopsis"]]  <- arabidopsis

    wb_list[["rice"]]  <- rice

    wb_list <- lapply(X = wb_list, FUN = function(x) {

        x <- NormalizeData(x)

        x <- FindVariableFeatures(x, selection.method = "vst", nfeatures = 2000)

    })

    wb_anchors <- FindIntegrationAnchors(object.list = wb_list, dims = 1:30)

    wb_seurat  <- IntegrateData(anchorset = wb_anchors, dims = 1:30)

    DefaultAssay(wb_seurat) <- "RNA"

    wb_seurat <- NormalizeData(wb_seurat, verbose = F)

    wb_seurat <- FindVariableFeatures(wb_seurat, selection.method = "vst", nfeatures = 2000, verbose = F)

    wb_seurat <- ScaleData(wb_seurat, verbose = F)

    wb_seurat <- RunPCA(wb_seurat, npcs = 30, verbose = F)

    wb_seurat <- RunUMAP(wb_seurat, reduction = "pca", dims = 1:30, verbose = F)

    p1<-DimPlot(wb_seurat,reduction = "umap") + scale_color_npg()+

    plot_annotation(title = "arabidopsis and rice, before integration")

    DefaultAssay(wb_seurat) <- "integrated"

    wb_seurat <- ScaleData(wb_seurat, verbose = F)

    wb_seurat <- RunPCA(wb_seurat, npcs = 30, verbose = F)

    wb_seurat <- RunUMAP(wb_seurat, reduction = "pca", dims = 1:30, verbose = F)

    p2<-DimPlot(wb_seurat, reduction = "umap") +

    scale_color_npg()+

    plot_annotation(title = "arabidopsis and rice, after integration")

    p1|p2

    #我们可以明显看出合并之前和合并之后的差异。但是我个人觉得貌似合并的效果也没有那么好,不确定是真实的细胞类型的差异,还是算法带来的误差差异。

    #下面就是常规的聚类和可视化

    wb_seurat <- FindNeighbors(wb_seurat, dims = 1:30, k.param = 10, verbose = F)

    wb_seurat <- FindClusters(wb_seurat, verbose = F)

    ncluster <- length(unique(wb_seurat[[]]$seurat_clusters))

    mycol <- colorRampPalette(brewer.pal(8, "Set2"))(ncluster)

    DimPlot(wb_seurat,label = T, reduction = "umap",cols = mycol, repel = T) +NoLegend()

    #我们也可以查看每个cluster下面,不同物种之间的差异。

    count_table <- table(wb_seurat@meta.data$seurat_clusters, wb_seurat@meta.data$orig.ident)

    count_table

    #### 可视化

    count_table %>%

    as.data.frame() %>%

    ggbarstats(x = Var2, y = Var1,counts = Freq)+

    scale_fill_npg()

    后面,我们也可以根据marker基因来注释每个cluster,同时查看不同细胞类型在物种间的差异,例如上面一个帖子讲的那样。

    相关文章

      网友评论

        本文标题:Seurat之整合分析(2)---跨物种

        本文链接:https://www.haomeiwen.com/subject/zpkrqdtx.html