美文网首页单细胞转录组
单细胞转录组数据分析(三):识别celltype

单细胞转录组数据分析(三):识别celltype

作者: 生信学习者2 | 来源:发表于2021-01-04 17:02 被阅读0次

    识别celltype是单细胞数据分析最关键的一步,识别方法主要分为手动和自动注释,暂时记录下手动注释的过程(极其地痛苦,如果可以还是自动注释更方便,但准确度可能不好,最佳的方法是手动+自动的方式)。更多知识分享请到 https://zouhua.top/

    加载R包

    library(dplyr)
    library(Seurat)
    library(tibble)
    library(ggplot2)
    library(cowplot)
    
    # 设置全局参数
    rm(list = ls())
    options(stringsAsFactors = F)
    options(future.globals.maxSize = 4000 * 1024^2)
    
    colors <- c("#A6CEE3", "#1F78B4", "#08306B", "#B2DF8A", "#006D2C", "#8E0152",
             "#DE77AE", "#CAB2D6", "#6A3D9A", "#FB9A99", "#E31A1C", "#B15928",
             "#619CFF","#FF67A4","#00BCD8", "#EE2B2B", "#2D6BB4")
    

    加载数据

    data.cluster <- readRDS("../../Result/RDS/data.cluster04.rds")
    

    查看cluster

    DimPlot(data.cluster,
            reduction = "umap",
            label = TRUE,
            label.size = 6,
            cols = colors)
    

    识别所有cluster的marker gene

    annotations <- read.csv("../../Result/annotations_mus.csv")
    markers <- FindAllMarkers(object = data.cluster,
                                only.pos = TRUE,
                                logfc.threshold = 0.25)
    #write.csv(markers, "../../Result/data.all.markergenes.csv")
    #markers <- read.csv("../../Result/data.all.markergenes.csv", row.names = 1)
    

    评估marker genes

    anno_all_markers <- markers %>%
            left_join(y = unique(annotations[, c("gene_name", "description")]),
                      by = c("gene" = "gene_name")) %>%
            mutate(Specific=pct.1 - pct.2) #%>%
      #arrange(dplyr::desc(Specific), dplyr::desc(avg_logFC))
    
    # Extract top 50 markers per cluster
    top50 <- anno_all_markers %>%
        group_by(cluster) %>%
        top_n(n = 50, wt = avg_logFC) %>%
        ungroup()
    
    • 热图展示
    mapal <- colorRampPalette(c("#EEEED1", "#AEDCF9", "#10106A"))(20)
    DoHeatmap(object = data.cluster, features = top50$gene, label = TRUE) +
      scale_fill_gradientn(colours = mapal)
    
    • 计算cluster的相关性
    library(pheatmap)
    exp_ave <- AverageExpression(data.cluster)
    CorOb.cor.exp <- as.data.frame(cor(exp_ave$RNA, method = "pearson"))
    pheatmap(CorOb.cor.exp, color=mapal)
    

    网上查找组织细胞的特异基因

    Marker genes Celltype
    AQP1, BST2 endothelial cell
    CXCL2 Fibroblasts,endothelial cell
    ADRA2C,NTRK2 Fibroblasts,smooth muscle cell
    RBP7,CTHRC1 endothelial cell
    LUM, MMP2,SERPINF1,DCN Fibroblasts
    HYAL2,CALCRL,CD320 endothelial cell
    ABCC9,CPE Fibroblasts, endothelial cell
    TTN cardiomyocyte
    LBH,MYH11,MYLK smooth muscle cell,Fibroblasts
    NKG7,KLRD1,PRF1 Natural killer cells
    AIF1,MS4A6A,C3AR1 Macrophages,Monocytes
    • 查看marker genes在cluster的分布情况
    FeaturePlot(data.cluster,
                reduction = "umap",
                features = c("Cd3g", "Cd3d", "Ptprc"),
                pt.size = 0.4,
                order = TRUE,
                min.cutoff = 'q10',
                label = TRUE)
    

    根据marker genes结果重命名cluster

    data.cluster <- RenameIdents(object = data.cluster,
                                   "0" = "cardiomyocyte",
                                   "1" = "fibroblasts",
                                   "2" = "cardiomyocyte",
                                   "3" = "macrophage",
                                   "4" = "cardiomyocyte",
                                   "5" = "endothelial cells",
                                   "6" = "cardiomyocyte",
                                   "7" = "T cells",
                                   "8" = "Unknown",
                                   "9" = "granulocyte",
                                   "10" = "Unknown",
                                   "11" = "Unknown",
                                   "12" = "Unknown")
    
    DimPlot(object = data.cluster,
            reduction = "umap",
            label = TRUE,
            label.size = 5,
            repel = TRUE,
            cols = colors)
    

    提取或丢弃某些cluster

    # 提取
    data.choose <- subset(data.cluster,
                          idents = c("cardiomyocyte", "macrophage"),
                          invert = FALSE)
    # 删除 
    data.cluster <- subset(data.cluster,
                              idents = "Unknown",
                              invert = TRUE)
    

    Reference

    参考文章如引起任何侵权问题,可以与我联系,谢谢。

    相关文章

      网友评论

        本文标题:单细胞转录组数据分析(三):识别celltype

        本文链接:https://www.haomeiwen.com/subject/yqmvoktx.html