美文网首页单细胞单细胞测序
生信技能树单细胞数据挖掘笔记(3)

生信技能树单细胞数据挖掘笔记(3)

作者: 周小钊 | 来源:发表于2020-11-09 16:09 被阅读0次

    生信技能树单细胞数据挖掘笔记(1):数据读入

    生信技能树单细胞数据挖掘笔记(2):创建Seurat对象并进行质控、筛选高变基因并可视化

    生信技能树单细胞数据挖掘笔记(3):降维与聚类

    生信技能树单细胞数据挖掘笔记(4):其他分析(周期判断、double诊断、细胞类型注释)

    生信技能树单细胞数据挖掘笔记(5):轨迹分析

    降维,PCA分析

    load("../2.3.Rdata")
    ### 4、降维,PCA分析,可视化----
    #先进行归一化(正态分布)
    scRNA <- ScaleData(scRNA, features = (rownames(scRNA)))
    #储存到"scale.data"的slot里
    GetAssayData(scRNA,slot="scale.data",assay="RNA")[1:8,1:4]
    #对比下原来的count矩阵
    GetAssayData(scRNA,slot="counts",assay="RNA")[1:8,1:4]
    #scRNA@assays$RNA@
    #PCA降维,利用之前挑选的hvg,可提高效率
    scRNA <- RunPCA(scRNA, features = VariableFeatures(scRNA)) 
    #挑选第一,第二主成分对cell可视化
    DimPlot(scRNA, reduction = "pca", group.by="Patient_ID")
    #发现与原文献中颠倒了
    #seed.use   :Set a random seed. By default, sets the seed to 42. 
    #Setting NULL will not set a seed.
    scRNA <- RunPCA(scRNA, features = VariableFeatures(scRNA),seed.use=3)
    #尝试了seed.use的不同取值发现图形只有四种变化(四个拐角),其中以seed.use=3为代表的一类与原文文献一致
    DimPlot(scRNA, reduction = "pca", group.by="Patient_ID")
    #与文献一致了。个人觉得颠倒与否如果只是随机种子的差别的话,对后续分析应该没影响
    p2_1 <- DimPlot(scRNA, reduction = "pca", group.by="Patient_ID")+
      labs(tag = "D")
    p2_1
    
    原图 seed.use=3后的PCA图
    #挑选主成分,RunPCA默认保留了前50个
    scRNA <- JackStraw(scRNA,reduction = "pca", dims=20)
    scRNA <- ScoreJackStraw(scRNA,dims = 1:20)
    
    p2_2 <- JackStrawPlot(scRNA,dims = 1:20, reduction = "pca") +
      theme(legend.position="bottom") +
      labs(tag = "E")
    p2_2
    p2_3 <- ElbowPlot(scRNA, ndims=20, reduction="pca") 
    p2_2 | p2_3
    
    E图 D图与E图

    聚类、筛选maker基因并可视化

    聚类

    pc.num=1:20
    #基于PCA数据
    scRNA <- FindNeighbors(scRNA, dims = pc.num) 
    # dims参数,需要指定哪些pc轴用于分析;这里利用上面的分析,选择20
    scRNA <- FindClusters(scRNA, resolution = 0.5)
    table(scRNA@meta.data$seurat_clusters)
    
    scRNA = RunTSNE(scRNA, dims = pc.num)
    DimPlot(scRNA, reduction = "tsne",label=T)
    p3_1 <- DimPlot(scRNA, reduction = "tsne",label=T) +
      labs(tag = "E")
    p3_1
    
    F图

    筛选maker基因

    #5.2 marker gene
    #进行差异分析,一般使用标准化数据
    scRNA <- NormalizeData(scRNA, normalization.method = "LogNormalize")
    #结果储存在"data"slot里
    GetAssayData(scRNA,slot="data",assay="RNA")[1:8,1:4]
    
    #if test.use is "negbinom", "poisson", or "DESeq2", slot will be set to "counts
    diff.wilcox = FindAllMarkers(scRNA)##默认使用wilcox方法挑选差异基因,大概4-5min
    load("../diff.wilcox.Rdata")
    head(diff.wilcox)
    dim(diff.wilcox)
    library(tidyverse)
    all.markers = diff.wilcox %>% select(gene, everything()) %>%
      subset(p_val<0.05 & abs(diff.wilcox$avg_logFC) > 0.5)
    #An adjusted P value < 0.05and | log 2 [fold change (FC)] | > 0.5 
    #were considered the 2 cutoff criteria for identifying marker genes.
    dim(all.markers)
    summary(all.markers)
    save(all.markers,file = "../markergene.Rdata")
    
    top10 = all.markers %>% group_by(cluster) %>% top_n(n = 10, wt = avg_logFC)
    top10
    top10 = CaseMatch(search = as.vector(top10$gene), match = rownames(scRNA)) 
    top10
    length(top10)
    length(unique(sort(top10)))
    
    p3_2 <- DoHeatmap(scRNA, features = top10, group.by = "seurat_clusters")
    p3_2
    p3_1 | p3_2 #下图 
    
    G图 图片合并

    目前,图一的图片已经全部完成,接下来将进行拼图

    拼图

    ### 6、拼图,比较----
    p <- (p1_1 | p1_2 | p1_3 ) /
      ((p2_1| p2_2 | p2_3) /
         (p3_1 | p3_2))
    ggsave("../.my_try.pdf", plot = p, width = 15, height = 18) 
    
    save(scRNA,file = "scRNA.Rdata")
    

    转载请注明:周小钊的博客>>>生信技能树单细胞数据挖掘笔记(3)

    相关文章

      网友评论

        本文标题:生信技能树单细胞数据挖掘笔记(3)

        本文链接:https://www.haomeiwen.com/subject/riolbktx.html