单细胞学习3

作者: soleil要好好学习呀 | 来源:发表于2019-02-24 23:39 被阅读1次

    cluster

    不同聚类方法比较

    normmlization,去除一些批次效应等外部因素。通过对表达矩阵的矩阵的聚类,对细胞群体进行分类。

    无监督聚类:hierarchical clustering,k-means clustering,graph-based clustering。

    library(SingleCellExperiment)
    deng <- readRDS("D:/paopaoR/mass/deng-reads.rds")
    deng
    
    ## class: SingleCellExperiment 
    ## dim: 22431 268 
    ## metadata(0):
    ## assays(2): counts logcounts
    ## rownames(22431): Hvcn1 Gbp7 ... Sox5 Alg11
    ## rowData names(10): feature_symbol is_feature_control ...
    ##   total_counts log10_total_counts
    ## colnames(268): 16cell 16cell.1 ... zy.2 zy.3
    ## colData names(30): cell_type2 cell_type1 ... pct_counts_ERCC
    ##   is_cell_control
    ## reducedDimNames(0):
    ## spikeNames(1): ERCC
    
    table(deng$cell_type2)
    
    ## 
    ##     16cell      4cell      8cell early2cell earlyblast  late2cell 
    ##         50         14         37          8         43         10 
    ##  lateblast   mid2cell   midblast         zy 
    ##         30         12         60          4
    
    table(deng$cell_type1)
    
    ## 
    ## 16cell  2cell  4cell  8cell  blast zygote 
    ##     50     22     14     37    133     12
    
    library(scater)
    
    ## Warning: package 'scater' was built under R version 3.5.2
    
    plotPCA(deng, colour_by = "cell_type2")
    
    ## Warning: 'add_ticks' is deprecated.
    ## Use '+ geom_rug(...)' instead.
    
    image.png
    plotPCA(deng, colour_by = "cell_type1")
    
    ## Warning: 'add_ticks' is deprecated.
    ## Use '+ geom_rug(...)' instead.
    
    image.png

    SC3

    当细胞数量不是很多的时候可以用sc3,共识聚类,可以一步分析到位,也可以根据需要分步分析。但当细胞数量很多的时候速度会很慢,用seurat会比较方便。

    library(SC3)
    deng <- sc3_estimate_k(deng)
    metadata(deng)$sc3$k_estimation
    
    ## [1] 6
    
    deng <- sc3(deng, ks = 10, biology = TRUE)
    
    ## Setting SC3 parameters...
    
    ## Calculating distances between the cells...
    
    ## Performing transformations and calculating eigenvectors...
    
    ## Performing k-means clustering...
    
    ## Calculating consensus matrix...
    
    ## Calculating biology...
    
    colnames(rowData(deng))
    
    ##  [1] "feature_symbol"          "is_feature_control"     
    ##  [3] "is_feature_control_ERCC" "mean_counts"            
    ##  [5] "log10_mean_counts"       "rank_counts"            
    ##  [7] "n_cells_counts"          "pct_dropout_counts"     
    ##  [9] "total_counts"            "log10_total_counts"     
    ## [11] "sc3_gene_filter"         "sc3_10_markers_clusts"  
    ## [13] "sc3_10_markers_padj"     "sc3_10_markers_auroc"   
    ## [15] "sc3_10_de_padj"
    
    colnames(colData(deng))
    
    ##  [1] "cell_type2"                            
    ##  [2] "cell_type1"                            
    ##  [3] "total_features"                        
    ##  [4] "log10_total_features"                  
    ##  [5] "total_counts"                          
    ##  [6] "log10_total_counts"                    
    ##  [7] "pct_counts_top_50_features"            
    ##  [8] "pct_counts_top_100_features"           
    ##  [9] "pct_counts_top_200_features"           
    ## [10] "pct_counts_top_500_features"           
    ## [11] "total_features_endogenous"             
    ## [12] "log10_total_features_endogenous"       
    ## [13] "total_counts_endogenous"               
    ## [14] "log10_total_counts_endogenous"         
    ## [15] "pct_counts_endogenous"                 
    ## [16] "pct_counts_top_50_features_endogenous" 
    ## [17] "pct_counts_top_100_features_endogenous"
    ## [18] "pct_counts_top_200_features_endogenous"
    ## [19] "pct_counts_top_500_features_endogenous"
    ## [20] "total_features_feature_control"        
    ## [21] "log10_total_features_feature_control"  
    ## [22] "total_counts_feature_control"          
    ## [23] "log10_total_counts_feature_control"    
    ## [24] "pct_counts_feature_control"            
    ## [25] "total_features_ERCC"                   
    ## [26] "log10_total_features_ERCC"             
    ## [27] "total_counts_ERCC"                     
    ## [28] "log10_total_counts_ERCC"               
    ## [29] "pct_counts_ERCC"                       
    ## [30] "is_cell_control"                       
    ## [31] "sc3_10_clusters"                       
    ## [32] "sc3_10_log2_outlier_score"
    
    sc3_plot_consensus(deng, k = 10, show_pdata = "cell_type2")
    
    image.png
    sc3_plot_silhouette(deng, k = 10)
    
    image.png
    sc3_plot_expression(deng, k = 10, show_pdata = "cell_type2")
    
    image.png
    sc3_plot_markers(deng, k = 10, show_pdata = "cell_type2")
    
    image.png
    plotPCA(deng, colour_by = "sc3_10_clusters")
    
    ## Warning: 'add_ticks' is deprecated.
    ## Use '+ geom_rug(...)' instead.
    
    image.png
    library(mclust)
    adjustedRandIndex(colData(deng)$cell_type2, colData(deng)$sc3_10_clusters)
    
    ## [1] 0.6616899
    

    pcaReduce

    library(pcaReduce)
    input <- logcounts(deng[rowData(deng)$sc3_gene_filter, ])
    pca.ite <- PCAreduce(t(input), nbt = 1, q = 30, method = 'S')[[1]]#nbt-number of samples,q-number of dimensions to start with
    colData(deng)$pcaReduce <- as.character(pca.ite[,32 - 10])#k-(2~q+1),k=10,(q+1)-10+1
    plotPCA(deng, colour_by = "pcaReduce")
    
    ## Warning: 'add_ticks' is deprecated.
    ## Use '+ geom_rug(...)' instead.
    
    image.png
    adjustedRandIndex(colData(deng)$cell_type2, colData(deng)$pcaReduce)
    
    ## [1] 0.3504523
    

    tsne+k-means

    set.seed(1234)
    deng <- runTSNE(deng,perplexity = 50)#change perplexity to have robust result
    colData(deng)$tSNE_kmeans8 <- as.character(kmeans(deng@reducedDims$TSNE, centers = 8)$clust)
    plotTSNE(deng,colour_by = "tSNE_kmeans8")
    
    ## Warning: 'add_ticks' is deprecated.
    ## Use '+ geom_rug(...)' instead.
    
    image.png
    colData(deng)$tSNE_kmeans10 <- as.character(kmeans(deng@reducedDims$TSNE, centers = 10)$clust)
    plotTSNE(deng,colour_by = "tSNE_kmeans10")
    
    ## Warning: 'add_ticks' is deprecated.
    ## Use '+ geom_rug(...)' instead.
    
    image.png
    adjustedRandIndex(colData(deng)$cell_type2, colData(deng)$tSNE_kmeans10)
    
    ## [1] 0.3753199
    
    deng <- runTSNE(deng,perplexity = 10)
    colData(deng)$tSNE_kmeans10.1 <- as.character(kmeans(deng@reducedDims$TSNE, centers = 10)$clust)
    adjustedRandIndex(colData(deng)$cell_type2, colData(deng)$tSNE_kmeans10.1)
    
    ## [1] 0.4531443
    
    deng <- runTSNE(deng,perplexity = 5)
    colData(deng)$tSNE_kmeans10.2 <- as.character(kmeans(deng@reducedDims$TSNE, centers = 10)$clust)
    adjustedRandIndex(colData(deng)$cell_type2, colData(deng)$tSNE_kmeans10.2)
    
    ## [1] 0.48855
    

    SINCERA

    这个包感觉用的不是很多,感觉就是用了个zscore转换,根据相关层次聚类,迭代找个k值。

    dat <- apply(input, 1, function(y) scRNA.seq.funcs::z.transform.helper(y))# perform z-score transformation
    dd <- as.dist((1 - cor(t(dat), method = "pearson"))/2)# hierarchical clustering
    hc <- hclust(dd, method = "average")
    
    num.singleton <- 0
    kk <- 1
    for (i in 2:dim(dat)[2]) {
        clusters <- cutree(hc, k = i)
        clustersizes <- as.data.frame(table(clusters))
        singleton.clusters <- which(clustersizes$Freq < 2)
        if (length(singleton.clusters) <= num.singleton) {
            kk <- i
        } else {
            break;
        }
    }
    cat(kk)
    
    ## 6
    
    library(pheatmap)
    pheatmap(
        t(dat),
        cluster_cols = hc,
        cutree_cols = kk,
        kmeans_k = 100,
        show_rownames = FALSE
    )
    
    image.png
    adjustedRandIndex(colData(deng)$cell_type2,clusters)
    
    ## [1] 0.3827252
    
    clusters1 <- cutree(hc, k = 10)
    adjustedRandIndex(colData(deng)$cell_type2,clusters1)
    
    ## [1] 0.3748432
    

    dynamicTreeCut

    deng <- runPCA(deng)
    my.dist <- dist(reducedDim(deng))
    my.tree <- hclust(my.dist, method="ward.D2")
    library(dynamicTreeCut)
    my.clusters <- unname(cutreeDynamic(my.tree, distM=as.matrix(my.dist), 
                                          minClusterSize=1, verbose=0))#modify cutheight to change clusters
    table(my.clusters)
    
    ## my.clusters
    ##  1  2  3  4  5  6  7  8 
    ## 55 52 52 34 26 22 14 13
    
    deng$clusters <- factor(my.clusters)
    plotTSNE(deng,colour_by="clusters")
    
    ## Warning: 'add_ticks' is deprecated.
    ## Use '+ geom_rug(...)' instead.
    
    image.png
    adjustedRandIndex(colData(deng)$cell_type2,my.clusters)
    
    ## [1] 0.5354439
    

    SNN clip

    基于graph理论,找共有邻近点,现在的seurat也是基于graph,用knn,snn什么的,可以改变共享邻近点的数目改变cluster结果。

    library(igraph)
    library(scran)
    snn.gr <- buildSNNGraph(deng, use.dimred="PCA")
    cluster.out <- igraph::cluster_walktrap(snn.gr)
    my.clusters <- cluster.out$membership
    table(my.clusters)
    
    ## my.clusters
    ##  1  2  3  4  5  6  7  8  9 
    ## 53 36 13 22 40 33 36 21 14
    
    deng$cluster <- factor(my.clusters)
    plotTSNE(deng, colour_by="cluster")
    
    ## Warning: 'add_ticks' is deprecated.
    ## Use '+ geom_rug(...)' instead.
    
    image.png
    set.seed(123)
    reducedDim(deng, "force") <- igraph::layout_with_fr(snn.gr, niter=3000)
    plotReducedDim(deng, colour_by="cluster", use_dimred="force")
    
    ## Warning: 'add_ticks' is deprecated.
    ## Use '+ geom_rug(...)' instead.
    
    image.png
    adjustedRandIndex(colData(deng)$cell_type2,my.clusters)
    
    ## [1] 0.4505625
    

    RCA

    据说是现有的聚类效果最好的包,用分析的数据投影一个参考来进行聚类,但是模式还不是很成熟,暂时还没有小鼠的模式,而且前期流程跑下来的数据是fpkm,也许有些影响。并且要求名字格式“XXXX_genenames_YYYY",前面是位置信息,后面是ens号,emmm不知道为啥搞成这样,毕竟最后它还是提取中间的名字进行分析。。。。

    RCA, short for Reference Component Analysis, is an R package for robust clustering analysis of single cell RNA sequencing data (scRNAseq). It is developed by Prabhakar Group at Genome Institute of Singapore (GIS).

    Features: -clustering analysis of scRNAseq data from Human samples -three modes : “GlobalPanel”: default option for clusterig general single cell data sets that include a wide spectrum of cell types. “ColonEpitheliumPanel”: suitable for analyzing human colon/intestine derived samples. “SelfProjection”: suitable for analyzing data sets from not-well-studied tissue types (still under optimization)

    相关文章

      网友评论

        本文标题:单细胞学习3

        本文链接:https://www.haomeiwen.com/subject/oomiyqtx.html