美文网首页
空间转录组合作项目分析示例(二)

空间转录组合作项目分析示例(二)

作者: 单细胞空间交响乐 | 来源:发表于2024-08-26 11:29 被阅读0次

    作者,Evil Genius

    这一篇我们继续合作项目的分析示例,我们在了解了空间转录组的整体分析内容之后,就需要开始针对我们自己的数据进行有效分析,注意是有效分析,就是大家分析出来的结果要有生物学意义,不是随便跑个结果就结束的这种。

    首先拿到数据,现在通常都是多个空间数据样本,当然了,低精度还需要单细胞样本,即使是HD,也是推荐用单细胞来解卷积的。


    那么我们首先第一步需要考虑的问题,就是空间数据质控的问题,上过课的都应该明白,空间数据质控不能像单细胞质控那样处理,一般只需要结合图像信息即可,其中唯一需要考虑的问题,就是空间污染的问题。



    数据矫正之后,我们就需要考虑第二个问题,空间数据要不要整合的问题,整合还是不整合都有文章运用,不过通常来讲整合占大多数,这也是公司层面默认的选择,如果选择不整合,就需要根据形态学区域划分,这个难度更高一点。

    我们以其中4个样本为例,实现基础的整合分析。

    我们如果选择了整合,那么方法有两种CCA、Harmony,哪种更好呢?没有定论,从结合空间图像的效果来看,harmony大多数情况是符合形态学特征的,当然了,分析的时候两种结果都要拿到做比较分析。
    分析的时候要注意,软件版本之间可能不兼容。
    library(tidyverse)
    library(Seurat)
    library(cowplot)
    library(RColorBrewer)
    library(pheatmap)
    library(scales)
    library(ggsci)
    library(harmony)
    source('/home/scRNA/DB/SPATIAL_FUNCTIONS.R')
    # Define Color Scheme
    custom_colors <- c("#B4DFFC","#6EAB3D","#FFD700","#A020F0","#FFA500","#AEDD3C","#595959","#D2AF81FF","#3A49FC","#FF0000","#A86F3D","#A18FAA")
    
    #读取数据
    MGH258 = Load10X_Spatial('/home/jinsai/pipeline/drug_pipeline/DB/Inputs/general/GBM_data/MGH258/outs')
    UKF243 = Load10X_Spatial('/home/jinsai/pipeline/drug_pipeline/DB/Inputs/general/GBM_data/UKF243/outs')
    UKF248 = Load10X_Spatial('/home/jinsai/pipeline/drug_pipeline/DB/Inputs/general/GBM_data/UKF248/outs')
    UKF251 = Load10X_Spatial('/home/jinsai/pipeline/drug_pipeline/DB/Inputs/general/GBM_data/UKF251/outs')
    
    samples =  c(MGH258,UKF243,UKF248,UKF251)
    
    ####Merging data for Harmony Batch correction(CCA)
    combined_CCA <- st_combine(samples, ndim = 20, res = 0.5)
    ####Harmony整合
    combined_harmony <- SCTransform(combined_CCA,assay = "Spatial", vars.to.regress = 'sample')
    combined_harmony <- RunPCA(combined_harmony,assay = "SCT")
    combined_harmony<- RunHarmony(combined_harmony,assay.use = "SCT",project.dim = FALSE,group.by.vars = 'sample')
    
    这个时候就会拿到基础的整合结果。
    下面就是基础的降维聚类差异富集了。
    ##CCA
    combined_CCA <- combined_CCA %>% 
        RunUMAP(reduction = "CCA", dims = 1:20) %>% 
        FindNeighbors(reduction = "CCA", dims = 1:20) %>% 
        FindClusters(resolution = 0.4) %>% 
        identity()
    
    ####Harmony
    combined_harmony <- combined_harmony %>% 
        RunUMAP(reduction = "harmony", dims = 1:20) %>% 
        FindNeighbors(reduction = "harmony", dims = 1:20) %>% 
        FindClusters(resolution = 0.4) %>% 
        identity()
    
    
    差异富集大家自己做,然后我们空间展示
    DimPlot(combined_harmony, cols=color.labels, pt.size=3.5)
    
    结合空间信息进行调整
    SpatialDimPlot(combined_harmony,cols=color.labels,pt.size.factor = 2.5)
    

    基础分析是一切的基础,通常在基础分析之上,会加上NMF寻找programs

    library (nmf)
    m <- as.matrix(combined_harmony)####注意这是抽取矩阵
    res <- NMF::nmf(x = m, rank = rank_lb:rank_ub, nrun = 5, method = "snmf/r", .opt = list(debug=F, parallel=F, shared.memory=F, verbose=T))
    
    #robust_nmf_programs function
    
    robust_nmf_programs <- function(nmf_programs, intra_min = 30, intra_max = 10, inter_filter=T, inter_min = 10) {
      
      # Select NMF programs based on the minimum overlap with other NMF programs from the same sample
      intra_intersect <- lapply(nmf_programs, function(z) apply(z, 2, function(x) apply(z, 2, function(y) length(intersect(x,y))))) 
      intra_intersect_max <- lapply(intra_intersect, function(x) apply(x, 2, function(y) sort(y, decreasing = T)[2]))             
      nmf_sel <- lapply(names(nmf_programs), function(x) nmf_programs[[x]][,intra_intersect_max[[x]]>=intra_min]) 
      names(nmf_sel) <- names(nmf_programs)
      
      # Select NMF programs based on i) the maximum overlap with other NMF programs from the same sample and
      # ii) the minimum overlap with programs from another sample
      nmf_sel_unlist <- do.call(cbind, nmf_sel)
      inter_intersect <- apply(nmf_sel_unlist , 2, function(x) apply(nmf_sel_unlist , 2, function(y) length(intersect(x,y)))) ## calculating intersection between all programs
      
      final_filter <- NULL 
      for(i in names(nmf_sel)) {
        a <- inter_intersect[grep(i, colnames(inter_intersect), invert = T),grep(i, colnames(inter_intersect))]
        b <- sort(apply(a, 2, max), decreasing = T) # for each sample, ranks programs based on their maximum overlap with programs of other samples
        if(inter_filter==T) b <- b[b>=inter_min] # selects programs with a maximum intersection of at least 10
        if(length(b) > 1) {
          c <- names(b[1]) 
          for(y in 2:length(b)) {
            if(max(inter_intersect[c,names(b[y])]) <= intra_max) c <- c(c,names(b[y])) # selects programs iteratively from top-down. Only selects programs that have a intersection smaller than 10 with a previously selected programs
          }
          final_filter <- c(final_filter, c)
        } else {
          final_filter <- c(final_filter, names(b))
        }
      }
      return(final_filter)                                                      
    }
    
    ## Create list of NMF matrics where each sample is an entry
    prog_genes_ls <- list()
    for(i in seq_along(sample_ls)) {
      nmf_obj <- readRDS(paste(path, sample_ls[[i]], sep = "/"))
      samp_name <- stringr::str_split(sample_ls[[i]], pattern = "_")[[1]][[1]]
      nmf_mats <- c()
      for(j in names(nmf_obj$fit)) {
        get_basis <- basis(nmf_obj$fit[[j]])
        colnames(get_basis)  <- paste0(samp_name, ".", j, ".", 1:j)
        nmf_mats <- cbind(nmf_mats, get_basis)
      }
      prog_genes_ls[[i]] <- nmf_mats
      names(prog_genes_ls)[i] <- paste0(samp_name)
      rm(nmf_obj, nmf_mats, get_basis)
    }
    Genes_nmf_w_basis <- do.call(c, list(prog_genes_ls))
    
    # Find robust NMFs
    # get gene programs (top 50 genes by NMF score)
    nmf_programs_sig <- lapply(prog_genes_ls, function(x) apply(x, 2, function(y) names(sort(y, decreasing = T))[1:50]))
    
    # for each sample, select robust NMF programs (i.e. observed using different ranks in the same sample), remove redundancy due to multiple ranks, and apply a filter based on the similarity to programs from other samples. 
    nmf_filter_all <- robust_nmf_programs(nmf_programs_sig, intra_min = 35, intra_max = 10, inter_filter=T, inter_min = 10)
    nmf_programs_sig <- lapply(nmf_programs_sig, function(x) x[, is.element(colnames(x), nmf_filter_all),drop=F])
    nmf_programs_sig <- do.call(cbind, nmf_programs_sig)
    
    #leiden clusters
    all_leiden_programs <- do.call(cbind, all_leiden_programs)
    nmf_programs_sig<-cbind(nmf_programs_sig,all_leiden_programs)
    
    # calculate similarity between programs
    nmf_intersect <- apply(nmf_programs_sig , 2, function(x) apply(nmf_programs_sig , 2, function(y) length(intersect(x,y)))) 
    
    # hierarchical clustering of the similarity matrix 
    nmf_intersect_hc_all <- hclust(as.dist(50-nmf_intersect), method="average") 
    nmf_intersect_hc_all <- reorder(as.dendrogram(nmf_intersect_hc_all), colMeans(nmf_intersect))
    nmf_intersect         <- nmf_intersect[order.dendrogram(nmf_intersect_hc_all), order.dendrogram(nmf_intersect_hc_all)]
    
    nmf_intersect<-readRDS("MP/NMF/nmf_intersect_124.RDS")
    nmf_programs_sig<-readRDS("MP/NMF/nmf_programs_sig_124.RDS")
    
    
    ### use a clustering approach that updates MPs in each iteration 
    
    nmf_intersect_KEEP    <- nmf_intersect
    nmf_programs_sig_KEEP <- nmf_programs_sig
    
    
    ### Parameters (later change to function form)v1-keep!
    
    Min_intersect_initial <- 12  # the minimal intersection cutoff for defining the Founder NMF program of a cluster
    Min_intersect_cluster <- 12 # the minimal intersection cuttof for adding a new NMF to the forming cluster 
    Min_group_size        <- 4     # the minimal group size to consider for defining the Founder_NMF of a MP 
    
    Sorted_intersection       <-  sort(apply(nmf_intersect , 2, function(x) (length(which(x>=Min_intersect_initial))-1)  ) , decreasing = TRUE)
    
    Cluster_list <- list()   ### Every entry contains the NMFs of a chosec cluster
    k <- 1
    Curr_cluster <- c()
    MP_list      <- list()
    
    while (Sorted_intersection[1]>Min_group_size) {   
      
      Curr_cluster <- c(Curr_cluster , names(Sorted_intersection[1]))
      
      ### intersection between all remaining NMFs and Genes in MP 
      Genes_MP                   <- nmf_programs_sig[,names(Sorted_intersection[1])] # initial genes are those in the first NMF. Genes_MP always has only 50 genes consisting of the current MP
      nmf_programs_sig           <- nmf_programs_sig[,-match(names(Sorted_intersection[1]) , colnames(nmf_programs_sig))]  # remove selected NMF
      Intersection_with_Genes_MP <- sort(apply(nmf_programs_sig, 2, function(x) length(intersect(Genes_MP,x))) , decreasing = TRUE) # intersection between all other NMFs and Genes_MP  
      NMF_history                <- Genes_MP  # has all genes in all NMFs in the current cluster, for newly defining Genes_MP after adding a new NMF 
      
      ### Create gene list - composed of intersecting genes in descending order. Update Curr_cluster each time.
      
      while ( Intersection_with_Genes_MP[1] >= Min_intersect_cluster) {   ### Define current cluster 
        
        Curr_cluster  <- c(Curr_cluster , names(Intersection_with_Genes_MP)[1])
        
        Genes_MP_temp   <- sort(table(c(NMF_history , nmf_programs_sig[,names(Intersection_with_Genes_MP)[1]])), decreasing = TRUE)   ## Genes_MP is newly defined each time according to all NMFs in the current cluster 
        Genes_at_border <- Genes_MP_temp[which(Genes_MP_temp == Genes_MP_temp[50])]   ### genes with overlap equal to the 50th gene
        
        if (length(Genes_at_border)>1){
          ### Sort last genes in Genes_at_border according to maximal NMF gene scores
          ### Run over all NMF programs in Curr_cluster and extract NMF scores for each gene
          Genes_curr_NMF_score <- c()
          for (i in Curr_cluster) {
            curr_study           <- strsplit(i, "[.]")[[1]][[1]]
            Q                    <- Genes_nmf_w_basis[[curr_study]][ match(names(Genes_at_border),toupper(rownames(Genes_nmf_w_basis[[curr_study]])))[!is.na(match(names(Genes_at_border),toupper(rownames(Genes_nmf_w_basis[[curr_study]]))))]   ,i] 
            #names(Q)             <- names(Genes_at_border[!is.na(match(names(Genes_at_border),toupper(rownames(Genes_nmf_w_basis[[curr_study]]))))])  ### sometimes when adding genes the names do not appear 
            Genes_curr_NMF_score <- c(Genes_curr_NMF_score,  Q )
          }
          Genes_curr_NMF_score_sort <- sort(Genes_curr_NMF_score , decreasing = TRUE)
          Genes_curr_NMF_score_sort <- Genes_curr_NMF_score_sort[unique(names(Genes_curr_NMF_score_sort))]   ## take only the maximal score of each gene - which is the first entry after sorting
          
          Genes_MP_temp <- c(names(Genes_MP_temp[which(Genes_MP_temp > Genes_MP_temp[50])]) , names(Genes_curr_NMF_score_sort))
          
        } else {
          Genes_MP_temp <- names(Genes_MP_temp)[1:50] 
        }
        
        NMF_history   <- c(NMF_history , nmf_programs_sig[,names(Intersection_with_Genes_MP)[1]]) 
        Genes_MP <- Genes_MP_temp[1:50]
        
        nmf_programs_sig      <- nmf_programs_sig[,-match(names(Intersection_with_Genes_MP)[1] , colnames(nmf_programs_sig))]  # remove selected NMF
        
        Intersection_with_Genes_MP <- sort(apply(nmf_programs_sig, 2, function(x) length(intersect(Genes_MP,x))) , decreasing = TRUE) # intersection between all other NMFs and Genes_MP  
        
      }
      
      Cluster_list[[paste0("Cluster_",k)]] <- Curr_cluster
      MP_list[[paste0("MP_",k)]]           <- Genes_MP
      k <- k+1
      
      
      nmf_intersect             <- nmf_intersect[-match(Curr_cluster,rownames(nmf_intersect) ) , -match(Curr_cluster,colnames(nmf_intersect) ) ]  # remove current chosen cluster
      
      Sorted_intersection       <-  sort(apply(nmf_intersect , 2, function(x) (length(which(x>=Min_intersect_initial))-1)  ) , decreasing = TRUE)   ### Sort intersection of remaining NMFs not included in any of the previous clusters
      
      Curr_cluster <- c()
      print(dim(nmf_intersect)[2])
    }
    
    #### *****  Sort Jaccard similarity plot according to new clusters:
    
    inds_sorted <- c()
    
    for (j in 1:length(Cluster_list)){
      
      inds_sorted <- c(inds_sorted , match(Cluster_list[[j]] , colnames(nmf_intersect_KEEP)))
      
    }
    inds_new <- c(inds_sorted   ,   which(is.na( match(1:dim(nmf_intersect_KEEP)[2],inds_sorted)))) ### combine inds in clusters with inds without clusters
    
    
    # plot re-ordered similarity matrix heatmap     
    nmf_intersect_meltI_NEW <- reshape2::melt(nmf_intersect_KEEP[inds_new,inds_new]) 
    
    custom_magma <- c(colorRampPalette(c("white", rev(magma(323, begin = 0.15))[1]))(10), rev(magma(323, begin = 0.18)))
    
    
    ggplot(data = nmf_intersect_meltI_NEW, aes(x=Var1, y=Var2, fill=100*value/(100-value), color=100*value/(100-value))) + 
      geom_tile() + 
      scale_color_gradient2(limits=c(2,25), low=custom_magma[1:111],  mid =custom_magma[112:222], high = custom_magma[223:333], midpoint = 13.5, oob=squish, name="Similarity\n(Jaccard index)") +                                
      scale_fill_gradient2(limits=c(2,25), low=custom_magma[1:111],  mid =custom_magma[112:222], high = custom_magma[223:333], midpoint = 13.5, oob=squish, name="Similarity\n(Jaccard index)")  +
      theme( axis.ticks = element_blank(), panel.border = element_rect(fill=F), panel.background = element_blank(),  axis.line = element_blank(), axis.text = element_text(size = 11), axis.title = element_text(size = 12), legend.title = element_text(size=11), legend.text = element_text(size = 10), legend.text.align = 0.5, legend.justification = "bottom") + 
      theme(axis.title.x=element_blank(), axis.text.x=element_blank(), axis.ticks.x=element_blank()) + 
      theme(axis.title.y=element_blank(), axis.text.y=element_blank(), axis.ticks.y=element_blank()) + 
      guides(fill = guide_colourbar(barheight = 4, barwidth = 1))
    
    ggplot(data = nmf_intersect_meltI_NEW, aes(x=Var1, y=Var2, fill=100*value/(100-value), color=100*value/(100-value))) + 
      geom_tile() + 
      scale_color_gradient2(limits=c(2,30), low=custom_magma[1:111],  mid =custom_magma[112:222], high = custom_magma[223:333], midpoint = 16, oob=squish, name="Similarity\n(Jaccard index)") +                                
      scale_fill_gradient2(limits=c(2,30), low=custom_magma[1:111],  mid =custom_magma[112:222], high = custom_magma[223:333], midpoint = 16, oob=squish, name="Similarity\n(Jaccard index)")  +
      theme( axis.ticks = element_blank(), panel.border = element_rect(fill=F), panel.background = element_blank(),  axis.line = element_blank(), axis.text = element_text(size = 11), axis.title = element_text(size = 12), legend.title = element_text(size=11), legend.text = element_text(size = 10), legend.text.align = 0.5, legend.justification = "bottom") + 
      theme(axis.title.x=element_blank(), axis.text.x=element_blank(), axis.ticks.x=element_blank()) + 
      theme(axis.title.y=element_blank(), axis.text.y=element_blank(), axis.ticks.y=element_blank()) + 
      guides(fill = guide_colourbar(barheight = 4, barwidth = 1))
    

    下一步我们就需要考虑空间注释的问题

    生活很好,有你更好

    相关文章

      网友评论

          本文标题:空间转录组合作项目分析示例(二)

          本文链接:https://www.haomeiwen.com/subject/znwokjtx.html