美文网首页单细胞测序技术ggplot2高级绘图操作单细胞数据分析
10X单细胞(10X空间转录组)分析回顾之一些细节绘图操作

10X单细胞(10X空间转录组)分析回顾之一些细节绘图操作

作者: 单细胞空间交响乐 | 来源:发表于2022-01-26 10:30 被阅读0次

    临近年底,我们要总结,马上回家了,大家今天到底挣了点什么??反省一下,今天的自己,有没有更优秀,这一篇我给大家分享一些单细胞分析图片的细节操作,图片是我们分析结果的展示,要美观大方,就算我们过年要相亲了,自己也要打扮的精精神神的,😄

    图片.png

    第一张图,Cluster之间的相关性

    sce <- as.SingleCellExperiment(seurat)
    reducedDim(sce, 'PCA_sub') <- reducedDim(sce, 'PCA')[,1:15, drop = FALSE]
    
    ass_prob <- bootstrapCluster(sce, FUN = function(x) {
        g <- buildSNNGraph(x, use.dimred = 'PCA_sub')
        igraph::cluster_walktrap(g)$membership
      },
      clusters = sce$seurat_clusters
    )
    
    p <- ass_prob %>%
      as_tibble() %>%
      rownames_to_column(var = 'cluster_1') %>%
      pivot_longer(
        cols = 2:ncol(.),
        names_to = 'cluster_2',
        values_to = 'probability'
      ) %>%
      mutate(
        cluster_1 = as.character(as.numeric(cluster_1) - 1),
        cluster_1 = factor(cluster_1, levels = rev(unique(cluster_1))),
        cluster_2 = factor(cluster_2, levels = unique(cluster_2))
      ) %>%
      ggplot(aes(cluster_2, cluster_1, fill = probability)) +
      geom_tile(color = 'white') +
      geom_text(aes(label = round(probability, digits = 2)), size = 2.5) +
      scale_x_discrete(name = 'Cluster', position = 'top') +
      scale_y_discrete(name = 'Cluster') +
      scale_fill_gradient(
        name = 'Probability', low = 'white', high = '#c0392b', na.value = '#bdc3c7',
        limits = c(0,1),
        guide = guide_colorbar(
          frame.colour = 'black', ticks.colour = 'black', title.position = 'left',
          title.theme = element_text(hjust = 1, angle = 90),
          barwidth = 0.75, barheight = 10
        )
      ) +
      coord_fixed() +
      theme_bw() +
      theme(
        legend.position = 'right',
        panel.grid.major = element_blank()
      )
    
    ggsave('plots/cluster_stability.png', p, height = 6, width = 7)
    
    图片.png

    Silhouette plot

    library(cluster)
    
    distance_matrix <- dist(Embeddings(seurat[['pca']])[, 1:15])
    clusters <- seurat@meta.data$seurat_clusters
    silhouette <- silhouette(as.numeric(clusters), dist = distance_matrix)
    seurat@meta.data$silhouette_score <- silhouette[,3]
    
    mean_silhouette_score <- mean(seurat@meta.data$silhouette_score)
    
    p <- seurat@meta.data %>%
      mutate(barcode = rownames(.)) %>%
      arrange(seurat_clusters,-silhouette_score) %>%
      mutate(barcode = factor(barcode, levels = barcode)) %>%
      ggplot() +
      geom_col(aes(barcode, silhouette_score, fill = seurat_clusters), show.legend = FALSE) +
      geom_hline(yintercept = mean_silhouette_score, color = 'red', linetype = 'dashed') +
      scale_x_discrete(name = 'Cells') +
      scale_y_continuous(name = 'Silhouette score') +
      scale_fill_manual(values = custom_colors$discrete) +
      theme_bw() +
      theme(
        axis.title.x = element_blank(),
        axis.text.x = element_blank(),
        axis.ticks.x = element_blank(),
        panel.grid.major = element_blank(),
        panel.grid.minor = element_blank()
      )
    ggsave('plots/silhouette_plot.png', p, height = 4, width = 8)
    
    图片.png

    Cluster tree

    seurat <- BuildClusterTree(
      seurat,
      dims = 1:15,
      reorder = FALSE,
      reorder.numeric = FALSE
    )
    
    tree <- seurat@tools$BuildClusterTree
    tree$tip.label <- paste0("Cluster ", tree$tip.label)
    
    p <- ggtree::ggtree(tree, aes(x, y)) +
      scale_y_reverse() +
      ggtree::geom_tree() +
      ggtree::theme_tree() +
      ggtree::geom_tiplab(offset = 1) +
      ggtree::geom_tippoint(color = custom_colors$discrete[1:length(tree$tip.label)], shape = 16, size = 5) +
      coord_cartesian(clip = 'off') +
      theme(plot.margin = unit(c(0,2.5,0,0), 'cm'))
    
    ggsave('plots/cluster_tree.png', p, height = 4, width = 6)
    
    图片.png

    比例图

    temp_labels <- seurat@meta.data %>%
      group_by(sample) %>%
      tally()
    
    p1 <- table_samples_by_clusters %>%
      select(-c('total_cell_count')) %>%
      reshape2::melt(id.vars = 'sample') %>%
      mutate(sample = factor(sample, levels = levels(seurat@meta.data$sample))) %>%
      ggplot(aes(sample, value)) +
      geom_bar(aes(fill = variable), position = 'fill', stat = 'identity') +
      geom_text(
        data = temp_labels,
        aes(x = sample, y = Inf, label = paste0('n = ', format(n, big.mark = ',', trim = TRUE)), vjust = -1),
        color = 'black', size = 2.8
      ) +
      scale_fill_manual(name = 'Cluster', values = custom_colors$discrete) +
      scale_y_continuous(name = 'Percentage [%]', labels = scales::percent_format(), expand = c(0.01,0)) +
      coord_cartesian(clip = 'off') +
      theme_bw() +
      theme(
        legend.position = 'left',
        plot.title = element_text(hjust = 0.5),
        text = element_text(size = 16),
        panel.grid.major = element_blank(),
        panel.grid.minor = element_blank(),
        axis.title.x = element_blank(),
        axis.text.x = element_text(angle = 45, hjust = 1, vjust = 1),
        plot.margin = margin(t = 20, r = 0, b = 0, l = 0, unit = 'pt')
      )
    
    temp_labels <- seurat@meta.data %>%
      group_by(seurat_clusters) %>%
      tally() %>%
      dplyr::rename('cluster' = seurat_clusters)
    
    p2 <- table_clusters_by_samples %>%
      select(-c('total_cell_count')) %>%
      reshape2::melt(id.vars = 'cluster') %>%
      mutate(cluster = factor(cluster, levels = levels(seurat@meta.data$seurat_clusters))) %>%
      ggplot(aes(cluster, value)) +
      geom_bar(aes(fill = variable), position = 'fill', stat = 'identity') +
      geom_text(
        data = temp_labels, aes(x = cluster, y = Inf, label = paste0('n = ', format(n, big.mark = ',', trim = TRUE)), vjust = -1),
        color = 'black', size = 2.8
      ) +
      scale_fill_manual(name = 'Sample', values = custom_colors$discrete) +
      scale_y_continuous(name = 'Percentage [%]', labels = scales::percent_format(), expand = c(0.01,0)) +
      coord_cartesian(clip = 'off') +
      theme_bw() +
      theme(
        legend.position = 'right',
        plot.title = element_text(hjust = 0.5),
        text = element_text(size = 16),
        panel.grid.major = element_blank(),
        panel.grid.minor = element_blank(),
        axis.title = element_blank(),
        plot.margin = margin(t = 20, r = 0, b = 0, l = 10, unit = 'pt')
      )
    
    ggsave(
      'plots/composition_samples_clusters_by_percent.png',
      p1 + p2 +
      plot_layout(ncol = 2, widths = c(
        seurat@meta.data$sample %>% unique() %>% length(),
        seurat@meta.data$seurat_clusters %>% unique() %>% length()
      )),
      width = 18, height = 8
    )
    
    图片.png
    temp_labels <- seurat@meta.data %>%
      group_by(sample) %>%
      tally()
    
    p1 <- table_samples_by_cell_cycle %>%
      select(-c('total_cell_count')) %>%
      reshape2::melt(id.vars = 'sample') %>%
      mutate(sample = factor(sample, levels = levels(seurat@meta.data$sample))) %>%
      ggplot(aes(sample, value)) +
      geom_bar(aes(fill = variable), position = 'fill', stat = 'identity') +
      geom_text(
        data = temp_labels,
        aes(x = sample, y = Inf, label = paste0('n = ', format(n, big.mark = ',', trim = TRUE)), vjust = -1),
        color = 'black', size = 2.8
      ) +
      scale_fill_manual(name = 'Cell cycle', values = custom_colors$cell_cycle) +
      scale_y_continuous(name = 'Percentage [%]', labels = scales::percent_format(), expand = c(0.01,0)) +
      coord_cartesian(clip = 'off') +
      theme_bw() +
      theme(
        legend.position = 'none',
        plot.title = element_text(hjust = 0.5),
        text = element_text(size = 16),
        panel.grid.major = element_blank(),
        panel.grid.minor = element_blank(),
        axis.title.x = element_blank(),
        axis.text.x = element_text(angle = 45, hjust = 1, vjust = 1),
        plot.margin = margin(t = 20, r = 0, b = 0, l = 0, unit = 'pt')
      )
    
    temp_labels <- seurat@meta.data %>%
      group_by(seurat_clusters) %>%
      tally() %>%
      dplyr::rename('cluster' = seurat_clusters)
    
    p2 <- table_clusters_by_cell_cycle %>%
      select(-c('total_cell_count')) %>%
      reshape2::melt(id.vars = 'cluster') %>%
      mutate(cluster = factor(cluster, levels = levels(seurat@meta.data$seurat_clusters))) %>%
      ggplot(aes(cluster, value)) +
      geom_bar(aes(fill = variable), position = 'fill', stat = 'identity') +
      geom_text(
        data = temp_labels,
        aes(x = cluster, y = Inf, label = paste0('n = ', format(n, big.mark = ',', trim = TRUE)), vjust = -1),
        color = 'black', size = 2.8
      ) +
      scale_fill_manual(name = 'Cell cycle', values = custom_colors$cell_cycle) +
      scale_y_continuous(name = 'Percentage [%]', labels = scales::percent_format(), expand = c(0.01,0)) +
      coord_cartesian(clip = 'off') +
      theme_bw() +
      theme(
        legend.position = 'right',
        plot.title = element_text(hjust = 0.5),
        text = element_text(size = 16),
        panel.grid.major = element_blank(),
        panel.grid.minor = element_blank(),
        axis.title = element_blank(),
        plot.margin = margin(t = 20, r = 0, b = 0, l = 10, unit = 'pt')
      )
    
    ggsave(
      'plots/composition_samples_clusters_by_cell_cycle_by_percent.png',
      p1 + p2 +
      plot_layout(ncol = 2, widths = c(
        seurat@meta.data$sample %>% unique() %>% length(),
        seurat@meta.data$seurat_clusters %>% unique() %>% length()
      )),
      width = 18, height = 8
    )
    
    图片.png

    SNN graph

    library(ggnetwork)
    
    SCT_snn <- seurat@graphs$SCT_snn %>%
      as.matrix() %>%
      ggnetwork() %>%
      left_join(seurat@meta.data %>% mutate(vertex.names = rownames(.)), by = 'vertex.names')
    
    p1 <- ggplot(SCT_snn, aes(x = x, y = y, xend = xend, yend = yend)) +
      geom_edges(color = 'grey50', alpha = 0.05) +
      geom_nodes(aes(color = sample), size = 0.5) +
      scale_color_manual(
        name = 'Sample', values = custom_colors$discrete,
        guide = guide_legend(ncol = 1, override.aes = list(size = 2))
      ) +
      theme_blank() +
      theme(legend.position = 'left') +
      annotate(
        geom = 'text', x = Inf, y = -Inf,
        label = paste0('n = ', format(nrow(seurat@meta.data), big.mark = ',', trim = TRUE)),
        vjust = -1.5, hjust = 1.25, color = 'black', size = 2.5
      )
    
    p2 <- ggplot(SCT_snn, aes(x = x, y = y, xend = xend, yend = yend)) +
      geom_edges(color = 'grey50', alpha = 0.05) +
      geom_nodes(aes(color = seurat_clusters), size = 0.5) +
      scale_color_manual(
        name = 'Cluster', values = custom_colors$discrete,
        guide = guide_legend(ncol = 1, override.aes = list(size = 2))
      ) +
      theme_blank() +
      theme(legend.position = 'right') +
      annotate(
        geom = 'text', x = Inf, y = -Inf,
        label = paste0('n = ', format(nrow(seurat@meta.data), big.mark = ',', trim = TRUE)),
        vjust = -1.5, hjust = 1.25, color = 'black', size = 2.5
      )
    
    ggsave(
      'plots/snn_graph_by_sample_cluster.png',
      p1 + p2 + plot_layout(ncol = 2),
      height = 5, width = 11
    )
    
    图片.png
    UMAP图
    plot_umap_by_nCount <- bind_cols(seurat@meta.data, as.data.frame(seurat@reductions$UMAP@cell.embeddings)) %>%
      ggplot(aes(UMAP_1, UMAP_2, color = nCount_RNA)) +
      geom_point(size = 0.2) +
      theme_bw() +
      scale_color_viridis(
        guide = guide_colorbar(frame.colour = 'black', ticks.colour = 'black'),
        labels = scales::comma,
      ) +
      labs(color = 'Number of\ntranscripts') +
      theme(legend.position = 'left') +
      coord_fixed() +
      annotate(
        geom = 'text', x = Inf, y = -Inf,
        label = paste0('n = ', format(nrow(seurat@meta.data), big.mark = ',', trim = TRUE)),
        vjust = -1.5, hjust = 1.25, color = 'black', size = 2.5
      )
    
    plot_umap_by_sample <- bind_cols(seurat@meta.data, as.data.frame(seurat@reductions$UMAP@cell.embeddings)) %>%
      ggplot(aes(UMAP_1, UMAP_2, color = sample)) +
      geom_point(size = 0.2) +
      theme_bw() +
      scale_color_manual(values = custom_colors$discrete) +
      labs(color = 'Sample') +
      guides(colour = guide_legend(override.aes = list(size = 2))) +
      theme(legend.position = 'right') +
      coord_fixed() +
      annotate(
        geom = 'text', x = Inf, y = -Inf,
        label = paste0('n = ', format(nrow(seurat@meta.data), big.mark = ',', trim = TRUE)),
        vjust = -1.5, hjust = 1.25, color = 'black', size = 2.5
      )
    
    plot_umap_by_cluster <- bind_cols(seurat@meta.data, as.data.frame(seurat@reductions$UMAP@cell.embeddings)) %>%
      ggplot(aes(UMAP_1, UMAP_2, color = seurat_clusters)) +
      geom_point(size = 0.2) +
      theme_bw() +
      scale_color_manual(
        name = 'Cluster', values = custom_colors$discrete,
        guide = guide_legend(ncol = 2, override.aes = list(size = 2))
      ) +
      theme(legend.position = 'left') +
      coord_fixed() +
      annotate(
        geom = 'text', x = Inf, y = -Inf,
        label = paste0('n = ', format(nrow(seurat@meta.data), big.mark = ',', trim = TRUE)),
        vjust = -1.5, hjust = 1.25, color = 'black', size = 2.5
      )
    
    plot_umap_by_cell_cycle <- bind_cols(seurat@meta.data, as.data.frame(seurat@reductions$UMAP@cell.embeddings)) %>%
      ggplot(aes(UMAP_1, UMAP_2, color = cell_cycle_seurat)) +
      geom_point(size = 0.2) +
      theme_bw() +
      scale_color_manual(values = custom_colors$cell_cycle) +
      labs(color = 'Cell cycle') +
      guides(colour = guide_legend(override.aes = list(size = 2))) +
      theme(legend.position = 'right') +
      coord_fixed() +
      annotate(
        geom = 'text', x = Inf, y = -Inf,
        label = paste0('n = ', format(nrow(seurat@meta.data), big.mark = ',', trim = TRUE)),
        vjust = -1.5, hjust = 1.25, color = 'black', size = 2.5
      )
    
    ggsave(
      'plots/umap.png',
      plot_umap_by_nCount + plot_umap_by_sample +
      plot_umap_by_cluster + plot_umap_by_cell_cycle +
      plot_layout(ncol = 2),
      height = 6,
      width = 8.5
    )
    
    图片.png
    Alluvial plots
    ## get sample and cluster names
    samples <- levels(seurat@meta.data$sample)
    clusters <- levels(seurat@meta.data$seurat_clusters)
    
    ## create named vector holding the color assignments for both samples and
    ## clusters
    color_assignments <- setNames(
      c(custom_colors$discrete[1:length(samples)], custom_colors$discrete[1:length(clusters)]),
      c(samples,clusters)
    )
    
    ## prepare data for the plot; factor() calls are necessary for the right order
    ## of columns (first samples then clusters) and boxes within each column (
    ## cluster 1, 2, 3, ..., not 1, 10, 11, ...)
    data <- seurat@meta.data %>%
      group_by(sample,seurat_clusters) %>%
      tally() %>%
      ungroup() %>%
      gather_set_data(1:2) %>%
      dplyr::mutate(
        x = factor(x, levels = unique(x)),
        y = factor(y, levels = unique(y))
      )
    
    DataFrame(data)
    # DataFrame with 114 rows and 6 columns
    #       sample seurat_clusters         n        id               x        y
    #     <factor>        <factor> <integer> <integer>        <factor> <factor>
    # 1          A               0       301         1          sample        A
    # 2          A               1       137         2          sample        A
    # 3          A               2       121         3          sample        A
    # 4          A               3       223         4          sample        A
    # 5          A               4        78         5          sample        A
    # ...      ...             ...       ...       ...             ...      ...
    # 110        E              14        19        53 seurat_clusters       14
    # 111        E              15        18        54 seurat_clusters       15
    # 112        E              16        10        55 seurat_clusters       16
    # 113        E              17         9        56 seurat_clusters       17
    # 114        E              18        15        57 seurat_clusters       18
    
    ## create sample and cluster labels; hjust defines whether a label will be
    ## aligned to the right (1) or to the left (0); the nudge_x parameter is used
    ## to move the label outside of the boxes
    data_labels <- tibble(
        group = c(
          rep('sample', length(samples)),
          rep('seurat_clusters', length(clusters))
        )
     ) %>%
      mutate(
        hjust = ifelse(group == 'sample', 1, 0),
        nudge_x = ifelse(group == 'sample', -0.1, 0.1)
      )
    
    DataFrame(data_labels)
    # DataFrame with 22 rows and 3 columns
    #               group     hjust   nudge_x
    #         <character> <numeric> <numeric>
    # 1            sample         1      -0.1
    # 2            sample         1      -0.1
    # 3            sample         1      -0.1
    # 4   seurat_clusters         0       0.1
    # 5   seurat_clusters         0       0.1
    # ...             ...       ...       ...
    # 18  seurat_clusters         0       0.1
    # 19  seurat_clusters         0       0.1
    # 20  seurat_clusters         0       0.1
    # 21  seurat_clusters         0       0.1
    # 22  seurat_clusters         0       0.1
    
    ## create plot
    p1 <- ggplot(data, aes(x, id = id, split = y, value = n)) +
      geom_parallel_sets(aes(fill = seurat_clusters), alpha = 0.75, axis.width = 0.15) +
      geom_parallel_sets_axes(aes(fill = y), color = 'black', axis.width = 0.1) +
      geom_text(
        aes(y = n, split = y), stat = 'parallel_sets_axes', fontface = 'bold',
        hjust = data_labels$hjust, nudge_x = data_labels$nudge_x
      ) +
      scale_x_discrete(labels = c('Sample','Cluster')) +
      scale_fill_manual(values = color_assignments) +
      theme_bw() +
      theme(
        legend.position = 'none',
        axis.title = element_blank(),
        axis.text.x = element_text(face = 'bold', colour = 'black', size = 15),
        axis.text.y = element_blank(),
        axis.ticks = element_blank(),
        panel.grid.major = element_blank(),
        panel.grid.minor = element_blank(),
        panel.border = element_blank()
      )
    
    clusters <- levels(seurat@meta.data$seurat_clusters)
    cell_types <- sort(unique(seurat@meta.data$cell_type_singler_blueprintencode_main))
    
    color_assignments <- setNames(
      c(custom_colors$discrete[1:length(clusters)], custom_colors$discrete[1:length(cell_types)]),
      c(clusters,cell_types)
    )
    
    data <- seurat@meta.data %>%
      dplyr::rename(cell_type = cell_type_singler_blueprintencode_main) %>%
      dplyr::mutate(cell_type = factor(cell_type, levels = cell_types)) %>%
      group_by(seurat_clusters, cell_type) %>%
      tally() %>%
      ungroup() %>%
      gather_set_data(1:2) %>%
      dplyr::mutate(
        x = factor(x, levels = unique(x)),
        y = factor(y, levels = c(clusters,cell_types))
      )
    
    data_labels <- tibble(
        group = c(
          rep('seurat_clusters', length(clusters)),
          rep('cell_type', length(cell_types))
        )
     ) %>%
      mutate(
        hjust = ifelse(group == 'seurat_clusters', 1, 0),
        nudge_x = ifelse(group == 'seurat_clusters', -0.1, 0.1)
      )
    
    p2 <- ggplot(data, aes(x, id = id, split = y, value = n)) +
      geom_parallel_sets(aes(fill = seurat_clusters), alpha = 0.75, axis.width = 0.15) +
      geom_parallel_sets_axes(aes(fill = y), color = 'black', axis.width = 0.1) +
      geom_text(
        aes(y = n, split = y), stat = 'parallel_sets_axes', fontface = 'bold',
        hjust = data_labels$hjust, nudge_x = data_labels$nudge_x
      ) +
      scale_x_discrete(labels = c('Cluster','Cell type')) +
      scale_fill_manual(values = color_assignments) +
      theme_bw() +
      theme(
        legend.position = 'none',
        axis.title = element_blank(),
        axis.text.x = element_text(face = 'bold', colour = 'black', size = 15),
        axis.text.y = element_blank(),
        axis.ticks = element_blank(),
        panel.grid.major = element_blank(),
        panel.grid.minor = element_blank(),
        panel.border = element_blank()
      )
    
    ggsave(
      'plots/samples_clusters_cell_types_alluvial.png',
      p1 + p2 + plot_layout(ncol = 2),
      height = 6, width = 8
    )
    
    图片.png

    UMAP by cell type

    UMAP_centers_cell_type <- tibble(
        UMAP_1 = as.data.frame(seurat@reductions$UMAP@cell.embeddings)$UMAP_1,
        UMAP_2 = as.data.frame(seurat@reductions$UMAP@cell.embeddings)$UMAP_2,
        cell_type = seurat@meta.data$cell_type_singler_blueprintencode_main
      ) %>%
      group_by(cell_type) %>%
      summarize(x = median(UMAP_1), y = median(UMAP_2))
    
    p <- bind_cols(seurat@meta.data, as.data.frame(seurat@reductions$UMAP@cell.embeddings)) %>%
      ggplot(aes(UMAP_1, UMAP_2, color = cell_type_singler_blueprintencode_main)) +
      geom_point(size = 0.2) +
      geom_label(
        data = UMAP_centers_cell_type,
        mapping = aes(x, y, label = cell_type),
        size = 3.5,
        fill = 'white',
        color = 'black',
        fontface = 'bold',
        alpha = 0.5,
        label.size = 0,
        show.legend = FALSE
      ) +
      theme_bw() +
      expand_limits(x = c(-22,15)) +
      scale_color_manual(values = custom_colors$discrete) +
      labs(color = 'Cell type') +
      guides(colour = guide_legend(override.aes = list(size = 2))) +
      theme(legend.position = 'right') +
      coord_fixed() +
      annotate(
        geom = 'text', x = Inf, y = -Inf,
        label = paste0('n = ', format(nrow(seurat@meta.data), big.mark = ',', trim = TRUE)),
        vjust = -1.5, hjust = 1.25, color = 'black', size = 2.5
      )
    
    ggsave(
      'plots/umap_by_cell_type_singler_blueprintencode_main.png',
      p,
      height = 4,
      width = 6
    )
    
    图片.png
    Dot plot (multiple genes)
    # cells will be grouped by samples that they have been assigned to
    group_ids <- unique(seurat@meta.data$cell_type_singler_blueprintencode_main)
    
    # select a set of genes for which we want to show expression
    genes_to_show <- seurat@misc$marker_genes$cerebro_seurat$cell_type_singler_blueprintencode_main %>%
      group_by(cell_type_singler_blueprintencode_main) %>%
      arrange(p_val_adj) %>%
      filter(row_number() == 1) %>%
      arrange(cell_type_singler_blueprintencode_main) %>%
      pull(gene)
    
    # for every sample-gene combination, calculate the average expression across
    # all cells and then transform the data into a data frame
    expression_levels_per_group <- vapply(
        group_ids, FUN.VALUE = numeric(length(genes_to_show)), function(x) {
          cells_in_current_group <- which(seurat@meta.data$cell_type_singler_blueprintencode_main == x)
          Matrix::rowMeans(seurat@assays$SCT@data[genes_to_show,cells_in_current_group])
        }
      ) %>%
      t() %>%
      as.data.frame() %>%
      mutate(cell_type_singler_blueprintencode_main = rownames(.)) %>%
      select(cell_type_singler_blueprintencode_main, everything()) %>%
      pivot_longer(
        cols = c(2:ncol(.)),
        names_to = 'gene'
      ) %>%
      dplyr::rename(expression = value) %>%
      mutate(id_to_merge = paste0(cell_type_singler_blueprintencode_main, '_', gene))
    
    # for every sample-gene combination, calculate the percentage of cells in the
    # respective group that has at least 1 transcript (this means we consider it
    # as expressing the gene) and then transform the data into a data frame
    percentage_of_cells_expressing_gene <- vapply(
        group_ids, FUN.VALUE = numeric(length(genes_to_show)), function(x) {
          cells_in_current_group <- which(seurat@meta.data$cell_type_singler_blueprintencode_main == x)
          Matrix::rowSums(seurat@assays$SCT@data[genes_to_show,cells_in_current_group] != 0)
        }
      ) %>%
      t() %>%
      as.data.frame() %>%
      mutate(cell_type_singler_blueprintencode_main = rownames(.)) %>%
      select(cell_type_singler_blueprintencode_main, everything()) %>%
      pivot_longer(
        cols = c(2:ncol(.)),
        names_to = 'gene'
      ) %>%
      dplyr::rename(cell_count = value) %>%
      left_join(
        .,
        seurat@meta.data %>%
          group_by(cell_type_singler_blueprintencode_main) %>%
          tally(),
        by = 'cell_type_singler_blueprintencode_main') %>%
      mutate(
        id_to_merge = paste0(cell_type_singler_blueprintencode_main, '_', gene),
        percent_cells = cell_count / n
      )
    
    # merge the two data frames created before and plot the data
    p <- left_join(
        expression_levels_per_group,
        percentage_of_cells_expressing_gene %>% select(id_to_merge, percent_cells),
        by = 'id_to_merge'
      ) %>%
      mutate(
        cell_type_singler_blueprintencode_main = factor(cell_type_singler_blueprintencode_main, levels = rev(group_ids)),
        gene = factor(gene, levels = genes_to_show)
      ) %>%
      arrange(gene) %>%
      ggplot(aes(gene, cell_type_singler_blueprintencode_main)) +
      geom_point(aes(color = expression, size = percent_cells)) +
      scale_color_distiller(
        palette = 'Reds',
        direction = 1,
        name = 'Log-normalised\nexpression',
        guide = guide_colorbar(frame.colour = "black", ticks.colour = "black")
      ) +
      scale_size(name = 'Percent\nof cells', labels = scales::percent) +
      labs(y = 'Cell type', color = 'Expression') +
      coord_fixed() +
      theme_bw() +
      theme(
        axis.title = element_blank(),
        axis.text.x = element_text(angle = 45, hjust = 1)
      )
    
    ggsave('plots/marker_genes_by_cell_type_as_dot_plot.png', p, height = 5, width = 6)
    
    图片.png
    group_ids <- levels(seurat@meta.data$seurat_clusters)
    
    genes_to_show <- seurat@misc$marker_genes$cerebro_seurat$seurat_clusters %>%
      group_by(seurat_clusters) %>%
      arrange(p_val_adj) %>%
      filter(row_number() == 1) %>%
      arrange(seurat_clusters) %>%
      pull(gene)
    
    expression_levels_per_group <- vapply(
        group_ids, FUN.VALUE = numeric(length(genes_to_show)), function(x) {
          cells_in_current_group <- which(seurat@meta.data$seurat_clusters == x)
          Matrix::rowMeans(seurat@assays$SCT@data[genes_to_show,cells_in_current_group])
        }
      ) %>%
      t() %>%
      as.data.frame() %>%
      mutate(cluster = rownames(.)) %>%
      select(cluster, everything()) %>%
      pivot_longer(
        cols = c(2:ncol(.)),
        names_to = 'gene'
      ) %>%
      dplyr::rename(expression = value) %>%
      mutate(id_to_merge = paste0(cluster, '_', gene))
    
    percentage_of_cells_expressing_gene <- vapply(
        group_ids, FUN.VALUE = numeric(length(genes_to_show)), function(x) {
          cells_in_current_group <- which(seurat@meta.data$seurat_cluster == x)
          Matrix::rowSums(seurat@assays$SCT@data[genes_to_show,cells_in_current_group] != 0)
        }
      ) %>%
      t() %>%
      as.data.frame() %>%
      mutate(cluster = rownames(.)) %>%
      select(cluster, everything()) %>%
      pivot_longer(
        cols = c(2:ncol(.)),
        names_to = 'gene'
      ) %>%
      dplyr::rename(cell_count = value) %>%
      left_join(
        .,
        seurat@meta.data %>%
          group_by(seurat_clusters) %>%
          tally() %>%
          dplyr::rename(cluster = seurat_clusters),
        by = 'cluster') %>%
      mutate(
        id_to_merge = paste0(cluster, '_', gene),
        percent_cells = cell_count / n
      )
    
    p <- left_join(
        expression_levels_per_group,
        percentage_of_cells_expressing_gene %>% select(id_to_merge, percent_cells),
        by = 'id_to_merge'
      ) %>%
      mutate(
        cluster = factor(cluster, levels = rev(group_ids)),
        gene = factor(gene, levels = genes_to_show)
      ) %>%
      arrange(gene) %>%
      ggplot(aes(gene, cluster)) +
      geom_point(aes(color = expression, size = percent_cells)) +
      scale_color_distiller(
        palette = 'Reds',
        direction = 1,
        name = 'Log-normalised\nexpression',
        guide = guide_colorbar(frame.colour = "black", ticks.colour = "black")
      ) +
      scale_size(name = 'Percent\nof cells', labels = scales::percent) +
      labs(y = 'Cluster', color = 'Expression') +
      coord_fixed() +
      theme_bw() +
      theme(
        axis.title.x = element_blank(),
        axis.text.x = element_text(angle = 45, hjust = 1)
      )
    
    ggsave('plots/marker_genes_by_cluster_as_dot_plot.png', p, height = 7, width = 8)
    
    图片.png

    Gene set enrichment analysis

    # function to read GMT file
    read_GMT_file <- function(file) {
      gmt <- readr::read_delim(
        file,
        delim = ';',
        col_names = c('X1'),
        col_types = readr::cols()
      )
    
      gene_set_genes <- list()
      for ( i in seq_len(nrow(gmt)) )
      {
        temp_genes <- strsplit(gmt$X1[i], split = '\t')[[1]] %>% unlist()
        temp_genes <- temp_genes[3:length(temp_genes)]
        gene_set_genes[[i]] <- temp_genes
      }
      gene_set_loaded <- list(
        genesets = gene_set_genes,
        geneset.names = lapply(strsplit(gmt$X1, split = '\t'), '[', 1) %>% unlist(),
        geneset.description = lapply(
            strsplit(gmt$X1, split = '\t'), '[', 2
          ) %>% unlist()
      )
    
      return(gene_set_loaded)
    }
    
    # load gene sets from GMT file
    gene_sets <- read_GMT_file('h.all.v7.2.symbols.gmt')
    
    # set gene set names
    names(gene_sets$genesets) <- gene_sets$geneset.names
    
    # get indices of cells which are either HSC or monocytes
    cells_to_analyze <- seurat@meta.data %>%
      mutate(row_number = row_number()) %>%
      filter(grepl(cell_type_singler_blueprintencode_main, pattern = 'HSC|Monocytes')) %>%
      arrange(cell_type_singler_blueprintencode_main) %>%
      pull(row_number)
    
    # get list of genes unique genes across all gene sets
    genes_to_analyze <- gene_sets$genesets %>% unlist() %>% unique()
    # filter gene list for those which are present in the data set
    genes_to_analyze <- genes_to_analyze[which(genes_to_analyze %in% rownames(seurat@assays$SCT@counts))]
    
    # get expression matrix and reduce it to cells and genes of interest
    expression_matrix <- seurat@assays$SCT@counts[ genes_to_analyze , cells_to_analyze] %>% as.matrix()
    
    # perform GSVA
    gsva <- GSVA::gsva(
      expression_matrix,
      gset.idx.list = gene_sets$genesets,
      parallel.sz = 1
    )
    
    # load limma library for statistical testing
    library(limma)
    
    # generate design matrix
    design_matrix <- tibble(
      control = 1,
      test = c(
        rep(0, seurat@meta.data %>% filter(cell_type_singler_blueprintencode_main == 'HSC') %>% nrow()),
        rep(1, seurat@meta.data %>% filter(cell_type_singler_blueprintencode_main == 'Monocytes') %>% nrow())
      )
    )
    
    head(design_matrix)
    # A tibble: 6 x 2
    #   control  test
    #     <dbl> <dbl>
    # 1       1     0
    # 2       1     0
    # 3       1     0
    # 4       1     0
    # 5       1     0
    # 6       1     0
    
    # fit linear model, followed by empirical Bayes statistics for differential
    # enrichment analysis
    fit <- lmFit(gsva, design_matrix)
    fit <- eBayes(fit)
    
    # prepare data for plotting
    data <- topTable(fit, coef = 'test', number = 50) %>%
      mutate(gene_set = rownames(fit$t)) %>%
      arrange(t) %>%
      mutate(
        gene_set = factor(gene_set, levels = gene_set),
        just = ifelse(t < 0, 0, 1),
        nudge_y = ifelse(t < 0, 1, -1),
        color = ifelse(t < -5 | t > 5, 'black', 'grey')
      )
    
    DataFrame(data)
    # DataFrame with 50 rows and 10 columns
    #         logFC     AveExpr         t      P.Value    adj.P.Val         B                            gene_set      just   nudge_y       color
    #     <numeric>   <numeric> <numeric>    <numeric>    <numeric> <numeric>                            <factor> <numeric> <numeric> <character>
    # 1   -0.369443  -0.1569690  -35.1619 1.26066e-186 1.05055e-185   415.980  HALLMARK_TGF_BETA_SIGNALING                0         1       black
    # 2   -0.251413  -0.0820012  -27.2261 1.96152e-127 8.91598e-127   279.761  HALLMARK_NOTCH_SIGNALING                   0         1       black
    # 3   -0.288169  -0.1202293  -26.1464 1.40712e-119 5.41201e-119   261.689  HALLMARK_ESTROGEN_RESPONSE_EARLY           0         1       black
    # 4   -0.135034  -0.1511146  -22.3565  8.50194e-93  2.36165e-92   200.094  HALLMARK_INTERFERON_ALPHA_RESPONSE         0         1       black
    # 5   -0.203651  -0.1049498  -22.0728  7.44006e-91  1.95791e-90   195.628  HALLMARK_INTERFERON_GAMMA_RESPONSE         0         1       black
    # ...       ...         ...       ...          ...          ...       ...                                 ...       ...       ...         ...
    # 46   0.200259  0.25341594   35.9250 2.31901e-192 2.31901e-191   429.182 HALLMARK_WNT_BETA_CATENIN_SIGNALING         1        -1       black
    # 47   0.293113 -0.08115610   36.1995 2.00784e-194 2.50980e-193   433.929 HALLMARK_MITOTIC_SPINDLE                    1        -1       black
    # 48   0.338449  0.11583774   36.2560 7.56196e-195 1.26033e-193   434.906 HALLMARK_CHOLESTEROL_HOMEOSTASIS            1        -1       black
    # 49   0.251678  0.00262136   37.5117 2.86772e-204 7.16930e-203   456.592 HALLMARK_HYPOXIA                            1        -1       black
    # 50   0.282907 -0.05021404   48.5971 1.72523e-285 8.62616e-284   643.571 HALLMARK_TNFA_SIGNALING_VIA_NFKB            1        -1       black
    
    # plot t-value
    p <- ggplot(data = data, aes(x = gene_set, y = t, fill = t)) +
      geom_col() +
      geom_hline(yintercept = c(-5,5), linetype = 'dashed', color = 'grey80') +
      geom_text(
        aes(
          x = gene_set,
          y = 0,
          label = gene_set,
          hjust = just,
          color = color
        ),
        nudge_y = data$nudge_y, size = 3
      ) +
      scale_x_discrete(name = '', labels = NULL) +
      scale_y_continuous(name = 't-value', limits = c(-55,55)) +
      scale_fill_distiller(palette = 'Spectral', limits = c(-max(data$t), max(data$t))) +
      scale_color_manual(values = c('black' = 'black', 'grey' = 'grey')) +
      coord_flip() +
      theme_bw() +
      theme(
        panel.grid = element_blank(),
        axis.ticks.y =  element_blank(),
        legend.position = 'none'
      )
    
    ggsave('plots/gsva_hallmark_gene_sets_monocytes_vs_hsc.png', height = 7.5, width = 7.5)
    
    图片.png
    通讯热图
    library(pheatmap)
    library(argparse)
    
    heatmap <- function (Matrix,labels){
         plot_heatmap <- pheatmap(Matrix,scale='none',cluster_cols = F,cluster_rows = F,cellwidth = 20,cellheight = 20,color = colorRampPalette(colors = c("white","red"))(100),display_numbers=labels)
         return(plot_heatmap)
    }
    
    getSig <- function(dc) {
    if(dc > pvalue){sc <- ""}
    if(dc < pvalue){sc <- "*"}}
    
    parser = ArgumentParser()
    parser$add_argument("--LR_score", help="the file of sample.LR.score.xls",required =T)
    parser$add_argument("--pvalue", help="the file of sample.LR.score.xls",default = '0.1')
    parser$add_argument("--outdir", help="the outdir",default='.')
    args <- parser$parse_args()
    str(args)
    
    LR_score = args$LR_score
    pvalue = args$pvalue
    outdir = args$outdir
    
    pvalue <- as.numeric(pvalue)
    
    if(!dir.exists(outdir)){
      dir.create(outdir)
    }
    
    setwd(outdir)
    file <- read.table(LR_score,sep='\t',header=T,row.names=1,check.names=F)
    len <- as.numeric(length(colnames(file)))
    Max <- max(as.numeric(sapply(strsplit(colnames(file),'_'),"[",2)[1:len/2]))
    
    for (i in 1:Max){
    index_score <- as.array(which(sapply(strsplit(colnames(file),'_'),"[",1) == paste0('cluster',as.character(i))))
    index_pvalue <- as.array(which(sapply(strsplit(colnames(file),'_'),"[",2) == paste0('pvalue',as.character(i))))
    index <- c(index_score,index_pvalue)
    Matrix <- file[,index]
    LR <- rownames(as.matrix(sort(apply(Matrix,1,sum),decreasing=T)[1:30]))
    Matrix_LR <- Matrix[LR,]
    label <- as.matrix(sapply(Matrix_LR[,as.numeric(Max+1):as.numeric(length(colnames(Matrix_LR)))],function(x){ifelse(x>pvalue,sc<-"",sc<-"*")}),ncol=Max)
    pdf(paste0('cluster',i,'_others_LR.pdf'),width=6,height=14)
    heatmap(Matrix_LR[,1:Max],label)
    dev.off()
    png(paste0('cluster',i,'_others_LR.png'),width=6*200,height=14*200,res=200,type = 'cario-png')
    dev.off()
    }
    
    图片.png

    生活很好,有你更好

    相关文章

      网友评论

        本文标题:10X单细胞(10X空间转录组)分析回顾之一些细节绘图操作

        本文链接:https://www.haomeiwen.com/subject/tjqnhrtx.html