10X单细胞（10X空间转录组）分析回顾之一些细节绘图操作

作者: 单细胞空间交响乐 | 来源:发表于2022-01-26 10:30 被阅读0次

临近年底，我们要总结，马上回家了，大家今天到底挣了点什么？？反省一下，今天的自己，有没有更优秀，这一篇我给大家分享一些单细胞分析图片的细节操作，图片是我们分析结果的展示，要美观大方，就算我们过年要相亲了，自己也要打扮的精精神神的，😄

图片.png

第一张图，Cluster之间的相关性

sce <- as.SingleCellExperiment(seurat)
reducedDim(sce, 'PCA_sub') <- reducedDim(sce, 'PCA')[,1:15, drop = FALSE]

ass_prob <- bootstrapCluster(sce, FUN = function(x) {
    g <- buildSNNGraph(x, use.dimred = 'PCA_sub')
    igraph::cluster_walktrap(g)$membership
  },
  clusters = sce$seurat_clusters
)

p <- ass_prob %>%
  as_tibble() %>%
  rownames_to_column(var = 'cluster_1') %>%
  pivot_longer(
    cols = 2:ncol(.),
    names_to = 'cluster_2',
    values_to = 'probability'
  ) %>%
  mutate(
    cluster_1 = as.character(as.numeric(cluster_1) - 1),
    cluster_1 = factor(cluster_1, levels = rev(unique(cluster_1))),
    cluster_2 = factor(cluster_2, levels = unique(cluster_2))
  ) %>%
  ggplot(aes(cluster_2, cluster_1, fill = probability)) +
  geom_tile(color = 'white') +
  geom_text(aes(label = round(probability, digits = 2)), size = 2.5) +
  scale_x_discrete(name = 'Cluster', position = 'top') +
  scale_y_discrete(name = 'Cluster') +
  scale_fill_gradient(
    name = 'Probability', low = 'white', high = '#c0392b', na.value = '#bdc3c7',
    limits = c(0,1),
    guide = guide_colorbar(
      frame.colour = 'black', ticks.colour = 'black', title.position = 'left',
      title.theme = element_text(hjust = 1, angle = 90),
      barwidth = 0.75, barheight = 10
    )
  ) +
  coord_fixed() +
  theme_bw() +
  theme(
    legend.position = 'right',
    panel.grid.major = element_blank()
  )

ggsave('plots/cluster_stability.png', p, height = 6, width = 7)

图片.png

Silhouette plot

library(cluster)

distance_matrix <- dist(Embeddings(seurat[['pca']])[, 1:15])
clusters <- seurat@meta.data$seurat_clusters
silhouette <- silhouette(as.numeric(clusters), dist = distance_matrix)
seurat@meta.data$silhouette_score <- silhouette[,3]

mean_silhouette_score <- mean(seurat@meta.data$silhouette_score)

p <- seurat@meta.data %>%
  mutate(barcode = rownames(.)) %>%
  arrange(seurat_clusters,-silhouette_score) %>%
  mutate(barcode = factor(barcode, levels = barcode)) %>%
  ggplot() +
  geom_col(aes(barcode, silhouette_score, fill = seurat_clusters), show.legend = FALSE) +
  geom_hline(yintercept = mean_silhouette_score, color = 'red', linetype = 'dashed') +
  scale_x_discrete(name = 'Cells') +
  scale_y_continuous(name = 'Silhouette score') +
  scale_fill_manual(values = custom_colors$discrete) +
  theme_bw() +
  theme(
    axis.title.x = element_blank(),
    axis.text.x = element_blank(),
    axis.ticks.x = element_blank(),
    panel.grid.major = element_blank(),
    panel.grid.minor = element_blank()
  )
ggsave('plots/silhouette_plot.png', p, height = 4, width = 8)

图片.png

Cluster tree

seurat <- BuildClusterTree(
  seurat,
  dims = 1:15,
  reorder = FALSE,
  reorder.numeric = FALSE
)

tree <- seurat@tools$BuildClusterTree
tree$tip.label <- paste0("Cluster ", tree$tip.label)

p <- ggtree::ggtree(tree, aes(x, y)) +
  scale_y_reverse() +
  ggtree::geom_tree() +
  ggtree::theme_tree() +
  ggtree::geom_tiplab(offset = 1) +
  ggtree::geom_tippoint(color = custom_colors$discrete[1:length(tree$tip.label)], shape = 16, size = 5) +
  coord_cartesian(clip = 'off') +
  theme(plot.margin = unit(c(0,2.5,0,0), 'cm'))

ggsave('plots/cluster_tree.png', p, height = 4, width = 6)

图片.png

比例图

temp_labels <- seurat@meta.data %>%
  group_by(sample) %>%
  tally()

p1 <- table_samples_by_clusters %>%
  select(-c('total_cell_count')) %>%
  reshape2::melt(id.vars = 'sample') %>%
  mutate(sample = factor(sample, levels = levels(seurat@meta.data$sample))) %>%
  ggplot(aes(sample, value)) +
  geom_bar(aes(fill = variable), position = 'fill', stat = 'identity') +
  geom_text(
    data = temp_labels,
    aes(x = sample, y = Inf, label = paste0('n = ', format(n, big.mark = ',', trim = TRUE)), vjust = -1),
    color = 'black', size = 2.8
  ) +
  scale_fill_manual(name = 'Cluster', values = custom_colors$discrete) +
  scale_y_continuous(name = 'Percentage [%]', labels = scales::percent_format(), expand = c(0.01,0)) +
  coord_cartesian(clip = 'off') +
  theme_bw() +
  theme(
    legend.position = 'left',
    plot.title = element_text(hjust = 0.5),
    text = element_text(size = 16),
    panel.grid.major = element_blank(),
    panel.grid.minor = element_blank(),
    axis.title.x = element_blank(),
    axis.text.x = element_text(angle = 45, hjust = 1, vjust = 1),
    plot.margin = margin(t = 20, r = 0, b = 0, l = 0, unit = 'pt')
  )

temp_labels <- seurat@meta.data %>%
  group_by(seurat_clusters) %>%
  tally() %>%
  dplyr::rename('cluster' = seurat_clusters)

p2 <- table_clusters_by_samples %>%
  select(-c('total_cell_count')) %>%
  reshape2::melt(id.vars = 'cluster') %>%
  mutate(cluster = factor(cluster, levels = levels(seurat@meta.data$seurat_clusters))) %>%
  ggplot(aes(cluster, value)) +
  geom_bar(aes(fill = variable), position = 'fill', stat = 'identity') +
  geom_text(
    data = temp_labels, aes(x = cluster, y = Inf, label = paste0('n = ', format(n, big.mark = ',', trim = TRUE)), vjust = -1),
    color = 'black', size = 2.8
  ) +
  scale_fill_manual(name = 'Sample', values = custom_colors$discrete) +
  scale_y_continuous(name = 'Percentage [%]', labels = scales::percent_format(), expand = c(0.01,0)) +
  coord_cartesian(clip = 'off') +
  theme_bw() +
  theme(
    legend.position = 'right',
    plot.title = element_text(hjust = 0.5),
    text = element_text(size = 16),
    panel.grid.major = element_blank(),
    panel.grid.minor = element_blank(),
    axis.title = element_blank(),
    plot.margin = margin(t = 20, r = 0, b = 0, l = 10, unit = 'pt')
  )

ggsave(
  'plots/composition_samples_clusters_by_percent.png',
  p1 + p2 +
  plot_layout(ncol = 2, widths = c(
    seurat@meta.data$sample %>% unique() %>% length(),
    seurat@meta.data$seurat_clusters %>% unique() %>% length()
  )),
  width = 18, height = 8
)

图片.png

temp_labels <- seurat@meta.data %>%
  group_by(sample) %>%
  tally()

p1 <- table_samples_by_cell_cycle %>%
  select(-c('total_cell_count')) %>%
  reshape2::melt(id.vars = 'sample') %>%
  mutate(sample = factor(sample, levels = levels(seurat@meta.data$sample))) %>%
  ggplot(aes(sample, value)) +
  geom_bar(aes(fill = variable), position = 'fill', stat = 'identity') +
  geom_text(
    data = temp_labels,
    aes(x = sample, y = Inf, label = paste0('n = ', format(n, big.mark = ',', trim = TRUE)), vjust = -1),
    color = 'black', size = 2.8
  ) +
  scale_fill_manual(name = 'Cell cycle', values = custom_colors$cell_cycle) +
  scale_y_continuous(name = 'Percentage [%]', labels = scales::percent_format(), expand = c(0.01,0)) +
  coord_cartesian(clip = 'off') +
  theme_bw() +
  theme(
    legend.position = 'none',
    plot.title = element_text(hjust = 0.5),
    text = element_text(size = 16),
    panel.grid.major = element_blank(),
    panel.grid.minor = element_blank(),
    axis.title.x = element_blank(),
    axis.text.x = element_text(angle = 45, hjust = 1, vjust = 1),
    plot.margin = margin(t = 20, r = 0, b = 0, l = 0, unit = 'pt')
  )

temp_labels <- seurat@meta.data %>%
  group_by(seurat_clusters) %>%
  tally() %>%
  dplyr::rename('cluster' = seurat_clusters)

p2 <- table_clusters_by_cell_cycle %>%
  select(-c('total_cell_count')) %>%
  reshape2::melt(id.vars = 'cluster') %>%
  mutate(cluster = factor(cluster, levels = levels(seurat@meta.data$seurat_clusters))) %>%
  ggplot(aes(cluster, value)) +
  geom_bar(aes(fill = variable), position = 'fill', stat = 'identity') +
  geom_text(
    data = temp_labels,
    aes(x = cluster, y = Inf, label = paste0('n = ', format(n, big.mark = ',', trim = TRUE)), vjust = -1),
    color = 'black', size = 2.8
  ) +
  scale_fill_manual(name = 'Cell cycle', values = custom_colors$cell_cycle) +
  scale_y_continuous(name = 'Percentage [%]', labels = scales::percent_format(), expand = c(0.01,0)) +
  coord_cartesian(clip = 'off') +
  theme_bw() +
  theme(
    legend.position = 'right',
    plot.title = element_text(hjust = 0.5),
    text = element_text(size = 16),
    panel.grid.major = element_blank(),
    panel.grid.minor = element_blank(),
    axis.title = element_blank(),
    plot.margin = margin(t = 20, r = 0, b = 0, l = 10, unit = 'pt')
  )

ggsave(
  'plots/composition_samples_clusters_by_cell_cycle_by_percent.png',
  p1 + p2 +
  plot_layout(ncol = 2, widths = c(
    seurat@meta.data$sample %>% unique() %>% length(),
    seurat@meta.data$seurat_clusters %>% unique() %>% length()
  )),
  width = 18, height = 8
)

图片.png

SNN graph

library(ggnetwork)

SCT_snn <- seurat@graphs$SCT_snn %>%
  as.matrix() %>%
  ggnetwork() %>%
  left_join(seurat@meta.data %>% mutate(vertex.names = rownames(.)), by = 'vertex.names')

p1 <- ggplot(SCT_snn, aes(x = x, y = y, xend = xend, yend = yend)) +
  geom_edges(color = 'grey50', alpha = 0.05) +
  geom_nodes(aes(color = sample), size = 0.5) +
  scale_color_manual(
    name = 'Sample', values = custom_colors$discrete,
    guide = guide_legend(ncol = 1, override.aes = list(size = 2))
  ) +
  theme_blank() +
  theme(legend.position = 'left') +
  annotate(
    geom = 'text', x = Inf, y = -Inf,
    label = paste0('n = ', format(nrow(seurat@meta.data), big.mark = ',', trim = TRUE)),
    vjust = -1.5, hjust = 1.25, color = 'black', size = 2.5
  )

p2 <- ggplot(SCT_snn, aes(x = x, y = y, xend = xend, yend = yend)) +
  geom_edges(color = 'grey50', alpha = 0.05) +
  geom_nodes(aes(color = seurat_clusters), size = 0.5) +
  scale_color_manual(
    name = 'Cluster', values = custom_colors$discrete,
    guide = guide_legend(ncol = 1, override.aes = list(size = 2))
  ) +
  theme_blank() +
  theme(legend.position = 'right') +
  annotate(
    geom = 'text', x = Inf, y = -Inf,
    label = paste0('n = ', format(nrow(seurat@meta.data), big.mark = ',', trim = TRUE)),
    vjust = -1.5, hjust = 1.25, color = 'black', size = 2.5
  )

ggsave(
  'plots/snn_graph_by_sample_cluster.png',
  p1 + p2 + plot_layout(ncol = 2),
  height = 5, width = 11
)

图片.png

UMAP图

plot_umap_by_nCount <- bind_cols(seurat@meta.data, as.data.frame(seurat@reductions$UMAP@cell.embeddings)) %>%
  ggplot(aes(UMAP_1, UMAP_2, color = nCount_RNA)) +
  geom_point(size = 0.2) +
  theme_bw() +
  scale_color_viridis(
    guide = guide_colorbar(frame.colour = 'black', ticks.colour = 'black'),
    labels = scales::comma,
  ) +
  labs(color = 'Number of\ntranscripts') +
  theme(legend.position = 'left') +
  coord_fixed() +
  annotate(
    geom = 'text', x = Inf, y = -Inf,
    label = paste0('n = ', format(nrow(seurat@meta.data), big.mark = ',', trim = TRUE)),
    vjust = -1.5, hjust = 1.25, color = 'black', size = 2.5
  )

plot_umap_by_sample <- bind_cols(seurat@meta.data, as.data.frame(seurat@reductions$UMAP@cell.embeddings)) %>%
  ggplot(aes(UMAP_1, UMAP_2, color = sample)) +
  geom_point(size = 0.2) +
  theme_bw() +
  scale_color_manual(values = custom_colors$discrete) +
  labs(color = 'Sample') +
  guides(colour = guide_legend(override.aes = list(size = 2))) +
  theme(legend.position = 'right') +
  coord_fixed() +
  annotate(
    geom = 'text', x = Inf, y = -Inf,
    label = paste0('n = ', format(nrow(seurat@meta.data), big.mark = ',', trim = TRUE)),
    vjust = -1.5, hjust = 1.25, color = 'black', size = 2.5
  )

plot_umap_by_cluster <- bind_cols(seurat@meta.data, as.data.frame(seurat@reductions$UMAP@cell.embeddings)) %>%
  ggplot(aes(UMAP_1, UMAP_2, color = seurat_clusters)) +
  geom_point(size = 0.2) +
  theme_bw() +
  scale_color_manual(
    name = 'Cluster', values = custom_colors$discrete,
    guide = guide_legend(ncol = 2, override.aes = list(size = 2))
  ) +
  theme(legend.position = 'left') +
  coord_fixed() +
  annotate(
    geom = 'text', x = Inf, y = -Inf,
    label = paste0('n = ', format(nrow(seurat@meta.data), big.mark = ',', trim = TRUE)),
    vjust = -1.5, hjust = 1.25, color = 'black', size = 2.5
  )

plot_umap_by_cell_cycle <- bind_cols(seurat@meta.data, as.data.frame(seurat@reductions$UMAP@cell.embeddings)) %>%
  ggplot(aes(UMAP_1, UMAP_2, color = cell_cycle_seurat)) +
  geom_point(size = 0.2) +
  theme_bw() +
  scale_color_manual(values = custom_colors$cell_cycle) +
  labs(color = 'Cell cycle') +
  guides(colour = guide_legend(override.aes = list(size = 2))) +
  theme(legend.position = 'right') +
  coord_fixed() +
  annotate(
    geom = 'text', x = Inf, y = -Inf,
    label = paste0('n = ', format(nrow(seurat@meta.data), big.mark = ',', trim = TRUE)),
    vjust = -1.5, hjust = 1.25, color = 'black', size = 2.5
  )

ggsave(
  'plots/umap.png',
  plot_umap_by_nCount + plot_umap_by_sample +
  plot_umap_by_cluster + plot_umap_by_cell_cycle +
  plot_layout(ncol = 2),
  height = 6,
  width = 8.5
)

图片.png

Alluvial plots

## get sample and cluster names
samples <- levels(seurat@meta.data$sample)
clusters <- levels(seurat@meta.data$seurat_clusters)

## create named vector holding the color assignments for both samples and
## clusters
color_assignments <- setNames(
  c(custom_colors$discrete[1:length(samples)], custom_colors$discrete[1:length(clusters)]),
  c(samples,clusters)
)

## prepare data for the plot; factor() calls are necessary for the right order
## of columns (first samples then clusters) and boxes within each column (
## cluster 1, 2, 3, ..., not 1, 10, 11, ...)
data <- seurat@meta.data %>%
  group_by(sample,seurat_clusters) %>%
  tally() %>%
  ungroup() %>%
  gather_set_data(1:2) %>%
  dplyr::mutate(
    x = factor(x, levels = unique(x)),
    y = factor(y, levels = unique(y))
  )

DataFrame(data)
# DataFrame with 114 rows and 6 columns
#       sample seurat_clusters         n        id               x        y
#     <factor>        <factor> <integer> <integer>        <factor> <factor>
# 1          A               0       301         1          sample        A
# 2          A               1       137         2          sample        A
# 3          A               2       121         3          sample        A
# 4          A               3       223         4          sample        A
# 5          A               4        78         5          sample        A
# ...      ...             ...       ...       ...             ...      ...
# 110        E              14        19        53 seurat_clusters       14
# 111        E              15        18        54 seurat_clusters       15
# 112        E              16        10        55 seurat_clusters       16
# 113        E              17         9        56 seurat_clusters       17
# 114        E              18        15        57 seurat_clusters       18

## create sample and cluster labels; hjust defines whether a label will be
## aligned to the right (1) or to the left (0); the nudge_x parameter is used
## to move the label outside of the boxes
data_labels <- tibble(
    group = c(
      rep('sample', length(samples)),
      rep('seurat_clusters', length(clusters))
    )
 ) %>%
  mutate(
    hjust = ifelse(group == 'sample', 1, 0),
    nudge_x = ifelse(group == 'sample', -0.1, 0.1)
  )

DataFrame(data_labels)
# DataFrame with 22 rows and 3 columns
#               group     hjust   nudge_x
#         <character> <numeric> <numeric>
# 1            sample         1      -0.1
# 2            sample         1      -0.1
# 3            sample         1      -0.1
# 4   seurat_clusters         0       0.1
# 5   seurat_clusters         0       0.1
# ...             ...       ...       ...
# 18  seurat_clusters         0       0.1
# 19  seurat_clusters         0       0.1
# 20  seurat_clusters         0       0.1
# 21  seurat_clusters         0       0.1
# 22  seurat_clusters         0       0.1

## create plot
p1 <- ggplot(data, aes(x, id = id, split = y, value = n)) +
  geom_parallel_sets(aes(fill = seurat_clusters), alpha = 0.75, axis.width = 0.15) +
  geom_parallel_sets_axes(aes(fill = y), color = 'black', axis.width = 0.1) +
  geom_text(
    aes(y = n, split = y), stat = 'parallel_sets_axes', fontface = 'bold',
    hjust = data_labels$hjust, nudge_x = data_labels$nudge_x
  ) +
  scale_x_discrete(labels = c('Sample','Cluster')) +
  scale_fill_manual(values = color_assignments) +
  theme_bw() +
  theme(
    legend.position = 'none',
    axis.title = element_blank(),
    axis.text.x = element_text(face = 'bold', colour = 'black', size = 15),
    axis.text.y = element_blank(),
    axis.ticks = element_blank(),
    panel.grid.major = element_blank(),
    panel.grid.minor = element_blank(),
    panel.border = element_blank()
  )

clusters <- levels(seurat@meta.data$seurat_clusters)
cell_types <- sort(unique(seurat@meta.data$cell_type_singler_blueprintencode_main))

color_assignments <- setNames(
  c(custom_colors$discrete[1:length(clusters)], custom_colors$discrete[1:length(cell_types)]),
  c(clusters,cell_types)
)

data <- seurat@meta.data %>%
  dplyr::rename(cell_type = cell_type_singler_blueprintencode_main) %>%
  dplyr::mutate(cell_type = factor(cell_type, levels = cell_types)) %>%
  group_by(seurat_clusters, cell_type) %>%
  tally() %>%
  ungroup() %>%
  gather_set_data(1:2) %>%
  dplyr::mutate(
    x = factor(x, levels = unique(x)),
    y = factor(y, levels = c(clusters,cell_types))
  )

data_labels <- tibble(
    group = c(
      rep('seurat_clusters', length(clusters)),
      rep('cell_type', length(cell_types))
    )
 ) %>%
  mutate(
    hjust = ifelse(group == 'seurat_clusters', 1, 0),
    nudge_x = ifelse(group == 'seurat_clusters', -0.1, 0.1)
  )

p2 <- ggplot(data, aes(x, id = id, split = y, value = n)) +
  geom_parallel_sets(aes(fill = seurat_clusters), alpha = 0.75, axis.width = 0.15) +
  geom_parallel_sets_axes(aes(fill = y), color = 'black', axis.width = 0.1) +
  geom_text(
    aes(y = n, split = y), stat = 'parallel_sets_axes', fontface = 'bold',
    hjust = data_labels$hjust, nudge_x = data_labels$nudge_x
  ) +
  scale_x_discrete(labels = c('Cluster','Cell type')) +
  scale_fill_manual(values = color_assignments) +
  theme_bw() +
  theme(
    legend.position = 'none',
    axis.title = element_blank(),
    axis.text.x = element_text(face = 'bold', colour = 'black', size = 15),
    axis.text.y = element_blank(),
    axis.ticks = element_blank(),
    panel.grid.major = element_blank(),
    panel.grid.minor = element_blank(),
    panel.border = element_blank()
  )

ggsave(
  'plots/samples_clusters_cell_types_alluvial.png',
  p1 + p2 + plot_layout(ncol = 2),
  height = 6, width = 8
)

图片.png

UMAP by cell type

UMAP_centers_cell_type <- tibble(
    UMAP_1 = as.data.frame(seurat@reductions$UMAP@cell.embeddings)$UMAP_1,
    UMAP_2 = as.data.frame(seurat@reductions$UMAP@cell.embeddings)$UMAP_2,
    cell_type = seurat@meta.data$cell_type_singler_blueprintencode_main
  ) %>%
  group_by(cell_type) %>%
  summarize(x = median(UMAP_1), y = median(UMAP_2))

p <- bind_cols(seurat@meta.data, as.data.frame(seurat@reductions$UMAP@cell.embeddings)) %>%
  ggplot(aes(UMAP_1, UMAP_2, color = cell_type_singler_blueprintencode_main)) +
  geom_point(size = 0.2) +
  geom_label(
    data = UMAP_centers_cell_type,
    mapping = aes(x, y, label = cell_type),
    size = 3.5,
    fill = 'white',
    color = 'black',
    fontface = 'bold',
    alpha = 0.5,
    label.size = 0,
    show.legend = FALSE
  ) +
  theme_bw() +
  expand_limits(x = c(-22,15)) +
  scale_color_manual(values = custom_colors$discrete) +
  labs(color = 'Cell type') +
  guides(colour = guide_legend(override.aes = list(size = 2))) +
  theme(legend.position = 'right') +
  coord_fixed() +
  annotate(
    geom = 'text', x = Inf, y = -Inf,
    label = paste0('n = ', format(nrow(seurat@meta.data), big.mark = ',', trim = TRUE)),
    vjust = -1.5, hjust = 1.25, color = 'black', size = 2.5
  )

ggsave(
  'plots/umap_by_cell_type_singler_blueprintencode_main.png',
  p,
  height = 4,
  width = 6
)

图片.png

Dot plot (multiple genes)

# cells will be grouped by samples that they have been assigned to
group_ids <- unique(seurat@meta.data$cell_type_singler_blueprintencode_main)

# select a set of genes for which we want to show expression
genes_to_show <- seurat@misc$marker_genes$cerebro_seurat$cell_type_singler_blueprintencode_main %>%
  group_by(cell_type_singler_blueprintencode_main) %>%
  arrange(p_val_adj) %>%
  filter(row_number() == 1) %>%
  arrange(cell_type_singler_blueprintencode_main) %>%
  pull(gene)

# for every sample-gene combination, calculate the average expression across
# all cells and then transform the data into a data frame
expression_levels_per_group <- vapply(
    group_ids, FUN.VALUE = numeric(length(genes_to_show)), function(x) {
      cells_in_current_group <- which(seurat@meta.data$cell_type_singler_blueprintencode_main == x)
      Matrix::rowMeans(seurat@assays$SCT@data[genes_to_show,cells_in_current_group])
    }
  ) %>%
  t() %>%
  as.data.frame() %>%
  mutate(cell_type_singler_blueprintencode_main = rownames(.)) %>%
  select(cell_type_singler_blueprintencode_main, everything()) %>%
  pivot_longer(
    cols = c(2:ncol(.)),
    names_to = 'gene'
  ) %>%
  dplyr::rename(expression = value) %>%
  mutate(id_to_merge = paste0(cell_type_singler_blueprintencode_main, '_', gene))

# for every sample-gene combination, calculate the percentage of cells in the
# respective group that has at least 1 transcript (this means we consider it
# as expressing the gene) and then transform the data into a data frame
percentage_of_cells_expressing_gene <- vapply(
    group_ids, FUN.VALUE = numeric(length(genes_to_show)), function(x) {
      cells_in_current_group <- which(seurat@meta.data$cell_type_singler_blueprintencode_main == x)
      Matrix::rowSums(seurat@assays$SCT@data[genes_to_show,cells_in_current_group] != 0)
    }
  ) %>%
  t() %>%
  as.data.frame() %>%
  mutate(cell_type_singler_blueprintencode_main = rownames(.)) %>%
  select(cell_type_singler_blueprintencode_main, everything()) %>%
  pivot_longer(
    cols = c(2:ncol(.)),
    names_to = 'gene'
  ) %>%
  dplyr::rename(cell_count = value) %>%
  left_join(
    .,
    seurat@meta.data %>%
      group_by(cell_type_singler_blueprintencode_main) %>%
      tally(),
    by = 'cell_type_singler_blueprintencode_main') %>%
  mutate(
    id_to_merge = paste0(cell_type_singler_blueprintencode_main, '_', gene),
    percent_cells = cell_count / n
  )

# merge the two data frames created before and plot the data
p <- left_join(
    expression_levels_per_group,
    percentage_of_cells_expressing_gene %>% select(id_to_merge, percent_cells),
    by = 'id_to_merge'
  ) %>%
  mutate(
    cell_type_singler_blueprintencode_main = factor(cell_type_singler_blueprintencode_main, levels = rev(group_ids)),
    gene = factor(gene, levels = genes_to_show)
  ) %>%
  arrange(gene) %>%
  ggplot(aes(gene, cell_type_singler_blueprintencode_main)) +
  geom_point(aes(color = expression, size = percent_cells)) +
  scale_color_distiller(
    palette = 'Reds',
    direction = 1,
    name = 'Log-normalised\nexpression',
    guide = guide_colorbar(frame.colour = "black", ticks.colour = "black")
  ) +
  scale_size(name = 'Percent\nof cells', labels = scales::percent) +
  labs(y = 'Cell type', color = 'Expression') +
  coord_fixed() +
  theme_bw() +
  theme(
    axis.title = element_blank(),
    axis.text.x = element_text(angle = 45, hjust = 1)
  )

ggsave('plots/marker_genes_by_cell_type_as_dot_plot.png', p, height = 5, width = 6)

图片.png

group_ids <- levels(seurat@meta.data$seurat_clusters)

genes_to_show <- seurat@misc$marker_genes$cerebro_seurat$seurat_clusters %>%
  group_by(seurat_clusters) %>%
  arrange(p_val_adj) %>%
  filter(row_number() == 1) %>%
  arrange(seurat_clusters) %>%
  pull(gene)

expression_levels_per_group <- vapply(
    group_ids, FUN.VALUE = numeric(length(genes_to_show)), function(x) {
      cells_in_current_group <- which(seurat@meta.data$seurat_clusters == x)
      Matrix::rowMeans(seurat@assays$SCT@data[genes_to_show,cells_in_current_group])
    }
  ) %>%
  t() %>%
  as.data.frame() %>%
  mutate(cluster = rownames(.)) %>%
  select(cluster, everything()) %>%
  pivot_longer(
    cols = c(2:ncol(.)),
    names_to = 'gene'
  ) %>%
  dplyr::rename(expression = value) %>%
  mutate(id_to_merge = paste0(cluster, '_', gene))

percentage_of_cells_expressing_gene <- vapply(
    group_ids, FUN.VALUE = numeric(length(genes_to_show)), function(x) {
      cells_in_current_group <- which(seurat@meta.data$seurat_cluster == x)
      Matrix::rowSums(seurat@assays$SCT@data[genes_to_show,cells_in_current_group] != 0)
    }
  ) %>%
  t() %>%
  as.data.frame() %>%
  mutate(cluster = rownames(.)) %>%
  select(cluster, everything()) %>%
  pivot_longer(
    cols = c(2:ncol(.)),
    names_to = 'gene'
  ) %>%
  dplyr::rename(cell_count = value) %>%
  left_join(
    .,
    seurat@meta.data %>%
      group_by(seurat_clusters) %>%
      tally() %>%
      dplyr::rename(cluster = seurat_clusters),
    by = 'cluster') %>%
  mutate(
    id_to_merge = paste0(cluster, '_', gene),
    percent_cells = cell_count / n
  )

p <- left_join(
    expression_levels_per_group,
    percentage_of_cells_expressing_gene %>% select(id_to_merge, percent_cells),
    by = 'id_to_merge'
  ) %>%
  mutate(
    cluster = factor(cluster, levels = rev(group_ids)),
    gene = factor(gene, levels = genes_to_show)
  ) %>%
  arrange(gene) %>%
  ggplot(aes(gene, cluster)) +
  geom_point(aes(color = expression, size = percent_cells)) +
  scale_color_distiller(
    palette = 'Reds',
    direction = 1,
    name = 'Log-normalised\nexpression',
    guide = guide_colorbar(frame.colour = "black", ticks.colour = "black")
  ) +
  scale_size(name = 'Percent\nof cells', labels = scales::percent) +
  labs(y = 'Cluster', color = 'Expression') +
  coord_fixed() +
  theme_bw() +
  theme(
    axis.title.x = element_blank(),
    axis.text.x = element_text(angle = 45, hjust = 1)
  )

ggsave('plots/marker_genes_by_cluster_as_dot_plot.png', p, height = 7, width = 8)

图片.png

Gene set enrichment analysis

# function to read GMT file
read_GMT_file <- function(file) {
  gmt <- readr::read_delim(
    file,
    delim = ';',
    col_names = c('X1'),
    col_types = readr::cols()
  )

  gene_set_genes <- list()
  for ( i in seq_len(nrow(gmt)) )
  {
    temp_genes <- strsplit(gmt$X1[i], split = '\t')[[1]] %>% unlist()
    temp_genes <- temp_genes[3:length(temp_genes)]
    gene_set_genes[[i]] <- temp_genes
  }
  gene_set_loaded <- list(
    genesets = gene_set_genes,
    geneset.names = lapply(strsplit(gmt$X1, split = '\t'), '[', 1) %>% unlist(),
    geneset.description = lapply(
        strsplit(gmt$X1, split = '\t'), '[', 2
      ) %>% unlist()
  )

  return(gene_set_loaded)
}

# load gene sets from GMT file
gene_sets <- read_GMT_file('h.all.v7.2.symbols.gmt')

# set gene set names
names(gene_sets$genesets) <- gene_sets$geneset.names

# get indices of cells which are either HSC or monocytes
cells_to_analyze <- seurat@meta.data %>%
  mutate(row_number = row_number()) %>%
  filter(grepl(cell_type_singler_blueprintencode_main, pattern = 'HSC|Monocytes')) %>%
  arrange(cell_type_singler_blueprintencode_main) %>%
  pull(row_number)

# get list of genes unique genes across all gene sets
genes_to_analyze <- gene_sets$genesets %>% unlist() %>% unique()
# filter gene list for those which are present in the data set
genes_to_analyze <- genes_to_analyze[which(genes_to_analyze %in% rownames(seurat@assays$SCT@counts))]

# get expression matrix and reduce it to cells and genes of interest
expression_matrix <- seurat@assays$SCT@counts[ genes_to_analyze , cells_to_analyze] %>% as.matrix()

# perform GSVA
gsva <- GSVA::gsva(
  expression_matrix,
  gset.idx.list = gene_sets$genesets,
  parallel.sz = 1
)

# load limma library for statistical testing
library(limma)

# generate design matrix
design_matrix <- tibble(
  control = 1,
  test = c(
    rep(0, seurat@meta.data %>% filter(cell_type_singler_blueprintencode_main == 'HSC') %>% nrow()),
    rep(1, seurat@meta.data %>% filter(cell_type_singler_blueprintencode_main == 'Monocytes') %>% nrow())
  )
)

head(design_matrix)
# A tibble: 6 x 2
#   control  test
#     <dbl> <dbl>
# 1       1     0
# 2       1     0
# 3       1     0
# 4       1     0
# 5       1     0
# 6       1     0

# fit linear model, followed by empirical Bayes statistics for differential
# enrichment analysis
fit <- lmFit(gsva, design_matrix)
fit <- eBayes(fit)

# prepare data for plotting
data <- topTable(fit, coef = 'test', number = 50) %>%
  mutate(gene_set = rownames(fit$t)) %>%
  arrange(t) %>%
  mutate(
    gene_set = factor(gene_set, levels = gene_set),
    just = ifelse(t < 0, 0, 1),
    nudge_y = ifelse(t < 0, 1, -1),
    color = ifelse(t < -5 | t > 5, 'black', 'grey')
  )

DataFrame(data)
# DataFrame with 50 rows and 10 columns
#         logFC     AveExpr         t      P.Value    adj.P.Val         B                            gene_set      just   nudge_y       color
#     <numeric>   <numeric> <numeric>    <numeric>    <numeric> <numeric>                            <factor> <numeric> <numeric> <character>
# 1   -0.369443  -0.1569690  -35.1619 1.26066e-186 1.05055e-185   415.980  HALLMARK_TGF_BETA_SIGNALING                0         1       black
# 2   -0.251413  -0.0820012  -27.2261 1.96152e-127 8.91598e-127   279.761  HALLMARK_NOTCH_SIGNALING                   0         1       black
# 3   -0.288169  -0.1202293  -26.1464 1.40712e-119 5.41201e-119   261.689  HALLMARK_ESTROGEN_RESPONSE_EARLY           0         1       black
# 4   -0.135034  -0.1511146  -22.3565  8.50194e-93  2.36165e-92   200.094  HALLMARK_INTERFERON_ALPHA_RESPONSE         0         1       black
# 5   -0.203651  -0.1049498  -22.0728  7.44006e-91  1.95791e-90   195.628  HALLMARK_INTERFERON_GAMMA_RESPONSE         0         1       black
# ...       ...         ...       ...          ...          ...       ...                                 ...       ...       ...         ...
# 46   0.200259  0.25341594   35.9250 2.31901e-192 2.31901e-191   429.182 HALLMARK_WNT_BETA_CATENIN_SIGNALING         1        -1       black
# 47   0.293113 -0.08115610   36.1995 2.00784e-194 2.50980e-193   433.929 HALLMARK_MITOTIC_SPINDLE                    1        -1       black
# 48   0.338449  0.11583774   36.2560 7.56196e-195 1.26033e-193   434.906 HALLMARK_CHOLESTEROL_HOMEOSTASIS            1        -1       black
# 49   0.251678  0.00262136   37.5117 2.86772e-204 7.16930e-203   456.592 HALLMARK_HYPOXIA                            1        -1       black
# 50   0.282907 -0.05021404   48.5971 1.72523e-285 8.62616e-284   643.571 HALLMARK_TNFA_SIGNALING_VIA_NFKB            1        -1       black

# plot t-value
p <- ggplot(data = data, aes(x = gene_set, y = t, fill = t)) +
  geom_col() +
  geom_hline(yintercept = c(-5,5), linetype = 'dashed', color = 'grey80') +
  geom_text(
    aes(
      x = gene_set,
      y = 0,
      label = gene_set,
      hjust = just,
      color = color
    ),
    nudge_y = data$nudge_y, size = 3
  ) +
  scale_x_discrete(name = '', labels = NULL) +
  scale_y_continuous(name = 't-value', limits = c(-55,55)) +
  scale_fill_distiller(palette = 'Spectral', limits = c(-max(data$t), max(data$t))) +
  scale_color_manual(values = c('black' = 'black', 'grey' = 'grey')) +
  coord_flip() +
  theme_bw() +
  theme(
    panel.grid = element_blank(),
    axis.ticks.y =  element_blank(),
    legend.position = 'none'
  )

ggsave('plots/gsva_hallmark_gene_sets_monocytes_vs_hsc.png', height = 7.5, width = 7.5)

图片.png

通讯热图

library(pheatmap)
library(argparse)

heatmap <- function (Matrix,labels){
     plot_heatmap <- pheatmap(Matrix,scale='none',cluster_cols = F,cluster_rows = F,cellwidth = 20,cellheight = 20,color = colorRampPalette(colors = c("white","red"))(100),display_numbers=labels)
     return(plot_heatmap)
}

getSig <- function(dc) {
if(dc > pvalue){sc <- ""}
if(dc < pvalue){sc <- "*"}}

parser = ArgumentParser()
parser$add_argument("--LR_score", help="the file of sample.LR.score.xls",required =T)
parser$add_argument("--pvalue", help="the file of sample.LR.score.xls",default = '0.1')
parser$add_argument("--outdir", help="the outdir",default='.')
args <- parser$parse_args()
str(args)

LR_score = args$LR_score
pvalue = args$pvalue
outdir = args$outdir

pvalue <- as.numeric(pvalue)

if(!dir.exists(outdir)){
  dir.create(outdir)
}

setwd(outdir)
file <- read.table(LR_score,sep='\t',header=T,row.names=1,check.names=F)
len <- as.numeric(length(colnames(file)))
Max <- max(as.numeric(sapply(strsplit(colnames(file),'_'),"[",2)[1:len/2]))

for (i in 1:Max){
index_score <- as.array(which(sapply(strsplit(colnames(file),'_'),"[",1) == paste0('cluster',as.character(i))))
index_pvalue <- as.array(which(sapply(strsplit(colnames(file),'_'),"[",2) == paste0('pvalue',as.character(i))))
index <- c(index_score,index_pvalue)
Matrix <- file[,index]
LR <- rownames(as.matrix(sort(apply(Matrix,1,sum),decreasing=T)[1:30]))
Matrix_LR <- Matrix[LR,]
label <- as.matrix(sapply(Matrix_LR[,as.numeric(Max+1):as.numeric(length(colnames(Matrix_LR)))],function(x){ifelse(x>pvalue,sc<-"",sc<-"*")}),ncol=Max)
pdf(paste0('cluster',i,'_others_LR.pdf'),width=6,height=14)
heatmap(Matrix_LR[,1:Max],label)
dev.off()
png(paste0('cluster',i,'_others_LR.png'),width=6*200,height=14*200,res=200,type = 'cario-png')
dev.off()
}

图片.png

生活很好，有你更好

10X单细胞（10X空间转录组）分析回顾之一些细节绘图操作

第一张图，Cluster之间的相关性

Silhouette plot

Cluster tree

比例图

SNN graph

UMAP图

Alluvial plots

UMAP by cell type

Dot plot (multiple genes)

Gene set enrichment analysis

通讯热图

相关文章

网友评论

延伸阅读

深度阅读

栏目导航

热点阅读

单细胞测序技术

ggplot2高级绘图操作

单细胞数据分析

单细胞测序

学习

单细胞测序