与之前学过的AUCell一样,Seurat里面自带的函数AddModuleScore也可以给细胞打分,我用之前拿的数据,把AUCell换成AddModuleScore玩一下。
1.提取指定GOterm的基因
在这篇文章里提到,Reactive oxygen species (ROS) 活性氧相关的17个GO条目,可以把这些基因提取出来。使用msigdbr包搞定。Data_Sheet_2.XLSX是文章的附件
https://www.ncbi.nlm.nih.gov/pmc/articles/PMC7606976/bin/Data_Sheet_2.XLSX
rm(list = ls())
library(tidyverse)
dat = rio::import("Data_Sheet_2.XLSX")
g = dat[-1,1]
g
## [1] "GO_REACTIVE_OXYGEN_SPECIES_BIOSYNTHETIC_PROCESS"
## [2] "GO_REACTIVE_OXYGEN_SPECIES_METABOLIC_PROCESS"
## [3] "GO_POSITIVE_REGULATION_OF_REACTIVE_OXYGEN_SPECIES_BIOSYNTHETIC_PROCESS"
## [4] "GO_NEGATIVE_REGULATION_OF_REACTIVE_OXYGEN_SPECIES_BIOSYNTHETIC_PROCESS"
## [5] "GO_NEGATIVE_REGULATION_OF_REACTIVE_OXYGEN_SPECIES_METABOLIC_PROCESS"
## [6] "GO_POSITIVE_REGULATION_OF_REACTIVE_OXYGEN_SPECIES_METABOLIC_PROCESS"
## [7] "GO_MITOCHONDRIAL_ELECTRON_TRANSPORT_CYTOCHROME_C_TO_OXYGEN"
## [8] "GO_SUPEROXIDE_GENERATING_NADPH_OXIDASE_ACTIVITY"
## [9] "GO_HYDROGEN_PEROXIDE_METABOLIC_PROCESS"
## [10] "GO_HYDROGEN_PEROXIDE_CATABOLIC_PROCESS"
## [11] "GO_PENTOSE_METABOLIC_PROCESS"
## [12] "GO_RESPONSE_TO_REACTIVE_OXYGEN_SPECIES"
## [13] "GO_REGULATION_OF_RESPONSE_TO_REACTIVE_OXYGEN_SPECIES"
## [14] "GO_CELLULAR_RESPONSE_TO_REACTIVE_OXYGEN_SPECIES"
## [15] "GO_REGULATION_OF_REACTIVE_OXYGEN_SPECIES_BIOSYNTHETIC_PROCESS"
## [16] "GO_REGULATION_OF_REACTIVE_OXYGEN_SPECIES_METABOLIC_PROCESS"
## [17] "GO_NEGATIVE_REGULATION_OF_RESPONSE_TO_REACTIVE_OXYGEN_SPECIES"
17条term👆
2.从msigdbr查找这些term
library(msigdbr)
GO_df = msigdbr(species = "Homo sapiens",category = "C5") %>%
dplyr::select(gene_symbol,gs_name,gs_subcat)
dim(GO_df)
## [1] 1424471 3
table(GO_df$gs_subcat)
##
## GO:BP GO:CC GO:MF HPO
## 721379 115769 122674 464649
GO_df = GO_df[GO_df$gs_subcat!="HPO",]
table(g %in% GO_df$gs_name)
##
## FALSE
## 17
全部匹配失败啦?
其实是因为前缀不同
head(GO_df$gs_name,3)
## [1] "GOBP_10_FORMYLTETRAHYDROFOLATE_METABOLIC_PROCESS"
## [2] "GOBP_10_FORMYLTETRAHYDROFOLATE_METABOLIC_PROCESS"
## [3] "GOBP_10_FORMYLTETRAHYDROFOLATE_METABOLIC_PROCESS"
head(g,3)
## [1] "GO_REACTIVE_OXYGEN_SPECIES_BIOSYNTHETIC_PROCESS"
## [2] "GO_REACTIVE_OXYGEN_SPECIES_METABOLIC_PROCESS"
## [3] "GO_POSITIVE_REGULATION_OF_REACTIVE_OXYGEN_SPECIES_BIOSYNTHETIC_PROCESS"
把前缀去掉来匹配,顺便把大写变成小写,变得易读一点。
sn = GO_df$gs_name %>%
str_to_lower()%>%
str_replace("_","///") %>%
str_split("///",simplify = T)%>%
.[,2] %>%
str_replace_all("_"," ")
head(sn)
## [1] "10 formyltetrahydrofolate metabolic process"
## [2] "10 formyltetrahydrofolate metabolic process"
## [3] "10 formyltetrahydrofolate metabolic process"
## [4] "10 formyltetrahydrofolate metabolic process"
## [5] "10 formyltetrahydrofolate metabolic process"
## [6] "10 formyltetrahydrofolate metabolic process"
GO_df = mutate(GO_df,short_name = sn)
g = str_to_lower(g)%>%
str_replace("_","///") %>%
str_split("///",simplify = T)%>%
.[,2] %>%
str_replace_all("_"," ")
head(g)
## [1] "reactive oxygen species biosynthetic process"
## [2] "reactive oxygen species metabolic process"
## [3] "positive regulation of reactive oxygen species biosynthetic process"
## [4] "negative regulation of reactive oxygen species biosynthetic process"
## [5] "negative regulation of reactive oxygen species metabolic process"
## [6] "positive regulation of reactive oxygen species metabolic process"
table(g %in% GO_df$short_name)
##
## FALSE TRUE
## 1 16
经过数据框搜索,发现匹配不上的那个其实是因为少了俩空格。。。
所以直接改一下就好。
“superoxide generating nadph oxidase activity”
“superoxide generating nad p h oxidase activity”
g[!(g %in% GO_df$short_name)] = "superoxide generating nad p h oxidase activity"
table(g %in% GO_df$short_name)
##
## TRUE
## 17
rosgene = GO_df %>%
filter(short_name %in% g) %>%
pull(gene_symbol) %>%
unique()
length(rosgene)
## [1] 402
2.打分
与之前做过的Aucell不同,这个是seurat自带的函数,使用起来更加简单
sce.all是seurat+singleR得出的对象,markers是findallmarkers得到的数据框。
load("sce.all.Rdata")
load("makers.Rdata")
k = markers$p_val_adj < 0.05 &
abs(markers$avg_log2FC) > 1.5 &
markers$pct.1 > 0.5 &
markers$pct.2 < 0.5
table(k)
## k
## FALSE TRUE
## 13100 1483
gi = intersect(markers[k,"gene"],rosgene)
#32个
library(Seurat)
DotPlot(sce.all,features = gi,cols = "RdYlBu",
group.by = "RNA_snn_res.0.5")+RotatedAxis()
sce.all = AddModuleScore(object = sce.all,list(gi))
colnames(sce.all@meta.data)
## [1] "orig.ident" "nCount_RNA" "nFeature_RNA" "patient"
## [5] "time" "sample_ID" "clst" "TSNE_x"
## [9] "TSNE_y" "percent.mt" "percent.hb" "RNA_snn_res.0.5"
## [13] "seurat_clusters" "Cluster1"
这个分数是直接加到meta.data里面的咯。列名也有默认,叫Cluster1.
3.FeaturePlot可视化
标准版:
library(Seurat)
FeaturePlot(sce.all,features = "Cluster1")
定制版:
dat<- data.frame(sce.all@meta.data,
sce.all@reductions$umap@cell.embeddings,
sce.all@active.ident)
colnames(dat)[ncol(dat)] = "seurat_annotation"
class_avg <- dat %>%
group_by(seurat_annotation) %>%
summarise(
UMAP_1 = median(UMAP_1),
UMAP_2 = median(UMAP_2)
)
library(ggpubr)
ggplot(dat, aes(UMAP_1, UMAP_2)) +
geom_point(aes(colour = Cluster1)) + viridis::scale_color_viridis(option="A") +
ggrepel::geom_label_repel(aes(label = seurat_annotation),
data = class_avg,
label.size = 0,
segment.color = NA)+
theme(legend.position = "none") + theme_bw()
dat$score_group = ifelse(dat$Cluster1>median(dat$Cluster1),"active","non_active")
4.活跃和不活跃组的细胞比例条形图
ggplot(dat,aes(score_group,fill = seurat_annotation))+
geom_bar(position = "fill", alpha = 0.9)+
scale_fill_brewer(palette = "Set1")+
theme_classic()
网友评论