marker gene
三阴性乳腺癌:三阴性乳腺癌(TNBC)是指雌激素受体(ER)、孕激素受体(PR)和人表皮生长因子受体2(HER2)均为阴性的乳腺癌。
ER、PR、HER2并不是基因的规范命名,接下来需要找到三者对应的规范基因符号(symbol)。在GeneCards(https://www.genecards.org/)网站上搜索的结果为:ER对应ESR1和ESR2,PR对应PGR,HER2对应ERBB2。
# Heatmap of the probes annotated to the TNBC marker genes
# (ESR1/ESR2 = ER, PGR = PR, ERBB2 = HER2).
#
# NOTE: the original script started with rm(list = ls()); that silently wipes
# the caller's workspace, so it is intentionally not repeated here.
options(stringsAsFactors = FALSE)  # only relevant on R < 4.0

library(hgu133plus2.db)
library(pheatmap)

# The .Rdata file is expected to provide `exp` (probes x samples) and
# `group_list` (one label per sample); both are used below.
load(file = 'exp_group.Rdata')
exp[1:4, 1:4]

# probe_id <-> symbol table for the hgu133plus2 array
ids <- toTable(hgu133plus2SYMBOL)

# Keep only marker-gene probes that are actually present in `exp`;
# indexing `exp` by an absent probe id fails with "subscript out of bounds".
index <- ids$symbol %in% c("ERBB2", "ESR1", "ESR2", "PGR") &
  ids$probe_id %in% rownames(exp)
if (!any(index)) {
  stop("None of the marker-gene probes are present in rownames(exp)",
       call. = FALSE)
}
np <- ids$probe_id[index]  # probe_id
ng <- ids$symbol[index]    # symbol

# drop = FALSE keeps a 2-d object even if a single probe survives
marker_dat <- exp[np, , drop = FALSE]
rownames(marker_dat) <- paste(ng, np, sep = ":")

# Row-wise z-score (scale() works on columns, hence the double transpose),
# then cap at +/-2 so a few extreme values do not dominate the colour scale.
n <- t(scale(t(marker_dat)))
n[n > 2] <- 2
n[n < -2] <- -2

# Column annotation: one group label per sample, keyed by sample name
group_dat <- data.frame(group = group_list, row.names = colnames(exp))

pheatmap(n,
         annotation_col = group_dat,
         show_colnames = FALSE)
head(n)
n[1:3, 1:3]
1
# Density-scaled histogram of the expression values stored in the
# 'ERBB2:210930_s_at' row of `marker_dat`, with an overlaid density curve,
# a line at the mean and a rug of the individual values.
library(ggpubr)
wdata <- data.frame(v = as.numeric(marker_dat['ERBB2:210930_s_at', ]))
gghistogram(
  wdata,
  x = 'v', y = "..density..",
  add = "mean",
  add_density = TRUE,
  rug = TRUE
)
2.png
PAM50分类器
PAM50的基因表达谱(GEP)分型根据50个基因的表达情况,将乳腺癌分为不同的亚型,包括:
表达雌激素受体(ER)相关因子的亚型(管腔型,有A、B型两种);
表达人类表皮生长因子受体2(HER2)相关通路因子的亚型(HER2过表达亚型);
表达基底因子但不表达激素受体通路的基底样乳腺癌(BLBC)亚型。
# Collapse the probe-level matrix to one row per gene symbol, keeping for each
# symbol the probe with the highest median expression across samples.
#
# NOTE: the original script started with rm(list = ls()); that silently wipes
# the caller's workspace, so it is intentionally not repeated here.
options(stringsAsFactors = FALSE)  # only relevant on R < 4.0

# Expected to provide `exp` (probes x samples) and `group_list`
load(file = 'exp_group.Rdata')

library(hgu133plus2.db)
ids <- toTable(hgu133plus2SYMBOL)

# Restrict the annotation to probes present in `exp`; indexing by an absent
# probe id would otherwise fail with "subscript out of bounds".
ids <- ids[ids$probe_id %in% rownames(exp), ]
dat <- exp[ids$probe_id, ]

# Per-probe median, then sort so the highest-median probe of every symbol
# comes first and is the one retained by !duplicated().
ids$median <- apply(dat, 1, median)
ids <- ids[order(ids$symbol, ids$median, decreasing = TRUE), ]
ids <- ids[!duplicated(ids$symbol), ]

dat <- dat[ids$probe_id, ]
rownames(dat) <- ids$symbol
dat[1:3, 1:3]

# Transposed copy: samples in rows, gene symbols in columns
ddata <- t(dat)
ddata[1:4, 1:4]
# Build the annotation table that accompanies `ddata`: one row per column of
# `ddata`, carrying the gene symbol (also used as the probe id) and its
# Entrez gene id.
library(org.Hs.eg.db)

s <- colnames(ddata)
head(s)

# Entrez gene id <-> symbol table; the first column holds the gene id
s2g <- toTable(org.Hs.egSYMBOL)
head(s2g)

# Look up the gene id of every symbol (NA when the symbol is not found)
g <- s2g[[1]][match(s, s2g$symbol)]
head(g)

dannot <- data.frame(
  probe = s,
  "Gene.Symbol" = s,
  "EntrezGene.ID" = g
)
head(dannot)
dim(dannot) # 20161 3 (as recorded by the original author)

# Drop symbols without an Entrez id from both objects using one shared mask,
# so the columns of `ddata` stay aligned with the rows of `dannot`.
keep <- !is.na(dannot$EntrezGene.ID)
ddata <- ddata[, keep]
dannot <- dannot[keep, ]
dim(dannot) # 20161 3 (as recorded by the original author)
# PAM50 molecular subtyping with genefu.
# Install genefu only when it is missing; the unconditional install in the
# original re-ran the installer on every execution of the script.
if (!requireNamespace("genefu", quietly = TRUE)) {
  BiocManager::install("genefu")
}
library(genefu)

# `ddata` has samples in rows and genes in columns; `dannot` describes the
# columns of `ddata`. do.mapping = TRUE asks genefu to map the genes through
# the annotation table.
s <- molecular.subtyping(sbt.model = "pam50", data = ddata,
                         annot = dannot, do.mapping = TRUE)

# Subtype counts (the figures below are those recorded by the original author)
table(s$subtype)
# Basal Her2 LumB LumA Normal
# 134 18 46 54 13
# Heatmap of the PAM50 genes (unscaled expression values), annotated with the
# sample group and the predicted PAM50 subtype.
library(pheatmap)

# Columns 1 and 3 of the PAM50 centroid map; column 1 is `probe`
pam50genes <- pam50$centroids.map[c(1, 3)]

# Rename three entries of the centroid map so they can match rownames(dat)
pam50genes$probe[pam50genes$probe == 'CDCA1'] <- 'NUF2'
pam50genes$probe[pam50genes$probe == 'KNTC2'] <- 'NDC80'
pam50genes$probe[pam50genes$probe == 'ORC6L'] <- 'ORC6'

# Keep only the PAM50 genes that are present in the expression matrix
x <- dat
x <- x[pam50genes$probe[pam50genes$probe %in% rownames(x)], ]

# Column annotation: one row per sample, keyed by sample name
subtype <- as.character(s$subtype)
stopifnot(length(subtype) == ncol(x), length(group_list) == ncol(x))
tmp <- data.frame(group = group_list,
                  subtypes = subtype)
rownames(tmp) <- colnames(x)

# The original called `par = par(mfrow = c(1, 2))` here. pheatmap draws with
# grid graphics, which ignore base-graphics par(), and the assignment shadowed
# the `par` function with a list, so the call was dropped.
pheatmap(x,
         show_rownames = TRUE,
         show_colnames = FALSE,
         annotation_col = tmp)
3.png
# Same PAM50 heatmap on row-wise z-scores: scale() standardises columns, so
# the matrix is transposed before and after; values are then capped to the
# range [-2, 2].
n <- t(scale(t(x)))
n[n < -2] <- -2
n[n > 2] <- 2
n[1:4, 1:4]
pheatmap(
  n,
  annotation_col = tmp,
  show_colnames = FALSE,
  show_rownames = TRUE
)
4.png
网友评论