##############
#参考链接:[https://mp.weixin.qq.com/s/0VPAy8f9u5ol8ryFZbmk7Q]
BiocManager::install("PoisonAlien/TCGAmutations")
library(TCGAmutations)
library(maftools)
library(dplyr)
proj='TCGA-LIHC'
laml = tcga_load(study = "LIHC")
laml
###############
#载入分组的信息Cluster.rda
#将两个表的样本ID统一
#> Hugo_Symbol Chromosome Start_Position End_Position
#> 1 NMT2 10 15154934 15154934
#> 2 ARHGAP42 11 100845194 100845194
#> 3 RAG1 11 36597064 36597064
#> 4 NLRP14 11 7064166 7064166
library(stringr)
length(unique(str_sub(mut$Tumor_Sample_Barcode,1,16)))
# 以病人为中心,表达矩阵按病人ID去重复
#exprSet <- exp_id
#k = !duplicated(str_sub(colnames(exprSet),1,12));table(k)
#exprSet = exprSet[,k]
#调整meta的ID顺序与exprSet列名一致
#meta=meta[match(str_sub(colnames(exprSet),1,12),meta$ID),]
#identical(meta$ID,str_sub(colnames(exprSet),1,12))
#colnames(exprSet) <- str_sub(colnames(exprSet),1,16)
#k = colnames(exprSet) %in% unique(str_sub(mut$Tumor_Sample_Barcode,1,16));table(k)
#expm = exprSet[,k]
#重新赋值避免影响原始数据
maf_object<- laml
a=laml@data
#############整理分组信息
ex2 = read.csv("SASP_group.csv",
row.names = 1, check.names = F)
# 选择 group 列并保留行名
Cluster <- ex2[, "group", drop = FALSE] # drop = FALSE提取一列时保证提取后还是数据框
table(Cluster$group)
Cluster$Cluster=ifelse(Cluster$group=="high MRGPI","high MRGPI",'low MRGPI')
Cluster$Cluster =factor(Cluster$Cluster,levels=c("high MRGPI",'low MRGPI'))
#Cluster <- Cluster[, "Cluster", drop = FALSE]
save(Cluster,file = "Cluster.Rda")
#确定分组信息的ID完全在突变数据里面,此时需要对突变患者编码进行截取前12位
Cluster=Cluster[rownames(Cluster)%in%substr(maf_object@data$Tumor_Sample_Barcode, 1, 12),,drop = FALSE]
save(Cluster,file = "Cluster.Rda")
load("Cluster.Rda")
#group列转换成因子
Cluster$group =factor(Cluster$group,levels=c("high MRGPI",'low MRGPI'))
#删除cluster列,行名为患者ID,列为group,且为因子
Cluster=Cluster[,-2,drop=FALSE]
class(Cluster$group)
#”factor“
table(Cluster$group)
#high MRGPI low MRGPI
#178 179
save(Cluster,file = "Cluster.Rda")
#设置分组信息
#手动读取分组Cluster.rda
#将行名改为列
Cluster=rownames_to_column(Cluster,var = "symbol")
#根据分组信息对患者ID进行重新排序
Cluster=Cluster[order(Cluster$group),]
#行名为空
rownames(Cluster)=NULL
#行名转换为患者ID
Cluster=column_to_rownames(Cluster,var="symbol")
save(Cluster,file = "Cluster.Rda")
#载入分组的信息Cluster.rda
#将两个表的样本ID统一
#把突变样本ID转化为分组样本的患者ID
a$Tumor_Sample_Barcode = substring(a$Tumor_Sample_Barcode,1,12)
group=rownames_to_column(Cluster,var="sample")
colnames(group)[1]="Tumor_Sample_Barcode"
#把分组文件各自拆分
group_high=group[group$group=="high MRGPI",]#178个
group_low=group[group$group=="low MRGPI",]#179个
#提取cluster的maf文件
maf_1=a[a$Tumor_Sample_Barcode%in%group_high$Tumor_Sample_Barcode,]
maf_2=a[a$Tumor_Sample_Barcode%in%group_low$Tumor_Sample_Barcode,]
#构建两组的maf文件
maf.coad_high=read.maf(maf=maf_1)
maf.coad_low=read.maf(maf=maf_2)
# 3.2作图
#绘制ICD-high瀑布图
oncoplot(maf = maf.coad_high,
top = 20, #显示前10个的突变基因信息
fontSize = 0.6, #设置字体大小
showTumorSampleBarcodes = F)
#绘制ICD-low瀑布图
oncoplot(maf = maf.coad_low,
top = 20, #显示前30个的突变基因信息
fontSize = 0.6, #设置字体大小
showTumorSampleBarcodes = F)
#maftools自带可视化函数plotmaf总结,可以比较分析统计maf文件的数据。
#if (as.numeric(dev.cur()) != 1) graphics.off()
plotmafSummary(maf = maf.coad_high, rmOutlier = TRUE,
#showBarcodes = FALSE,
addStat = 'median', dashboard = TRUE, titvRaw = FALSE)
plotmafSummary(maf = maf.coad_low, rmOutlier = TRUE,
#showBarcodes = FALSE,
addStat = 'median', dashboard = TRUE, titvRaw = FALSE)
###########3
#归纳每个基因的突变
getGeneSummary(laml)
lollipopPlot(maf = laml, gene = 'LIPG',
AACol = 'HGVSp_Short', showMutationRate = TRUE)
#指定基因的突变
g <- c("ADRA1A","TRPM8","AR","EPHX2","WEE1","PFKFB3","PIM3","ESRRA","RORC","LIPG","P2RY1","SFRP1","PLK3","AHCY","PPARD","ADCY1")
oncoplot(maf = laml,genes = g, fontSize = 0.7)
网友评论