从PRAD开始重新得到差异基因数目(n=2852),与铁死亡基因(n=255)的交集,得到19个基因做热图
library(ComplexHeatmap)
library(limma )
library(edgeR)
library(TCGAbiolinks)
library(EDASeq)
setwd("C:\\Users\\Administrator.DESKTOP-4UQ3Q0K\\Desktop\\国奖")
seq<-read.csv("mk_fm.csv")
setwd("C:\\Users\\Administrator.DESKTOP-4UQ3Q0K\\Desktop")
# 标准化
query <- GDCquery(project ="TCGA-PRAD",
data.category = "Transcriptome Profiling",
data.type ="Gene Expression Quantification" ,
workflow.type="HTSeq - Counts")
# 551个barcode
samplesDown <- getResults(query,cols=c("cases"))
# samplesDown中筛选出TP(主要实体瘤)样本的barcode
# 498个肿瘤样本barcode
dataSmTP <- TCGAquery_SampleTypes(barcode = samplesDown,
typesample = "TP")
# 52个正常组织barcode
dataSmNT <- TCGAquery_SampleTypes(barcode = samplesDown,
typesample = "NT")
dataSmTP_short <- dataSmTP[1:498]
dataSmNT_short <- dataSmNT[1:52]
# 根据前面的筛选,再次请求数据
queryDown <- GDCquery(project = "TCGA-PRAD",
data.category = "Transcriptome Profiling",
data.type = "Gene Expression Quantification",
workflow.type = "HTSeq - Counts",
barcode = c(dataSmTP, dataSmNT))
#读取下载的数据并将其准备到R对象中,在工作目录生成brca_case1.rda文件,
#同时还产生Human_genes__GRCh38_p12_.rda文件
GDCdownload(query = queryDown)
dataPrep1 <- GDCprepare(query = queryDown, save = TRUE, save.filename = "prad_case1.rda")
#数据处理
# 去除dataPrep1中的异常值,dataPrep数据中含有肿瘤组织和正常组织的数据
dataPrep <- TCGAanalyze_Preprocessing(object = dataPrep1,
cor.cut = 0.6,
datatype = "HTSeq - Counts")
#使用EDASeq进行标准化
dataNorm <- TCGAanalyze_Normalization(tabDF = dataPrep,
geneInfo = geneInfoHT,
method = "gcContent")
# 使用分位数来过滤基因
dataFilt <- TCGAanalyze_Filtering(tabDF = dataNorm, method = "quantile", qnt.cut = 0.25)
# 挑选正常样本:NT
samplesNT <- TCGAquery_SampleTypes(barcode = colnames(dataFilt), typesample = c("NT"))
# 挑选肿瘤样本:TP
samplesTP <- TCGAquery_SampleTypes(barcode = colnames(dataFilt), typesample = c("TP"))
# 差异表达分析
dataDEGs <- TCGAanalyze_DEA(
mat1 = dataFilt[, samplesNT],
mat2 = dataFilt[, samplesTP],
Cond1type = "Normal",
Cond2type = "Tumor",
fdr.cut = 0.01 ,
logFC.cut = 1,
method = "glmLRT"
)
# 获取差异表达基因的表达水平
dataDEGsFiltLevel <- TCGAanalyze_LevelTab(
dataDEGs, "Tumor", "Normal",
dataFilt[, samplesTP], dataFilt[, samplesNT]
)
Ensembl_ID<- data.frame(Ensembl_ID=row.names(dataDEGsFiltLevel))
rownames(Ensembl_ID)<-Ensembl_ID[,1]
dataDEGsFiltLevel<-cbind(Ensembl_ID,dataDEGsFiltLevel)
get_map=function(input){
if(is.character(input)){
if(!file.exists(input)) stop("Bad input file.")
message('Treat input as file')
input=data.table::fread(input,header=FALSE)
}else{
data.table::setDT(input)
}
input=input[input[[3]]=='gene',]
pattern_id=".*gene_id \"([^;]+)\";.*"
pattern_name=".*gene_name \"([^;]+)\";.*"
gene_id=sub(pattern_id,"\\1",input[[9]])
gene_name=sub(pattern_name,"\\1",input[[9]])
Ensembl_ID_TO_Genename<-data.frame(gene_id=gene_id,
gene_name=gene_name,
stringsAsFactors = FALSE)
return(Ensembl_ID_TO_Genename)
}
Ensembl_ID_TO_Genename<-get_map("gencode.v39.annotation.gtf.gz")
gtf_Ensembl_ID<-substr(Ensembl_ID_TO_Genename[,1],1,15)
Ensembl_ID_TO_Genename<-data.frame(gtf_Ensembl_ID,Ensembl_ID_TO_Genename[,2])
colnames(Ensembl_ID_TO_Genename)<-c("Ensembl_ID","gene_id")
#融合数据
mergeRawCounts<-merge(Ensembl_ID_TO_Genename,dataDEGsFiltLevel,by="Ensembl_ID")
dataDEGsFiltLevel<-mergeRawCounts[,-c(1,3)]
#差异基因筛选
etSig <-dataDEGsFiltLevel[which(dataDEGsFiltLevel$FDR<0.05&abs(dataDEGsFiltLevel$logFC)>1),]
# 加入一列,up_down 体现上下调信息
etSig[which(etSig$logFC>0),"up_down"]<-"Up"
etSig[which(etSig$logFC<0),"up_down"]<-"Down"
# 保存数据到本地
options(scipen = 200)
write.table(etSig,".\\基因差异表达水平.xls",sep ="\t",col.names=TRUE, row.names = FALSE, quote =FALSE, na="")
#绘制热图
library(ComplexHeatmap)
library('circlize')
library(dplyr)
library(ggplot2)
library(ggrepel)
library(pheatmap)
# 去除name列里面的引号
data= gsub('["]', '', data)
# 去除name列里面的逗号
data= gsub('[,]', '', data)
#data<-etSig[c(-2,-3,-6,-7,-8,-9,-10)]
data<-read.table("txt.txt",sep="\t")
data<-data[c(-2,-3,-6,-7,-8,-9,-10)]
rownames(data)<-data[,1]
colnames(data)<-data[1,]
data<-data[,-1]
data<-data[-1,]
data<-apply(data, c(1,2),as.numeric)
Group = factor("Tumor", "Normal")
anndf <- data.frame(Group)
由于只有一组数据,一开始热图颜色没有渐变,不太美观,尝试了多种颜色方案,选择了最后一种
htmap<- pheatmap(data, scale = "row",
border="white",
cellwidth = 18,
cellheight = 6,
color = mycol,
fontsize = 7,
fontsize_col = 6,
fontsize_row = 6,
treeheight_row=30,
treeheight_col=30,
annotation_col=anndf,
annotation_colors=anncol,
annotation_legend=F,
legend_breaks=c(-2.5,-1.5,0,1.5,2.5),
legend_labels=c("-2.5","-1.5","0","1.5","2.5"),
angle_col = 90)
order <- htmap$tree_row$order
genelist <- row.names(data)[order]
cluster <- htmap$tree_row
#绘制聚类树;
plot(cluster,hang = -1,cex=0.6,axes=FALSE,ann=FALSE)
df_scaled <- t(scale(t(data)))
range(df_scaled)
col_fun = colorRamp2(c(-0.8,0,0.7), c("greenyellow","white", "red"))
class = anno_block(gp = gpar(fill = c("#c77cff","#FF9999","#99CC00","#FF9900"),
col="white"),height = unit(5, "mm"),
labels = c("Tumor", "Normal"),
labels_gp = gpar(col = "white", fontsize = 8,fontface="bold"))
group= HeatmapAnnotation(group=class)
Heatmap(df_scaled,
rect_gp = gpar(col="white"),
row_names_gp = gpar(fontsize = 6),
column_names_gp = gpar(fontsize = 8),
col = col_fun,
column_split = 4,
row_split = 4,
column_title = NULL,
row_title = NULL,
cluster_rows = TRUE,
show_row_dend = TRUE, #当基因数较多时可以隐藏行方向的聚类树 = FALSE;
top_annotation =group,
name = "Exp")
library(RColorBrewer)
pheatmap(data, color = colorRampPalette(brewer.pal(9, "PiYG"))(100),display_numbers = TRUE,number_color = "white")

也可为色块标上数值:
pheatmap(data, color = colorRampPalette(brewer.pal(9, "PiYG"))(100),display_numbers = TRUE,number_color = "white")

网友评论