美文网首页
2018-01-14

2018-01-14

作者: 一路向前_莫问前程_前程似锦 | 来源:发表于2018-01-14 16:35 被阅读51次

TCGA数据库下载


library(TCGAbiolinks)

library(stringr)

## Download from the GDC Data Portal using TCGAbiolinks.
## The query selects gene-expression quantification files (HTSeq counts)
## from RNA-Seq experiments in the TCGA-LAML project.
query <- GDCquery(
  project = "TCGA-LAML",
  legacy = FALSE,
  experimental.strategy = "RNA-Seq",
  data.category = "Transcriptome Profiling",
  data.type = "Gene Expression Quantification",
  workflow.type = "HTSeq - Counts"
)

## Fetch the files matched by the query.
GDCdownload(query)

# GDCprepare: reads the downloaded data and prepares it into an R object.
# FALSE is spelled out because `F` is an ordinary variable that can be
# reassigned elsewhere in a session.
dataAssy <- GDCprepare(query, summarizedExperiment = FALSE)

# Promote the first column to row names, then remove it from the table.
# NOTE(review): assumes the first column holds unique feature identifiers
# (row names must be unique) -- confirm against the GDCprepare output.
rownames(dataAssy) <- dataAssy[[1]]
# drop = FALSE keeps a table even when only one sample column remains.
dataAssy <- dataAssy[, -1, drop = FALSE]

# Shorten each column name to "TCGA" plus the next three dash-separated
# fields; a column name that does not match the pattern becomes NA.
colnames(dataAssy) <- str_match(
  colnames(dataAssy),
  "(TCGA-[^-]*-[^-]*-[^-]*)"
)[, 2]

# Build the output table with the identifiers as an explicit first column.
dataAssyout <- cbind(rownames(dataAssy), dataAssy)
colnames(dataAssyout)[1] <- "Symbol"
# cbind may yield a factor on older R versions; force plain character.
dataAssyout$Symbol <- as.character(dataAssyout$Symbol)

str(dataAssyout)

# Last rows of the table, kept as a small sample for trying out the
# identifier clean-up; Symbol is already character at this point.
tt <- tail(dataAssyout)

#for(i in 1:nrow(dataAssyout)){

# dataAssyout$Symbol[i]=str_split(dataAssyout$Symbol[i],"\\.")[[1]][1]

#}

# Strip everything from the first "." onwards, e.g. "ENSG00000000003.13"
# becomes "ENSG00000000003".
#
# x: character vector of identifiers (other types are coerced with
#    as.character()).
# Returns an unnamed character vector the same length as x. Strings without
# a "." are returned unchanged, NA stays NA, and zero-length input yields
# character(0).
my_function <- function(x) {
  # as.character() also drops names, so the result is a plain vector.
  sub("\\..*$", "", as.character(x))
}

# Remove the version suffix from every identifier. vapply calls my_function
# once per element and checks that each call returns a single string, so no
# detour through a data.frame/matrix is needed.
tt$Symbol <- vapply(tt$Symbol, my_function, character(1), USE.NAMES = FALSE)

dataAssyout$Symbol <- vapply(
  dataAssyout$Symbol,
  my_function,
  character(1),
  USE.NAMES = FALSE
)

head(dataAssyout)

## Drop the first five rows.
## NOTE(review): presumably these are summary rows rather than genes --
## confirm against the printed head() before relying on this.
dataAssyout2 <- dataAssyout[-seq_len(5), ]

head(dataAssyout2)

相关文章

网友评论

      本文标题:2018-01-14

      本文链接:https://www.haomeiwen.com/subject/xvnkoxtx.html