rm(list = ls())
setwd("/home/project/XCL/22.2.RNA/Data/CleanData/rsemresults")
library(data.table)
library(dplyr)
###########################读入文件
all.files <- c(paste(24:29,".genes.results",sep=""))
#############################
uniq.num <- function(x){
y<-fread(x,data.table = F)
y<-y[,c(1,5,6)]
return(y)
}
uniq.num.df <- data.frame(num = lapply(all.files, uniq.num))
#去掉4,7,10,13,16列,seq(4,16,by=3)
uniq.num.df <- uniq.num.df[,-seq(4,16,by=3)]
#########更改列名
name<-c("CDVAT1","CDVAT2","CDVAT3","HFDVAT1","HFDVAT2","HFDVAT3")
#2,4,6,12列为
colnames(uniq.num.df)[seq(2,12,by=2)]<-c(paste(name,".count",sep=""))
##3,5,13列为
colnames(uniq.num.df)[seq(3,13,by=2)]<-c(paste(name,".TPM",sep=""))
##
colnames(uniq.num.df)[1]<-"ENSEMBL"
#############将ENSEMBL ID转换成Gene ID(ENTREZID)或者Symbol
library(org.Mm.eg.db)
#huang
g2e=toTable(org.Mm.egENSEMBL)
g2s=toTable(org.Mm.egSYMBOL)
idname<-merge(g2e,g2s,by="gene_id")
names(idname)<-c("order","ENSEMBL","symbol")
n.totol<-merge(idname,uniq.num.df,by="ENSEMBL")
###得到表达矩阵
write.csv(n.totol,file="/home/yifan/project/XCL/22.2.RNA/Data/CleanData/R/n.totol.csv",col.names = T)
#统计重复基因数目
table(duplicated(n.totol$symbol))
library(limma)
#对重复基因名取平均表达量
if(sum(duplicated(n.totol$symbol))>0) #判断是否有重复基因
expr<-avereps(n.totol,ID=n.totol$symbol)
#获得expr表达矩阵
expr<-as.data.frame(expr)
##############过滤低表达基因
gene<-expr$symbol
expr<-lapply(expr,as.numeric) %>% as.data.frame()
expr<-expr[,-c(1:3)]
row.names(expr)<-gene
#过滤半数样本以上为0
filter.gene=ncol(expr)/2
keep<-rowSums(expr>0) >= filter.gene #a Count>0 in at least half number of samples
expr.filter <- expr[keep,]
##去掉TPM,2,4,12
expr<-expr.filter[,-seq(2,12,by=2)]
############################差异表达
library(DESeq2)
library(dplyr)
###制作group来分组
group<-c(rep("Control",3),rep("Model",3))
metadata<-data.frame(group)
#取整
round1<-round(expr)
#差异
mRNA_dds<-DESeqDataSetFromMatrix(countData=round1 ,
colData=metadata,
design=~group)
mRNA2_dds<- DESeq(mRNA_dds) #运行很慢
#####################contrast后面三个参数,先列名,再分子和分母(前除以后,正为前者高表达)
resMvsC<- results(mRNA2_dds ,contrast = c("group","Model","Control"),tidy=TRUE) #results提取结果
write.csv(resMvsC, file ="/home/project/XCL/22.2.RNA/Data/CleanData/R/resMvsC.csv",col.names = T )
网友评论