# Read the expression matrix.
# NOTE(review): the hard-coded setwd() ties this script to one machine; every
# relative path used by the script resolves against this directory.
setwd("E:/8.差异表达基因/")
# header = TRUE: the first line of the file supplies the column names.
a <- read.table("RNAmatrix.txt", header = TRUE)
# Remove duplicated genes: keep only the first row seen for each gene_name.
# (Indexing with a[a$gene_name, ] selects rows *by* name/code and does not
# deduplicate, so duplicated() is used instead.)
b <- a[!duplicated(a$gene_name), ]
# Move the first column into the row names, then drop that column.
# NOTE(review): assumes the first column holds the gene identifiers — confirm.
# The object is called `c` because later steps of the script refer to it by
# that name; calls such as c(1, 2) still reach base::c().
c <- b
rownames(c) <- c[, 1]
c <- c[, -1, drop = FALSE]
# Select the columns needed for the analysis.
# Template for excluding columns instead (fill in the indices before use;
# left unfilled it is a syntax error, so it is kept here as a comment only):
#   a <- c[, -c(<column indices>)]
# Keep columns 10 to 15 of the matrix.
c <- c[, 10:15]
dat1 <- as.data.frame(c)
# Quick sanity checks: dimensions and the top-left corner of the table.
dim(dat1)
dat1[1:4, 1:4]
# Many genes have an expression of zero in several samples. Drop every gene
# whose floored expression is 0 in more than 3 samples.
# The mask is computed once and reused; table() summarises how many genes are
# dropped instead of printing one logical value per gene.
low_expr <- rowSums(floor(as.matrix(dat1)) == 0) > 3
table(low_expr)
dat1 <- dat1[!low_expr, ]
dim(dat1)
head(dat1)
write.csv(dat1, "dat1.csv")
# Distribution of the raw values per sample.
boxplot(dat1)
# The values differ widely in scale, so log-transform them.
# dat3 is the plain log(); log(0) is -Inf, and the downstream analysis ended
# up with missing values, so log(dat1 + 1) (dat4) is the version used later.
dat3 <- log(dat1)
dat4 <- log(dat1 + 1)
# Compare the two transformed distributions per sample.
boxplot(dat3)
boxplot(dat4)
write.csv(dat3, "dat3.csv")
write.csv(dat4, "dat4.csv")
# Differential expression analysis with limma.
library(limma)

# Group labels: 50 "normal" followed by 374 "cancer".
# NOTE(review): assumed to follow the column order of dat4 — confirm.
group <- c(rep("normal", 50), rep("cancer", 374))
head(group)
# View() opens a data viewer window, so only call it in interactive sessions.
if (interactive()) {
  View(group)
}
group <- factor(group)

# The design matrix gets one row per label and lmFit() needs one row per
# sample column of dat4, so fail early with a clear message on a mismatch
# (e.g. when only a subset of columns was selected earlier in the script).
if (length(group) != ncol(dat4)) {
  stop(
    "group has ", length(group), " labels but dat4 has ", ncol(dat4),
    " sample columns",
    call. = FALSE
  )
}

# One design column per group and no intercept, so the contrast below can
# refer to the groups by name.
design <- model.matrix(~ 0 + group)
colnames(design) <- levels(group)
design

# Contrast is normal minus cancer: a positive logFC means higher in normal.
contrast.matrix <- makeContrasts(normal - cancer, levels = design)
contrast.matrix

fit <- lmFit(dat4, design)
fit2 <- contrasts.fit(fit, contrast.matrix)
fit2 <- eBayes(fit2)
# All genes, FDR-adjusted p-values; the argument name is spelled out rather
# than relying on partial matching of `adjust`.
allDiff1 <- topTable(fit2, adjust.method = "fdr", coef = 1, number = Inf)

# NOTE(review): save() writes R's binary format despite the .txt extension;
# the file has to be read back with load(), not read.table().
save(dat4, group, allDiff1, file = "RNAmatrix_result.txt")
write.csv(allDiff1, file = "normal_vs_cancer.csv")
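# Reader comments (appears to be a footer left over from the web page this
# script was copied from; not part of the analysis).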