美文网首页
差异分析什么是control

差异分析什么是control

作者: 牛顿大尉 | 来源:发表于2020-12-18 11:53 被阅读0次

另外一个就是指定哪一组作为control组,在计算log2FD时 ,需要明确control组,默认会字符串顺序对分组的名字进行排序,排在前面的作为control组,这种默认行为选出的control可能与我们的实验设计不同,所以必须明确指定control组。

library(DESeq2)
setwd('F:/wgcna/差异表达/')
raw_count=read.csv('lung.csv',sep=",",header=T)
count_data<-raw_count[,2:7]
row.names(count_data) <- raw_count[,1]
condition <- factor(c("L.cow_lung","L.cow_lung","L.cow_lung" ,"H.yak_lung","H.yak_lung","H.yak_lung"))
col_data <- data.frame(row.names = colnames(count_data), condition)
colData
dds <- DESeqDataSetFromMatrix(countData = count_data,colData = col_data,design = ~ condition)

nrow(dds)
dds_filter <- dds[ rowSums(counts(dds))>1, ]
nrow(dds_filter)#过滤了多少
dds_out = DESeq(dds_filter)

res = results(dds_out, contrast=c("condition", "L.cow_lung", "H.yak_lung"))
res = res[order(res$pvalue),]

head(res)
summary(res)
#write.csv(res,file="lung_deseq.csv")
table(res$padj<0.01)
#提取基因
diff_gene_deseq2 <-subset(res, padj < 0.01 & abs(log2FoldChange) > 1)
dim(diff_gene_deseq2)
head(diff_gene_deseq2)
summary(diff_gene_deseq2)
#write.csv(diff_gene_deseq2,file= "lung_DEG_0.05_2.csv")
#---------------可视化-----------------
#PCA
vsdata <- rlog(dds, blind=FALSE)#vst和rlog都是归一化的方法
plotPCA(vsdata, intgroup="condition")
#beatifule pca plot
#pcaData <- plotPCA(vsdata, intgroup=c("condition"), returnData=TRUE)
#percentVar <- round(100 * attr(pcaData, "percentVar"))
pdf(file = 'lung_pca.pdf')
#ggplot(pcaData, aes(PC1, PC2, color=condition, shape=condition)) +
#  geom_point(size=3) +
# xlab(paste0("PC1: ",percentVar[1],"% variance")) +
  #ylab(paste0("PC2: ",percentVar[2],"% variance")) +
  #coord_fixed()
#+ggtitle("A")+theme(plot.title = element_text(hjust = 0.5))


#想要显示名称的话
pcaData <- plotPCA(vsdata, intgroup=c("condition"), returnData=TRUE)
plot(pcaData[,1:2],pch=19,col=c("red","red","red","blue","blue","blue"))
text(pcaData[,1],pcaData[,2]+0.2,row.names(pcaData),cex=0.5)
dev.off()

pdf(file = '火山图_lung.pdf')
EnhancedVolcano(res,
                lab = rownames(res),
                x = 'log2FoldChange',
                y = 'pvalue',
                xlim = c(-8, 8),
                )
dev.off

library(genefilter)
library(pheatmap)

rld <- rlogTransformation(dds_out,blind = F)
#write.csv(assay(rld),file="mm.DESeq2.pseudo.counts.csv")

topVarGene <- head(order(rowVars(assay(rld)),decreasing = TRUE),20)
mat  <- assay(rld)[ topVarGene, ]
### mat <- mat - rowMeans(mat) 减去一个平均值,让数值更加集中。第二个图
anno <- as.data.frame(colData(rld)[,c("condition","sizeFactor")])
pdf(file = 'lung_heatmap.pdf')
pheatmap(mat, annotation_col = anno)## clustering_method参数设定不同聚类方法,默认为"complete",可以设定为'ward', 'ward.D', 'ward.D2', 'single', 'complete', 'average', 'mcquitty', 'median' or 'centroid'

dev.off()

相关文章

网友评论

      本文标题:差异分析什么是control

      本文链接:https://www.haomeiwen.com/subject/oepxnktx.html