美文网 > 首页 > 走进转录组
RNA-seq下游分析(2)-数据过滤及标准化

RNA-seq下游分析(2)-数据过滤及标准化

作者: 灵活胖子的进步之路 | 来源:发表于2020-12-09 22:18 被阅读0次
# Start from an empty workspace.
# NOTE(review): rm(list = ls()) wipes every object in the calling environment;
# kept because the script was written to run in a fresh interactive session.
rm(list = ls())
# Keep character columns as character instead of converting them to factors.
options(stringsAsFactors = FALSE)

library(foreign)#加载外部数据需要的包
library(stringr)#处理字符串需要的包
library(dplyr)#清洗数据需要的包

# Load the count matrices and grouping data saved in step 1. load() restores
# the objects into the current environment and returns their names.
lname <- load(file = "group.Rdata")
lname # show the names of the objects that were loaded
数据名称
# Flag the rows of a count table that have a non-zero count in at least
# `min_frac` of its columns.
#
# count:    numeric matrix or data frame of counts
#           (presumably genes in rows and samples in columns -- confirm
#           against the step-1 script that builds group.Rdata)
# min_frac: minimum fraction of columns that must have a count > 0
# returns:  logical vector with one element per row of `count`
#
# The required number of columns is clamped to at least 1, so rows that are
# zero everywhere are rejected even for a single-column table
# (floor(0.75 * 1) is 0, which would otherwise keep every row).
keep_expressed_rows <- function(count, min_frac = 0.75) {
  min_columns <- max(1, floor(min_frac * ncol(count)))
  rowSums(count > 0) >= min_columns
}

# Liver: row mask and filtered count table.
# drop = FALSE keeps the result two-dimensional even when only one row or
# one column is left.
keep_agc <- keep_expressed_rows(liver.count)
filter.liver.count <- liver.count[keep_agc, , drop = FALSE]

# Colon: row mask and filtered count table.
keep_acc <- keep_expressed_rows(colon.count)
filter.colon.count <- colon.count[keep_acc, , drop = FALSE]

# Blood (WBC): row mask and filtered count table.
keep_wbc <- keep_expressed_rows(wbc.count)
filter.wbc.count <- wbc.count[keep_wbc, , drop = FALSE]

以下对数据进行标准化

library(preprocessCore)#标准化需要包

# log2(count + 1) transform of each filtered count table, followed by
# quantile normalization. The row and column names of the filtered table are
# copied onto the normalized matrix afterwards.

normal.log2.colon <- normalize.quantiles(
  log2(as.matrix(filter.colon.count) + 1)
)
dimnames(normal.log2.colon) <- list(
  rownames(filter.colon.count),
  colnames(filter.colon.count)
)

normal.log2.liver <- normalize.quantiles(
  log2(as.matrix(filter.liver.count) + 1)
)
dimnames(normal.log2.liver) <- list(
  rownames(filter.liver.count),
  colnames(filter.liver.count)
)

normal.log2.wbc <- normalize.quantiles(
  log2(as.matrix(filter.wbc.count) + 1)
)
dimnames(normal.log2.wbc) <- list(
  rownames(filter.wbc.count),
  colnames(filter.wbc.count)
)


library(edgeR)#对样本进行CPM转化需要的包

# Convert each filtered count table to CPM, take log2(CPM + 1), then apply
# quantile normalization. The row and column names of the filtered table are
# copied onto the normalized matrix afterwards.
cpm_normal_liver <- normalize.quantiles(log2(cpm(filter.liver.count) + 1))
dimnames(cpm_normal_liver) <- list(
  rownames(filter.liver.count),
  colnames(filter.liver.count)
)


cpm_normal_colon <- normalize.quantiles(log2(cpm(filter.colon.count) + 1))
dimnames(cpm_normal_colon) <- list(
  rownames(filter.colon.count),
  colnames(filter.colon.count)
)

cpm_normal_wbc <- normalize.quantiles(log2(cpm(filter.wbc.count) + 1))
dimnames(cpm_normal_wbc) <- list(
  rownames(filter.wbc.count),
  colnames(filter.wbc.count)
)

# Save the normalized data together with the filtered counts.
# liver.list, colon.list and wbc.list are not created in this section;
# presumably they are restored from group.Rdata -- confirm.
objects_to_save <- c(
  "liver.list", "colon.list", "wbc.list",
  "filter.colon.count", "filter.liver.count", "filter.wbc.count",
  "cpm_normal_colon", "cpm_normal_liver", "cpm_normal_wbc",
  "normal.log2.liver", "normal.log2.colon", "normal.log2.wbc"
)
save(list = objects_to_save, file = "normal_data.Rdata")

相关文章

网友评论

    本文标题:RNA-seq下游分析(2)-数据过滤及标准化

    本文链接:https://www.haomeiwen.com/subject/nyukgktx.html