# Start from an empty global environment.
# NOTE(review): this deletes every object in the workspace, so run the script
# in a dedicated R session rather than one holding unsaved work.
rm(list = ls())
# Global option: do not convert strings to factors. Spelled `FALSE` rather
# than `F`, because `F` is an ordinary variable that can be reassigned.
options(stringsAsFactors = FALSE)
library(foreign)#加载外部数据需要的包
library(stringr)#处理字符串需要的包
library(dplyr)#清洗数据需要的包
# Load the matrix data and grouping data produced by step 1. `load()` restores
# the objects into the workspace and returns their names as a character vector.
lname <- load(file = "group.Rdata")
# Show the names of the objects that were restored.
lname

# Filter each count table: keep only rows that have a non-zero count in at
# least ~75% of the samples (columns), which also removes rows that are zero
# in every sample.

# Returns a logical vector with one entry per row of `counts`: TRUE when the
# row has a count > 0 in at least floor(min_fraction * ncol(counts)) columns.
# The threshold is clamped to at least one column so that all-zero rows are
# rejected even when the table has a single column (floor(0.75 * 1) is 0).
is_expressed_row <- function(counts, min_fraction = 0.75) {
  min_samples <- max(1, floor(min_fraction * ncol(counts)))
  rowSums(counts > 0) >= min_samples
}

# `drop = FALSE` keeps the result two-dimensional even if only one row
# survives; otherwise a matrix would collapse to a plain vector and lose its
# row/column layout for the normalization steps that follow.
keep_agc <- is_expressed_row(liver.count)
filter.liver.count <- liver.count[keep_agc, , drop = FALSE]  # filtered liver table

keep_acc <- is_expressed_row(colon.count)
filter.colon.count <- colon.count[keep_acc, , drop = FALSE]  # filtered colon table

keep_wbc <- is_expressed_row(wbc.count)
filter.wbc.count <- wbc.count[keep_wbc, , drop = FALSE]  # filtered blood-sample table
# Normalize the data below.
library(preprocessCore)#标准化需要包
# Log2-transform the filtered counts (with a +1 offset) and quantile-normalize
# them, once per tissue.

# Applies log2(count + 1) followed by normalize.quantiles() to one count
# table, then copies the table's row and column names onto the result.
log2_quantile_normalize <- function(counts) {
  normalized <- normalize.quantiles(log2(as.matrix(counts) + 1))
  dimnames(normalized) <- list(rownames(counts), colnames(counts))
  normalized
}

normal.log2.colon <- log2_quantile_normalize(filter.colon.count)
normal.log2.liver <- log2_quantile_normalize(filter.liver.count)
normal.log2.wbc <- log2_quantile_normalize(filter.wbc.count)
library(edgeR)#对样本进行CPM转化需要的包
# Convert the filtered counts to CPM, log2-transform them (with a +1 offset)
# and quantile-normalize them, once per tissue.

# Applies log2(cpm(counts) + 1) followed by normalize.quantiles() to one
# count table, then copies the table's row and column names onto the result.
cpm_log2_quantile_normalize <- function(counts) {
  normalized <- normalize.quantiles(log2(cpm(counts) + 1))
  dimnames(normalized) <- list(rownames(counts), colnames(counts))
  normalized
}

cpm_normal_liver <- cpm_log2_quantile_normalize(filter.liver.count)
cpm_normal_colon <- cpm_log2_quantile_normalize(filter.colon.count)
cpm_normal_wbc <- cpm_log2_quantile_normalize(filter.wbc.count)
# Save the normalized data together with the original (filtered) data.
# The *.list objects are not created in this script; presumably they come
# from group.Rdata -- verify against step 1.
save(
  list = c(
    "liver.list", "colon.list", "wbc.list",
    "filter.colon.count", "filter.liver.count", "filter.wbc.count",
    "cpm_normal_colon", "cpm_normal_liver", "cpm_normal_wbc",
    "normal.log2.liver", "normal.log2.colon", "normal.log2.wbc"
  ),
  file = "normal_data.Rdata"
)
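# "Reader comments" -- leftover footer text from the web page this script was copied from; not part of the script.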