导读
qiime2会直接给出各个分类水平的丰度表,用R语言也可以从otu表中抽提出各个水平的丰度表。这次仅分享代码,不模拟数据了。
一、数据
一个常见的OTU表
二、抽提门丰度表
代码思路:
1 strsplit后,phylum不是na也不是""则记下,否则记行数到delete
2 添加phylum列,删除注释列和记到delete的phylum
3 按Phylum排序,获取Phylum unique列表
4 提取第一个phylum所有行到新表,apply列求和
5 遍历剩下的phylum列表,提取每个phylum,apply按列求和,添加到新表
6 数据归一化,保存
rare = read.table("rare.txt", header=T, sep="\t", row.names=1, comment.char="")
# 提取phylum,删除无用
phylum=c()
delete=c()
count = 1
for(i in rare[, length(rare)])
{
tmp = unlist(strsplit(as.character(i), split="; |__"))
phylum = c(phylum, tmp[4])
if(is.na(tmp[4]) | tmp[4] == "")
{
delete = c(delete, count)
}
count = count + 1
}
rare$phylum = phylum
rare = rare[, -(ncol(rare)-1)] # 删除注释
rare2 = rare[-delete, ] # 删除NA和""
rare2 = rare2[order(rare2$phylum, decreasing=F),]
plist = unique(rare2$phylum)
# 合并行,相同门
rare3 = data.frame(apply(rare2[rare2$phylum==plist[1], c(1:(ncol(rare2)-1))], 2, sum))
colnames(rare3)[1] = plist[1]
for(i in 2:length(plist))
{
tmp = apply(rare2[rare2$phylum==plist[i], c(1:(ncol(rare2)-1))], 2, sum)
rare3 = cbind(rare3, tmp)
colnames(rare3)[i] = plist[i]
}
rare3 = data.frame(t(rare3))
# 数据归一化
norm = rare3
sample_sum = apply(rare3, 2, sum)
for(i in 1:nrow(rare3))
{
for(j in 1:ncol(rare3))
{
norm[i, j] = rare3[i, j]/sample_sum[j]
}
}
apply(norm, 2, sum) # 检测
write.csv(norm, file="phylum_rare_norm.csv")
三、抽提family_genus丰度表
代码思路:
1 strsplit后,family genus不是na也不是""则记下,否则记行数到delete
2 添加family_genus列,删除注释列和记到delete的family_genus
3 按family_genus排序,获取family_genus unique列表
4 提取第一个family_genus所有行到新表,apply列求和
5 遍历剩下的family_genus列表,提取每个family_genus,apply按列求和,添加到新表
6 数据归一化,保存
rare = read.table("rare.txt", header=T, sep="\t", row.names=1, comment.char="")
# 提取genus,删除无用
genus=c()
delete=c()
count = 1
for(i in rare[, length(rare)])
{
tmp = unlist(strsplit(as.character(i), split="; |__"))
# split=";|__" # 注意有无空格
genus = c(genus, paste(tmp[10], tmp[12], sep="_"))
if(is.na(tmp[10]) | tmp[10] == "" | is.na(tmp[12]) | tmp[12] == "")
{
delete = c(delete, count)
}
count = count + 1
}
rare$genus = genus
rare = rare[, -(ncol(rare)-1)] # 删除注释
rare2 = rare[-delete, ] # 删除NA和""
rare2 = rare2[order(rare2$genus, decreasing=F),]
plist = unique(rare2$genus)
# 合并行,相同门
rare3 = data.frame(apply(rare2[rare2$genus==plist[1], c(1:(ncol(rare2)-1))], 2, sum))
colnames(rare3)[1] = plist[1]
for(i in 2:length(plist))
{
tmp = apply(rare2[rare2$genus==plist[i], c(1:(ncol(rare2)-1))], 2, sum)
rare3 = cbind(rare3, tmp)
colnames(rare3)[i] = plist[i]
}
rare3 = data.frame(t(rare3))
# 数据归一化
norm = rare3
sample_sum = apply(rare3, 2, sum)
for(i in 1:nrow(rare3))
{
for(j in 1:ncol(rare3))
{
norm[i, j] = rare3[i, j]/sample_sum[j]
}
}
apply(norm, 2, sum) # 检测
write.csv(norm, file="genus_rare_norm.csv")
网友评论