美文网首页ggplot集锦
R:从otu表中提取门、属丰度表

R:从otu表中提取门、属丰度表

作者: 胡童远 | 来源:发表于2020-10-16 09:51 被阅读0次

    导读

    qiime2会直接给出各个分类水平的丰度表,用R语言也可以从otu表中抽提出各个水平的丰度表。这次仅分享代码,不模拟数据了。

    一、数据

    一个常见的OTU表

    二、抽提门丰度表

    代码思路:
    1 strsplit后,phylum不是na也不是""则记下,否则记行数到delete
    2 添加phylum列,删除注释列和记到delete的phylum
    3 按Phylum排序,获取Phylum unique列表
    4 提取第一个phylum所有行到新表,apply列求和
    5 遍历剩下的phylum列表,提取每个phylum,apply按列求和,添加到新表
    6 数据归一化,保存

    rare = read.table("rare.txt", header=T, sep="\t", row.names=1, comment.char="")
    
    # 提取phylum,删除无用
    phylum=c()
    delete=c()
    count = 1
    for(i in rare[, length(rare)])
    {
        tmp = unlist(strsplit(as.character(i), split="; |__"))
        phylum = c(phylum, tmp[4])
        if(is.na(tmp[4]) | tmp[4] == "")
        {
            delete = c(delete, count)
        }
        count = count + 1
    }
    
    rare$phylum = phylum
    rare = rare[, -(ncol(rare)-1)]  # 删除注释
    rare2 = rare[-delete, ]  # 删除NA和""
    rare2 = rare2[order(rare2$phylum, decreasing=F),]
    plist = unique(rare2$phylum)
    
    # 合并行,相同门
    rare3 = data.frame(apply(rare2[rare2$phylum==plist[1], c(1:(ncol(rare2)-1))], 2, sum))
    colnames(rare3)[1] = plist[1]
    for(i in 2:length(plist))
    {
        tmp = apply(rare2[rare2$phylum==plist[i], c(1:(ncol(rare2)-1))], 2, sum)
        rare3 = cbind(rare3, tmp)
        colnames(rare3)[i] = plist[i]
    }
    rare3 = data.frame(t(rare3))
    
    # 数据归一化
    norm = rare3
    sample_sum = apply(rare3, 2, sum)
    for(i in 1:nrow(rare3))
    {
        for(j in 1:ncol(rare3))
        {
            norm[i, j] = rare3[i, j]/sample_sum[j]
        }
    }
    apply(norm, 2, sum)  # 检测
    write.csv(norm, file="phylum_rare_norm.csv")
    

    三、抽提family_genus丰度表

    代码思路:
    1 strsplit后,family genus不是na也不是""则记下,否则记行数到delete
    2 添加family_genus列,删除注释列和记到delete的family_genus
    3 按family_genus排序,获取family_genus unique列表
    4 提取第一个family_genus所有行到新表,apply列求和
    5 遍历剩下的family_genus列表,提取每个family_genus,apply按列求和,添加到新表
    6 数据归一化,保存

    rare = read.table("rare.txt", header=T, sep="\t", row.names=1, comment.char="")
    
    # 提取genus,删除无用
    genus=c()
    delete=c()
    count = 1
    for(i in rare[, length(rare)])
    {
        tmp = unlist(strsplit(as.character(i), split="; |__"))
        # split=";|__"  # 注意有无空格
        genus = c(genus, paste(tmp[10], tmp[12], sep="_"))
        if(is.na(tmp[10]) | tmp[10] == "" | is.na(tmp[12]) | tmp[12] == "")
        {
            delete = c(delete, count)
        }
        count = count + 1
    }
    
    rare$genus = genus
    rare = rare[, -(ncol(rare)-1)]  # 删除注释
    rare2 = rare[-delete, ]  # 删除NA和""
    rare2 = rare2[order(rare2$genus, decreasing=F),]
    plist = unique(rare2$genus)
    
    # 合并行,相同门
    rare3 = data.frame(apply(rare2[rare2$genus==plist[1], c(1:(ncol(rare2)-1))], 2, sum))
    colnames(rare3)[1] = plist[1]
    for(i in 2:length(plist))
    {
        tmp = apply(rare2[rare2$genus==plist[i], c(1:(ncol(rare2)-1))], 2, sum)
        rare3 = cbind(rare3, tmp)
        colnames(rare3)[i] = plist[i]
    }
    rare3 = data.frame(t(rare3))
    
    # 数据归一化
    norm = rare3
    sample_sum = apply(rare3, 2, sum)
    for(i in 1:nrow(rare3))
    {
        for(j in 1:ncol(rare3))
        {
            norm[i, j] = rare3[i, j]/sample_sum[j]
        }
    }
    apply(norm, 2, sum)  # 检测
    write.csv(norm, file="genus_rare_norm.csv")
    

    相关文章

      网友评论

        本文标题:R:从otu表中提取门、属丰度表

        本文链接:https://www.haomeiwen.com/subject/wbyopktx.html