美文网首页
统计fastaq信息,包括Q30、碱基含量信息

统计fastaq信息,包括Q30、碱基含量信息

作者: 南山欧巴 | 来源:发表于2017-07-14 08:57 被阅读0次
    import os,re,sys
    
    infile=file("test.fq.txt")
    
    def qual_stat(qstr):
        q20 = 0
        q30 = 0
        for q in qstr:
            qual = ord(q) - 33# ord 返回对应的ASCII数值
            if qual >= 30:
                q30 += 1
                q20 += 1
            elif qual >= 20:
                q20 += 1
        return q20, q30
    
    arr=[]
    for line in infile:
        line=line.strip()
        arr.append(line)
    j=0
    a,g,c,t,n,dna=0,0,0,0,0,0
    total_count = 0
    q20_count = 0
    q30_count = 0
    
    for i in range(len(arr)):
        if arr[i].startswith("@"):
            j+=1
            #dna+=arr[i+1].count("A")+arr[i+1].count("G")+arr[i+1].count("C")+arr[i+1].count("T")
            dna+=len(arr[i+1])
            a+=arr[i+1].count("A")
            g+=arr[i+1].count("G")
            c+=arr[i+1].count("C")
            t+=arr[i+1].count("T")
            n+=arr[i+1].count("N")
            #total_count += len(arr[i+3])
            q20, q30 = qual_stat(arr[i+3])
            #q20_count += q20
            q30_count += q30
    ap=float(a)*100/float(dna)
    gp=float(g)*100/float(dna)
    cp=float(c)*100/float(dna)
    tp=float(t)*100/float(dna)
    np=float(n)*100/float(dna)
    q30=float(q30_count)*100/float(dna)
    print "Total Reads:",j
    print "Total Bases:",dna
    print "Q30 Bases:",q30_count
    print "q30 percents:",("%.2f" % q30)+"%"
    print "A:",a,("%.2f" % ap)+"%"
    print "G:",g,("%.2f" % gp)+"%"
    print "C:",c,("%.2f" % cp)+"%"
    print "T:",t,("%.2f" % tp)+"%"
    print "N:",n,("%.2f" % np)+"%"
    

    相关文章

      网友评论

          本文标题:统计fastaq信息,包括Q30、碱基含量信息

          本文链接:https://www.haomeiwen.com/subject/avknhxtx.html