美文网首页
二代测序:Phred33和Phred64格式判断

二代测序:Phred33和Phred64格式判断

作者: bioshimmer | 来源:发表于2024-01-25 16:47 被阅读0次

    原理:取10000行,求平均碱基质量,来判断格式

    import gzip
    from pickle import bytes_types
    from statistics import mean
    def line_chr_ascii(aline):
        #print(f'line_chr_ascii:{aline}')
        nline = []
        for i in aline:
            nline.append(ord(i))
        return nline
    def main():        
        inputfile = "/data3/Pugionium_cornutum_population/pc-y101-1/3_1_GCCAAT_L008_R1_001.fastq.gz"
        readnum = 0
        qual_list = []
        f = gzip.open(inputfile,'r')
        i = 0
        while i<10001:#取10000行
            line = f.readline().rstrip()
            line = bytes.decode(line)
            #print(line)
            if '@' in line:
                    readnum = readnum+1
                    i = i+1
            elif '+' in line:
                line = f.readline().rstrip()
                line = bytes.decode(line)
                line = line_chr_ascii(line)
                qual_list = qual_list+line
                i = i+2
            else:
                i=i+1
            
        f.close()
        qual_mean = mean(qual_list)
        print(f'read numbers total:{readnum}')
        print(f'qual mean:{qual_mean}')
        print(f'Phred 33:{qual_mean-33}')
        print(f'Phred 64:{qual_mean-64}')
        readnum = None
        qual_list = None
    
    image.png

    很明显是phred33格式的

    相关文章

      网友评论

          本文标题:二代测序:Phred33和Phred64格式判断

          本文链接:https://www.haomeiwen.com/subject/abjvodtx.html