原理:取10000行,求平均碱基质量,来判断格式
import gzip
from pickle import bytes_types
from statistics import mean
def line_chr_ascii(aline):
#print(f'line_chr_ascii:{aline}')
nline = []
for i in aline:
nline.append(ord(i))
return nline
def main():
inputfile = "/data3/Pugionium_cornutum_population/pc-y101-1/3_1_GCCAAT_L008_R1_001.fastq.gz"
readnum = 0
qual_list = []
f = gzip.open(inputfile,'r')
i = 0
while i<10001:#取10000行
line = f.readline().rstrip()
line = bytes.decode(line)
#print(line)
if '@' in line:
readnum = readnum+1
i = i+1
elif '+' in line:
line = f.readline().rstrip()
line = bytes.decode(line)
line = line_chr_ascii(line)
qual_list = qual_list+line
i = i+2
else:
i=i+1
f.close()
qual_mean = mean(qual_list)
print(f'read numbers total:{readnum}')
print(f'qual mean:{qual_mean}')
print(f'Phred 33:{qual_mean-33}')
print(f'Phred 64:{qual_mean-64}')
readnum = None
qual_list = None
image.png
很明显是phred33格式的
网友评论