python生信小练习(三)

作者: 杨亮_SAAS | 来源:发表于2018-02-23 15:26 被阅读25次

    生信菜鸟团的编程练习:

    对FASTQ的操作

    • 5'端、3'端各截掉几个碱基
    • 序列长度分布统计
    • FASTQ 转换成 FASTA
    • 统计碱基个数及GC%
    对FASTA的操作
    • 取互补序列
    • 取反向序列
    • DNA to RNA
    • 大小写字母形式输出
    • 每行指定长度输出序列
    • 按照序列长度/名字排序
    • 提取指定ID的序列
    • 随机抽取序列
    def trim(file, terminal5, terminal3,
             output=r'E:\Bioinformatics\Python\practice\PyCharm\practice of biotrainee\trim.txt'):
        """Trim bases from both ends of every read in a FASTQ file.

        Only the read name (line 1 of each 4-line record) and the trimmed
        sequence (line 2) are written; quality lines are not carried over.

        Args:
            file: Path of the input FASTQ file.
            terminal5: Number of bases to remove from the 5' end (>= 0).
            terminal3: Number of bases to remove from the 3' end (>= 0).
            output: Path of the text file to write. Defaults to the location
                the original script used.

        Raises:
            ValueError: If either trim count is negative.
        """
        if terminal5 < 0 or terminal3 < 0:
            raise ValueError('terminal5 and terminal3 must be non-negative')

        fastq = {}
        with open(file) as handle:
            for count, line in enumerate(handle, start=1):
                if count % 4 == 1:           # line 1 of a record: read name
                    readID = line.strip()
                    fastq[readID] = []
                elif count % 4 == 2:         # line 2 of a record: sequence
                    seq = line.strip()
                    # Compute an explicit end index: seq[a:-0] would be empty,
                    # so a negative-index slice cannot express "trim nothing".
                    end = max(len(seq) - terminal3, 0)
                    fastq[readID] = seq[terminal5:end]
        with open(output, 'w') as f:
            for key, value in fastq.items():
                print('{}\n{}'.format(key, value), file=f)
    
    # Input FASTQ file handed to trim() below.
    f1 = r'E:\Bioinformatics\Python\practice\chentong\notebook-master\data\test1.fq'
    
    # Drop 5 bases from the 5' end and 8 bases from the 3' end of every read.
    trim(f1, 5, 8)
    
    def readLength(file):
        """Print the sequence length of every read in a FASTQ file.

        One length is printed per line, in the order the read names first
        appear in the file. Reads are keyed by their name line, so a repeated
        name keeps only the length of its last occurrence.

        Args:
            file: Path of the input FASTQ file.
        """
        fastq = {}
        with open(file) as handle:
            for count, line in enumerate(handle, start=1):
                if count % 4 == 1:      # line 1 of a record: read name
                    readID = line.strip()
                    fastq[readID] = []
                elif count % 4 == 2:    # line 2 of a record: sequence
                    fastq[readID] = len(line.strip())
        for length in fastq.values():
            print(length)
    
    
    def fq2fa(file,
              output=r'E:\Bioinformatics\Python\practice\PyCharm\practice of biotrainee\fq2fa.txt'):
        """Convert a FASTQ file to FASTA.

        The FASTA identifier is the read name with the leading '@' removed and
        everything from the first space onwards dropped.

        Args:
            file: Path of the input FASTQ file.
            output: Path of the FASTA file to write. Defaults to the location
                the original script used.
        """
        fastq = {}
        with open(file) as handle:
            for count, line in enumerate(handle, start=1):
                if count % 4 == 1:      # line 1 of a record: read name
                    # Take the token before the first space, then strip '@'.
                    # (Slicing the split list itself gave an unhashable list.)
                    readID = line.strip().split(' ', 1)[0][1:]
                    fastq[readID] = []
                elif count % 4 == 2:    # line 2 of a record: sequence
                    fastq[readID] = line.strip()
        with open(output, 'w') as f:
            for key, value in fastq.items():
                print('>{}\n{}'.format(key, value), file=f)
    
    def countGC(file):
        """Print the total base count and GC percentage of a FASTQ file.

        Only the sequence line (line 2 of each 4-line record) is counted.
        G and C are counted in either case; a file without any sequence
        reports a GC percentage of 0.0 instead of dividing by zero.

        Args:
            file: Path of the input FASTQ file.
        """
        total = 0
        gc = 0
        with open(file) as handle:
            for count, line in enumerate(handle, start=1):
                if count % 4 == 2:      # line 2 of a record: sequence
                    seq = line.strip()
                    total += len(seq)
                    gc += sum(seq.count(base) for base in 'GCgc')
        print('The number of length is {}'.format(total))
        percent = gc / total * 100 if total else 0.0
        print('GC% is {}%'.format(percent))
    
    def complementary(file):
        """Print the complement of every sequence in a FASTA file.

        Each header line is printed unchanged, followed by the complemented
        sequence in upper case, wrapped at 60 characters per line. Characters
        other than A/T/G/C are upper-cased and otherwise left as they are.

        Args:
            file: Path of the input FASTA file.
        """
        # One translation pass after upper-casing, so lowercase input is
        # complemented too (chained replace() only matched uppercase bases).
        table = str.maketrans('ATGC', 'TACG')
        fasta = {}
        with open(file) as handle:
            for line in handle:
                if line.startswith('>'):
                    key = line.strip()
                    fasta[key] = []
                else:
                    fasta[key].append(line.strip().upper().translate(table))
        for key, chunks in fasta.items():
            print(key)
            sequence = ''.join(chunks)
            for i in range(0, len(sequence), 60):
                print(sequence[i: i + 60])
    
    def reverse(file):
        """Print every sequence of a FASTA file reversed.

        Each header line is printed unchanged, followed by the whole sequence
        (all of its lines joined) in reverse order, wrapped at 60 characters
        per line. Bases are not complemented.

        Args:
            file: Path of the input FASTA file.
        """
        fasta = {}
        with open(file) as handle:
            for line in handle:
                if line.startswith('>'):
                    key = line.strip()
                    fasta[key] = []
                else:
                    fasta[key].append(line.strip())
        for key, chunks in fasta.items():
            print(key)
            rev = ''.join(chunks)[::-1]
            for i in range(0, len(rev), 60):
                print(rev[i: i + 60])
    
    def dna2rna(file):
        """Print every sequence of a FASTA file with T replaced by U.

        Case is preserved ('T' -> 'U', 't' -> 'u'). Each header line is
        printed unchanged, followed by the converted sequence wrapped at 60
        characters per line.

        Args:
            file: Path of the input FASTA file.
        """
        fasta = {}
        with open(file) as handle:
            for line in handle:
                if line.startswith('>'):
                    key = line.strip()
                    fasta[key] = []
                else:
                    rna = line.strip().replace('T', 'U').replace('t', 'u')
                    fasta[key].append(rna)
        for key, chunks in fasta.items():
            print(key)
            sequence = ''.join(chunks)
            for i in range(0, len(sequence), 60):
                print(sequence[i: i + 60])
    
    def upperandlower(file):
        """Print a FASTA file in upper case, then again in lower case.

        All records are printed upper-cased first, followed by all records
        lower-cased. Header lines are printed unchanged and sequences are
        wrapped at 60 characters per line.

        Args:
            file: Path of the input FASTA file.
        """
        fasta = {}
        with open(file) as handle:
            for line in handle:
                if line.startswith('>'):
                    key = line.strip()
                    fasta[key] = []
                else:
                    fasta[key].append(line.strip())
        # One pass over the stored records per output case.
        for convert in (str.upper, str.lower):
            for key, chunks in fasta.items():
                print(key)
                sequence = ''.join(convert(chunk) for chunk in chunks)
                for i in range(0, len(sequence), 60):
                    print(sequence[i: i + 60])
    
    def sortLength(file):
        """Print the records of a FASTA file ordered by sequence length.

        Records are printed shortest first; records of equal length keep
        their original relative order. Header lines are printed unchanged and
        sequences are wrapped at 60 characters per line.

        Args:
            file: Path of the input FASTA file. (The original definition took
                no parameter and read an undefined name ``file``.)
        """
        fasta = {}
        with open(file) as handle:
            for line in handle:
                if line.startswith('>'):
                    key = line.strip()
                    fasta[key] = []
                else:
                    fasta[key].append(line.strip())
        records = [(key, ''.join(chunks)) for key, chunks in fasta.items()]
        # sorted() is stable, so ties stay in file order.
        for key, seq in sorted(records, key=lambda record: len(record[1])):
            print(key)
            for i in range(0, len(seq), 60):
                print(seq[i: i + 60])
    

    相关文章

      网友评论

      • 不玩手机的蛇佬腔:我借鉴了你的不少方法,在此表示感谢,但是你的有些程序好像并不能达到问题的要求

      本文标题:python生信小练习(三)

      本文链接:https://www.haomeiwen.com/subject/kmsxxftx.html