美文网首页
根据id提取fasta序列

根据id提取fasta序列

作者: 纵纵纵小鸮 | 来源:发表于2022-07-27 09:05 被阅读0次

    def get_trans(intrans, outtrans, chrom="chr19"):
        """Copy the FASTA records that belong to one chromosome into a new file.

        The chromosome of a record is taken from its header line: the first
        space-separated token is split on "-" and the second piece is compared
        with ``chrom`` (for example ``>tx1-chr19 gene=A`` belongs to ``chr19``).
        Headers whose first token contains no "-" never match.

        Args:
            intrans: Path of the FASTA file to read.
            outtrans: Path of the FASTA file to write (overwritten).
            chrom: Chromosome name to keep; defaults to ``"chr19"``.

        Prints the number of matching header lines found in ``intrans``.
        """
        # Header text (without ">") -> list of sequence lines. A dict keeps
        # first-seen order and gives O(1) lookup instead of scanning a list.
        records = {}
        matched = 0
        # Sequence list of the record being read; None while lines are skipped.
        current = None

        with open(intrans, "r") as myfile:
            for line in myfile:
                if line.startswith(">"):
                    pieces = line.strip().split(" ")[0].split("-")
                    if len(pieces) > 1 and pieces[1] == chrom:
                        matched += 1
                        current = []
                        records[line.lstrip(">").strip()] = current
                    else:
                        current = None
                elif current is not None:
                    # Lines before the first header, or inside a record of
                    # another chromosome, are dropped instead of crashing.
                    current.append(line.strip())
        print(matched)

        with open(outtrans, "w") as outfile:
            for header, seq_lines in records.items():
                outfile.write(">" + header + "\n")
                outfile.write("\n".join(seq_lines) + "\n")

    根据染色体(如 chr19)提取 gff 文件中的记录:

    def get_gff(ingff, outgff, chrom="chr19"):
        """Copy the lines of a GFF file that belong to one chromosome.

        A line is kept when the first tab-separated field of the stripped
        line equals ``chrom``. Kept lines are written unchanged, in input
        order.

        Args:
            ingff: Path of the GFF file to read.
            outgff: Path of the file to write (overwritten).
            chrom: Chromosome name to keep; defaults to ``"chr19"``.

        Prints the number of lines written.
        """
        kept = 0
        # Both files are closed by the with statement; no explicit close()
        # is needed. Lines are streamed rather than loaded all at once.
        with open(ingff, "r") as mygff, open(outgff, "w") as myout:
            for line in mygff:
                if line.strip().split("\t")[0] == chrom:
                    myout.write(line)
                    kept += 1
        print(kept)

    相关文章

      网友评论

          本文标题:根据id提取fasta序列

          本文链接:https://www.haomeiwen.com/subject/oeaairtx.html