# Pipeline: derive a cleaned CDS FASTA (Ogib.cds) and a cleaned BED (Ogib.bed)
# from the NCBI GFF and genome FASTA of assembly GCA_019343175.1.
# Requires bash: step 3 uses process substitution, <( ... ).

# Step 1: regenerate the preliminary CDS file from the GFF and the genome FASTA.
perl /ldfssz1/MS_OP/USER/00.genome/06.GenomeAnnotation/02.gene/01.StructuralAnnotation/00.gene/03.maker/getGene.pl GCA_019343175.1_Ogib_1.0_genomic.gff GCA_019343175.1_Ogib_1.0_genomic.fna > Ogib-raw.cds

# Step 2: build the preliminary BED from the original GFF.
# Note: --key=ID, so BED column 4 carries the mRNA "ID" attribute.
python3 -m jcvi.formats.gff bed --type=mRNA --key=ID GCA_019343175.1_Ogib_1.0_genomic.gff -o Ogib-raw.bed

# Step 3: filter the preliminary CDS file with the names listed in BED column 4,
# then reduce every FASTA header to its ID only (seqkit seq -i).
/ldfssz1/MS_OP/USER/seqkit grep -f <(cut -f 4 Ogib-raw.bed) Ogib-raw.cds | /ldfssz1/MS_OP/USER/seqkit seq -i > Ogib.cds

# Step 4: strip the unwanted "rna-gnl|WGS:JAFNEN|" prefix from the IDs.
# Done with sed instead of an interactive vi session (:%s/rna-gnl|WGS:JAFNEN|//g)
# so the step can be re-run unattended. In sed's default (basic) regex syntax
# an unescaped '|' is a literal character, so the pattern matches the prefix as-is.
sed -i 's/rna-gnl|WGS:JAFNEN|//g' Ogib.cds   # -i: in-place edit (GNU sed syntax)

# The BED written in step 2 is named Ogib-raw.bed (hyphen, not "Ogib.raw.bed").
# Write the cleaned copy to Ogib.bed; the raw BED is left untouched.
sed 's/rna-gnl|WGS:JAFNEN|//g' Ogib-raw.bed > Ogib.bed
修改前gff:
##gff-version 3
#!gff-spec-version 1.21
#!processor NCBI annotwriter
#!genome-build Ogib_1.0
#!genome-build-accession NCBI_Assembly:GCA_019343175.1
##sequence-region CM033270.1 1 49491545
##species https://www.ncbi.nlm.nih.gov/Taxonomy/Browser/wwwtax.cgi?id=931172
CM033270.1 Genbank region 1 49491545 . + . ID=CM033270.1:1..49491545;Dbxref=taxon:931172;Name=LG01;collected-by=Frederik Hendrickx;collection-date=2019-03-27;country=Belgium: Tielt-Winge%2C Walenbos;dev-stage=adult;gbkey=Src;genome=chromosome;identified-by=Hendrickx Frederik;isolate=W744_W776;isolation-source=pool of 12 gibbosus (Gg) and 6 tuberosus (gg) offspring of W744 (f%2C Gg) x W776 (m%2C gg) cross;linkage-group=LG01;mol_type=genomic DNA;tissue-type=whole body
CM033270.1 Genbank gene 115832 120915 . + . ID=gene-JTE90_000925;Name=JTE90_000925;gbkey=Gene;gene_biotype=protein_coding;locus_tag=JTE90_000925
CM033270.1 Genbank mRNA 115832 120915 . + . ID=rna-gnl|WGS:JAFNEN|g932.t1;Parent=gene-JTE90_000925;gbkey=mRNA;locus_tag=JTE90_000925;orig_protein_id=gnl|WGS:JAFNEN|g932.t1.CDS4;orig_transcript_id=gnl|WGS:JAFNEN|g932.t1;product=hypothetical protein
CM033270.1 Genbank exon 115832 116130 . + . ID=exon-gnl|WGS:JAFNEN|g932.t1-1;Parent=rna-gnl|WGS:JAFNEN|g932.t1;gbkey=mRNA;locus_tag=JTE90_000925;orig_protein_id=gnl|WGS:JAFNEN|g932.t1.CDS4;orig_transcript_id=gnl|WGS:JAFNEN|g932.t1;product=hypothetical protein
CM033270.1 Genbank exon 116676 117024 . + . ID=exon-gnl|WGS:JAFNEN|g932.t1-2;Parent=rna-gnl|WGS:JAFNEN|g932.t1;gbkey=mRNA;locus_tag=JTE90_000925;orig_protein_id=gnl|WGS:JAFNEN|g932.t1.CDS4;orig_transcript_id=gnl|WGS:JAFNEN|g932.t1;product=hypothetical protein
CM033270.1 Genbank exon 119917 120174 . + . ID=exon-gnl|WGS:JAFNEN|g932.t1-3;Parent=rna-gnl|WGS:JAFNEN|g932.t1;gbkey=mRNA;locus_tag=JTE90_000925;orig_protein_id=gnl|WGS:JAFNEN|g932.t1.CDS4;orig_transcript_id=gnl|WGS:JAFNEN|g932.t1;product=hypothetical protein
CM033270.1 Genbank exon 120634 120915 . + . ID=exon-gnl|WGS:JAFNEN|g932.t1-4;Parent=rna-gnl|WGS:JAFNEN|g932.t1;gbkey=mRNA;locus_tag=JTE90_000925;orig_protein_id=gnl|WGS:JAFNEN|g932.t1.CDS4;orig_transcript_id=gnl|WGS:JAFNEN|g932.t1;product=hypothetical protein
CM033270.1 Genbank CDS 115832 116130 . + 0 ID=cds-KAG8178410.1;Parent=rna-gnl|WGS:JAFNEN|g932.t1;Dbxref=NCBI_GP:KAG8178410.1;Name=KAG8178410.1;gbkey=CDS;locus_tag=JTE90_000925;orig_transcript_id=gnl|WGS:JAFNEN|g932.t1;product=hypothetical protein;protein_id=KAG8178410.1
CM033270.1 Genbank CDS 116676 117024 . + 1 ID=cds-KAG8178410.1;Parent=rna-gnl|WGS:JAFNEN|g932.t1;Dbxref=NCBI_GP:KAG8178410.1;Name=KAG8178410.1;gbkey=CDS;locus_tag=JTE90_000925;orig_transcript_id=gnl|WGS:JAFNEN|g932.t1;product=hypothetical protein;protein_id=KAG8178410.1
修改前cds:
>lcl|CM033270.1_cds_KAG8178410.1_1 [protein=hypothetical protein] [protein_id=KAG8178410.1] [location=join(115832..116130,116676..117024,119917..120174,120634..120915)] [gbkey=CDS]
ATGTACGACGAGGGTGACTACATGAACATCGAGGCGGAGGTGGTGGACCCGCCCCCTCTGGCCACGCCCACCTCTATGCT
CCGCCTACTCCTCAACGACTCCTCCTACAACACCTGCCTCAACGCGTCCGGAAACGCCTCCGACTGCCTCGTCA
>lcl|CM033270.1_cds_KAG8178411.1_2 [protein=hypothetical protein] [protein_id=KAG8178411.1] [location=complement(join(274090..274182,276089..276244,277270..277335,280085..280129,283091..283189,283731..283826))] [gbkey=CDS]
修改中的bed:
CM033270.1 115831 120915 rna-gnl|WGS:JAFNEN|g932.t1 0 +
CM033270.1 274089 283826 rna-gnl|WGS:JAFNEN|g933.t1 0 -
CM033270.1 296836 313992 rna-gnl|WGS:JAFNEN|g934.t1 0 +
CM033270.1 314014 349328 rna-gnl|WGS:JAFNEN|g935.t1 0 +
CM033270.1 392062 406150 rna-gnl|WGS:JAFNEN|g936.t1 0 -
CM033270.1 406800 411410 rna-gnl|WGS:JAFNEN|g937.t1 0 +
CM033270.1 415405 464590 rna-gnl|WGS:JAFNEN|g938.t1 0 -
修改后bed:
CM033270.1 115831 120915 g932.t1 0 +
CM033270.1 274089 283826 g933.t1 0 -
CM033270.1 296836 313992 g934.t1 0 +
CM033270.1 314014 349328 g935.t1 0 +
CM033270.1 392062 406150 g936.t1 0 -
CM033270.1 406800 411410 g937.t1 0 +
CM033270.1 415405 464590 g938.t1 0 -
CM033270.1 415405 464590 g938.t2 0 -
CM033270.1 415405 464590 g938.t3 0 -
CM033270.1 467143 468161 g939.t1 0 -
CM033270.1 487031 490127 g940.t1 0 -
CM033270.1 490836 505647 g941.t1 0 +
CM033270.1 514650 515337 g942.t1 0 +
CM033270.1 568169 568409 g943.t1 0 +
CM033270.1 586980 591160 g944.t1 0 -
修改后cds:
>g10347.t1
ATGGAAATTATAACGGGCTCCTTTGAGGTACTTCTCATTCACCGATTCCCGCATGAGAGG
TGTATGAAGCACAAATCTCTCTCCGATGCCGACGTGGTGTACGGCCACATGGTGTTTACC
CCTTCCCCTTACCACAACGACAGCGTAGGGGGTGGAGGGTGGGAAGAAGATTTGGAGGAA
ATTCTGCGCAAGGAGGGCTGCTTCCACCCGGACGCTGCTTTTCCTAGCCAACTGCACCAC
ATGAACGATCATGACAGCCTAGAGATAGAAGGTGAGAGGAGTCACCTTAAATCTGCTTTA
TACCAGAGAAGAGCTAAACTAATTTGA
>g10348.t1
网友评论