-
遇到的问题：小鼠和人的参考基因组中，染色体序列名称相同，合并前需要加以区分，否则与 gtf 注释对应时会出错。
-
解决办法：给（fasta、gtf 中的）染色体名前面加上物种名前缀加以区分
## genome ====
# Build one combined FASTA in which every sequence header carries a species
# prefix, so identically named human and mouse sequences stay distinct.
#
# Preview the original headers of each assembly (header lines start with '>').
grep '^>' GRCh38.primary_assembly.genome.fa | head
grep '^>' GRCm39.primary_assembly.genome.fa | head
# Insert the prefix right after the leading '>' only. The pattern is anchored
# to the start of the line and not global, so a '>' occurring later in a
# header description is left untouched.
sed 's/^>/>hg38_/' GRCh38.primary_assembly.genome.fa > GRCh38_GRCm39_pri_assemby.fa
# NOTE(review): the mouse assembly used here is GRCm39, but the label is
# "mm10_". It is kept because the GTF step uses the same prefix; rename both
# together if the label should reflect the assembly.
sed 's/^>/>mm10_/' GRCm39.primary_assembly.genome.fa >> GRCh38_GRCm39_pri_assemby.fa
# Sanity check: first headers should be hg38_*, last headers mm10_*.
grep '^>' GRCh38_GRCm39_pri_assemby.fa | head
grep '^>' GRCh38_GRCm39_pri_assemby.fa | tail
## gtf ====
# Merge the human and mouse annotations into a single GTF. Lines starting
# with '##' from both inputs are copied first, unchanged; every other line
# then gets the species prefix prepended, which lands on the first
# tab-separated column.
gtf_mm10=gencode.vM34.primary_assembly.annotation.gtf
gtf_hg38=gencode.v45.primary_assembly.annotation.gtf
out_gtf=MIX_gencode.vM34_v45_pri.annotation.gtf
{
  # '##' lines: human first, then mouse.
  grep '^##' "$gtf_hg38"
  grep '^##' "$gtf_mm10"
  # Remaining lines: prepending to the whole record is the same as
  # prepending to column 1, and leaves all other columns as they were.
  grep -v '^##' "$gtf_hg38" | awk '{ print "hg38_" $0 }'
  grep -v '^##' "$gtf_mm10" | awk '{ print "mm10_" $0 }'
} > "$out_gtf"
# Write a two-column table (header text without '>', TAB, total sequence
# length) for every record of the combined FASTA.
# - The header is printed through an explicit "%s" format, so a '%' inside a
#   header is emitted literally instead of being parsed as a format directive.
# - Header lines are recognised by a leading '>' only.
# - The full header text is kept (no 100-character cut-off).
# - Nothing is printed when the input contains no records.
# NOTE(review): the name column is the whole header line, including any
# description after the first space — confirm downstream tools expect that.
awk '
  /^>/ {
    if (seen) print len          # close the previous record
    seen = 1
    len = 0
    printf "%s\t", substr($0, 2)
    next
  }
  { len += length($0) }          # accumulate residues of the current record
  END { if (seen) print len }    # close the last record
' GRCh38_GRCm39_pri_assemby.fa > mix_genome_length.txt
![](https://img.haomeiwen.com/i27913461/b8d65917d2377d0a.png)
![](https://img.haomeiwen.com/i27913461/f29cac2b4acc7ceb.png)
网友评论