- 得到了candidate SNPs list
Scaffold861: 64225
Scaffold44:1265163
Scaffold121: 842037
Scaffold225: 196363
Scaffold266: 463359
Scaffold68: 899972
...
- 获取vcf文件
# Normalise the candidate list in place to two tab-separated columns
# (scaffold, position). The raw list mixes forms such as "Scaffold861: 64225"
# and "Scaffold44:1265163", so every run of blanks and/or colons is collapsed
# to a single tab in one pass. A chain of fixed-width substitutions
# (":::" then "::") does not handle every run length: e.g. five separators in
# a row would end up as two tabs.
file=338cand.snps
sed -E -i 's/[[:blank:]:]+/\t/g' "$file"
# Pull the candidate records out of the full call set by exact CHROM + POS match.
# A fixed-string grep matches substrings, so "Scaffold861<TAB>64225" would also
# select position 642250, and an empty line in the list would match every record.
# awk reads the list first (lines with fewer than two fields are ignored) and
# then the decompressed VCF from stdin ("-"); header lines are dropped here and
# re-attached below.
gzip -dc gatk4.SNP_VF_SV.vcf.gz \
  | awk 'NR == FNR { if (NF >= 2) wanted[$1, $2]; next }
         /^#/ { next }
         ($1, $2) in wanted' "$file" - > 338cands_no_header.vcf
# Re-attach the original header so the subset is a complete VCF again.
bcftools view -h gatk4.SNP_VF_SV.vcf.gz > header
cat header 338cands_no_header.vcf > 338cands.vcf
# Sanity check: both commands should print the same number of records.
bcftools view -H 338cands.vcf | wc -l
# Anchor on '^#' so that only header lines are dropped; an unanchored "#" would
# also discard data records that contain '#' anywhere in a field and make the
# two counts disagree for the wrong reason.
grep -v '^#' 338cands.vcf | grep -c Scaffold
检查数目是否一致
SNP注释
# Annotate the candidate VCF with snpEff. Agla_Btl03082013 is passed as the
# genome argument (presumably the name of the snpEff database -- confirm it is
# installed). The input is read from $DIR, and the annotated VCF is written to
# the current working directory.
DIR=/home/mmcui/poolseq/gatk_hard_filtering/howto_filter
VCF=338cands
java -Xmx4g -jar ~/annotation/snpEff/snpEff.jar -v Agla_Btl03082013 \
  "${DIR}/${VCF}.vcf" > "${VCF}.ann.vcf"
上下游1000bp序列.png
Scaffold1:1235332-1237332
Scaffold1:4378505-4380505
Scaffold1:4078202-4080202
Scaffold1:1639666-1641666
Scaffold1:1555820-1557820
retrieve flanking region
# Extract the sequence for every region listed in 337snps.positions (expected
# to hold one "scaffold:start-end" region per line) from 210Scaffolds.fasta and
# append the output to flanking_sequences.snps.
#   read -r         keeps backslashes literal; the default IFS still trims
#                   leading/trailing blanks from each line
#   || [ -n ... ]   also processes a final line that has no trailing newline
#   blank lines     are skipped so samtools is never invoked without a region
# The output file is opened once for the whole loop (still in append mode).
while read -r region || [ -n "$region" ]; do
  [ -n "$region" ] || continue
  samtools faidx 210Scaffolds.fasta "$region"
done < 337snps.positions >> flanking_sequences.snps
网友评论