1. 注释annovar
mkdir annovar && cd annovar
sample="7E5241"
#/home/vip2/7E5241.chr1_2_raw.vcf
#/home/qmcui/software/ANNOVAR/annovar/convert2annovar.pl
/home/qmcui/software/ANNOVAR/annovar/convert2annovar.pl -format vcf4old /home/vip2/7E5241.chr1_2_raw.vcf >${sample}.annovar
#做人类的比对,annovar 中默认有hg38,所以用
sample="7E5241"
/home/qmcui/software/ANNOVAR/annovar/annotate_variation.pl -buildver hg38 --outfile ${sample}.anno 7E5241.annovar /home/qmcui/software/ANNOVAR/annovar/humandb





2. IGV可视化
2.1 建立索引
cd project/wes/6.gatk_bam/
ls *fixed.bam
nohup ls *fixed.bam|xargs -i samtools index {} &
2.2 目标基因选取
由于背景知识薄弱,不知道chr1,chr2上对应有哪些基因,故找到gtf注释文件,选取一段序列外显子
cd ~/project/rna/gtf/
zcat gencode.v29.annotation.gtf.gz|less -SN
#line60-line100坐标 chr1:62916-120932
2.3 生成bam文件
samtools view -h 7E5241_marked_fixed.bam chr1:62916-120932 |samtools sort -o 7E5241_marked_fixed.unkown.bam
2.4 IGV可视化
将生成的bam以及对应的bai.bam 下载到本地,IGV中选择hg38,chr1,chr1:62916-120932


3. samtools 找变异(有报错,未找到原因)
samtools mpileup -ugf /home/qmcui/database/reference/index/bwa/hg38.fa ../6.gatk_bam/7E5241_bqsr.bam | bcftools call -vmO z -o out.raw.vcf.gz




4. 找变异
4.1 gvcf文件
3个样本循环(一个样本1.5h的耗时
)
**注意**
只是把样本名称放入循环,并不是样本,所以在input的时候要放入路径
sample是不同的样本,可以把样本写入一个txt文档中
cat bq_bam.txt
7E5239_bqsr.bam
7E5240_bqsr.bam
7E5241_bqsr.bam
^C
cat >bq_bam.sh
ref="/home/qmcui/database/reference/index/hisat/hg38/hg38.fa"
snp="/home/qmcui/dbsnp_146.hg38.vcf.gz"
cat bq_bam.txt| while read sample
do gatk --java-options "-Xmx10G -Djava.io.tmpdir=./" HaplotypeCaller -ERC GVCF -R $ref -I ./bqsr.bam/${sample} --dbsnp $snp -O ${sample}_raw.gvcf 1>${sample}.gvcf.log 2>&1
done
nohup bash bq_bam.sh &
4.2 合并gvcf,生成按染色体来找变异
#先将染色体按条数分开
seq 22|while read id;do echo chr${id};done >bed.txt
cat >>bed.txt
chrX
chrY
chrM
#合并样本的gvcf
cat >gvcf.sh
ref="/home/qmcui/database/reference/index/hisat/hg38/hg38.fa"
snp="/home/qmcui/dbsnp_146.hg38.vcf.gz"
cat bed.txt|while read bed;do gatk --java-options "-Xmx20G -Djava.io.tmpdir=./" GenomicsDBImport -L $bed -R $ref -V 7E5239_bqsr.bam_raw.gvcf -V 7E5240_bqsr.bam_raw.gvcf -V 7E5241_bqsr.bam_raw.gvcf --genomicsdb-workspace-path gvcfs_${bed}.db
gatk --java-options "-Xmx20G -Djava.io.tmpdir=./" GenotypeGVCFs \ -R $ref -V gendb://gvcfs_${bed}.db --dbsnp $snp -O final_${bed}.vcf;done
nohup bash gvcf.sh &
#合并gvcf
gatk MergeVcfs -I final_chr2.vcf -I final_chr1.vcf -O raw.combine.vcf.gz
网友评论