01. fastq数据下载
数据选自千人基因组计划中英格兰和苏格兰的英国人群(GBR)的全外显子测序数据,共 6 个样本(三男三女)。
# Download the paired-end FASTQ files of six samples from the EBI SRA FTP
# mirror and rename each file from its SRR run accession to its HG sample ID.
mkdir -p fastq
cd fastq

# One entry per sample: "<SRR run accession> <sample ID>".
samples=(
  "SRR099967 HG00119"
  "SRR099969 HG00133"
  "SRR099957 HG00145"
  "SRR099958 HG00239"
  "SRR099954 HG00258"
  "SRR099968 HG00265"
)

# Start every download in the background so that all files transfer in
# parallel, remembering each job's PID together with its source/target name.
pids=()
srcs=()
dsts=()
for entry in "${samples[@]}"; do
  read -r run sample <<<"$entry"
  for mate in 1 2; do
    nohup wget -c "ftp://ftp.sra.ebi.ac.uk/vol1/fastq/SRR099/${run}/${run}_${mate}.fastq.gz" &
    pids+=("$!")
    srcs+=("${run}_${mate}.fastq.gz")
    dsts+=("${sample}_${mate}.fastq.gz")
  done
done

# Rename a file only after its own download has finished successfully;
# renaming right after launching wget would act on a missing or partial file.
for i in "${!pids[@]}"; do
  if wait "${pids[i]}"; then
    mv -- "${srcs[i]}" "${dsts[i]}"
  else
    printf 'download failed, not renaming: %s\n' "${srcs[i]}" >&2
  fi
done
02.质量控制及过滤
# Run FastQC on every gzipped FASTQ in the current directory, writing the
# reports into ./fastqc.
mkdir -p fastqc
# The ./ prefix keeps file names that start with '-' from being read as options.
fastqc --outdir fastqc --threads 16 ./*.gz
cd fastqc
# Merge the per-sample FastQC results into a single MultiQC report.
multiqc ./*zip
03.参考基因组及注释文件下载
GATK 官网提供了 resource bundle,其中包含后续分析所需的参考基因组、已知变异位点等数据,下载地址:ftp://gsapubftp-anonymous@ftp.broadinstitute.org/bundle/
# Download the hg38 reference genome and the known-sites / annotation
# resources from the GATK resource bundle, then index the reference.

# Reference genome FASTA; keep the PID so the indexing step can wait for it.
nohup wget -c ftp://gsapubftp-anonymous@ftp.broadinstitute.org/bundle/hg38/Homo_sapiens_assembly38.fasta.gz &
fasta_pid=$!

# dbSNP variant sites (VCF and its index)
nohup wget -c ftp://gsapubftp-anonymous@ftp.broadinstitute.org/bundle/hg38/dbsnp_146.hg38.vcf.gz &
nohup wget -c ftp://gsapubftp-anonymous@ftp.broadinstitute.org/bundle/hg38/dbsnp_146.hg38.vcf.gz.tbi &
# Mills and 1000G gold-standard indels (a relatively accurate human indel set)
nohup wget -c ftp://gsapubftp-anonymous@ftp.broadinstitute.org/bundle/hg38/Mills_and_1000G_gold_standard.indels.hg38.vcf.gz &
nohup wget -c ftp://gsapubftp-anonymous@ftp.broadinstitute.org/bundle/hg38/Mills_and_1000G_gold_standard.indels.hg38.vcf.gz.tbi &
# Sequence dictionary of the reference genome
nohup wget -c ftp://gsapubftp-anonymous@ftp.broadinstitute.org/bundle/hg38/Homo_sapiens_assembly38.dict &
# 1000 Genomes phase 1 high-confidence SNPs (VCF and its index)
nohup wget -c ftp://gsapubftp-anonymous@ftp.broadinstitute.org/bundle/hg38/1000G_phase1.snps.high_confidence.hg38.vcf.gz &
nohup wget -c ftp://gsapubftp-anonymous@ftp.broadinstitute.org/bundle/hg38/1000G_phase1.snps.high_confidence.hg38.vcf.gz.tbi &
# Funcotator data sources archive
nohup wget -c ftp://gsapubftp-anonymous@ftp.broadinstitute.org/bundle/funcotator/funcotator_dataSources.v1.6.20190124s.tar.gz &

# Build the FASTA index only after the reference archive has fully arrived;
# decompressing right after launching wget would read a missing or partial file.
if wait "$fasta_pid"; then
  gunzip Homo_sapiens_assembly38.fasta.gz \
    && samtools faidx Homo_sapiens_assembly38.fasta
else
  printf 'download failed: %s\n' "Homo_sapiens_assembly38.fasta.gz" >&2
fi

# Block until the remaining background downloads have finished.
wait
网友评论