###Get the indels for vcf this is now the bed files
#!/bin/bash
ml bedops
data_dir=yourdir/VariantFiltration
output_dir=yourdir/Indel_vcf
snvs_dir=yourdir/Indel_vcf/snvs
insertions_dir=yourdir/insertions
deletions_dir=yourdir/deletions
mkdir -p $output_dir
mkdir -p $snvs_dir
mkdir -p $insertions_dir
mkdir -p $deletions_dir
ls *.vcf | while read i ;
do
vcf2bed --snvs < $data_dir/${i} > $snvs_dir/${i%_VariantFiltration.vcf*}_snvs.vcf
vcf2bed --insertions < $data_dir/${i}> $insertions_dir/${i%_VariantFiltration.vcf*}_insertions.vcf
vcf2bed --deletions < $data_dir/${i} > $deletions_dir/${i%_VariantFiltration.vcf*}_deletions.vcf
cat $insertions_dir/${i%_VariantFiltration.bed*}_insertions.vcf $deletions_dir/${i%_VariantFiltration.bed*}_deletions.vcf > $output_dir/${i%_VariantFiltration.bed*}_all_indel.bed
done
#############Get gene names form the location
###Get the bed file for Long non-coding RNA gene annotation (Genecode V30)
wget ftp://ftp.ebi.ac.uk/pub/databases/gencode/Gencode_human/release_30/gencode.v30.long_noncoding_RNAs.gff3.gz
#get the bed file for the lncRNAs exon region
perl -alne '{print if $F[2] eq "exon"}' gencode.v30.long_noncoding_RNAs.gff3 |cut -f 1,4,5 > lncRNA_exonV30.bed
#get exon gtf
perl -alne '{print if $F[2] eq "exon"}' gencode.v30.annotation.gtf >gencode.v30.annotation.exon.gtf
#or
#gtf2bed < annotations.gtf | grep 'exon' - | bedops --element-of 1 tags.bed - > tags_overlapping_exons.bed
#gtf2bed --do-not-sort < gencode.v30.annotation.exon.gtf > gencode.v30.annotation.exon.bed
gtf2bed < gencode.v30.annotation.exon.gtf > gencode.v30.annotation.exon.bed
#!/bin/bash
ml java
ml bedops
bed_dir=yourdir/hg38/gencode.v30.annotation.exon.bed
soft_dir=yourdir/biosoft/snpEff_latest_core/snpEff/SnpSift.jar
output_dir=yourdir/Indel_vcf/all_indels_annotation
data_dir=yourdir/Indel_vcf/all_indels
tmp_dir=$output_dir/tmp
mkdir -p $output_dir
mkdir -p $tmp_dir
ls *.vcf| while read i ;
do
cat $data_dir/${i} | java -jar $soft_dir intervals $bed_dir | awk -v OFS="\t" '{print $1,$2,$3}' > $tmp_dir/${i%_all_indel.vcf*}_all_indels_tmp.bed
bedtools intersect -a $tmp_dir/${i%_all_indel.vcf*}_all_indels_tmp.bed -b $gtf_dir -wb | awk -v OFS="\t" '{print $1,$2,$3,$6,$13,$15,$17,$19}' > test.txt
done
网友评论