# 软件1、cutadapt
# Adapter and quality trimming of the raw reads with cutadapt.
# The input is a single interleaved paired-end FASTQ; the trimmed reads are
# written (still interleaved) to $cutadapt_out for the assembly step.
input=test.fq.gz
cutadapt_input=$input
cutadapt_out=cutadapt/trimed.fastq.gz
interleaved=--interleaved
mkdir -p cutadapt
# -a / -A  3' adapter to remove from read 1 / read 2
# -q 30    quality-trim read ends with a cutoff of 30
# -m 20    discard reads shorter than 20 nt after trimming
# --trim-n remove N bases from read ends
# -O 10    require at least 10 nt of overlap before an adapter is removed
cutadapt "$interleaved" \
  -a AGATCGGAAGAGC -A AGATCGGAAGAGC \
  -q 30 -m 20 --trim-n -O 10 \
  -o "$cutadapt_out" \
  "$cutadapt_input"
# 软件2、megahit
# De novo assembly of the trimmed reads with MEGAHIT.
# Despite its name, $input_fa holds whatever $cutadapt_out points at
# (the interleaved, trimmed read file produced by the trimming step).
assembly_out=assembly_out
input_fa=$cutadapt_out
# --12              interleaved paired-end input file
# --k-max 149       largest k-mer size to use
# --max-tip-len 200 tip-length threshold used when cleaning the graph
# --min-contig-len  shortest contig that is written out
# -o                output directory
megahit \
  --12 "$input_fa" \
  --k-max 149 \
  --max-tip-len 200 \
  --min-contig-len 300 \
  -o "$assembly_out"
# 软件3、MetaGeneMark
# Gene prediction on the assembled contigs with MetaGeneMark (gmhmmp).
# Reads  : $input_dir/final.contigs.fa
# Writes : $predict_gene_out/out.gff        (predictions, GFF)
#          $predict_gene_out/final.prot.fa  (protein sequences)
#          $predict_gene_out/final.nucl.fa  (nucleotide sequences)

# Follow the assembly step's output directory when $assembly_out is set in
# this shell; otherwise fall back to the previously hard-coded default so
# the step can still be run on its own.
input_dir=${assembly_out:-assembly_out}
predict_gene_out=predict_gene
model_file=../MetaGeneMark_linux_64/mgm/MetaGeneMark_v1.mod
mkdir -p "$predict_gene_out"

# Install the licence key shipped with the MetaGeneMark download.
# NOTE: this overwrites any key already present at ~/.gm_key.
cp ../MetaGeneMark_linux_64/gm_key ~/.gm_key

# -d     include nucleotide sequences of predicted genes in the main output
# -f G   GFF output format
# -m     model (parameter) file
# -A/-D  separate protein / nucleotide FASTA outputs
gmhmmp -d -f G -m "$model_file" \
  -o "$predict_gene_out/out.gff" \
  -A "$predict_gene_out/final.prot.fa" \
  -D "$predict_gene_out/final.nucl.fa" \
  "$input_dir/final.contigs.fa"
# 软件4、cd-hit
# Build the non-redundant gene set (unigenes) from the predicted genes.
# Reads  : $predict_gene_out/final.nucl.fa (set by the gene-prediction step)
# Writes : $predict_gene_out/filter_final.nucl.fa, unigene_set/unigene.fa
mkdir -p unigene_set

# filter_predict_nucl.py is a self-written helper script (not part of any
# package); it takes the predicted nucleotide FASTA and an output path.
python filter_predict_nucl.py \
  "$predict_gene_out/final.nucl.fa" \
  "$predict_gene_out/filter_final.nucl.fa"

# The sequences are nucleotide (they are later searched with `diamond blastx`),
# so cluster them with cd-hit-est; plain `cd-hit` is the protein clusterer.
clusterer=cd-hit-est
# -c 0.95   sequence identity threshold
# -aS 0.9   alignment must cover >= 90% of the shorter sequence
# -d 0      keep sequence names up to the first whitespace in the .clstr file
# -M 10000  memory limit in MB
# -T 0      use all available CPU threads
"$clusterer" \
  -i "$predict_gene_out/filter_final.nucl.fa" \
  -o unigene_set/unigene.fa \
  -c 0.95 -aS 0.9 -d 0 -M 10000 -T 0
# 软件5、diamond
# Functional annotation: translated search (diamond blastx) of the unigene
# set against four protein databases, one result file per database.
mkdir -p function_anno

# The database files are not provided here and must be downloaded separately;
# the leading ".../" is a placeholder for wherever they were installed.
db_root=.../metagenomics/function/database
database_eggNOG=$db_root/e5.proteomes
database_CARD=$db_root/CARD/CARD.protein
database_CAZy=$db_root/CAZy/CAZyDB.07202017
database_PHI=$db_root/PHI/phi-base_current

diamond_eggNOG=function_anno/unigene.e5
diamond_CARD=function_anno/unigene.CARD
diamond_CAZy=function_anno/unigene.CAZyDB
diamond_PHI=function_anno/unigene.phi

# Search the unigene FASTA against one database.
#   $1 - diamond database path
#   $2 - output file for the hits
blastx_unigenes() {
  diamond blastx -d "$1" -q unigene_set/unigene.fa -o "$2" --evalue 0.00001
}

blastx_unigenes "$database_eggNOG" "$diamond_eggNOG"
blastx_unigenes "$database_CARD" "$diamond_CARD"
blastx_unigenes "$database_CAZy" "$diamond_CAZy"
blastx_unigenes "$database_PHI" "$diamond_PHI"
# 网友评论