美文网首页走进转录组
铁皮石斛上游分析

铁皮石斛上游分析

作者: 白米饭睡不醒 | 来源:发表于2021-03-17 09:38 被阅读0次

一、从NCBI下载参考基因组

下载地址:https://www.ncbi.nlm.nih.gov/

(铁皮石斛 拉丁文名: Dendrobium catenatum) 参考基因组少时 选择组装参考基因组(老师示例用的asswmbly即组装参考基因组)

铁皮石斛基因组https://ftp.ncbi.nlm.nih.gov/genomes/all/GCF/001/605/985/GCF_001605985.2_ASM160598v2/

下载

#创建并进入目录
~/database/genome/ncbi/Dendrobium-catenatum/GCF_001605985

#下载基因组序列
wget  -c https://ftp.ncbi.nlm.nih.gov/genomes/all/GCF/001/605/985/GCF_001605985.2_ASM160598v2/GCF_001605985.2_ASM160598v2_rna.fna.gz

#下载基因组注释文件
wget -c 
https://ftp.ncbi.nlm.nih.gov/genomes/all/GCF/001/605/985/GCF_001605985.2_ASM160598v2/GCF_001605985.2_ASM160598v2_genomic.gff.gz

二、数据过滤

#定义文件夹
位置  ~/project/Dendrobium-Trans/data/cleandata/trim_galore
rawdata=~/project/Dendrobium-Trans/data/rawdata/fastq/
cleandata=~/project/Dendrobium-Trans/data/cleandata/trim_galore

#单个样本过滤
trim_galore --phred33 -q 20 --length 36 --stringency 3 --fastqc --paired --max_n 3 -o $cleandata $rawdata/GPR210305-17-ck_BKDL210012463-1a-AK1583-AK4261_1.fq.gz $rawdata/GPR210305-17-ck_BKDL210012463-1a-AK1583-AK4261_2.fq.gz

trim_galore --phred33 -q 20 --length 36 --stringency 3 --fastqc --paired --max_n 3 -o $cleandata $rawdata/GPR210305-17-VI_BKDL210012463-1a-AK1584-N502_1.fq.gz $rawdata/GPR210305-17-VI_BKDL210012463-1a-AK1584-N502_2.fq.gz

三、数据比对

#解压基因组序列文件
位置  ~/database/genome/ncbi/Dendrobium-catenatum/GCF_001605985
gzip -d GCF_001605985.2_ASM160598v2_rna.fna.gz

#Hisat2构建索引
位置 ~/database/genome/ncbi/Dendrobium-catenatum/GCF_001605985
hisat2-build GCF_001605985.2_ASM160598v2_rna.fna  GCF_001605985.2_ASM160598v2_rna.suoyin

#定义输入输出文件夹
位置  ~/project/Dendrobium-Trans/mapping/Hisat2
index=/home/fanzhiyu_31946012_cyl_150/jiyinzu/fna/GCF_001605985.2_suoyin
inputdir=/home/fanzhiyu_31946012_cyl_150/project/Dendrobium-Trans/data/cleandata/trim_galore
outdir=/home/fanzhiyu_31946012_cyl_150/project/Dendrobium-Trans/mapping/Hisat2/

hisat2 -p 10  -x  ${index} -1 ${inputdir}/GPR210305-17-ck_BKDL210012463-1a-AK1583-AK4261_1_val_1.fq.gz -2 ${inputdir}/GPR210305-17-ck_BKDL210012463-1a-AK1583-AK4261_2_val_2.fq.gz  -S ${outdir}/GPR210305-17-ck.Hisat_aln.sam


vim ha.sh
nohup bash ha.sh &
cat nohup.out 

index=/home/fanzhiyu_31946012_cyl_150/jiyinzu/fna/GCF_001605985.2_suoyin
inputdir=/home/fanzhiyu_31946012_cyl_150/project/Dendrobium-Trans/data/cleandata/trim_galore
outdir=/home/fanzhiyu_31946012_cyl_150/project/Dendrobium-Trans/mapping/Hisat2/

hisat2 -p 10  -x  ${index} -1 ${inputdir}/GPR210305-17-VI_BKDL210012463-1a-AK1584-N502_1_val_1.fq.gz -2 ${inputdir}/GPR210305-17-VI_BKDL210012463-1a-AK1584-N502_2_val_2.fq.gz  -S ${outdir}/GPR210305-17-VI.Hisat_aln.sam


# sam转bam(同时排序)
samtools sort -@ 10 -o GPR210305-17-VI.Hisat_aln.sorted.bam GPR210305-17-VI.Hisat_aln.sam

samtools sort -@ 10 -o GPR210305-17-ck.Hisat_aln.sorted.bam GPR210305-17-ck.Hisat_aln.sam


#对bam文件建索引
samtools index GPR210305-17-VI.Hisat_aln.sorted.bam GPR210305-17-VI.Hisat_aln.sorted.bam.bai

samtools index GPR210305-17-ck.Hisat_aln.sorted.bam GPR210305-17-ck.Hisat_aln.sorted.bam.bai


#数据定量
位置 ~/project/Dendrobium-Trans/expression/featurecounts

## 定义输入输出文件夹
gtf=/home/fanzhiyu_31946012_cyl_150/jiyinzu/gtf/GCF_001605985.2_ASM160598v2_genomic.gtf.gz
inputdir=/home/fanzhiyu_31946012_cyl_150/project/Dendrobium-Trans/mapping/Hisat2

# featureCounts对bam文件进行计数
featureCounts -T 10 -p -t exon -g gene_id -a $gtf -o all.id.txt $inputdir/*.sorted.bam

# 对定量结果质控
multiqc all.id.txt.summary

# 得到表达矩阵
cat all.id.txt | cut -f1,7- > counts.txt


#整理
less -S all.id.txt
sed -i 's#/home/fanzhiyu_31946012_cyl_150/project/Dendrobium-Trans/mapping/Hisat2/##g' all.id.txt
sed -i 's#.Hisat_aln.sorted.bam##g' all.id.txt

less -S all.id.txt |grep -v '#' >count.xls
less -S count.xls
less -S count.xls |cut -f 1,7- >count1.xls
less -S count1.xls |column -t |less -S


相关文章

网友评论

    本文标题:铁皮石斛上游分析

    本文链接:https://www.haomeiwen.com/subject/fspnqltx.html