美文网首页
2023-07-23基因组可视化

2023-07-23基因组可视化

作者: 麦冬花儿 | 来源:发表于2023-08-17 16:19 被阅读0次
## Installing Circos (http://circos.ca/)
#wget http://circos.ca/distribution/circos-0.69-6.tgz -P ~/software/
tar zxf ~/software/circos-0.69-6.tgz -C /opt/biosoft/
cd /opt/biosoft/circos-0.69-6/bin
./circos -modules | perl -e 'while (<>) { print "$1 " if m/missing\s+(\S+)/ }' | perl -p -e 's/^/sudo cpan -i /; s/$/\n/' | sh
./circos -modules
./gddiag
./circos --help
cd ../example/
../bin/circos -conf etc/circos.conf
echo 'PATH=$PATH:/opt/biosoft/circos-0.69-6/bin/' >> ~/.bashrc
source ~/.bashrc
#sudo tar zxf ~/software/usr.tar.gz -C / #培训所有的系统相关的软件都在该压缩包里,即使重装系统,重新将该压缩包重新解压一下就行。同时记得修改/etc/selinux/config 下的SELINUX=disabled
[train@MiWiFi-R3P-srv bin]$ sudo cat /etc/selinux/config 

# This file controls the state of SELinux on the system.
# SELINUX= can take one of these three values:
#     enforcing - SELinux security policy is enforced.
#     permissive - SELinux prints warnings instead of enforcing.
#     disabled - No SELinux policy is loaded.
# See also:
# https://docs.fedoraproject.org/en-US/quick-docs/getting-started-with-selinux/#getting-started-with-selinux-selinux-states-and-modes
#
# NOTE: In earlier Fedora kernel builds, SELINUX=disabled would also
# fully disable SELinux during boot. If you need a system with SELinux
# fully disabled instead of SELinux running with no policy loaded, you
# need to pass selinux=0 to the kernel command line. You can use grubby
# to persistently set the bootloader to boot with selinux=0:
#
#    grubby --update-kernel ALL --args selinux=0
#
# To revert back to SELinux enabled:
#
#    grubby --update-kernel ALL --remove-args selinux
#
SELINUX=disabled
# SELINUXTYPE= can take one of these three values:
#     targeted - Targeted processes are protected,
#     minimum - Modification of targeted policy. Only selected processes are protected.
#     mls - Multi Level Security protection.
SELINUXTYPE=targeted




## Installing IGV (http://software.broadinstitute.org/software/igv/)
#wget https://data.broadinstitute.org/igv/projects/downloads/2.16/IGV_Linux_2.16.1_WithJava.zip -P ~/software/
unzip ~/software/IGV_Linux_2.16.1_WithJava.zip -d /opt/biosoft/
echo 'PATH=$PATH:/opt/biosoft/IGV_Linux_2.16.1/' >> ~/.bashrc
source ~/.bashrc
mkdir -p ~/igv/genomes/
cp ~/software/hg18.genome ~/igv/genomes/


# Installing GBrowse (http://gmod.org/wiki/GBrowse | https://github.com/GMOD/GBrowse) 使用root用户运行下面的命令进行安装:
#wget https://github.com/GMOD/GBrowse/archive/release-2.56.tar.gz -O ~/software/GBrowse-release-2.56.tar.gz
tar zxf /home/train/software/GBrowse-release-2.56.tar.gz -C /opt/biosoft/
cd /opt/biosoft/GBrowse-release-2.56/
perl Makefile.PL
# 程序会检测GBrowse2所依赖的Perl模块是否就绪,若必须依赖的Perl模块全部就绪,才能成功安装GBrowse2
./Build test
./Build install
systemctl restart httpd.service

# 若缺少部分Perl模块,则使用如下命令自动安装
./Build installdeps
# 自动安装一般会由于各种原因而失败。推荐进行如下操作:
# (1)注意安装perl模块,推荐优先设置网速快的CPAN源,使用root用户执行下面的命令,使用国内速度较快的aliyun和163的CPAN源。
#perl -p -i -e "s#urllist.*#urllist' => [q[http://mirrors.aliyun.com/CPAN/], q[http://mirrors.163.com/CPAN/]],#" /usr/share/perl5/CPAN/Config.pm
# (2)难点在于安装BioPerl会失败。需要LibGD支持才能安装BioPerl成功,安装LibGD流程请参考01.CentOS_System_Configuration/system_software_installation.sh部分。
# 单独安装一些perl模块
# cpan -i BioPerl
# cpan -fi Bio::DB::GFF
# cpan -i Bio::DB::SeqFeature
# cpan -i Bio::Graphics
# cpan -i GD::SVG
# cpan -fi Bio::Das
# (3)最后剩下 Bio::DB::BigFile 和 Bio::DB::Sam 模块较难安装。他们的安装分别需要依赖 kent 和 samtools 。
# Installing kent
#wget http://hgdownload.cse.ucsc.edu/admin/jksrc.v449.zip -P ~/software/
sudo dnf --disablerepo=* --enablerepo=media-* -y install libuuid libuuid-devel
unzip /home/train/software/jksrc.v449.zip -d /opt/biosoft/
cd /opt/biosoft/kent/src/
export MACHTYPE=x86_64
mkdir -p ~/bin/x86_64
make CXXFLAGS=-fPIC CFLAGS=-fPIC CPPFLAGS=-fPIC -j 4
cp ./lib/x86_64/jkweb.a ./lib/
mv ~/bin/x86_64/ ../bin/
mv ~/bin/scripts/ ../
# Installing samtools
#wget https://sourceforge.net/projects/samtools/files/samtools/0.1.19/samtools-0.1.19.tar.bz2 -P ~/software/
tar jxf /home/train/software/samtools-0.1.19.tar.bz2 -C /opt/biosoft/
cd /opt/biosoft/samtools-0.1.19/
make clean
make CXXFLAGS=-fPIC CFLAGS=-fPIC CPPFLAGS=-fPIC
# 再次进行安装perl模块
cd /opt/biosoft/GBrowse-release-2.56
./Build installdeps
# 输入 /opt/biosoft/kent/src 路径来安装 Bio::DB::BigFile
# 输入 /opt/biosoft/samtools-0.1.19/ 路径来安装 Bio::DB::Sam
# 安装Bio::BigFile和Bio::DB::Sam模块时可能会由于编译警告而失败,则需要手动安装该模块,修改其Build.PL内容,使-Wformat=1,则能编译成功。
# 最后安装 GBrowse
./Build test
# test步骤继续失败的原因可能是BioPerl安装有问题。
./Build install
systemctl restart httpd.service

# 若安装失败,需要重新安装,则需要删除GBrowse相关的全部文件,然后重新安装
# /bin/rm -rf /etc/gbrowse2 /var/www/html/gbrowse2 /var/tmp/gbrowse2 /var/lib/gbrowse2 /var/www/cgi-bin/gb2 /etc/httpd/conf.d/z_gbrowse2.conf /opt/biosoft/GBrowse-2.56/ /etc/httpd/conf.d/gbrowse2.conf /etc/httpd/modules


## Installing JBrowse (http://jbrowse.org/)
#wget https://github.com/GMOD/jbrowse-components/releases/download/v2.6.1/jbrowse-web-v2.6.1.zip -P ~/software
#wget https://github.com/GMOD/jbrowse-components/releases/download/v2.6.1/jbrowse-desktop-v2.6.1-linux.AppImage -P ~/software
cp ~/software/jbrowse-desktop-v2.6.1-linux.AppImage ~/.local/bin/jbrowse
chmod 755 ~/.local/bin/jbrowse

unzip ~/software/jbrowse-web-v2.6.1.zip -d /opt/biosoft/JBrowse-2.6.1/
cd /opt/biosoft/JBrowse-2.6.1/
cp /etc/httpd/conf/httpd.conf ./
echo '
Alias /JBrowse "/opt/biosoft/JBrowse-2.6.1/"
<Directory "/opt/biosoft/JBrowse-2.6.1/">
    Options MultiViews ExecCGI Indexes FollowSymlinks
    AllowOverride None
    Order allow,deny
    Allow from all
</Directory>' >> httpd.conf
sudo mv httpd.conf /etc/httpd/conf/httpd.conf
sudo systemctl restart httpd.service


## Installing webApollo (http://gmod.org/wiki/WebApollo | https://github.com/GMOD/Apollo)
#wget https://github.com/GMOD/Apollo/archive/2.3.1.tar.gz -O ~/software/Apollo-2.3.1.tar.gz
# 安装 postgresql 数据库
sudo dnf install postgresql postgresql-devel
# 初始化 postgresql 数据库
sudo postgresql-setup initdb
# 启动 postgresql 数据库并设置开机启动 posgresql 服务
sudo systemctl start postgresql.service
sudo systemctl enable postgresql.service
# 修改 postgresql 配置文件,使用用户train具有通过密码连接数据库的权限
sudo cp /var/lib/pgsql/data/pg_hba.conf ./
sudo chown -R train:train pg_hba.conf
perl -p -i -e 'unless (m/^#/) { s/ident/trust/; s/peer/trust/; }' pg_hba.conf
sudo mv pg_hba.conf /var/lib/pgsql/data/pg_hba.conf
sudo chown -R postgres:postgres /var/lib/pgsql/data/pg_hba.conf
sudo chmod 600 /var/lib/pgsql/data/pg_hba.conf
sudo systemctl restart postgresql.service

# 安装 perl 模块
sudo cpan -i BioPerl JSON JSON::XS PerlIO::gzip Heap::Simple Heap::Simple::XS Devel::Size Hash::Merge Bio::GFF3::LowLevel::Parser  Digest::Crc32  Cache::Ref::FIFO File::Next
# 安装 Web Apollo
tar zxf ~/software/WebApollo-2014-04-03.tgz -C /opt/biosoft/
cd /opt/biosoft/WebApollo-2014-04-03/
tar zxf ~/software/apache-tomcat-7.0.57.tar.gz 
cd apache-tomcat-7.0.57/
# 修改 Tomcat 7 的报错设置
perl -p -i -e 's/(autoDeploy=.*)>/$1\n            errorReportValveClass="org.bbop.apollo.web.ErrorReportValve">/' conf/server.xml
# 修改 Tomcat 7 的内存设置,推荐设置 heap size 至少 1G, permgen size 至少 256M,基因组越大,设置越大。
perl -p -i -e 's/cygwin=false/CATALINA_OPTS="-Xms512m -Xmx1g -XX:+CMSClassUnloadingEnabled -XX:+CMSPermGenSweepingEnabled -XX:+UseConcMarkSweepGC -XX:MaxPermSize=256m"\ncygwin=false/' bin/catalina.sh
# Circos 画图
 mkdir -p /home/train/12.genome_visualization/circos
cd /home/train/12.genome_visualization/circos
ln -s ~/00.incipient_data/data_for_genome_assembling/assemblies_of_Malassezia_sympodialis/Malassezia_sympodialis.genome_V01.fasta genome.fasta
ln -s ~/00.incipient_data/data_for_gene_prediction_and_RNA-seq/Malassezia_sympodialis_V01.geneModels.gff3 genome.gff3

# 准备用于circos画图的数据文件
# 通过基因组序列,生成染色体组型文件
circos_create_karyotype_by_genome.pl genome.fasta > karyotype.txt

# 通过基因组序列,生成GC含量结果文件,用于画直方图
circos_create_gc_histogram.pl genome.fasta 10000 > gc.histogram.txt

# 生成SNP密度结果文件,用于画直方图
VCF_get_variants_density_for_circos.pl ~/00.incipient_data/data_for_variants_calling/variants.vcf genome.fasta > variant_density.histogram_for_circos.txt

# 通过转录组数据,得到基因表达量的结果文件,用于画热图
#cut -f 1,2,5 ~/09.RNA-seq_analysis_by_cufflinks/cuffnorm/genes.TPM_TMM.matrix > genes.TPM_TMM.matrix
cp ~/00.incipient_data/data_for_circos/genes.TPM_TMM.matrix ./
circos_create_expression_heatmap.pl genes.TPM_TMM.matrix genome.gff3 > gene_expression.heatmap.txt

# 通过基因组自身比较,用于画links图
makeblastdb -in genome.fasta -dbtype nucl -title genome -parse_seqids -out genome 
blastn -query genome.fasta -db genome -out blast.out -evalue 1e-5 -outfmt 6 -num_threads 8
# 计算耗时~2min。
perl -e 'while (<>) { @_ = split /\t/; print if $_[6] ne $_[8] && $_[2] >= 90 && $_[3] >= 1000 }' blast.out | sort -k 1.13n -k 7n > similarity.txt
perl -e 'while (<>) { @_ = split /\t/; if ($_[3] >= 3000) { print } else { print STDERR } }' similarity.txt > similarity_3000.txt 2> similarity_1000.txt
perl -e 'while (<>) { @_ = split /\t/; print "$_[0]\t$_[6]\t$_[7]\t$_[1]\t$_[8]\t$_[9]\n"; }' similarity_1000.txt > similarity_1000.links.txt
perl -e 'while (<>) { @_ = split /\t/; print "$_[0]\t$_[6]\t$_[7]\t$_[1]\t$_[8]\t$_[9]\n"; }' similarity_3000.txt > similarity_3000.links.txt
rm genome* similarity_1000.txt similarity_3000.txt similarity.txt blast.out

# 准备circos配置文件
cp ~/00.incipient_data/data_for_circos/*.conf ./
[train@MiWiFi-R3P-srv circos]$ ls
circos.conf  circos.png  circos.svg  gc.histogram.txt  genome.fasta  genome.gff3  ideogram.conf  karyotype.txt  links.conf  ticks.conf

# 运行circos程序画图
circos -noparanoid -conf circos.conf 
# real  0m17.757s
# user  0m17.693s
# sys   0m0.064s

eog circos.png


## GBrowse2
mkdir -p /home/train/12.genome_visualization/GBrowse2
cd /home/train/12.genome_visualization/GBrowse2

# 准备需要展示的基因组文件
#cd ~/00.incipient_data/data_for_gbrowse/
#cp ~/00.incipient_data/data_for_genome_assembling/assemblies_of_Malassezia_sympodialis/Malassezia_sympodialis.genome_V01.fasta genome.fasta
#cp ~/10.gene_prediction/augustus/augustus.gff3 ~/10.gene_prediction/genemark_es_et/genemarkES.gff3 ~/10.gene_prediction/genemark_es_et/genemarkET.gff3 ~/10.gene_prediction/snap/snap.gff3 ~/10.gene_prediction/braker/braker.gff3 ~/00.incipient_data/data_for_gene_prediction_and_RNA-seq/Malassezia_sympodialis_V01.geneModels.gff3 ~/00.incipient_data/data_for_gene_prediction_and_RNA-seq/Malassezia_sympodialis_V01.bestGeneModels.gff3 ~/10.gene_prediction/pasa/pasa.gff3 ~/10.gene_prediction/homolog/genewise.gff3 ./
#cp ~/10.gene_prediction/evm/transcript_alignments.gff3 pasa_align.gff3
#cp ~/10.gene_prediction/evm/protein_alignments.gff3 homolog_align.gff3
#cp ~/05.genome_feature_analysis/Rfam/rRNA_out.gff3 rRNA.gff3
#cp ~/05.genome_feature_analysis/tRNAscan-SE/tRNA.gff3 ./
#cp ~/05.genome_feature_analysis/Rfam/snRNA_out.gff3 snRNA.gff3
#cp ~/05.genome_feature_analysis/SSR_detecting_and_primer_design/misa_primer3.gff3 SSR.gff3
#cp ~/05.genome_feature_analysis/repeat_analysis/genome.repeat.gff3 ./
#gbrowse2_create_genome_scaffold_gff3.pl genome.fasta > genome.gff3
#cd /home/train/12.genome_visualization/GBrowse2

cp ~/00.incipient_data/data_for_gbrowse/* ./
gbrowse2_create_genome_scaffold_gff3.pl genome.fasta > genome.gff3

# 创建mysql数据库malassezia_sympodialis_gbrowse2
echo "CREATE DATABASE IF NOT EXISTS malassezia_sympodialis_gbrowse2" | mysql -utrain -p123456
# 或者,在mysql命令行界面输入
# mysql -utrain -p123456
# mysql> CREATE DATABASE malassezia_sympodialis_gbrowse2;
# 若 Mysql 数据库无 train 用户,则进行下面的命令:
# mysql -uroot -p
# mysql> GRANT ALL ON *.* TO ‘train’@’%’ IDENTIFIED BY ‘123456’;
# mysql> FLASH PRIVILEGES;
# mysql> CREATE DATABASE IF NOT EXISTS malassezia_sympodialis_gbrowse2;

# 导入数据,下面命令会先清空数据库,再导入数据
bp_seqfeature_load -c -a DBI::mysql -d malassezia_sympodialis_gbrowse2 -u train -p 123456 genome.fasta *.gff3
# real  18m10.981s
# user  5m28.514s
# sys   1m9.785s
# 若需要追加 GFF3 数据到已存在的 Mysql 数据库中,则不需要加 -c 参素
# bp_seqfeature_load.pl -a DBI::mysql -d Malassezia_sympodialis_gbrowse2 -u train -p 123456 file.gff3

# 展示bam文件
ln -s ~/09.RNA-seq_analysis_by_cufflinks/A.bam .
ln -s ~/09.RNA-seq_analysis_by_cufflinks/D.bam .
samtools index A.bam 
samtools index D.bam 

# GBrowse2 数据源配置文件
# 修改 /etc/gbrowse2/GBrowse.conf 文件
cat /etc/gbrowse2/GBrowse.conf > GBrowse.conf
echo "[MS]
description = Malassezia sympodialis
path        = malassezia_sympodialis.conf" >> GBrowse.conf
sudo mv GBrowse.conf /etc/gbrowse2/GBrowse.conf
# 本物种的配置文件
sudo cp ~/00.incipient_data/data_for_gbrowse/malassezia_sympodialis.conf /etc/gbrowse2/

# Browse 设置用户访问
gbrowse_create_account.pl train
sudo perl -p -i -e 's/^#restrict/restrict/' /etc/gbrowse2/malassezia_sympodialis.conf
# 现在查看 malassezia_sympodialis 的基因组浏览器则需要登录了。
# 修改还原
sudo perl -p -i -e 's/^restrict/#restrict/' /etc/gbrowse2/malassezia_sympodialis.conf


## WebApollo
mkdir -p /home/train/12.genome_visualization/WebApollo
cd /home/train/12.genome_visualization/WebApollo

# 准备数据文件
ln -s ../GBrowse2/genome.fasta ./
ln -s ../GBrowse2/*.gff3 ./
rm pasa_align.gff3 homolog_align.gff3
ln -s ../GBrowse2/*.bam* ./
bam2wig A.bam > A.wig
bam2wig D.bam > D.wig
cal_seq_length.pl genome.fasta > chrom.sizes
wigToBigWig.pl A.wig chrom.sizes A.bw
wigToBigWig.pl D.wig chrom.sizes D.bw

# 设置 PostGres 和 Web Apollo 的用户,数据库和权限
# 创建 posGresql 的用户 train, 密码为 123456
sudo su - postgres
createuser -h localhost -p 5432 -d -R -S train
# Enter password for new role:123456
# Enter it again: 123456
# 创建 PostGres 数据库
createdb -h localhost -p 5432 -U train apollo_malassezia_sympodialis
logout
# 建立数据库的表 (以下使用 train 用户执行)
psql -U train apollo_malassezia_sympodialis < /opt/biosoft/WebApollo-2014-04-03/tools/user/user_database_postgresql.sql
# 创建 Web Apollo 用户 apollo,密码 123456
/opt/biosoft/WebApollo-2014-04-03/tools/user/add_user.pl -D apollo_malassezia_sympodialis -U train -P 123456 -u apollo -p 123456
# 提取基因组序列名,将序列名导入到 postgres 数据库,并使 apollo 用户具有访问这些序列的权限
/opt/biosoft/WebApollo-2014-04-03/tools/user/extract_seqids_from_fasta.pl -p Annotations- -i genome.fasta -o seqids.txt
/opt/biosoft/WebApollo-2014-04-03/tools/user/add_tracks.pl -D apollo_malassezia_sympodialis -U train -P 123456 -t seqids.txt
/opt/biosoft/WebApollo-2014-04-03/tools/user/set_track_permissions.pl -D apollo_malassezia_sympodialis -U train -P 123456 -u apollo -t seqids.txt -a

# 部署 Web Apollo
mkdir annotations data temp
mkdir /opt/biosoft/WebApollo-2014-04-03/apache-tomcat-7.0.57/webapps/webApollo_Malassezia_sympodialis
cd /opt/biosoft/WebApollo-2014-04-03/apache-tomcat-7.0.57/webapps/webApollo_Malassezia_sympodialis
jar -xvf /opt/biosoft/WebApollo-2014-04-03/war/WebApollo.war 
cd jbrowse
chmod 777 bin/*
export PATH=/opt/biosoft/WebApollo-2014-04-03/apache-tomcat-7.0.57/webapps/webApollo_Malassezia_sympodialis/jbrowse/bin/:$PATH
ln -s /home/train/12.genome_visualization/WebApollo/data ./
cd ../config
# modify the files config.xml and blat_config.xml
cp ~/00.incipient_data/data_for_webApollo/config.xml ./

# 将需要展示的数据格式化,构建Track,放入 data 文件夹中
cd ~/12.genome_visualization/WebApollo/
prepare-refseqs.pl --fasta genome.fasta 
add-webapollo-plugin.pl -i data/trackList.json
for i in `ls *.gff3`
do
    x=${i/.gff3/}
    flatfile-to-json.pl -gff $i --arrowheadClass trellis-arrowhead --getSubfeatures --subfeatureClasses '{"wholeCDS": null, "CDS":"brightgreen-80pct", "UTR": "darkgreen-60pct", "exon":"container-100pct"}' --className container-16px --type mRNA --trackLabel $x
done

perl -ne 'if (/\tmult=(\d+)/ && $1 >= 3) {$mult=$1; s/(.*)\t.*/$1/; print "$1\tName=$mult;\n";}' ~/10.gene_prediction/augustus/making_hints/hints.gff > hints.intron.gff3
flatfile-to-json.pl -gff hints.intron.gff3 --getSubfeatures --subfeatureClasses '{"match_part": "darkblue-80pct"}' --className container-10px --trackLabel intron
# JBrowse Track 建立完毕,再创建索引
generate-names.pl

mkdir data/bigwig
cp *bw data/bigwig/
add-bw-track.pl --bw_url bigwig/A.bw --label A_bw --key "simulated BigWig A"
add-bw-track.pl --bw_url bigwig/D.bw --label D_bw --key "simulated BigWig D"

/opt/biosoft/WebApollo-2014-04-03/apache-tomcat-7.0.57/bin/startup.sh

# 访问 localhost:8080/webApollo_Malassezia_sympodialis/jbrowse/

相关文章

网友评论

      本文标题:2023-07-23基因组可视化

      本文链接:https://www.haomeiwen.com/subject/zkbxpdtx.html