美文网首页
PlantCARE 结果统计

PlantCARE 结果统计

作者: 花生学生信 | 来源:发表于2023-10-08 16:28 被阅读0次

这里统计 PlantCARE 网站对群体启动子的分析结果。

查看结果:

所有的下载的结果

因为解压后的文件不含自己样本的名称,所以这里要得到样本编号和id的对应关系:

# archive_id NAME
#   Prints NAME without its leading "PlantCARE_" and its trailing
#   "_plantCARE.tar.gz". Both are matched literally and only at the ends of
#   the name, so a "." never acts as a wildcard.
archive_id() {
  local name=${1#PlantCARE_}
  printf '%s\n' "${name%_plantCARE.tar.gz}"
}

# Write one ID per downloaded archive to ./id. The shell glob is used directly
# (rather than parsing ls output) so unusual file names stay intact.
for archive in *.gz; do
  [ -e "$archive" ] || continue   # the glob matched nothing
  archive_id "$archive"
done >id
对应关系

再和群体对应:


三者关系

批量解压

# unpack_archives ARCHIVE...
#   Extracts each gzip-compressed tar archive into the current directory and
#   records the names of the archives it processed, one per line, in ./plc.
#   Arguments that are not regular files (e.g. an unmatched "*.gz" pattern)
#   are skipped.
#   Returns 0 when every extraction succeeded, 1 if any tar call failed.
unpack_archives() {
  local archive status=0
  : >plc
  for archive in "$@"; do
    [ -f "$archive" ] || continue
    printf '%s\n' "$archive" >>plc
    # Quoted so that archive names containing spaces reach tar as one argument.
    if ! tar -zxvf "$archive"; then
      printf 'unpack_archives: failed to extract %s\n' "$archive" >&2
      status=1
    fi
  done
  return "$status"
}

unpack_archives *.gz
解压后

写个重命名脚本

怕误删,所以重命名时用 cp 而不是 mv;批量命令可以在 Excel 表格中用“&”符号拼接生成。

提取结果

1、plant growth and development
# pick_motif FLAGS PATTERN [EXCLUDE]
#   Reads tab-separated rows on stdin and prints those whose 2nd column
#   matches the regular expression PATTERN.
#   FLAGS is a string of letters: "i" = ignore case, "w" = PATTERN must match
#   as a whole word (word characters are letters, digits and "_");
#   any other character, e.g. "-", selects nothing.
#   EXCLUDE (optional): rows whose 2nd column matches this regex are dropped.
#   Only column 2 is tested, so short patterns such as "G" or "RY" cannot hit
#   text in column 1.
pick_motif() {
  awk -F '\t' -v flags="$1" -v pat="$2" -v skip="${3-}" '
    BEGIN {
      fold = (flags ~ /i/)
      if (fold) pat = tolower(pat)
      if (flags ~ /w/) pat = "(^|[^A-Za-z0-9_])(" pat ")([^A-Za-z0-9_]|$)"
    }
    (fold ? tolower($2) : $2) ~ pat && (skip == "" || $2 !~ skip)
  '
}

# extract_motifs OUTDIR FILE...
#   stdin: one spec per line, "FLAGS|PATTERN|OUTFILE[|EXCLUDE]".
#   Drops every line of FILE... containing "Unnamed" or "short_function",
#   keeps columns 1-2, and for each spec writes the rows selected by
#   pick_motif to OUTDIR/OUTFILE. OUTDIR is created if it is missing.
#   NOTE(review): assumes column 2 is the motif name -- confirm on your files.
extract_motifs() {
  local outdir=$1 table flags pattern outfile exclude
  shift
  if [ "$#" -eq 0 ]; then
    printf 'extract_motifs: no input files given\n' >&2
    return 1
  fi
  mkdir -p -- "$outdir" || return 1
  table=$(mktemp) || return 1
  # Build the two-column table once; every spec below filters this copy.
  grep -v Unnamed -- "$@" | grep -v short_function | cut -f 1,2 >"$table"
  while IFS='|' read -r flags pattern outfile exclude; do
    [ -n "$outfile" ] || continue
    pick_motif "$flags" "$pattern" "$exclude" <"$table" >"$outdir/$outfile"
  done
  rm -f -- "$table"
}

extract_motifs plant_growth *_* <<'EOF'
iw|A-box|A-box
iw|AE-box|AE-box
iw|Box 4|Box-4
iw|CAT|CAT-box
iw|circadian|Circadian
iw|GA|GA-motif
iw|GATA|GATA-motif
iw|G|G-box
i|GCN4|GCN4-motif
i|GT1|GT1-motif
i|I-box|I-box
i|RY|RY-element
i|MRE|MRE
EOF

排序并计数(先 cd plant_growth 进入上一步的输出目录;统计每个“文件:序列”中该元件出现的次数)

# count_by_source FILE
#   Counts how many rows of the tab-separated FILE share each value of
#   column 1 and writes "value<TAB>count" lines, sorted by value, to FILE.xls.
#   The value is everything after the count that uniq -c prints, so values
#   containing spaces are kept whole.
count_by_source() {
  cut -f 1 -- "$1" | sort | uniq -c |
    awk '{ n = $1; sub(/^ *[0-9]+ /, ""); print $0 "\t" n }' >"$1.xls"
}

# Run from the directory that holds the per-motif files
# (presumably plant_growth/, where the extraction step writes them).
for motif in A-box AE-box Box-4 CAT-box Circadian GA-motif GATA-motif G-box \
  GCN4-motif GT1-motif I-box MRE RY-element; do
  if [ -f "$motif" ]; then
    count_by_source "$motif"
  else
    printf 'skipping %s: no such file\n' "$motif" >&2
  fi
done

2、Abiotic and biotic stresses

# pick_motif FLAGS PATTERN [EXCLUDE]
#   Reads tab-separated rows on stdin and prints those whose 2nd column
#   matches the regular expression PATTERN.
#   FLAGS is a string of letters: "i" = ignore case, "w" = PATTERN must match
#   as a whole word (word characters are letters, digits and "_");
#   any other character, e.g. "-", selects nothing.
#   EXCLUDE (optional): rows whose 2nd column matches this regex are dropped.
#   Only column 2 is tested, so short patterns such as "W" or "TC" (and the
#   "site" exclusion) cannot hit text in column 1.
pick_motif() {
  awk -F '\t' -v flags="$1" -v pat="$2" -v skip="${3-}" '
    BEGIN {
      fold = (flags ~ /i/)
      if (fold) pat = tolower(pat)
      if (flags ~ /w/) pat = "(^|[^A-Za-z0-9_])(" pat ")([^A-Za-z0-9_]|$)"
    }
    (fold ? tolower($2) : $2) ~ pat && (skip == "" || $2 !~ skip)
  '
}

# extract_motifs OUTDIR FILE...
#   stdin: one spec per line, "FLAGS|PATTERN|OUTFILE[|EXCLUDE]".
#   Drops every line of FILE... containing "Unnamed" or "short_function",
#   keeps columns 1-2, and for each spec writes the rows selected by
#   pick_motif to OUTDIR/OUTFILE. OUTDIR is created if it is missing.
#   NOTE(review): assumes column 2 is the motif name -- confirm on your files.
extract_motifs() {
  local outdir=$1 table flags pattern outfile exclude
  shift
  if [ "$#" -eq 0 ]; then
    printf 'extract_motifs: no input files given\n' >&2
    return 1
  fi
  mkdir -p -- "$outdir" || return 1
  table=$(mktemp) || return 1
  # Build the two-column table once; every spec below filters this copy.
  grep -v Unnamed -- "$@" | grep -v short_function | cut -f 1,2 >"$table"
  while IFS='|' read -r flags pattern outfile exclude; do
    [ -n "$outfile" ] || continue
    pick_motif "$flags" "$pattern" "$exclude" <"$table" >"$outdir/$outfile"
  done
  rm -f -- "$table"
}

# "DRE core" is matched case-sensitively and without word anchoring;
# MYB rows whose motif name contains "site" are left out.
extract_motifs biotic_stresses * <<'EOF'
iw|ARE|ARE
-|DRE core|DRE-core
iw|LTR|LTR
iw|MBS|MBS
iw|MYB|MYB|site
iw|MYC|MYC
iw|STRE|STRE
iw|TC|TC-rich-repeat
iw|W|W-box
iw|WUN|WUN-motif
EOF

排序并计数(先 cd biotic_stresses 进入上一步的输出目录;统计每个“文件:序列”中该元件出现的次数)

# count_by_source FILE
#   Counts how many rows of the tab-separated FILE share each value of
#   column 1 and writes "value<TAB>count" lines, sorted by value, to FILE.xls.
#   The value is everything after the count that uniq -c prints, so values
#   containing spaces are kept whole.
count_by_source() {
  cut -f 1 -- "$1" | sort | uniq -c |
    awk '{ n = $1; sub(/^ *[0-9]+ /, ""); print $0 "\t" n }' >"$1.xls"
}

# Run from the directory that holds the per-motif files
# (presumably biotic_stresses/, where the extraction step writes them).
for motif in ARE DRE-core LTR MBS MYB MYC STRE TC-rich-repeat W-box WUN-motif; do
  if [ -f "$motif" ]; then
    count_by_source "$motif"
  else
    printf 'skipping %s: no such file\n' "$motif" >&2
  fi
done

3、Phytohormone responsive

# pick_motif FLAGS PATTERN [EXCLUDE]
#   Reads tab-separated rows on stdin and prints those whose 2nd column
#   matches the regular expression PATTERN.
#   FLAGS is a string of letters: "i" = ignore case, "w" = PATTERN must match
#   as a whole word (word characters are letters, digits and "_");
#   any other character, e.g. "-", selects nothing.
#   EXCLUDE (optional): rows whose 2nd column matches this regex are dropped.
#   Only column 2 is tested, so short patterns such as "P" or "ERE" cannot hit
#   text in column 1.
pick_motif() {
  awk -F '\t' -v flags="$1" -v pat="$2" -v skip="${3-}" '
    BEGIN {
      fold = (flags ~ /i/)
      if (fold) pat = tolower(pat)
      if (flags ~ /w/) pat = "(^|[^A-Za-z0-9_])(" pat ")([^A-Za-z0-9_]|$)"
    }
    (fold ? tolower($2) : $2) ~ pat && (skip == "" || $2 !~ skip)
  '
}

# extract_motifs OUTDIR FILE...
#   stdin: one spec per line, "FLAGS|PATTERN|OUTFILE[|EXCLUDE]".
#   Drops every line of FILE... containing "Unnamed" or "short_function",
#   keeps columns 1-2, and for each spec writes the rows selected by
#   pick_motif to OUTDIR/OUTFILE. OUTDIR is created if it is missing.
#   NOTE(review): assumes column 2 is the motif name -- confirm on your files.
extract_motifs() {
  local outdir=$1 table flags pattern outfile exclude
  shift
  if [ "$#" -eq 0 ]; then
    printf 'extract_motifs: no input files given\n' >&2
    return 1
  fi
  mkdir -p -- "$outdir" || return 1
  table=$(mktemp) || return 1
  # Build the two-column table once; every spec below filters this copy.
  grep -v Unnamed -- "$@" | grep -v short_function | cut -f 1,2 >"$table"
  while IFS='|' read -r flags pattern outfile exclude; do
    [ -n "$outfile" ] || continue
    pick_motif "$flags" "$pattern" "$exclude" <"$table" >"$outdir/$outfile"
  done
  rm -f -- "$table"
}

extract_motifs Phytohormone_responsive * <<'EOF'
iw|ABRE|ABRE
iw|as-1|as-1
iw|CGTCA|CGTCA-motif
iw|CARE|CARE
iw|ERE|ERE
iw|P|P-box
iw|TATC|TATC-box
iw|TCA-element|TCA-element
iw|TGACG|TGACG-motif
iw|TGA-element|TGA-element-motif
EOF

排序并计数(先 cd Phytohormone_responsive 进入上一步的输出目录;统计每个“文件:序列”中该元件出现的次数)

# count_by_source FILE
#   Counts how many rows of the tab-separated FILE share each value of
#   column 1 and writes "value<TAB>count" lines, sorted by value, to FILE.xls.
#   The value is everything after the count that uniq -c prints, so values
#   containing spaces are kept whole.
count_by_source() {
  cut -f 1 -- "$1" | sort | uniq -c |
    awk '{ n = $1; sub(/^ *[0-9]+ /, ""); print $0 "\t" n }' >"$1.xls"
}

# Run from the directory that holds the per-motif files
# (presumably Phytohormone_responsive/, where the extraction step writes them).
for motif in ABRE as-1 CARE CGTCA-motif ERE P-box TATC-box TCA-element \
  TGACG-motif TGA-element-motif; do
  if [ -f "$motif" ]; then
    count_by_source "$motif"
  else
    printf 'skipping %s: no such file\n' "$motif" >&2
  fi
done

相关文章

网友评论

      本文标题:PlantCARE 结果统计

      本文链接:https://www.haomeiwen.com/subject/fwjvbdtx.html