这里统计 PlantCARE 网站对群体启动子的分析结果。
查看结果:

因为解压后的文件不含自己样本的名称,所以这里要得到样本编号和id的对应关系:
# Record the sample ID carried by each PlantCARE archive name, one per line, in
# the file "id" (PlantCARE_<sample>_plantCARE.tar.gz -> <sample>).
# Only the literal leading "PlantCARE_" and the literal trailing
# "_plantCARE.tar.gz" are stripped, so a sample name that itself contains
# "PlantCARE_" is left intact and "." is never treated as a regex wildcard.
for archive in *.gz; do
  [ -e "$archive" ] || continue   # glob matched nothing: "id" stays empty
  sample=${archive#PlantCARE_}
  sample=${sample%_plantCARE.tar.gz}
  printf '%s\n' "$sample"
done >id

再和群体对应:

批量解压
# Extract every downloaded archive found in the current directory.
# The archive names are still saved to "plc", one per line.
# Globbing (instead of parsing ls output) plus quoting keeps names containing
# spaces intact; a failed extraction is reported on stderr and the loop
# carries on with the remaining archives.
: >plc
for archive in *.gz; do
  [ -e "$archive" ] || continue   # no archives present
  printf '%s\n' "$archive" >>plc
  tar -zxvf "$archive" || printf 'failed to extract: %s\n' "$archive" >&2
done

写个重命名脚本

提取结果
1、plant growth and development
# Plant growth and development motifs.
# For every motif of interest, write the "<sequence>\t<motif>" pairs (columns
# 1-2 of the input tables; lines containing "Unnamed" or "short_function" are
# dropped) to plant_growth/<motif>.
#
# Compared with running one grep pipeline per motif:
#   * the motif pattern is tested against column 2 only, so a file name or
#     sequence ID that happens to contain e.g. "ry", "mre" or a lone "G" no
#     longer produces false hits;
#   * the input is read once instead of 13 times;
#   * only regular files are handed to grep (the *_* glob also matches the
#     plant_growth directory) and the output directory is created if missing.
mkdir -p plant_growth

growth_inputs=()
for growth_file in *_*; do
  if [ -f "$growth_file" ]; then
    growth_inputs+=("$growth_file")
  fi
done

if [ "${#growth_inputs[@]}" -eq 0 ]; then
  # Without file arguments grep would wait on stdin, so bail out instead.
  printf 'plant_growth: no input files matching *_*\n' >&2
else
  grep -v Unnamed -- "${growth_inputs[@]}" |
    grep -v short_function |
    awk -F '\t' -v outdir=plant_growth '
      # Register a motif rule. flags: "i" = ignore case, "w" = whole-word
      # match using the grep -w notion of a word (letters, digits, "_").
      function add_rule(pattern, outfile, flags,    regex) {
        rule_count++
        fold_case[rule_count] = (flags ~ /i/)
        regex = fold_case[rule_count] ? tolower(pattern) : pattern
        if (flags ~ /w/)
          regex = "(^|[^A-Za-z0-9_])" regex "([^A-Za-z0-9_]|$)"
        rule_regex[rule_count] = regex
        rule_file[rule_count] = outdir "/" outfile
        printf "" > (rule_file[rule_count])   # create/truncate even with no hits
      }
      BEGIN {
        add_rule("A-box",     "A-box",      "iw")
        add_rule("AE-box",    "AE-box",     "iw")
        add_rule("Box 4",     "Box-4",      "iw")
        add_rule("CAT",       "CAT-box",    "iw")
        add_rule("circadian", "Circadian",  "iw")
        add_rule("GA",        "GA-motif",   "iw")
        add_rule("GATA",      "GATA-motif", "iw")
        add_rule("G",         "G-box",      "iw")
        add_rule("GCN4",      "GCN4-motif", "i")
        add_rule("GT1",       "GT1-motif",  "i")
        add_rule("I-box",     "I-box",      "i")
        add_rule("RY",        "RY-element", "i")
        add_rule("MRE",       "MRE",        "i")
      }
      {
        motif = $2
        folded = tolower(motif)
        for (r = 1; r <= rule_count; r++)
          if ((fold_case[r] ? folded : motif) ~ rule_regex[r])
            print $1 "\t" $2 > (rule_file[r])
      }
    '
fi
排序并计数(以下命令读取的是当前目录下的文件,需先 cd plant_growth;统计每条序列中该元件出现的次数)
# For each plant-growth motif file in the current directory, count how often
# every value of column 1 occurs and write "<column 1>\t<count>" rows, sorted
# by column 1, to <motif>.xls.
# NOTE(review): the extraction step writes these inputs under plant_growth/, so
# this presumably has to be run from inside that directory - confirm.
for motif in A-box AE-box Box-4 CAT-box Circadian GA-motif GATA-motif G-box \
  GCN4-motif GT1-motif I-box MRE RY-element; do
  cut -f 1 "$motif" |
    sort |
    uniq -c |
    awk '{print$2"\t"$1}' >"$motif.xls"   # uniq -c prints "count value"; swap them
done
2、Abiotic and biotic stresses
# Abiotic and biotic stress motifs.
# For every motif of interest, write the "<sequence>\t<motif>" pairs (columns
# 1-2 of the input tables; lines containing "Unnamed" or "short_function" are
# dropped) to biotic_stresses/<motif>.
#
# Compared with running one grep pipeline per motif:
#   * the motif pattern - and the "site" exclusion used for MYB - is tested
#     against column 2 only, so file names and sequence IDs such as "MYC-2"
#     no longer produce false hits or false exclusions;
#   * the input is read once instead of 10 times;
#   * only regular files are handed to grep (the * glob also matches the
#     biotic_stresses directory) and the output directory is created if missing.
mkdir -p biotic_stresses

stress_inputs=()
for stress_file in *; do
  if [ -f "$stress_file" ]; then
    stress_inputs+=("$stress_file")
  fi
done

if [ "${#stress_inputs[@]}" -eq 0 ]; then
  # Without file arguments grep would wait on stdin, so bail out instead.
  printf 'biotic_stresses: no input files in the current directory\n' >&2
else
  grep -v Unnamed -- "${stress_inputs[@]}" |
    grep -v short_function |
    awk -F '\t' -v outdir=biotic_stresses '
      # Register a motif rule. flags: "i" = ignore case, "w" = whole-word
      # match using the grep -w notion of a word (letters, digits, "_");
      # no flags = case-sensitive substring. exclude (optional) is a
      # case-sensitive regex; motif names matching it are skipped.
      function add_rule(pattern, outfile, flags, exclude,    regex) {
        rule_count++
        fold_case[rule_count] = (flags ~ /i/)
        regex = fold_case[rule_count] ? tolower(pattern) : pattern
        if (flags ~ /w/)
          regex = "(^|[^A-Za-z0-9_])" regex "([^A-Za-z0-9_]|$)"
        rule_regex[rule_count] = regex
        rule_skip[rule_count] = exclude
        rule_file[rule_count] = outdir "/" outfile
        printf "" > (rule_file[rule_count])   # create/truncate even with no hits
      }
      BEGIN {
        add_rule("ARE",      "ARE",            "iw")
        add_rule("DRE core", "DRE-core",       "")
        add_rule("LTR",      "LTR",            "iw")
        add_rule("MBS",      "MBS",            "iw")
        add_rule("MYB",      "MYB",            "iw", "site")
        add_rule("MYC",      "MYC",            "iw")
        add_rule("STRE",     "STRE",           "iw")
        add_rule("TC",       "TC-rich-repeat", "iw")
        add_rule("W",        "W-box",          "iw")
        add_rule("WUN",      "WUN-motif",      "iw")
      }
      {
        motif = $2
        folded = tolower(motif)
        for (r = 1; r <= rule_count; r++) {
          if ((fold_case[r] ? folded : motif) !~ rule_regex[r])
            continue
          if (rule_skip[r] != "" && motif ~ rule_skip[r])
            continue
          print $1 "\t" $2 > (rule_file[r])
        }
      }
    '
fi
排序并计数(以下命令读取的是当前目录下的文件,需先 cd biotic_stresses;统计每条序列中该元件出现的次数)
# For each stress-motif file in the current directory, count how often every
# value of column 1 occurs and write "<column 1>\t<count>" rows, sorted by
# column 1, to <motif>.xls.
# NOTE(review): the extraction step writes these inputs under biotic_stresses/,
# so this presumably has to be run from inside that directory - confirm.
for motif in ARE DRE-core LTR MBS MYB MYC STRE TC-rich-repeat W-box WUN-motif; do
  cut -f 1 "$motif" |
    sort |
    uniq -c |
    awk '{print$2"\t"$1}' >"$motif.xls"   # uniq -c prints "count value"; swap them
done
3、Phytohormone responsive
# Phytohormone-responsive motifs.
# For every motif of interest, write the "<sequence>\t<motif>" pairs (columns
# 1-2 of the input tables; lines containing "Unnamed" or "short_function" are
# dropped) to Phytohormone_responsive/<motif>.
#
# Compared with running one grep pipeline per motif:
#   * the motif pattern is tested against column 2 only, so file names and
#     sequence IDs such as "ERE-1" or a lone "P" no longer produce false hits;
#   * the input is read once instead of 10 times;
#   * only regular files are handed to grep (the * glob also matches the
#     Phytohormone_responsive directory) and the output directory is created
#     if missing.
mkdir -p Phytohormone_responsive

hormone_inputs=()
for hormone_file in *; do
  if [ -f "$hormone_file" ]; then
    hormone_inputs+=("$hormone_file")
  fi
done

if [ "${#hormone_inputs[@]}" -eq 0 ]; then
  # Without file arguments grep would wait on stdin, so bail out instead.
  printf 'Phytohormone_responsive: no input files in the current directory\n' >&2
else
  grep -v Unnamed -- "${hormone_inputs[@]}" |
    grep -v short_function |
    awk -F '\t' -v outdir=Phytohormone_responsive '
      # Register a motif rule: case-insensitive whole-word match, using the
      # grep -w notion of a word (letters, digits, "_").
      function add_rule(pattern, outfile) {
        rule_count++
        rule_regex[rule_count] = "(^|[^A-Za-z0-9_])" tolower(pattern) "([^A-Za-z0-9_]|$)"
        rule_file[rule_count] = outdir "/" outfile
        printf "" > (rule_file[rule_count])   # create/truncate even with no hits
      }
      BEGIN {
        add_rule("ABRE",        "ABRE")
        add_rule("as-1",        "as-1")
        add_rule("CGTCA",       "CGTCA-motif")
        add_rule("CARE",        "CARE")
        add_rule("ERE",         "ERE")
        add_rule("P",           "P-box")
        add_rule("TATC",        "TATC-box")
        add_rule("TCA-element", "TCA-element")
        add_rule("TGACG",       "TGACG-motif")
        add_rule("TGA-element", "TGA-element-motif")
      }
      {
        folded = tolower($2)
        for (r = 1; r <= rule_count; r++)
          if (folded ~ rule_regex[r])
            print $1 "\t" $2 > (rule_file[r])
      }
    '
fi
排序并计数(以下命令读取的是当前目录下的文件,需先 cd Phytohormone_responsive;统计每条序列中该元件出现的次数)
# For each phytohormone-motif file in the current directory, count how often
# every value of column 1 occurs and write "<column 1>\t<count>" rows, sorted
# by column 1, to <motif>.xls.
# NOTE(review): the extraction step writes these inputs under
# Phytohormone_responsive/, so this presumably has to be run from inside that
# directory - confirm.
for motif in ABRE as-1 CARE CGTCA-motif ERE P-box TATC-box TCA-element \
  TGACG-motif TGA-element-motif; do
  cut -f 1 "$motif" |
    sort |
    uniq -c |
    awk '{print$2"\t"$1}' >"$motif.xls"   # uniq -c prints "count value"; swap them
done
网友评论