输入文件risk.png
perl代码
use strict;
my %hash=();
my $normalFlag=0;
open(RF,"risk.txt") or die $!;
while(my $line=<RF>){
chomp($line);
my @arr=split(/\t/,$line);
$hash{$arr[0]}=$arr[$#arr-1];
}
close(RF);
my @indexs=();
my @riskArr=();
open(RF,"symbol.txt") or die $!;
open(WF,">sampleSymbol.txt") or die $!;
my $normalCount=0;
my $tumorCount=0;
while(my $line=<RF>){
chomp($line);
my @arr=split(/\t/,$line);
if($.==1){
print WF "ID";
for(my $i=1;$i<=$#arr;$i++){
my @samples=split(/\-/,$arr[$i]);
if($samples[3]=~/^0/){
my $sampleName="$samples[0]-$samples[1]-$samples[2]";
if(exists $hash{$sampleName}){
push(@indexs,$i);
push(@riskArr,$hash{$sampleName});
print WF "\t$arr[$i]";
$tumorCount++;
delete($hash{$sampleName});
}
}
}
print WF "\n";
}
else{
print WF $arr[0];
foreach my $col(@indexs){
print WF "\t$arr[$col]";
}
print WF "\n";
}
}
print WF "Risk\t" . join("\t",@riskArr) . "\n";
close(WF);
close(RF);
my $colNum=0;
my $rowNum=0;
%hash=();
my $geneName="Risk";
@indexs=();
my @geneArr=();
open(RF,"sampleSymbol.txt") or die $!;
while(my $line=<RF>){
chomp($line);
my @arr=split(/\t/,$line);
if($.==1){
for(my $i=1;$i<=$#arr;$i++){
my @samples=split(/\-/,$arr[$i]);
if($samples[3]=~/^0/){
push(@indexs,$i);
my $sampleName=$arr[$i];
$hash{$sampleName}=1;
$colNum++;
}
}
}
else{
$rowNum++;
if($arr[0] eq $geneName){
foreach my $col(@indexs){
push(@geneArr,$arr[$col]);
}
}
}
}
close(RF);
my $firstGeneVal=$geneArr[0];
my $geneMed=median(@geneArr);
open(RF,"sampleSymbol.txt") or die $!;
open(WF,">$geneName.gct") or die $!;
print WF "#1.2\n";
print WF "$rowNum\t$colNum\n";
open(CLS,">$geneName.cls") or die $!;
print CLS "$colNum\t2\t1\n";
my @samp1e=(localtime(time));
if($firstGeneVal>$geneMed){
print CLS "#\th\tl\n";
}
else{
print CLS "#\tl\th\n";
}
@indexs=();
my @typeArr=();
while(my $line=<RF>){
chomp($line);
my @arr=split(/\t/,$line);
if($.==1){
print WF "NAME\tDESCRIPTION";
for(my $i=1;$i<=$#arr;$i++){
my @samples=split(/\-/,$arr[$i]);
if($samples[3]=~/^0/){
my $sampleName=$arr[$i];
if(exists $hash{$sampleName}){
push(@indexs,$i);
print WF "\t$arr[$i]";
#delete($hash{$sampleName});
}
}
}
print WF "\n";
}
else{
my $symbolName=$arr[0];
$symbolName=~s/(.+?)\|.+/$1/g;
print WF "$symbolName\tna";
foreach my $col(@indexs){
print WF "\t$arr[$col]";
}
print WF "\n";
if($arr[0] eq $geneName){
foreach my $col(@indexs){
if($arr[$col]>$geneMed){
push(@typeArr,"h");
}
else{
push(@typeArr,"l");
}
}
}
}
}
print CLS join("\t",@typeArr) . "\n";
close(WF);
close(CLS);
close(RF);
unlink<"sampleSymbol.txt">;
sub median{
my (@data) = sort {$a <=> $b} @_; #将原数组重新排序
if(scalar (@data) % 2){
return ($data [@data / 2]); #@data/2的返回值向上取整
}else {
my ($upper ,$lower);
$upper = $data[@data / 2];
$lower = $data[@data / 2 -1];
return (($lower+$upper) / 2);
}
}
输出文件为:
Risk.gct.png
Risk.cls.png
下一步输入文件准备:
输入文件准备.png
-在GSEA官网Molecular Signatures Database 上下载2个gmt文件
-在GSEA官网下载GSEA_Win_4.0.1-installer.exe
-点击GSEA_Win_4.0.1-installer.exe下载软件
下载GSEA软件.png
加载数据
软件界面.png
选择分析模式.png
决定GSEA富集图在左边还是右边.png
如果探针id已经转成gene symbol,就选择FALSE
是否需要gene symbol注释.png
最终界面,点击run运行.png
软件分析完后找到图中文件.png
网友评论