美文网首页
emmax跑GWAS

emmax跑GWAS

作者: 花生学生信 | 来源:发表于2023-07-26 16:04 被阅读0次
    ##提取样本名
    cut -f 1 GJnew.SE.SNP..txt > Geng
    sed -i '1d' Geng  #  删除文件第一列的表头
    
    ###提取vcf子集 
    bcftools view -S Geng  ../pass0.05.recode.vcf > Geng.vcf
    
    ###vcf转ped/map
    plink --vcf Geng.vcf --allow-extra-chr --recode --out sevcf
    
    
    ###转bed
    plink --file sevcf --make-bed --out sevcf --allow-extra-chr
    
    ###bed转tped/tfam
    plink --bfile sevcf --recode12 --output-missing-genotype 0 --transpose --out sevcf --allow-extra-chr
    
    ###创建基于标记的亲缘关系矩阵,使用emmax-kin创建亲缘关系矩阵(首选BN),确保.tped、tfam文件存在相同前缀
    ~/tools/emmax-kin-intel64 sevcf -v -d 10 -o emmax_in_BN.kinf
    
    ###admixture
    ##for K in 1 2 3 4 5 6 7 8 9 10;do admixture --cv sevcf.bed $K | tee log${K}.out;done
    
    ##~/tools/emmax-intel64 -t sevcf -o 1.k -p tra -k emmax_in_BN.kinf 
    
    ##~/tools/emmax-intel64 -t sevcf -o tl.qk -p tra -k emmax_in_BN.kinf -c emmax.cov.txt 
    

    表型提取

    tfam <- read.table("sevcf.tfam", header = F, stringsAsFactors = F)
    tr <- read.table("GJnew.SE.SNP..txt", header = T, check.names = F, stringsAsFactors = F)
    head(tr)
    #tr <- tr[match(tfam$V1, tr$'<Trait>'),]
    tr <- tr[match(tfam$V1, tr$CX),]
    
    #tr[tr == -999] <- NA
    tre <- cbind(tr[,1], tr)
    for(i in 3:ncol(tre)){
      file_name <- paste0("trait/emmax.GJ.", names(tre)[i], ".txt")
     write.table(tre[, c(1, 2, i)], file = file_name, col.names = F, row.names = F, sep = "\t", quote = F)
    
    }
    
    ###将表型读取 ls *.txt >../id
    ##运行
    cat id|while read id
    do
    
    ~/tools/emmax-intel64 -t sevcf -o Gjout/$id -p trait/$id -k emmax_in_BN.kinf 
    
    done
    

    整理成map格式

    #####sevcf.map文件和emmax.ps文件整合
    ###map文件第一列为chr,第二列为ID,第四列为位置
    ###ps文件第四列为p
    ###ID   chr position    p
    
    #!/usr/bin/perl -w
    use strict;
    
    =pod
        Description: combine table
        Author: Ft
        Created:2023-7-26 
        Version: v1.1
    =cut
    
    #use Getopt::Long;
    #use Bio::SeqIO;
    
    my $svmap=$ARGV[0];
    my $ps=$ARGV[1];
    
    my ($ID,$chr,$position,$p,$out);
    
    my %ps=();
    open ps,"<$ps" or die $!;
    while(<ps>){
        chomp;
        my @map=split/\t/;  
        $ps{$map[0]}=$map[3];
    
    }
    close ps;
    
    open OUTFILE ,">$ARGV[2]"; 
    print OUTFILE "ID\tchr\tposition\tp\n";
    
    open svmap, "<$svmap" or die $!;
    while(<svmap>){
        chomp;
        my @map=split/\t/;
        $chr=$map[0],$ID=$map[1],$position=$map[3];
        if(exists $ps{$ID}){
        print OUTFILE "$ID\t$chr\t$position\t$ps{$ID}\n";
    }       
    }
    close svmap;
    close OUT;
    

    画图

    ###
    args<-commandArgs(TRUE)
    library("CMplot")
    pmap <- read.table(args[1], header = T)
    #head(pmap)
    # 阈值计算
    threshold <- 1/nrow(pmap[!is.na(pmap$position),])
    # 画图
    #CMplot(pmap, threshold = threshold, amplify = F, memo = "", file = "tiff", plot.type=c("m","q"))
    CMplot(pmap, threshold = threshold, amplify = F,file = "jpg",file.name=args[2],file.output=T,plot.type=c("m","q"))
    
    结果图
    计算有效snp
    java -jar ~/tools/gec/gec.jar --effect-number --plink-binary sevcf --genome --out gec
    #结果在.sum文件中。有效标记 和建议阈值。建议阈值用 -log10()算LOD值。
    

    相关文章

      网友评论

          本文标题:emmax跑GWAS

          本文链接:https://www.haomeiwen.com/subject/mwukpdtx.html