##提取样本名
cut -f 1 GJnew.SE.SNP..txt > Geng
sed -i '1d' Geng # 删除文件第一列的表头
###提取vcf子集
bcftools view -S Geng ../pass0.05.recode.vcf > Geng.vcf
###vcf转ped/map
plink --vcf Geng.vcf --allow-extra-chr --recode --out sevcf
###转bed
plink --file sevcf --make-bed --out sevcf --allow-extra-chr
###bed转tped/tfam
plink --bfile sevcf --recode12 --output-missing-genotype 0 --transpose --out sevcf --allow-extra-chr
###创建基于标记的亲缘关系矩阵,使用emmax-kin创建亲缘关系矩阵(首选BN),确保.tped、tfam文件存在相同前缀
~/tools/emmax-kin-intel64 sevcf -v -d 10 -o emmax_in_BN.kinf
###admixture
##for K in 1 2 3 4 5 6 7 8 9 10;do admixture --cv sevcf.bed $K | tee log${K}.out;done
##~/tools/emmax-intel64 -t sevcf -o 1.k -p tra -k emmax_in_BN.kinf
##~/tools/emmax-intel64 -t sevcf -o tl.qk -p tra -k emmax_in_BN.kinf -c emmax.cov.txt
表型提取
tfam <- read.table("sevcf.tfam", header = F, stringsAsFactors = F)
tr <- read.table("GJnew.SE.SNP..txt", header = T, check.names = F, stringsAsFactors = F)
head(tr)
#tr <- tr[match(tfam$V1, tr$'<Trait>'),]
tr <- tr[match(tfam$V1, tr$CX),]
#tr[tr == -999] <- NA
tre <- cbind(tr[,1], tr)
for(i in 3:ncol(tre)){
file_name <- paste0("trait/emmax.GJ.", names(tre)[i], ".txt")
write.table(tre[, c(1, 2, i)], file = file_name, col.names = F, row.names = F, sep = "\t", quote = F)
}
###将表型读取 ls *.txt >../id
##运行
cat id|while read id
do
~/tools/emmax-intel64 -t sevcf -o Gjout/$id -p trait/$id -k emmax_in_BN.kinf
done
整理成map格式
#####sevcf.map文件和emmax.ps文件整合
###map文件第一列为chr,第二列为ID,第四列为位置
###ps文件第四列为p
###ID chr position p
#!/usr/bin/perl -w
use strict;
=pod
Description: combine table
Author: Ft
Created:2023-7-26
Version: v1.1
=cut
#use Getopt::Long;
#use Bio::SeqIO;
my $svmap=$ARGV[0];
my $ps=$ARGV[1];
my ($ID,$chr,$position,$p,$out);
my %ps=();
open ps,"<$ps" or die $!;
while(<ps>){
chomp;
my @map=split/\t/;
$ps{$map[0]}=$map[3];
}
close ps;
open OUTFILE ,">$ARGV[2]";
print OUTFILE "ID\tchr\tposition\tp\n";
open svmap, "<$svmap" or die $!;
while(<svmap>){
chomp;
my @map=split/\t/;
$chr=$map[0],$ID=$map[1],$position=$map[3];
if(exists $ps{$ID}){
print OUTFILE "$ID\t$chr\t$position\t$ps{$ID}\n";
}
}
close svmap;
close OUT;
画图
###
args<-commandArgs(TRUE)
library("CMplot")
pmap <- read.table(args[1], header = T)
#head(pmap)
# 阈值计算
threshold <- 1/nrow(pmap[!is.na(pmap$position),])
# 画图
#CMplot(pmap, threshold = threshold, amplify = F, memo = "", file = "tiff", plot.type=c("m","q"))
CMplot(pmap, threshold = threshold, amplify = F,file = "jpg",file.name=args[2],file.output=T,plot.type=c("m","q"))
结果图
计算有效snp
java -jar ~/tools/gec/gec.jar --effect-number --plink-binary sevcf --genome --out gec
#结果在.sum文件中。有效标记 和建议阈值。建议阈值用 -log10()算LOD值。
网友评论