美文网首页
鸡的选择信号分析代码

鸡的选择信号分析代码

作者: 宗肃書 | 来源:发表于2021-01-06 08:52 被阅读0次
    1.使用vcftools进行文件处理
    vcftools --gzvcf merge.DP7.miss0.5.maf0.05.SNP.vcf.gz --chr 1 --chr 2 --chr 3 --chr 4 --chr 5 --chr 6 --chr 7 --chr 8 --chr 9 --chr 10 --chr 11 --chr 12 --chr 13 --chr 14 --chr 15 --chr 16 --chr 17 --chr 18 --chr 19 --chr 20 --chr 21 --chr 22 --chr 23 --chr 24 --chr 25 --chr 26 --chr 27 --chr 28 --chr 29 --chr 30 --chr 31 --chr 32 --chr 33 --recode --stdout | bgzip -c >chicken_chr1-33.vcf.gz   #去掉文件中的非常染色体位置
    
    2.使用plink进行snp数据质控
    plink --vcf chicken_chr1-33.vcf.gz --chr-set 33 --geno 0.1 --maf 0.05 --recode vcf --out chicken_chr1-33_zk
    
    3.使用beagle对VCF文件进行基因型填充
    beagle -Xmx20g gt=chicken_chr1-33_zk.vcf out=chicken_chr1-33_zk_tc ne=1500
    
    4.计算ihs
    R脚本
    # Genome-wide iHS scan over the chicken autosomes with rehh.
    # NOTE(review): chromosome 29 is absent from the chromosome list even though
    # the vcftools step above kept chr 1-33 -- confirm this is a deliberate
    # exclusion (e.g. too few markers) and not a typo.
    library(rehh)
    library(data.table)
    chroms <- c(1:28, 30:33)
    # Scan each chromosome once and bind all per-chromosome results at the end.
    # This replaces the original rbind-inside-the-loop accumulation, which
    # copies the growing data frame on every iteration (O(n^2)).
    per_chr <- lapply(chroms, function(chr) {
      hh <- data2haplohh(hap_file = "chicken_chr1-33_zk_tc.vcf.gz",
                         chr.name = chr,
                         polarize_vcf = FALSE,
                         vcf_reader = "data.table")
      scan_hh(hh)
    })
    wgscan <- do.call(rbind, per_chr)
    # Unstandardized iHS; take |iHS| so selection in either direction ranks high.
    wgscan.ihs <- ihh2ihs(wgscan, standardize = FALSE)
    wgscan.ihs$ihs$UNIHS <- abs(wgscan.ihs$ihs$UNIHS)
    # 50 kb sliding windows with a 25 kb step (the step must divide the window
    # size); threshold = -1 keeps every window so we can rank them afterwards.
    cr.cgu <- calc_candidate_regions(wgscan.ihs, threshold = -1, window_size = 5E4,
                                     overlap = 25000, min_n_mrk = 10,
                                     min_n_extr_mrk = 1, min_perc_extr_mrk = 0,
                                     join_neighbors = FALSE, pval = TRUE)
    write.table(cr.cgu, file = "window.ihs.txt", sep = "\t")
    # Windows in the top 5% / top 1% of mean |iHS| are candidate regions.
    top1 <- subset(cr.cgu, cr.cgu$MEAN_MRK > quantile(cr.cgu$MEAN_MRK, 0.95))
    write.table(top1, file = "candidate_slide_ihs_0.95.txt", sep = "\t")
    top2 <- subset(cr.cgu, cr.cgu$MEAN_MRK > quantile(cr.cgu$MEAN_MRK, 0.99))
    write.table(top2, file = "candidate_slide_ihs_0.99.txt", sep = "\t")
    nohup Rscript ihs.R &   #运行R脚本
    
    按窗口画ihs曼哈顿图
    # Windowed iHS Manhattan plot from the table written by the scan above.
    library(rehh)
    # BUG FIX: window.ihs.txt was written with sep = "\t", but was read back
    # here with sep = "," -- with no commas present, read.table would collapse
    # each row into a single column. Read it with the separator it was written with.
    ihs.cgu_eut <- read.table("window.ihs.txt", header = TRUE, sep = "\t")
    # Use the 99th-percentile mean |iHS| as the significance line instead of the
    # hard-coded 2.762571 (that constant was this very quantile for one run).
    thr <- quantile(ihs.cgu_eut$MEAN_MRK, 0.99)
    names(ihs.cgu_eut)[3] <- "POSITION"   # rename 3rd column for manhattanplot()
    b <- ihs.cgu_eut[, c(2:3, 6)]
    manhattanplot(b, ylab = "ihs", threshold = thr)
    
    5.使用bcftools拆分不同亚群的文件
    准备6个亚群的文件   #在chicken/data目录下
    H1.txt  内容如下
    1_1
    3_3
    2_2
    9_9
    4_4
    5_5
    6_6
    8_8
    11_11
    12_12
    13_13
    14_14
    16_16
    10_10
    17_17
    18_18
    19_19
    20_20
    21_21
    22_22
    23_23
    15_15
    24_24
    25_25
    26_26
    27_27
    28_28
    29_29
    48_48
    30_30
    31_31
    32_32
    33_33
    35_35
    36_36
    37_37
    38_38
    39_39
    49_49
    50_50
    40_40
    41_41
    42_42
    43_43
    34_34
    44_44
    45_45
    46_46
    47_47
    7_7
    
    L1.txt  内容如下
    66_66
    53_53
    54_54
    52_52
    51_51
    59_59
    55_55
    56_56
    58_58
    60_60
    67_67
    65_65
    62_62
    63_63
    64_64
    61_61
    82_82
    68_68
    69_69
    70_70
    71_71
    72_72
    73_73
    74_74
    75_75
    80_80
    77_77
    78_78
    79_79
    76_76
    83_83
    57_57
    91_91
    81_81
    97_97
    84_84
    85_85
    86_86
    87_87
    88_88
    89_89
    90_90
    98_98
    92_92
    93_93
    94_94
    95_95
    96_96
    100_100
    99_99
    
    H2.txt
    1_1
    3_3
    2_2
    9_9
    4_4
    5_5
    6_6
    8_8
    11_11
    12_12
    13_13
    14_14
    16_16
    10_10
    17_17
    18_18
    19_19
    20_20
    21_21
    22_22
    23_23
    15_15
    24_24
    25_25
    26_26
    27_27
    31_31
    38_38
    39_39
    33_33
    35_35
    30_30
    32_32
    36_36
    48_48
    
    L2.txt
    67_67
    65_65
    68_68
    69_69
    70_70
    71_71
    72_72
    73_73
    74_74
    75_75
    80_80
    77_77
    78_78
    79_79
    76_76
    83_83
    57_57
    91_91
    81_81
    97_97
    84_84
    85_85
    86_86
    87_87
    88_88
    89_89
    90_90
    98_98
    92_92
    93_93
    94_94
    95_95
    96_96
    100_100
    99_99
    
    H3.txt
    1_1
    3_3
    2_2
    9_9
    4_4
    5_5
    6_6
    8_8
    11_11
    12_12
    13_13
    14_14
    16_16
    10_10
    17_17
    18_18
    19_19
    20_20
    21_21
    22_22
    
    L3.txt
    80_80
    79_79
    76_76
    81_81
    97_97
    84_84
    85_85
    86_86
    87_87
    88_88
    89_89
    90_90
    98_98
    92_92
    93_93
    94_94
    95_95
    96_96
    100_100
    99_99
    
    tabix -p vcf chicken_chr1-33_zk_tc.vcf.gz
    bcftools view chicken_chr1-33_zk_tc.vcf.gz -Oz -S H1.txt -o H1.vcf.gz
    bcftools view chicken_chr1-33_zk_tc.vcf.gz -Oz -S L1.txt -o L1.vcf.gz
    bcftools view chicken_chr1-33_zk_tc.vcf.gz -Oz -S H2.txt -o H2.vcf.gz
    bcftools view chicken_chr1-33_zk_tc.vcf.gz -Oz -S L2.txt -o L2.vcf.gz
    bcftools view chicken_chr1-33_zk_tc.vcf.gz -Oz -S H3.txt -o H3.vcf.gz
    bcftools view chicken_chr1-33_zk_tc.vcf.gz -Oz -S L3.txt -o L3.vcf.gz
    
    6.使用PopLDdecay软件进行连锁不平衡分析,并作LD衰减图
    git clone https://github.com/BGI-shenzhen/PopLDdecay.git           #在soft目录下使用
    cd PopLDdecay
    chmod 755 configure
    ./configure
    make
    mv PopLDdecay  bin/ 
    ./bin/PopLDdecay -InVCF /public/jychu/chicken/data/chicken_chr1-33_zk_tc.vcf.gz -OutStat /public/jychu/chicken/data/LD/chicken_chr1-33_zk_tc_LD  #在PopLDdecay目录下使用此命令,计算LD值
    ./bin/PopLDdecay -InVCF /public/jychu/chicken/data/H1.vcf.gz -OutStat /public/jychu/chicken/data/LD/H1
    ./bin/PopLDdecay -InVCF /public/jychu/chicken/data/L1.vcf.gz -OutStat /public/jychu/chicken/data/LD/L1
    ./bin/PopLDdecay -InVCF /public/jychu/chicken/data/H2.vcf.gz -OutStat /public/jychu/chicken/data/LD/H2
    ./bin/PopLDdecay -InVCF /public/jychu/chicken/data/L2.vcf.gz -OutStat /public/jychu/chicken/data/LD/L2
    ./bin/PopLDdecay -InVCF /public/jychu/chicken/data/H3.vcf.gz -OutStat /public/jychu/chicken/data/LD/H3
    ./bin/PopLDdecay -InVCF /public/jychu/chicken/data/L3.vcf.gz -OutStat /public/jychu/chicken/data/LD/L3
    
    perl  bin/Plot_OnePop.pl -inFile /public/jychu/chicken/data/LD/chicken_chr1-33_zk_tc_LD.stat.gz -output  /public/jychu/chicken/data/LD/chicken_chr1-33_zk_tc_LD #画图
    多群体画图                            
    
    vim H_multi.list           #在LD目录下
    /public/jychu/chicken/data/LD/H1.stat.gz high_1
    /public/jychu/chicken/data/LD/H2.stat.gz high_2
    /public/jychu/chicken/data/LD/H3.stat.gz high_3
    vim L_multi.list
    /public/jychu/chicken/data/LD/L1.stat.gz low_1
    /public/jychu/chicken/data/LD/L2.stat.gz low_2
    /public/jychu/chicken/data/LD/L3.stat.gz low_3
    
    在PopLDdecay目录下
    perl bin/Plot_MultiPop.pl -inList  /public/jychu/chicken/data/LD/H_multi.list  -output /public/jychu/chicken/data/LD/LD_high
    perl bin/Plot_MultiPop.pl -inList  /public/jychu/chicken/data/LD/L_multi.list  -output /public/jychu/chicken/data/LD/LD_low
    
    7.计算XPEHH值
    vim H1-L1_xpehh.R
    # XP-EHH between the H1 (high) and L1 (low) subpopulations with rehh.
    library(rehh)
    library(data.table)
    # NOTE(review): chromosome 29 is skipped, as in the iHS scan -- confirm intentional.
    chroms <- c(1:28, 30:33)
    # Genome-wide scan_hh() for one bgzipped VCF. The original duplicated this
    # loop verbatim for H1 and L1 and grew the result with rbind on every
    # iteration; a single helper with lapply + do.call(rbind) does the same work.
    scan_population <- function(vcf_file) {
      per_chr <- lapply(chroms, function(chr) {
        hh <- data2haplohh(hap_file = vcf_file,
                          chr.name = chr,
                          polarize_vcf = FALSE,
                          vcf_reader = "data.table")
        scan_hh(hh)
      })
      do.call(rbind, per_chr)
    }
    wgscan1 <- scan_population("H1.vcf.gz")
    wgscan2 <- scan_population("L1.vcf.gz")
    # Positive unstandardized XP-EHH = longer haplotypes in H1, negative = in L1.
    xpehh.cgu_eut <- ies2xpehh(scan_pop1 =  wgscan1,
                               scan_pop2 =  wgscan2,
                               popname1 = "H1",
                               popname2 = "L1", standardize = FALSE)
    # threshold = -100 keeps every window; 50 kb windows, 25 kb step.
    cr.cgu <- calc_candidate_regions(xpehh.cgu_eut, threshold = -100,
                                     window_size = 5E4, overlap = 25000,
                                     min_n_mrk = 10, min_n_extr_mrk = 1,
                                     min_perc_extr_mrk = 0, join_neighbors = FALSE)
    write.table(cr.cgu, file = "H1_L1_XPEHH_window.txt", sep = "\t")
    
    source activate dna    #因为此R版本是在dna环境下安装的
    nohup Rscript H1-L1_xpehh.R &   #运行R脚本
    
    剩下两个对比亚群的和上面代码一样
    
    画XPEHH叠加图
    1.按窗口画
    # Stacked Manhattan plot of windowed XP-EHH for the three H-vs-L contrasts.
    # NOTE(review): the numbering is reversed on load -- xp1 holds H3_L3,
    # xp2 holds H2_L2, xp3 holds H1_L1. Confirm this matches the High/Mid/Low
    # legend mapping used below.
    setwd("D:/迅雷下载/选择信号分析/XPEHH")
    xp3<-read.table("H1_L1_XPEHH_window.txt",header = T)
    xp2<-read.table("H2_L2_XPEHH_window.txt",header = T)
    xp1<-read.table("H3_L3_XPEHH_window.txt",header = T)
    # Sequential window index used as the x coordinate.
    xp1$SNP<-seq(1,nrow(xp1),1)
    xp2$SNP<-seq(1,nrow(xp2),1)
    xp3$SNP<-seq(1,nrow(xp3),1)
    library(doBy)
    library(plyr)
    # Chromosomes 1-33 without 29 (29 is also skipped in the upstream scans).
    n<- c(1,2,3,4,5,6,7,8,9,10,11,12,13,14,15,16,17,18,19,20,21,22,23,24,25,26,27,28,30,31,32,33)
    # Shift each chromosome's window indices by a fixed 400-unit gap per
    # chromosome so chromosomes do not overlap on the x axis (constant
    # presumably tuned by eye for this dataset -- confirm).
    # 'chr' (median index per chromosome, used for the axis labels) is
    # recomputed three times; only the last assignment (from xp3) is used.
    for (i in n){xp1$SNP[which(xp1$CHR==i)]<-xp1$SNP[which(xp1$CHR==i)]+400*(i-1)}
    chr <- summaryBy(SNP~CHR, xp1, FUN = median)
    for (i in n){xp2$SNP[which(xp2$CHR==i)]<-xp2$SNP[which(xp2$CHR==i)]+400*(i-1)}
    chr <- summaryBy(SNP~CHR, xp2, FUN = median)
    for (i in n){xp3$SNP[which(xp3$CHR==i)]<-xp3$SNP[which(xp3$CHR==i)]+400*(i-1)}
    chr <- summaryBy(SNP~CHR, xp3, FUN = median)
    # cbind auto-names the pulled columns "xp2[, 4]" / "xp3[, 4]" (their
    # MEAN_MRK values); those literal names are how they are referenced below.
    # Assumes the three tables are row-aligned (same windows in the same order).
    xp<-cbind(xp1,xp2[,4],xp3[,4])
    xp<-na.omit(xp)
    # Windows in the top (resp. bottom) 0.5% of all three contrasts, with a
    # consistent High > Mid > Low (resp. <) ordering across contrasts.
    top1<-subset(xp,xp$MEAN_MRK> quantile(xp$MEAN_MRK, 0.995)&xp$`xp2[, 4]`> quantile(xp$`xp2[, 4]`, 0.995)&xp$`xp3[, 4]`> quantile(xp$`xp3[, 4]`, 0.995)&xp$MEAN_MRK>xp$`xp2[, 4]`&xp$`xp2[, 4]`>xp$`xp3[, 4]`)
    top2<-subset(xp,xp$MEAN_MRK< quantile(xp$MEAN_MRK, 0.005)&xp$`xp2[, 4]`< quantile(xp$`xp2[, 4]`, 0.005)&xp$`xp3[, 4]`< quantile(xp$`xp3[, 4]`, 0.005)&xp$MEAN_MRK<xp$`xp2[, 4]`&xp$`xp2[, 4]`<xp$`xp3[, 4]`)
    csv1<-top1[,c(1:4,6:7)]
    write.csv(csv1,"top1.csv",row.names = FALSE)
    csv2<-top2[,c(1:4,6:7)]
    write.csv(csv2,"top2.csv",row.names = FALSE)
    library(ggplot2)
    # Grey points: all windows of every contrast; coloured points: the selected
    # top/bottom windows, one colour per contrast (High/Mid/Low).
    p2<-ggplot()+
      geom_point(aes(SNP,xp1$MEAN_MRK), data = xp1,color="#DCDCDC",pch=20,show.legend = FALSE, alpha=0.8, size=0.8) + 
      geom_point(aes(SNP,xp2$MEAN_MRK), data = xp2,color="#DCDCDC",pch=20,show.legend = FALSE, alpha=0.8, size=0.8) + 
      geom_point(aes(SNP,xp3$MEAN_MRK), data = xp3,color="#DCDCDC",pch=20,show.legend = FALSE, alpha=0.8, size=0.8) + 
      geom_point(aes(SNP,top1$MEAN_MRK,color="High"), data = top1,pch=19, alpha=1, size=1.6) + 
      geom_point(aes(SNP,top1$`xp2[, 4]`,color="Mid"), data = top1,pch=19, alpha=1, size=1.4) +
      geom_point(aes(SNP,top1$`xp3[, 4]`,color="Low"), data = top1,pch=19, alpha=1, size=1.2) +
      geom_point(aes(SNP,top2$MEAN_MRK,color="High"), data = top2,pch=19, alpha=1, size=1.6) + 
      geom_point(aes(SNP,top2$`xp2[, 4]`,color="Mid"), data = top2,pch=19, alpha=1, size=1.4) +
      geom_point(aes(SNP,top2$`xp3[, 4]`,color="Low"), data = top2,pch=19, alpha=1, size=1.2) +
        theme_bw(base_size = 16)+
      theme(panel.grid = element_blank(), 
            axis.line = element_line(colour = 'black'),
            panel.background = element_rect(fill = "transparent"),
            axis.title.y = element_text(face = "bold"),
            legend.position = "right",
            legend.text = element_text(face = "bold"),
            legend.background = element_rect(colour = '#DCDCDC'),
            legend.title = element_text(face = "bold"))+
      labs(x = 'Chromosome', y = expression("Xpehh")) +
      scale_x_continuous(breaks = chr$SNP.median,labels = chr$CHR,expand=(c(0.05,0)))+
      scale_y_continuous(expand = c(0.01,0),breaks =seq(-6,6,1.5),limits=c(-8,8))+
      scale_color_manual(name="Group",values = c("High"="red","Mid"="#8ec965","Low"="black"),limit=c("High","Mid","Low"))+
    geom_hline(aes(yintercept=0),col="black",lty=2,lwd=0.1)+theme(axis.text.x = element_text(angle = 270,size=7.8,face="bold", hjust = 0.5, vjust = 0.5))
    ggsave("xpehh.png")
    
    2.按位点画
    # Per-SNP (not windowed) stacked XP-EHH Manhattan plot for the three
    # H-vs-L contrasts. Same reversed numbering as the windowed version:
    # xp1 = H3_L3, xp2 = H2_L2, xp3 = H1_L1 -- confirm against the legend.
    setwd("D:/迅雷下载/选择信号分析final/XPEHH")
    xp3<-read.table("H1_L1_XPEHH.txt",header = T)
    xp2<-read.table("H2_L2_XPEHH.txt",header = T)
    xp1<-read.table("H3_L3_XPEHH.txt",header = T)
    # Sequential SNP index used as the x coordinate.
    xp1$SNP<-seq(1,nrow(xp1),1)
    xp2$SNP<-seq(1,nrow(xp2),1)
    xp3$SNP<-seq(1,nrow(xp3),1)
    library(doBy)
    library(plyr)
    # Chromosome 1 needs no shift, hence the list starts at 2; 29 is skipped.
    n<- c(2,3,4,5,6,7,8,9,10,11,12,13,14,15,16,17,18,19,20,21,22,23,24,25,26,27,28,30,31,32,33)
    # Quadratic offset: the per-chromosome gap shrinks for higher chromosome
    # numbers (presumably because chicken chromosome sizes decrease; the
    # constants 28000 / 233 look hand-tuned -- confirm).
    # 'chr' is recomputed three times; only the last (xp3) feeds the axis labels.
    for (i in n){xp1$SNP[which(xp1$CHR==i)]<-xp1$SNP[which(xp1$CHR==i)]+28000*(i-1)-233*(i-2)*(i-1)}
    chr <- summaryBy(SNP~CHR, xp1, FUN = median)
    for (i in n){xp2$SNP[which(xp2$CHR==i)]<-xp2$SNP[which(xp2$CHR==i)]+28000*(i-1)-233*(i-2)*(i-1)}
    chr <- summaryBy(SNP~CHR, xp2, FUN = median)
    for (i in n){xp3$SNP[which(xp3$CHR==i)]<-xp3$SNP[which(xp3$CHR==i)]+28000*(i-1)-233*(i-2)*(i-1)}
    chr <- summaryBy(SNP~CHR, xp3, FUN = median)
    # Column 3 of xp2/xp3 is their unstandardized XP-EHH; cbind auto-names them
    # "xp2[, 3]" / "xp3[, 3]". Assumes the three tables are row-aligned.
    xp<-cbind(xp1,xp2[,3],xp3[,3])
    xp<-na.omit(xp)
    # SNPs in the top (resp. bottom) 0.5% of all three contrasts with a
    # consistent High > Mid > Low (resp. <) ordering.
    top1<-subset(xp,xp$UNXPEHH_H3_L3> quantile(xp$UNXPEHH_H3_L3, 0.995)&xp$`xp2[, 3]`> quantile(xp$`xp2[, 3]`, 0.995)&xp$`xp3[, 3]`> quantile(xp$`xp3[, 3]`, 0.995)&xp$UNXPEHH_H3_L3>xp$`xp2[, 3]`&xp$`xp2[, 3]`>xp$`xp3[, 3]`)
    top2<-subset(xp,xp$UNXPEHH_H3_L3< quantile(xp$UNXPEHH_H3_L3, 0.005)&xp$`xp2[, 3]`< quantile(xp$`xp2[, 3]`, 0.005)&xp$`xp3[, 3]`< quantile(xp$`xp3[, 3]`, 0.005)&xp$UNXPEHH_H3_L3<xp$`xp2[, 3]`&xp$`xp2[, 3]`<xp$`xp3[, 3]`)
    csv1<-top1[,c(1:3,5:6)]
    write.csv(csv1,"top0.01.csv",row.names = FALSE)
    csv2<-top2[,c(1:3,5:6)]
    write.csv(csv2,"low0.01.csv",row.names = FALSE)
    library(ggplot2)
    # Grey points: all SNPs of every contrast; coloured points: selected
    # top/bottom SNPs, one colour per contrast.
    p2<-ggplot()+
      geom_point(aes(SNP,xp1$UNXPEHH_H3_L3), data = xp1,color="#DCDCDC",pch=20,show.legend = FALSE, alpha=0.8, size=0.8) + 
      geom_point(aes(SNP,xp2$UNXPEHH_H2_L2), data = xp2,color="#DCDCDC",pch=20,show.legend = FALSE, alpha=0.8, size=0.8) + 
      geom_point(aes(SNP,xp3$UNXPEHH_H1_L1), data = xp3,color="#DCDCDC",pch=20,show.legend = FALSE, alpha=0.8, size=0.8) + 
      geom_point(aes(SNP,top1$UNXPEHH_H3_L3,color="High"), data = top1,pch=19, alpha=1, size=1.6) + 
      geom_point(aes(SNP,top1$`xp2[, 3]`,color="Mid"), data = top1,pch=19, alpha=1, size=1.4) +
      geom_point(aes(SNP,top1$`xp3[, 3]`,color="Low"), data = top1,pch=19, alpha=1, size=1.2) +
      geom_point(aes(SNP,top2$UNXPEHH_H3_L3,color="High"), data = top2,pch=19, alpha=1, size=1.6) + 
      geom_point(aes(SNP,top2$`xp2[, 3]`,color="Mid"), data = top2,pch=19, alpha=1, size=1.4) +
      geom_point(aes(SNP,top2$`xp3[, 3]`,color="Low"), data = top2,pch=19, alpha=1, size=1.2) +
        theme_bw(base_size = 16)+
      theme(panel.grid = element_blank(), 
            axis.line = element_line(colour = 'black'),
            panel.background = element_rect(fill = "transparent"),
            axis.title.y = element_text(face = "bold"),
            legend.position = "right",
            legend.text = element_text(face = "bold"),
            legend.background = element_rect(colour = '#DCDCDC'),
            legend.title = element_text(face = "bold"))+
      labs(x = 'Chromosome', y = expression("Xpehh")) +
      scale_x_continuous(breaks = chr$SNP.median,labels = chr$CHR,expand=(c(0.05,0)))+
      scale_y_continuous(expand = c(0.01,0),breaks =seq(-5,5,1),limits=c(-6,6))+
      scale_color_manual(name="Group",values = c("High"="red","Mid"="#22ff96","Low"="black"),limit=c("High","Mid","Low"))+
    geom_hline(aes(yintercept=0),col="black",lty=2,lwd=0.1)+theme(axis.text.x = element_text(angle = 270,size=7.8,face="bold", hjust = 0.5, vjust = 0.5))
    ggsave("xpehh.tiff",width = 12,height = 3.5)
    
    8.使用vcftools计算FST值
    准备上面六个亚群的名称文件
    个体名称要和总群体vcf格式里面的名称一致
    # Windowed Weir & Cockerham Fst for the three H-vs-L contrasts
    # (50 kb windows, 25 kb step). Columns of *.windowed.weir.fst:
    # CHROM BIN_START BIN_END N_VARIANTS WEIGHTED_FST MEAN_FST
    vcftools --gzvcf chicken_chr1-33_zk_tc.vcf.gz --weir-fst-pop H1.txt --weir-fst-pop L1.txt  --out fst_H1vsL1 --fst-window-size 50000  --fst-window-step 25000
    vcftools --gzvcf chicken_chr1-33_zk_tc.vcf.gz --weir-fst-pop H2.txt --weir-fst-pop L2.txt  --out fst_H2vsL2 --fst-window-size 50000  --fst-window-step 25000
    vcftools --gzvcf chicken_chr1-33_zk_tc.vcf.gz --weir-fst-pop H3.txt --weir-fst-pop L3.txt  --out fst_H3vsL3 --fst-window-size 50000  --fst-window-step 25000
    # BUG FIX: the original tested $5 (WEIGHTED_FST) but printed $6 (MEAN_FST),
    # while the downstream R plotting script reads the column as WEIGHTED_FST.
    # Keep column 5 throughout; negative Fst estimates are clamped to 0.
    # NOTE(review): the per-site commands below write to the same
    # fst_H*-L*.txt names and will overwrite these windowed files --
    # run one variant at a time or rename the outputs.
    awk '{if($5<0)print $1"\t"$2"\t"$3"\t0";else print $1"\t"$2"\t"$3"\t"$5}' fst_H1vsL1.windowed.weir.fst > fst_H1-L1.txt
    awk '{if($5<0)print $1"\t"$2"\t"$3"\t0";else print $1"\t"$2"\t"$3"\t"$5}' fst_H2vsL2.windowed.weir.fst > fst_H2-L2.txt
    awk '{if($5<0)print $1"\t"$2"\t"$3"\t0";else print $1"\t"$2"\t"$3"\t"$5}' fst_H3vsL3.windowed.weir.fst > fst_H3-L3.txt
    
    vcftools --gzvcf chicken_chr1-33_zk_tc.vcf.gz --weir-fst-pop H1.txt --weir-fst-pop L1.txt  --out fst_H1vsL1
    vcftools --gzvcf chicken_chr1-33_zk_tc.vcf.gz --weir-fst-pop H2.txt --weir-fst-pop L2.txt  --out fst_H2vsL2
    vcftools --gzvcf chicken_chr1-33_zk_tc.vcf.gz --weir-fst-pop H3.txt --weir-fst-pop L3.txt  --out fst_H3vsL3
    cat fst_H1vsL1.weir.fst |awk '{if($3<0)print $1"\t"$2"\t0";else print $1"\t"$2"\t"$3}' > fst_H1-L1.txt
    cat fst_H2vsL2.weir.fst |awk '{if($3<0)print $1"\t"$2"\t0";else print $1"\t"$2"\t"$3}' > fst_H2-L2.txt
    cat fst_H3vsL3.weir.fst |awk '{if($3<0)print $1"\t"$2"\t0";else print $1"\t"$2"\t"$3}' > fst_H3-L3.txt
    
    
    R语言绘制Fst叠加图
    # Stacked Manhattan plot of windowed Fst for the three H-vs-L contrasts.
    # NOTE(review): numbering is reversed on load -- fst1 holds H3-L3,
    # fst2 holds H2-L2, fst3 holds H1-L1; confirm against the legend mapping.
    setwd("D:/迅雷下载/选择信号分析/FST")
    fst3<-read.table("fst_H1-L1.txt",header = T)
    fst2<-read.table("fst_H2-L2.txt",header = T)
    fst1<-read.table("fst_H3-L3.txt",header = T)
    # Sequential window index used as the x coordinate.
    fst1$SNP<-seq(1,nrow(fst1),1)
    fst2$SNP<-seq(1,nrow(fst2),1)
    fst3$SNP<-seq(1,nrow(fst3),1)
    library(doBy)
    library(plyr)
    # Chromosomes 1-33 without 29 (skipped throughout this pipeline).
    n<- c(1,2,3,4,5,6,7,8,9,10,11,12,13,14,15,16,17,18,19,20,21,22,23,24,25,26,27,28,30,31,32,33)
    # Shift each chromosome by a fixed 400-unit gap along the x axis.
    # 'chr' is recomputed three times; only the last (fst3) feeds the labels.
    for (i in n){fst1$SNP[which(fst1$CHROM==i)]<-fst1$SNP[which(fst1$CHROM==i)]+400*(i-1)}
    chr <- summaryBy(SNP~CHROM, fst1, FUN = median)
    for (i in n){fst2$SNP[which(fst2$CHROM==i)]<-fst2$SNP[which(fst2$CHROM==i)]+400*(i-1)}
    chr <- summaryBy(SNP~CHROM, fst2, FUN = median)
    for (i in n){fst3$SNP[which(fst3$CHROM==i)]<-fst3$SNP[which(fst3$CHROM==i)]+400*(i-1)}
    chr <- summaryBy(SNP~CHROM, fst3, FUN = median)
    # cbind auto-names the pulled columns "fst2[, 4]" / "fst3[, 4]" (their Fst
    # values); those literal names are used below. Assumes row alignment.
    fst<-cbind(fst1,fst2[,4],fst3[,4])
    fst<-na.omit(fst)
    # Windows in the top 1% of all three contrasts with High > Mid > Low order.
    top2<-subset(fst,fst$WEIGHTED_FST> quantile(fst$WEIGHTED_FST, 0.99)&fst$`fst2[, 4]`> quantile(fst$`fst2[, 4]`, 0.99)&fst$`fst3[, 4]`> quantile(fst$`fst3[, 4]`, 0.99)&fst$WEIGHTED_FST>fst$`fst2[, 4]`&fst$`fst2[, 4]`>fst$`fst3[, 4]`)
    csv2<-top2[,c(1:4,6:7)]
    write.csv(csv2,"top2.txt",row.names = FALSE)
    library(ggplot2)
    # Grey points: all windows; coloured points: the selected top windows.
    p2<-ggplot()+
      geom_point(aes(SNP,fst1$WEIGHTED_FST), data = fst1,color="#DCDCDC",pch=20,show.legend = FALSE, alpha=0.8, size=0.8) + 
      geom_point(aes(SNP,fst2$WEIGHTED_FST), data = fst2,color="#DCDCDC",pch=20,show.legend = FALSE, alpha=0.8, size=0.8) + 
      geom_point(aes(SNP,fst3$WEIGHTED_FST), data = fst3,color="#DCDCDC",pch=20,show.legend = FALSE, alpha=0.8, size=0.8) + 
      geom_point(aes(SNP,top2$WEIGHTED_FST,color="High"), data = top2,pch=19, alpha=1, size=1.6) + 
      geom_point(aes(SNP,top2$`fst2[, 4]`,color="Mid"), data = top2,pch=19, alpha=1, size=1.4) +
      geom_point(aes(SNP,top2$`fst3[, 4]`,color="Low"), data = top2,pch=19, alpha=1, size=1.2) +
        theme_bw(base_size = 16)+
      theme(panel.grid = element_blank(), 
            axis.line = element_line(colour = 'black'),
            panel.background = element_rect(fill = "transparent"),
            axis.title.y = element_text(face = "bold"),
            legend.position = "right",
            legend.text = element_text(face = "bold"),
            legend.background = element_rect(colour = '#DCDCDC'),
            legend.title = element_text(face = "bold"))+
      labs(x = 'Chromosome', y = expression("Fst")) +
      scale_x_continuous(breaks = chr$SNP.median,labels = chr$CHROM,expand=(c(0.05,0)))+
      scale_y_continuous(expand = c(0.01,0),breaks =seq(-0.05,0.25,0.05),limits=c(0,0.25))+
      scale_color_manual(name="Group",values = c("High"="red","Mid"="#8ec965","Low"="black"),limit=c("High","Mid","Low"))+theme(axis.text.x = element_text(angle = 270,size=7.8,face="bold", hjust = 0.5, vjust = 0.5))
    ggsave("fst.png")
    
    按位点画图
    # Per-SNP stacked Fst Manhattan plot for the three H-vs-L contrasts.
    # Same reversed numbering: fst1 = H3-L3, fst2 = H2-L2, fst3 = H1-L1.
    setwd("D:/迅雷下载/选择信号分析final/FST")
    fst3<-read.table("fst_H1-L1.txt",header = T)
    fst2<-read.table("fst_H2-L2.txt",header = T)
    fst1<-read.table("fst_H3-L3.txt",header = T)
    # Sequential SNP index used as the x coordinate.
    fst1$SNP<-seq(1,nrow(fst1),1)
    fst2$SNP<-seq(1,nrow(fst2),1)
    fst3$SNP<-seq(1,nrow(fst3),1)
    library(doBy)
    library(plyr)
    # Chromosome 1 needs no shift; 29 is skipped, as elsewhere.
    n<- c(2,3,4,5,6,7,8,9,10,11,12,13,14,15,16,17,18,19,20,21,22,23,24,25,26,27,28,30,31,32,33)
    # Quadratic x-axis offset (gap shrinks for higher chromosome numbers;
    # constants 28000 / 233 look hand-tuned -- confirm).
    # 'chr' is recomputed three times; only the last (fst3) feeds the labels.
    for (i in n){fst1$SNP[which(fst1$CHROM==i)]<-fst1$SNP[which(fst1$CHROM==i)]+28000*(i-1)-233*(i-2)*(i-1)}
    chr <- summaryBy(SNP~CHROM, fst1, FUN = median)
    for (i in n){fst2$SNP[which(fst2$CHROM==i)]<-fst2$SNP[which(fst2$CHROM==i)]+28000*(i-1)-233*(i-2)*(i-1)}
    chr <- summaryBy(SNP~CHROM, fst2, FUN = median)
    for (i in n){fst3$SNP[which(fst3$CHROM==i)]<-fst3$SNP[which(fst3$CHROM==i)]+28000*(i-1)-233*(i-2)*(i-1)}
    chr <- summaryBy(SNP~CHROM, fst3, FUN = median)
    # Column 3 is the per-site Fst; cbind auto-names them "fst2[, 3]" /
    # "fst3[, 3]". Assumes the three tables are row-aligned.
    fst<-cbind(fst1,fst2[,3],fst3[,3])
    fst<-na.omit(fst)
    # SNPs in the top 1% of all three contrasts with High > Mid > Low ordering.
    top2<-subset(fst,fst$WEIR_AND_COCKERHAM_FST> quantile(fst$WEIR_AND_COCKERHAM_FST, 0.99)&fst$`fst2[, 3]`> quantile(fst$`fst2[, 3]`, 0.99)&fst$`fst3[, 3]`> quantile(fst$`fst3[, 3]`, 0.99)&fst$WEIR_AND_COCKERHAM_FST>fst$`fst2[, 3]`&fst$`fst2[, 3]`>fst$`fst3[, 3]`)
    csv2<-top2[,c(1:3,5:6)]
    write.csv(csv2,"houxun_fst.csv",row.names = FALSE)
    library(ggplot2)
    # Grey points: all SNPs; coloured points: the selected top SNPs.
    p2<-ggplot()+
         geom_point(aes(SNP,fst1$WEIR_AND_COCKERHAM_FST), data = fst1,color="#DCDCDC",pch=20,show.legend = FALSE, alpha=0.8, size=0.8) + 
         geom_point(aes(SNP,fst2$WEIR_AND_COCKERHAM_FST), data = fst2,color="#DCDCDC",pch=20,show.legend = FALSE, alpha=0.8, size=0.8) + 
         geom_point(aes(SNP,fst3$WEIR_AND_COCKERHAM_FST), data = fst3,color="#DCDCDC",pch=20,show.legend = FALSE, alpha=0.8, size=0.8) + 
         geom_point(aes(SNP,top2$WEIR_AND_COCKERHAM_FST,color="High"), data = top2,pch=19, alpha=1, size=1.6) + 
         geom_point(aes(SNP,top2$`fst2[, 3]`,color="Mid"), data = top2,pch=19, alpha=1, size=1.45) +
         geom_point(aes(SNP,top2$`fst3[, 3]`,color="Low"), data = top2,pch=19, alpha=1, size=1.3) +theme_bw(base_size = 16)+
         theme(panel.grid = element_blank(), 
               axis.line = element_line(colour = 'black'),
               panel.background = element_rect(fill = "transparent"),
               axis.title.y = element_text(face = "bold"),
               legend.position = "right",
               legend.text = element_text(face = "bold"),
               legend.background = element_rect(colour = '#DCDCDC'),
               legend.title = element_text(face = "bold"))+
         labs(x = 'Chromosome', y = expression("Fst")) +
         scale_x_continuous(breaks = chr$SNP.median,labels = chr$CHROM,expand=(c(0.05,0)))+
         scale_y_continuous(expand = c(0.01,0),breaks =seq(-0.05,0.30,0.05),limits=c(0,0.30))+
         scale_color_manual(name="Group",values = c("High"="red","Mid"="#22ff96","Low"="black"),limit=c("High","Mid","Low"))+theme(axis.text.x = element_text(angle = 270,size=7.8,face="bold", hjust = 0.5, vjust = 0.5))
    ggsave("fst4.tiff",width = 12,height = 3.5)
    9.使用SnpEff 对SNP结果进行注释
    wget http://sourceforge.net/projects/snpeff/files/snpEff_latest_core.zip  #在家目录下, 下载安装包
    unzip snpEff_latest_core.zip  #进行解压,会产生一个snpEff目录 所有的程序都在这里面
    
    修改 snpEff 目录下的注释文件 snpEff.config,在“Third party databases”行下加入如下内容:
    #  chicken genome, version gall6
    gall6.genome : chicken
    在 snpEff 目录下,创建目录 data, data/gall6, data/genomes
    将 gtf 文件放入gall6目录下,并改名为 genes.gtf;将基因组序列文件放入 genomes 目录下,并改名为 gall6.fa
    # Download the Ensembl GRCg6a annotation and genome and give them the names
    # snpEff expects (data/gall6/genes.gtf and data/genomes/gall6.fa).
    wget http://ftp.ensembl.org/pub/release-101/gtf/gallus_gallus/Gallus_gallus.GRCg6a.101.gtf.gz
    # BUG FIX: both downloads are gzipped, but the original mv'd the
    # uncompressed filenames without ever running gunzip (mv would fail).
    gunzip Gallus_gallus.GRCg6a.101.gtf.gz
    mv Gallus_gallus.GRCg6a.101.gtf  genes.gtf
    wget http://ftp.ensembl.org/pub/release-101/fasta/gallus_gallus/dna/Gallus_gallus.GRCg6a.dna.toplevel.fa.gz
    gunzip Gallus_gallus.GRCg6a.dna.toplevel.fa.gz
    mv Gallus_gallus.GRCg6a.dna.toplevel.fa gall6.fa
    GWAS群体的基因型信息(vcf文件):chicken_chr1-33_zk_tc.vcf 放入snpEff/data目录下。
    数据库建立:在 snpEff 目录下,执行命令:
    java -jar snpEff.jar build -gtf22 -v gall6   #如果成功那么在gall6目录下会有一个".bin"文件产生
    对vcf格式文件进行注释
    在data目录下命令如下:
    java -Xmx16g -jar ~/snpEff/snpEff.jar eff -v gall6  -c ../snpEff/snpEff.config -i vcf chicken_chr1-33_zk_tc.vcf >chicken_chr1-33_zk_tc_zs.vcf
    
    
    10.对候选区域进行注释
    vim FST
    1       44375001        44425000
    1       88050001        88100000     #以TAB键分割
    vim h_xpehhhouxun_fst.txt
    vim l_xpehh
    bgzip chicken_chr1-33_zk_tc_zs.vcf
    tabix -p vcf chicken_chr1-33_zk_tc_zs.vcf.gz    
    bcftools filter chicken_chr1-33_zk_tc_zs.vcf.gz -R FST > FST_HX.vcf
    bcftools filter chicken_chr1-33_zk_tc_zs.vcf.gz -R h_xpehh > h_xpehh_HX.vcf
    bcftools filter chicken_chr1-33_zk_tc_zs.vcf.gz -R l_xpehh > l_xpehh_HX.vcf
    
    11.提取不同位点的注释
    vim houxun_fst.txt
    1       44375001
    1       88050001
    bgzip chicken_chr1-33_zk_tc_zs.vcf
    tabix -p vcf chicken_chr1-33_zk_tc_zs.vcf.gz
    bcftools filter chicken_chr1-33_zk_tc_zs.vcf.gz -T houxun_fst.txt > houxun_fst.vcf
    bcftools filter chicken_chr1-33_zk_tc_zs.vcf.gz -T hx_xpehh_high.txt > hx_xpehh_high.vcf
    bcftools filter chicken_chr1-33_zk_tc_zs.vcf.gz -T hx_xpehh_low.txt > hx_xpehh_low.vcf
    
    12.SNP注释
    # Annotate SNP IDs (rs numbers) from the Ensembl variation VCF with SnpSift.
    wget http://ftp.ensembl.org/pub/release-101/variation/vcf/gallus_gallus/gallus_gallus.vcf.gz
    tabix -p vcf gallus_gallus.vcf.gz
    # BUG FIX: the annotate step referenced 00-All.vcf.gz, a file never created
    # in this pipeline; use the gallus_gallus.vcf.gz downloaded and indexed
    # above (adjust the path if the file was moved/renamed locally).
    java -jar ~/snpEff/SnpSift.jar annotate gallus_gallus.vcf.gz ~/chicken/finaldate0.2/chicken_chr1-33_zk_tc_zs.vcf >chicken_chr1-33_zk_tc_zs_rs.vcf
    13.计算次要等位基因频率
    plink --vcf chicken_chr1-33_zk_tc.vcf.gz --chr-set 33 --freq --out chicken_chr1-33_zk_tc
    
    14.提取红色原鸡的个体文件
    vcftools --gzvcf chicken_chr1-33.vcf.gz --keep Red_junglefowl.txt --recode --stdout | bgzip -c >Red_junglefowl2.vcf.gz
    
    
    GWAS
    1.给填充过的VCF文件加注释
    # Rewrite the VCF ID column (col 3) as CHROM_POS so GWAS hits can be traced
    # back to a genomic position.
    # BUG FIX: the original grabbed the header with `head -n 10`, which silently
    # corrupts the file whenever the VCF header is not exactly 10 lines.
    # Select header lines by their leading '#' instead.
    grep "^#" chicken_chr1-33_zk_tc.vcf > header
    grep -v "^#" chicken_chr1-33_zk_tc.vcf | awk '{$3=$1"_"$2;print $0}' | sed 's/ /\t/g' > body
    cat header body > chicken_chr1-33_zk_tcID.vcf
    
    2.将vcf转成ped
    plink --vcf chicken_chr1-33_zk_tcID.vcf --chr-set 33 --recode --out chicken_chr1-33_zk_tcID
    
    3.基于连锁不平衡过滤SNP:我感觉可以不用,因为我要找重叠位点
    plink --file chicken_chr1-33_zk_tcID --chr-set 33 --indep 10 5 3        #10代表窗口大小,5代表步长,3代表1/3,即窗口内每对snpLD值大于1/3就会删除一对SNP中的一个
    plink --file chicken_chr1-33_zk_tcID --extract plink.prune.in --chr-set 33 --recode --out chicken_chr1-33_zk_tcID_LDfilter
    
    4.将ped/map转换为fam/bed/bim
    plink --file chicken_chr1-33_zk_tcID --chr-set 33 --make-bed --out chicken_chr1-33_zk_tcID
    
    5.检验表型数据是否服从正态分布
    x<-read.table(file = "100个体正太性.txt",header=T)
    shapiro.test(x)
    
    6.把表型数据转换为正态分布
    用SPSS转换为正态性数据:秩分的正态得分的转化
    
    6.把.fam文件第六列的-9(默认值)全部替换为表型值
    awk '{print $1" "$2" "$3" "$4" "$5}' chicken_chr1-33_zk_tcID.fam >col5
    vim col6
    paste col5 col6 >chicken_chr1-33_zk_tcID.fam
    awk '{print $1" "$2" "$3" "$4" "$5" "$6}' chicken_chr1-33_zk_tcID.fam >TEST
    mv TEST chicken_chr1-33_zk_tcID.fam
    
    7.使用gemma进行运行
    gemma -bfile chicken_chr1-33_zk_tcID -gk 2 -o kin   #生成K(亲缘关系)矩阵
    gemma -bfile chicken_chr1-33_zk_tcID -k kin.sXX.txt -lmm 1 -o GE_GWAS         #-lmm 1 使用wald检验计算显著性
    
    8.做pca,把前5个pca当作协变量
    plink --bfile  chicken_chr1-33_zk_tcID --chr-set 33 --pca 5 --out chicken_chr1-33_zk_tcID_pca   #myfile_pca.eigenvec即为我们所需的PCA文件也为协变量的输入文件
    
    9.看PCA图是否把群体分群
    data<-read.table("PCA.txt",header = T,sep = "\t")
    ggplot(data,aes(x=PCA1,y=PCA2,colour=group))+ geom_point()
    ggsave("pca.tiff")
    
    10.加入协变量计算GWAS
    
    同8.使用simpleM脚本计算Meff值,该值作为Bonferroni矫正的分母
    首先需要准备编码的SNP文件,自己编写脚本进行转换
    cat chicken_chr1-33_zk_tc.vcf | grep "^#" -v | awk '{$3=$1"_"$2;print $0}' | sed 's/ /\t/g' > body
    awk '{for(i=10;i<NF;i++)printf("%s ",$i);print $NF}'  body >bodyGT
    sed -i "s/0|0/0/g"  bodyGT
    sed -i "s/0|1/1/g"  bodyGT
    sed -i "s/1|0/1/g"  bodyGT
    sed -i "s/1|1/2/g"  bodyGT
    mv bodyGT snpSample.txt
    Rscript simpleM_Ex.R
    
    出错:需要自己删掉全是1的行
    这里是删掉全是0的行和列的代码
    awk '{show=0; for (i=1; i<=NF; i++) {if ($i!=0) show=1; col[i]+=$i;}} show==1{tr++; for (i=1; i<=NF; i++) vals[tr,i]=$i; tc=NF} END{for(i=1; i<=tr; i++) { for (j=1; j<=tc; j++) { if (col[j]>0) printf("%s%s", vals[i,j], OFS)} print ""; } }' snpSample.txt >snpSample0.txt
    
    因为没有理解代码,我的思路是先把所有的0替换成3,再把所有的1替换为0,这样可以把全为1的行去掉,然后把0替换为1,把3替换为0,就可以完成替换的操作
    sed -i "s/0/3/g"  snpSample.txt
    sed -i "s/1/0/g"  snpSample.txt
    awk '{show=0; for (i=1; i<=NF; i++) {if ($i!=0) show=1; col[i]+=$i;}} show==1{tr++; for (i=1; i<=NF; i++) vals[tr,i]=$i; tc=NF} END{for(i=1; i<=tr; i++) { for (j=1; j<=tc; j++) { if (col[j]>0) printf("%s%s", vals[i,j], OFS)} print ""; } }' snpSample.txt >snpSample1.txt       #大概8分钟
    wc -l snpSample1.txt
    sed -i "s/0/1/g"  snpSample1.txt
    sed -i "s/3/0/g"  snpSample1.txt
    Rscript simpleM_Ex.R
    
    计算等位基因频率差
    vcftools --gzvcf H1.vcf.gz --freq2 --out H1 
    sed -i '1d' H1.frq
    vim header
    chr     pos     allele  N_chr   ref_frq ALT_frq
    cat header H1.frq> H1.freq
    
    # Allele-frequency-difference (AFD) plot: REF-allele frequency of each H
    # subpopulation minus its matched L subpopulation, stacked for the three
    # contrasts. Assumes the six .freq tables (from `vcftools --freq2`) are
    # row-aligned (same SNPs in the same order) -- confirm upstream filtering.
    setwd("D:/迅雷下载/选择信号分析final0.2/Freq/")
    h1<-read.table(file="H1.freq",header = T)
    h2<-read.table(file="H2.freq",header = T)
    h3<-read.table(file="H3.freq",header = T)
    L1<-read.table(file="L1.freq",header = T)
    L2<-read.table(file="L2.freq",header = T)
    L3<-read.table(file="L3.freq",header = T)
    H1_L1 <- data.frame(CHR = h1$chr, POS = h1$pos,F = h1$ref_frq - L1$ref_frq )
    H2_L2 <- data.frame(CHR = h2$chr, POS = h2$pos,F = h2$ref_frq - L2$ref_frq )
    H3_L3 <- data.frame(CHR = h3$chr, POS = h3$pos,F = h3$ref_frq - L3$ref_frq )
    write.csv(H1_L1,"F_H1-L1.csv",row.names = FALSE)
    write.csv(H2_L2,"F_H2-L2.csv",row.names = FALSE)
    write.csv(H3_L3,"F_H3-L3.csv",row.names = FALSE)
    # Reversed numbering as in the other plots: F1 = H3_L3, F3 = H1_L1.
    F3<-H1_L1
    F2<-H2_L2
    F1<-H3_L3
    # Sequential SNP index used as the x coordinate.
    F1$SNP<-seq(1,nrow(F1),1)
    F2$SNP<-seq(1,nrow(F2),1)
    F3$SNP<-seq(1,nrow(F3),1)
    library(doBy)
    library(plyr)
    # Chromosome 1 needs no shift; 29 is skipped, as elsewhere.
    n<- c(2,3,4,5,6,7,8,9,10,11,12,13,14,15,16,17,18,19,20,21,22,23,24,25,26,27,28,30,31,32,33)
    # Quadratic x-axis offset; 38000 replaces the 28000 used in the other
    # per-SNP plots (original margin note "#28500" suggests hand tuning).
    # 'chr' is recomputed three times; only the last (F3) feeds the labels.
    for (i in n){F1$SNP[which(F1$CHR==i)]<-F1$SNP[which(F1$CHR==i)]+38000*(i-1)-233*(i-2)*(i-1)}       #28500
    chr <- summaryBy(SNP~CHR, F1, FUN = median)
    for (i in n){F2$SNP[which(F2$CHR==i)]<-F2$SNP[which(F2$CHR==i)]+38000*(i-1)-233*(i-2)*(i-1)}
    chr <- summaryBy(SNP~CHR, F2, FUN = median)
    for (i in n){F3$SNP[which(F3$CHR==i)]<-F3$SNP[which(F3$CHR==i)]+38000*(i-1)-233*(i-2)*(i-1)}
    chr <- summaryBy(SNP~CHR, F3, FUN = median)
    # Column 3 is the AFD (F); cbind auto-names them "F2[, 3]" / "F3[, 3]".
    f<-cbind(F1,F2[,3],F3[,3])
    f<-na.omit(f)
    # SNPs with a monotone gradient across contrasts (H3_L3 > H2_L2 > H1_L1 and
    # the reverse), then the extreme 0.5% tails with a same-sign H1_L1 value.
    tdzheng<-subset(f,f$F> f$`F2[, 3]`&f$`F2[, 3]`> f$`F3[, 3]`)
    tdfu<-subset(f,f$F< f$`F2[, 3]`&f$`F2[, 3]`<f$`F3[, 3]`)
    top1<-subset(tdzheng,tdzheng$F>quantile(tdzheng$F, 0.995)&tdzheng$`F3[, 3]`>0)
    csv1<-top1[,c(1:3,5:6)]
    write.csv(csv1,"top0.005.csv",row.names = FALSE)
    top2<-subset(tdfu,tdfu$F<quantile(tdfu$F, 0.005)&tdfu$`F3[, 3]`<0)
    csv2<-top2[,c(1:3,5:6)]
    write.csv(csv2,"low0.005.csv",row.names = FALSE)
    library(ggplot2)
    # Grey points: all SNPs of every contrast; coloured points: selected
    # positive (top1) and negative (top2) extreme SNPs.
    p2<-ggplot()+
      geom_point(aes(SNP,F1$F), data = F1,color="#DCDCDC",pch=20,show.legend = FALSE, alpha=0.8, size=1) + 
      geom_point(aes(SNP,F2$F), data = F2,color="#DCDCDC",pch=20,show.legend = FALSE, alpha=0.8, size=1) + 
      geom_point(aes(SNP,F3$F), data = F3,color="#DCDCDC",pch=20,show.legend = FALSE, alpha=0.8, size=1) + 
      geom_point(aes(SNP,top1$F,color="High"), data = top1,pch=19, alpha=1, size=1.6) + 
      geom_point(aes(SNP,top1$`F2[, 3]`,color="Mid"), data = top1,pch=19, alpha=1, size=1.4) +
      geom_point(aes(SNP,top1$`F3[, 3]`,color="Low"), data = top1,pch=19, alpha=1, size=1.2) +
      geom_point(aes(SNP,top2$F,color="High"), data = top2,pch=19, alpha=1, size=1.6) + 
      geom_point(aes(SNP,top2$`F2[, 3]`,color="Mid"), data = top2,pch=19, alpha=1, size=1.4) +
      geom_point(aes(SNP,top2$`F3[, 3]`,color="Low"), data = top2,pch=19, alpha=1, size=1.2) +
        theme_bw(base_size = 16)+
      theme(panel.grid = element_blank(), 
            axis.line = element_line(colour = 'black'),
            panel.background = element_rect(fill = "transparent"),
            axis.title.y = element_text(face = "bold"),
            legend.position = "right",
            legend.text = element_text(face = "bold"),
            legend.background = element_rect(colour = '#DCDCDC'),
            legend.title = element_text(face = "bold"))+
      labs(x = 'Chromosome', y = expression("AFD")) +
      scale_x_continuous(breaks = chr$SNP.median,labels = chr$CHR,expand=(c(0.05,0)))+
      scale_y_continuous(expand = c(0.1,0),breaks =seq(-0.5,0.5,0.2),limits=c(-0.5,0.5))+
      scale_color_manual(name="Group",values = c("High"="red","Mid"="#22ff96","Low"="black"),limit=c("High","Mid","Low"))+
    geom_hline(aes(yintercept=0),col="black",lty=2,lwd=0.5)+theme(axis.text.x = element_text(angle = 270,size=7.8,face="bold", hjust = 0.5, vjust = 0.5))
    ggsave("freq5.tiff",width = 12,height = 2.5)
    
    画LD block图,输入文件的数目必须和要画的snp数目一致
    cd ~/soft/haploview
    wget -c https://www.broadinstitute.org/ftp/pub/mpg/haploview/Haploview.jar
    java -jar Haploview.jar --h
    准备两个输入文件
    一个为chicken_chr27.ped  一个为chicken_chr1-33_zk_tcID.info文件
    chicken_chr27.ped是用plink的输出文件
    先提取27号染色体的基因型文件
    vim chr27
    27  4789482 4989482
    bgzip chicken_chr1-33_zk_tcID.vcf
    tabix -p vcf chicken_chr1-33_zk_tcID.vcf.gz
    bcftools filter chicken_chr1-33_zk_tcID.vcf.gz -R chr27 > chr27.vcf
    plink --vcf chr27.vcf --chr-set 27 --recode --out chicken_chr27
    chicken_chr1-33_zk_tcID.info文件需要自己设置,第一列为snpID,第二列为位置,可以加第三列,有第三列的SNP会被绿色高亮,tab键分割
    我需要的是27号染色体 4889482上下游100kb内的所有snp
    awk '{if($1=="27")print $0}' chicken_chr1-33_zk_tcID.map >test.info
    awk '{if($4<="4989482")print $0}' test.info >test1.info
    awk '{if($4>="4789482")print $2"\t"$4}' test1.info >chicken_chr27.info  
    vim chicken_chr27.info  #在显著的snp后面加上标签
    cp chicken_chr27.info chicken_chr27.ped LD_BLOCK
    把ped文件里的-9全部换成0
    sed -i "s/-9/0/g"  chicken_chr27.ped
    cd ~/chicken/final0.2/GWAS/LD_BLOCK
    java -Xmx16g -jar ~/soft/haploview/Haploview.jar -n -q -log chicken_chr27.log -pedfile chicken_chr27.ped -info chicken_chr27.info -blockoutput GAB -png -check -mendel -dprime -compressedpng -includeTagsFile 
    
    
    
    cat Gallus_gallus.GRCg6a.101.gtf | grep "^#" -v | awk '{$2=null;$3=null;$6=null;$7=null;$8=null;print $0}' >Gallus_gallus.GRCg6a.101.bed   #去掉文件中的某几列
    sed '/^$/d' test.txt|awk 'NR==1{max=$1;next}{max=max>$1?max:$1}END{print max}'   #求文件的最大值
    
    
    可变剪切
    conda activate rmats
    echo "H1_sort.bam H2_sort.bam H4_sort.bam" | tr ' ' '\n' > UVJ_H.txt 
    echo "L1_sort.bam L2_sort.bam L3_sort.bam" | tr ' ' '\n' > UVJ_L.txt 
    RNASeq-MATS.py --b1 UVJ_H.txt --b2 UVJ_L.txt --gtf ~/reference/annotate/chicken/Gallus_gallus.GRCg6a.101.gtf --od /public/jychu/YangGe/cleandata/bam/rMATS/UVJ_H_vs_L_rMATS --tmp /public/jychu/YangGe/cleandata/bam/rMATS/UVJ_H_vs_L_rMATS -t paired --readLength 151 --cstat 0.0001 --nthread 6
    

    相关文章

      网友评论

          本文标题:鸡的选择信号分析代码

          本文链接:https://www.haomeiwen.com/subject/gsiwoktx.html