美文网首页群体遗传学
MatrixEQTL-----eQTL分析

MatrixEQTL-----eQTL分析

作者: GenomeStudy | 来源:发表于2024-02-29 22:53 被阅读0次

    好久没有发简书了,去学习新技术新知识去了,实在是没有时间!!现在有点时间,记录分享一下新知识~
    这段时间也是被这个eQTL搞得很头疼,现在终于弄出来,记录记录!

    Matrix eQTL是一款针对大型矩阵可以超快运行进行eQTL分析的软件(http://www.bios.unc.edu/research/genomic_software/Matrix_eQTL/)。

    一、首先就是数据的准备!(很重要)

    先要准备4个文件(5个)

    1.SNP信息文件和SNP位置文件,SNP信息文件需要012类型

    好多人没有这个类型的snp文件,下面我就一步一步的教你们

    #!/bin/bash
    vcf=/your_path/all.rename_sorted.snp.vcf
    
    ## MAF、缺失率过滤
    plink --vcf $vcf  --make-bed --geno 0.1 --maf 0.05 --out snp.v1
    
    plink --bfile  snp.v1 --hardy
    echo "计算所有位点的HWE的P值;生成plink.hwe"
    
    plink --bfile snp.v1 -hwe 1e-4 -make-bed -out plink.hwe
    echo "设定过滤标准1e-4l"
    
    plink --bfile plink.hwe --recodeA --out snp.v2
    echo "生成 .raw 文件,即为 012 矩阵,缺失的点标记为 NA"
    
    cat snp.v2.raw | cut -d" " -f1,7- | sed 's/_[A-Z]//g' > genotype.txt
    echo "2-6 列删除,并格式化 RS ID"
    
    awk '{i=1;while(i <= NF){col[i]=col[i] $i " ";i=i+1}} END {i=1;while(i<=NF){print col[i];i=i+1}}' genotype.txt | sed 's/[ \t]*$//g' > genotype_transpose.txt
    echo "用 awk 进行矩阵转置"
    
    sed -i "s# #\t#g" genotype_transpose.txt
    
    # SNP 位置信息可从 .map 文件获取:
    plink --bfile  plink.hwe --recode --out snp.v3
    
    echo "snpid chr pos" >snpsloc.txt
    awk '{print $2,"chr"$1,$4}' snp.v3.map >>snpsloc.txt
    
    sed -i "s# #\t#g" snpsloc.txt
    

    现在生成的genotype_transpose.txt就是SNP信息文件,snpsloc.txt就是SNP位置文件

    head(genotype_transpose.txt)
    FID     DNA1    DNA2    DNA3    DNA4    DNA5    DNA6    DNA7    DNA8    DNA9    DNA10   DNA11   DNA12   DNA13   DNA14   DNA15   DNA16   DNA18   DNA19   DNA20   DNA21   DNA22   DNA23   DNA24   DNA25   DNA26   DNA27   DNA28   DNA30   DNA31   DNA32   DNA33
    Chr1-30469      0       0       0       0       0       0       0       1       0       0       0       0       1       0       0       0       0       0       0       0       0       1       0       0       0       0       0       0       0       0
    Chr1-30517      0       0       0       0       0       1       0       1       0       1       0       0       1       0       0       0       0       0       0       0       1       0       1       0       0       1       1       0       0       0
    Chr1-30533      0       1       1       1       1       1       2       0       1       0       1       0       0       0       0       1       0       1       1       0       0       0       1       0       0       0       1       1       0       0
    Chr1-69062      1       1       1       1       1       1       1       1       1       1       0       1       1       0       1       1       1       1       1       1       1       1       1       1       1       1       1       1       1       1
    Chr1-89929      0       0       0       0       0       0       0       0       0       0       0       0       0       0       0       1       0       0       0       0       0       0       0       1       0       0       0       0       0       0
    Chr1-91327      0       0       0       0       0       0       0       0       0       0       0       0       0       0       0       0       0       0       0       0       0       1       0       0       0       0       0       0       0       0
    Chr1-130169     0       0       0       0       0       2       0       1       0       2       0       0       1       0       0       0       0       0       0       0       2       0       1       0       0       1       2       0       0       0
    Chr1-188094     0       0       0       0       0       0       0       0       0       0       2       0       0       0       0       0       0       0       0       0       0       0       0       0       0       0       0       0       0       0
    Chr1-190417     0       0       0       0       0       0       0       0       0       0       0       0       0       0       0       0       0       0       0       0       0       0       0       0       0       0       0       0       0       0
    Chr1-191152     0       0       0       0       0       0       0       0       0       0       0       0       0       0       0       0       0       0       0       0       0       0       0       0       0       0       0       0       0       0
    Chr1-191944     0       0       0       0       0       0       0       0       0       0       0       0       0       0       0       0       0       0       0       0       0       0       0       0       0       0       0       0       0       0
    Chr1-194442     0       0       0       0       0       0       0       0       0       0       0       0       0       0       0       0       0       0       0       0       0       0       0       0       0       0       0       0       0       0
    Chr1-199462     0       0       0       0       0       0       0       0       0       0       0       0       0       0       0       0       0       0       0       0       0       0       0       0       0       0       0       0       0       0
    Chr1-199533     0       0       0       0       0       0       0       0       0       0       0       0       0       0       0       0       0       0       0       0       0       0       0       0       0       0       0       0       0       0
    Chr1-204570     0       0       0       0       0       0       0       0       0       0       0       0       0       0       0       0       0       0       0       0       0       0       0       0       0       0       0       0       0       0
    Chr1-206474     0       0       0       0       0       0       0       0       0       0       0       0       0       0       0       0       0       0       0       0       0       0       0       0       0       0       0       0       0       0
    Chr1-217339     0       0       0       0       0       0       0       0       0       0       0       0       0       0       0       0       0       0       0       0       0       0       0       0       0       0       0       0       0       0
    Chr1-217421     0       0       0       0       0       0       0       0       0       0       0       0       0       0       0       0       0       0       0       0       0       0       0       0       0       0       0       0       0       0
    Chr1-217626     0       0       0       0       0       0       0       0       0       0       0       0       0       0       0       0       0       0       0       0       0       0       0       0       0       0       0       0       0       0
    Chr1-217663     0       0       0       0       0       0       0       0       0       0       0       0       0       0       0       0       0       0       0       0       0       0       0       0       0       0       0       0       0       0
    Chr1-220802     0       0       0       0       0       0       0       0       0       0       0       0       0       0       0       0       0       0       0       0       0       0       0       0       0       0       0       0       0       0
    Chr1-221433     0       0       0       0       0       0       0       0       0       0       2       0       0       0       0       0       0       0       0       0       0       0       0       0       0       0       0       0       0       0
    Chr1-221715     0       0       0       0       0       0       0       0       0       0       0       0       0       0       0       0       0       0       0       0       0       0       0       0       0       0       0       0       0       0
    Chr1-226134     0       0       0       0       0       0       0       0       0       0       0       0       0       0       0       0       0       0       0       0       0       0       0       NA      0       0       0       0       0       0
    Chr1-226530     0       0       0       0       0       0       0       0       0       0       0       0       0       0       0       0       0       0       0       0       0       0       0       0       0       0       0       0       0       0
    Chr1-227955     0       0       0       0       0       0       0       0       0       0       0       0       0       0       0       0       0       0       0       0       0       0       0       0       0       0       0       0       0       0
    Chr1-235012     0       0       0       0       0       0       0       0       0       0       0       0       0       0       0       0       0       1       0       0       0       0       0       0       0       0       0       0       0       0
    Chr1-241685     0       0       0       0       0       0       0       0       0       0       0 
    
    
    head(snpsloc.txt)
    snpid   Chr    pos
    Chr1-30469      Chr01   30469
    Chr1-30517      Chr01   30517
    Chr1-30533      Chr01   30533
    Chr1-69062      Chr01   69062
    Chr1-89929      Chr01   89929
    Chr1-91327      Chr01   91327
    Chr1-130169     Chr01   130169
    Chr1-188094     Chr01   188094
    Chr1-190417     Chr01   190417
    

    2.准备自己的表达量矩阵,也可以对矩阵进行取log2,进行标准化

    这个文章里面就是这个操作的:eQTL Regulating Transcript Levels Associated with Diverse Biological Processes in Tomato

    3.准备自己的基因位置信息

    grep "gene" geonom.gff3  |grep -v "ChrUn" | awk -F ";" '{print $1}' | sed -e "s/ID=//g" | awk '{print $9"\t"$1"\t"$4"\t"$5}' > geneloc.txt 
    
    head(geneloc.txt)
    LOC_Os01g01010  Chr01   2903    10817
    LOC_Os01g01019  Chr01   11218   12435
    LOC_Os01g01030  Chr01   12648   15915
    LOC_Os01g01040  Chr01   16292   20323
    LOC_Os01g01050  Chr01   22841   26971
    LOC_Os01g01060  Chr01   27136   28651
    LOC_Os01g01070  Chr01   29818   34493
    

    一定要注意,snpsloc.txtgeneloc.txtChr一定要对应,不能一个是Chr一个是chr!!!

    4.正式开始eQTL分析

    library(MatrixEQTL)
    ## Settings
    # Genotype file name
    SNP_file_name = paste( "/your_path/genotype_transpose.txt", sep="\t");
    snps_location_file_name = paste( "/your_path/snpsloc.txt", sep="\t");
    
    # Gene expression file name
    expression_file_name = paste( "/your_path/experssion.csv", sep=" ");
    gene_location_file_name = paste( "/your_path/geneloc.txt", sep="");
    
    ## Load genotype data
    snps = SlicedData$new();
    snps$fileDelimiter = "\t";      # the TAB character
    snps$fileOmitCharacters = "NA"; # denote missing values;
    snps$fileSkipRows = 1;          # one row of column labels
    snps$fileSkipColumns = 1;       # one column of row labels
    snps$fileSliceSize = 2000;      # read file in slices of 2,000 rows
    snps$LoadFile(SNP_file_name);
    
    ## Load gene expression data
    gene = SlicedData$new();
    gene$fileDelimiter = "\t";      # the TAB character
    gene$fileOmitCharacters = "NA"; # denote missing values;
    gene$fileSkipRows = 1;          # one row of column labels
    gene$fileSkipColumns = 1;       # one column of row labels
    gene$fileSliceSize = 2000;      # read file in slices of 2,000 rows
    gene$LoadFile(expression_file_name);
    
    ## Run the analysis
    snpspos = read.table(snps_location_file_name, header=TRUE, stringsAsFactors = FALSE, sep="\t");
    genepos = read.table(gene_location_file_name, header=TRUE, stringsAsFactors = FALSE, sep="\t");
    
    me = Matrix_eQTL_main(
      snps = snps,
      gene = gene,
      output_file_name   = "./modelANOVA.tra.txt",
      pvOutputThreshold  = 1e-5,
      useModel = modelANOVA, # modelANOVA, modelLINEAR, or modelLINEAR_CROSS
      errorCovariance = numeric(),
      verbose = TRUE,
      output_file_name.cis = "./modelANOVA.cis.txt",
      pvOutputThreshold.cis = 1e-5,
      snpspos = snpspos,
      genepos = genepos,
      cisDist =  2e5,
      pvalue.hist = "qqplot",
      min.pv.by.genesnp = FALSE,
      noFDRsaveMemory = FALSE);
    
    pdf("modelANOVA.pdf")
    plot(me)
    dev.off()
    

    cisDist界定trans和cis的阈值;pvOutputThreshold判断trans的阈值;pvOutputThreshold.cis判断cis的阈值
    可以根据自己物种基因组大小,基因-基因间区大小设置cisDist

    5.用曼哈顿图----展示eQTL结果

    # 加载所需的包
    library(qqman)
    # 读取文件
    eQTL_results <- read.table("std3.modelANOVA.tra.txt", header=TRUE, sep="\t")
    # 分离SNP列以获取染色体号和位置
    eQTL_results$CHR <- as.numeric(gsub("Chr", "", gsub("-.*", "", eQTL_results$SNP)))
    eQTL_results$BP <- as.numeric(gsub(".*-", "", eQTL_results$SNP))
    # 确保使用未转换的p-value列 'p.value'
    plot_data <- eQTL_results[, c("SNP", "CHR", "BP", "p.value")]
    
    # 检查新的数据框
    head(plot_data)
    # 绘制曼哈顿图
    pdf(file="modelANOVA.trans.pdf", width=11, height=8.5)
    manhattan(plot_data, chr="CHR", bp="BP", p="p.value", snp="SNP", main="eQTL Manhattan Plot", col=c("blue4", "orange3"))
    dev.off()
    

    相关文章

      网友评论

        本文标题:MatrixEQTL-----eQTL分析

        本文链接:https://www.haomeiwen.com/subject/ttbqzdtx.html