美文网首页
常用自定义函数

常用自定义函数

作者: 郭师傅 | 来源:发表于2022-05-30 13:49 被阅读0次

    1、log2转换判断执行

    # 差异分析前,判断是否需要LOG2转换------------------------------------------
    ex <- exprSet
    qx <- as.numeric(quantile(ex, c(0., 0.25, 0.5, 0.75, 0.99, 1.0), na.rm=T))
    LogC <- (qx[5] > 100) ||
      (qx[6]-qx[1] > 50 && qx[2] > 0) ||
      (qx[2] > 0 && qx[2] < 1 && qx[4] > 1 && qx[4] < 2)
    
    if (LogC) { ex[which(ex <= 0)] <- NaN
    exprSet <- log2(ex)
    print("log2 transform finished")}else{print("log2 transform not needed")}
    

    2、差异分析结果标记(1)

    # 定义加上下调标记函数,适用于limma包结果
    add_sign_p <- function(df,lfc = 2,p = 0.05){
      df <- df %>% mutate(sign = dplyr::case_when(df$P.Value < p & df$logFC> lfc ~ "up",
                                                  df$P.Value < p & df$logFC< -lfc ~ "down", 
                                                  TRUE ~ "stable")
      )
      return(df)
    }
    
    add_sign <- function(df,lfc = 2,padj = 0.05){
      df <- df %>% mutate(sign = dplyr::case_when(df$adj.P.Val < padj & df$logFC> lfc ~ "up",
                                                  df$adj.P.Val < padj & df$logFC< -lfc ~ "down",  
                                                  TRUE ~ "stable"
      )
      )
      return(df)
    }
    
    diff_padj <- add_sign(df = diff_1,lfc = 1,padj = 0.05)
    table(diff_padj$sign)
    diffSig_padj <- diff_padj %>% filter(sign == "down" | sign == "up")
    
    

    3、差异分析结果标记(2)

    # 定义加上下调标记函数,适用于DEseq2包结果
    add_sign_p <- function(df){
      df <- df %>% mutate(sign = dplyr::case_when(df$pvalue < p_cutoff & df$log2FoldChange> logFC_cutoff ~ "up",
                                                  df$pvalue < p_cutoff & df$log2FoldChange< -logFC_cutoff ~ "down",
                                                  TRUE ~ "stable")
      )
      return(df)
    }
    
    add_sign <- function(df){
      df <- df %>% mutate(sign = dplyr::case_when(df$padj < p_cutoff & df$log2FoldChange> logFC_cutoff ~ "up",
                                                  df$padj < p_cutoff & df$log2FoldChange< -logFC_cutoff ~ "down",
                                                  TRUE ~ "stable")
      )
      return(df)
    }
    

    4.表达矩阵PCA绘图

    library(FactoMineR)
    library(factoextra)
    array_qc_pcaplot <- function(exprSet,group_list){
      dat.pca <- PCA(as.data.frame(t(exprSet)), graph = FALSE)
      fviz_pca_ind(dat.pca,
                                geom.ind = "point", # show points only (nbut not "text")
                                col.ind = group_list, # color by groups
                                # palette = c("#00AFBB", "#E7B800"),  #修改颜色
                                addEllipses = TRUE, # Concentration ellipses
                                # legend.title = "GROUP"
      ) + theme(
        legend.position = c(0.80, 1),
        text = element_text(size = 8),
        legend.key.size = unit(3, "mm"),
        legend.title = element_blank()
      )
    }
    

    5.基因注释

    # 定义注释函数
    # 两个参数:exprSet:表达矩阵,行名是探针
    #           probe2symbol:注释文件,第一列探针,第二列sybol
    annot <- function(exprSet,probe2symbol_df){
      # 开始注释
      colnames(probe2symbol_df) <- c("probe_id","symbol")
      exprSet <- as.data.frame(exprSet)  #change express matrix to dataframe
      exprSet$probe_id <- rownames(exprSet)  #make a new column of probe_id by rowname
      exprSet$probe_id <- as.character(exprSet$probe_id)
      #match probe_id and gene symbol
      exprSet <- exprSet %>% 
        inner_join(probe2symbol_df,by="probe_id") %>% #合并探针的信息
        dplyr::select(-probe_id) %>% #去掉多余信息
        dplyr::select(symbol, everything()) %>% #重新排列,
        mutate(rowMean =rowMeans(.[grep("GSM", names(.))])) %>% #求出平均数(这边的.真的是画龙点睛)
        arrange(desc(rowMean)) %>% #把表达量的平均值按从大到小排序
        distinct(symbol,.keep_all = T) %>% # symbol留下第一个
        dplyr::select(-rowMean) %>% #反向选择去除rowMean这一列
        tibble::column_to_rownames(colnames(.)[1]) # 把第一列变成行名并删除
      return(exprSet)
    }
    

    6.limma包差异基因分析

    #表达矩阵:exprSet
    #分组列表:group_list
    
    # 1.制作design
    group_list <- pdata$group
    design <- model.matrix(~0+factor(group_list))
    # design <- model.matrix(~group)
    colnames(design) <- levels(factor(group_list))
    
    # 2.制作比较矩阵
    # makeContrasts里边组的顺序会影响结果
    # 根据目前资料,实验组写左,对照组写右
    group_list
    cont.matrix<-makeContrasts(T2D-control,levels = design)
    cont.matrix
    #做完后实验组是1,对照组是-1,此处注释有待分析
    
    # 定义DEG函数,适用于array,limma包
    deg_limma <- function(exprSet,design = design,cont.matrix = cont.matrix){
      fit <- lmFit(exprSet_4,design)
      fit2=contrasts.fit(fit,cont.matrix)
      fit2 <- eBayes(fit2)  ## default no trend !!!
      ##eBayes() with trend=TRUE
      #通过修改p.value,改变输出基因,过小可能无输出。实际影响的是adj.p,输出基因的不同可能影响火山图的起点(根部)
      tempOutput = topTable(fit2,coef=1,n=Inf,adjust="BH")    
      #tempOutput1 = topTable(fit2,coef=1,n=Inf,adjust="fdr",p.value = 0.05) 
      nrDEG = na.omit(tempOutput)
      return(nrDEG)
    }
    
    diff_1 <- deg_limma(exprSet = exprSet,
                        design = design,
                        cont.matrix = cont.matrix)
    

    相关文章

      网友评论

          本文标题:常用自定义函数

          本文链接:https://www.haomeiwen.com/subject/rzeoprtx.html