美文网首页
芯片分析的极简流程

芯片分析的极简流程

作者: 一只烟酒僧 | 来源:发表于2020-07-15 09:56 被阅读0次

    参考文章:
    1、GEOquery下载芯片数据及注释文件:https://www.jianshu.com/p/993b4022363f
    2、得到的expressionset对象的简单使用:http://www.bio-info-trainee.com/1510.html
    3、limma包的使用:https://www.jianshu.com/p/11534a814405

    library(GEOquery)
    library(stringr)
    library(limma)
    library(ggplot2)
    library(pheatmap)
    #array_endo<-getGEO("GSE111974",AnnotGPL = F,getGPL = F)
    #save(array_endo,file = "GSE111974_array_endo.rdata")
    array_endo<-array_endo[[1]]
    array_endo
    array_endo_mat<-exprs(array_endo)
    #获得注释平台信息
    array_endo_GPL<-getGEO("GPL17077",destdir = ".")
    array_endo_GPL<-array_endo_GPL@dataTable@table
    array_endo_GPL
    #获取样本特征信息
    array_endo_feature<-pData(array_endo)
    array_endo_feature<-array_endo_feature[,c(2,8)]
    array_endo_feature<-data.frame(sampleid=array_endo_feature[,1],
                                   group=ifelse(grepl("Control",array_endo_feature$source_name_ch1),"Control","RIF"))
    rownames(array_endo_feature)<-array_endo_feature$sampleid
    array_endo_feature
    #PCA
    pca3<-prcomp(t(array_endo_mat))
    pca3<-pca3$x
    pca3<-as.data.frame(pca3)
    ggplot(pca3,aes(x=PC1,y=PC2,color=array_endo_feature$group))+geom_point()
    pheatmap(cor(array_endo_mat),annotation_row = array_endo_feature)
    #使用limma,分别获得分组矩阵和差异比较矩阵
    design_lim2<-model.matrix(~0+array_endo_feature$group)
    design_lim2
    rownames(design_lim2)<-colnames(array_endo_mat)
    colnames(design_lim2)<-unique(array_endo_feature$group)
    design_lim2
    contrast.matrix_lim2<-makeContrasts(paste0(colnames(design_lim2),collapse = "-"),levels = design_lim2)
    fit_lim2<-lmFit(array_endo_mat,design_lim2)
    fit2_lim2<-contrasts.fit(fit_lim2,contrast.matrix_lim2)
    fit2_lim2<-eBayes(fit2_lim2)
    temp2<-topTable(fit2_lim2,coef = 1,number = Inf)
    nrDEG2<-na.omit(temp2)
    head(nrDEG2)
    ggplot(nrDEG2,aes(x=logFC,y=-log10(P.Value)))+geom_point()+
      geom_vline(xintercept = c(-1,1))+
      geom_hline(yintercept = -log10(0.05))+
      xlim(c(-4.5,4.5))+ylim(c(-0.5,16))
    #进行基因注释
    nrDEG2$gene_symbol<-array_endo_GPL[match(rownames(nrDEG2),array_endo_GPL$ID),7]
    nrDEG2$gene_symbol[1:5]
    

    相关文章

      网友评论

          本文标题:芯片分析的极简流程

          本文链接:https://www.haomeiwen.com/subject/exjqhktx.html