美文网首页
GEO数据下载便捷途径

GEO数据下载便捷途径

作者: 找兔子的小萝卜 | 来源:发表于2020-05-12 08:45 被阅读0次

    1 Install the development version from GitHub:

    # Install GEOmirror from GitHub only when it is not already available, so
    # that re-running this script does not need network access for a reinstall.
    if (!requireNamespace("GEOmirror", quietly = TRUE)) {
      library(devtools)
      install_github("jmzeng1314/GEOmirror")
    }
    library(GEOmirror)
    

    本电脑已经下载完成

    2 使用起来非常方便,就一句话,找到你的GSE数据集的ID,传给 geoChina 函数即可:

    Use it to download a GEO dataset, as shown below:

    # Each call requests one GEO series by its accession ID and stores the
    # result in eSet; later calls overwrite earlier ones, so eSet ends up
    # holding the result for GSE95166.
    eSet <- geoChina("GSE1009")
    eSet <- geoChina("GSE27533")
    eSet <- geoChina("GSE95166")
    

    3 Once you have downloaded the ExpressionSet of a GEO dataset, you can access its expression matrix and phenotype data:

    ## Download GSE95166 and pull out its expression matrix and phenotype table
    # Series page: https://www.ncbi.nlm.nih.gov/geo/query/acc.cgi?acc=GSE95166
    # GEOquery equivalent, kept for reference:
    # eSet <- getGEO("GSE95166", destdir = ".", AnnotGPL = FALSE, getGPL = FALSE)[[1]]
    library(GEOmirror)
    eSet <- geoChina("GSE95166")
    eSet
    # The returned object is indexed like a list; keep its first element.
    eSet <- eSet[[1]]
    
    # Expression matrix of the probes; its columns are treated as samples below.
    probes_expr <- exprs(eSet)
    dim(probes_expr)
    head(probes_expr[, 1:4])
    boxplot(probes_expr, las = 2)
    
    ## Phenotype (sample annotation) table
    phenoDat <- pData(eSet)
    head(phenoDat[, 1:4])
    # https://www.ncbi.nlm.nih.gov/pubmed/31430288
    
    # Sample groups are hard-coded: the first four columns are labelled "npc"
    # and the last four "normal".
    # NOTE(review): confirm this order against phenoDat before trusting results.
    groupList <- factor(c(rep("npc", 4), rep("normal", 4)))
    # Fail early if the hard-coded labels do not match the number of samples.
    stopifnot(length(groupList) == ncol(probes_expr))
    table(groupList)
    eSet@annotation
    # GPL15314    Arraystar Human LncRNA microarray V2.0 (Agilent_033010 Probe Name version)
    

    4 对于这个表达矩阵数据集,我们可以看看PCA图,火山图以及热图:

    # PCA of the samples: the matrix is transposed so that each sample becomes
    # one row (one individual) in the PCA input.
    library("FactoMineR")
    library("factoextra")
    genes_expr <- probes_expr
    dat.pca <- PCA(t(genes_expr), graph = FALSE)
    dat.pca
    # Draw the individuals as points, coloured by group, with ellipses added.
    fviz_pca_ind(
      dat.pca,
      geom.ind = "point",
      col.ind = groupList,
      addEllipses = TRUE,
      legend.title = "Groups"
    )
    ## Differential expression with limma
    library(limma)
    # Design matrix with an intercept plus one column for the group factor.
    design <- model.matrix(~factor(groupList))
    design
    fit <- lmFit(genes_expr, design)
    fit <- eBayes(fit)
    # coef = 2 selects the second design column (the group effect).
    # `number` is written out in full instead of relying on partial matching
    # of `n`; Inf returns every row rather than only the top few.
    DEG <- topTable(fit, coef = 2, number = Inf)
    head(DEG)
    # We observed that 2107 lncRNAs were upregulated
    # while 2090 lncRNAs were downregulated by more than 2-fold,
    # NKILA among these downregulated lncRNAs (Fig 1A, GSE95166).
    
    ## Volcano plot of the differential-expression table
    # Columns are referenced explicitly as df$... instead of via attach()/detach():
    # attach() places a copy of df on the search path, so later changes to df
    # would not be visible through the bare column names.
    df <- DEG
    df$v <- -log10(df$P.Value)
    # Classification: P.Value > 0.05 is "stable"; otherwise logFC > 1 is "up",
    # logFC < -1 is "down", and anything in between is "stable".
    df$g <- ifelse(
      df$P.Value > 0.05, "stable",
      ifelse(df$logFC > 1, "up",
             ifelse(df$logFC < -1, "down", "stable"))
    )
    table(df$g)
    df$name <- rownames(df)
    head(df)
    library(ggpubr)
    # Label only the first rows of the table (head() of the row names).
    ggpubr::ggscatter(
      df,
      x = "logFC", y = "v", color = "g", size = 0.5,
      label = "name", repel = TRUE,
      label.select = head(rownames(df)),
      palette = c("#00AFBB", "#E7B800", "#FC4E07")
    )
    
    
    ## Heatmap of the probes with the most extreme logFC values
    x <- DEG$logFC
    names(x) <- rownames(DEG)
    # Sort once and reuse: the 100 smallest and the 100 largest logFC values.
    x_sorted <- sort(x)
    cg <- c(names(head(x_sorted, 100)),
            names(tail(x_sorted, 100)))
    cg
    library(pheatmap)
    # Row-wise scaling: scale() works on columns, hence the double transpose.
    n <- t(scale(t(genes_expr[cg, ])))
    # Clip the scaled values to the range [-2, 2].
    n[n > 2] <- 2
    n[n < -2] <- -2
    n[1:4, 1:4]
    # Column annotation: one row per heatmap column, carrying its group label.
    ac <- data.frame(groupList = groupList)
    rownames(ac) <- colnames(n)
    # TRUE/FALSE are spelled out because T and F are ordinary, reassignable names.
    pheatmap(n, show_colnames = FALSE, show_rownames = FALSE,
             annotation_col = ac)
    

    实际上,这个时候,我们需要把探针的ID转换为基因名字,进行后续分析.

    相关文章

      网友评论

          本文标题:GEO数据下载便捷途径

          本文链接:https://www.haomeiwen.com/subject/rqevnhtx.html