GEO数据下载

作者: 白云梦_7 | 来源:发表于2019-03-26 15:35 被阅读0次

    1. 使用R包GEOquery下载并整理GEO数据

    根据文章找到所需的芯片信息GSE29250

    library(GEOquery)
    library(Biobase)
    # Download series GSE29250 into the current directory (destdir = ".").
    # With getGPL and AnnotGPL both TRUE, the platform annotation files are
    # downloaded and attached as well.
    gse <- getGEO("GSE29250", GSEMatrix = TRUE, destdir = ".", getGPL = TRUE, AnnotGPL = TRUE)
    

    得到文件


    GEO files
    GPL file
    GSE-GPL1file
    GPLanno file
    GSE GPL2 file

    表达数据
    # Expression values of the first element of `gse`.
    # NOTE(review): the variable shares its name with the Biobase accessor
    # `exprs()`; later calls to `exprs(...)` presumably still resolve to the
    # function, but confirm before reusing this name elsewhere.
    exprs <- Biobase::exprs(gse[[1]])

    expr
    样品处理分组等信息
    # Per-sample metadata (title, characteristics, protocols, ...) of the first
    # element of `gse`; one row per GSM sample, as the printed head() below shows.
    pdata <- Biobase::pData(gse[[1]])
    > head(pdata)
                                 title geo_accession                status submission_date last_update_date type
    GSM723159                  NSCLC 1     GSM723159 Public on Sep 16 2012     May 12 2011      Sep 16 2012  RNA
    GSM723160 adjacent normal tissue 1     GSM723160 Public on Sep 16 2012     May 12 2011      Sep 16 2012  RNA
    GSM723161                  NSCLC 2     GSM723161 Public on Sep 16 2012     May 12 2011      Sep 16 2012  RNA
    GSM723162 adjacent normal tissue 2     GSM723162 Public on Sep 16 2012     May 12 2011      Sep 16 2012  RNA
    GSM723163                  NSCLC 3     GSM723163 Public on Sep 16 2012     May 12 2011      Sep 16 2012  RNA
    GSM723164 adjacent normal tissue 3     GSM723164 Public on Sep 16 2012     May 12 2011      Sep 16 2012  RNA
              channel_count source_name_ch1 organism_ch1 characteristics_ch1 characteristics_ch1.1
    GSM723159             1            lung Homo sapiens        gender: male  disease state: NSCLC
    GSM723160             1            lung Homo sapiens        gender: male  disease state: NSCLC
    GSM723161             1            lung Homo sapiens        gender: male  disease state: NSCLC
    GSM723162             1            lung Homo sapiens        gender: male  disease state: NSCLC
    GSM723163             1            lung Homo sapiens        gender: male  disease state: NSCLC
    GSM723164             1            lung Homo sapiens        gender: male  disease state: NSCLC
              characteristics_ch1.2   characteristics_ch1.3 characteristics_ch1.4 treatment_protocol_ch1
    GSM723159  nsclc type: squamous          tissue: cancer          tnm: /4/1/0/                   none
    GSM723160  nsclc type: squamous tissue: adjacent normal          tnm: /4/1/0/                   none
    GSM723161  nsclc type: squamous          tissue: cancer          tnm: /4/2/0/                   none
    GSM723162  nsclc type: squamous tissue: adjacent normal          tnm: /4/2/0/                   none
    GSM723163  nsclc type: squamous          tissue: cancer          tnm: /4/0/0/                   none
    GSM723164  nsclc type: squamous tissue: adjacent normal          tnm: /4/0/0/                   none
              growth_protocol_ch1 molecule_ch1
    GSM723159                none    total RNA
    GSM723160                none    total RNA
    GSM723161                none    total RNA
    GSM723162                none    total RNA
    GSM723163                none    total RNA
    GSM723164                none    total RNA
                                                                                                                                                                                                      extract_protocol_ch1
    GSM723159 RNA was extracted with Trizol reagent, followed by QIAGEN RNeasy mini kit in accordance with the prescribed protocol provided with the kit. Quality control was performed with 1% agrose electrophoresis   .
    GSM723160 RNA was extracted with Trizol reagent, followed by QIAGEN RNeasy mini kit in accordance with the prescribed protocol provided with the kit. Quality control was performed with 1% agrose electrophoresis   .
    GSM723161 RNA was extracted with Trizol reagent, followed by QIAGEN RNeasy mini kit in accordance with the prescribed protocol provided with the kit. Quality control was performed with 1% agrose electrophoresis   .
    GSM723162 RNA was extracted with Trizol reagent, followed by QIAGEN RNeasy mini kit in accordance with the prescribed protocol provided with the kit. Quality control was performed with 1% agrose electrophoresis   .
    GSM723163 RNA was extracted with Trizol reagent, followed by QIAGEN RNeasy mini kit in accordance with the prescribed protocol provided with the kit. Quality control was performed with 1% agrose electrophoresis   .
    GSM723164 RNA was extracted with Trizol reagent, followed by QIAGEN RNeasy mini kit in accordance with the prescribed protocol provided with the kit. Quality control was performed with 1% agrose electrophoresis   .
              label_ch1                                                                 label_protocol_ch1
    GSM723159    biotin Biotinylated cRNA were prepared with the Ambion MessageAmp kit for Illumina arrays
    GSM723160    biotin Biotinylated cRNA were prepared with the Ambion MessageAmp kit for Illumina arrays
    GSM723161    biotin Biotinylated cRNA were prepared with the Ambion MessageAmp kit for Illumina arrays
    GSM723162    biotin Biotinylated cRNA were prepared with the Ambion MessageAmp kit for Illumina arrays
    GSM723163    biotin Biotinylated cRNA were prepared with the Ambion MessageAmp kit for Illumina arrays
    GSM723164    biotin Biotinylated cRNA were prepared with the Ambion MessageAmp kit for Illumina arrays
              taxid_ch1                             hyb_protocol                       scan_protocol description
    GSM723159      9606 Standard Illumina hybridization protocol Standard Illumina scanning protocol    SAMPLE 1
    GSM723160      9606 Standard Illumina hybridization protocol Standard Illumina scanning protocol    SAMPLE 2
    GSM723161      9606 Standard Illumina hybridization protocol Standard Illumina scanning protocol    SAMPLE 3
    GSM723162      9606 Standard Illumina hybridization protocol Standard Illumina scanning protocol    SAMPLE 4
    GSM723163      9606 Standard Illumina hybridization protocol Standard Illumina scanning protocol    SAMPLE 5
    GSM723164      9606 Standard Illumina hybridization protocol Standard Illumina scanning protocol    SAMPLE 6
              description.1
    GSM723159   replicate 1
    GSM723160   replicate 1
    GSM723161   replicate 1
    GSM723162   replicate 1
    GSM723163   replicate 1
    GSM723164   replicate 1
                                                                                       data_processing platform_id
    GSM723159 The data were normalised using average normalisation with Illumina Genomestudio software    GPL10558
    GSM723160 The data were normalised using average normalisation with Illumina Genomestudio software    GPL10558
    GSM723161 The data were normalised using average normalisation with Illumina Genomestudio software    GPL10558
    GSM723162 The data were normalised using average normalisation with Illumina Genomestudio software    GPL10558
    GSM723163 The data were normalised using average normalisation with Illumina Genomestudio software    GPL10558
    GSM723164 The data were normalised using average normalisation with Illumina Genomestudio software    GPL10558
              contact_name     contact_email contact_institute          contact_address contact_city
    GSM723159     lina,,ma malina209@163.com Zhoushan Hospital Renmin North Road No.238     Zhoushan
    GSM723160     lina,,ma malina209@163.com Zhoushan Hospital Renmin North Road No.238     Zhoushan
    GSM723161     lina,,ma malina209@163.com Zhoushan Hospital Renmin North Road No.238     Zhoushan
    GSM723162     lina,,ma malina209@163.com Zhoushan Hospital Renmin North Road No.238     Zhoushan
    GSM723163     lina,,ma malina209@163.com Zhoushan Hospital Renmin North Road No.238     Zhoushan
    GSM723164     lina,,ma malina209@163.com Zhoushan Hospital Renmin North Road No.238     Zhoushan
              contact_zip/postal_code contact_country supplementary_file data_row_count disease state:ch1
    GSM723159                  316004           China               NONE          47225             NSCLC
    GSM723160                  316004           China               NONE          47225             NSCLC
    GSM723161                  316004           China               NONE          47225             NSCLC
    GSM723162                  316004           China               NONE          47225             NSCLC
    GSM723163                  316004           China               NONE          47225             NSCLC
    GSM723164                  316004           China               NONE          47225             NSCLC
              gender:ch1 nsclc type:ch1      tissue:ch1 tnm:ch1
    GSM723159       male       squamous          cancer /4/1/0/
    GSM723160       male       squamous adjacent normal /4/1/0/
    GSM723161       male       squamous          cancer /4/2/0/
    GSM723162       male       squamous adjacent normal /4/2/0/
    GSM723163       male       squamous          cancer /4/0/0/
    GSM723164       male       squamous adjacent normal /4/0/0/
    
    

    芯片平台的设计注释信息

    > fdata<-fData(gse[[1]])
    > head(fdata)
                           ID                                         Gene title Gene symbol Gene ID UniGene title
    ILMN_1343291 ILMN_1343291 eukaryotic translation elongation factor 1 alpha 1      EEF1A1    1915              
    ILMN_1343295 ILMN_1343295           glyceraldehyde-3-phosphate dehydrogenase       GAPDH    2597              
    ILMN_1651199 ILMN_1651199                                                                     NA              
    ILMN_1651209 ILMN_1651209                 solute carrier family 35 member E2     SLC35E2    9906              
    ILMN_1651210 ILMN_1651210                    dual specificity phosphatase 22      DUSP22   56940              
    ILMN_1651221 ILMN_1651221                                                                     NA              
                 UniGene symbol UniGene ID
    ILMN_1343291                          
    ILMN_1343295                          
    ILMN_1651199                          
    ILMN_1651209                          
    ILMN_1651210                          
    ILMN_1651221                          
                                                                                          Nucleotide Title
    ILMN_1343291            Homo sapiens eukaryotic translation elongation factor 1 alpha 1 (EEF1A1), mRNA
    ILMN_1343295 Homo sapiens glyceraldehyde-3-phosphate dehydrogenase (GAPDH), transcript variant 1, mRNA
    ILMN_1651199                                                                                          
    ILMN_1651209     Homo sapiens solute carrier family 35 member E2 (SLC35E2), transcript variant 1, mRNA
    ILMN_1651210         Homo sapiens dual specificity phosphatase 22 (DUSP22), transcript variant 2, mRNA
    ILMN_1651221                                                                                          
                        GI GenBank Accession Platform_CLONEID Platform_ORF Platform_SPOTID Chromosome location
    ILMN_1343291  83367078         NM_001402                                                            6q14.1
    ILMN_1343295 576583510         NM_002046                                                             12p13
    ILMN_1651199        NA                                                                                    
    ILMN_1651209 315139027         NM_182838                                                           1p36.33
    ILMN_1651210 557440873         NM_020185                                                            6p25.3
    ILMN_1651221        NA                                                                                    
                                                       Chromosome annotation
    ILMN_1343291 Chromosome 6, NC_000006.12 (73515750..73521032, complement)
    ILMN_1343295              Chromosome 12, NC_000012.12 (6534405..6538375)
    ILMN_1651199                                                            
    ILMN_1651209   Chromosome 1, NC_000001.11 (1724838..1745999, complement)
    ILMN_1651210                 Chromosome 6, NC_000006.12 (292057..351355)
    ILMN_1651221                                                            
                                                                                                                                                                                                                                                                                                                                                                                      GO:Function
    ILMN_1343291                                                                                                                                                                                                                           GTP binding///GTPase activity///poly(A) RNA binding///protein binding///protein kinase binding///tRNA binding///translation elongation factor activity
    ILMN_1343295 NAD binding///NADP binding///glyceraldehyde-3-phosphate dehydrogenase (NAD+) (phosphorylating) activity///glyceraldehyde-3-phosphate dehydrogenase (NAD+) (phosphorylating) activity///glyceraldehyde-3-phosphate dehydrogenase (NAD+) (phosphorylating) activity///identical protein binding///microtubule binding///peptidyl-cysteine S-nitrosylase activity///protein binding
    ILMN_1651199                                                                                                                                                                                                                                                                                                                                                                                 
    ILMN_1651209                                                                                                                                                                                                                                                                                                                                                                                 
    ILMN_1651210                                                                                                                                                                                                                                                                                   protein tyrosine phosphatase activity///protein tyrosine/serine/threonine phosphatase activity
    ILMN_1651221                                                                                                                                                                                                                                                                                                                                                                                 
                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                        GO:Process
    ILMN_1343291                                                                                                                                                                                                                                                                                                                                         cellular response to epidermal growth factor stimulus///regulation of chaperone-mediated autophagy///regulation of transcription, DNA-templated///transcription, DNA-templated///translational elongation
    ILMN_1343295                                                                                                                                                                                                                      canonical glycolysis///cellular response to interferon-gamma///gluconeogenesis///microtubule cytoskeleton organization///negative regulation of translation///negative regulation of translation///neuron apoptotic process///peptidyl-cysteine S-trans-nitrosylation///protein stabilization///regulation of macroautophagy
    ILMN_1651199                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                  
    ILMN_1651209                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                  
    ILMN_1651210 apoptotic process///cell proliferation///inactivation of MAPK activity///multicellular organism development///negative regulation of T cell activation///negative regulation of T cell mediated immunity///negative regulation of T cell receptor signaling pathway///negative regulation of transcription from RNA polymerase II promoter///peptidyl-tyrosine dephosphorylation///positive regulation of JNK cascade///protein dephosphorylation///regulation of cell proliferation///transforming growth factor beta receptor signaling pathway
    ILMN_1651221                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                  
                                                                                                                                                                                                                                                                                                                                                                       GO:Component
    ILMN_1343291                                                                                 cortical actin cytoskeleton///cytoplasm///cytoplasm///cytoplasm///cytoplasmic side of lysosomal membrane///cytosol///cytosol///eukaryotic translation elongation factor 1 complex///extracellular exosome///extracellular space///membrane///nucleolus///nucleus///ruffle membrane
    ILMN_1343295 GAIT complex///cytoplasm///cytoplasm///cytosol///cytosol///cytosol///extracellular exosome///extracellular matrix///intracellular membrane-bounded organelle///intracellular ribonucleoprotein complex///lipid particle///membrane///microtubule cytoskeleton///nuclear membrane///nucleus///nucleus///perinuclear region of cytoplasm///plasma membrane///vesicle
    ILMN_1651199                                                                                                                                                                                                                                                                                                                                                                   
    ILMN_1651209                                                                                                                                                                                                                                                                                                                                     integral component of membrane
    ILMN_1651210                                                                                                                                                                                                                                                                                                                                                cytoplasm///nucleus
    ILMN_1651221                                                                                                                                                                                                                                                                                                                                                                   
                                                                                                                     GO:Function ID
    ILMN_1343291                           GO:0005525///GO:0003924///GO:0044822///GO:0005515///GO:0019901///GO:0000049///GO:0003746
    ILMN_1343295 GO:0051287///GO:0050661///GO:0004365///GO:0004365///GO:0004365///GO:0042802///GO:0008017///GO:0035605///GO:0005515
    ILMN_1651199                                                                                                                   
    ILMN_1651209                                                                                                                   
    ILMN_1651210                                                                                            GO:0004725///GO:0008138
    ILMN_1651221                                                                                                                   
                                                                                                                                                                          GO:Process ID
    ILMN_1343291                                                                                                         GO:0071364///GO:1904714///GO:0006355///GO:0006351///GO:0006414
    ILMN_1343295                                        GO:0061621///GO:0071346///GO:0006094///GO:0000226///GO:0017148///GO:0017148///GO:0051402///GO:0035606///GO:0050821///GO:0016241
    ILMN_1651199                                                                                                                                                                       
    ILMN_1651209                                                                                                                                                                       
    ILMN_1651210 GO:0006915///GO:0008283///GO:0000188///GO:0007275///GO:0050868///GO:0002710///GO:0050860///GO:0000122///GO:0035335///GO:0046330///GO:0006470///GO:0042127///GO:0007179
    ILMN_1651221                                                                                                                                                                       
                                                                                                                                                                                                                                                      GO:Component ID
    ILMN_1343291                                                                  GO:0030864///GO:0005737///GO:0005737///GO:0005737///GO:0098574///GO:0005829///GO:0005829///GO:0005853///GO:0070062///GO:0005615///GO:0016020///GO:0005730///GO:0005634///GO:0032587
    ILMN_1343295 GO:0097452///GO:0005737///GO:0005737///GO:0005829///GO:0005829///GO:0005829///GO:0070062///GO:0031012///GO:0043231///GO:0030529///GO:0005811///GO:0016020///GO:0015630///GO:0031965///GO:0005634///GO:0005634///GO:0048471///GO:0005886///GO:0031982
    ILMN_1651199                                                                                                                                                                                                                                                     
    ILMN_1651209                                                                                                                                                                                                                                           GO:0016021
    ILMN_1651210                                                                                                                                                                                                                              GO:0005737///GO:0005634
    ILMN_1651221                                                                                                                                                                                                                                                     
                                                  Platform_SEQUENCE
    ILMN_1343291 TGTGTTGAGAGCTTCTCAGACTATCCACCTTTGGGTCGCTTTGCTGTTCG
    ILMN_1343295 CTTCAACAGCGACACCCACTCCTCCACCTTTGACGCTGGGGCTGGCATTG
    ILMN_1651199 ATGCGAGGCCCCAGGGTTCGGCCCCGCAGCGCCGCTGAGTCCAAGGACCG
    ILMN_1651209 TCACGGCGTACGCCCTCATGGGGAAAATCTCCCCGGTGACTTTCAGGTCC
    ILMN_1651210 TGTGGACATGAGAGTTAGTTCTGTTTTGCCTGCACGGTGGGAGCGGCGTA
    ILMN_1651221 GCCGCCCCCTGCTTCACGGAGCCTGGTCCCATCAACCGCCGAAGGGCTGA
    

    2.直接下载


    RAWdata

    GEO自带差异分析:GEO2R


    GEO2R-1点击
    GEO2R-2选择平台,设置分组
    GEO2R-3Top250/see all
    result1:第一列是有差异的ID,点击可看到具体表达情况
    result1
    result2
    # Version info: R 3.2.3, Biobase 2.30.0, GEOquery 2.40.0, limma 3.26.8
    # R scripts generated  Sat Sep 29 03:49:34 EDT 2018
    
    ################################################################
    #   Differential expression analysis with limma
    library(Biobase)
    library(GEOquery)
    library(limma)
    
    # Load series and platform data from GEO. The result is indexed like a list;
    # when it holds more than one entry, keep the one whose name mentions GPL8179.
    gset <- getGEO("GSE29250", GSEMatrix = TRUE, AnnotGPL = FALSE)
    if (length(gset) > 1) {
      idx <- grep("GPL8179", attr(gset, "names"))
    } else {
      idx <- 1
    }
    gset <- gset[[idx]]

    # Make the feature-annotation column names syntactically valid so they match
    # the column names that appear in the topTable() output.
    fvarLabels(gset) <- make.names(fvarLabels(gset))

    # Group membership for all samples: one character ("0" or "1") per sample,
    # in column order.
    gsms <- "010101010101"
    sml <- strsplit(gsms, "")[[1]]
    # Fail early with a clear message; a group vector of the wrong length could
    # otherwise be recycled or rejected further down with an obscure error.
    stopifnot(
      "length of gsms must equal the number of samples" =
        length(sml) == dim(gset)[[2]]
    )

    # log2 transform, applied only when the quantiles of the expression values
    # indicate that the data are not already on a log scale.
    ex <- exprs(gset)
    qx <- as.numeric(quantile(ex, c(0, 0.25, 0.5, 0.75, 0.99, 1), na.rm = TRUE))
    LogC <- (qx[5] > 100) ||
      (qx[6] - qx[1] > 50 && qx[2] > 0) ||
      (qx[2] > 0 && qx[2] < 1 && qx[4] > 1 && qx[4] < 2)
    if (LogC) {
      ex[which(ex <= 0)] <- NaN  # non-positive values have no log2
      exprs(gset) <- log2(ex)
    }

    # Set up the design and run the limma analysis.
    sml <- paste0("G", sml)  # group names: "G0" / "G1"
    fl <- as.factor(sml)
    gset$description <- fl
    design <- model.matrix(~ description + 0, gset)
    colnames(design) <- levels(fl)
    fit <- lmFit(gset, design)
    cont.matrix <- makeContrasts(G1 - G0, levels = design)
    fit2 <- contrasts.fit(fit, cont.matrix)
    fit2 <- eBayes(fit2, proportion = 0.01)
    tT <- topTable(fit2, adjust.method = "fdr", sort.by = "B", number = 250)

    # Keep only the reported columns and print the table as tab-separated text.
    tT <- subset(tT, select = c("ID", "adj.P.Val", "P.Value", "t", "B", "logFC", "SEQUENCE", "miRNA_ID", "SPOT_ID"))
    write.table(tT, file = stdout(), row.names = FALSE, sep = "\t")
    
    
    ################################################################
    #   Boxplot for selected GEO samples
    library(Biobase)
    library(GEOquery)
    
    # Load series data from GEO (platform annotation is not needed for the plot).
    # When the result holds more than one entry, keep the one whose name mentions
    # GPL8179.
    gset <- getGEO("GSE29250", GSEMatrix = TRUE, getGPL = FALSE)
    if (length(gset) > 1) {
      idx <- grep("GPL8179", attr(gset, "names"))
    } else {
      idx <- 1
    }
    gset <- gset[[idx]]

    # Group names for all samples in the series: one character per sample,
    # in column order.
    gsms <- "010101010101"
    sml <- strsplit(gsms, "")[[1]]
    # Fail early: a mismatched group string would otherwise silently drop samples
    # or fail with a subscript error when the columns are reordered below.
    stopifnot(
      "length of gsms must equal the number of samples" =
        length(sml) == dim(gset)[[2]]
    )
    sml <- paste0("G", sml)  # set group names

    # Order samples by group so that each group's boxes are adjacent.
    sample_order <- order(sml)
    ex <- exprs(gset)[, sample_order]
    sml <- sml[sample_order]
    fl <- as.factor(sml)
    # NOTE(review): labels are listed in factor-level order (G0, G1); confirm
    # that G0 really is the "test" group for this series.
    labels <- c("test", "control")

    # Set parameters and draw the plot.
    palette(c("#f4dfdf", "#dfeaf4", "#AABBCC"))
    dev.new(width = 4 + dim(gset)[[2]] / 5, height = 6)
    # Bottom margin grows with the longest sample name so rotated labels fit.
    par(mar = c(2 + round(max(nchar(sampleNames(gset))) / 2), 4, 2, 1))
    title <- paste0("GSE29250", "/", annotation(gset), " selected samples")
    boxplot(ex, boxwex = 0.6, notch = TRUE, main = title, outline = FALSE, las = 2, col = fl)
    legend("topleft", labels, fill = palette(), bty = "n")
    

    相关文章

      网友评论

        本文标题:GEO数据下载

        本文链接:https://www.haomeiwen.com/subject/ppspoftx.html