美文网首页R语言学习
R语言学习.5-数据读写

R语言学习.5-数据读写

作者: PhageNanoenzyme | 来源:发表于2021-01-11 01:11 被阅读0次

    数据读写是生信技能树生信爆款入门课程R语言部分Day5讲到的一个重要知识点。
    为加深理解,现在做一些练习来巩固。

    Last compiled on 01/10/21

    rm(list = ls())
    

    1.读取ex1.txt

    a <- read.table('ex1.txt')
    dim(a)
    ## [1] 75  6
    head(a)
    ##                                                                               V1
    ## 1                                                                          title
    ## 2    Illumina Sentrix Array Matrix (SAM) - GoldenGate Methylation Cancer Panel I
    ## 3         Illumina HumanMethylation27 BeadChip (HumanMethylation27_270596_v.1.2)
    ## 4           Illumina HumanMethylation450 BeadChip (HumanMethylation450_15017482)
    ## 5         GE Healthcare/Amersham Biosciences CodeLink™ ADME Rat 16-Assay Bioarray
    ## 6                                       [AG] Affymetrix Arabidopsis Genome Array
    ##         V2                           V3             V4                   V5
    ## 1      gpl                 bioc_package   manufacturer             organism
    ## 2 GPL15380     GGHumanMethCancerPanelv1       Illumina         Homo sapiens
    ## 3  GPL8490  IlluminaHumanMethylation27k Illumina, Inc.         Homo sapiens
    ## 4 GPL13534 IlluminaHumanMethylation450k Illumina, Inc.         Homo sapiens
    ## 5  GPL2898                    adme16cod  GE Healthcare    Rattus norvegicus
    ## 6    GPL71                           ag     Affymetrix Arabidopsis thaliana
    ##               V6
    ## 1 data_row_count
    ## 2           1536
    ## 3          27578
    ## 4         485577
    ## 5           1280
    ## 6           8297
    

    2.读取ex2_B cell receptor signaling pathway.csv

    ex2 <- read.csv('ex2_B cell receptor signaling pathway.csv',
                    row.names = 1)
    dim(ex2)
    ## [1]  18 168
    ex2[1:4,1:4]
    ##         TCGA.06.0238.01A TCGA.06.0171.02A TCGA.28.5218.01A TCGA.06.0130.01A
    ## NCKAP1L         10.96088         13.67818         11.69558         12.41409
    ## SYK             10.64797         12.99044         11.07856         11.88787
    ## PTPRC           10.61789         13.49278         11.26111         12.33504
    ## PTPN6           10.49375         12.35558         10.58999         11.66260
    

    3.读取GSE32575_series_matrix.txt,赋值给gse。

    gse <- read.table('GSE32575_series_matrix.txt',
                      comment.char = '!',
                      header = T,
                      row.names = 1)
    
    dim(gse)
    ## [1] 336  48
    gse[1:4,1:4]
    ##               GSM807339  GSM807340  GSM807341  GSM807342
    ## ILMN_1343289 19525.4400 20503.6100 18821.2200 17943.6300
    ## ILMN_1343290 20599.1000 21696.7000 16206.9200 18101.9800
    ## ILMN_1343291 25829.9200 24742.1800 23758.1200 24592.3600
    ## ILMN_1343292   383.6296   353.3019   303.2715   375.0452
    

    4.描述gse的属性

    #View(gse)
    as.matrix(gse)[1:4,1:4]
    ##               GSM807339  GSM807340  GSM807341  GSM807342
    ## ILMN_1343289 19525.4400 20503.6100 18821.2200 17943.6300
    ## ILMN_1343290 20599.1000 21696.7000 16206.9200 18101.9800
    ## ILMN_1343291 25829.9200 24742.1800 23758.1200 24592.3600
    ## ILMN_1343292   383.6296   353.3019   303.2715   375.0452
    class(gse)
    ## [1] "data.frame"
    

    5.将gse导出为新的txt和csv文件。

    write.table(gse,'z.txt')
    write.csv(gse,'zz.csv')
    

    6.将gse保存为Rdata并加载。

    save(gse,file = 'ex.Rdata')#file = 必须写,否则文件名会被当作要保存的对象名
    rm(list = ls())
    load('ex.Rdata')
    

    练习4-1:

    1.读取complete_set.txt(已保存在工作目录)

    a <- read.table('complete_set.txt')
    #先 dim
    dim(a)
    ## [1] 51 20
    a[1:4,1:4]
    ##                   V1                 V2                V3                 V4
    ## 1              geneA              geneB             geneC              geneD
    ## 2 -0.635020187971398  -0.49728008811353 0.514896730700242  -1.01508182502931
    ## 3   0.91605661780324 -0.545381308500589  1.20238322656491  0.956212067289626
    ## 4  0.805995294157758 -0.315914513323816  0.27825197143441 -0.727119736260533
    # 读入数据之后先查看数据,这是必经步骤
    # 这里需要header = T
    # 否则第一行的列名会被当作数据读入,列名变成默认的V1、V2……,行数也多出1行
    a <- read.table('complete_set.txt',header = T)
    dim(a)
    ## [1] 50 20
    a[1:4,1:4]
    ##        geneA       geneB      geneC      geneD
    ## 1 -0.6350202 -0.49728009  0.5148967 -1.0150818
    ## 2  0.9160566 -0.54538131  1.2023832  0.9562121
    ## 3  0.8059953 -0.31591451  0.2782520 -0.7271197
    ## 4  0.5380081 -0.06739211 -0.6237648 -1.6250202
    #正确
    #先 dim
    

    2.查看有多少行、多少列

    dim(a)
    ## [1] 50 20
    

    3.获取行名和列名

    rownames(a)
    ##  [1] "1"  "2"  "3"  "4"  "5"  "6"  "7"  "8"  "9"  "10" "11" "12" "13" "14" "15"
    ## [16] "16" "17" "18" "19" "20" "21" "22" "23" "24" "25" "26" "27" "28" "29" "30"
    ## [31] "31" "32" "33" "34" "35" "36" "37" "38" "39" "40" "41" "42" "43" "44" "45"
    ## [46] "46" "47" "48" "49" "50"
    colnames(a)
    ##  [1] "geneA" "geneB" "geneC" "geneD" "geneE" "geneF" "geneG" "geneH" "geneI"
    ## [10] "geneJ" "geneK" "geneL" "geneM" "geneN" "geneO" "geneP" "geneQ" "geneR"
    ## [19] "geneS" "geneT"
    

    4.导出为csv格式

    write.csv(a,'a.csv')
    

    5.保存为Rdata

    save(a,file = 'a.Rdata')
    

    6.加载class.Rdata,查看数据类型

    load('class.Rdata')
    #环境(Environment)面板中出现了变量y
    #在环境面板里可以看出y的元素都是字符型chr
    class(y[2])#取出单个元素也可以看出数据类型
    ## [1] "character"
    y#从矩阵也可以看出
    ##      gene1 gene2 gene3 gene4 gene5 gene6 grouplist
    ## GSM1 "40"  "15"  "22"  "600" "25"  "123" "control"
    ## GSM2 "20"  "45"  "77"  "544" "33"  "124" "control"
    ## GSM3 "51"  "12"  "26"  "350" "30"  "55"  "control"
    ## GSM4 "46"  "11"  "20"  "390" "45"  "334" "treat"  
    ## GSM5 "38"  "12"  "24"  "260" "20"  "543" "treat"  
    ## GSM6 "49"  "10"  "25"  "220" "33"  "239" "treat"
    class(y)#
    ## [1] "matrix" "array"
    #同时含有字符和数字的数据框转为矩阵后,所有元素都会被强制转换为字符型
    

    高阶数据读取指南:https://www.jianshu.com/p/4ea320c0dcc6

    相关文章

      网友评论

        本文标题:R语言学习.5-数据读写

        本文链接:https://www.haomeiwen.com/subject/jgnkaktx.html