美文网首页
R语言初级练习题

R语言初级练习题

作者: 养猪场小老板 | 来源:发表于2020-01-18 11:00 被阅读0次

    1、打开 Rstudio 告诉我它的工作目录。

    > getwd()
    [1] "D:/360MoveData/Users/xiaoxiaoyi/Desktop/111"
    

    2、新建6个向量,基于不同的数据类型。(重点是字符串,数值,逻辑值)

    数据类型是指:数值、字符、逻辑、因子
    数据结构是指:向量,矩阵,数组,数据框,列表


    • 数据类型

    > a <- c(1,2,3,4,5)#数值型
    > a
    [1] 1 2 3 4 5
    > class(a)
    [1] "numeric"
    > b <- c("q","w","e","r");b#字符串
    [1] "q" "w" "e" "r"
    > class(b)
    [1] "character"
    > c <- a>1#逻辑值
    > c
    [1] FALSE  TRUE  TRUE  TRUE  TRUE
    > class(c)
    [1] "logical"
    > d <- c("shuiguo","shucai")#因子
    > e <- factor(d)
    > e
    [1] shuiguo shucai 
    Levels: shucai shuiguo
    > class(e)
    [1] "factor"
    
    • 数据结构

    > x3 <- c(4,5,6,7)
    > dim(x3) <- c(2,2)
    > x3
         [,1] [,2]
    [1,]    4    6
    [2,]    5    7
    > class(x3)
    [1] "matrix"
    > array1 <- array(1:24,c(2,3,4))
    > class(array1)
    [1] "array"
    > x4 <- data.frame(x3)
    > x4
      X1 X2
    1  4  6
    2  5  7
    > class(x4)
    [1] "data.frame"
    > e <- c("a","b")
    > x4$e <- e
    > x4
      X1 X2 e
    1  4  6 a
    2  5  7 b
    > f <- c("dsd","dzsd","sddff","dfagrasrg")
    > x5 <- list(x4)#先用数据框x4新建列表x5
    > x5$f <- f
    > x5
    [[1]]
      X1 X2 e
    1  4  6 a
    2  5  7 b
    $f
    [1] "dsd"       "dzsd"      "sddff"     "dfagrasrg"
    > class(x5)
    [1] "list"
    

    3、在你新建的数据框进行切片操作,比如首先取第1,3行, 然后取第4,6列

    > a1 <- c(1,2,3,4,5,6,7,8,9)
    > 
    > dim(a1) <- c(3,3)
    > a2 <- as.data.frame(a1)
    > a2
      V1 V2 V3
    1  1  4  7
    2  2  5  8
    3  3  6  9
    > class(a2)
    [1] "data.frame"
    > a2[1,]
      V1 V2 V3
    1  1  4  7
    > a2[3,]
      V1 V2 V3
    3  3  6  9
    

    4、使用data函数来加载R内置数据集 rivers 描述它

    > data(rivers)
    > rivers
      [1]  735  320  325  392  524  450 1459  135  465  600  330  336
     [13]  280  315  870  906  202  329  290 1000  600  505 1450  840
     [25] 1243  890  350  407  286  280  525  720  390  250  327  230
     [37]  265  850  210  630  260  230  360  730  600  306  390  420
     [49]  291  710  340  217  281  352  259  250  470  680  570  350
     [61]  300  560  900  625  332 2348 1171 3710 2315 2533  780  280
     [73]  410  460  260  255  431  350  760  618  338  981 1306  500
     [85]  696  605  250  411 1054  735  233  435  490  310  460  383
     [97]  375 1270  545  445 1885  380  300  380  377  425  276  210
    [109]  800  420  350  360  538 1100 1205  314  237  610  360  540
    [121] 1038  424  310  300  444  301  268  620  215  652  900  525
    [133]  246  360  529  500  720  270  430  671 1770
    > unique(rivers)#去重
      [1]  735  320  325  392  524  450 1459  135  465  600  330  336
     [13]  280  315  870  906  202  329  290 1000  505 1450  840 1243
     [25]  890  350  407  286  525  720  390  250  327  230  265  850
     [37]  210  630  260  360  730  306  420  291  710  340  217  281
     [49]  352  259  470  680  570  300  560  900  625  332 2348 1171
     [61] 3710 2315 2533  780  410  460  255  431  760  618  338  981
     [73] 1306  500  696  605  411 1054  233  435  490  310  383  375
     [85] 1270  545  445 1885  380  377  425  276  800  538 1100 1205
     [97]  314  237  610  540 1038  424  444  301  268  620  215  652
    [109]  246  529  270  430  671 1770
    > length(rivers)#长度
    [1] 141
    > length(unique(rivers))
    [1] 114
    > table(rivers)#统计
    rivers
     135  202  210  215  217  230  233  237  246  250  255  259  260 
       1    1    2    1    1    2    1    1    1    3    1    1    2 
     265  268  270  276  280  281  286  290  291  300  301  306  310 
       1    1    1    1    3    1    1    1    1    3    1    1    2 
     314  315  320  325  327  329  330  332  336  338  340  350  352 
       1    1    1    1    1    1    1    1    1    1    1    4    1 
     360  375  377  380  383  390  392  407  410  411  420  424  425 
       4    1    1    2    1    2    1    1    1    1    2    1    1 
     430  431  435  444  445  450  460  465  470  490  500  505  524 
       1    1    1    1    1    1    2    1    1    1    2    1    1 
     525  529  538  540  545  560  570  600  605  610  618  620  625 
       2    1    1    1    1    1    1    3    1    1    1    1    1 
     630  652  671  680  696  710  720  730  735  760  780  800  840 
       1    1    1    1    1    1    2    1    2    1    1    1    1 
     850  870  890  900  906  981 1000 1038 1054 1100 1171 1205 1243 
       1    1    1    2    1    1    1    1    1    1    1    1    1 
    1270 1306 1450 1459 1770 1885 2315 2348 2533 3710 
       1    1    1    1    1    1    1    1    1    1 
    > sort(rivers)#排序
      [1]  135  202  210  210  215  217  230  230  233  237  246  250
     [13]  250  250  255  259  260  260  265  268  270  276  280  280
     [25]  280  281  286  290  291  300  300  300  301  306  310  310
     [37]  314  315  320  325  327  329  330  332  336  338  340  350
     [49]  350  350  350  352  360  360  360  360  375  377  380  380
     [61]  383  390  390  392  407  410  411  420  420  424  425  430
     [73]  431  435  444  445  450  460  460  465  470  490  500  500
     [85]  505  524  525  525  529  538  540  545  560  570  600  600
     [97]  600  605  610  618  620  625  630  652  671  680  696  710
    [109]  720  720  730  735  735  760  780  800  840  850  870  890
    [121]  900  900  906  981 1000 1038 1054 1100 1171 1205 1243 1270
    [133] 1306 1450 1459 1770 1885 2315 2348 2533 3710
    > median(rivers)#中位数
    [1] 425
    > range(rivers)#显示最大值及最小值
    [1]  135 3710
    > which.min(rivers)#最小值下标
    [1] 8
    

    1.下载步骤:打开链接下载--Send results to Run selector--RunInfo Table
    图示:
    第一步:

    1、GEO.png
    第二步:新版
    2、下载.png
    因为下载改版,操作如下
    image.png
    image.png

    第三步:


    3、下载目录.png

    第四步:


    4、具体内容.png

    2.读取文件

    > sra <- read.table (SraRUNTable.txt)#注意设置工作目录
    Error in read.table(SraRUNTable.txt) : object 'SraRUNTable.txt' not found
    > sra <- read.table (file="SraRunTable.txt")#
    Error in scan(file = file, what = what, sep = sep, quote = quote, dec = dec,  : 
      line 1 did not have 44 elements
    ##报错原因:未指定分隔符,需要指定文件分隔符为 \t
    > df1 <- read.table(file = "SraRunTable.txt",header = T,sep = '\t')#注意读取列名
    > View(df1)
    > dim(df1)#查看行数列数
    [1] 768  31
    > nrow(df1)#查看行数
    [1] 768
    > ncol(df1)#查看列数
    [1] 31
    > colnames(df1)#查看列名
     [1] "BioSample"          "Experiment"         "MBases"            
     [4] "MBytes"             "Run"                "SRA_Sample"        
     [7] "Sample_Name"        "Assay_Type"         "AssemblyName"      
    [10] "AvgSpotLen"         "BioProject"         "Center_Name"       
    [13] "Consent"            "DATASTORE_filetype" "DATASTORE_provider"
    [16] "InsertSize"         "Instrument"         "LibraryLayout"     
    [19] "LibrarySelection"   "LibrarySource"      "LoadDate"          
    [22] "Organism"           "Platform"           "ReleaseDate"       
    [25] "SRA_Study"          "age"                "cell_type"         
    [28] "marker_genes"       "source_name"        "strain"            
    [31] "tissue"
    > install.packages("magrittr")
    > library(magrittr) #加载包,以便使用%>%
    > for (i in colnames(df1)) paste(i,class(df1[,i])) %>% print()
    [1] "BioSample character"
    [1] "Experiment character"
    [1] "MBases integer"
    [1] "MBytes integer"
    [1] "Run character"
    [1] "SRA_Sample character"
    [1] "Sample_Name character"
    [1] "Assay_Type character"
    [1] "AssemblyName character"
    [1] "AvgSpotLen integer"
    [1] "BioProject character"
    [1] "Center_Name character"
    [1] "Consent character"
    [1] "DATASTORE_filetype character"
    [1] "DATASTORE_provider character"
    [1] "InsertSize integer"
    [1] "Instrument character"
    [1] "LibraryLayout character"
    [1] "LibrarySelection character"
    [1] "LibrarySource character"
    [1] "LoadDate character"
    [1] "Organism character"
    [1] "Platform character"
    [1] "ReleaseDate character"
    [1] "SRA_Study character"
    [1] "age character"
    [1] "cell_type character"
    [1] "marker_genes character"
    [1] "source_name character"
    [1] "strain character"
    [1] "tissue character"
    

    1.下载GEO样本信息 点此获取下载步骤
    GEO官网:https://www.ncbi.nlm.nih.gov/geo/ ---- 点击samples----- search 输入 GSE111229 ---- export
    2.读取到R中

    > df2<- read.table("sample.csv",header = T)
    > View(df2)
    > dim(df2)
    [1] 20  6
    > library(magrittr)
    > for (i in colnames(df2)) paste(i,class(df2[,i])) %>% print()
    [1] "Accession.Title.Sample character"
    [1] "Type.Taxonomy.Channels.Platform.Series.Supplementary character"
    [1] "Types.Supplementary character"
    [1] "Links.SRA character"
    [1] "Accession.Contact.Release character"
    [1] "Date character"
    

    读出之后发现结果与其他同学的不同。错误原因:此文件为csv文件,以逗号(,)为分隔符;若想用read.table读取,需要指定sep = ",",也可以直接使用read.csv:

    > df2<- read.table("sample.csv",sep = ",")
    > df3<- read.csv("sample.csv")
    > dim(df2)
    [1] 20 12
    

    发现行数还是不对:查看下载的原始文件,本身就只有20行,因为我下载时只导出了当前页。重新下载时选择 All search results,然后重新读取:

    > df2=read.csv(file="sample.csv")
    > View(df2)
    > dim(df2)
    [1] 768  12
    > library(magrittr)
    Warning message:
    程辑包‘magrittr’是用R版本3.5.3 来建造的 
    > for (i in colnames(df2)) paste(i,class(df2[,i])) %>% print()
    [1] "Accession factor"
    [1] "Title factor"
    [1] "Sample.Type factor"
    [1] "Taxonomy factor"
    [1] "Channels integer"
    [1] "Platform factor"
    [1] "Series factor"
    [1] "Supplementary.Types factor"
    [1] "Supplementary.Links factor"
    [1] "SRA.Accession factor"
    [1] "Contact factor"
    [1] "Release.Date factor"
    

    把前面两个步骤的两个表(RunInfo Table 文件,样本信息sample.csv)关联起来,使用merge函数。
    总体思路:找出相同内容合并

    rm(list = ls())
    options(stringsAsFactors = F)
    df1 <- read.table(file = "SraRunTable.txt",header = T,sep = '\t')
    df2 <- read.csv(file = "sample.csv")
    for (i in colnames(df1)) {if (i %in% colnames(df2)) print(i)}#查看相同列名
    df1[1,"Platform"]  
    df2[1,"Platform"]#查看两个数据框同名列(Platform)第一行的值
    

    后面大神的内容就看不懂了,参考https://www.jianshu.com/p/c07e67e2c757

    相关文章

      网友评论

          本文标题:R语言初级练习题

          本文链接:https://www.haomeiwen.com/subject/ewkkzctx.html