美文网首页
2020-12-16 第四章基本数据管理2.0

2020-12-16 第四章基本数据管理2.0

作者: L6511 | 来源:发表于2020-12-16 10:20 被阅读0次

    建立数据框并改变其中的元素

    > manager<-c(1,2,3,4,5)
    > data<-c("10/24/08","10/28/08","10/1/08","10/12/08","5/1/09")
    > country<-c("US","US","UK","UK","UK")
    > gender<-c("M","F","F","M","F")
    > age<-c(32,45,25,39,99)
    > q1<-c(5,3,3,3,2)
    > q2<-c(4,5,5,3,2)
    > q3<-c(5,2,5,4,1)
    > q4<-c(5,5,5,NA,2)
    > q5<-c(5,5,2,NA,1)
    > date<-c("10/24/08","10/28/08","10/1/08","10/12/08","5/1/09")
    > leadership<-data.frame(manager,date,country,gender,age,q1,q2,q3,q4,q5)
    > leadership
      manager     date country gender age q1 q2 q3 q4 q5
    1       1 10/24/08      US      M  32  5  4  5  5  5
    2       2 10/28/08      US      F  45  3  5  2  5  5
    3       3  10/1/08      UK      F  25  3  5  5  5  2
    4       4 10/12/08      UK      M  39  3  3  4 NA NA
    5       5   5/1/09      UK      F  99  2  2  1  2  1
    > leadership$age[leadership$age==99]<-NA
    > leadership$agecat[leadership$age>75]<-"Elder"
    > leadership$agecat[leadership$age>=55&leadership$age<=75]<-"Middle Aged"
    > leadership$agecat[leadership$age<55]<-"Young"
    > library(reshape)
    > leadership<-rename(leadership,
    +                    c(manager="managerID",date="testDate")
    + )
    > names(leadership)
     [1] "managerID" "testDate"  "country"   "gender"    "age"       "q1"        "q2"       
     [8] "q3"        "q4"        "q5"        "agecat"   
    > names(leadership)[2]<-"testDate"
    > leadership
      managerID testDate country gender age q1 q2 q3 q4 q5 agecat
    1         1 10/24/08      US      M  32  5  4  5  5  5  Young
    2         2 10/28/08      US      F  45  3  5  2  5  5  Young
    3         3  10/1/08      UK      F  25  3  5  5  5  2  Young
    4         4 10/12/08      UK      M  39  3  3  4 NA NA  Young
    5         5   5/1/09      UK      F  NA  2  2  1  2  1   <NA>
    > names(leadership)[6:10]<-c("item1","item2","item3","item4","item5")
    > leadership
      managerID testDate country gender age item1 item2 item3 item4 item5 agecat
    1         1 10/24/08      US      M  32     5     4     5     5     5  Young
    2         2 10/28/08      US      F  45     3     5     2     5     5  Young
    3         3  10/1/08      UK      F  25     3     5     5     5     2  Young
    4         4 10/12/08      UK      M  39     3     3     4    NA    NA  Young
    5         5   5/1/09      UK      F  NA     2     2     1     2     1   <NA>
    > 
    

    在第95页

    变量的重命名

    > names(leadership)
     [1] "managerID" "testDate"  "country"   "gender"    "age"       "q1"        "q2"       
     [8] "q3"        "q4"        "q5"        "agecat"   
    > names(leadership)[2]<-"testDate"
    > leadership
      managerID testDate country gender age q1 q2 q3 q4 q5 agecat
    1         1 10/24/08      US      M  32  5  4  5  5  5  Young
    2         2 10/28/08      US      F  45  3  5  2  5  5  Young
    3         3  10/1/08      UK      F  25  3  5  5  5  2  Young
    4         4 10/12/08      UK      M  39  3  3  4 NA NA  Young
    5         5   5/1/09      UK      F  NA  2  2  1  2  1   <NA>
    > names(leadership)[6:10]<-c("item1","item2","item3","item4","item5")
    > leadership
      managerID testDate country gender age item1 item2 item3 item4 item5 agecat
    1         1 10/24/08      US      M  32     5     4     5     5     5  Young
    2         2 10/28/08      US      F  45     3     5     2     5     5  Young
    3         3  10/1/08      UK      F  25     3     5     5     5     2  Young
    4         4 10/12/08      UK      M  39     3     3     4    NA    NA  Young
    5         5   5/1/09      UK      F  NA     2     2     1     2     1   <NA>
    > y<-c(1,2,3,NA)
    > is.na(y)
    [1] FALSE FALSE FALSE  TRUE
    > is.na(leadership[,6:10])
         item1 item2 item3 item4 item5
    [1,] FALSE FALSE FALSE FALSE FALSE
    [2,] FALSE FALSE FALSE FALSE FALSE
    [3,] FALSE FALSE FALSE FALSE FALSE
    [4,] FALSE FALSE FALSE  TRUE  TRUE
    [5,] FALSE FALSE FALSE FALSE FALSE
    > leadership$age[leadership$age==99]<-NA
    > leadership
      managerID testDate country gender age item1 item2 item3 item4 item5 agecat
    1         1 10/24/08      US      M  32     5     4     5     5     5  Young
    2         2 10/28/08      US      F  45     3     5     2     5     5  Young
    3         3  10/1/08      UK      F  25     3     5     5     5     2  Young
    4         4 10/12/08      UK      M  39     3     3     4    NA    NA  Young
    5         5   5/1/09      UK      F  NA     2     2     1     2     1   <NA>
    

    使用na.omit()删除不完整的观测

    > manager<-c(1,2,3,4,5)
    > country<-c("US","US","UK","UK","UK")
    > gender<-c("M","F","F","M","F")
    > age<-c(32,45,25,39,99)
    > q1<-c(5,3,3,3,2)
    > q2<-c(4,5,5,3,2)
    > q3<-c(5,2,5,4,1)
    > q4<-c(5,5,5,NA,2)
    > q5<-c(5,5,2,NA,1)
    > date<-c("10/24/08","10/28/08","10/1/08","10/12/08","5/1/09")
    > leadership<-data.frame(manager,date,country,gender,age,q1,q2,q3,q4,q5)
    > leadership
      manager     date country gender age q1 q2 q3 q4 q5
    1       1 10/24/08      US      M  32  5  4  5  5  5
    2       2 10/28/08      US      F  45  3  5  2  5  5
    3       3  10/1/08      UK      F  25  3  5  5  5  2
    4       4 10/12/08      UK      M  39  3  3  4 NA NA
    5       5   5/1/09      UK      F  99  2  2  1  2  1
    > newdata<-na.omit(leadership)
    > newdata
      manager     date country gender age q1 q2 q3 q4 q5
    1       1 10/24/08      US      M  32  5  4  5  5  5
    2       2 10/28/08      US      F  45  3  5  2  5  5
    3       3  10/1/08      UK      F  25  3  5  5  5  2
    5       5   5/1/09      UK      F  99  2  2  1  2  1
    

    关于日期的函数在第99页

    第四章主要学习了如何确定一个对象的数据类型、如何将其转换为其他类型,以及缺失值和日期值的存储方式;还学习了用公式创建新变量、对现有变量进行重编码、对数据进行排序、对变量进行重命名,以及对数据集进行横向合并和纵向合并。

    相关文章

      网友评论

          本文标题:2020-12-16 第四章基本数据管理2.0

          本文链接:https://www.haomeiwen.com/subject/vrsogktx.html