美文网首页R语言学习笔记
R语言学习笔记6-因子和表格篇

R语言学习笔记6-因子和表格篇

作者: RudyHe | 来源:发表于2015-12-04 14:43 被阅读129次
    - Factor
        - x<-c(5,12,13,12)
        - xf<-factor(x)
        - xf
            - 5 12 13 12
            - Levels: 5 12 13
        - str(xf)
            - Factor w/ 3 levels "5","12","13": 1 2 3 2
        - unclass(xf)
            - 1 2 3 2
            - attr(,"levels")
            - "5" "12" "13"
        - length(xf)    # 4: still the length of the data, not the number of levels
        - xff<-factor(x,levels=c(5,12,13,88))
        - xff
            - 5 12 13 12
            - Levels: 5 12 13 88
        - xff[2]<-88
        - xff
            - 5 88 13 12
            - Levels: 5 12 13 88
        - 
    
    - operations
        - ages<-c(25,26,55,37,21,42)
        - affils<-c("R","D","D","R","U","D")
        - tapply(ages,affils,mean)
            - D R U
            - 41 31 21
        - d<-data.frame(list(gender=c("M","M","F","M","F","F"),ages=c(47,59,21,32,33,24),income=c(55000,88000,32450,76500,123000,45650)))
        - d$over25<-ifelse(d$ages>25,1,0)
        - tapply(d$income,list(d$gender,d$over25),mean)
            - F 39050 123000.00    # columns are over25 = 0 and over25 = 1; females appear in both groups
            - M NA 73166.67    # no male is aged 25 or under, so that cell is NA
        - split    # tapply() first calls split() to form the groups, then applies the function to each group
        - split(d$income,list(d$gender,d$over25))
            - F.0    32450 45650
            - M.0    numeric(0)
            - F.1    123000
            - M.1    55000 88000 76500
        - split(1:7,c("M","F","F","I","M","M","F"))
            - F    2 3 7
            - I    4
            - M    1 5 6
        - aba<-read.csv("xxx.data",header=TRUE)
        - by(aba,aba$Gender,function(m) lm(m[,2]~m[,3]))    # unlike tapply(), by() works on whole objects (e.g. the rows of a data frame), not only on vectors
    
    - table
        - u<-c(22,8,33,6,8,29,-2)
        - fl<-list(c(5,12,13,12,13,5,13),c("a","bc","a","a","bc","a","a"))
        - tapply(u,fl,length)
            -   a bc
            - 5 2 NA    # the pair (5, "a") occurs twice; (5, "bc") never occurs, hence NA
            - 12 1 1
            - 13 2 1
        - table(fl)
            -   fl.2
            - fl.1 a bc
            - 5 2 0
            - 12 1 1
            - 13 2 1
        - ct<-read.table("ct.dat",header=T)
        - cttab<-table(ct)
            -   No Yes
            - No 2 0
            - Not Sure 0 1
            - Yes 1 1
        - table(c(5,12,13,12,8,5))
            - 5 8 12 13
            - 2 1 2 1
        - class(cttab)    # "table"
        - apply(cttab,1,sum)
            - No Not Sure Yes    # marginal counts: each row of the table summed over the other variable
            - 2 1 2
        - d<-c(5,12,13,4,3,28,12,12,9,5,5,13,5,4,12)
        - dtab<-table(d)
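        - tabdom(dtab,3)    # tabdom() is not base R; it is a user-defined helper (definition not shown here) that returns the k most frequent values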
            - d Freq
            - 3 5 4
            - 5 12 4
            - 2 4 2
        - aggregate(aba[,-1],list(aba$Gender),median)
        - binmarks<-seq(from=0.0,to=1.0,by=0.1)
        - z<-c(0.88,0.28,0.59,0.43,0.47,0.24,0.05,0.88)
        - cut(z,binmarks,labels=F)
            - 9 3 6 5 5 3 1 9    # 0.88 falls in (0.8,0.9], the 9th bin, and so on
    

    相关文章

      网友评论

        本文标题:R语言学习笔记6-因子和表格篇

        本文链接:https://www.haomeiwen.com/subject/tdpshttx.html