- Factor
- x<-c(5,12,13,12)
- xf<-factor(x)
- xf
- 5 12 13 12
- Levels: 5 12 13
- str(xf)
- unclass(xf)
- 1 2 3 2
- attr(,"levels")
- "5" "12" "13"
- length(xf) # 4: still the length of the data, not the number of levels
- xff<-factor(x,levels=c(5,12,13,88))
- xff
- 5 12 13 12
- Levels: 5 12 13 88
- xff[2]<-88
- xff
- 5 88 13 12
- Levels: 5 12 13 88
- operations
- ages<-c(25,26,55,37,21,42)
- affils<-c("R","D","D","R","U","D")
- tapply(ages,affils,mean)
- D R U
- 41 31 21
- d<-data.frame(list(gender=c("M","M","F","M","F","F"),age=c(47,59,21,32,33,24),income=c(55000,88000,32450,76500,123000,45650)))
- d$over25<-ifelse(d$age>25,1,0)
- tapply(d$income,list(d$gender,d$over25),mean)
- F 39050 123000.00 # columns are over25 = 0 and over25 = 1; females appear in both groups
- M NA 73166.67 # NA because no male in the data is aged 25 or under
- split # tapply() first calls split() to form the groups, then applies the function to each group
- split(d$income,list(d$gender,d$over25))
- F.0 32450 45650
- M.0 numeric(0)
- F.1 123000
- M.1 55000 88000 76500
- split(1:7,c("M","F","F","I","M","M","F"))
- F 2 3 7
- I 4
- M 1 5 6
- aba<-read.csv("xxx.data",header=TRUE)
- by(aba,aba$Gender,function(m) lm(m[,2]~m[,3])) # unlike tapply(), by() works on whole objects (e.g. data frames), not only vectors
- table
- u<-c(22,8,33,6,8,29,-2)
- fl<-list(c(5,12,13,12,13,5,13),c("a","bc","a","a","bc","a","a"))
- tapply(u,fl,length)
- a bc
- 5 2 NA # 5 is paired with "a" twice and never with "bc", hence NA
- 12 1 1
- 13 2 1
- table(fl)
- fl.2
- fl.1 a bc
- 5 2 0
- 12 1 1
- 13 2 1
- ct<-read.table("ct.dat",header=T)
- cttab<-table(ct)
- No Yes
- No 2 0
- Not Sure 0 1
- Yes 1 1
- table(c(5,12,13,12,8,5))
- 5 8 12 13
- 2 1 2 1
- class(cttab) # "table"
- apply(cttab,1,sum)
- No Not Sure Yes # marginal totals: each row of cttab summed across the other variable's levels
- 2 1 2
- d<-c(5,12,13,4,3,28,12,12,9,5,5,13,5,4,12)
- dtab<-table(d)
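- tabdom(dtab,3) # tabdom is not base R (user-defined elsewhere); here it appears to return the 3 most frequent values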
- d Freq
- 3 5 4
- 5 12 4
- 2 4 2
- aggregate(aba[,-1],list(aba$Gender),median)
- binmarks<-seq(from=0.0,to=1.0,by=0.1)
- z<-c(0.88,0.28,0.59,0.43,0.47,0.24,0.05,0.88)
- cut(z,binmarks,labels=F)
- 9 3 6 5 5 3 1 9 # 0.88 falls in (0.8,0.9], the 9th bin, and so on