# Sample data: five records with a manager ID, a date string, gender, age
# and five items q1-q5 (q4 and q5 contain missing values).
manager <- c(1, 2, 3, 4, 5)
date <- c("10/24/08", "10/28/08", "10/1/08", "10/12/08", "5/1/09")
gender <- c("M", "F", "F", "M", "F")
age <- c(32, 45, 25, 39, 99)
q1 <- c(5, 3, 3, 3, 2)
q2 <- c(4, 5, 5, 3, 2)
q3 <- c(5, 2, 5, 4, 1)
q4 <- c(5, 5, 5, NA, 2)
q5 <- c(5, 5, 2, NA, 1)

# Combine the vectors into a single data frame; character columns are kept
# as character rather than being converted to factors.
leadership <- data.frame(
  manager = manager,
  date = date,
  gender = gender,
  age = age,
  q1 = q1, q2 = q2, q3 = q3, q4 = q4, q5 = q5,
  stringsAsFactors = FALSE
)
head(leadership)
# Sample data structure (see the output of head(leadership) above).
# Listing 4.2 - Creating new variables
# Three equivalent ways to add the derived columns `sumx` and `meanx`
# to an existing data frame.
mydata <- data.frame(
  x1 = c(2, 2, 6, 4),
  x2 = c(3, 4, 2, 8)
)

# Method 1: reference every column explicitly with `$`.
mydata$sumx <- mydata$x1 + mydata$x2
mydata$meanx <- (mydata$x1 + mydata$x2) / 2

# Method 2: evaluate the expressions inside the data frame with with().
# Unlike an attach()/detach() pair, with() never touches the search path,
# so nothing is left attached if a statement fails part-way through.
mydata$sumx <- with(mydata, x1 + x2)
mydata$meanx <- with(mydata, (x1 + x2) / 2)

# Method 3: transform() adds (or overwrites) several columns in one call.
mydata <- transform(
  mydata,
  sumx = x1 + x2,
  meanx = (x1 + x2) / 2
)
# Recoding variables
# Treat the code 99 in `age` as a missing value. This has to happen before
# the age groups are derived; otherwise 99 would satisfy `age > 75` and be
# labelled "Elder".
leadership$age[leadership$age == 99] <- NA

# Start from an all-missing character column so that rows matching none of
# the conditions below (i.e. rows whose age is NA) are explicitly NA, even
# when an `agecat` column already exists from a previous run.
leadership$agecat <- NA_character_

# Logical indexing: each statement labels only the rows whose condition is
# TRUE; an NA condition leaves the row untouched.
leadership$agecat[leadership$age > 75] <- "Elder"
leadership$agecat[leadership$age >= 55 & leadership$age <= 75] <- "Middle Aged"
leadership$agecat[leadership$age < 55] <- "Young"
# Note: set the missing values before recoding, and pay attention to how the rows are selected (logical indexing).
# A more compact alternative: do the same recoding inside within(), where
# the columns of `leadership` can be referred to by their bare names.
leadership <- within(leadership, {
  # Begin with NA; the indexed assignments below stretch `agecat` to one
  # entry per row and fill in the label for each age band in turn.
  agecat <- NA
  agecat[age < 55] <- "Young"
  agecat[age >= 55 & age <= 75] <- "Middle Aged"
  agecat[age > 75] <- "Elder"
})
# Rename a column with names(): replace the name of the second column,
# then list the column names to check the result.
names(leadership)[2L] <- "newdata"
names(leadership)
# Renaming a column with the names() function.
# Renaming variables with the plyr package
# Show the current column names and data before renaming.
names(leadership)
leadership
library(plyr)
# rename() takes a named character vector of the form c(old = "new").
# The second column is called "newdata" at this point in the script (the
# original "date" name was replaced earlier), so the mapping must use the
# current name; a mapping from "date" would match nothing and plyr would
# only report that the name is not present.
leadership <- rename(
  leadership,
  c(manager = "managerID", newdata = "testDate")
)
colnames(leadership)
# Renaming column variables with the rename() function.
# Reader comments (web page section heading; not part of the script).