经过近三个月的学习,匆忙将《R in action》过了一遍,自己实现了其中的绝大部分代码。当然这个过程是机械重复而非创造性的工作。不过编程初期就是这样把,不断重复别人的代码,领会编程的奥妙。
其实初期的学习因为没有问题导向,看的比较浅薄,以至于在后来统计课程的需要,自己亲手做了一个小的项目才意识到学习方法需要改进、统计知识需要补充。好在通过整个过程,各个方面都在提高。
之前一直在按照章节更新学习的笔记。其实最有帮助的只是代码的注释。因为后边的学习没有详尽的笔记,只保存了部分代码及注释,分享在此希望能对后学者有所帮助。另,如有任何问题欢迎通过各种方式联系我,大家一起讨论。
接下来将投入到R在交通方面的具体问题学习中。具体的近期任务是mlogit及交通相关的几个packages的学习。
#13 泊松回归
#载入程序包
library(rrcov)
library(robust)
#查看数据
data(breslow.dat, package="robust")
names(breslow.dat)
summary(breslow.dat[c(6, 7, 8, 10)]) #查看所研究变量的统计信息
#基础信息图表
opar <- par(no.readonly=TRUE) #创建新的绘图参数
par(mfrow=c(1,2)) #一行两列放置图形
attach(breslow.dat)
hist(sumY, breaks=20, #绘制sumY直方图,分组参考20
xlab="Geizure Count",
main="Distribution of Seizures")
boxplot(sumY ~ Trt, xlab="Treamtment", #按照Trt分组,对sumY绘制箱线图
main="Group Comparisons")
par(opar)
#进行泊松回归
fit <- glm(sumY ~ Base + Age + Trt, data=breslow.dat, family=poisson())
summary(fit) #显示模型主要结果
#解释模型参数
coef(fit) #显示模型系数(此为对数系数)
exp(coef(fit)) #指数化系数
#过度离势检验
library(qcc)
qcc.overdispersion.test(breslow.dat$sumY, type="poisson") #是否存在过度离势检验
fit.od <- glm(sumY ~ Base + Age + Trt, data=breslow.dat,
family=quasipoisson()) #类泊松方法拟合
summary(fit.od)
#时间段变化的泊松回归
fittime <- glm(sumY ~ Base + Age + Trt, data=breslow.dat,
offset= log(time), family=poisson())
opar <- par(no.readonly=TRUE) #
#14主成份和因子分析
#主成分分析-主成分个数判断-三种特征值判别准则
library(psych)
fa.parallel(USJudgeRatings[,-1], #所有观测(列),删去第一列
fa="pc", n.iter=100, #100个随机数据矩阵对比
#原书代码不能执行,修改fa="pc"
show.legend=FALSE,
main="Scree plot with parallel analysis")
abline(h=1, lwd=1, col="green") #手动添加直线
#代码清单14-1 主成分分析
library(psych)
pc <- principal(USJudgeRatings[,-1], nfactors=1)
pc
#case2
library(psych)
fa.parallel(Harman23.cor$cov, n.obs=302, #n.obs观测数
fa="pc", n.iter=100,
show.legend=FALSE, main="Scree plot with parallel analysis")
#代码清单14-2 身高测量指标的主成分分析
library(psych)
pc <- principal(Harman23.cor$cov,
nfactors=2, rotate="none")
pc
#代码清单14-3 方差极大旋转的主成分分析
rc <- principal(Harman23.cor$cov, nfactors=2, rotate="varimax")
rc
#代码清单14-4 从原始数据中获取成分得分
library(psych)
pc <- principal(USJudgeRatings[,-1], nfactors=1, score=TRUE)
head(pc$scores) #查看各个观测对象在主成分上的得分,head
cor(USJudgeRatings$CONT, pc$score) #变量CONT与评分间的相关系数
#代码清单14-5 获取主成分得分的系数
library(psych)
rc <- principal(Harman23.cor$cov, nfactors=2, rotate="varimax")
round(unclass(rc$weights),2)
#14.3 探索性因子分析
options(digits=2) #显示小数位数
covariances <- ability.cov$cov #ability.cov协方差矩阵数据转换为相关系数矩阵
correlations <- cov2cor(covariances) #cov2cor()转化相关系数矩阵
##判断需提取的因子数
fa.parallel(correlations, n.obs=112, #观测数112
fa="both", n.iter=100, #fa="both"同时展示PCA和EFA分析结果
main="Scree plots with parallel analysis")
#提取公共因子
##代码清单14-5 为旋转的主轴迭代因子法
fa <- fa(correlations, nfactors=2, rotate="none", fm="pa")
fa
#因子旋转
##代码清单14-7 正交旋转提取因子
fa.varimax <- fa(correlations, nfacotrs=2, rotate="varimax", fm="pa")
fa.varimax
#斜交旋转提取因子
fa.promax <- fa(correlations, nfactors=2, rotate="promax", fm="pa")
fa.promax
#因子结构矩阵(因子载荷阵)求解函数:F = P * Phi
fsm <- function(oblique) {
if (class(oblique)[2] == "fa" & is.null(oblique$Phi)) {
waning("Object doesn't look oblique EFA")
} else {
P <- unclass(oblique$loading)
F <- P %*% oblique$Phi
colnames(F) <- c("PA1", "PA2")
return(F)
}
}
#使用函数
fsm(fa.promax)
#绘图结果可视化
factor.plot(fa.promax, labels=rownames(fa.promax$loadings)) #轴为载荷大小
fa.diagram(fa.promax, simple=FALSE)
#第15章 处理缺失数据的高级方法
install.packages(c("VIM", "mice"))
library(VIM)
library(mice)
data(sleep, package="VIM") #加载数据集
sleep[complete.cases(sleep),] #列出没有缺失值的行
sleep[!complete.cases(sleep),] #列出含有缺失值的行
sum(is.na(sleep$Dream)) #Dream数据有多少个缺失值
mean(is.na(sleep$Dream)) #缺失占总体比例
mean(!complete.cases(sleep)) #含有确缺失数据的观测的比例
library(mice)
data(sleep, package="VIM")
md.pattern(sleep)
library(VIM)
aggr(sleep, prop=FALSE, numbers=TRUE)
matrixplot(sleep)
#用相关性探索缺失值
x <- as.data.frame(abs(is.na(sleep))) #计算缺失数据
head(sleep, n=5) #查看原始数据
head(x, n=5) #查看缺失数据(1为缺失)
y <- x[which(sd(x) > 0)] #提取*含*缺失值的变量
cor(y) #指示变量间相关系数
#实例分析(行删除)
complete.cases(sleep) #识别缺失值行
newdata <- sleep[complete.cases(sleep), ]
na.omit(sleep)
options(digits=1) #显示小数点后1位
cor(na.omit(sleep))
#多重插补
library(mice)
data(sleep, package="VIM")
imp <- mice(sleep, seed=1234)
fit <- with(imp, lm(Dream ~ Span + Gest))
pooled <- pool(fit)
summary(pooled)
#16 高级图形进阶
library(lattice)
singer
histogram(~height | voice.part, data=singer,
main="Distribution of Heights by Voice Pitch",
xlab="Height (inches)")
#16-1 lattice绘图示例
library(lattice)
attach(mtcars)
mtcars
gear <- factor(gear, levels=c(3, 4, 5),
labels=c("3 gears", "4 gears", "5 gears")) #分组标签
cyl <- factor(cyl,levels=c(4, 6, 8),
labels=c("4 cylinders", "6 cylinders", "8 cylinders"))
densityplot(~mpg, main="Density Plot",
xlab="Miles per Gallon") #核密度图
densityplot(~mpg | cyl,
main="Density Plot by Number of Cylinders",
xlab="Miles per Gallon") #条件变量核密度图
bwplot(cyl ~ mpg | gear,
main="Box Plots by Cylinders and Gears",
xlab="Miles per Gallon", ylab="Cylinders") #箱线图
xyplot(mpg ~ wt | cyl * gear,
main="Scatter Plots by Cylinders and Gears",
xlab="Car Weight", ylab="Miles per Gallon") #散点图
cloud(mpg ~ wt * qsec | cyl,
main="3D Scatter Plots by Cylinders") #三维散点图
dotplot(cyl ~ mpg | gear,
main="Dot Plots by Number of Gears and Cylinders",
xlab="Miles Per Gallon") #点图
splom(mtcars[c(1, 3, 4, 5, 6)],
main="Scatter Plot Matrix for mtcars Data") #散点图矩阵
detach(mtcars)
library(lattice)
mygraph <- densityplot(~height | voice.part, data=singer) #存储图形至mygraph
#条件变量 因子-离散变量
displacement <- equal.count(mtcars$disp, number=3, overlap=0)
xyplot(mpg ~ wt | displacement, data=mtcars,
main="Miles per Gallon vs. Weight by Engine Displacement",
xlab="Weight", ylab="Miles per Gallon",
layout=c(3, 1), aspect=1.5) #aspect宽高比
#自定义面板函数xyplot
displacement <- equal.count(mtcars$disp, number=3, overlap=0)
#equal.count连续变量离散化
mypanel <- function(x, y) {
panel.xyplot(x, y, pch=19) #pch=19填充圆圈
panel.rug(x, y) #轴须线
panel.grid(h=-1, v=-1) #水平和竖直网格线,负值强制对齐轴标签
panel.lmline(x, y, col="red", lwd=1, lty=2)
} #自定义面板函数
xyplot(mpg~wt | displacement, data=mtcars,
layout=c(3, 1),
aspect=1.5, #锁定宽高比
main="Miles per Gallon vs. Weight by Engine Displacement",
xlab="Weight",
ylab="Miles per Gallon",
panel=mypanel)
#自定义面板函数和额外选系选项的plot
library(lattice)
mtcars$transmission <- factor(mtcars$am, levels=c(0, 1),
labels=c("Automatic", "Manual"))
panel.smoother <- function(x, y) {
panel.grid(h=-1, v=-1) #强制对齐轴标签的网格线
panel.xyplot(x, y) #散点图
panel.loess(x, y) #绘制非参数拟合曲线
panel.abline(h=mean(y), lwd=2, lty=2, col="green")#添加均直线
}
xyplot(mpg~disp|transmission, data=mtcars,
scales=list(cex=.8, col="red"),
panel=panel.smoother,
xlab="Displacement", ylab="Miles per Gallon",
main="MPG vs Displacement by Transmission Type",
sub="Dotted lines aer Group Means", aspect=1) #sub下标题
#叠加显示分组变量图形
library(lattice)
mtcars$transmission <- factor(mtcars$am, levels=c(0, 1),
labels=c("Automatic", "Manual"))
densityplot(~mpg, data=mtcars,
group=transmission,
main="MPG Distribution by Transmission Type",
xlab="Miles per Gallon",
auto.key=TRUE)#auto.key手动添加图例
auto.key=list(space="right", columns=1, title="Transmission")
#16-4
library(lattice)
mtcars$transmission <- factor(mtcars$am, levels=c(0, 1),
labels=c("Automatic", "Manual"))
colors = c("red", "blue")
lines = c(1, 2)
points = c(16, 17)
key.trans <- list(title="Transmission",
space="bottom", columns=2,
#图例摆放在下方,两栏
text=list(levels(mtcars$transmission)),
#水平名称
points=list(pch=points, col=colors),
lines=list(col=colors, lty=lines),
cex.title=1, cex=.9)
densityplot(~mpg, data=mtcars,
group=transmission,
main="MPG Distribution by Transmission Type",
xlab="Miles per Gallon",
pch=points, lty=lines, col=colors,
lwd=2, jitter=.005,
key=key.trans)
#16-5 包含分组变量和条件变量以及自定义图例的xyplot
library(lattice)
colors <- "darkgreen"
symbols <- c(1:12)
linetype <- c(1:3)
CO2
key.species <- list(title="Plant",
space="right",#标签在右
text=list(levels(CO2$Plant)),
points=list(pch=symbols, col=colors))
xyplot(uptake~conc|Type*Treatment, data=CO2,
group=Plant,
type="o",
pch=symbols, col=colors, lty=linetype,
main="Carbon Dioxide Uptake\nin Grass Plants",
ylab=expression(paste("Uptake",
bgroup("(", italic(frac("umol", "m"^2)), ")"))),
xlab=expression(paset("Concentration",
bgroup("(", italic(frac(mL,L)), ")"))),
sub="Grass Species:Echinochloa crus-galli",
key=key.species)
#图形参数
show.settings()
mysettings <- trellis.par.get()
mysettings$superpose.symbol
mysettings$superpose.symbol$pch <- c(1:10)
trellis.par.set(mysettings)
show.settings()
#页面摆放
#splite
library(lattice)
graph1 <- histogram(~height|voice.part, data=singer,
main="Heights of Choral Singers by Voice Part")
graph2 <- densityplot(~height, data=singer, group=voice.part,
plot.points=FALSE, auto.key=list(columns=4))
plot(graph1, split=c(1, 1, 1, 2))
plot(graph2, split=c(1, 2, 1, 2), newpage=FALSE)
#position
library(lattice)
graph1 <- histogram(~height|voice.part, data=singer,
main="Heights of Choral Singers by Voice Part")
graph2 <- densityplot(~height, data=singer, group=voice.part,
plot.point=FALSE, auto.key=list(columns=4))
plot(graph1, position=c(0, 3, 1, 1))
plot(graph2, position=c(0, 0, 1, 3), newpage=FALSE)
#ggplot2
library(ggplot2)
mtcars$cylinder <- as.factor(mtcars$cyl)
qplot(cylinder, mpg, data=mtcars, geom=c("boxplot", "jitter"),
fill=cylinder,
main="Box plots with superimposed data points",
xlab="Number of Cylinders",
ylab="Miles per Gallon")
#case2
library(ggplot2)
transmission <- factor(mtcars$am, levels=c(0, 1),
labels=c("Automatic", "Manual"))
qplot(wt, mpg, data=mtcars,
color=transmission, shape=transmission,
geom=c("point", "smooth"),
method="lm", formula=y~x,
xlab="Weight", ylab="Miles Per Gallon",
main="Regression Example")
#case3
library(ggplot2)
mtcars$cyl <- factor(mtcars$cyl, levels=c(4, 6, 8),
labels=c("4 cylinders", "6 cylinders", "8 cylinders"))
mtcars$am <- factor(mtcars$am, levels=c(0, 1),
labels=c("Automatic", "Manual"))
qplot(wt, mpg, data=mtcars, facets=am ~ cyl, size=hp)
#case4
library(ggplot2)
data(singer, package="lattice")
qplot(height, data=singer, geom=c("density"),
facets=voice.part ~ . , fill=voice.part)
#交互式图形
install.packages(c("playwith", "latticist", "iplot", "rggobi"))
#identify()
plot(mtcars$wt, mtcars$mpg)
identify(mtcars$wt, mtcars$mpg, labels=row.names(mtcars))
#playwith
library(playwith)
library(lattice)
playwith(
xyplot(mpg~wt|factor(cyl)*factor(am),
data=mtcars, subscripts=TRUE,
type=c("r", "p"))
)
#latticist
install.packages("latticist")
library(latticist)
mtcars$cyl <- factor(mtcars$cyl)
mtcars$gear <- factor(mtcars$gear)
latticist(mtcars, use.playwith=TRUE)
#iplots
library(iplots)
attach(mtcars)
cylinders <- factor(cyl)
gears <- factor(gear)
transmission <- factor(am)
ihist(mpg)
ibar(gears)
iplot(mpg, wt)
ibox(mtcars[c("mpg", "wt", "qsec", "disp", "hp")])
ipcp(mtcars[c("mpg", "wt", "qsec", "disp", "hp")])
imosaic(transmission, cylinders)
detach(mtcars)
##########
alpha
cov
ability.cov
数据集:Harman74.cor
install.packages("qcc")
install.packages(c("playwith", "latticist", "iplot", "rggobi"))
其实一个详尽的笔记应该是更好的学习分享方式,之后会注意,尽量不再出现现在这样的烂尾工程。
——2017.3.29
——实验中心510
网友评论