R语言实战_终篇

作者: 00e6a8fd618f | 来源:发表于2017-03-29 16:47 被阅读258次

    经过近三个月的学习,匆忙将《R in action》过了一遍,自己实现了其中的绝大部分代码。当然这个过程是机械重复而非创造性的工作。不过编程初期就是这样把,不断重复别人的代码,领会编程的奥妙。

    其实初期的学习因为没有问题导向,看的比较浅薄,以至于在后来统计课程的需要,自己亲手做了一个小的项目才意识到学习方法需要改进、统计知识需要补充。好在通过整个过程,各个方面都在提高。

    之前一直在按照章节更新学习的笔记。其实最有帮助的只是代码的注释。因为后边的学习没有详尽的笔记,只保存了部分代码及注释,分享在此希望能对后学者有所帮助。另,如有任何问题欢迎通过各种方式联系我,大家一起讨论。

    接下来将投入到R在交通方面的具体问题学习中。具体的近期任务是mlogit及交通相关的几个packages的学习。


    #13 泊松回归
    
    #载入程序包
    library(rrcov)
    library(robust)
    
    #查看数据
    data(breslow.dat, package="robust")
    names(breslow.dat)
    summary(breslow.dat[c(6, 7, 8, 10)]) #查看所研究变量的统计信息
    #基础信息图表
    opar <- par(no.readonly=TRUE) #创建新的绘图参数
    par(mfrow=c(1,2)) #一行两列放置图形
    attach(breslow.dat)
    hist(sumY, breaks=20, #绘制sumY直方图,分组参考20
         xlab="Geizure Count",
         main="Distribution of Seizures")
    boxplot(sumY ~ Trt, xlab="Treamtment",  #按照Trt分组,对sumY绘制箱线图
            main="Group Comparisons")
    par(opar)
    #进行泊松回归
    fit <- glm(sumY ~ Base + Age + Trt, data=breslow.dat, family=poisson())
    summary(fit) #显示模型主要结果
    #解释模型参数
    coef(fit) #显示模型系数(此为对数系数)
    exp(coef(fit)) #指数化系数
    #过度离势检验
    library(qcc)
    qcc.overdispersion.test(breslow.dat$sumY, type="poisson") #是否存在过度离势检验
    fit.od <- glm(sumY ~ Base + Age + Trt, data=breslow.dat,
                  family=quasipoisson()) #类泊松方法拟合
    summary(fit.od)
    #时间段变化的泊松回归
    fittime <- glm(sumY ~ Base + Age + Trt, data=breslow.dat,
                   offset= log(time), family=poisson())
    
    opar <- par(no.readonly=TRUE) #
    
    #14主成份和因子分析
    #主成分分析-主成分个数判断-三种特征值判别准则
    library(psych)
    fa.parallel(USJudgeRatings[,-1],  #所有观测(列),删去第一列
                fa="pc", n.iter=100, #100个随机数据矩阵对比
                #原书代码不能执行,修改fa="pc"
                show.legend=FALSE, 
                main="Scree plot with parallel analysis")
    abline(h=1, lwd=1, col="green") #手动添加直线
    
    #代码清单14-1 主成分分析
    library(psych)
    pc <- principal(USJudgeRatings[,-1], nfactors=1)
    pc
    #case2 
    library(psych)
    fa.parallel(Harman23.cor$cov, n.obs=302, #n.obs观测数
                fa="pc", n.iter=100,
                show.legend=FALSE, main="Scree plot with parallel analysis")
    #代码清单14-2 身高测量指标的主成分分析
    library(psych)
    pc <- principal(Harman23.cor$cov, 
                    nfactors=2, rotate="none")
    pc
    #代码清单14-3 方差极大旋转的主成分分析
    rc <- principal(Harman23.cor$cov, nfactors=2, rotate="varimax")
    rc
    #代码清单14-4 从原始数据中获取成分得分
    library(psych)
    pc <- principal(USJudgeRatings[,-1], nfactors=1, score=TRUE)
    head(pc$scores) #查看各个观测对象在主成分上的得分,head
    cor(USJudgeRatings$CONT, pc$score) #变量CONT与评分间的相关系数
    #代码清单14-5 获取主成分得分的系数
    library(psych)
    rc <- principal(Harman23.cor$cov, nfactors=2, rotate="varimax")
    round(unclass(rc$weights),2)
    
    #14.3 探索性因子分析
    options(digits=2) #显示小数位数
    covariances <- ability.cov$cov #ability.cov协方差矩阵数据转换为相关系数矩阵
    correlations <- cov2cor(covariances)  #cov2cor()转化相关系数矩阵
    ##判断需提取的因子数
    fa.parallel(correlations, n.obs=112,  #观测数112
                fa="both", n.iter=100, #fa="both"同时展示PCA和EFA分析结果
                main="Scree plots with parallel analysis")
    #提取公共因子
    ##代码清单14-5 为旋转的主轴迭代因子法
    fa <- fa(correlations, nfactors=2, rotate="none", fm="pa")
    fa
    #因子旋转
    ##代码清单14-7 正交旋转提取因子
    fa.varimax <- fa(correlations, nfacotrs=2, rotate="varimax", fm="pa")
    fa.varimax
    #斜交旋转提取因子
    fa.promax <- fa(correlations, nfactors=2, rotate="promax", fm="pa")
    fa.promax
    #因子结构矩阵(因子载荷阵)求解函数:F = P * Phi
    fsm <- function(oblique) {
      if  (class(oblique)[2] == "fa" & is.null(oblique$Phi)) {
           waning("Object doesn't look oblique EFA")
      } else {
           P <- unclass(oblique$loading)
           F <- P %*% oblique$Phi
           colnames(F) <- c("PA1", "PA2")
           return(F)
         }
    }
    #使用函数
    fsm(fa.promax)
    #绘图结果可视化
    factor.plot(fa.promax, labels=rownames(fa.promax$loadings)) #轴为载荷大小
    fa.diagram(fa.promax, simple=FALSE)
    
    
    #第15章 处理缺失数据的高级方法
    install.packages(c("VIM", "mice"))
    library(VIM)
    library(mice)
    
    data(sleep, package="VIM") #加载数据集
    sleep[complete.cases(sleep),] #列出没有缺失值的行
    sleep[!complete.cases(sleep),] #列出含有缺失值的行
    
    sum(is.na(sleep$Dream)) #Dream数据有多少个缺失值
    mean(is.na(sleep$Dream)) #缺失占总体比例
    mean(!complete.cases(sleep)) #含有确缺失数据的观测的比例
    
    library(mice)
    data(sleep, package="VIM")
    md.pattern(sleep)
    
    library(VIM)
    aggr(sleep, prop=FALSE, numbers=TRUE)
    matrixplot(sleep)
    
    #用相关性探索缺失值
    x <- as.data.frame(abs(is.na(sleep))) #计算缺失数据
    head(sleep, n=5)  #查看原始数据
    head(x, n=5) #查看缺失数据(1为缺失)
    y <- x[which(sd(x) > 0)] #提取*含*缺失值的变量
    cor(y) #指示变量间相关系数
    
    #实例分析(行删除)
    complete.cases(sleep) #识别缺失值行
    newdata <- sleep[complete.cases(sleep), ]
    na.omit(sleep)
    options(digits=1) #显示小数点后1位
    cor(na.omit(sleep))
    #多重插补
    library(mice)
    data(sleep, package="VIM")
    imp <- mice(sleep, seed=1234)
    fit <- with(imp, lm(Dream ~ Span + Gest))
    pooled <- pool(fit)
    summary(pooled)
    
    #16 高级图形进阶
    library(lattice)
    singer
    histogram(~height | voice.part, data=singer,
              main="Distribution of Heights by Voice Pitch",
              xlab="Height (inches)")
    #16-1 lattice绘图示例
    library(lattice)
    attach(mtcars)
    mtcars
    gear <- factor(gear, levels=c(3, 4, 5),
                   labels=c("3 gears", "4 gears", "5 gears")) #分组标签
    cyl <- factor(cyl,levels=c(4, 6, 8),
                  labels=c("4 cylinders", "6 cylinders", "8 cylinders"))
    densityplot(~mpg, main="Density Plot",
                xlab="Miles per Gallon") #核密度图
    densityplot(~mpg | cyl,
                main="Density Plot by Number of Cylinders",
                xlab="Miles per Gallon") #条件变量核密度图
    bwplot(cyl ~ mpg | gear,
           main="Box Plots by Cylinders and Gears",
           xlab="Miles per Gallon", ylab="Cylinders") #箱线图
    xyplot(mpg ~ wt | cyl * gear,
           main="Scatter Plots by Cylinders and Gears",
           xlab="Car Weight", ylab="Miles per Gallon") #散点图
    cloud(mpg ~ wt * qsec | cyl,
          main="3D Scatter Plots by Cylinders") #三维散点图
    dotplot(cyl ~ mpg | gear, 
            main="Dot Plots by Number of Gears and Cylinders",
            xlab="Miles Per Gallon") #点图
    splom(mtcars[c(1, 3, 4, 5, 6)],
          main="Scatter Plot Matrix for mtcars Data") #散点图矩阵
    detach(mtcars)
    library(lattice)
    mygraph <- densityplot(~height | voice.part, data=singer) #存储图形至mygraph
    #条件变量 因子-离散变量
    displacement <- equal.count(mtcars$disp, number=3, overlap=0)
    xyplot(mpg ~ wt | displacement, data=mtcars,
           main="Miles per Gallon vs. Weight by Engine Displacement",
           xlab="Weight", ylab="Miles per Gallon",
           layout=c(3, 1), aspect=1.5) #aspect宽高比
    #自定义面板函数xyplot
    displacement <- equal.count(mtcars$disp, number=3, overlap=0)
                  #equal.count连续变量离散化
    mypanel <- function(x, y) {
                 panel.xyplot(x, y, pch=19) #pch=19填充圆圈
                 panel.rug(x, y) #轴须线
                 panel.grid(h=-1, v=-1) #水平和竖直网格线,负值强制对齐轴标签
                 panel.lmline(x, y, col="red", lwd=1, lty=2)
               } #自定义面板函数
    xyplot(mpg~wt | displacement, data=mtcars,
           layout=c(3, 1),
           aspect=1.5, #锁定宽高比
           main="Miles per Gallon vs. Weight by Engine Displacement",
           xlab="Weight",
           ylab="Miles per Gallon",
           panel=mypanel)
    #自定义面板函数和额外选系选项的plot
    library(lattice)
    mtcars$transmission <- factor(mtcars$am, levels=c(0, 1),
                                  labels=c("Automatic", "Manual"))
    panel.smoother <- function(x, y) {
                        panel.grid(h=-1, v=-1) #强制对齐轴标签的网格线
                        panel.xyplot(x, y) #散点图
                        panel.loess(x, y) #绘制非参数拟合曲线
                        panel.abline(h=mean(y), lwd=2, lty=2, col="green")#添加均直线
                      }
    xyplot(mpg~disp|transmission, data=mtcars,
           scales=list(cex=.8, col="red"),
           panel=panel.smoother,
           xlab="Displacement", ylab="Miles per Gallon",
           main="MPG vs Displacement by Transmission Type",
           sub="Dotted lines aer Group Means", aspect=1) #sub下标题
    #叠加显示分组变量图形
    library(lattice)
    mtcars$transmission <- factor(mtcars$am, levels=c(0, 1),
                                   labels=c("Automatic", "Manual"))
    densityplot(~mpg, data=mtcars,
                group=transmission,
                main="MPG Distribution by Transmission Type",
                xlab="Miles per Gallon",
                auto.key=TRUE)#auto.key手动添加图例
    auto.key=list(space="right", columns=1, title="Transmission")
    #16-4
    library(lattice)
    mtcars$transmission <- factor(mtcars$am, levels=c(0, 1),
                                  labels=c("Automatic", "Manual"))
    colors = c("red", "blue")
    lines = c(1, 2)
    points = c(16, 17)
    key.trans <- list(title="Transmission", 
                      space="bottom", columns=2,
                      #图例摆放在下方,两栏
                      text=list(levels(mtcars$transmission)),
                      #水平名称
                      points=list(pch=points, col=colors),
                      lines=list(col=colors, lty=lines),
                      cex.title=1, cex=.9)
    densityplot(~mpg, data=mtcars,
                group=transmission,
                main="MPG Distribution by Transmission Type", 
                xlab="Miles per Gallon",
                pch=points, lty=lines, col=colors,
                lwd=2, jitter=.005,
                key=key.trans)
    #16-5 包含分组变量和条件变量以及自定义图例的xyplot
    library(lattice)
    colors <- "darkgreen"
    symbols <- c(1:12)
    linetype <- c(1:3)
    CO2
    key.species <- list(title="Plant",
                        space="right",#标签在右
                        text=list(levels(CO2$Plant)),
                        points=list(pch=symbols, col=colors))
    
    xyplot(uptake~conc|Type*Treatment, data=CO2,
           group=Plant,
           type="o",
           pch=symbols, col=colors, lty=linetype,
           main="Carbon Dioxide Uptake\nin Grass Plants",
           ylab=expression(paste("Uptake",
                  bgroup("(", italic(frac("umol", "m"^2)), ")"))),
           xlab=expression(paset("Concentration",
                  bgroup("(", italic(frac(mL,L)), ")"))),
           sub="Grass Species:Echinochloa crus-galli",
           key=key.species)
    #图形参数
    show.settings()
    mysettings <- trellis.par.get()
    mysettings$superpose.symbol
    mysettings$superpose.symbol$pch <- c(1:10)
    trellis.par.set(mysettings)
    show.settings()
    #页面摆放
    #splite
    library(lattice)
    graph1 <- histogram(~height|voice.part, data=singer,
                        main="Heights of Choral Singers by Voice Part")
    graph2 <- densityplot(~height, data=singer, group=voice.part,
                          plot.points=FALSE, auto.key=list(columns=4))
    plot(graph1, split=c(1, 1, 1, 2))
      plot(graph2, split=c(1, 2, 1, 2), newpage=FALSE)
    #position
    library(lattice)
    graph1 <- histogram(~height|voice.part, data=singer,
                        main="Heights of Choral Singers by Voice Part")
    graph2 <- densityplot(~height, data=singer, group=voice.part,
                          plot.point=FALSE, auto.key=list(columns=4))
    plot(graph1, position=c(0, 3, 1, 1))
    plot(graph2, position=c(0, 0, 1, 3), newpage=FALSE)
    #ggplot2
    library(ggplot2)
    mtcars$cylinder <- as.factor(mtcars$cyl)
    qplot(cylinder, mpg, data=mtcars, geom=c("boxplot", "jitter"),
          fill=cylinder,
          main="Box plots with superimposed data points",
          xlab="Number of Cylinders",
          ylab="Miles per Gallon")
    #case2
    library(ggplot2)
    transmission <- factor(mtcars$am, levels=c(0, 1),
                           labels=c("Automatic", "Manual"))
    qplot(wt, mpg, data=mtcars, 
          color=transmission, shape=transmission,
          geom=c("point", "smooth"),
          method="lm", formula=y~x,
          xlab="Weight", ylab="Miles Per Gallon",
          main="Regression Example")
    #case3
    library(ggplot2)
    mtcars$cyl <- factor(mtcars$cyl, levels=c(4, 6, 8),
                         labels=c("4 cylinders", "6 cylinders", "8 cylinders"))
    mtcars$am <- factor(mtcars$am, levels=c(0, 1),
                        labels=c("Automatic", "Manual"))
    qplot(wt, mpg, data=mtcars, facets=am ~ cyl, size=hp)
    #case4
    library(ggplot2)
    data(singer, package="lattice")
    qplot(height, data=singer, geom=c("density"),
          facets=voice.part ~ . , fill=voice.part)
    
    #交互式图形
    install.packages(c("playwith", "latticist", "iplot", "rggobi"))
    #identify()
    plot(mtcars$wt, mtcars$mpg)
    identify(mtcars$wt, mtcars$mpg, labels=row.names(mtcars))
    #playwith
    library(playwith)
    library(lattice)
    playwith(
      xyplot(mpg~wt|factor(cyl)*factor(am),
             data=mtcars, subscripts=TRUE,
             type=c("r", "p"))
    )
    #latticist
    install.packages("latticist")
    library(latticist)
    mtcars$cyl <- factor(mtcars$cyl)
    mtcars$gear <- factor(mtcars$gear)
    latticist(mtcars, use.playwith=TRUE)
    #iplots
    library(iplots)
    attach(mtcars)
    cylinders <- factor(cyl)
    gears <- factor(gear)
    transmission <- factor(am)
    ihist(mpg)
    ibar(gears)
    iplot(mpg, wt)
    ibox(mtcars[c("mpg", "wt", "qsec", "disp", "hp")])
    ipcp(mtcars[c("mpg", "wt", "qsec", "disp", "hp")])
    imosaic(transmission, cylinders)
    detach(mtcars)
    
    ##########
    alpha
    cov
    ability.cov
    数据集:Harman74.cor
    install.packages("qcc")
    install.packages(c("playwith", "latticist", "iplot", "rggobi"))
    

    其实一个详尽的笔记应该是更好的学习分享方式,之后会注意,尽量不再出现现在这样的烂尾工程。


    ——2017.3.29
    ——实验中心510

    相关文章

      网友评论

        本文标题:R语言实战_终篇

        本文链接:https://www.haomeiwen.com/subject/hlqyottx.html