美文网首页
中国老年人婚姻状况图(数据转换及拼图)

中国老年人婚姻状况图(数据转换及拼图)

作者: 冬之心 | 来源:发表于2019-08-23 16:22 被阅读0次
    knitr::opts_chunk$set(echo = TRUE, warning = FALSE, message = FALSE)
    

    自然数转为比例

    语法

    rescale(x, to = c(0, 1), from = range(x, na.rm = TRUE, finite = TRUE), ...)

    参数

    • x:continuous vector of values to manipulate. 要操作的数值向量

    • to: output range (numeric vector of length two) 输出范围

    • from:input range (vector of length two). If not given, is calculated from the range of x 输入范围,默认为x的范围

    • ... :other arguments passed on to methods

    例子

    library(scales)
    
    x <- c(127.31, 5787.47, 3703.58, 78.61)
    
    #转成(0,1)区间,输入范围为 [0,sum(x)]  ,实际上算的是各值占比。
    rescale(x, to=c(0,1), from=c(0,sum(x)))
    
    #转成(0,100)区间
    rescale(x, to=c(0,100), from=c(0,sum(x)))
    
    

    小数点转成百分比

    语法

    percent(x, accuracy = NULL, scale = 100, prefix = "",
    suffix = "%", big.mark = " ", decimal.mark = ".", trim = TRUE,
    ...)

    参数

    • accuracy: Number to round to, NULL for automatic guess. 数值精确度。不设置,函数会自动判断 设定小数位。

    • scale:A scaling factor: x will be multiply by scale before formating (useful if the underlying data is on another scale, e.g. for computing percentages or thousands). 刻度,默认为100。

    • prefix, suffix:Symbols to display before and after value. 前缀,后缀。后缀默认为%

    • big.mark:Character used between every 3 digits to separate thousands. 大数的分隔符。

    • decimal.mark:The character to be used to indicate the numeric decimal point. 小数点的分隔符。

    • trim:Logical, if FALSE, values are right-justified to a common width 修剪,默认为TRUE。如果是FALSE,值右对齐到一个公共宽度(所有值等宽)。如果是TRUE,值的前导空格就会删除。

    percent(rescale(x, to=c(0,1), from=c(0,sum(x[1:4]))))
    [1] "0%" "17%" "33%" "50%" "67%" "83%" "100%" "117%" "133%" "150%" "167%" "833%"
    percent(rescale(x, to=c(0,1), from=c(0,sum(x[1:4]))), trim=FASLE)
    [1] " 0%" " 17%" " 33%" " 50%" " 67%" " 83%" "100%" "117%" "133%" "150%" "167%" "833%"

    例子

    
    #不保留小数位
    percent(rescale(x, to=c(0,1), from=c(0,sum(x[1:4]))), accuracy = 1)
    
    #保留两位小数
    percent(rescale(x, to=c(0,1), from=c(0,sum(x[1:4]))), accuracy = .01)
    
    

    中国老年人婚姻状况变化

    library(reshape2)
    library(tidyverse)
    library(scales)
    
    # 录入数据,并整理成数据框
    marriage <- c("未婚","有配偶","丧偶","离婚")
    y1990 <- c(127.31, 5787.47, 3703.58, 78.61)
    y2000 <- c(212.17, 8616.39, 3885.58,84.26)
    y2010 <- c(313.68, 12459.03, 4747.92, 138.08)
    
    
    marriage2 <- data.frame(marriage, y1990, y2000, y2010)
    
    marriage3 <- melt(marriage2, id.vars="marriage", variable.name="year", value.name="population")
    
    marriage3$marriage <- factor(marriage3$marriage, levels= c("未婚","有配偶","丧偶","离婚"), c("未婚","有配偶","丧偶","离婚"))
    
    marriage3$year <- str_replace(marriage3$year,"y","")
    
    
    
    #根据人数,计算比例
    y1990p <- percent(rescale(y1990, to =c(0,1), from=c(0,sum(y1990))),accuracy=.01, suffix = "")
    y2000p <- percent(rescale(y2000, to =c(0,1), from=c(0,sum(y2000))),accuracy=.01, suffix = "")
    y2010p <- percent(rescale(y2010, to =c(0,1), from=c(0,sum(y2010))),accuracy=.01, suffix = "")
    
    marriage_P <- data.frame(marriage, y1990p, y2000p, y2010p)
    marriage_P2 <- melt(marriage_P, id.vars="marriage", variable.name = "year", value.name = "percent")
    
    marriage_P2$marriage <- factor(marriage_P2$marriage, levels= c("未婚","有配偶","丧偶","离婚"), c("未婚","有配偶","丧偶","离婚"))
    
    #删除year值中的"y"和“p",只提取年份。
    marriage_P2$year <- str_sub(marriage_P2$year, 2,5)
    
    # 不知道为什么,如果直接把marriage_P2整个表合并进来,不会改变原变量的数据类型。但是如果只合并marriage_P2$percent,会改变percent变量的数据类型,变成因子型。
    marriage4 <- cbind(marriage3, marriage_P2$percent)
    
    names(marriage4)[4] <- "percent"
    
    #因子型 转数值型,不能直接转,一定要先转成字符型,再转成数值型。
    
    marriage4$percent <- as.numeric(as.character(marriage4$percent))
    
    library(ggplot2)
    
    ggplot(marriage4, aes(x=year, y=percent, group=marriage) ) + 
      geom_col(aes(fill=marriage), position="dodge") + 
      geom_text(aes(label=percent, y= percent+0.5), position = position_dodge(width = 0.9), vjust=0) + 
      labs(x=NULL, y=NULL, fill="婚姻类型", title="比例变化图") +
      theme(legend.position = c(0.8, 0.8))
    
    ggplot(marriage4, aes(x=year, y=population, group=marriage)) + 
      geom_line(aes(colour=marriage), size=2) +
      geom_point(aes(shape=marriage),size=2) + 
      facet_wrap(.~marriage, scales="free") + 
      labs(x=NULL, y="人口数(万人)", title="人口变化图") + 
      theme(legend.position = "none")
    
    
    Rplot04.png Rplot021.png

    拼图

    拼图包常用有三个:

    • gridExtra包的grid.arrange()函数
    • ggpubr包的ggarange()函数
    • cowplot包的ggdraw()+draw_plot()函数

    参见:

    
    library(cowplot)
    
    p1<- ggplot(marriage4, aes(x=year, y=percent, group=marriage) ) + 
         geom_col(aes(fill=marriage), position="dodge") + 
         geom_text(aes(label=percent, y= percent+0.5), position = position_dodge(width = 0.9), vjust=0) + 
         labs(x=NULL, y="比例(%)", fill="婚姻类型") + 
         theme(legend.position = c(0.92, 0.85), legend.background = element_blank())
    
    p2 <- ggplot(marriage4, aes(x=year, y=population, group=marriage)) + 
      geom_line(aes(colour=marriage), size=2) +
      geom_point(aes(shape=marriage),size=2) + 
      facet_wrap(.~marriage, scales="free") + 
      labs(x=NULL, y="人口数(万人)") + 
      theme(legend.position = "none")
    
    ggdraw() + 
      draw_plot(p1, 0,0.1,0.5,0.85) + 
      draw_plot(p2, 0.5,0.1,0.5,0.85) + 
      draw_plot_label(c("比例图","人口图"),x=c(0,0.5), y=c(1,1)) +
      draw_plot_label("数据来源:中国人口普查 制图:李亮", x=0.63, y=0.1, size=8)
    
    
    • ggdraw() 在ggplot图的上面调协 一个绘图层。
    • draw_plot(plot, x = 0, y = 0, width = 1, height = 1, scale = 1,
      hjust = 0, vjust = 0) 在ggdraw画布上的某个地方放置一个plot图
      • (0, 0, 0.5, 1) 左半图;(0.5, 0, 0.5, 1) 右半图
      • (0, 0.5 , 1, 0.5) 上半图;(0, 0, 1, 0.5) 下半图
    • draw_plot_label(label, x = 0, y = 1, hjust = -0.5, vjust = 1.5,
      size = 16, fontface = "bold", family = NULL, color = NULL,
      colour, ...) 给图添加标签。
    Rplot06.png

    程序改进

    利用通道分组计算新值。

    
    marriage <- c("未婚","有配偶","丧偶","离婚")
    y1990 <- c(127.31, 5787.47, 3703.58, 78.61)
    y2000 <- c(212.17, 8616.39, 3885.58,84.26)
    y2010 <- c(313.68, 12459.03, 4747.92, 138.08)
    
    marriage2 <- data.frame(marriage, y1990, y2000, y2010)
    marriage3 <- melt(marriage2, id.vars="marriage", variable.name="year", value.name="population")
    marriage3$marriage <- factor(marriage3$marriage, levels= c("未婚","有配偶","丧偶","离婚"), c("未婚","有配偶","丧偶","离婚"))
    marriage3$year <- str_replace(marriage3$year,"y","")
    
    # 按year分组,计算各婚姻类别人口占某一year组人口的百分比。
    marriage3 <- marriage3 %>%
     group_by(year) %>%
     mutate(percent= percent(rescale(population, to= c(0,1), from=c(0,sum(population))), accuracy=0.01, suffix=""))
    
    marriage3
    

    合并数据框时应注意数据类型

    y1990p <- percent(rescale(y1990, to =c(0,1), from=c(0,sum(y1990))),accuracy=.01, suffix = "")
    y2000p <- percent(rescale(y2000, to =c(0,1), from=c(0,sum(y2000))),accuracy=.01, suffix = "")
    y2010p <- percent(rescale(y2010, to =c(0,1), from=c(0,sum(y2010))),accuracy=.01, suffix = "")
    
    # 注意percent转化出来的是字符型列表
    # 注意字符型列表转成数据框时,默认会变成因子,给后面数据处理带来麻烦。因此要加参数stringsAsFactors=FALSE
    
    marriage_P <- data.frame(marriage, y1990p, y2000p, y2010p, stringsAsFactors=FALSE)
    marriage_P2 <- melt(marriage_P, id.vars="marriage", variable.name = "year", value.name = "percent")
    marriage_P2 <- as_tibble(marriage_P2)
    marriage_P2
    # 注意melt()函数在数据框转置时measure.vars变成的新变量是因子型,如本例中的year
    
    
    # 使用tidyr包中的gather()函数, 默认factor_key = FALSE, 即Key值被存为字符型。如果TRUE,则存为因子型。
    marriage_P <- data.frame(marriage, y1990p, y2000p, y2010p, stringsAsFactors=FALSE)
    marriage_P2 <- gather(marriage_P, key = "year", value = "percent", - marriage)
    marriage_P2 <- as_tibble(marriage_P2)
    marriage_P2
    
    

    相关文章

      网友评论

          本文标题:中国老年人婚姻状况图(数据转换及拼图)

          本文链接:https://www.haomeiwen.com/subject/fudtectx.html