71-R语言新冠疫情地图可视化

作者: wonphen | 来源:发表于2020-05-04 20:47 被阅读0次

    1、导入数据

    > # Attach dplyr through pacman's p_load().
    > library(pacman)
    > p_load(dplyr)
    > # Read the confirmed-case time series.
    > covid <- readr::read_csv(
    +   "./data_set/novel-corona-virus-2019-dataset/time_series_covid_19_confirmed.csv"
    + )
    > # Give the two location columns short names.
    > covid <- rename(covid,
    +                 province = `Province/State`,
    +                 region = `Country/Region`)
    > names(covid)
    
    ##   [1] "province" "region"   "Lat"      "Long"     "1/22/20"  "1/23/20" 
    ##   [7] "1/24/20"  "1/25/20"  "1/26/20"  "1/27/20"  "1/28/20"  "1/29/20" 
    ##  [13] "1/30/20"  "1/31/20"  "2/1/20"   "2/2/20"   "2/3/20"   "2/4/20"  
    ##  [19] "2/5/20"   "2/6/20"   "2/7/20"   "2/8/20"   "2/9/20"   "2/10/20" 
    ##  [25] "2/11/20"  "2/12/20"  "2/13/20"  "2/14/20"  "2/15/20"  "2/16/20" 
    ##  [31] "2/17/20"  "2/18/20"  "2/19/20"  "2/20/20"  "2/21/20"  "2/22/20" 
    ##  [37] "2/23/20"  "2/24/20"  "2/25/20"  "2/26/20"  "2/27/20"  "2/28/20" 
    ##  [43] "2/29/20"  "3/1/20"   "3/2/20"   "3/3/20"   "3/4/20"   "3/5/20"  
    ##  [49] "3/6/20"   "3/7/20"   "3/8/20"   "3/9/20"   "3/10/20"  "3/11/20" 
    ##  [55] "3/12/20"  "3/13/20"  "3/14/20"  "3/15/20"  "3/16/20"  "3/17/20" 
    ##  [61] "3/18/20"  "3/19/20"  "3/20/20"  "3/21/20"  "3/22/20"  "3/23/20" 
    ##  [67] "3/24/20"  "3/25/20"  "3/26/20"  "3/27/20"  "3/28/20"  "3/29/20" 
    ##  [73] "3/30/20"  "3/31/20"  "4/1/20"   "4/2/20"   "4/3/20"   "4/4/20"  
    ##  [79] "4/5/20"   "4/6/20"   "4/7/20"   "4/8/20"   "4/9/20"   "4/10/20" 
    ##  [85] "4/11/20"  "4/12/20"  "4/13/20"  "4/14/20"  "4/15/20"  "4/16/20" 
    ##  [91] "4/17/20"  "4/18/20"  "4/19/20"  "4/20/20"  "4/21/20"  "4/22/20" 
    ##  [97] "4/23/20"  "4/24/20"  "4/25/20"  "4/26/20"
    

    2、数据适配

    REmap世界地图使用的国家(region)名称与我们数据中的名称并不完全一致,需要手动把数据中的名称改成REmap中的名称。比如美国在我们的数据中为US,而在REmap中为United States of America。

    > # Rename countries in the data to the names used by REmap's world map.
    > # Names that are not listed in the table are left unchanged.
    > # "Antigua and Barbuda" is deliberately not mapped to "Bermuda": they are
    > # different territories, and such a mapping would put Antigua and Barbuda's
    > # count on Bermuda.
    > region.map <- c(
    +   "Czechia"             = "Czech Republic",
    +   "Congo (Kinshasa)"    = "Democratic Republic of the Congo",
    +   "Timor-Leste"         = "East Timor",
    +   "Guinea-Bissau"       = "Guinea Bissau",
    +   "Cote d'Ivoire"       = "Ivory Coast",
    +   "North Macedonia"     = "Macedonia",
    +   "Serbia"              = "Republic of Serbia",
    +   "Congo (Brazzaville)" = "Republic of the Congo",
    +   "Korea, South"        = "South Korea",
    +   "Eswatini"            = "Swaziland",
    +   "Bahamas"             = "The Bahamas",
    +   "Tanzania"            = "United Republic of Tanzania",
    +   "US"                  = "United States of America",
    +   "West Bank and Gaza"  = "West Bank"
    + )
    > # Rows whose region has an entry in the table (%in% is FALSE for NA).
    > to.fix <- covid$region %in% names(region.map)
    > covid$region[to.fix] <- unname(region.map[covid$region[to.fix]])
    > # covid$region[which(covid$region=="china")] <- "xianggang"
    

    3、汇总数据

    > # One row per province/region/coordinates, keeping the count in the last
    > # (latest) date column as `vol`. This is not a sum across the date columns.
    > covid.sel <- covid %>% 
    +   reshape2::dcast(`province` + `region` + `Long` + `Lat` ~ .,
    +                   # Name the value column explicitly instead of letting
    +                   # dcast guess it (its guess is also the last column).
    +                   value.var = names(covid)[ncol(covid)],
    +                   fun.aggregate = sum) %>%
    +   # The single value column comes out named "."; rename it.
    +   rename(vol=".")
    > str(covid.sel)
    
    ## 'data.frame':    264 obs. of  5 variables:
    ##  $ province: chr  "Alberta" "Anguilla" "Anhui" "Aruba" ...
    ##  $ region  : chr  "Canada" "United Kingdom" "China" "Netherlands" ...
    ##  $ Long    : num  -116.6 -63.1 117.2 -70 149 ...
    ##  $ Lat     : num  53.9 18.2 31.8 12.5 -35.5 ...
    ##  $ vol     : num  4480 3 991 100 106 ...
    

    4、画图

    > p_load(REmap)
    > # Country names known to REmap's world map.
    > country <- data.frame(region=mapNames("world"))
    > # Attach the counts to every map name; names with no match in the data get NA.
    > # NOTE(review): covid.sel has one row per province, so a country with several
    > # provinces appears several times here - confirm how remapC treats repeats.
    > mapdata <- country %>%
    +   left_join(covid.sel,by="region") %>%
    +   select(region,vol)
    > str(mapdata)
    
    ## 'data.frame':    257 obs. of  2 variables:
    ##  $ region: chr  "Afghanistan" "Angola" "Albania" "United Arab Emirates" ...
    ##  $ vol   : num  1531 26 726 10349 3892 ...
    
    > # World choropleth of `vol` by region, dark theme, red colour range.
    > covid.world <- remapC(mapdata,
    +                       maptype = "world",
    +                       color = c('#FD0100','#FFB8B5'),
    +                       theme = get_theme("dark"),
    +                       title = "2019-nCoV全球分布图",
    +                       subtitle = "截止2020年4月26日")
    > # Printing the object renders the map.
    > covid.world
    
    ## Save img as: C:\Users\Admin\AppData\Local\Temp\RtmpKs931s/ID_20200504184333_2814895.html
    
    REmap地图

    使用REmap包画的图会通过浏览器展示出来,鼠标放在某一区域,会自动显示相应数值。
    灰色地带为region名称没有对应上的区域。

    5、按时间序列画动态图

    > p_load(animation,sp,maptools,ggplot2)
    > # Read the polygon data for the map of China.
    > china <- readShapePoly("./data_set/china_basic_map/bou2_4p.shp")
    > # Province-level name of each polygon.
    > province.name <- china$NAME %>% as.character()
    > # Flatten the polygons into a data frame of vertices.
    > china.sel <- fortify(china) %>% select(long,lat,group,id)
    > # Put each name on the first row of its polygon id ...
    > china.sel$province[!duplicated(china.sel$id)] <- province.name
    > # ... and carry it down over the remaining rows of that polygon.
    > # tidyr::fill() is used instead of the deprecated fill_().
    > china.sel <- tidyr::fill(china.sel,province,.direction="down")
    > str(china.sel)
    
    ## 'data.frame':    89912 obs. of  5 variables:
    ##  $ long    : num  121 121 122 122 122 ...
    ##  $ lat     : num  53.3 53.3 53.3 53.3 53.3 ...
    ##  $ group   : Factor w/ 925 levels "0.1","1.1","2.1",..: 1 1 1 1 1 1 1 1 1 1 ...
    ##  $ id      : chr  "0" "0" "0" "0" ...
    ##  $ province: chr  "黑龙江省" "黑龙江省" "黑龙江省" "黑龙江省" ...
    
    > # Keep only the rows for China and Taiwan*, then drop the region column.
    > china.data <- covid %>%
    +   filter(region %in% c("China","Taiwan*")) %>%
    +   select(-region)
    > 
    > # Replace the province names with the names used in the shapefile data.
    > # The vector is assigned by position, so it must follow the row order of china.data.
    > china.data$province <- c(
    +   "安徽省","北京市","重庆市","福建省","甘肃省","广东省",
    +   "广西壮族自治区","贵州省","海南省","河北省","黑龙江省","河南省",
    +   "香港特别行政区","湖北省","湖南省","内蒙古自治区","江苏省",
    +   "江西省","吉林省","辽宁省","澳门特别行政区","宁夏回族自治区","青海省",
    +   "陕西省","山东省","上海市","山西省","四川省","天津市",
    +   "西藏自治区","新疆维吾尔自治区","云南省","浙江省","台湾省"
    + )
    > 
    > # Drop Lat and Long (columns 2 and 3), attach the counts to every polygon
    > # vertex of the matching province, then drop the polygon id.
    > china.data <- china.data %>%
    +   select(-Lat,-Long) %>%
    +   right_join(china.sel,by="province") %>%
    +   select(-id)
    > str(china.data)
    
    > # Animation settings: frame interval, path to the ImageMagick converter,
    > # and frame width/height.
    > ani.options(interval=0.5,
    +             convert=shQuote("C:/Program Files/ImageMagick-7.0.10-Q16/convert.exe"),
    +             ani.width=800,ani.height=800)
    > 
    > # Draw one filled map per sampled date and assemble the frames into a GIF.
    > saveGIF(
    +   # Columns 2-97 of china.data are the daily counts; every 11th one is drawn.
    +   for(i in seq(2,97,by=11)) {
    +     day <- names(china.data)[i]
    +     # Polygon columns plus that day's count, as a plain data.frame.
    +     frame.df <- china.data[,c("province","long","lat","group",day)] %>%
    +       as.data.frame()
    +     names(frame.df)[5] <- "vol"
    +     p <- ggplot(frame.df,aes(long,lat,group=group,fill=vol)) +
    +          geom_polygon(col="gray60") +
    +          scale_fill_gradient(low="white",high="red") +
    +          labs(title = day,x="",y="") +
    +          theme(panel.grid = element_blank(),
    +                panel.background = element_blank(),
    +                axis.text = element_blank(),
    +                axis.ticks = element_blank(),
    +                legend.position = "none")
    +     print(p)
    +   },
    +   # Explicit file name: the default "animation.gif" would be overwritten by
    +   # any other saveGIF() call that also relies on the default.
    +   movie.name = "covid_china_map.gif"
    + )
    
    疫情时间序列图

    因为湖北的确诊数远大于其他省份,其他区域的填充颜色会显得很浅,难以区分。
    这个地方需要注意的有两点:1、数据一定要先用as.data.frame()转换为data.frame,因为这里目前还不支持tibble;2、图片的长宽比例需要手动调整(通过ani.options中的ani.width和ani.height),不然图片会很难看。

    6、动态点图

    > # Blank base map of China: white provinces with gray outlines, no axes or legend.
    > p <- ggplot() +
    +   geom_polygon(mapping=aes(long,lat,group=group),
    +                data=china,
    +                colour="gray40",
    +                fill="white") +
    +   labs(title = "疫情地图",x="",y="") +
    +   theme(legend.position = "none",
    +         axis.ticks = element_blank(),
    +         axis.text = element_blank(),
    +         panel.background = element_blank(),
    +         panel.grid = element_blank())
    >
    > # Rows for China and Taiwan*, keeping only Lat, Long and the daily counts.
    > china.data2 <- covid %>%
    +   filter(region %in% c("China","Taiwan*")) %>%
    +   select(-region,-province)
    
    > # Draw one jittered point layer per sampled date on top of the base map `p`
    > # and assemble the frames into a GIF.
    > saveGIF(
    +   # Columns 1-2 of china.data2 are Lat/Long; columns 3-98 are the daily
    +   # counts, of which every 11th one is drawn.
    +   for(i in seq(3,98,by=11)) {
    +     day <- names(china.data2)[i]
    +     # Coordinates plus that day's count, as a plain data.frame.
    +     pts <- china.data2[,c("Lat","Long",day)] %>% as.data.frame()
    +     names(pts)[3] <- "vol"
    +     pic <- p + geom_jitter(aes(Long,Lat,size=vol),
    +                           data=pts,shape=19,col="red") +
    +       scale_size(range = c(1.2,10)) +
    +       labs(title = day,x="",y="")
    +     print(pic)
    +   },
    +   # Explicit file name: the default "animation.gif" is shared by every
    +   # saveGIF() call that does not name its output.
    +   movie.name = "covid_china_points.gif")
    
    疫情时间序列点图

    相关文章

      网友评论

        本文标题:71-R语言新冠疫情地图可视化

        本文链接:https://www.haomeiwen.com/subject/ybbyghtx.html