美文网首页人工智能&大数据
2019-nCoV疫情地图动态可视化

2019-nCoV疫情地图动态可视化

作者: 欧阳松 | 来源:发表于2020-02-03 23:49 被阅读0次

来自 https://mp.weixin.qq.com/s/9IrY9kajZG2SJcoewUVH8w

  • 1 简介

数据介绍:
该数据是从丁香园·丁香医生通过爬虫获取的全国2019-ncov病毒的感染病例。
时间的分辨率:1小时
空间分辨率:城市和省份
起止时间:从2020/1/25/17时到疫情结束

  • 2 需要的包

devtools::install_github("microly/alimap")
library(alimap) # to get China map at the prefecture city level
library(sf)
library(ggplot2)
library(dplyr)
library(tibble)
library(tidyr)
library(magrittr)
library(purrr)
library(readr)
library(stringr)
library(gganimate)
library(lubridate)
library(Cairo)
library(magick)
  • 3 地图数据

如果有本地数据,可以自行读取。
由于很多市级地名发生过变化,
而且爬取到的名称不够规范(部分缺少“市”字),
所以取城市名的前2个汉字作为键来联结两张表,并以地图数据集中的城市名为准。

# Prefecture-level city map of China, extended with `c2`: the first two
# characters of each city name, used later as the join key.
Chinamap_cities_sf <- mutate(
  map_prefecture_city(),
  c2 = str_sub(name, start = 1, end = 2)
)
  • 4 时间序列

每12小时更新1次,从早上9点到晚上9点。

# First snapshot to download.
startTime <- ymd_h("2020/1/25 21")
# Current time expressed in the Asia/Shanghai zone; the 9 and 21 o'clock
# cut-offs below are only meaningful in that zone.
nowTime <- with_tz(Sys.time(), tz = "Asia/Shanghai")

# Last snapshot to request: today 21:00 once the hour is past 21, today 09:00
# once the hour is past 9, otherwise yesterday 21:00.
endTime <- local({
  current_hour <- hour(nowTime)
  current_date <- date(nowTime)
  if (current_hour > 21) {
    current_date + dhours(21)
  } else if (current_hour > 9) {
    current_date + dhours(9)
  } else {
    current_date - ddays(1) + dhours(21)
  }
})

# Number of 12-hour steps between the first and the last snapshot.
timeLength <- time_length(interval(startTime, endTime), "hour") / 12

# Time sequence in 12-hour steps; the 6th step is dropped because its file
# returned 404 at the time the script was written.
mytime <- (startTime + dhours(12 * (0:timeLength)))[-6]
mymonth <- month(mytime)
myday <- day(mytime)
# Zero-padded hour so that every string has the same length.
myhour <- str_pad(
  as.character(hour(mytime)),
  width = 2, side = "left", pad = "0"
)

# Timestamp fragment of the download file names: <date>T<hour>.
myAPI <- paste(date(mytime), myhour, sep = "T")
  • 5 疫情数据

通过API接口读取疫情历史数据,API接口由网友提供,爬取自丁香园。

# Download one city-level snapshot and keep only the city name and its
# confirmed count.
#
# oneAPI: timestamp fragment of the CSV file name (<date>T<hour>).
# Returns a table with the two columns `city` and `confirmed_c`.
read_epidemic <- function(oneAPI) {
  snapshot_url <- paste0(
    "http://69.171.70.18:5000/download/city_level_", oneAPI, ".csv"
  )
  snapshot <- read_csv(file = snapshot_url)
  # Overwrite the downloaded headers with fixed names; the file is expected
  # to have exactly these 14 columns.
  names(snapshot) <- c(
    "x1", "unnamed", "city", "confirmed_c", "suspected_c",
    "cured_c", "dead_c", "province", "short_p", "confirmed_p",
    "suspected_p", "cured_p", "dead_p", "comment"
  )
  select(snapshot, city, confirmed_c)
}
  
  
# Download every snapshot listed in `myAPI` and stack the results into one
# long table with the columns `time`, `city` and `confirmed_c`.
epidemic_nest <- tibble(time = mytime, myAPI = myAPI) %>%
  # One downloaded table per time step, stored in the list-column `data`.
  mutate(data = map(myAPI, read_epidemic)) %>%
  select(-myAPI) %>%
  # Name the list-column explicitly: calling unnest() without `cols` is
  # deprecated in tidyr >= 1.0.0 and emits a warning.
  unnest(cols = data)
  • 5.1 分箱

将确诊人数按区间分箱(0, 1-9, 10-49, …, >=5000),得到离散的颜色等级用于绘图。
同时,由于很多市级地名发生过变化,而且爬取到的名称不够规范(部分缺少“市”字),
仍取城市名的前2个汉字作为联结键,并以地图数据集中的城市名为准。

# Bin edges for the confirmed counts; intervals are left-closed because
# cut() is called with right = FALSE below. The last edge is Inf so that the
# ">=5000" bin is really open-ended: with the former cap of 100000 any larger
# count fell outside every bin and became NA.
mybreaks <- c(0, 1, 10, 50, 100, 500, 1000, 5000, Inf)
mylabels <- c("0", "1-9", "10-49", "50-99", "100-499",
              "500-999", "1000-4999", ">=5000")

# Add the ordered bin `conf2` and the two-character join key `c2` (first two
# characters of the scraped city name) to every record.
epidemic_df <- epidemic_nest %>%
  mutate(
    conf2 = cut(confirmed_c, breaks = mybreaks,
                labels = mylabels, include.lowest = TRUE,
                right = FALSE, ordered_result = TRUE),
    c2 = str_sub(city, 1, 2)
  )
  • 6 联结表及循环绘图

# Temporary directory that receives one PNG frame per time step.
dir1 <- file.path(tempdir(), "testdir")
dir.create(dir1)

# Render one map per time step and save it as a PNG frame in `dir1`.
for (i in seq_along(mytime)) {
  # join the epidemic data of this time step with the map data
  epidemic_time <- epidemic_df %>% filter(time == mytime[i])
  epidemic_city <- Chinamap_cities_sf %>% left_join(epidemic_time, by = "c2")

  # Cities without a matching record get the lowest bin ("0"). Assigning an
  # existing level keeps the ordered factor intact; replace_na(0) is rejected
  # by tidyr >= 1.2.0 because a number cannot be cast to the factor type.
  conf2 <- epidemic_city$conf2
  conf2[is.na(conf2)] <- levels(conf2)[1]
  epidemic_city %<>% select(-c2, -city, -conf2)
  epidemic_city$conf2 <- conf2

  # plot
  gg_epidemic <- ggplot(epidemic_city) +
    geom_sf(aes(fill = conf2)) +
    coord_sf() +
    scale_fill_brewer(palette = "YlOrRd", direction = 1) +
    guides(fill = guide_legend(title = "确诊人数", reverse = TRUE)) +
    labs(title = "2019-ncov疫情数据可视化",
         subtitle = mytime[i],
         caption = "数据来源:丁香园·丁香医生") +
    theme(
      # titles
      plot.title = element_text(face = "bold", hjust = 0.5,
                                color = "black"),
      plot.subtitle = element_text(face = "bold", hjust = 0.5, size = 20,
                                   color = "red"),
      plot.caption = element_text(face = "bold", hjust = 1,
                                  color = "blue"),
      # legend
      legend.title = element_text(face = "bold",
                                  color = "black"),
      legend.text = element_text(face = "bold",
                                 color = "black"),
      legend.background = element_rect(colour = "black"),
      legend.key = element_rect(fill = NA), # legend keys have no background
      legend.position = c(0.85, 0.2),
      axis.ticks = element_blank(),
      axis.text = element_blank(),
      # plot panel
      panel.background = element_blank(),
      panel.border = element_rect(color = "black", linetype = "solid",
                                  size = 1, fill = NA)
    )

  # save the frame; the file name is <date>_<hour>.png
  ggsave(filename = paste0(date(mytime[i]), "_", hour(mytime[i]), ".png"),
         plot = gg_epidemic, path = dir1,
         width = 20, height = 20, units = "cm")
}
  • 7 动画

# Output directory for the finished GIF.
path_pre <- "./"
# Read the saved frames in time order and assemble them into an animation.
animate_epidemic <- image_animate(
  image = image_read(
    path = paste0(dir1, "/", date(mytime), "_", hour(mytime), ".png")
  )
)
anim_save(
  filename = "疫情地图可视化动态图.gif",
  animation = animate_epidemic, path = path_pre
)

# Remove the temporary frame directory. unlink() does not delete directories
# unless recursive = TRUE, so the plain call left the frames on disk.
unlink(dir1, recursive = TRUE)
2019-nCoV疫情地图动态可视化

相关文章

网友评论

    本文标题:2019-nCoV疫情地图动态可视化

    本文链接:https://www.haomeiwen.com/subject/fyjlxhtx.html