林丹和李宗伟：一个时代


library(ggplot2)
singleman <- read.csv("c:/users/liang/desktop/singleman.csv")
singleman$starttime <- as.Date(singleman$starttime)
singleman$endtime <- as.Date(singleman$endtime)
# 计算保持世界第一的周数。
singleman$week3 <- round(as.numeric(difftime(singleman$endtime, singleman$starttime, units = "weeks")))

ggplot(singleman,aes(x=athlet, y=weeks2)) + geom_col(aes(fill=country))

Rplot02.png

#数据按周数汇总
singleman2 <- aggregate(weeks2 ~ athlet, singleman, sum)

ggplot(singleman2, aes(x=athlet, y=weeks2)) +
  geom_col(aes(fill=athlet)) + 
  geom_text(aes(y=weeks2+5,label=weeks2)) + 
  labs(x=NULL, y="世界排名第一周数", title="羽毛球男单历年世界排名第一",subtitle="（2002/11/07-2019/08/21）", caption="数据来源：世界羽联  制作：wintryheart") + 
  theme(plot.title = element_text(hjust=0.5), plot.subtitle = element_text(hjust=0.5), legend.position = "none")

Rplot.png

# 转换数据框，变成时间线排列的一维数据。
date <- seq.Date(from = as.Date("2002-11-07"), to = as.Date("2019-08-21"), by="week")

week <- seq(1:sum(singleman$weeks2))

athlet <- c(rep(singleman$athlet[1], singleman$weeks2[1]))
for (i in 2:20) { athlet <- append(athlet, c(rep(singleman$athlet[i], singleman$weeks2[i])))}

country <- c(rep(singleman$country[1], singleman$weeks2[1]))
for(i in 2:20) {country <- append(country, c(rep(singleman$country[i], singleman$weeks2[i])))}

singlemale <- data.frame(athlet, date, country, week)


library(gganimate)

ggplot(singlemale, aes(x=date,y=athlet)) + 
  geom_point(aes(color=athlet),size=1)+ scale_x_date(breaks = "1 year", date_labels = "%Y") + labs(x=NULL, y=NULL, title="羽毛球男单历年世界排名第一（2002-2019）",subtitle="(按周计算)", caption="数据来源：世界羽联  制作：wintryheart") + theme(legend.position = "none", plot.title = element_text(hjust = 0.5)) 

ggplot(singlemale, aes(x=date,y=athlet)) + 
  geom_point(aes(color=athlet),size=1) + 
  scale_x_date(breaks = "1 year", date_labels = "%Y") + 
  labs(x=NULL, y=NULL, title="羽毛球男单历年世界排名第一（2002-2019）",subtitle="(按周计算)", caption="数据来源：世界羽联  制作：wintryheart") + 
  theme(legend.position = "none", plot.title = element_text(hjust = 0.5)) + 
  transition_time(time = date) + 
  shadow_mark()

Rplot01.png

file2d485e471dcf.gif


singlemale2 <- singlemale

#按运动员重新排序维持世界排名第一的第几周
for (i in singlemale2$athlet) {
  singlemale2$week2[singlemale2$athle==i] <- seq(1:length(singlemale2$athlet[singlemale2$athle==i]))
  }

ggplot(singlemale2, aes(x=athlet, y=week2)) +geom_point(aes(color=athlet), size=5) +
   geom_text(aes(y=week2+2, label=week2)) + 
   labs(title = "羽毛球男单世界排名第一（时间：{frame_along}）", x=NULL, y="排名世界第一周数") + 
   guides(color=FALSE)+
   transition_reveal(date)+ shadow_mark(past = TRUE)

file2d484874ec1 (1).gif

虽然添加上周数标签，但是在停顿时，数值会加小数点，等待再次上升。
$\color\red{没办法做成变动的条形图。可能是数据格式整理的问题，即每个时间点应该包含每个类别的数据。}$

ggplot(singlemale2, aes(x=athlet, y=week2)) +geom_point(aes(color=athlet), size=3) + 
   labs(title = "羽毛球男单世界排名第一（时间：{frame_time}）", x=NULL, y="排名世界第一周数") + 
   guides(color=FALSE)+
   transition_time(date)+ shadow_mark()

file2d483d0b4465.gif

用shadow_mark()解决了显示上升轨迹的路径问题。
$\color\red{但是添加变化的周数标签问题没有解决。}$

没完成的想法


# 运动员姓名唯一化
athlet4 <- unique(singleman$athlet)

# 按日期长度重复复制，保证每个日期都有一组运动员姓名。
athlet4 <- rep(athlet4, 876)
# 日期按运动员个数重复复制，保证每个运动员都有一组日期。
date2 <- rep(seq.Date(from = as.Date("2002-11-07"), to = as.Date("2019-08-21"), by="week"),10)
nweek <- rep(seq(1:876)*10)
# 按日期排序
date3 <- sort(date2)

library(tidyverse)
singlemale3 <- data.frame(athlet4, date3)
names(singlemale3) <- c("athlet","date")
singlemale3 <- left_join(singlemale3, singleman, by="athlet")
# 结果发现，合并数据框后，前面的处理变得无效，每个日期下运动员姓名还是有重复的。

#按运动员姓名和日期删除重复数据。
singlemale3 <- singlemale3[!duplicated(singlemale3[,c(1,2)]),]