林丹和李宗伟:一个时代
library(ggplot2)
singleman <- read.csv("c:/users/liang/desktop/singleman.csv")
singleman$starttime <- as.Date(singleman$starttime)
singleman$endtime <- as.Date(singleman$endtime)
# 计算保持世界第一的周数。
singleman$week3 <- round(as.numeric(difftime(singleman$endtime, singleman$starttime, units = "weeks")))
ggplot(singleman,aes(x=athlet, y=weeks2)) + geom_col(aes(fill=country))
data:image/s3,"s3://crabby-images/92701/927014d67c6402405e801dd79d4b4903a1f3ce1d" alt=""
Rplot02.png
#数据按周数汇总
singleman2 <- aggregate(weeks2 ~ athlet, singleman, sum)
ggplot(singleman2, aes(x=athlet, y=weeks2)) +
geom_col(aes(fill=athlet)) +
geom_text(aes(y=weeks2+5,label=weeks2)) +
labs(x=NULL, y="世界排名第一周数", title="羽毛球男单历年世界排名第一",subtitle="(2002/11/07-2019/08/21)", caption="数据来源:世界羽联 制作:wintryheart") +
theme(plot.title = element_text(hjust=0.5), plot.subtitle = element_text(hjust=0.5), legend.position = "none")
data:image/s3,"s3://crabby-images/16c7f/16c7fa52742d3275b219403ab9cc5bec701b35fb" alt=""
Rplot.png
# 转换数据框,变成时间线排列的一维数据。
date <- seq.Date(from = as.Date("2002-11-07"), to = as.Date("2019-08-21"), by="week")
week <- seq(1:sum(singleman$weeks2))
athlet <- c(rep(singleman$athlet[1], singleman$weeks2[1]))
for (i in 2:20) { athlet <- append(athlet, c(rep(singleman$athlet[i], singleman$weeks2[i])))}
country <- c(rep(singleman$country[1], singleman$weeks2[1]))
for(i in 2:20) {country <- append(country, c(rep(singleman$country[i], singleman$weeks2[i])))}
singlemale <- data.frame(athlet, date, country, week)
library(gganimate)
ggplot(singlemale, aes(x=date,y=athlet)) +
geom_point(aes(color=athlet),size=1)+ scale_x_date(breaks = "1 year", date_labels = "%Y") + labs(x=NULL, y=NULL, title="羽毛球男单历年世界排名第一(2002-2019)",subtitle="(按周计算)", caption="数据来源:世界羽联 制作:wintryheart") + theme(legend.position = "none", plot.title = element_text(hjust = 0.5))
ggplot(singlemale, aes(x=date,y=athlet)) +
geom_point(aes(color=athlet),size=1) +
scale_x_date(breaks = "1 year", date_labels = "%Y") +
labs(x=NULL, y=NULL, title="羽毛球男单历年世界排名第一(2002-2019)",subtitle="(按周计算)", caption="数据来源:世界羽联 制作:wintryheart") +
theme(legend.position = "none", plot.title = element_text(hjust = 0.5)) +
transition_time(time = date) +
shadow_mark()
data:image/s3,"s3://crabby-images/d7bb4/d7bb48988d126c45981ed2341714f046a0f7b491" alt=""
Rplot01.png
data:image/s3,"s3://crabby-images/b7327/b732733fb9e15cff12ab5df67ce768abdecdbc2d" alt=""
file2d485e471dcf.gif
singlemale2 <- singlemale
#按运动员重新排序维持世界排名第一的第几周
for (i in singlemale2$athlet) {
singlemale2$week2[singlemale2$athle==i] <- seq(1:length(singlemale2$athlet[singlemale2$athle==i]))
}
ggplot(singlemale2, aes(x=athlet, y=week2)) +geom_point(aes(color=athlet), size=5) +
geom_text(aes(y=week2+2, label=week2)) +
labs(title = "羽毛球男单世界排名第一(时间:{frame_along})", x=NULL, y="排名世界第一周数") +
guides(color=FALSE)+
transition_reveal(date)+ shadow_mark(past = TRUE)
data:image/s3,"s3://crabby-images/6fe1e/6fe1ebcf4dfa5758b5963415f339359b3913702c" alt=""
file2d484874ec1 (1).gif
- 虽然添加上周数标签,但是在停顿时,数值会加小数点,等待再次上升。
data:image/s3,"s3://crabby-images/94c84/94c842e8e1b9a22abba5a86b63c75e1c1c6536a0" alt="\color\red{没办法做成变动的条形图。可能是数据格式整理的问题,即每个时间点应该包含每个类别的数据。}"
ggplot(singlemale2, aes(x=athlet, y=week2)) +geom_point(aes(color=athlet), size=3) +
labs(title = "羽毛球男单世界排名第一(时间:{frame_time})", x=NULL, y="排名世界第一周数") +
guides(color=FALSE)+
transition_time(date)+ shadow_mark()
data:image/s3,"s3://crabby-images/82fa3/82fa3001d70309135ee70aedbc331bd57ddb6c36" alt=""
file2d483d0b4465.gif
- 用shadow_mark()解决了显示上升轨迹的路径问题。
data:image/s3,"s3://crabby-images/1caa0/1caa09dcd24baef3a4a82520d71a646c8f06c094" alt="\color\red{但是添加变化的周数标签问题没有解决。}"
没完成的想法
# 运动员姓名唯一化
athlet4 <- unique(singleman$athlet)
# 按日期长度重复复制,保证每个日期都有一组运动员姓名。
athlet4 <- rep(athlet4, 876)
# 日期按运动员个数重复复制,保证每个运动员都有一组日期。
date2 <- rep(seq.Date(from = as.Date("2002-11-07"), to = as.Date("2019-08-21"), by="week"),10)
nweek <- rep(seq(1:876)*10)
# 按日期排序
date3 <- sort(date2)
library(tidyverse)
singlemale3 <- data.frame(athlet4, date3)
names(singlemale3) <- c("athlet","date")
singlemale3 <- left_join(singlemale3, singleman, by="athlet")
# 结果发现,合并数据框后,前面的处理变得无效,每个日期下运动员姓名还是有重复的。
#按运动员姓名和日期删除重复数据。
singlemale3 <- singlemale3[!duplicated(singlemale3[,c(1,2)]),]
网友评论