- 原文来自 https://mp.weixin.qq.com/s/dgtli_3m8Mxw7TMRNvtA6A (非常感谢原作者)
- 科比是我们每个80后篮球爱好者都喜爱的球员,当年没少追他的比赛。可惜他英年早逝,只能用R来分析他职业生涯的投篮数据,以此纪念!
- 科比数据下载:
https://share.weiyun.com/5cGUFvK
library(tidyverse)
library(gridExtra)
library(ggplot2)
library(ggpubr)
# Load the shot log from "data.csv" (resolved against the working directory)
# into the data frame `shots`, which every plot below reads from.
shots <- read.csv(file = "data.csv")
投球类型:
# Scatter of shot locations coloured by combined_shot_type.
# Jump shots are drawn first as a faint grey layer (lower alpha, fixed colour)
# so the remaining shot types, drawn on top with a colour legend, stay visible.
ggplot() +
  geom_point(
    mapping = aes(x = lon, y = lat),
    data = filter(shots, combined_shot_type == "Jump Shot"),
    color = "grey",
    alpha = 0.3
  ) +
  geom_point(
    mapping = aes(x = lon, y = lat, color = combined_shot_type),
    data = filter(shots, combined_shot_type != "Jump Shot"),
    alpha = 0.8
  ) +
  # Restrict the latitude axis; points outside this window are not drawn.
  ylim(33.7, 34.0883) +
  labs(title = "Shot type") +
  theme_void() +
  theme(
    plot.title = element_text(hjust = 0.5),
    legend.title = element_blank()
  )

投球范围
# p2: map of every shot location, coloured by shot_zone_range (legend hidden).
p2 <- ggplot(data = shots, mapping = aes(x = lon, y = lat)) +
  geom_point(mapping = aes(colour = shot_zone_range)) +
  # Restrict the latitude axis; points outside this window are not drawn.
  ylim(33.7, 34.0883) +
  labs(title = "Shot zone range") +
  theme_void() +
  theme(
    plot.title = element_text(hjust = 0.5),
    legend.position = "none"
  )

# p3: number of shots per shot_zone_range value; fct_infreq() orders the bars
# from the most to the least frequent value.
p3 <- ggplot(data = shots, mapping = aes(x = fct_infreq(shot_zone_range))) +
  geom_bar(mapping = aes(fill = shot_zone_range)) +
  labs(y = "Frequency") +
  theme_bw() +
  theme(
    legend.position = "none",
    axis.title.x = element_blank()
  )

# Stack the map above the bar chart in a single column with automatic
# panel labels.
ggarrange(p2, p3, labels = "AUTO", ncol = 1)

投球区域:
# Shot zone area
# p4: map of every shot location, coloured by shot_zone_area (legend hidden).
p4 <- ggplot(data = shots, mapping = aes(x = lon, y = lat)) +
  geom_point(mapping = aes(color = shot_zone_area)) +
  # Restrict the latitude axis; points outside this window are not drawn.
  ylim(33.7, 34.0883) +
  labs(title = "Shot zone area") +
  theme_void() +
  theme(
    plot.title = element_text(hjust = 0.5),
    legend.position = "none"
  )

# p5: number of shots per shot_zone_area value, most frequent first.
# The x tick labels are shrunk (size 7).
p5 <- ggplot(data = shots, mapping = aes(x = fct_infreq(shot_zone_area))) +
  geom_bar(mapping = aes(fill = shot_zone_area)) +
  labs(y = "Frequency") +
  theme_bw() +
  theme(
    legend.position = "none",
    axis.title.x = element_blank(),
    axis.text.x = element_text(size = 7)
  )

# Stack the map above the bar chart in a single column with automatic
# panel labels.
ggarrange(p4, p5, labels = "AUTO", ncol = 1)

按球场基本区域(shot_zone_basic)划分:
# p6: map of every shot location, coloured by shot_zone_basic (legend hidden).
p6 <- ggplot(data = shots, mapping = aes(x = lon, y = lat)) +
  geom_point(mapping = aes(colour = shot_zone_basic)) +
  # Restrict the latitude axis; points outside this window are not drawn.
  ylim(33.7, 34.0883) +
  labs(title = "Shot zone basic") +
  theme_void() +
  theme(
    plot.title = element_text(hjust = 0.5),
    legend.position = "none"
  )

# p7: number of shots per shot_zone_basic value, most frequent first.
# The x tick labels are shrunk (size 6.3).
p7 <- ggplot(data = shots, mapping = aes(x = fct_infreq(shot_zone_basic))) +
  geom_bar(mapping = aes(fill = shot_zone_basic)) +
  labs(y = "Frequency") +
  theme_bw() +
  theme(
    legend.position = "none",
    axis.title.x = element_blank(),
    axis.text.x = element_text(size = 6.3)
  )

# Stack the map above the bar chart in a single column with automatic
# panel labels.
ggarrange(p6, p7, labels = "AUTO", ncol = 1)

不同投篮方式及其准确率:
# Dot plot of shooting accuracy per action_type, sorted by accuracy.
# Only action types with more than 20 shots of known outcome are shown, so
# each plotted accuracy is backed by a minimum sample size.
shots %>%
  group_by(action_type) %>%
  summarise(
    Accuracy = mean(shot_made_flag, na.rm = TRUE),
    # Count only rows with a recorded outcome. Rows where shot_made_flag is NA
    # are dropped from the mean above, so counting them with n() would let
    # action types through the threshold whose accuracy rests on far fewer
    # (possibly zero) observed shots.
    counts = sum(!is.na(shot_made_flag))
  ) %>%
  filter(counts > 20) %>%
  ggplot(aes(x = reorder(action_type, Accuracy), y = Accuracy)) +
  geom_point(aes(colour = Accuracy), size = 3) +
  # Red for low accuracy, green for high accuracy.
  scale_colour_gradient(low = "orangered", high = "chartreuse3") +
  labs(title = "Accuracy by shot type") +
  theme_bw() +
  theme(
    axis.title.y = element_blank(),
    legend.position = "none",
    plot.title = element_text(hjust = 0.5)
  ) +
  # Flip the axes so the action type names are listed vertically.
  coord_flip()

准确率随赛季的变化:
# Line-and-marker chart of overall shooting accuracy for each season.
# Accuracy is the mean of shot_made_flag within a season, ignoring NA rows.
ggplot(
  data = summarise(
    group_by(shots, season),
    Accuracy = mean(shot_made_flag, na.rm = TRUE)
  ),
  # group = 1 places all seasons in one group so geom_line connects them
  # as a single line.
  mapping = aes(x = season, y = Accuracy, group = 1)
) +
  geom_line(mapping = aes(color = Accuracy)) +
  geom_point(mapping = aes(color = Accuracy), size = 3) +
  # Red for low accuracy, green for high accuracy.
  scale_colour_gradient(low = "orangered", high = "chartreuse3") +
  labs(x = "Season", title = "Accuracy by season") +
  theme_bw() +
  theme(
    plot.title = element_text(hjust = 0.5),
    axis.text.x = element_text(angle = 45, hjust = 1),
    legend.position = "none"
  )

季后赛与常规赛的准确率随赛季的变化:
# Per-season shooting accuracy, split into playoff games (playoffs == 1) and
# regular-season games (playoffs == 0), drawn as two labelled series.
# A season with no rows for one of the two game types yields NaN for that
# series (mean of an empty vector), which leaves a gap in its line.
ggplot(
  data = summarise(
    group_by(shots, season),
    Playoff = mean(shot_made_flag[playoffs == 1], na.rm = TRUE),
    RegularSeason = mean(shot_made_flag[playoffs == 0], na.rm = TRUE)
  ),
  # group = 1 places all seasons in one group so each geom_line connects
  # them as a single line.
  mapping = aes(x = season, group = 1)
) +
  # The constant strings mapped to colour become the legend entries.
  geom_line(mapping = aes(y = Playoff, color = "Playoff")) +
  geom_line(mapping = aes(y = RegularSeason, color = "RegularSeason")) +
  geom_point(mapping = aes(y = Playoff, color = "Playoff"), size = 3) +
  geom_point(
    mapping = aes(y = RegularSeason, color = "RegularSeason"),
    size = 3
  ) +
  labs(
    x = "Season",
    y = "Accuracy",
    title = "Accuracy by season",
    subtitle = "Playoff and Regular Season"
  ) +
  theme_bw() +
  theme(
    plot.title = element_text(hjust = 0.5),
    plot.subtitle = element_text(hjust = 0.5),
    axis.text.x = element_text(angle = 45, hjust = 1),
    legend.position = "bottom",
    legend.title = element_blank()
  )

网友评论