数据主题:Rap Artists
数据源:
https://raw.githubusercontent.com/rfordatascience/tidytuesday/master/data/2020/2020-04-14/polls.csv
这次挑的图比较简单,比较有意思的就是纯黑背景
实际就是封装成一个函数可以单独调用
前置知识
- 黑色主题
黑色主题代码:Black theme for ggplot2
主题代码和简单示例如下:
theme_black = function(base_size = 12, base_family = "") {
theme_grey(base_size = base_size, base_family = base_family) %+replace%
theme(
# Specify axis options
axis.line = element_blank(),
axis.text.x = element_text(size = base_size*0.8, color = "white", lineheight = 0.9),
axis.text.y = element_text(size = base_size*0.8, color = "white", lineheight = 0.9),
axis.ticks = element_line(color = "white", size = 0.2),
axis.title.x = element_text(size = base_size, color = "white", margin = margin(0, 10, 0, 0)),
axis.title.y = element_text(size = base_size, color = "white", angle = 90, margin = margin(0, 10, 0, 0)),
axis.ticks.length = unit(0.3, "lines"),
# Specify legend options
legend.background = element_rect(color = NA, fill = "black"),
legend.key = element_rect(color = "white", fill = "black"),
legend.key.size = unit(1.2, "lines"),
legend.key.height = NULL,
legend.key.width = NULL,
legend.text = element_text(size = base_size*0.8, color = "white"),
legend.title = element_text(size = base_size*0.8, face = "bold", hjust = 0, color = "white"),
legend.position = "right",
legend.text.align = NULL,
legend.title.align = NULL,
legend.direction = "vertical",
legend.box = NULL,
# Specify panel options
panel.background = element_rect(fill = "black", color = NA),
panel.border = element_rect(fill = NA, color = "white"),
panel.grid.major = element_line(color = "grey35"),
panel.grid.minor = element_line(color = "grey20"),
panel.spacing = unit(0.5, "lines"),
# Specify facetting options
strip.background = element_rect(fill = "grey30", color = "grey10"),
strip.text.x = element_text(size = base_size*0.8, color = "white"),
strip.text.y = element_text(size = base_size*0.8, color = "white",angle = -90),
# Specify plot options
plot.background = element_rect(color = "black", fill = "black"),
plot.title = element_text(size = base_size*1.2, color = "white"),
plot.margin = unit(rep(1, 4), "lines")
)
}
library(patchwork)
p1 <- ggplot(mtcars, aes(wt, mpg)) + geom_point()
p2 <- ggplot(mtcars, aes(wt, mpg)) + geom_point(color = "white") + theme_black()
p3 <- iris %>%
ggplot(aes(Sepal.Length,Petal.Length,color=Species)) +
geom_point() +
theme(
legend.position = c(0.85,0.18)
)
p4 <- iris %>%
ggplot(aes(Sepal.Length,Petal.Length,color=Species)) +
geom_point() +
theme_black() +
theme(
legend.position = c(0.85,0.18)
)
(p1 + p3)/(p2 + p4)
- gghighlight
gghighlight()
can highlight almost any geoms.
本质就是把感兴趣的点highlight
我觉得理解成非感兴趣的点unhighlight可能更好
对示例简单修改后如下:
# 构建数据
dat <- data.frame(
idx = c(1, 1, 1, 2, 2, 2, 3, 3, 3),
value = c(1, 2, 3, 10, 11, 12, 9, 10, 11),
category = rep(c("apple", "banana", "strawberry"), 3),
stringsAsFactors = FALSE
)
- (1) 各分组最大值符合条件则highlight该分组
p1 <- ggplot(dat, aes(idx, value, colour = category)) +
geom_line() +
gghighlight(max(value) > 10)
可以看到gghighlight会设置打标签
本质就是ggrepel::geom_label_repel()
,所以可以进行个性化调整
- (2) 分组后按数据筛选感兴趣的点
ggplot(dat, aes(idx, value)) +
geom_point() +
gghighlight(value > 2, label_key = category)
依然会对highlight的点打上标签
但如果点过多会有如下warning并且不打标签,上限应该是10个
label_key: category
Too many data points, skip labeling
- (2) 可以调节非highlight数据的参数:
unhighlighted_params
就是标识但弱化非感兴趣数据的存在,举两个例子
p1 <- ggplot(dat, aes(idx, value, colour = category)) +
geom_line(size = 4) +
gghighlight(max(value) > 10,
unhighlighted_params = list(size = 2)
)
p2 <- ggplot(mpg, aes(displ, hwy)) +
geom_point(aes(fill=class),shape=21,color='white',size=2.5,stroke=1) +
gghighlight::gghighlight(hwy > 19,
unhighlighted_params = list(size = 1))
p1 + p2
画图
- 第一张图:歌曲年份和排名的散点图
# 数据源
polls <- readr::read_csv('https://raw.githubusercontent.com/rfordatascience/tidytuesday/master/data/2020/2020-04-14/polls.csv')
# 注册字体
windowsFonts(HEL=windowsFont("Helvetica CE 55 Roman"),
RMN=windowsFont("Times New Roman"),
ARL=windowsFont("Arial"),
JBM=windowsFont("JetBrains Mono"))
polls %>%
# 自定义扩大了数据范围利于结果呈现
mutate(points = case_when(rank == 1 ~ 10,
rank == 2 ~ 8,
rank == 3 ~ 6,
rank == 4 ~ 4,
rank == 5 ~ 2)) %>%
group_by(title, year, gender) %>%
summarise(sum_points = sum(points)) %>%
# arrange(-var) 等价于 arrange(desc(var))
arrange(-sum_points) %>%
ggplot(aes(year, sum_points)) +
geom_point(position = "jitter") +
# 黑色背景下highlight无论是感兴趣还是非兴趣的数据点颜色均会反转成可见
gghighlight::gghighlight(sum_points > 90, label_key = title,
# 这里原作者没有修改
label_params = list(fill=NA, colour="white",
size = 5, family='ARL',
segment.colour = NA)) +
labs(title = "Top Rated Songs",
x = "Year",
y= "Total Points") +
# 加上黑色主题函数
theme_black() +
# 如果需要调整各参数需要在主题背景之后设置否则会被覆盖
theme(
text = element_text(family="ARL"),
plot.title = element_text(size = 20,vjust = 2),
axis.title.x = element_text(size = 16),
axis.title.y = element_text(size = 16),
axis.text.x = element_text(size = 13),
axis.text.y = element_text(size = 13)
)
- 第二张图:探究歌曲排名与歌手合作的关系
res <- polls %>%
# 利用正则和文本检测寻找特定符号 ft 和 & 明确有无合作
mutate(first_artist = gsub( " ft.*$", "", artist ),
first_artist = gsub( " &.*$", "", first_artist ),
collaboration = case_when(str_detect(artist, "ft") ~ "Yes",
str_detect(artist, "&") ~ "Yes",
TRUE ~ "No"),
points = case_when(rank == 1 ~ 10,
rank == 2 ~ 8,
rank == 3 ~ 6,
rank == 4 ~ 4,
rank == 5 ~ 2)) %>%
# 根据歌手和合作与否分别计算平均歌曲得分
group_by(first_artist, collaboration) %>%
summarise(sum_points = sum(points),
n = n(),
avg_points = sum_points/n) %>%
group_by(first_artist) %>%
# 等价于 filter(n() == 2),筛选出有无合作均发过歌曲的歌手
filter(n() > 1) %>%
arrange(-avg_points)
res %>%
ggplot(aes(avg_points, first_artist, color = collaboration)) +
geom_line(aes(group = first_artist)) +
geom_point() +
scale_color_manual(values = c("red", "green"), name = "Collaboration?") +
labs(title = "Do artists get higher rankings when they collaborate?",
y = "",
x = "Average Points Rating") +
theme_black() +
theme(
text = element_text(family="ARL"),
plot.title = element_text(size = 17,vjust = 4),
axis.title.x = element_text(size = 14),
axis.title.y = element_text(size = 14),
axis.text.x = element_text(size = 11),
axis.text.y = element_text(size = 11),
legend.title = element_text(size = 13),
legend.text = element_text(size = 10)
)
网友评论