作者: 桓峰基因 | 来源:发表于2022-09-04 17:39



FigDraw 19. SCI 文章中绘图之坡度图(Slope Chart)

前 言


坡度图(Slope Chart)可以高效地可视化同一个核心指标随着时间推移的变化情况。


目前,还没有现成的构建函数来绘制坡度图。我们可以利用 ggplot2 及其扩展包来解决这个问题。简单安装一个 ggalt 软件包:



这里我们选取《R语言数据可视化之美》这本书里面的两个例子,以及来自 r-statistics 的一个例子。

1. 两年份对比



# Two-year comparison data: the first column is the entity name, the other
# two columns are its values in 1952 and 1957.
df1 <- read.csv("Slopecharts_Data1.csv")
names(df1) <- c("continent", "1952", "1957")

# Text shown next to the left (1952) and right (1957) end of each line,
# formatted as "<name>, <rounded value>".
left_label <- paste(df1[["continent"]], round(df1[["1952"]]), sep = ", ")
right_label <- paste(df1[["continent"]], round(df1[["1957"]]), sep = ", ")

# Colour key per row: "red" where the value fell between 1952 and 1957,
# "green" otherwise.
df1$class <- with(df1, ifelse((`1957` - `1952`) < 0, "red", "green"))

## continent 1952 1957 class
## 1 Argentina 67 74 green
## 2 Bangladesh 54 53 red
## 3 Brazil 62 68 green
## 4 Canada 73 80 green
## 5 China 68 72 green
## 6 Egypt 60 61 green

2. 多年份对比



# Multi-year comparison data: the first column is the entity name, followed by
# one value column per year from 2007 to 2013.
df2 <- read.csv("Slopecharts_Data2.csv")
colnames(df2) <- c("continent", 2007:2013)

# Wide -> long: one row per (continent, year column). `id.vars` is spelled out
# in full instead of relying on partial matching of `id`.
dfm <- melt(df2, id.vars = "continent")

dfm$value <- as.numeric(dfm$value)
# Replace the year column name by its position among the year columns
# (1 = first year, 7 = last year). Matching against the column names gives the
# same result whether `melt()` returned `variable` as a factor or as character,
# whereas a bare as.numeric() only works on a factor whose levels are in
# column order.
dfm$variable <- as.numeric(match(as.character(dfm$variable), colnames(df2)[-1]))

# Start with a label and a point for every row; the code that follows blanks
# the ones that are not at the first (left) or last (right) year.
left_label <- paste(dfm$continent, round(dfm$value), sep = ", ")
right_label <- paste(dfm$continent, round(dfm$value), sep = ", ")

left_point <- dfm$value
right_point <- dfm$value
# Placeholder with one element per row; overwritten with colour keys below.
class <- dfm$variable

# Keep labels and points only at the two ends of each line: rows that are not
# at the first year position lose their left label/point, rows that are not at
# the last year position (7) lose their right label/point. NaN points are
# dropped by the point layer when the plot is drawn.
left_label[dfm$variable != 1] <- ""
left_point[dfm$variable != 1] <- NaN
right_label[dfm$variable != 7] <- ""
right_point[dfm$variable != 7] <- NaN

# Colour key per row, looked up from the wide table: column 2 holds the first
# year and column 8 the last year of `df2`.
# NOTE(review): "green" marks rows whose first-year value is larger than the
# last-year value (i.e. a decrease) -- confirm this is the intended naming.
class <- local({
  wide_row <- match(dfm$continent, df2$continent)
  ifelse(df2[wide_row, 2] > df2[wide_row, 8], "green", "red")
})


## continent variable value
## 1 Germany 1 2428500
## 2 United Kingdom 1 2054238
## 3 France 1 1886792
## 4 Italy 1 1554199
## 5 Spain 1 1053161
## 6 Netherlands 1 571773

3. 癌症的生存比例



# Cancer survival data in long format (columns: group, year, value).
source_df <- read.csv(file = "cancer_survival_rates.csv")
## group year value
## 1 Oral cavity 5 56.7
## 2 Oesophagus 5 14.2
## 3 Stomach 5 23.8
## 4 Colon 5 61.7
## 5 Rectum 5 62.6
## 6 Liver and intrahepatic bile duct 5 7.5
# Define functions. Source: https://github.com/jkeirstead/r-slopegraph
#' Compute vertical label positions for a Tufte-style slopegraph
#'
#' Casts long-format data to one row per group, orders the groups by their
#' value in the first x column, and shifts each successive group upwards so
#' that neighbouring groups are at least `min.space` (a fraction of the overall
#' value range) apart in every x column. Missing group/x combinations are
#' filled with 0 before the layout is computed.
#'
#' @param df Data frame in long format.
#' @param x,y,group Names of the columns in `df` holding the x position, the
#'   value and the group identifier.
#' @param method Not used by the body; kept so existing calls keep working.
#' @param min.space Minimum gap between neighbouring groups, expressed as a
#'   fraction of the range of all values.
#' @return A long-format data frame with columns `group`, `yshift` (cumulative
#'   upward shift of the group), `x`, `y` (original value) and `ypos`
#'   (`y` plus the shift).
tufte_sort <- function(df, x = "year", y = "value", group = "group", method = "tufte",
                       min.space = 0.05) {
  ## First rename the columns for consistency
  ids <- match(c(x, y, group), names(df))
  if (anyNA(ids)) {
    stop("Column(s) not found in `df`: ",
         paste(c(x, y, group)[is.na(ids)], collapse = ", "),
         call. = FALSE)
  }
  df <- df[, ids]
  names(df) <- c("x", "y", "group")

  ## Expand grid to ensure every combination has a defined value
  tmp <- expand.grid(x = unique(df$x), group = unique(df$group))
  tmp <- merge(df, tmp, all.y = TRUE)
  tmp$y[is.na(tmp$y)] <- 0
  df <- tmp

  ## Cast into a matrix shape and arrange by first column
  tmp <- dcast(df, group ~ x, value.var = "y")
  tmp <- tmp[order(tmp[, 2]), ]

  min.space <- min.space * diff(range(tmp[, -1]))
  yshift <- numeric(nrow(tmp))
  ## Start at the 'bottom' row and repeat for the remaining rows until the top.
  ## seq_len(...)[-1] is empty when there are fewer than two rows, whereas
  ## 2:nrow(tmp) would count backwards.
  for (i in seq_len(nrow(tmp))[-1]) {
    ## Shift the row up so the gap to the previous row is >= minimum in
    ## every column
    mat <- as.matrix(tmp[(i - 1):i, -1])
    d.min <- min(diff(mat))
    yshift[i] <- max(min.space - d.min, 0)
  }

  tmp <- cbind(tmp, yshift = cumsum(yshift))

  ## Store the gaps in a separate variable so that they can be scaled:
  ## ypos = shift_scale * yshift + y
  shift_scale <- 1
  tmp <- melt(tmp, id.vars = c("group", "yshift"), variable.name = "x",
              value.name = "y")
  tmp$ypos <- tmp$y + shift_scale * tmp$yshift

  tmp
}




1. 两年份对比

# Slope chart for the two-year comparison: one segment per row of df1 running
# from its 1952 value (x = 1) to its 1957 value (x = 2), coloured by `class`.
#
# The labels are rebuilt from df1 here because the global left_label /
# right_label vectors are reassigned for the multi-year data set before this
# plot is constructed, so their length no longer matches df1.
labels_1952 <- paste(df1$continent, round(df1$`1952`), sep = ", ")
labels_1957 <- paste(df1$continent, round(df1$`1957`), sep = ", ")
# Year titles sit slightly above the largest plotted value.
year_title_y <- 1.02 * max(df1$`1952`, df1$`1957`)

p <- ggplot(df1) +
  # connecting lines
  geom_segment(aes(x = 1, xend = 2, y = `1952`, yend = `1957`, col = class),
               size = 0.75, show.legend = FALSE) +
  # vertical guide lines for 1952 and 1957
  geom_vline(xintercept = 1, linetype = "solid", size = 0.1) +
  geom_vline(xintercept = 2, linetype = "solid", size = 0.1) +
  # data points for 1952 and 1957
  geom_point(aes(x = 1, y = `1952`), size = 3, shape = 21, fill = "grey80",
             color = "black") +
  geom_point(aes(x = 2, y = `1957`), size = 3, shape = 21, fill = "grey80",
             color = "black") +
  scale_color_manual(labels = c("Up", "Down"),
                     values = c("green" = "#A6D854", "red" = "#FC4E07")) +
  xlim(0.5, 2.5)

# Text labels next to each end point
p <- p + geom_text(label = labels_1952, y = df1$`1952`, x = rep(1, NROW(df1)),
                   hjust = 1.1, size = 3.5)
p <- p + geom_text(label = labels_1957, y = df1$`1957`, x = rep(2, NROW(df1)),
                   hjust = -0.1, size = 3.5)
# Year titles: annotate() draws each title once instead of once per data row
p <- p + annotate("text", label = "1952", x = 1, y = year_title_y,
                  hjust = 1.2, size = 5)
p <- p + annotate("text", label = "1957", x = 2, y = year_title_y,
                  hjust = -0.1, size = 5)


2. 多年份对比

# Multi-year slope chart: one smoothed line per continent across the year
# positions 1..7, with points and text labels only at the first and last
# position (the other rows carry NaN points and empty labels).
#
# NOTE(review): `class` is "green" where the first-year value exceeds the
# last-year value, and the unnamed legend labels appear to pair "green" with
# "Up" -- confirm the intended direction/colour pairing before relying on the
# legend. The mapping is left exactly as it was.
#
# Year titles sit slightly above the largest value. This must come from the
# long table `dfm`: the wide table `df2` has no `value` column, so
# max(df2$value) evaluates to -Inf.
year_title_y <- 1.02 * max(dfm$value)

p <- ggplot(dfm) +
  geom_xspline(aes(x = variable, y = value, group = continent, colour = class),
               size = 0.75) +
  geom_vline(xintercept = 1, linetype = "solid", size = 0.1) +
  geom_vline(xintercept = 7, linetype = "solid", size = 0.1) +
  geom_point(aes(x = variable, y = left_point), size = 3, shape = 21,
             fill = "grey80", color = "black") +
  geom_point(aes(x = variable, y = right_point), size = 3, shape = 21,
             fill = "grey80", color = "black") +
  scale_color_manual(labels = c("Up", "Down"),
                     values = c(green = "#FC4E07", red = "#A6D854")) +
  xlim(-4, 12)

# Text labels next to the first-year and last-year end of each line
p <- p + geom_text(label = left_label, y = dfm$value, x = rep(1, NROW(dfm)),
                   hjust = 1.1, size = 3.5)
p <- p + geom_text(label = right_label, y = dfm$value, x = rep(7, NROW(dfm)),
                   hjust = -0.1, size = 3.5)
# Year titles: annotate() draws each title once instead of once per data row
p <- p + annotate("text", label = "2007", x = 1, y = year_title_y,
                  hjust = 1.2, size = 5)
p <- p + annotate("text", label = "2013", x = 7, y = year_title_y,
                  hjust = -0.1, size = 5)

3. 癌症的生存比例

## Plot
# Draw the survival-rate slopegraph.
# NOTE(review): `plot_slopegraph()` and `df` are not defined in the visible
# part of this file; they must be provided before this statement runs.
#
# theme_classic() is a complete theme and replaces every theme setting added
# before it, so it has to come first; otherwise the blank axis titles/ticks
# and the font settings below are silently discarded.
plot_slopegraph(df) +
  labs(title = "Estimates of % survival rates") +
  theme_classic() +
  theme(
    axis.title = element_blank(),
    axis.ticks = element_blank(),
    plot.title = element_text(hjust = 0.5, family = "American Typewriter",
                              face = "bold"),
    axis.text = element_text(family = "American Typewriter", face = "bold")
  )



  1. 张杰. 《R语言数据可视化之美》

