参考:datacamp
dplyr package
- 通过filter 筛选符合某种情况的数据集,多个条件可以用逗号隔开
library(gapminder)
library(dplyr)
# Keep only the rows of gapminder recorded in 1957.
# year is compared with the number 1957 rather than the string "1957": the
# string form only matches through implicit type coercion, and the other
# snippets in these notes already use filter(year == 1952).
gapminder %>%
  filter(year == 1957)
通过 %in% 可以筛选出某一列的取值属于给定集合的行,
相当于同时匹配多个取值。
# Keep the rows whose name is Steven, Thomas, or Matthew.
# NOTE(review): babynames is not created or loaded anywhere in these notes;
# it must already exist in the session -- TODO confirm where it comes from.
selected_names <- filter(
  babynames,
  name %in% c("Steven", "Thomas", "Matthew")
)
- arrange verb
通过arrange 排列数据集
# Sort gapminder by lifeExp in ascending order
arrange(gapminder, lifeExp)
或者
# Sort gapminder by lifeExp in descending order
arrange(gapminder, desc(lifeExp))
还可以同时使用多个verb
# Chain two verbs: keep the 1957 rows, then sort them by pop, largest first.
# year is compared with the number 1957 rather than the string "1957", which
# only matched through implicit type coercion.
gapminder %>%
  filter(year == 1957) %>%
  arrange(desc(pop))
- mutate
通过mutate 处理变量数据。(增加、修改、删减)
library(gapminder)
library(dplyr)
# Overwrite lifeExp with 12 times its value (expressing it in months)
mutate(gapminder, lifeExp = 12 * lifeExp)

# Leave lifeExp untouched and store 12 times its value in a new column,
# lifeExpMonths
mutate(gapminder, lifeExpMonths = 12 * lifeExp)
- summarize
用于对数据集中的列做汇总计算,如 sum()、mean()、median()、min()、max()
如
library(gapminder)
library(dplyr)
# Collapse gapminder into a single row holding the median of lifeExp
summarize(gapminder, medianLifeExp = median(lifeExp))
summarize 函数中还可以使用其他汇总运算,
如 sum()、mean()、min()、max()。
- group_by
可以用分组将其和summarize巧妙结合起来。
如依据year, continent 将数据分类,接着再在分类过的数据中找出平均、总、或者中间值等。
library(gapminder)
library(dplyr)
# For each year, report the median lifeExp and the largest gdpPercap
summarize(
  group_by(gapminder, year),
  medianLifeExp = median(lifeExp),
  maxGdpPercap = max(gdpPercap)
)
ggplot2 包
如下所示:使用数据集 gapminder_1952,以 pop 为x轴、gdpPercap 为y轴,构建散点图
# Self-contained version of the first scatter plot: nothing earlier in these
# notes loads ggplot2 or creates gapminder_1952, so both are set up here.
library(gapminder)
library(dplyr)
library(ggplot2)

# The 1952 rows of gapminder (same definition the later snippets use)
gapminder_1952 <- gapminder %>%
  filter(year == 1952)

# Scatter plot with pop on the x-axis and gdpPercap on the y-axis
ggplot(gapminder_1952, aes(x = pop, y = gdpPercap)) +
  geom_point()
输出
- 调整数值的单位
如将x轴改为以10为底的对数刻度,使用 scale_x_log10()
library(gapminder)
library(dplyr)
library(ggplot2)
# The 1952 rows of gapminder
gapminder_1952 <- filter(gapminder, year == 1952)

# Scatter plot of lifeExp against pop, with the x-axis on a log10 scale
ggplot(data = gapminder_1952, mapping = aes(x = pop, y = lifeExp)) +
  scale_x_log10() +
  geom_point()
- 增加额外的显示
借助颜色(color)、点的大小(size)等表现更多变量
# Scatter plot of lifeExp against pop (log10 x-axis); point colour encodes
# continent and point size encodes gdpPercap
ggplot(
  data = gapminder_1952,
  mapping = aes(x = pop, y = lifeExp, color = continent, size = gdpPercap)
) +
  scale_x_log10() +
  geom_point()
- 通过Faceting,将图片数据切分为多个图片
facet_wrap(~ name)
library(gapminder)
library(dplyr)
library(ggplot2)
# lifeExp against gdpPercap (log10 x-axis) for all of gapminder: colour
# encodes continent, point size encodes pop, and facet_wrap(~ year) draws
# one panel per year
ggplot(
  data = gapminder,
  mapping = aes(x = gdpPercap, y = lifeExp, color = continent, size = pop)
) +
  geom_point() +
  facet_wrap(~ year) +
  scale_x_log10()
- expand_limits
设定坐标轴必须包含的取值。例如
expand_limits(y = 0)
表示y轴的范围要包含 0,也就是坐标轴从 y = 0 开始。
结合gapminder 与ggplot2
如
library(gapminder)
library(dplyr)
library(ggplot2)
# One row per year holding the median lifeExp and the maximum gdpPercap
by_year <- summarize(
  group_by(gapminder, year),
  medianLifeExp = median(lifeExp),
  maxGdpPercap = max(gdpPercap)
)

# Scatter plot of medianLifeExp against year; expand_limits(y = 0) makes the
# y-axis include 0
ggplot(data = by_year, mapping = aes(x = year, y = medianLifeExp)) +
  expand_limits(y = 0) +
  geom_point()
再复杂一点的例子
library(gapminder)
library(dplyr)
library(ggplot2)
# Median gdpPercap for every continent/year combination.
# summarize() removes only the last grouping level, so without ungroup() the
# result would stay grouped by continent and silently affect later verbs.
by_year_continent <- gapminder %>%
  group_by(continent, year) %>%
  summarize(medianGdpPercap = median(gdpPercap)) %>%
  ungroup()

# Scatter plot of medianGdpPercap over time, one colour per continent, with
# the y-axis extended to include 0
ggplot(
  by_year_continent,
  aes(x = year, y = medianGdpPercap, color = continent)
) +
  geom_point() +
  expand_limits(y = 0)
折线图
除了散点图外,ggplot 还可以做其他类型的图
直接将
geom_point()
更改为 geom_line()
即可,例如:
library(gapminder)
library(dplyr)
library(ggplot2)
# Median gdpPercap for every year/continent combination.
# summarize() removes only the last grouping level, so without ungroup() the
# result would stay grouped by year and silently affect later verbs.
by_year_continent <- gapminder %>%
  group_by(year, continent) %>%
  summarize(medianGdpPercap = median(gdpPercap)) %>%
  ungroup()

# Line plot of medianGdpPercap over time, one line per continent, with the
# y-axis extended to include 0
ggplot(
  by_year_continent,
  aes(x = year, y = medianGdpPercap, color = continent)
) +
  expand_limits(y = 0) +
  geom_line()
柱状图
也就是geom_col()
例子
library(gapminder)
library(dplyr)
library(ggplot2)
# Median gdpPercap per continent, using only the 1952 rows.
# year is compared with the number 1952 rather than the string "1952", which
# only matched through implicit type coercion.
by_continent <- gapminder %>%
  filter(year == 1952) %>%
  group_by(continent) %>%
  summarize(medianGdpPercap = median(gdpPercap))

# Column chart: one bar per continent, bar height = medianGdpPercap
ggplot(by_continent, aes(x = continent, y = medianGdpPercap)) +
  geom_col()
histogram 直方图
直方图只需指定x轴变量,y轴默认为落入每个区间的观测数(count)。其他用法同理,使用 geom_histogram()
# Histogram of pop_by_mil using 50 bins.
# gapminder_1952 as defined in these notes has no pop_by_mil column, so it is
# derived from pop here before plotting.
# NOTE(review): assumes pop_by_mil means pop in millions -- confirm.
gapminder_1952 %>%
  mutate(pop_by_mil = pop / 1000000) %>%
  ggplot(aes(x = pop_by_mil)) +
  geom_histogram(bins = 50)
箱形图
相似的,也就是geom_boxplot()
library(gapminder)
library(dplyr)
library(ggplot2)
# The 1952 rows of gapminder
gapminder_1952 <- filter(gapminder, year == 1952)

# Box plot of gdpPercap for each continent on a log10 y-axis, with a title
ggplot(data = gapminder_1952, mapping = aes(x = continent, y = gdpPercap)) +
  geom_boxplot() +
  ggtitle("Comparing GDP per capita across continents") +
  scale_y_log10()
通过ggtitle(" ")可以为图像设置标题。
总结
网友评论