DAY 4

作者: Peng_001 | 来源:发表于2020-05-03 23:54 被阅读0次

    参考:datacamp

    dplyr package

    1. 通过filter 筛选符合某种情况的数据集,多个条件可以用逗号隔开
    library(gapminder)
    library(dplyr)
    
    # Keep only the rows of gapminder recorded in 1957.
    # year is a numeric column, so it is compared with the number 1957 rather
    # than the string "1957", which only worked through implicit coercion.
    gapminder %>%
      filter(year == 1957)
    

    通过 %in% 可以筛选出某一列的取值属于给定的多个值之一的行。

    # Keep only the rows whose name is Steven, Thomas, or Matthew.
    # NOTE(review): babynames is not created or loaded anywhere in these notes;
    # confirm where it comes from before running this snippet.
    selected_names <- filter(
      babynames,
      name %in% c("Steven", "Thomas", "Matthew")
    )
    
    2. arrange verb

    通过arrange 排列数据集

    # Sort the rows by lifeExp in ascending order
    gapminder %>% arrange(lifeExp)
    

    或者

    # Sort the rows by lifeExp in descending order
    gapminder %>% arrange(desc(lifeExp))
    

    还可以同时使用多个verb

    # Keep the 1957 rows, then sort them from largest to smallest pop.
    # year is numeric, so it is compared with the number 1957 rather than "1957".
    gapminder %>%
      filter(year == 1957) %>%
      arrange(desc(pop))
    
    3. mutate

    通过mutate 处理变量数据。(增加、修改、删减)

    library(gapminder)
    library(dplyr)
    
    # Overwrite lifeExp with its value multiplied by 12 (years to months)
    gapminder %>% mutate(lifeExp = lifeExp * 12)
    
    # Leave lifeExp untouched and store the monthly value in a new column,
    # lifeExpMonths
    gapminder %>% mutate(lifeExpMonths = lifeExp * 12)
    
    4. summarize
      用于计算数据集中各列的汇总统计量,如 sum()、mean()、median()、min()、max()
    library(gapminder)
    library(dplyr)
    
    # Collapse the whole table into a single row holding the median of lifeExp
    gapminder %>% summarize(medianLifeExp = median(lifeExp))
    

    summarize 中还可以使用其他汇总函数,例如 sum()、mean()、min()、max()。

    5. group_by
      可以用分组将其和summarize巧妙结合起来。
      如依据year, continent 将数据分类,接着再在分类过的数据中找出平均、总、或者中间值等。
    library(gapminder)
    library(dplyr)
    
    # For every year, compute the median of lifeExp and the largest gdpPercap
    gapminder %>%
      group_by(year) %>%
      summarize(
        medianLifeExp = median(lifeExp),
        maxGdpPercap = max(gdpPercap)
      )
    

    ggplot2 包

    如下表示,表格gapminder_1952,以pop为x轴,gdpPercap 为y轴,构建散点图

    library(gapminder)
    library(dplyr)
    library(ggplot2)
    
    # gapminder_1952 holds only the 1952 rows of gapminder; it has to exist
    # before it can be plotted.
    gapminder_1952 <- gapminder %>%
      filter(year == 1952)
    
    # Scatter plot with pop on the x-axis and gdpPercap on the y-axis
    ggplot(gapminder_1952, aes(x = pop, y = gdpPercap)) +
      geom_point()
    

    输出


    1. 调整数值的单位

    例如使用 scale_x_log10() 将 x 轴改为以 10 为底的对数刻度。

    library(gapminder)
    library(dplyr)
    library(ggplot2)
    
    # Subset gapminder to the rows recorded in 1952
    gapminder_1952 <- gapminder %>% filter(year == 1952)
    
    # Scatter plot of lifeExp against pop, with the x-axis on a log10 scale
    ggplot(data = gapminder_1952, mapping = aes(x = pop, y = lifeExp)) +
      geom_point() +
      scale_x_log10()
    
    2. 增加额外的图形属性(借助颜色、点的大小表现更多变量)


    # Scatter plot of lifeExp against pop (log10 x-axis); continent sets the
    # point color and gdpPercap sets the point size
    ggplot(
      data = gapminder_1952,
      mapping = aes(x = pop, y = lifeExp, color = continent, size = gdpPercap)
    ) +
      geom_point() +
      scale_x_log10()
    
    3. 通过 Faceting(分面),按某个变量将一张图拆分为多个子图
      facet_wrap(~ name),其中 name 为用于分面的变量名
    library(gapminder)
    library(dplyr)
    library(ggplot2)
    
    # lifeExp against gdpPercap (log10 x-axis), colored by continent and sized
    # by pop, drawn as one panel per year
    ggplot(
      data = gapminder,
      mapping = aes(x = gdpPercap, y = lifeExp, color = continent, size = pop)
    ) +
      geom_point() +
      scale_x_log10() +
      facet_wrap(~ year)
    
    4. expand_limits
      扩展坐标轴的范围,使其包含指定的值
      expand_limits(y = 0),表示 y 轴的范围一定包含 0。

    结合gapminder 与ggplot2

    library(gapminder)
    library(dplyr)
    library(ggplot2)
    
    # One row per year: median of lifeExp and maximum of gdpPercap
    by_year <- gapminder %>%
      group_by(year) %>%
      summarize(
        medianLifeExp = median(lifeExp),
        maxGdpPercap = max(gdpPercap)
      )
    
    # Scatter plot of medianLifeExp against year, with 0 included on the y-axis
    ggplot(data = by_year, mapping = aes(x = year, y = medianLifeExp)) +
      expand_limits(y = 0) +
      geom_point()
    

    再复杂一点的例子

    library(gapminder)
    library(dplyr)
    library(ggplot2)
    
    # Median gdpPercap for every continent/year pair, saved as by_year_continent.
    # summarize() only removes the last grouping variable, so ungroup() is
    # called to return a plain, ungrouped table.
    by_year_continent <- gapminder %>%
      group_by(continent, year) %>%
      summarize(medianGdpPercap = median(gdpPercap)) %>%
      ungroup()
    
    # Scatter plot of medianGdpPercap against year, one color per continent;
    # expand_limits(y = 0) makes sure the y-axis includes 0.
    ggplot(
      by_year_continent,
      aes(x = year, y = medianGdpPercap, color = continent)
    ) +
      geom_point() +
      expand_limits(y = 0)
    

    折线图

    除了散点图外,ggplot 还可以做其他类型的图


    直接将geom_point更改为geom_line即可
    例如
    library(gapminder)
    library(dplyr)
    library(ggplot2)
    
    # Median gdpPercap for every year/continent pair, saved as by_year_continent.
    # summarize() only removes the last grouping variable, so ungroup() is
    # called to return a plain, ungrouped table.
    by_year_continent <- gapminder %>%
      group_by(year, continent) %>%
      summarize(medianGdpPercap = median(gdpPercap)) %>%
      ungroup()
    
    # Line plot of medianGdpPercap against year, one colored line per continent;
    # expand_limits(y = 0) makes sure the y-axis includes 0.
    ggplot(
      by_year_continent,
      aes(x = year, y = medianGdpPercap, color = continent)
    ) +
      expand_limits(y = 0) +
      geom_line()
    

    柱状图

    也就是geom_col()
    例子

    library(gapminder)
    library(dplyr)
    library(ggplot2)
    
    # Median gdpPercap per continent, using only the 1952 rows.
    # year is numeric, so it is compared with the number 1952 rather than "1952".
    by_continent <- gapminder %>%
      filter(year == 1952) %>%
      group_by(continent) %>%
      summarize(medianGdpPercap = median(gdpPercap))
    
    # Column chart: one bar per continent, bar height = medianGdpPercap
    ggplot(by_continent, aes(x = continent, y = medianGdpPercap)) +
      geom_col()
    
    

    histogram 直方图

    直方图只需要指定 x 轴变量,y 轴默认为落入每个区间(bin)的观测数量 count。其他用法与前面的图同理,使用 geom_histogram()

    library(gapminder)
    library(dplyr)
    library(ggplot2)
    
    # pop_by_mil is not a gapminder column, so derive it first:
    # pop divided by 1,000,000 for the 1952 rows.
    gapminder_1952 <- gapminder %>%
      filter(year == 1952) %>%
      mutate(pop_by_mil = pop / 1000000)
    
    # Histogram of pop_by_mil using 50 bins
    ggplot(gapminder_1952, aes(x = pop_by_mil)) +
      geom_histogram(bins = 50)
    

    箱形图

    相似的,也就是geom_boxplot()

    library(gapminder)
    library(dplyr)
    library(ggplot2)
    
    # Subset gapminder to the rows recorded in 1952
    gapminder_1952 <- gapminder %>% filter(year == 1952)
    
    # One box per continent showing gdpPercap on a log10 y-axis, with a title
    ggplot(data = gapminder_1952, mapping = aes(x = continent, y = gdpPercap)) +
      geom_boxplot() +
      scale_y_log10() +
      ggtitle("Comparing GDP per capita across continents")
    

    通过ggtitle(" ")可以为图像设置标题。


    总结


    相关文章

      网友评论

          本文标题:DAY 4

          本文链接:https://www.haomeiwen.com/subject/nizcghtx.html