美文网首页R语言做图R plot
R可视化:基础图形可视化(二)

R可视化:基础图形可视化(二)

作者: 生信学习者2 | 来源:发表于2021-01-30 10:49 被阅读0次

    基础图形可视化

    数据分析的图形可视化是了解数据分布、波动和相关性等属性必不可少的手段。本文基于参考资料收集其可视化代码,如想了解更多,请查看参考链接。更多知识分享请到 https://zouhua.top/

    边界散点图(Scatterplot With Encircling)

    # Scatterplot of county area vs. total population (`midwest`), with the
    # counties picked out in `midwest_select` outlined by geom_encircle().
    library(ggplot2)
    library(ggalt)  # geom_encircle()

    # Counties to outline: population in (350000, 500000] and area in (0.01, 0.1).
    midwest_select <- midwest[midwest$poptotal > 350000 &
                                midwest$poptotal <= 500000 &
                                midwest$area > 0.01 &
                                midwest$area < 0.1, ]

    ggplot(midwest, aes(x = area, y = poptotal)) +
      geom_point(aes(col = state, size = popdensity)) +  # draw points
      # Spell out FALSE: `F` is a plain variable and can be reassigned.
      geom_smooth(method = "loess", se = FALSE) +  # draw smoothing line
      xlim(c(0, 0.1)) +
      ylim(c(0, 500000)) +
      geom_encircle(aes(x = area, y = poptotal),
                    data = midwest_select,
                    color = "red",
                    size = 2,
                    expand = 0.08) +  # encircle
      labs(subtitle = "Area Vs Population",
           y = "Population",
           x = "Area",
           title = "Scatterplot + Encircle",
           caption = "Source: midwest")
    

    边缘箱图/直方图(Marginal Histogram / Boxplot)

    2、边缘箱图/直方图(Marginal Histogram / Boxplot)

    # Count-scatter of city vs. highway mileage with a linear fit, plus marginal
    # histograms on both axes drawn by ggExtra::ggMarginal().
    library(ggplot2)
    library(ggExtra)
    data(mpg, package = "ggplot2")

    theme_set(theme_bw())
    # NOTE(review): `mpg_select` is not referenced by the plot below; it is kept
    # only in case later code expects it.
    mpg_select <- mpg[mpg$hwy >= 35 & mpg$cty > 27, ]
    g <- ggplot(mpg, aes(cty, hwy)) +
      geom_count() +
      # Spell out FALSE: `F` is a plain variable and can be reassigned.
      geom_smooth(method = "lm", se = FALSE)

    ggMarginal(g, type = "histogram", fill = "transparent")
    # Alternative: marginal boxplots instead of histograms
    #ggMarginal(g, type = "boxplot", fill="transparent")
    

    拟合散点图

    # Scatterplot of county area vs. total population with a loess smoothing
    # line; points are coloured by state and sized by population density.
    library(ggplot2)
    theme_set(theme_bw())
    data("midwest")

    ggplot(midwest, aes(x = area, y = poptotal)) +
      geom_point(aes(col = state, size = popdensity)) +
      # Spell out FALSE: `F` is a plain variable and can be reassigned.
      geom_smooth(method = "loess", se = FALSE) +
      xlim(c(0, 0.1)) +
      ylim(c(0, 500000)) +
      labs(subtitle = "Area Vs Population",
           y = "Population",
           x = "Area",
           title = "Scatterplot",
           caption = "Source: midwest")
    

    相关系数图(Correlogram)

    # Correlogram of the mtcars variables drawn with ggcorrplot.
    library(ggplot2)
    library(ggcorrplot)

    # Pairwise correlations of all mtcars columns, rounded to one decimal place
    data(mtcars)
    corr <- round(cor(mtcars), digits = 1)

    ggcorrplot(
      corr,
      method = "circle",
      type = "lower",
      hc.order = TRUE,
      lab = TRUE,
      lab_size = 3,
      colors = c("tomato2", "white", "springgreen3"),
      title = "Correlogram of mtcars",
      ggtheme = theme_bw
    )
    

    水平发散型条形图(Diverging Bars)

    # Horizontal diverging bar chart of standardised mileage (z-scores of mpg)
    # for every car in mtcars.
    library(ggplot2)
    library(dplyr)
    library(tibble)
    theme_set(theme_bw())

    # Data prep: move the row names into `car_name`, standardise mpg, flag each
    # car as above/below average, and sort ascending by z-score.
    data("mtcars")

    plotdata <- mtcars %>%
      rownames_to_column("car_name") %>%
      mutate(
        mpg_z = round((mpg - mean(mpg)) / sd(mpg), 2),
        mpg_type = ifelse(mpg_z < 0, "below", "above")
      ) %>%
      arrange(mpg_z)

    # Fix the factor levels in sorted order so the bars are drawn in that order
    plotdata$car_name <- factor(plotdata$car_name,
                                levels = as.character(plotdata$car_name))

    ggplot(plotdata, aes(x = car_name, y = mpg_z, label = mpg_z)) +
      geom_col(aes(fill = mpg_type), width = 0.5) +
      scale_fill_manual(
        name = "Mileage",
        labels = c("Above Average", "Below Average"),
        values = c("above" = "#00ba38", "below" = "#f8766d")
      ) +
      labs(
        title = "Diverging Bars",
        subtitle = "Normalised mileage from 'mtcars'"
      ) +
      coord_flip()
    

    水平棒棒糖图(Diverging Lollipop Chart)

    # Lollipop chart of the standardised mileage: a segment from 0 to each car's
    # z-score, a large dot at the end, and the z-score printed inside the dot.
    # Uses `plotdata` as prepared for the diverging bar chart.
    ggplot(plotdata, aes(x = car_name, y = mpg_z, label = mpg_z)) +
      geom_point(fill = "black", size = 6) +
      geom_segment(
        aes(x = car_name, xend = car_name, y = 0, yend = mpg_z),
        color = "black"
      ) +
      geom_text(color = "white", size = 2) +
      labs(
        title = "Diverging Lollipop Chart",
        subtitle = "Normalized mileage from 'mtcars': Lollipop"
      ) +
      ylim(-2.5, 2.5) +
      coord_flip()
    

    发散型点图(Diverging Dot Plot)

    # Dot plot of the standardised mileage: one dot per car, coloured by whether
    # the car is above or below average, with the z-score printed inside the dot.
    # Uses `plotdata` as prepared for the diverging bar chart.
    ggplot(plotdata, aes(x = car_name, y = mpg_z, label = mpg_z)) +
      geom_point(aes(col = mpg_type), size = 6) +
      scale_color_manual(
        name = "Mileage",
        labels = c("Above Average", "Below Average"),
        values = c("above" = "#00ba38", "below" = "#f8766d")
      ) +
      geom_text(color = "white", size = 2) +
      labs(
        title = "Diverging Dot Plot",
        subtitle = "Normalized mileage from 'mtcars': Dotplot"
      ) +
      ylim(-2.5, 2.5) +
      coord_flip()
    

    面积图(Area Chart)

    # Area chart of the month-over-month relative change in the personal savings
    # rate (`psavert`) for the first 100 rows of `economics`.
    library(ggplot2)
    library(quantmod)
    data("economics", package = "ggplot2")

    # Relative change versus the previous row; the first row has no predecessor
    # and is set to 0.
    economics$returns_perc <- with(
      economics,
      c(0, diff(psavert) / psavert[-length(psavert)])
    )

    # One axis break every 12th row, labelled with that row's year
    brks <- economics$date[seq(1, nrow(economics), by = 12)]
    lbls <- lubridate::year(brks)

    ggplot(economics[1:100, ], aes(x = date, y = returns_perc)) +
      geom_area() +
      scale_x_date(breaks = brks, labels = lbls) +
      theme(axis.text.x = element_text(angle = 90)) +
      labs(
        title = "Area Chart",
        subtitle = "Perc Returns for Personal Savings",
        y = "% Returns for Personal savings",
        caption = "Source: economics"
      )
    

    排序条形图(Ordered Bar Chart)

    # Bar chart of mean city mileage per manufacturer, bars ordered ascending.
    # ggplot2 is attached first because the `mpg` dataset used below ships with it.
    library(ggplot2)
    theme_set(theme_bw())

    # Mean of `cty` within each manufacturer
    cty_mpg <- aggregate(mpg$cty, by = list(mpg$manufacturer), FUN = mean)
    colnames(cty_mpg) <- c("make", "mileage")
    # Sort by mileage, then freeze that order in the factor levels so ggplot
    # keeps it on the x axis instead of sorting alphabetically.
    cty_mpg <- cty_mpg[order(cty_mpg$mileage), ]
    cty_mpg$make <- factor(cty_mpg$make, levels = cty_mpg$make)

    ggplot(cty_mpg, aes(x = make, y = mileage)) +
      # geom_col() is the direct form of geom_bar(stat = "identity")
      geom_col(width = 0.5, fill = "tomato3") +
      labs(title = "Ordered Bar Chart",
           subtitle = "Make Vs Avg. Mileage",
           caption = "source: mpg") +
      theme(axis.text.x = element_text(angle = 65, vjust = 0.6))
    

    直方图(Histogram)

    # Two stacked histograms of engine displacement (`displ`) filled by vehicle
    # class: one with a fixed bin width, one with a fixed number of bins.
    library(ggplot2)
    theme_set(theme_classic())

    # Shared base plot: x variable and fill palette
    g <- ggplot(mpg, aes(displ)) +
      scale_fill_brewer(palette = "Spectral")

    # Bin width fixed at 0.1
    g +
      geom_histogram(aes(fill = class), binwidth = 0.1, col = "black", size = 0.1) +
      labs(
        title = "Histogram with Auto Binning",
        subtitle = "Engine Displacement across Vehicle Classes"
      )

    # Number of bins fixed at 5
    g +
      geom_histogram(aes(fill = class), bins = 5, col = "black", size = 0.1) +
      labs(
        title = "Histogram with Fixed Bins",
        subtitle = "Engine Displacement across Vehicle Classes"
      )
    
    # Stacked bar chart counting rows per manufacturer, filled by vehicle class.
    library(ggplot2)
    theme_set(theme_classic())

    g <- ggplot(mpg, aes(manufacturer))
    g +
      geom_bar(aes(fill = class), width = 0.5) +
      labs(
        title = "Histogram on Categorical Variable",
        subtitle = "Manufacturer across Vehicle Classes"
      ) +
      # Tilt the manufacturer names
      theme(axis.text.x = element_text(angle = 65, vjust = 0.6))
    

    核密度图(Density plot)

    # Kernel density of city mileage, one semi-transparent curve per cylinder
    # count.
    library(ggplot2)
    theme_set(theme_classic())

    g <- ggplot(mpg, aes(cty))
    g +
      geom_density(aes(fill = factor(cyl)), alpha = 0.8) +
      labs(
        title = "Density plot",
        subtitle = "City Mileage Grouped by Number of cylinders",
        caption = "Source: mpg",
        x = "City Mileage",
        fill = "# Cylinders"
      )
    

    点图结合箱图(Dot + Box Plot)

    # Box plot of city mileage per manufacturer with the individual observations
    # overlaid as a centred dot plot.
    library(ggplot2)
    theme_set(theme_bw())

    # plot
    g <- ggplot(mpg, aes(manufacturer, cty))
    g + geom_boxplot() +
      geom_dotplot(binaxis = "y",
                   stackdir = "center",
                   dotsize = 0.5,
                   fill = "red") +
      theme(axis.text.x = element_text(angle = 65, vjust = 0.6)) +
      # The x aesthetic is `manufacturer`, so the subtitle and x label name it
      # (they previously said "Class").
      labs(title = "Box plot + Dot plot",
           subtitle = "City Mileage vs Manufacturer: Each dot represents 1 row in source data",
           caption = "Source: mpg",
           x = "Manufacturer",
           y = "City Mileage")
    

    小提琴图(Violin Plot)

    # Violin plot of city mileage for each vehicle class.
    library(ggplot2)
    theme_set(theme_bw())

    g <- ggplot(mpg, aes(x = class, y = cty))
    g +
      geom_violin() +
      labs(
        title = "Violin plot",
        subtitle = "City Mileage vs Class of vehicle",
        caption = "Source: mpg",
        x = "Class of Vehicle",
        y = "City Mileage"
      )
    

    饼图

    # Pie chart of the number of rows per vehicle class: a single stacked bar
    # wrapped into polar coordinates.
    library(ggplot2)
    theme_set(theme_classic())

    # Frequency table of `class`, as a two-column data frame
    df <- as.data.frame(table(mpg$class))
    colnames(df) <- c("class", "freq")

    # One full-width stacked bar; the slices come from the `fill` groups
    pie <- ggplot(df, aes(x = "", y = freq, fill = factor(class))) +
      geom_col(width = 1) +
      labs(
        title = "Pie Chart of class",
        caption = "Source: mpg",
        fill = "class",
        x = NULL,
        y = NULL
      ) +
      theme(
        axis.line = element_blank(),
        plot.title = element_text(hjust = 0.5)
      )

    # Map y to the angle to turn the bar into a pie
    pie + coord_polar(theta = "y", start = 0)
    

    时间序列图(Time Series多图)

    ## Line chart drawn directly from a time-series (ts) object
    library(ggplot2)
    library(ggfortify)
    theme_set(theme_classic())

    # autoplot() on the AirPassengers series, with the title centred
    autoplot(AirPassengers) +
      theme(plot.title = element_text(hjust = 0.5)) +
      labs(title = "AirPassengers")
    
    # Line chart of `returns_perc` over time for the whole `economics` dataset.
    library(ggplot2)
    theme_set(theme_classic())

    # `returns_perc` is a derived column (relative change of `psavert` versus the
    # previous row, first row 0) that the area-chart chunk adds to `economics`.
    # Compute it here when it is missing so this chunk also runs on its own.
    if (!"returns_perc" %in% names(economics)) {
      economics$returns_perc <- with(
        economics,
        c(0, diff(psavert) / psavert[-length(psavert)])
      )
    }

    # Allow Default X Axis Labels
    ggplot(economics, aes(x = date)) +
      geom_line(aes(y = returns_perc)) +
      labs(title = "Time Series Chart",
           subtitle = "Returns Percentage from 'Economics' Dataset",
           caption = "Source: Economics",
           y = "Returns %")
    
    # Two time series (`psavert`, `uempmed`) for 1967-1981 drawn as coloured
    # lines from the long-format `economics_long` dataset.
    library(ggplot2)
    library(lubridate)
    data(economics_long, package = "ggplot2")
    theme_set(theme_bw())

    df <- economics_long[economics_long$variable %in% c("psavert", "uempmed"), ]
    df <- df[lubridate::year(df$date) %in% c(1967:1981), ]

    # labels and breaks for X axis text
    # `df` is in long format, so each date occurs once per variable. Take the
    # breaks from the distinct dates rather than from every 12th row of `df`;
    # stepping through the rows would run on into the second variable's dates
    # and could yield extra, off-phase breaks with repeated year labels.
    month_dates <- sort(unique(df$date))
    brks <- month_dates[seq(1, length(month_dates), by = 12)]
    lbls <- lubridate::year(brks)

    # plot
    ggplot(df, aes(x = date)) +
      geom_line(aes(y = value, col = variable)) +
      labs(title = "Time Series of Returns Percentage",
           subtitle = "Drawn from Long Data format",
           caption = "Source: Economics",
           y = "Returns %",
           color = NULL) +  # title and caption
      scale_x_date(labels = lbls, breaks = brks) +  # one tick per 12 months, labelled by year
      scale_color_manual(labels = c("psavert", "uempmed"),
                         values = c("psavert" = "#00ba38", "uempmed" = "#f8766d")) +  # line color
      theme(axis.text.x = element_text(angle = 90, vjust = 0.5, size = 8),  # rotate x axis text
            panel.grid.minor = element_blank())  # turn off minor grid
    

    堆叠面积图(Stacked Area Chart)

    # Stacked area chart of `psavert` and `uempmed` for 1967-1981, built from the
    # wide-format `economics` dataset by drawing the sum behind the lower series.
    library(ggplot2)
    library(lubridate)
    theme_set(theme_bw())

    # Keep the three needed columns for the years 1967-1981
    df <- economics[lubridate::year(economics$date) %in% 1967:1981,
                    c("date", "psavert", "uempmed")]

    # One axis break every 12th row, labelled with that row's year
    brks <- df$date[seq(1, nrow(df), by = 12)]
    lbls <- lubridate::year(brks)

    ggplot(df, aes(x = date)) +
      # Upper band: drawn as the sum so it sits on top of the uempmed area
      geom_area(aes(y = psavert + uempmed, fill = "psavert")) +
      geom_area(aes(y = uempmed, fill = "uempmed")) +
      labs(
        title = "Area Chart of Returns Percentage",
        subtitle = "From Wide Data format",
        caption = "Source: Economics",
        y = "Returns %"
      ) +
      scale_x_date(breaks = brks, labels = lbls) +
      # Fill colours per series; empty legend title
      scale_fill_manual(
        name = "",
        values = c("psavert" = "#00ba38", "uempmed" = "#f8766d")
      ) +
      theme(panel.grid.minor = element_blank())
    

    分层树形图(Hierarchical Dendrogram)

    # Rotated dendrogram of a hierarchical clustering of the USArrests rows.
    library(ggplot2)
    library(ggdendro)
    theme_set(theme_bw())

    # Average-linkage clustering on the default dist() distances
    hc <- hclust(dist(USArrests), method = "average")

    ggdendrogram(hc, rotate = TRUE, size = 2)
    

    聚类图(Clusters)

    # Scatterplot of the first two principal components of the iris measurements,
    # with each species outlined by geom_encircle().
    library(ggplot2)
    library(ggalt)
    library(ggfortify)
    theme_set(theme_classic())

    # Principal components of the four measurement columns
    df <- iris[1:4]
    pca_mod <- prcomp(df)

    # Component scores plus the species label, and one subset per species
    df_pc <- data.frame(pca_mod$x, Species = iris$Species)
    df_pc_vir <- subset(df_pc, Species == "virginica")
    df_pc_set <- subset(df_pc, Species == "setosa")
    df_pc_ver <- subset(df_pc, Species == "versicolor")

    ggplot(df_pc, aes(PC1, PC2, col = Species)) +
      geom_point(aes(shape = Species), size = 2) +
      labs(
        title = "Iris Clustering",
        subtitle = "With principal components PC1 and PC2 as X and Y axis",
        caption = "Source: Iris"
      ) +
      # Axis limits: 1.2 times the observed min and max of each component
      coord_cartesian(
        xlim = 1.2 * range(df_pc$PC1),
        ylim = 1.2 * range(df_pc$PC2)
      ) +
      # One outline per species
      geom_encircle(data = df_pc_vir, aes(x = PC1, y = PC2)) +
      geom_encircle(data = df_pc_set, aes(x = PC1, y = PC2)) +
      geom_encircle(data = df_pc_ver, aes(x = PC1, y = PC2))
    

    气泡图

    # Interactive bubble chart of the 2007 gapminder data: GDP per capita vs.
    # life expectancy, bubble size by population, colour by continent, with a
    # custom hover tooltip rendered through plotly.
    # Libraries
    library(ggplot2)
    library(dplyr)
    library(plotly)
    library(viridis)
    library(hrbrthemes)

    # The dataset is provided in the gapminder library
    library(gapminder)
    # Compare `year` with a number rather than the string "2007", which only
    # matched through implicit coercion.
    data <- gapminder %>%
      filter(year == 2007) %>%
      dplyr::select(-year)

    # Interactive version
    p <- data %>%
      # Round the displayed values; population is expressed in millions
      mutate(gdpPercap = round(gdpPercap, 0)) %>%
      mutate(pop = round(pop / 1000000, 2)) %>%
      mutate(lifeExp = round(lifeExp, 1)) %>%

      # Reorder countries to having big bubbles on top
      arrange(desc(pop)) %>%
      mutate(country = factor(country, country)) %>%

      # prepare text for tooltip
      mutate(text = paste0("Country: ", country,
                           "\nPopulation (M): ", pop,
                           "\nLife Expectancy: ", lifeExp,
                           "\nGdp per capita: ", gdpPercap)) %>%

      # Classic ggplot
      ggplot(aes(x = gdpPercap, y = lifeExp, size = pop, color = continent, text = text)) +
        geom_point(alpha = 0.7) +
        scale_size(range = c(1.4, 19), name = "Population (M)") +
        # guide = "none" replaces the deprecated guide = FALSE
        scale_color_viridis(discrete = TRUE, guide = "none") +
        theme_ipsum() +
        theme(legend.position = "none")

    # turn ggplot interactive with plotly
    pp <- ggplotly(p, tooltip = "text")
    pp
    

    参考

    1. R可视化19|ggplot2绘制常用30+个靓图(附R code)

    参考文章如引起任何侵权问题,可以与我联系,谢谢。

    相关文章

      网友评论

        本文标题:R可视化:基础图形可视化(二)

        本文链接:https://www.haomeiwen.com/subject/ggmhtltx.html