美文网首页
02R语言基础入门

02R语言基础入门

作者: Jachin111 | 来源:发表于2020-09-18 07:52 被阅读0次

    向量
    赋值

    > a <- c(2, 5, 8)
    > a
    [1] 2 5 8
    

    筛选

    > a[1:2]
    [1] 2 5
    > a[a>4]
    [1] 5 8
    > a>4
    [1] FALSE  TRUE  TRUE
    

    合并向量

    > c(a[1], 3, a[2:3], 1)
    [1] 2 3 5 8 1
    

    循环补齐

    > a <- c(3, 4)
    > b <- c(1, 2, 5, 6)
    > a+b
    [1]  4  6  8 10
    

    关于向量的几个函数

    > length(b)
    [1] 4
    > which.max(b)
    [1] 4
    > which(b>3)
    [1] 3 4
    

    矩阵
    本质上来说就是带有维度(dim)属性的向量,即二维数组
    创建

    > a <- matrix(c(1, 2, 3, 4), nrow=2)
    > a
         [,1] [,2]
    [1,]    1    3
    [2,]    2    4
    > a <- matrix(c(1, 2, 3, 4), nrow=2, byrow=TRUE)
    > a
         [,1] [,2]
    [1,]    1    2
    [2,]    3    4
    

    筛选矩阵

    > a[1:2, 2]
    [1] 2 4
    

    线性代数(注意:`*` 是逐元素相乘,`%*%` 才是矩阵乘法)

    > a * a
         [,1] [,2]
    [1,]    1    4
    [2,]    9   16
    > a %*% a
         [,1] [,2]
    [1,]    7   10
    [2,]   15   22
    

    矩阵相关函数

    > t(a)
         [,1] [,2]
    [1,]    1    3
    [2,]    2    4
    > solve(a)
         [,1] [,2]
    [1,] -2.0  1.0
    [2,]  1.5 -0.5
    

    数据框
    不同的列可以有不同的数据类型

    > data("iris")
    > head(iris)
      Sepal.Length Sepal.Width Petal.Length Petal.Width Species
    1          5.1         3.5          1.4         0.2  setosa
    2          4.9         3.0          1.4         0.2  setosa
    3          4.7         3.2          1.3         0.2  setosa
    4          4.6         3.1          1.5         0.2  setosa
    5          5.0         3.6          1.4         0.2  setosa
    6          5.4         3.9          1.7         0.4  setosa
    > summary(iris)
      Sepal.Length    Sepal.Width     Petal.Length    Petal.Width          Species  
     Min.   :4.300   Min.   :2.000   Min.   :1.000   Min.   :0.100   setosa    :50  
     1st Qu.:5.100   1st Qu.:2.800   1st Qu.:1.600   1st Qu.:0.300   versicolor:50  
     Median :5.800   Median :3.000   Median :4.350   Median :1.300   virginica :50  
     Mean   :5.843   Mean   :3.057   Mean   :3.758   Mean   :1.199                  
     3rd Qu.:6.400   3rd Qu.:3.300   3rd Qu.:5.100   3rd Qu.:1.800                  
     Max.   :7.900   Max.   :4.400   Max.   :6.900   Max.   :2.500 
    > names(iris)
    [1] "Sepal.Length" "Sepal.Width"  "Petal.Length" "Petal.Width"  "Species"
    

    summary() 对于数值变量,我们可以看到最小值,中位数等等统计信息。而对于分类变量,我们看到的是计数信息。

    列表
    一种递归式的向量,我们可以用列表来存储不同类型的数据

    > l <- list(name="jiawen", pigu_num=2, is_handsome=TRUE)
    > l
    $name
    [1] "jiawen"
    
    $pigu_num
    [1] 2
    
    $is_handsome
    [1] TRUE
    

    列表的多种索引方式

    > l$name
    [1] "jiawen"
    > l[[2]]
    [1] 2
    > l[['is_handsome']]
    [1] TRUE
    

    网络资源
    https://www.datacamp.com/
    http://cos.name/
    http://xccds1977.blogspot.com/
    http://adv-r.had.co.nz/

    tidyverse 生态链
    readr:读取数据
    tidyr:整理数据
    dplyr:数据转换
    ggplot2:可视化
    purrr:函数式编程

    > library(tidyverse)
    > mpg
    # A tibble: 234 x 11
       manufacturer model      displ  year   cyl trans      drv     cty   hwy fl    class  
       <chr>        <chr>      <dbl> <int> <int> <chr>      <chr> <int> <int> <chr> <chr>  
     1 audi         a4           1.8  1999     4 auto(l5)   f        18    29 p     compact
     2 audi         a4           1.8  1999     4 manual(m5) f        21    29 p     compact
     3 audi         a4           2    2008     4 manual(m6) f        20    31 p     compact
     4 audi         a4           2    2008     4 auto(av)   f        21    30 p     compact
     5 audi         a4           2.8  1999     6 auto(l5)   f        16    26 p     compact
     6 audi         a4           2.8  1999     6 manual(m5) f        18    26 p     compact
     7 audi         a4           3.1  2008     6 auto(av)   f        18    27 p     compact
     8 audi         a4 quattro   1.8  1999     4 manual(m5) 4        18    26 p     compact
     9 audi         a4 quattro   1.8  1999     4 auto(l5)   4        16    25 p     compact
    10 audi         a4 quattro   2    2008     4 manual(m6) 4        20    28 p     compact
    # ... with 224 more rows
    

    manufacturer: 制造商
    model: 车型
    displ: 发动机排量(单位:升)
    year: 制造年度
    cyl: 气缸数量
    trans: 变速器类型(自动/手动)
    drv: 驱动方式
    cty: 城市道路燃油效率(每加仑汽油可行驶的英里数)
    hwy: 高速公路燃油效率(每加仑汽油可行驶的英里数)
    fl: 油的种类
    class: 车的类型

    > ggplot(data=mpg) + geom_point(mapping=aes(x=displ, y=hwy))
    
    image.png
    > ggplot(data=mpg) + geom_point(mapping=aes(x=displ, y=hwy, color=class))
    
    image.png
    > ggplot(data=mpg) + geom_point(mapping=aes(x=displ, y=hwy)) + facet_wrap(~class)
    
    image.png
    > ggplot(data=mpg) + geom_point(mapping=aes(x=displ, y=hwy)) + geom_smooth(mapping=aes(x=displ, y=hwy))
    `geom_smooth()` using method = 'loess' and formula 'y ~ x'
    
    image.png
    > ggplot(mpg, aes(x=displ, y=hwy)) + geom_point() + geom_smooth(method="lm")
    `geom_smooth()` using formula 'y ~ x'
    
    image.png
    ggplot2 速查表:https://rstudio.com/wp-content/uploads/2015/03/ggplot2-cheatsheet.pdf

    filter() 过滤函数

    > mpg %>% filter(displ>=5, hwy<20)
    # A tibble: 29 x 11
       manufacturer model displ  year   cyl trans drv     cty   hwy fl   
       <chr>        <chr> <dbl> <int> <int> <chr> <chr> <int> <int> <chr>
     1 chevrolet    c150~   5.3  2008     8 auto~ r        11    15 e    
     2 chevrolet    c150~   5.7  1999     8 auto~ r        13    17 r    
     3 chevrolet    c150~   6    2008     8 auto~ r        12    17 r    
     4 chevrolet    k150~   5.3  2008     8 auto~ 4        14    19 r    
     5 chevrolet    k150~   5.3  2008     8 auto~ 4        11    14 e    
     6 chevrolet    k150~   5.7  1999     8 auto~ 4        11    15 r    
     7 chevrolet    k150~   6.5  1999     8 auto~ 4        14    17 d    
     8 dodge        dako~   5.2  1999     8 manu~ 4        11    17 r    
     9 dodge        dako~   5.2  1999     8 auto~ 4        11    15 r    
    10 dodge        dura~   5.2  1999     8 auto~ 4        11    16 r    
    # ... with 19 more rows, and 1 more variable: class <chr>
    

    arrange() 排序函数

    > mpg %>% filter(displ>=5, hwy<20) %>% arrange(desc(year), hwy)
    # A tibble: 29 x 11
       manufacturer model displ  year   cyl trans drv     cty   hwy fl   
       <chr>        <chr> <dbl> <int> <int> <chr> <chr> <int> <int> <chr>
     1 chevrolet    k150~   5.3  2008     8 auto~ 4        11    14 e    
     2 jeep         gran~   6.1  2008     8 auto~ 4        11    14 p    
     3 chevrolet    c150~   5.3  2008     8 auto~ r        11    15 e    
     4 chevrolet    c150~   6    2008     8 auto~ r        12    17 r    
     5 dodge        ram ~   5.7  2008     8 auto~ 4        13    17 r    
     6 ford         f150~   5.4  2008     8 auto~ 4        13    17 r    
     7 dodge        dura~   5.7  2008     8 auto~ 4        13    18 r    
     8 ford         expe~   5.4  2008     8 auto~ r        12    18 r    
     9 jeep         gran~   5.7  2008     8 auto~ 4        13    18 r    
    10 lincoln      navi~   5.4  2008     8 auto~ r        12    18 r    
    # ... with 19 more rows, and 1 more variable: class <chr>
    

    select() 提取函数

    > mpg %>% filter(displ>=5, hwy<20) %>% arrange(desc(year), hwy) %>% select(model)
    # A tibble: 29 x 1
       model              
       <chr>              
     1 k1500 tahoe 4wd    
     2 grand cherokee 4wd 
     3 c1500 suburban 2wd 
     4 c1500 suburban 2wd 
     5 ram 1500 pickup 4wd
     6 f150 pickup 4wd    
     7 durango 4wd        
     8 expedition 2wd     
     9 grand cherokee 4wd 
    10 navigator 2wd      
    # ... with 19 more rows
    

    mutate() 添加新列

    > mpg %>% mutate(ave_displ=displ/cyl) %>% select(ave_displ)
    # A tibble: 234 x 1
       ave_displ
           <dbl>
     1     0.45 
     2     0.45 
     3     0.5  
     4     0.5  
     5     0.467
     6     0.467
     7     0.517
     8     0.45 
     9     0.45 
    10     0.5  
    # ... with 224 more rows
    

    group_by() 条件分组函数

    > mpg %>% group_by(class) %>% summarise(mean(displ), mean(hwy))
    # A tibble: 7 x 3
      class      `mean(displ)` `mean(hwy)`
      <chr>              <dbl>       <dbl>
    1 2seater             6.16        24.8
    2 compact             2.33        28.3
    3 midsize             2.92        27.3
    4 minivan             3.39        22.4
    5 pickup              4.42        16.9
    6 subcompact          2.66        28.1
    7 suv                 4.46        18.1
    

    数据整理(dplyr/tidyr)速查表:https://rstudio.com/wp-content/uploads/2015/02/data-wrangling-cheatsheet.pdf

    相关文章

      网友评论

          本文标题:02R语言基础入门

          本文链接:https://www.haomeiwen.com/subject/peeaektx.html