美文网首页The Art for R Programming
[R语言] 《R语言编程艺术》 第3章 矩阵和数组

[R语言] 《R语言编程艺术》 第3章 矩阵和数组

作者: 半为花间酒 | 来源:发表于2020-04-24 12:38 被阅读0次

    Creating Matrices

    The internal storage of a matrix is in column-major order
    You can set the byrow argument in matrix() to true to indicate that the data is coming in row-major order

    # 创建
    y <- matrix(c(1,2,3,4),nrow=2) 
    y
    #      [,1] [,2]
    # [1,]    1    3
    # [2,]    2    4
    
    # 也可以先声明再创建
    y <- matrix(nrow=2,ncol=2) 
    y[1,1] <- 1 
    y[2,1] <- 2 
    y[1,2] <- 3 
    y[2,2] <- 4
    
    # 取数
    y[,2] 
    # [1] 3 4
    
    # 赋值
    y[2,1] <- 7
    
    # 修改储存顺序
    y <- matrix(c(1,2,3,4),nrow=2,byrow=TRUE) 
    y
    #      [,1] [,2]
    # [1,]    1    2
    # [2,]    3    4
    

    General Matrix Operations

    - Performing Linear Algebra Operationson Matrices

    y
    #      [,1] [,2]
    # [1,]    1    3
    # [2,]    2    4
    
    y * y
    #      [,1] [,2]
    # [1,]    1    9
    # [2,]    4   16
    
    # 用%会修正为线性代数的矩阵运算
    y %*% y
    #      [,1] [,2]
    # [1,]    7   15
    # [2,]   10   22
    
    • 子矩阵赋值
    y 
    #    [,1] [,2] 
    # [1,] 11  12 
    # [2,] 21  22 
    # [3,] 31  32
    
    y[c(1,3),] <- matrix(c(1,1,8,12),nrow=2) 
    

    - 拓展案例一

    —— Image Manipulation

    课本上用的是拉什莫尔山,这里换了个图

    附上一个在线转换图片格式的网站:在线图片转换器

    library(pixmap)
    
    # pgm格式rio包无法识别
    art <- read.pnm('Art.pgm')
    art
    # # Pixmap image
    #  Type          : pixmapGrey 
    #  Size          : 225x225 
    #  Resolution    : 1x1 
    #  Bounding box  : 0 0 225 225 
    plot(art)
    
    str(art)
    # Formal class 'pixmapGrey' [package "pixmap"] with 6 slots
    # ..@ grey    : num [1:225, 1:225] 1 1 1 1 1 1 1 1 1 1 ...
    # ..@ channels: chr "grey"
    # ..@ size    : int [1:2] 225 225
    # ..@ cellres : num [1:2] 1 1
    # ..@ bbox    : num [1:4] 0 0 225 225
    # ..@ bbcent  : logi FALSE
    
    • The class here is of the S4 type, whose components are designated by@
    • The intensities are stored as numbers ranging from 0.0 (black) to 1.0 (white)
    • The random noise itself is a sample from U(0,1), the uniform distribution on the interval (0,1)
    • 使用locator()点击图片再按finish获取点的坐标
    locator()
    # $x
    # [1]  53.50925 181.81603
    # 
    # $y
    # [1] 150.38467  85.09246
    
    # 注意,用locator获取的坐标x和y是颠倒的
    art2 <- art
    art2@grey[85:150,53:181] <- 1
    plot(art2)
    
    • 添加噪声
      (课本上给出的代码有误..)
    # q值调节噪声的权重
    blurpart <- function(img,rows,cols,q) { 
      lrows <- length(rows) 
      lcols <- length(cols) 
      newimg <- img 
      randomnoise <- matrix(nrow=lrows, ncol=lcols, runif(lrows * lcols)) 
      newimg@grey[rows,cols] <- (1-q) * img@grey[rows,cols] + q * randomnoise 
      return(newimg) 
      }
    
    library(rafalib)
    mypar(1,3)
    
    art1 <- blurpart(art,85:150,53:181,0.3)
    plot(art1)
    art2 <- blurpart(art,85:150,53:181,0.6)
    plot(art2)
    art3 <- blurpart(art,85:150,53:181,0.9)
    plot(art3)
    

    Filtering on Matrices

    x <- matrix(c(1,2,2,3,3,4),nrow=3,byrow = TRUE)
    z <- c(5,12,13)
    x[z %% 2 == 1,]
    #      [,1] [,2]
    # [1,]    1    2
    # [2,]    3    4
    
    • 矩阵的提取会自动降维的特性
    x[x[,1] > 2 & x[,2] > 3,]
    # [1] 3 4
    # 矩阵变向量
    
    # 提取子集时加上 drop 参数可以限制降维
    x[x[,1] > 2 & x[,2] > 3,,drop = FALSE]
    #      [,1] [,2]
    # [1,]    3    4
    
    • 矩阵本质是长向量
    x
    #      [,1] [,2]
    # [1,]    1    2
    # [2,]    2    3
    # [3,]    3    4
    which(x > 2)
    # [1] 3 5 6
    

    - 拓展案例二

    —— Generating a Covariance Matrix

    # 举例row和col函数的用法
    x <- matrix(c(1,4,7,8,3,5),nrow=3,byrow = TRUE)
    x
    #      [,1] [,2]
    # [1,]    1    4
    # [2,]    7    8
    # [3,]    3    5
    
    row(x)
    #      [,1] [,2]
    # [1,]    1    1
    # [2,]    2    2
    # [3,]    3    3
    
    col(x)
    #      [,1] [,2]
    # [1,]    1    2
    # [2,]    1    2
    # [3,]    1    2
    

    Our matrix will have n rows and n columns, and we wish each of the n variables to have variance 1, with correlation rho between pairs of variables.

    makecov <- function(rho,n) { 
      m <- matrix(nrow=n,ncol=n) 
      m <- ifelse(row(m) == col(m),1,rho) 
      return(m) 
    }
    
    makecov(0.3,4)
    #      [,1] [,2] [,3] [,4]
    # [1,]  1.0  0.3  0.3  0.3
    # [2,]  0.3  1.0  0.3  0.3
    # [3,]  0.3  0.3  1.0  0.3
    # [4,]  0.3  0.3  0.3  1.0
    

    Applying Functions to Matrix Rows and Columns

    x
    #      [,1] [,2]
    # [1,]    1    4
    # [2,]    7    8
    # [3,]    3    5
    
    apply(x, 1, mean)
    # [1] 2.5 7.5 4.0
    
    apply(x, 2, mean)
    # [1] 3.666667 5.666667
    
    • 使用自定义函数
    x
    #      [,1] [,2]
    # [1,]    1    4
    # [2,]    7    8
    # [3,]    3    5
    
    func <- function(x) x / c(2,1)
    
    apply(x, 1, func)
    #      [,1] [,2] [,3]
    # [1,]  0.5  3.5  1.5
    # [2,]  4.0  8.0  5.0
    
    t(apply(x, 1, func))
    #      [,1] [,2]
    # [1,]  0.5    4
    # [2,]  3.5    8
    # [3,]  1.5    5
    
    apply(x, 2, func)
    #      [,1] [,2]
    # [1,]  0.5  2.0
    # [2,]  7.0  8.0
    # [3,]  1.5  2.5
    # Warning messages:
    # 1: In x/c(2, 1) : 长的对象长度不是短的对象长度的整倍数
    # 2: In x/c(2, 1) : 长的对象长度不是短的对象长度的整倍数
    
    • function中需多个参数时的apply
    copymaj <- function(rw,d) { 
      maj <- sum(rw[1:d]) / d 
      return(if(maj > 0.5) 1 else 0) 
      }
    
    x <- matrix(c(1,0,1,1,0,
                  1,1,1,1,0,
                  1,0,0,1,1,
                  0,1,1,1,0),nrow=4,byrow = TRUE)
    
    apply(x,1,copymaj,2)
    # [1] 0 1 0 0
    apply(x,1,copymaj,3)
    # [1] 1 1 0 1
    apply(x,1,copymaj,4)
    # [1] 1 1 0 1
    

    - 拓展案例三

    —— Finding Outliers
    Say we have retail sales data in a matrix rs.
    Each row of data is for a different store, and observations within a row are daily sales figures.

    # 嵌套函数的玩法
    findols <- function(x) {
      findol <- function(xrow) { 
        mdn <- median(xrow) 
        devs <- abs(xrow-mdn) 
        # 寻找偏离中位数最大值的索引
        return(which.max(devs)) 
        }  
      return(apply(x,1,findol)) 
      }
    

    Adding and Deleting Matrix Rows and Columns

    - Changing the Size of a Matrix

    cbindrbind

    one <- c(1,1,1,1)
    one
    # [1] 1 1 1 1
    
    z <- matrix(c(1,2,3,4,
                  1,1,0,0,
                  1,0,1,0),nrow = 4)
    z
    #      [,1] [,2] [,3]
    # [1,]    1    1    1
    # [2,]    2    1    0
    # [3,]    3    0    1
    # [4,]    4    0    0
    
    z <- cbind(one,z);z
    #       one      
    # [1,]   1 1 1 1
    # [2,]   1 2 1 0
    # [3,]   1 3 0 1
    # [4,]   1 4 0 0
    
    cbind(1,z) 
    #     one      
    # [1,] 1   1 1 1 1
    # [2,] 1   1 2 1 0
    # [3,] 1   1 3 0 1
    # [4,] 1   1 4 0 0
    
    • 可以用rbind或者cbind快速创建矩阵
    cbind(c(1,2),c(3,4))
    #      [,1] [,2]
    # [1,]    1    3
    # [2,]    2    4
    
    rbind(c(1,2),c(3,4))
    #      [,1] [,2]
    # [1,]    1    2
    # [2,]    3    4
    
    • 创建和bind形成新矩阵的时间消耗很多,尽可能在一开始分配一个足够大的空矩阵

    - 拓展案例四

    —— Finding the Closest Pair of Vertices in a Graph
    图论中的基本案例,输入一个城市距离矩阵q,由城市i和城市j共同决定,需要寻找矩阵中的最小距离以及对应的两个城市

    q <- matrix(c(0 ,12,13, 8,20,
                  12, 0,15,28,88,
                  13,15, 0, 6, 9,
                   8,28, 6, 0,33,
                  20,88, 9,33, 0),nrow=5,byrow=TRUE)
    

    思路:
    apply找到每一行最小值后再比较全部最小值

    由于矩阵是对称的(行数n也等于列数),寻找第i行最小值只需要比较i+1,i+2 ... n这些元素,最后一行无需比较

    q <- matrix(c(0,12,13,8,20,
                  12,0,15,28,88,
                  13,15,0,6,9,
                  8,28,6,0,33,
                  20,88,9,33,0),nrow=5,byrow=TRUE); q
    
    imin <- function(x) { 
      lx <- length(x) 
      i <- x[lx]
      j <- which.min(x[(i+1):(lx-1)]) 
      k <- i + j 
      return(c(k,x[k])) 
    }
    
    mind <- function(d) {
      n <- nrow(d) 
      # 将行号拼到矩阵中,便于imin每一行的判断从行号+1开始
      dd <- cbind(d,1:n) 
      # 由于对称原理可以去掉最后一行不用参与最小值计算
      wmins <- apply(dd[-n,],1,imin) 
      # print(wmins)
      #      [,1] [,2] [,3] [,4]
      # [1,]    4    3    4    5
      # [2,]    8   15    6   33
      # apply是纵向存储,第一行是列号,返回矩阵的列号是原行号
      # 第二行是原矩阵行最小值
      i <- which.min(wmins[2,]) 
      j <- wmins[1,i] 
      return(c(d[i,j],i,j)) 
      }
    
    mind(q)
    # [1] 6 3 4
    

    如果矩阵中数据不重复可以用以下代码:

    minda <- function(d) { 
      smallest <- min(d) 
      ij <- which(d == smallest,arr.ind = TRUE) 
      return(c(smallest,ij)) 
    }
    # 执行速度比上一个代码要慢很多
    

    arr.ind = TRUE允许返回矩阵的行列索引,否则当作向量处理,细节如下

    x <- matrix(c(5,2,7,4,1,6,3,8,9),nrow=3)
    x
    #      [,1] [,2] [,3]
    # [1,]    5    4    3
    # [2,]    2    1    8
    # [3,]    7    6    9
    
    min(x)
    # [1] 1
    
    ij <- which(x == min(x));ij
    # [1] 5
    ij <- which(x == min(x),arr.ind = TRUE);ij
    #      row col
    # [1,]   2   2
    
    c(min(x),ij)
    # [1] 1 2 2
    

    More on the Vector/Matrix Distinction

    1. dim()
    2. nrow()
    nrow
    # function (x) 
    # dim(x)[1L]
    # <bytecode: 0x0000025e707d6600>
    # <environment: namespace:base>
    
    1. ncol()

    Avoiding Unintended Dimension Reduction

    x
    #      [,1] [,2] [,3] [,4] [,5]
    # [1,]    1    0    1    1    0
    # [2,]    1    1    1    1    0
    # [3,]    1    0    0    1    1
    # [4,]    0    1    1    1    0
    
    r <- x[2,];r
    # [1] 1 1 1 1 0
    
    dim(x)
    # [1] 4 5
    
    dim(r)
    # NULL
    
    str(x)
    # num [1:4, 1:5] 1 1 1 0 0 1 0 1 1 1 ...
    
    str(r)
    # num [1:5] 1 1 1 1 0
    
    # r is a vector, not a matrix. 
    

    This is a vector of length 2, rather than a 1-by-2 matrix.

    策略:

    1. drop=FALSE避免意外降维
    2. as.matrix()将vector转换回矩阵

    Naming Matrix Rows and Columns

    z
    #      [,1] [,2] [,3] [,4]
    # [1,]    1    1    1    1
    # [2,]    1    2    1    0
    # [3,]    1    3    0    1
    # [4,]    1    4    0    0  
    
    colnames(z)
    # NULL
    
    colnames(z) <- c("a","b","c","d")
    z
    #      a b c d
    # [1,] 1 1 1 1
    # [2,] 1 2 1 0
    # [3,] 1 3 0 1
    # [4,] 1 4 0 0
    
    z[2,"c"]
    # c 
    # 1 
    

    These names can then be used to reference specific columns. The function rownames() works similarly.

    Higher-Dimensional Arrays

    firsttest 
    #     [,1] [,2] 
    # [1,]  46  30 
    # [2,]  21  25 
    # [3,]  50  50
    
    secondtest 
    #    [,1] [,2] 
    # [1,]  46  43 
    # [2,]  41  35 
    # [3,]  50  50
    
    tests <- array(data=c(firsttest,secondtest),dim=c(3,2,2))
    # dim中第三个数字指layer
    

    相关文章

      网友评论

        本文标题:[R语言] 《R语言编程艺术》 第3章 矩阵和数组

        本文链接:https://www.haomeiwen.com/subject/bvlmihtx.html