美文网首页
R,笔记03

R,笔记03

作者: 按着易得 | 来源:发表于2018-12-15 15:08 被阅读0次

    基本运算

    > x = 3 ** 2
    > x
    [1] 9
    > 
    > x = 3 ^ 2
    > x
    [1] 9
    > 
    > x = sqrt(64)
    > x
    [1] 8
    > 
    > x = -7
    > y = abs(x)
    > y
    [1] 7
    > 
    

    对数 指数

    > x = exp(1) # exp(x)计算自然常数e的x次方
    > x
    [1] 2.718282
    > 
    > x = exp(3) # e的3次方
    > x
    [1] 20.08554
    > 
    > x = exp(0.5) # e的0.5次方
    > x
    [1] 1.648721
    > 
    > 
    > # exp()和log()互为反函数
    > # log()以e为底的对数
    > # 一般基底的对数,语法是log(x,m),其中m是底。如果底是10,也可这么写log10()
    > x = log(2) # 计算以e为底的对数
    > x
    [1] 0.6931472
    > 
    > x = log(2, 10) # 计算以10为底的对数
    > x 
    [1] 0.30103
    > 
    > x = log10(2) # 计算以10为底的对数
    > x 
    [1] 0.30103
    > 
    > x = 1.2 * 10^4
    > x
    [1] 12000
    > 
    > x = 1.2e4
    > x
    [1] 12000
    > 
    

    近似数

    > round(98.345, digits = 2) 
    [1] 98.34
    > round(98.345, 2)
    [1] 98.34
    > 
    > signif(1234567.896543, digits = 7) #第二个位置是有效数字的个数
    [1] 1234568
    > signif(1234567.896543, digits = 8)
    [1] 1234568
    > signif(1234567.896543, digits = 6)
    [1] 1234570
    > signif(1234567.896543, digits = 5)
    [1] 1234600
    > signif(1234567.896543, digits = 1)
    [1] 1e+06
    > 
    > # 近似函数floor(), ceiling(), trunc()可直接取整数
    > 
    

    设置重复

    > # rep(x, times=重复次数,each=每次每个元素的重复次数,length.out=向量长度)
    > 
    

    简单统计

    > # sum(), max(), min(), mean()
    > 
    > # prod() 计算所有元素的积
    > 
    > # cumsum()计算所有元素的累计和
    > 
    > # cumprod()计算所有元素的累计积
    > 
    > # cummax()可返回各元素从向量起点到该元素位置间所有元素的最大值
    > 
    > # cummin()可返回各元素从向量起点到该元素位置间所有元素的最小值
    > 
    > # diff()返回相邻元素之差(后一元素减去前一元素)
    > 
    > # sort(x, decreasing = FALSE)排序
    > 
    > # rank()返回将元素从小到大排序后的位置编号
    > 
    > # rev()将向量对象颠倒排列
    > 
    > # length()计算向量对象长度,即向量对象元素个数
    > 
    > # sd()标准差
    > 
    > # var()样本变异数
    > 
    

    查询确认/转换数据类型

    > # is.integer()
    > # is.numeric()
    > # is.double()
    > # is.character()
    > # is.matrix()
    > # is.array()
    > 
    > # as.character()
    > # as.numeric()
    > 
    > 
    > # str()探索对象结构,了解数据类型/长度/内容
    > # class()对向量对象而言,可使用它了解对象元素的数据类型
    > 
    

    逻辑运算

    > # x & y 如果x和y均为T,则传回T
    > # x | y 如果x或y为T,则传回T
    > # !x 传回非x
    > # xor (x, y) 相当于XOR运算,如果x和y不同,传回T
    

    which函数

    > # which()所使用的参数是一个比较表达式,可以返回符合条件的索引值
    > x <- c(6, 5, 3, 4, 5)
    > which( x > 4 )
    [1] 1 2 5
    > 
    > # which.max():给出最大值的第一个索引值。注意,一个向量中,最大值可能出现多次。
    > # which.min():给出最小值的第一个索引值。
    

    NA的去除

    > # 关于NA。可先用is.na()判断向量中是否含有NA,然后用!is.na()即可删除NA
    > x <- c(9, 1, NA, 8, 6)
    > x[x>5 & !is.na(x)]
    [1] 9 8 6
    > x
    [1]  9  1 NA  8  6
    

    any函数

    > # any(),给予比较条件,只要参数向量对象有1个元素是T,则返回T
    > 
    > # 向量对象元素的命名obj <- c (name1 = data1, name2 = data2, ……)
    

    names等函数

    > # names()可查询向量对象元素名称,也可更改向量对象元素名称。如想删除向量对象的元素的名称,将其设为NULL即可。
    > 
    > # matrix(data, nrow=?, ncol=?, byrow=logical, dimnames=NULL)
    > # 注意:下面这次调用会报错,因为此时rownames和colnames尚未被赋值为名称向量(仍是R的同名函数);应先定义这两个向量,再调用matrix()
    > x <- matrix(3:11, nrow = 3, byrow=T, dimnames=list(rownames,colnames))
    Error in matrix(3:11, nrow = 3, byrow = T, dimnames = list(rownames, colnames)) : 
      length of 'dimnames' [1] not equal to array extent
    > colnames <- c("col1", "col2", "col3")
    > rownames <- c("row1","row2","row3")
    > x
    [1]  9  1 NA  8  6
    

    矩阵

    > # ncol()可得到矩阵列数
    > # nrow()
    > # dim()获得矩阵的行和列
    > # length()也可用于矩阵和数组对象的元素个数
    
    > # 将向量组成矩阵,用cbind()或rbind()
    > # 如何取得元素值?
    > # 如何修改元素值?
    > # 如何取得和修改矩阵对象的行名和列名?也可以用dimnames()来获得
    > # 在矩阵中可用行名和列名代替索引取得元素值
    
    > # rowSums()
    > # colSums()
    > # rowMeans()
    > # colMeans()
    
    > # t()转置
    

    factor

    > # 使用factor()或as.factor()函数建立因子。参数当中,x向量:是要转为因子的向量;levels:原x内元素的可能值
    > 
    > yes.or.no <- c("yes", "no", "no", "yes", "yes")
    > first.factor <- factor(yes.or.no)
    > first.factor
    [1] yes no  no  yes yes
    Levels: no yes
    > 
    > 
    > yes.or.no <- c("yes", "no", "no", "yes", "yes")
    > second.factor <- factor(yes.or.no, levels = c("yes", "no")) # 指定顺序
    > second.factor
    [1] yes no  no  yes yes
    Levels: yes no
    > 
    > 
    > # 指定缺失的levels值
    > directions <- c("east", "west", "north", "east", "west")
    > a.factor <- factor(directions)
    > a.factor # 缺少一个factor, 可补上
    [1] east  west  north east  west 
    Levels: east north west
    > 
    > b.factor <- factor(a.factor, levels = c("east", "west", "south", "north")) #补上了
    > b.factor
    [1] east  west  north east  west 
    Levels: east west south north
    > 
    > # 因子带标签
    > c.factor <- factor(b.factor, levels = c("east", "west", "south", "north"), labels = c("E", "W", "S", "N"))
    > c.factor
    [1] E W N E W
    Levels: E W S N
    > 
    > # 因子的level参数。用nlevels()可以传回levels的数量,length()是传回因子元素的数量。
    > 
    > # 数值型因子在转换时常见的错误
    > temperature <- factor(c(28, 32, 30, 34, 32, 34))
    > str(temperature) #level有4个值28/30/32/34,分别对应1/2/3/4.注意下面的情况:
     Factor w/ 4 levels "28","30","32",..: 1 3 2 4 3 4
    > as.numeric(temperature) # 解决方法如下:
    [1] 1 3 2 4 3 4
    > as.numeric(as.character(temperature))
    [1] 28 32 30 34 32 34
    > 
    > #有序因子
    > str1 <- c("A", "B", "A", "C", "D", "B", "D")
    > str1.order <- factor(str1, levels = c("D", "C", "B", "A"), ordered = T)
    > str1.order
    [1] A B A C D B D
    Levels: D < C < B < A
    > 
    > #table()统计在因子的所有元素中,levels中各值出现的次数。即level可能取得的值的出现的次数。
    > #state.name向量收集了美国50个州的州名,state.region是因子,记录每个州属于美国哪个区。
    > state.region
     [1] South         West          West          South         West          West          Northeast     South         South         South         West         
    [12] West          North Central North Central North Central North Central South         South         Northeast     South         Northeast     North Central
    [23] North Central South         North Central West          North Central West          Northeast     Northeast     West          Northeast     South        
    [34] North Central North Central South         West          Northeast     Northeast     South         North Central South         South         West         
    [45] Northeast     South         West          South         North Central West         
    Levels: Northeast South North Central West
    > table(state.region)
    state.region
        Northeast         South North Central          West 
                9            16            12            13 
    

    数据框

    > # 数据框
    > name <- c("Kevin", "Peter", "Frank", "Maggie")
    > gender <- c("M", "M", "M", "F")
    > height <- c(170, 175, 165, 168)
    > info <- data.frame(name, gender, height)
    > info
        name gender height
    1  Kevin      M    170
    2  Peter      M    175
    3  Frank      M    165
    4 Maggie      F    168
    > 
    > #分别用names()和colnames()查询info数据框的列名
    > names(info)
    [1] "name"   "gender" "height"
    > colnames(info)
    [1] "name"   "gender" "height"
    > 
    > #查询行名
    > row.names(info)
    [1] "1" "2" "3" "4"
    > 
    > #用names()给第一列改名
    > names(info)[1] <- "n.name"
    > info
      n.name gender height
    1  Kevin      M    170
    2  Peter      M    175
    3  Frank      M    165
    4 Maggie      F    168
    > 
    > # 认识数据框结构
    > str(info) #发现字符串变成因子了。这是R 4.0.0之前版本的默认行为。如不想要,则在data.frame()中设置stringsAsFactors = F
    'data.frame':   4 obs. of  3 variables:
     $ n.name: Factor w/ 4 levels "Frank","Kevin",..: 2 4 1 3
     $ gender: Factor w/ 2 levels "F","M": 2 2 2 1
     $ height: num  170 175 165 168
    > 
    > # 取数据框内容
    > info[, "n.name"]
    [1] Kevin  Peter  Frank  Maggie
    Levels: Frank Kevin Maggie Peter
    > info[2, ]
      n.name gender height
    2  Peter      M    175
    > info$n.name
    [1] Kevin  Peter  Frank  Maggie
    Levels: Frank Kevin Maggie Peter
    > info[, 1] #返回的是向量
    [1] Kevin  Peter  Frank  Maggie
    Levels: Frank Kevin Maggie Peter
    > info[1] #返回的是数据框
      n.name
    1  Kevin
    2  Peter
    3  Frank
    4 Maggie
    > 
    > # 用$为数据框添加列数据
    > weight <- c (65, 71, 58, 55)
    > info$weight <- weight
    > info
      n.name gender height weight
    1  Kevin      M    170     65
    2  Peter      M    175     71
    3  Frank      M    165     58
    4 Maggie      F    168     55
    > 
    > rm(list = ls())
    > name <- c("Kevin", "Peter", "Frank", "Maggie")
    > gender <- c("M", "M", "M", "F")
    > height <- c(170, 175, 165, 168)
    > info <- data.frame(name, gender, height)
    > age <- c(19, 20, 20, 19)
    > score <- c(88, 91, 75, 80)
    > addinfo <- data.frame(age, score)
    > addinfo
      age score
    1  19    88
    2  20    91
    3  20    75
    4  19    80
    > newinfo <- cbind(info, addinfo)
    > newinfo
        name gender height age score
    1  Kevin      M    170  19    88
    2  Peter      M    175  20    91
    3  Frank      M    165  20    75
    4 Maggie      F    168  19    80
    > 
    > #注意,数据框是由一系列列向量组成的,如果把矩阵转为数据框,则用到data.frame()函数。
    > 
    

    list

    > # list
    > 
    > x <- c(7, 8, 6, 11, 9, 12, 12, 8, 9, 15, 7, 12)
    > colnames <- c("1st", "2nd", "3rd", "4th", "5th", "6th")
    > rownames <- c("lin","ge")
    > team.cal <- matrix(x, 2, byrow = T, dimnames=list(rownames,colnames))
    > baskets.cal <- list("zhang", "2018-12", team.cal) # 注意这里的team.cal不要加引号
    > baskets.cal
    [[1]]
    [1] "zhang"
    
    [[2]]
    [1] "2018-12"
    
    [[3]]
        1st 2nd 3rd 4th 5th 6th
    lin   7   8   6  11   9  12
    ge   12   8   9  15   7  12
    
    > 
    > # 给刚才的list里的对象命名
    > n.baskets.cal <- list(teamname = "zhang", season = "2018-12", score.info = team.cal)  # 注意这里的team.cal不要加引号
    > n.baskets.cal
    $`teamname`
    [1] "zhang"
    
    $season
    [1] "2018-12"
    
    $score.info
        1st 2nd 3rd 4th 5th 6th
    lin   7   8   6  11   9  12
    ge   12   8   9  15   7  12
    
    > 
    > #names()函数可以获得及修改list里对象的名称
    > names(n.baskets.cal)
    [1] "teamname"   "season"     "score.info"
    > names(n.baskets.cal)[1] <- "great"
    > n.baskets.cal
    $`great`
    [1] "zhang"
    
    $season
    [1] "2018-12"
    
    $score.info
        1st 2nd 3rd 4th 5th 6th
    lin   7   8   6  11   9  12
    ge   12   8   9  15   7  12
    
    > 
    > # 获得list里的元素
    > n.baskets.cal$great
    [1] "zhang"
    > n.baskets.cal$score.info[2, 4]
    [1] 15
    > n.baskets.cal[[3]][2, 4]
    [1] 15
    > 
    > # list内的对象名可当索引
    > n.baskets.cal[["score.info"]]
        1st 2nd 3rd 4th 5th 6th
    lin   7   8   6  11   9  12
    ge   12   8   9  15   7  12
    > n.baskets.cal[names(n.baskets.cal) != "great"]
    $`season`
    [1] "2018-12"
    
    $score.info
        1st 2nd 3rd 4th 5th 6th
    lin   7   8   6  11   9  12
    ge   12   8   9  15   7  12
    
    > 
    > # 如何修改、添加、删除(赋值为NULL)list里元素的内容?
    > # 如何合并list?
    >
    

    文本操作

    > # 语句分割
    > x <- c("Hello R World")
    > x
    [1] "Hello R World"
    > strsplit(x, " ") # 以空格为界拆分。注意返回的是什么格式
    [[1]]
    [1] "Hello" "R"     "World"
    
    > 
    > # 沿用上例,拆分后存入向量对象内
    > a <- strsplit(x, " ")[[1]]
    > a
    [1] "Hello" "R"     "World"
    > 
    > # toupper() 小写变大写
    > # tolower() 
    > 
    > # unique() 使向量内容不重复出现
    > 
    > # paste()的collapse参数
    > coffee.str <- c("boiling", "coffee", "brings", "out", "a", "bitterly", "taste")
    > paste(coffee.str)
    [1] "boiling"  "coffee"   "brings"   "out"      "a"        "bitterly" "taste"   
    > paste(coffee.str, collapse = " ") # 字符串以空格相连
    [1] "boiling coffee brings out a bitterly taste"
    > 
    > 
    > # paste()主要作用是将两个或多个向量连接
    > str_1 <- letters[1:6]
    > str_2 <- 1:6
    > paste(str_1, str_2)
    [1] "a 1" "b 2" "c 3" "d 4" "e 5" "f 6"
    > paste(str_1, str_2, sep = "") # 去掉空格
    [1] "a1" "b2" "c3" "d4" "e5" "f6"
    > paste(str_1, str_2, sep = "", collapse = " ")
    [1] "a1 b2 c3 d4 e5 f6"
    > 
    > 
    > # 使用索引值搜索
    > # 列出state.name中每个州名的第2到第4个字符组成的子字符串
    > substr(state.name, start = 2, stop = 4)
     [1] "lab" "las" "riz" "rka" "ali" "olo" "onn" "ela" "lor" "eor" "awa" "dah" "lli" "ndi" "owa" "ans" "ent" "oui" "ain" "ary" "ass" "ich" "inn" "iss" "iss" "ont" "ebr"
    [28] "eva" "ew " "ew " "ew " "ew " "ort" "ort" "hio" "kla" "reg" "enn" "hod" "out" "out" "enn" "exa" "tah" "erm" "irg" "ash" "est" "isc" "yom"
    > 
    > # grep(pattern, x) pattern,搜索目标;x,字符串向量
    > grep("M", state.name) # 返回的是索引值
    [1] 19 20 21 22 23 24 25 26 31
    > state.name[grep("M", state.name)]
    [1] "Maine"         "Maryland"      "Massachusetts" "Michigan"      "Minnesota"     "Mississippi"   "Missouri"      "Montana"       "New Mexico"   
    > state.name[grep(" ", state.name)]# 搜索州名中有空格的
     [1] "New Hampshire"  "New Jersey"     "New Mexico"     "New York"       "North Carolina" "North Dakota"   "Rhode Island"   "South Carolina" "South Dakota"  
    [10] "West Virginia" 
    > 
    > # 字符串内容更改
    > # sub(pattern, replacement, x) 其中replacement用空字符""代替,相当于删除。
    > 
    > state.name[grep("New|South", state.name)] # New|South 不要有空格
    [1] "New Hampshire"  "New Jersey"     "New Mexico"     "New York"       "South Carolina" "South Dakota"  
    > 
    > str_a <- c("ch6.xls", "ch7.xls", "ch7.c", "ch7.doc", "ch8.xls")
    > str_b <- c("ch.xls", "ch7.xls", "ch77.xls", "ch87.xls", "ch88.xls")
    > str_a[grep("ch(6|7).xls", str_a)]
    [1] "ch6.xls" "ch7.xls"
    > str_b[grep("ch(7*|8*).xls",str_b)] # 注意,*代表0次或多次;+代表1次或多次
    [1] "ch.xls"   "ch7.xls"  "ch77.xls" "ch88.xls"
    > str_b[grep("ch(7+|8+).xls", str_b)]
    [1] "ch7.xls"  "ch77.xls" "ch88.xls"
    

    相关文章

      网友评论

          本文标题:R,笔记03

          本文链接:https://www.haomeiwen.com/subject/odrahqtx.html