3. 函数

自定义函数

function(para1,para2,para3){func}

# Demonstrates default arguments: each parameter falls back to its default
# value when the caller omits it, so func() prints 6.
func <- function(x = 1, y = 2, z = 3) {
  total <- x + y + z
  print(total)  # print() also returns `total` invisibly
}
func()
>>>
6

# Function definition without defaults: all three arguments must be supplied.
# Prints the sum of the arguments.
func <- function(x, y, z) {
  total <- x + y + z
  print(total)  # print() also returns `total` invisibly
}
func(1,2,3)  #调用
>>>
6

数学/统计函数及运用

abs()
sqrt()
ceiling(x)          #不小于x的最小整数
floor(x)            #不大于x的最大整数
trunc(x)            #向0的方向截取的x中的整数部分 trunc(5.99) >>> 5
round(x, digits=n)  #将x舍入为指定位的小数
signif(x, digits=n) #将x四舍五入为指定的有效数字位数

cos()
sin()
tan()
acos()  #反余弦
asin()  #反正弦
atan()  #反正切
cosh()  #双曲余弦
sinh()  #双曲正弦
tanh()  #双曲正切
acosh() #反双曲余弦
asinh() #反双曲正弦
atanh() #反双曲正切

log(x, base=n)
log(x)
log10(x)
exp(x)  #以自然常数e为底的指数函数

sum()
cumsum() #累积和 cumsum(1:100)
prod()   #乘积   prod(1:10)
min()
max()
mean()
median()
sd()
var()  #方差
mad()  #绝对中位差(median absolute deviation)
quantile(x, probs)      # probs 取值范围为 [0,1]；quantile(x, c(.3, .84)) 求x的30%和84%分位点
range()
diff(x, lag=n)          # 滞后差分，lag用以指定滞后几项。默认lag值为1


#标准化/归一化/中心化/无量纲化
scale(x, center=TRUE)   # 为数据对象x 按列进行中心化(center=TRUE)或标准化(scale=TRUE，默认值)

# 等差数列
seq(from, to, by) 

# 等比数列
library(bsts)
GeometricSequence(length, start_num, ratio)

多函数调用方法

# Alternative without attach(): qualify each column with the data frame name.
# mean(mtcars$mpg)
# sd(mtcars$cyl)

# attach() puts the columns of mtcars on the search path so that several
# calls can refer to them by bare name.
attach(mtcars)
mean(mpg)
sd(cyl)
# Always pair attach() with detach(); otherwise mtcars stays on the search
# path and re-running the snippet stacks duplicate (masking) entries.
detach(mtcars)

字符处理函数

image-20200808111606716

nchar(str)

str = c('1234','aaa')
nchar(str)
>>>
4 3

substr(x, start, stop)

'''
df = data.frame(col_1=c('hahha','heheh','xixiixi'), col_2=(1:3))
>>>
col_1  col_2
hahha    1 
heheh    2 
xixiixi  3
'''
substr(df$col_1,1,3)
>>>
'hah' 'heh' 'xix'

转置
```
t()
```

连接字符串 paste

paste(df$col_1, df$col_2, sep=',')
>>> 
'hahha,1' 'heheh,2' 'xixiixi,3'

paste("A", 1:5, sep = "")
>>>
 "A1" "A2" "A3" "A4" "A5"

其他实用函数

image-20200808112233091

length()
seq()
rep(x, times)

rep(paste("A", 1:5, sep = ""), rep(2,5))
>>>
"A1" "A1" "A2" "A2" "A3" "A3" "A4" "A4" "A5" "A5"

cut()
pretty()
cat()

apply/ sapply/ lapply/ tapply

apply()

apply会把一个函数同时作用于一个数组或者矩阵的一个margin，然后返回值存在一个向量或者数组中，

也就是说把每一个margin作为一个函数的输入，对应一个输出，所有的输出放在一起返回来。

margin：可以是数组的每一行 / 每一列。值得注意的是这里的数组未必是 2维的，更高维也可以。一个具体例子，求一个2维数组每一列和每一行的平均值

apply需要三个参数

> x=array(rnorm(12),c(3,4))
> x
          [,1]        [,2]      [,3]      [,4]
[1,] 2.2119685 -0.03697591 0.1248037  1.347029
[2,] 0.2455940  0.04773594 0.1007243  1.420932
[3,] 0.3983581  1.65307142 1.3371164 -3.089809

# apply(X, MARGIN, FUN)
# MARGIN: 1--行； 2--列
> apply(x,2,mean)
[1]  0.9519735  0.5546105  0.5208814 -0.1072824

# 计算每行的平方和
> myFun=function(x){sum(x^2)}
> apply(x,1,myFun)
[1]  6.724236  2.091790 14.226134

sapply()和lapply()

用于一个vector/list上面

一般需要两个参数，第一个参数是输入数据，第二个是函数。

区别：sapply返回的是一个vector；lapply返回的是一个list

sapply is a user-friendly version and wrapper of lapply, by default
returning a vector, matrix or, if simplify = "array",
an array if appropriate, by applying simplify2array().
sapply(x, f, simplify = FALSE, USE.NAMES = FALSE) is the same as lapply(x, f).

lapply(X, FUN, ...)
sapply(X, FUN, ..., simplify = TRUE, USE.NAMES = TRUE)
'''
X         a vector (atomic or list) or an expression object. Other objects 
          (including classed objects) will be coerced by base::as.list.

FUN       the function to be applied to each element of X: see ‘Details’. In the
          case of functions like +, %*%, the function name must be backquoted or
          quoted.

simplify  logical or character string;
          should the result be simplified to a vector, matrix or higher
          dimensional array if possible?
          For sapply it must be named and not abbreviated.
          The default value, TRUE, returns a vector or matrix if appropriate;
          whereas if simplify = "array" the result may be an array of “rank”
          (=length(dim(.))) one higher than the result of FUN(X[[i]]).
'''

# sapply
> x=array(1:20,c(4,5))
> x
     [,1] [,2] [,3] [,4] [,5]
[1,]    1    5    9   13   17
[2,]    2    6   10   14   18
[3,]    3    7   11   15   19
[4,]    4    8   12   16   20

> sapply(x,function(x) {x^2})
 [1]   1   4   9  16  25  36  49  64  81 100 121 144 169 196 225 256 289 324 361 400

-------------------------------------------------
myvars <- c("mpg", "hp", "wt") 
# Descriptive statistics for a numeric vector.
#
# Args:
#   x:       numeric vector.
#   na.omit: if TRUE, NA values are removed from x before anything is computed.
#
# Returns a named numeric vector with elements n (number of values used),
# mean, stdev (as computed by sd()), skew and kurtosis (excess kurtosis,
# i.e. the fourth standardized moment minus 3).
mystats <- function(x, na.omit = FALSE) {
  if (na.omit) {
    x <- x[!is.na(x)]
  }
  n <- length(x)
  m <- mean(x)
  s <- sd(x)
  dev <- x - m  # deviations from the mean, shared by both moment formulas
  skew <- sum(dev^3 / s^3) / n
  kurt <- sum(dev^4 / s^4) / n - 3
  c(n = n, mean = m, stdev = s, skew = skew, kurtosis = kurt)
}

sapply(mtcars[myvars], mystats)