美文网首页 生物信息学分析
R语言中多线程运行程序

R语言中多线程运行程序

作者: JeremyL | 来源:发表于2020-11-22 21:20 被阅读0次

R 代码的运行效率不高,因此有时候可以考虑并行运行程序。

#apply系列函数

  • 实际不是并行
apply(X, MARGIN, FUN, ...)

lapply(X, FUN, ...)

sapply(X, FUN, ..., simplify = TRUE, USE.NAMES = TRUE)

vapply(X, FUN, FUN.VALUE, ..., USE.NAMES = TRUE)

##lapply()

  • 接受一个向量或者列表作为处理对象,返回结果是与输入等长的列表:
lapply(1:5, function(x) x^2) 
[[1]]
[1] 1
[[2]]
[1] 4
[[3]]
[1] 9
[[4]]
[1] 16
[[5]]
[1] 25

lapply(1:5, function(x) c(x^2,x^3)) 
[[1]]
[1] 1 1
[[2]]
[1] 4 8
[[3]]
[1]  9 27
[[4]]
[1] 16 64
[[5]]
[1]  25 125

##sapply()

sapply(1:5, function(x) x^2) #This output is a vector
[1]  1  4  9 16 25
sapply(1:5, function(x) c(x^2,x^3)) #This outputs a matrix
     [,1] [,2] [,3] [,4] [,5]
[1,]    1    4    9   16   25
[2,]    1    8   27   64  125

注: 当设置simplify = FALSE, USE.NAMES = FALSE时,sapply() 返回结果与lapply()一样

sapply(1:5, function(x) x^2, simplify = FALSE, USE.NAMES = FALSE) 
#Output is same as for lapply()
[[1]]
[1] 1
[[2]]
[1] 4
[[3]]
[1] 9
[[4]]
[1] 16
[[5]]
[1] 25

#parallel包

parallel包可以将本地计算机的核分配给R程序,从而并行运行程序。

parallel的工作原理:

  • 查找系统中的内核数量;
  • 分配一部分核创建集群;
  • 程序并行运行时,只需要添加创建好的集群(作为一个参数);
  • 程序运行结束,关闭集群,释放内存。
# parallel is a base package that ships with every R installation, so no
# install.packages() call is needed; attaching it is enough.
library(parallel)
 
#检测系统核的数目
no_cores <- detectCores()
 
#创建集群
clust <- makeCluster(no_cores) 
#lapply()的并行版本是parLapply(),只需要一个额外的集群参数。
parLapply(clust,1:5, function(x) c(x^2,x^3))
[[1]]
[1] 1 1
[[2]]
[1] 4 8
[[3]]
[1]  9 27
[[4]]
[1] 16 64
[[5]]
[1]  25 125
stopCluster(clust)

#sapply()的并行版本是parSapply()
library(parallel)
no_cores <- detectCores()
clust <- makeCluster(no_cores) #This line will take time
base <- 4
clusterExport(clust, "base")
parSapply(clust, 1:5, function(exponent) base^exponent)
[1]    4   16   64  256 1024
stopCluster(clust)

#foreach

  • foreach包需要配合doParallel包使用,通过registerDoParallel()函数注册并行后端,使进程并行。
library(foreach)
library(doParallel)

# Register an implicit cluster by passing the number of workers. The
# stopImplicitCluster() call at the end of this section can then release it.
# registerDoParallel(makeCluster(no_cores)) also works, but in that case the
# cluster object must be stored in a variable and closed with stopCluster();
# stopImplicitCluster() does not shut down an explicitly created cluster.
registerDoParallel(no_cores)
  • foreach()函数需要%dopar%命令并行化程序
#输出向量设置.combine = c
foreach(exponent = 1:5, .combine = c)  %dopar%  base^exponent

[1]    4   16   64  256 1024

#输出矩阵设置.combine = rbind
foreach(exponent = 1:5, .combine = rbind)  %dopar%  base^exponent

         [,1]
result.1    4
result.2   16
result.3   64
result.4  256
result.5 1024


#输出列表设置.combine = list
foreach(exponent = 1:5, .combine = list, .multicombine=TRUE)  %dopar%  base^exponent

[[1]]
[1] 4

[[2]]
[1] 16

[[3]]
[1] 64

[[4]]
[1] 256

[[5]]
[1] 1024

#输出数据框设置.combine = data.frame
foreach(exponent = 1:5, .combine = data.frame)  %dopar%  base^exponent
  result.1 result.2 result.3 result.4 result.5
1        4       16       64      256     1024

#关闭集群
stopImplicitCluster()

#总结

# lapply() returns a list with one element per input value; here each
# element is the square of the corresponding input 1..5.
lapply(seq_len(5), function(x) {
  x^2
})

# Each list element may itself be a vector: here the square and the cube.
lapply(seq_len(5), function(x) {
  c(x^2, x^3)
})

# sapply() simplifies when it can: scalar results collapse into a vector.
sapply(seq_len(5), function(x) {
  x^2
})

# Length-2 results are simplified into a matrix with one column per input.
sapply(seq_len(5), function(x) {
  c(x^2, x^3)
})

# With simplification and names switched off the result is the same list
# that lapply() produces.
sapply(
  seq_len(5),
  function(x) {
    x^2
  },
  simplify = FALSE,
  USE.NAMES = FALSE
)

# parallel is a base package bundled with R, so it only needs to be attached.
library(parallel)

# detectCores() reports the number of CPU cores. It may return NA when the
# count cannot be determined, so fall back to a single worker in that case.
no_cores <- detectCores()
if (is.na(no_cores)) {
  no_cores <- 1L
}

# Start the worker processes (this can take a moment).
clust <- makeCluster(no_cores)

# parLapply() is the parallel counterpart of lapply(); it takes the cluster
# as an additional first argument. The finally clause shuts the workers down
# even if the computation signals an error.
tryCatch(
  parLapply(clust, 1:5, function(x) c(x^2, x^3)),
  finally = stopCluster(clust)
)

# parallel is a base package bundled with R, so it only needs to be attached.
library(parallel)

# detectCores() reports the number of CPU cores. It may return NA when the
# count cannot be determined, so fall back to a single worker in that case.
no_cores <- detectCores()
if (is.na(no_cores)) {
  no_cores <- 1L
}

# Start the worker processes (this can take a moment).
clust <- makeCluster(no_cores)

# Variable that the worker function below refers to.
base <- 4

# Workers are separate R processes: clusterExport() copies `base` to each of
# them so the function passed to parSapply() can find it. The finally clause
# shuts the workers down even if exporting or computing fails.
tryCatch(
  {
    clusterExport(clust, "base")
    parSapply(clust, 1:5, function(exponent) base^exponent)
  },
  finally = stopCluster(clust)
)

# clusterEvalQ() evaluates an expression on every worker, e.g. to attach a
# package there. It needs a running cluster, and the cluster used above has
# already been stopped, so start a fresh one and stop it again afterwards.
clust <- makeCluster(no_cores)
clusterEvalQ(clust, library(randomForest))
stopCluster(clust)

library(foreach)
library(doParallel)

# Keep the cluster object in a variable so it can be shut down explicitly;
# stopImplicitCluster() does not stop a cluster created with makeCluster().
cl <- makeCluster(no_cores)
registerDoParallel(cl)

# Vector output: results are concatenated with c()
foreach(exponent = 1:5, .combine = c) %dopar% base^exponent

# Matrix output: results are stacked as rows with rbind()
foreach(exponent = 1:5, .combine = rbind) %dopar% base^exponent

# List output: .multicombine = TRUE hands all results to list() in one call
foreach(exponent = 1:5, .combine = list, .multicombine = TRUE) %dopar% base^exponent

# Data frame output: each result becomes one column
foreach(exponent = 1:5, .combine = data.frame) %dopar% base^exponent

# Shut down the explicitly created cluster.
stopCluster(cl)

# This also works: given a worker count, registerDoParallel() manages an
# implicit cluster, which stopImplicitCluster() releases again.
registerDoParallel(no_cores)

stopImplicitCluster()

# Using the .export parameter
registerDoParallel(no_cores)

# Defined outside the function that calls foreach().
base <- 2

# Raise `base` to each value of `exponent` in parallel.
#
# exponent: numeric vector of exponents; defaults to 1:5, the values the
#           original loop hard-coded, so sample_func() behaves as before.
# Returns a numeric vector with one element per exponent.
sample_func <- function(exponent = 1:5) {
  # .export is a foreach() argument naming variables to ship to the workers;
  # `base` is listed because it is not local to this function.
  foreach(e = exponent, .combine = c, .export = "base") %dopar% base^e
}
sample_func()
stopImplicitCluster()

# Using the .packages parameter: the packages named there are attached on
# each worker, which makes dplyr's select() available inside the loop body.
library(dplyr)
registerDoParallel(no_cores)
foreach(i = 1:5, .combine = c, .packages = "dplyr") %dopar% {
  # Drop the Species column from row i of iris, then add up what is left.
  numeric_part <- select(iris[i, ], -Species)
  sum(numeric_part)
}
stopImplicitCluster()

# type = "FORK" creates the workers by forking the current R process
# (not available on Windows). Stop the cluster once it is no longer needed
# so the worker processes do not linger.
clust <- makeCluster(no_cores, type = "FORK")
stopCluster(clust)

# outfile = sends the workers' console output to the given file. Keep the
# cluster object so it can be stopped once both examples have run.
debug_cl <- makeCluster(no_cores, outfile = "debug_file.txt")
registerDoParallel(debug_cl)

# Output printed on the workers goes to debug_file.txt.
foreach(x = list(1:5, "a")) %dopar% print(x)

# Alternatively write one file per task, named after the value processed.
foreach(x = list(1, 2, 3, 4, 5, "a")) %dopar%
  cat(dput(x), file = paste0("debug_file_", x, ".txt"))

stopCluster(debug_cl)

# Keep the cluster object so it can be stopped after the loop.
try_cl <- makeCluster(no_cores)
registerDoParallel(try_cl)

# Handle errors per task so one failing input does not abort the whole loop.
foreach(x = list(1, 2, "a")) %dopar% {
  tryCatch(
    1 / x, # signals an error when x is "a"
    error = function(e) {
      # conditionMessage() extracts just the message text of the error.
      paste0("Error occurred for '", x, "'",
             " The error is '", conditionMessage(e), "'")
    }
  )
}

stopCluster(try_cl)

# Create a variable and a second name holding the same value.
base <- 4
base_copy <- base

# The original name is no longer needed, so drop it.
rm("base")

# Remove every object in the current environment. This is destructive:
# it also deletes objects created earlier in the session.
rm(list = ls())

#原文

R bloggers Implementing Parallel Processing in R

相关文章

  • R语言中多线程运行程序

    R 代码的运行效率不高,因此有时候可以考虑并行运行程序。 #apply系列函数 实际不是并行 ##lapply()...

  • Java基础之多线程

    什么是多线程?   线程是指程序运行的流程,多线程则是指可以运行一个以上线程的程序,多线程使程序运行的效率变得更高...

  • Play Sound

    R语言中,当你想在程序运行完之后,发出一点声音提醒你,就可以这样玩:

  • 1. R语言运行效率分析(1)

    测试程序运行所需时间的函数的选择 在R语言中,统计一个程序体运行时间一般采用的函数为Sys.time()或者为pr...

  • 【第 23 天】多线程篇 - 创建多线程

    多线程篇 - 创建多线程 多线程概念:一个程序同时运行多个程序块,比如同时运行 选择语句、循环语句,和其它语句。 ...

  • python线程的实现,线程池

    python通过标准库threading实现多线程的运行。程序的运行总要考虑并发,并行数。在多线程程序中为了确保程...

  • go 的并发机制

    并发与并行的概念 ① 多线程程序在单核cpu上运行就是并发;② 多线程程序在多核cpu上运行就是并行; gorou...

  • R语言绘图:28个实用程序包

    R语言绘图:28个实用程序包 - R语言中文社区 - CSDN博客

  • Python多线程

    Python 多线程 多线程类似于同时执行多个不同程序,多线程运行有如下优点: 使用线程可以把占据长时间的程序...

  • java多线程

    ackageThreadDemo;/** * 多线程 * 程序:一组指定的集合(没有运行的程序) * 进程...

网友评论

    本文标题:R语言中多线程运行程序

    本文链接:https://www.haomeiwen.com/subject/xijpiktx.html