美文网首页
R语言并行计算

R语言并行计算

作者: kittybaby | 来源:发表于2020-02-03 13:05 被阅读0次

    R包

    parallel
    doParallel
    foreach

    parallel包

    1.检测本机的核数

    # Attach the 'parallel' package that ships with R; it provides
    # detectCores().
    library(parallel)

    # Query how many CPU cores the machine reports and keep the answer in
    # no_of_cores (detectCores() can return NA when the count is unknown).
    no_of_cores <- parallel::detectCores()

    # Show the detected core count on the console.
    print(no_of_cores)
    

    2.parApply

    3.parSapply

    变量作用域
    在 Mac / Linux 上,可以使用 makeCluster(no_of_cores, type = "FORK"),它会自动包含当前所有的环境变量(详见下文)。在 Windows 上则只能使用并行套接字集群(PSOCK),其工作进程中只加载了基础包(请注意,PSOCK 是所有系统上的默认类型)。因此,应当始终显式指定并行函数需要用到的变量和包。例如,下面的代码会失败:

    > cl<-makeCluster(4)
    > base <- 2
    >  
    > parLapply(cl, 
    +           2:4, 
    +           function(exponent) 
    +             base^exponent)
    Error in checkForRemoteErrors(val) : 
      3 nodes produced errors; first error: 找不到对象'base'
    >  
    > stopCluster(cl)
    
    > cl<-makeCluster(4)
    >  
    > base <- 2
    > clusterExport(cl, "base")
    > parLapply(cl, 
    +           2:4, 
    +           function(exponent) 
    +             base^exponent)
    [[1]]
    [1] 4
    
    [[2]]
    [1] 8
    
    [[3]]
    [1] 16
    

    您需要使用 clusterExport(cl, "base") 才能让该函数访问到变量 base。如果用到了其他软件包,同样需要通过 clusterEvalQ 在各个工作进程中加载它们。例如我经常使用 rms 包,因此会调用 clusterEvalQ(cl, library(rms))。请注意,调用 clusterExport 之后对变量所做的任何修改都不会同步到工作进程:

    > cl<-makeCluster(no_of_cores)
    > clusterExport(cl, "base")
    > base <- 4
    > # Run
    > parLapply(cl, 
    +           2:4, 
    +           function(exponent) 
    +             base^exponent)
    [[1]]
    [1] 4
    
    [[2]]
    [1] 8
    
    [[3]]
    [1] 16
    
    >  
    > # Finish
    > stopCluster(cl)
    
    

    方法一

    library(parallel)

    # Serial reference: the anonymous function finds y in the environment
    # where it was defined, so plain sapply() needs no extra steps.
    y <- 1:10
    serial_res <- sapply(1:5, function(x) x + y)
    print(serial_res)

    # Start two worker processes; they do not see y yet.
    cl <- makeCluster(2)

    # Option A: make y an explicit parameter and hand it to parSapply(),
    # which forwards additional arguments to the function on every worker.
    res_arg <- parSapply(cl, 1:5, function(x, y) x + y, y)
    print(res_arg)

    # Option B: export y to the global environment of each node, after
    # which the original one-argument function can be reused unchanged.
    clusterExport(cl, "y")
    res_export <- parSapply(cl, 1:5, function(x) x + y)
    print(res_export)

    # Shut the workers down so the extra R processes do not linger.
    stopCluster(cl)
    
    

    方法二

    library(parallel)

    # Add y to each of 1:5 on the workers of cluster cl.
    # The worker function is created inside fun(), so y is part of its
    # enclosing environment and is sent along with it; no clusterExport()
    # call is needed.
    fun <- function(cl, y) {
      add_y <- function(x) x + y
      parSapply(cl, 1:5, add_y)
    }

    # Start two workers, run the computation, then release the workers.
    cl <- makeCluster(2)
    fun(cl, 1:10)
    stopCluster(cl)
    

    4.mclapply(Windows 下不支持 mc.cores > 1)

    library(parallel)

    # Function applied to every element: square the input.
    workerFunc <- function(n) n^2
    values <- 1:100

    ## Number of workers (R processes) to use.
    ## mclapply() relies on forking, which Windows does not provide: there
    ## any mc.cores > 1 stops with an "'mc.cores' > 1 is not supported on
    ## Windows" error, so fall back to a single (serial) worker on Windows.
    numWorkers <- if (.Platform$OS.type == "windows") 1L else 8L

    ## Parallel calculation (mclapply); the result is a list with one
    ## element per input value.
    res <- mclapply(values, workerFunc, mc.cores = numWorkers)
    print(unlist(res))
    

    5.parLapply

    library(parallel)

    # Task run on the workers: square a single number.
    workerFunc <- function(n) n^2
    values <- 1:100

    ## How many worker R processes to start.
    numWorkers <- 8
    ## Start a socket-based (PSOCK) cluster with that many workers.
    cl <- makeCluster(numWorkers, type = "PSOCK")
    ## Apply workerFunc to every value on the cluster; the result is a list.
    res <- parLapply(cl, values, workerFunc)
    ## Release the worker processes now that the result is in hand.
    stopCluster(cl)
    print(unlist(res))
    

    foreach包

    > library(foreach)
    > library(doParallel)
    载入需要的程辑包:iterators
    >  
    > cl<-makeCluster(no_of_cores)
    > registerDoParallel(cl)
    > foreach(exponent = 2:4, 
    +         .combine = c)  %dopar%  
    +   base^exponent
    [1]  16  64 256
    
    
    > foreach(exponent = 2:4, 
    +         .combine = rbind)  %dopar%  
    +   base^exponent
             [,1]
    result.1   16
    result.2   64
    result.3  256
    
    
    > foreach(exponent = 2:4, 
    +         .combine = list,
    +         .multicombine = TRUE)  %dopar%  
    +   base^exponent
    [[1]]
    [1] 16
    
    [[2]]
    [1] 64
    
    [[3]]
    [1] 256
    
    
    > foreach(exponent = 2:4, 
    +         .combine = list)  %dopar%  
    +   base^exponent
    [[1]]
    [[1]][[1]]
    [1] 16
    
    [[1]][[2]]
    [1] 64
    
    
    [[2]]
    [1] 256
    #stopImplicitCluster()
    

    变量的作用域
    默认情况下,与 foreach 调用处于同一局部环境中的变量可以直接使用:

    # base is defined in the same environment from which foreach() is
    # called, so %dopar% makes it available to the workers automatically.
    base <- 2

    # Start two workers and register them as the %dopar% backend.
    cl <- makeCluster(2)
    registerDoParallel(cl)

    # Raise base to the powers 2, 3 and 4 in parallel; .combine = c joins
    # the per-task results into a single vector.
    foreach(exponent = 2:4, .combine = c) %dopar% {
      base^exponent
    }

    # Release the worker processes.
    stopCluster(cl)
    
    
    > cl <- makeCluster(2)
    > test <- function (exponent) {
    +   foreach(exponent = 2:4, 
    +           .combine = c)  %dopar%  
    +     base^exponent
    + }
    > test()
     Show Traceback
     
     Rerun with Debug
     Error in base^exponent : task 1 failed - "找不到对象'base'" 
     
     
     > base <- 2
    > cl<-makeCluster(2)
    > registerDoParallel(cl)
    >  
    > base <- 4
    > test <- function (exponent) {
    +   foreach(exponent = 2:4, 
    +           .combine = c,
    +           .export = "base")  %dopar%  
    +     base^exponent
    + }
    > test()
    [1]  16  64 256
    >  
    > stopCluster(cl)
    

    同样,您可以使用 .packages 选项加载软件包,例如 .packages = c("rms", "mice")。我强烈建议您始终显式导出所需的变量,因为这样可以减少把代码封装进函数时出现的问题。

    cl <- makeCluster(4)
    > registerDoParallel(cl)
    > x <- iris[which(iris[,5] != "setosa"), c(1,5)]
    > trials <- 10000
    > ptime <- system.time({
    +    r <- foreach(icount(trials), .combine=cbind) %dopar% {
    +      ind <- sample(100, 100, replace=TRUE)
    +      result1 <- glm(x[ind,2]~x[ind,1], family=binomial(logit))
    +      coefficients(result1)
    +      }
    +    })[3]
    > ptime
    elapsed 
      20.01 
      
      
    > stime <- system.time({
    +    r <- foreach(icount(trials), .combine=cbind) %do% {
    +      ind <- sample(100, 100, replace=TRUE)
    +      result1 <- glm(x[ind,2]~x[ind,1], family=binomial(logit))
    +      coefficients(result1)
    +      }
    +    })[3]
    > stime
    elapsed 
      39.17 
    stopCluster(cl)
    
    

    参考资料

    http://gforge.se/2015/02/how-to-go-parallel-in-r-basics-tips/
    https://stackoverflow.com/questions/24040280/parallel-computation-of-multiple-imputation-by-using-mice-r-package/27087791#27087791

    相关文章

      网友评论

          本文标题:R语言并行计算

          本文链接:https://www.haomeiwen.com/subject/cjdhxhtx.html