I have a function `sum_var` that takes an integer as input and returns a real number as output. I have checked this function on several inputs and it runs correctly.
I would like to use `clusterApply` to make full use of my CPU (6 cores, 12 logical processors). I tried adapting the code given in class:
# Attempt adapted from class. As written it fails on every node with
# "object 'df_simulate' not found" (see the error quoted below).
library("parallel")
# Start a cluster of 6 worker processes.
cl <- makeCluster(6)
# Apply sum_var to each of the indices 1..10000 across the workers.
res_par <- clusterApply(cl, 1:10000, fun = sum_var)
But it fails with the error `Error in checkForRemoteErrors(val) : 10000 nodes produced errors; first error: object 'df_simulate' not found`.
Could you please elaborate on how to achieve my goal? Below is the full code.
### Generate dataframe
n_simu <- 1000   # number of simulated observations (rows)
n_vars <- 10000  # number of predictor columns x_1 .. x_<n_vars>

# Draw every predictor in one pass instead of appending 10000 columns to a
# data frame one at a time. Column k is still generated right after
# set.seed(k), so the values are the same as with the column-by-column loop,
# and the RNG is left in the same state (seed n_vars followed by one rnorm
# draw) for the runif() call below.
df_simulate <- as.data.frame(
  vapply(
    seq_len(n_vars),
    function(k) {
      set.seed(k)
      rnorm(n_simu)
    },
    numeric(n_simu)
  )
)
names(df_simulate) <- paste0("x_", seq_len(n_vars))

# Baseline response: uniform noise on [0, 0.5].
df_simulate[["y"]] <- runif(n_simu, 0, 0.5)

# Shift the response according to a small decision tree on x_40, x_99, x_30
# and x_150. The five leaves are mutually exclusive and depend only on the
# predictors, so a single vectorised shift replaces the repeated masked
# assignments:
#   x_40 >  0 & x_99 >  0.8               -> + 5.75
#   x_40 >  0 & x_99 <= 0.8 & x_30 >  0.5 -> +18.95
#   x_40 >  0 & x_99 <= 0.8 & x_30 <= 0.5 -> +20.55
#   x_40 <= 0 & x_150 <  0.5              -> - 5
#   x_40 <= 0 & x_150 >= 0.5              -> -10
y_shift <- with(
  df_simulate,
  ifelse(
    x_40 > 0,
    ifelse(x_99 > 0.8, 5.75, ifelse(x_30 > 0.5, 18.95, 20.55)),
    ifelse(x_150 < 0.5, -5, -10)
  )
)
df_simulate[["y"]] <- df_simulate[["y"]] + y_shift
### Function to find the best split point of one column
### (scored by the sum of the standard deviations of the two parts)
n_min <- 5
# Precomputed split range for the 1000-row data set. sum_var() now derives the
# range from its own arguments; `index` is kept for any code that still reads it.
index <- n_min:(1000 - n_min)

#' Best split value of a single column
#'
#' Sorts column `m` of `data` and, for every admissible split position `i`,
#' scores the split as `sd(first i values) + sd(remaining values)`. Returns the
#' sorted value located at the position with the smallest score (the first
#' such position in case of ties).
#'
#' @param m Column index or column name within `data`.
#' @param data Data frame holding the column. Defaults to the global
#'   `df_simulate`, so existing calls `sum_var(m)` keep working.
#' @param min_size Minimum position considered for a split; positions
#'   `min_size:(n - min_size)` are scored, where `n` is the number of
#'   non-missing values. Defaults to the global `n_min`.
#' @return A single numeric value, or `NA_real_` when the column is too short
#'   to admit any split.
sum_var <- function(m, data = df_simulate, min_size = n_min) {
  stopifnot(is.numeric(min_size), length(min_size) == 1L, min_size >= 1)

  # sort() drops NA values, so the length is taken from the sorted vector
  # rather than assumed to be 1000.
  sorted <- sort(data[[m]])
  n <- length(sorted)
  if (n < 2 * min_size) {
    return(NA_real_)
  }

  splits <- min_size:(n - min_size)
  # Work on a plain numeric vector: writing single cells of a data frame
  # inside the loop is far slower and yields the same numbers.
  spread <- vapply(
    splits,
    function(i) sd(sorted[seq_len(i)]) + sd(sorted[(i + 1L):n]),
    numeric(1)
  )

  best <- which.min(spread)  # ignores NA scores, first minimum wins
  if (length(best) == 0L) {
    return(NA_real_)
  }
  sorted[splits[best]]
}
### Parallel Computing
library("parallel")

# Each worker is a fresh R process with an empty global environment. Only the
# function and the task index are shipped automatically, so the globals that
# sum_var() reads must be copied to every worker first; otherwise each task
# fails with "object 'df_simulate' not found".
cl <- makeCluster(6)
res_par <- tryCatch(
  {
    clusterExport(cl, varlist = c("df_simulate", "index", "n_min"))
    # parLapply() splits the 10000 indices into one chunk per worker (it is
    # built on clusterApply()) and returns the same list of results, with far
    # fewer round trips than dispatching the tasks one at a time.
    parLapply(cl, 1:10000, sum_var)
  },
  # Always shut the workers down, even when a task raises an error.
  finally = stopCluster(cl)
)