######################### #####Script to evaluate the amount of variation comming from different sources in ENM by #####Hierarchical partitioning analyses. Complete process ######################### ############## ###Extraction of data from 2640 models ############## ####Numeric code options (Replicates=1-10, GCMs=1-22, RCPs=1 and 2, and Parameters=1-6) #Package if(!require(raster)){ install.packages("raster") library(raster) } rasterOptions(maxmemory = 1e+15) #increasing speed for raster processing in R #Create and set directory dir.create("Z:/Marlon_E_Cobos/R/Variab_models/Hier_par") setwd("Z:/Marlon_E_Cobos/R/Variab_models/Hier_par") #Sample units G_mods <- list.files("Z:/Marlon_E_Cobos/R/Variab_models/Modeling/L_0.1_S2",pattern="asc", #list a model projected in G full.names=TRUE)[1] G_mod <- stack(G_mods) crs(G_mod) <- "+proj=longlat +ellps=WGS84 +towgs84=0,0,0,0 +no_defs +datum=WGS84" #if define projection is needed P_modg <- rasterToPoints(G_mod) #convert raster to points pg <- P_modg[sample(nrow(P_modg), 10000, replace = FALSE),1:2] #select ramdomly 10000 points, just xy coordinates #Create a vector with the name of directories you are going to read the models from dirs <- paste("Z:/Marlon_E_Cobos/R/Variab_models/Modeling", dir("Z:/Marlon_E_Cobos/R/Variab_models/Modeling")[8:17], sep = "/") Pars <- 1:10 #ten parametrizations used (their names follow an alphabetical order) #Create the objects you are going to need mod_ppars <- list() mod_ppar <- list() vals <- list() vals_o <- list() #Extract the data of each model and organizing them as it is needed, by loop for(i in 1:length(dirs)){ mod_ppars[[i]] <- list.files(dirs[i],pattern="asc", full.names=TRUE)[1:400] #Suitability models just the replicates mod_ppar[[i]] <- stack(mod_ppars[[i]]) crs(mod_ppar[[i]]) <- "+proj=longlat +ellps=WGS84 +towgs84=0,0,0,0 +no_defs +datum=WGS84" #if define projection is needed vals[[i]] <- as.data.frame(extract(mod_ppar[[i]],pm)) #Extracting data in random points and making them a data.frame vals_o[[i]] <- stack(vals[[i]]) #Joing all the suitable values in one column colnames(vals_o[[i]])[1] <- "Suit" #Changing coulumn name by Suit vals_o[[i]]$ID <-rep(seq(1,10000,1), length(mod_ppars[[i]])) #adding ID column (points numbers) ind <- as.vector(unique(vals_o[[i]]$ind)) #unique replicate names Replicates <- rep(1:10, each = 40) #unique replicate numbers vals_o[[i]]$Replicates <- Replicates[match(vals_o[[i]]$ind, ind)] #changing replicate names by numbers GCMs <- rep(rep(1:20, each = 2), 10) #unique GCM numbers (codes 1:20 in alfabetic order) vals_o[[i]]$GCMs <- GCMs[match(vals_o[[i]]$ind, ind)] RCPs <- rep(1:2, 400) #unique RCP numbers (codes 1:2 RCPs 4.5 and 8.5) vals_o[[i]]$RCPs <- RCPs[match(vals_o[[i]]$ind, ind)] vals_o[[i]]$Parameters <- rep(Pars[i], 4000000) #codes of parametrizations (10 in total in this case) print(paste(i, "of", length(dirs), sep = " ")) } #Join all the tables as one suit_all <- do.call(rbind, vals_o) #Write the final table write.csv(suit_all, file = "suit_all.csv", dec = ".", row.names=FALSE) ############## ###Subsampling (n = 100) the big data base (n = 10000) to create 100 tables (Similar to a bootstrap) ############## #Create and set directory dir.create("Z:/Marlon_E_Cobos/R/Variab_models/Hier_par/Subsamples") setwd("Z:/Marlon_E_Cobos/R/Variab_models/Hier_par/Subsamples") ##Prepare the data #"stratified" funtion should be colled from the git-hub if(!require(devtools)){ install.packages("devtools") library(devtools) } source_gist("https://gist.github.com/mrdwab/6424112") #Create the objects you are going to need suit_ran_nam <- paste("suit_ran_g", 1:100, ".csv", sep = "") #names of the resultant tables suit_ran <- list() for(i in 1:length(suit_ran_nam)){ suit_ran[[i]] <- stratified(suit_all, "ind", 100) #Random selection of 100 rows from each combination (4000 suitability models) suit_ran[[i]]$ID <- NULL #Erasing ID column write.csv(suit_ran[[i]], file = suit_ran_nam[[i]], row.names=FALSE) #writing each new table print(paste(i, "of", length(suit_ran_nam), sep = " ")) } ############## ###Hierarchical partitioning analyses ############## #Call the needed package if(!require(hier.part)){ install.packages("hier.part") library(hier.part)} #Define work directory setwd("Z:/Marlon_E_Cobos/R/Variab_models/Hier_par/Subsamples") fl <- list.files(pattern="*.csv") #Files to be used list2env(lapply(setNames(fl, make.names(gsub("*.csv$", "", fl))), read.csv), envir = .GlobalEnv) #Calling the data fls<- Filter(function(x) is(x, "data.frame"), mget(ls())) #List of all the dataframes to be analyzed #Create the lists we are going to need ran <- list() #dataframes to be analyzed hpar_ran <- list() #objects containing the analyses results r1_hpar_ran <- list() #objects containing the gofs r2_hpar_ran <- list() #objects containing the independent (I) and J effects r3_hpar_ran <- list() # objects containing the I effects in % #Do the analyses in loop for (i in 1:length (fls)) { ran[[i]] <- as.data.frame(fls[[i]]) ran[[i]]$Suit <- (ran[[i]]$Suit + 1) #Adding 1 to all the Suitability to use gamma family ran[[i]]$Replicates <- as.factor(ran[[i]]$Replicates) #Converting to factors ran[[i]]$GCMs <- as.factor(ran[[i]]$GCMs) #" ran[[i]]$RCPs <- as.factor(ran[[i]]$RCPs) #" ran[[i]]$Parameters <- as.factor(ran[[i]]$Parameters) #" #Hierarchical Partitioning analyses hpar_ran[[i]] <- hier.part(ran[[i]]$Suit, ran[[i]][,3:6], fam = "Gamma", gof = "logLik",barplot = F) r1_hpar_ran[[i]] <- t(as.matrix(hpar_ran[[i]]$gfs)) row.names(r1_hpar_ran[[i]]) <- paste("Goodness of fit values", i) #names of the rows r2_hpar_ran[[i]] <- hpar_ran[[i]]$IJ row.names(r2_hpar_ran[[i]]) <- paste(row.names(r2_hpar_ran[[i]]), i) #names of the rows r3_hpar_ran[[i]] <- t(hpar_ran[[i]]$I.perc) row.names(r3_hpar_ran[[i]]) <- paste("Independent effects", i) #names of the rows print(paste(i, "of", length(fls), sep = " ")) } #Create the final tables r1_hpar_ran <- do.call(rbind, r1_hpar_ran) r2_hpar_ran <- do.call(rbind, r2_hpar_ran) r3_hpar_ran <- do.call(rbind, r3_hpar_ran) #Write these ressults write.csv(r1_hpar_ran, file = "hpar1_allg.csv", eol = "\n", na = "NA", row.names=TRUE) write.csv(r2_hpar_ran, file = "hpar2_allg.csv", eol = "\n", na = "NA", row.names=TRUE) write.csv(r3_hpar_ran, file = "hpar3_allg.csv", eol = "\n", na = "NA", row.names=TRUE) ############## ###Statistical significance test (compare observed values with a null distribution created by randomization) ############## #select data for Signif. test #The closer result to the median of the most important source of variation medians_ind_e <- apply(r3_hpar_ran, 2, median) mod_sel <- row.names(r3_hpar_ran[r3_hpar_ran[,4] >= (medians_ind_e - .15) & r3_hpar_ran[,4] <= (medians_ind_e + .15),]) #Prepare the date, this was done previously, but just in case ran1 <- as.data.frame(suit_ran_g92) #Here the subsample to be used ran1$Suit <- (ran1$Suit + 1) #Adding 1 to all the Suitability to use gamma family ran1$Repliclates <- as.factor(ran1$Replicates) #Converting to factors ran1$GCMs <- as.factor(ran1$GCMs) #" ran1$RCPs <- as.factor(ran[[i]]$RCPs) #" ran1$Parameters <- as.factor(ran1$Parameters) #" #Significance test sig_hpar_ran <- rand.hp(ran1$Suit, ran1[,3:6], fam = "Gamma", gof = "logLik", num.reps = 100) #Get the results as independent tables r1_sig_hpar_ran <- sig_hpar_ran$Irands #Observed and random independent effects r2_sig_hpar_ran <- sig_hpar_ran[[2]] #Simbol of statistical significace (if absent = non significat) r2_sig_hpar_ran[,1:2] <- NULL r2_sig_hpar_ran <- t(r2_sig_hpar_ran) #Write these ressults write.csv(r1_sig_hpar_ran, file = "sig1_hpar_allg.csv", eol = "\n", na = "NA", row.names=TRUE) write.csv(r2_sig_hpar_ran, file = "sig2_hpar_allg.csv", eol = "\n", na = "NA", row.names=TRUE) ############## ###Barplot graphic (customize it as you want) ############## ##Mean and SD values of the Independent effects calculated for the 100 subsamples r3_hpar_ran.means <- apply(r3_hpar_ran[,2:5], 2, mean) names(r3_hpar_ran.means) <- c("Replicates", "GCMs", "RCPs", "Parameters") r3_hpar_ran.sd <- apply(r3_hpar_ran[,2:5], 2, sd) #funtion to graphic bars and error bars error.bar <- function(x, y, upper, lower=upper, length=0.1,...){ if(length(x) != length(y) | length(y) !=length(lower) | length(lower) != length(upper)) stop("vectors must be same length") arrows(x,y+upper, x, y-lower, angle=90, code=3, length=length, ...)} ##Figure #Hierarchical partitioning, density plot, and correlation with normality in an only figure jpeg("Hier_par_graph.jpg", width = 174, height = 174, units = "mm", res = 600) #image to be saved par(mar = c(4.5,4.5,0,0.5), cex = 1) barx <- barplot(r3_hpar_ran.means, col = "gray80", ylim = c(0,55), border = "gray30", width = 0.4955, space = 1.25, main = NULL, xlab = "", ylab = "", las = 1) title(xlab = "Independent factors", ylab = "Independent effects (%)", cex.lab = 1.2) error.bar(barx, r3_hpar_ran.means, 1.96*r3_hpar_ran.sd/10) dev.off()