install these packages to start

library(“FactoMineR”, lib.loc=“C:/Program Files/R/R-3.2.2/library”) library(“missMDA”, lib.loc=“C:/Program Files/R/R-3.2.2/library”) library(“GenABEL”, lib.loc=“C:/Program Files/R/R-3.2.2/library”) library(“factoextra”, lib.loc=“C:/Program Files/R/R-3.2.2/library”)

#used subsurface.csv data.

##transform data:

rank transform Ksat and carbon

KSAT<-rntransform(subsurface$Ksat) CARBON<-rntransform(subsurface$carbon) NITROGEN<-rntransform(subsurface$nitrogen)

arcsinsqrt transform the texture data

SILT<-(subsurface$silt/100) CLAY<-(subsurface$clay)¼ VCSAND<-(subsurface$vcsand)¼ CSAND<-(subsurface$csand)1/3 MSAND<-asin(sqrt(subsurface$msand/100)) FSAND<-(subsurface$fsand)¼ VFSAND<-asin(sqrt(subsurface$vfsand/100))

##sqrt transform nitrogen data

check for normality

shapiro.test(KSAT) shapiro.test(SILT) shapiro.test(CLAY) shapiro.test(VCSAND) shapiro.test(CSAND) shapiro.test(MSAND) shapiro.test(FSAND) shapiro.test(VFSAND) shapiro.test(CARBON) shapiro.test(NITROGEN)

##bring columns back into the dataset

subsurface2<- subset(subsurface, select = -c(1:11)) subsurface_T<-cbind(subsurface2,KSAT, CARBON, NITROGEN, VCSAND, CSAND, MSAND, FSAND, VFSAND,SILT,CLAY)

##Impute missing values, ncp estimates how many components are need to model the data nb_sub <- estim_ncpPCA(subsurface_T[,5:14],ncp.min=0, ncp.max=4, scale=T) nb_sub <- estim_ncpPCA(subsurface_T[,5:14],ncp.min=0, ncp.max=4, method.cv=“Kfold”,scale=T) nb_sub ##inlcude the estimated # of components here res_impute_sub<- imputePCA(subsurface_T[,5:14],ncp=4) ##can compare the modeled dataset and the original res_impute_sub$completeObs subsurface_T

##combine data with the categories that were excluded from the data to impute earlier categories<-subsurface_T[,1:4] New_subsurface<-cbind(res_impute_sub$completeObs,categories)

##run PCA res.PCA_impute_sub<-PCA(New_subsurface,quali.sup=11:14, scale.unit=TRUE, ncp=4, graph=T)

summary.PCA(res.PCA_impute_sub, nb.dec=2,ncp=3)

##res.PCA_impute ##Results for the Principal Component Analysis (PCA) ##*The results are available in the following objects:

name description

##1 “$eig” “eigenvalues”
##2 “$var” “results for the variables”
##3 “$var$coord” “coord. for the variables”
##4 “$var$cor” “correlations variables - dimensions”
##5 “$var$cos2” “cos2 for the variables”
##6 “$var$contrib” “contributions of the variables”
##7 “$ind” “results for the individuals”
##8 “$ind$coord” “coord. for the individuals”
##9 “$ind$cos2” “cos2 for the individuals”
##10 “$ind$contrib” “contributions of the individuals”
##11 “$quali.sup” “results for the supplementary categorical variables” ##12 “$quali.sup$coord” “coord. for the supplementary categories”
##13 “$quali.sup$v.test” “v-test of the supplementary categories”
##14 “$call” “summary statistics”
##15 “$call$centre” “mean of the variables”
##16 “$call$ecart.type” “standard error of the variables”
##17 “$call$row.w” “weights for the individuals”
##18 “$call$col.w” “weights for the variables”

dimdesc(res.PCA_impute_sub, axes=c(1,2))

res.PCA_impute_sub$eig res.PCA_impute_sub$var res.PCA_impute_sub$var$contrib res.PCA_impute_sub$var$cos2

fviz_contrib(res.PCA_impute_sub, choice = “var”, axes = 1)+labs(title = “Drainage Base Model - Dim1”)+theme(text = element_text(size=17)) fviz_contrib(res.PCA_impute_sub, choice = “var”, axes = 2)+labs(title = “Drainage Base Model - Dim2”)+theme(text = element_text(size=17)) fviz_contrib(res.PCA_impute_sub, choice = “var”, axes = 3) sweep(res.PCA_impute_sub$var$coord,2,sqrt(res.PCA_impute_sub$eig[1:ncol(res.PCA_impute_sub$var$coord),1]),FUN=“/”)

fviz_pca_var(res.PCA_impute_sub, col.var=“cos2”) + scale_color_gradient2(expression(“Cos”“2”), low=“white”, mid=“light green”, high=“dark green”, midpoint=0.5) + theme_minimal()+labs(title = “Drainage Base Model”) fviz_pca_var(res.PCA_impute_sub, col.var=“contrib”,labelsize = 5, jitter = list(what = “label”, width = 0.1, height = 0.1)) + scale_color_gradient2(“Contribution (%)”,low=“white”, mid=“light green”, high=“dark green”, midpoint=0.2) + theme_minimal()+labs(title = “Drainage Base Model”)+theme(text = element_text(size=19))

#DRAINAGE plot.PCA(res.PCA_impute_sub, axes=c(1, 2), label=c(“none”), palette=c(“dark red”,“green”, “orange”,“red”), cex=1.5, habillage=13)

plot.PCA(res.PCA_impute_sub, axes=c(1, 2), choix=c(“var”), label=c(“all”) ,select=“all”, cex.main=1,col.var=“Black”)

plotellipses(res.PCA_impute_sub,level = 0.95, cex=1, lwd=3,keepvar=c(12:13))

############################

##carry out the PCA with data that combines the silt and vfsand fraction as well as the csand and vcsand fractions fines<-subsurface$vfsand+subsurface$silt+subsurface$clay+subsurface$fsand coarsematerial<-subsurface$csand+subsurface$vcsand+subsurface$msand

##transform data:

arcsinsqrt transform the texture data

FINES<-(fines/100) COARSEMATERIAL<-(coarsematerial)½

shapiro.test(FINES) shapiro.test(COARSEMATERIAL)

#add the changed and transformed columns into the dataset subsurface_T2<-cbind(subsurface2, KSAT,CARBON,NITROGEN,COARSEMATERIAL,FINES)

##Impute missing values, ncp estimates how many components are need to model the data nb2_sub <- estim_ncpPCA(subsurface_T2[,5:9],ncp.min=0, ncp.max=4, scale=T) nb2_sub <- estim_ncpPCA(subsurface_T2[,5:9],ncp.min=0, ncp.max=4, method.cv=“Kfold”,scale=T) nb2_sub ##inlcude the estimated # of components here res_impute2_sub<- imputePCA(subsurface_T2[,5:9],ncp=4) ##can compare the modeled dataset and the original res_impute2_sub$completeObs subsurface_T2

##combine data with the categories that were excluded from the data to impute earlier categories2<-subsurface_T2[,1:4] New_subsurface2<-cbind(res_impute2_sub$completeObs,categories2)

##run PCA res.PCA_impute2_sub<-PCA(New_subsurface2,quali.sup=6:9, scale.unit=TRUE, ncp=5, graph=T)

summary.PCA(res.PCA_impute2_sub, nb.dec=2,ncp=3)

##res.PCA_impute ##Results for the Principal Component Analysis (PCA) ##*The results are available in the following objects:

name description

##1 “$eig” “eigenvalues”
##2 “$var” “results for the variables”
##3 “$var$coord” “coord. for the variables”
##4 “$var$cor” “correlations variables - dimensions”
##5 “$var$cos2” “cos2 for the variables”
##6 “$var$contrib” “contributions of the variables”
##7 “$ind” “results for the individuals”
##8 “$ind$coord” “coord. for the individuals”
##9 “$ind$cos2” “cos2 for the individuals”
##10 “$ind$contrib” “contributions of the individuals”
##11 “$quali.sup” “results for the supplementary categorical variables” ##12 “$quali.sup$coord” “coord. for the supplementary categories”
##13 “$quali.sup$v.test” “v-test of the supplementary categories”
##14 “$call” “summary statistics”
##15 “$call$centre” “mean of the variables”
##16 “$call$ecart.type” “standard error of the variables”
##17 “$call$row.w” “weights for the individuals”
##18 “$call$col.w” “weights for the variables”

res.PCA_impute2_sub$eig res.PCA_impute2_sub$var res.PCA_impute2_sub$var$contrib res.PCA_impute2_sub$var$cos2

sweep(res.PCA_impute2_sub$var$coord,2,sqrt(res.PCA_impute2_sub$eig[1:ncol(res.PCA_impute2_sub$var$coord),1]),FUN=“/”)

fviz_contrib(res.PCA_impute2_sub, choice = “var”, axes = 1)+labs(title = “Drainage Adjusted Model - Dim1”)+theme(text = element_text(size=17)) fviz_contrib(res.PCA_impute2_sub, choice = “var”, axes = 2)+labs(title = “Drainage Adjusted Model - Dim2”)+theme(text = element_text(size=17)) fviz_contrib(res.PCA_impute2_sub, choice = “var”, axes = 3) sweep(res.PCA_impute2_sub$var$coord,2,sqrt(res.PCA_impute2_sub$eig[1:ncol(res.PCA_impute2_sub$var$coord),1]),FUN=“/”)

fviz_pca_var(res.PCA_impute2_sub, col.var=“cos2”) + scale_color_gradient2(expression(“Cos”“2”), low=“white”, mid=“light green”, high=“dark green”, midpoint=0.4) + theme_minimal()+labs(title = “Drainage Adjusted Model”)+theme(text = element_text(size=19)) fviz_pca_var(res.PCA_impute2_sub, col.var=“contrib”,labelsize = 5, jitter = list(what = “label”, width = 0.1, height = 0.1)) + scale_color_gradient2(“Contribution (%)”,low=“white”, mid=“light green”, high=“dark green”, midpoint=0.2) + theme_minimal()+labs(title = “Drainage Adjusted Model”)+theme(text = element_text(size=19))

#DRAINAGE

plot.PCA(res.PCA_impute2_sub, axes=c(1, 2), choix=c(“var”), label=c(“all”) ,select=“all”, cex.main=1,col.var=“Black”)

plotellipses(res.PCA_impute2_sub,level = 0.95, cex=1, lwd=3,keepvar=c(7:8))