install these packages to start

library(“FactoMineR”, lib.loc=“C:/Program Files/R/R-3.2.2/library”) library(“missMDA”, lib.loc=“C:/Program Files/R/R-3.2.2/library”) library(“GenABEL”, lib.loc=“C:/Program Files/R/R-3.2.2/library”) library(“factoextra”, lib.loc=“C:/Program Files/R/R-3.2.2/library”)

used surface.csv data.

transform data:

rank transform Kunsat, carbon, and bulk density

KUNSAT<-log10(redo_surface$Kunsat) CARBON<-rntransform(redo_surface$carbon) BULKDENSITY<-rntransform(redo_surface$bulkdensity)

arcsinsqrt transform the texture data

CSAND<-((redo_surface$csand/100)1/3) MSAND<-asin(sqrt(redo_surface$msand/100)) SILT<-redo_surface$silt/10 SAND<-asin(sqrt(redo_surface$sand/100))

sqrt or sqrt(sqrt()) transform nitrogen and texture data

CLAY<-sqrt(sqrt(redo_surface$clay/100)) NITROGEN<-rntransform(redo_surface$nitrogen) VCSAND<-sqrt(sqrt(redo_surface$vcsand/100)) FSAND<-asin(sqrt(redo_surface$fsand/100)) VFSAND<-asin(sqrt(redo_surface$vfsand/100))

check for normality

shapiro.test(KUNSAT) shapiro.test(SILT) shapiro.test(CLAY) shapiro.test(VCSAND) shapiro.test(CSAND) shapiro.test(MSAND) shapiro.test(FSAND) shapiro.test(VFSAND) shapiro.test(CARBON) shapiro.test(NITROGEN) shapiro.test(BULKDENSITY)

bring columns back into the dataset

surfacecat<- subset(redo_surface, select = -c(1:17)) surface_T<-cbind(surfacecat,KUNSAT,CARBON,NITROGEN,BULKDENSITY,VCSAND,CSAND,MSAND,FSAND,VFSAND,SILT,CLAY)

Impute missing values, ncp estimates how many components are need to model the data

nb <- estim_ncpPCA(surface_T[,5:15],ncp.min=0, ncp.max=4, scale=T) nb <- estim_ncpPCA(surface_T[,5:15],ncp.min=0, ncp.max=4, method.cv=“Kfold”,scale=T) nb

inlcude the estimated # of components here

res_impute<- imputePCA(surface_T[,5:15],ncp=3)

can compare the modeled dataset and the original

res_impute$completeObs surface_T

combine data with the categories that were excluded from the data to impute earlier

surfacecat2<-surface_T[,1:4] New_surface<-cbind(res_impute$completeObs,surfacecat2)

run PCA

res.PCA_impute<-PCA(New_surface,quali.sup=13:16, scale.unit=TRUE, ncp=5, graph=T) summary.PCA(res.PCA_impute, nb.dec=2,ncp=3)

res.PCA_impute

Results for the Principal Component Analysis (PCA)

*The results are available in the following objects:

name description

1 “$eig” “eigenvalues

2 ”$var" “results for the variables”

3 “$var$coord” “coord. for the variables”

4 “$var$cor” “correlations variables - dimensions”

5 “$var$cos2” “cos2 for the variables”

6 “$var$contrib” “contributions of the variables”

7 “$ind” “results for the individuals”

8 “$ind$coord” “coord. for the individuals”

9 “$ind$cos2” “cos2 for the individuals”

10 “$ind$contrib” “contributions of the individuals”

11 “$quali.sup” “results for the supplementary categorical variables”

12 “$quali.sup$coord” “coord. for the supplementary categories”

13 “$quali.sup$v.test” “v-test of the supplementary categories”

14 “$call” “summary statistics”

15 “$call$centre” “mean of the variables”

16 “$call$ecart.type” “standard error of the variables”

17 “$call$row.w” “weights for the individuals”

18 “$call$col.w” “weights for the variables”

res.PCA_impute$eig res.PCA_impute$var res.PCA_impute$var$cor res.PCA_impute$var$contrib res.PCA_impute$ind res.PCA_impute$call dimdesc(res.PCA_impute, axes=c(1,2)) res.PCA_impute$eig res.PCA_impute$var res.PCA_impute$var$contrib res.PCA_impute$var$cos2 fviz_contrib(res.PCA_impute, choice = “var”, axes = 1)+labs(title = “Infiltration Base Model - Dim1”)+theme(text = element_text(size=17)) fviz_contrib(res.PCA_impute, choice = “var”, axes = 2)+labs(title = “Infiltration Base Model - Dim2”)+theme(text = element_text(size=17)) sweep(res.PCA_impute$var$coord,2,sqrt(res.PCA_impute$eig[1:ncol(res.PCA_impute$var$coord),1]),FUN=“/”) fviz_pca_var(res.PCA_impute, col.var=“cos2”) + scale_color_gradient2(expression(“Cos”“2”), low=“white”, mid=“light green”, high=“dark green”, midpoint=0.5) + theme_minimal()+labs(title = “Infiltration Base Model”) fviz_pca_var(res.PCA_impute, col.var=“contrib”,labelsize = 5, jitter = list(what = “label”, width = 0.05, height = 0.2)) + scale_color_gradient2(“Contribution (%)”,low=“white”, mid=“light green”, high=“dark green”, midpoint=0.5) + theme_minimal()+labs(title = “Infiltration Base Model”)+theme(text = element_text(size=19))

INFILTRATION

plot.PCA(res.PCA_impute, axes=c(1, 2), choix=c(“var”), label=c(“all”) ,select=“all”, cex.main=1,col.var=“Black”) plotellipses(res.PCA_impute,level = 0.95, cex=1, lwd=3,keepvar=c(13,15))

carry out the PCA with data that combines the silt and vfsand fraction as well as the csand and vcsand fractions

fines<-redo_surface$vfsand+redo_surface$fsand+redo_surface$silt+redo_surface$clay coarsematerial<-redo_surface$csand+redo_surface$vcsand+redo_surface$msand

transform data:

arcsinsqrt transform the texture data

FINES<-(sqrt(fines/100)) COARSEMATERIAL<-(sqrt(coarsematerial/100)) shapiro.test(FINES) shapiro.test(COARSEMATERIAL)

add the changed and transformed columns into the dataset

surface_T2<-cbind(surfacecat2, KUNSAT,CARBON, NITROGEN, BULKDENSITY,COARSEMATERIAL,FINES)

Impute missing values, ncp estimates how many components are need to model the data

nb2 <- estim_ncpPCA(surface_T2[,5:10],ncp.min=0, ncp.max=4, scale=T) nb2 <- estim_ncpPCA(surface_T2[,5:10],ncp.min=0, ncp.max=4, method.cv=“Kfold”,scale=T) nb2

inlcude the estimated # of components here

res_impute2<- imputePCA(surface_T2[,5:10],ncp=2)

can compare the modeled dataset and the original

res_impute2$completeObs surface_T2

combine data with the categories that were excluded from the data to impute earlier

categories2<-surface_T2[,1:4] New_surface2<-cbind(res_impute2$completeObs,categories2)

run PCA

res.PCA_impute2<-PCA(New_surface2,quali.sup=7:10, scale.unit=TRUE, ncp=5, graph=T) summary.PCA(res.PCA_impute2, nb.dec=2,ncp=3)

res.PCA_impute

res.PCA_impute2$eig res.PCA_impute2$var res.PCA_impute2$var$cor res.PCA_impute2$var$contrib fviz_contrib(res.PCA_impute2, choice = “var”, axes = 1)+labs(title = “Infiltration Adjusted Model - Dim1”)+theme(text = element_text(size=17)) fviz_contrib(res.PCA_impute2, choice = “var”, axes = 2)+labs(title = “Infiltration Adjusted Model - Dim2”)+theme(text = element_text(size=17)) sweep(res.PCA_impute2$var$coord,2,sqrt(res.PCA_impute2$eig[1:ncol(res.PCA_impute2$var$coord),1]),FUN=“/”) fviz_pca_var(res.PCA_impute2, col.var=“cos2”) + scale_color_gradient2(expression(“Cos”“2”), low=“white”, mid=“light green”, high=“dark green”, midpoint=0.5) + theme_minimal()+labs(title = “Infiltration Adjusted Model”) fviz_pca_var(res.PCA_impute2, col.var=“contrib”,labelsize = 5, jitter = list(what = “label”, width = 0.1, height = 0.25)) + scale_color_gradient2(“Contribution (%)”,low=“white”, mid=“light green”, high=“dark green”, midpoint=0.5) + theme_minimal()+labs(title = “Infiltration Adjusted Model”)+theme(text = element_text(size=19))

INFILTRATION

plot.PCA(res.PCA_impute2, axes=c(1, 2), choix=c(“var”), label=c(“all”) ,select=“all”, cex.main=1,col.var=“Black”) plotellipses(res.PCA_impute2,level = 0.95, cex=1, lwd=3,keepvar=c(9,7))