library(“FactoMineR”, lib.loc=“C:/Program Files/R/R-3.2.2/library”) library(“missMDA”, lib.loc=“C:/Program Files/R/R-3.2.2/library”) library(“GenABEL”, lib.loc=“C:/Program Files/R/R-3.2.2/library”) library(“factoextra”, lib.loc=“C:/Program Files/R/R-3.2.2/library”)
#used subsurface.csv data.
##transform data:
KSAT<-rntransform(subsurface$Ksat) CARBON<-rntransform(subsurface$carbon) NITROGEN<-rntransform(subsurface$nitrogen)
SILT<-(subsurface$silt/100) CLAY<-(subsurface$clay)¼ VCSAND<-(subsurface$vcsand)¼ CSAND<-(subsurface$csand)1/3 MSAND<-asin(sqrt(subsurface$msand/100)) FSAND<-(subsurface$fsand)¼ VFSAND<-asin(sqrt(subsurface$vfsand/100))
##sqrt transform nitrogen data
shapiro.test(KSAT) shapiro.test(SILT) shapiro.test(CLAY) shapiro.test(VCSAND) shapiro.test(CSAND) shapiro.test(MSAND) shapiro.test(FSAND) shapiro.test(VFSAND) shapiro.test(CARBON) shapiro.test(NITROGEN)
##bring columns back into the dataset
subsurface2<- subset(subsurface, select = -c(1:11)) subsurface_T<-cbind(subsurface2,KSAT, CARBON, NITROGEN, VCSAND, CSAND, MSAND, FSAND, VFSAND,SILT,CLAY)
##Impute missing values, ncp estimates how many components are need to model the data nb_sub <- estim_ncpPCA(subsurface_T[,5:14],ncp.min=0, ncp.max=4, scale=T) nb_sub <- estim_ncpPCA(subsurface_T[,5:14],ncp.min=0, ncp.max=4, method.cv=“Kfold”,scale=T) nb_sub ##inlcude the estimated # of components here res_impute_sub<- imputePCA(subsurface_T[,5:14],ncp=4) ##can compare the modeled dataset and the original res_impute_sub$completeObs subsurface_T
##combine data with the categories that were excluded from the data to impute earlier categories<-subsurface_T[,1:4] New_subsurface<-cbind(res_impute_sub$completeObs,categories)
##run PCA res.PCA_impute_sub<-PCA(New_subsurface,quali.sup=11:14, scale.unit=TRUE, ncp=4, graph=T)
summary.PCA(res.PCA_impute_sub, nb.dec=2,ncp=3)
##res.PCA_impute ##Results for the Principal Component Analysis (PCA) ##*The results are available in the following objects:
##1 “$eig” “eigenvalues”
##2 “$var” “results for the variables”
##3 “$var$coord” “coord. for the variables”
##4 “$var$cor” “correlations variables - dimensions”
##5 “$var$cos2” “cos2 for the variables”
##6 “$var$contrib” “contributions of the variables”
##7 “$ind” “results for the individuals”
##8 “$ind$coord” “coord. for the individuals”
##9 “$ind$cos2” “cos2 for the individuals”
##10 “$ind$contrib” “contributions of the individuals”
##11 “$quali.sup” “results for the supplementary categorical variables”
##12 “$quali.sup$coord” “coord. for the supplementary categories”
##13 “$quali.sup$v.test” “v-test of the supplementary categories”
##14 “$call” “summary statistics”
##15 “$call$centre” “mean of the variables”
##16 “$call$ecart.type” “standard error of the variables”
##17 “$call$row.w” “weights for the individuals”
##18 “$call$col.w” “weights for the variables”
dimdesc(res.PCA_impute_sub, axes=c(1,2))
res.PCA_impute_sub$eig res.PCA_impute_sub$var res.PCA_impute_sub$var$contrib res.PCA_impute_sub$var$cos2
fviz_contrib(res.PCA_impute_sub, choice = “var”, axes = 1)+labs(title = “Drainage Base Model - Dim1”)+theme(text = element_text(size=17)) fviz_contrib(res.PCA_impute_sub, choice = “var”, axes = 2)+labs(title = “Drainage Base Model - Dim2”)+theme(text = element_text(size=17)) fviz_contrib(res.PCA_impute_sub, choice = “var”, axes = 3) sweep(res.PCA_impute_sub$var$coord,2,sqrt(res.PCA_impute_sub$eig[1:ncol(res.PCA_impute_sub$var$coord),1]),FUN=“/”)
fviz_pca_var(res.PCA_impute_sub, col.var=“cos2”) + scale_color_gradient2(expression(“Cos”“2”), low=“white”, mid=“light green”, high=“dark green”, midpoint=0.5) + theme_minimal()+labs(title = “Drainage Base Model”) fviz_pca_var(res.PCA_impute_sub, col.var=“contrib”,labelsize = 5, jitter = list(what = “label”, width = 0.1, height = 0.1)) + scale_color_gradient2(“Contribution (%)”,low=“white”, mid=“light green”, high=“dark green”, midpoint=0.2) + theme_minimal()+labs(title = “Drainage Base Model”)+theme(text = element_text(size=19))
#DRAINAGE plot.PCA(res.PCA_impute_sub, axes=c(1, 2), label=c(“none”), palette=c(“dark red”,“green”, “orange”,“red”), cex=1.5, habillage=13)
plot.PCA(res.PCA_impute_sub, axes=c(1, 2), choix=c(“var”), label=c(“all”) ,select=“all”, cex.main=1,col.var=“Black”)
plotellipses(res.PCA_impute_sub,level = 0.95, cex=1, lwd=3,keepvar=c(12:13))
############################
##carry out the PCA with data that combines the silt and vfsand fraction as well as the csand and vcsand fractions fines<-subsurface$vfsand+subsurface$silt+subsurface$clay+subsurface$fsand coarsematerial<-subsurface$csand+subsurface$vcsand+subsurface$msand
##transform data:
FINES<-(fines/100) COARSEMATERIAL<-(coarsematerial)½
shapiro.test(FINES) shapiro.test(COARSEMATERIAL)
#add the changed and transformed columns into the dataset subsurface_T2<-cbind(subsurface2, KSAT,CARBON,NITROGEN,COARSEMATERIAL,FINES)
##Impute missing values, ncp estimates how many components are need to model the data nb2_sub <- estim_ncpPCA(subsurface_T2[,5:9],ncp.min=0, ncp.max=4, scale=T) nb2_sub <- estim_ncpPCA(subsurface_T2[,5:9],ncp.min=0, ncp.max=4, method.cv=“Kfold”,scale=T) nb2_sub ##inlcude the estimated # of components here res_impute2_sub<- imputePCA(subsurface_T2[,5:9],ncp=4) ##can compare the modeled dataset and the original res_impute2_sub$completeObs subsurface_T2
##combine data with the categories that were excluded from the data to impute earlier categories2<-subsurface_T2[,1:4] New_subsurface2<-cbind(res_impute2_sub$completeObs,categories2)
##run PCA res.PCA_impute2_sub<-PCA(New_subsurface2,quali.sup=6:9, scale.unit=TRUE, ncp=5, graph=T)
summary.PCA(res.PCA_impute2_sub, nb.dec=2,ncp=3)
##res.PCA_impute ##Results for the Principal Component Analysis (PCA) ##*The results are available in the following objects:
##1 “$eig” “eigenvalues”
##2 “$var” “results for the variables”
##3 “$var$coord” “coord. for the variables”
##4 “$var$cor” “correlations variables - dimensions”
##5 “$var$cos2” “cos2 for the variables”
##6 “$var$contrib” “contributions of the variables”
##7 “$ind” “results for the individuals”
##8 “$ind$coord” “coord. for the individuals”
##9 “$ind$cos2” “cos2 for the individuals”
##10 “$ind$contrib” “contributions of the individuals”
##11 “$quali.sup” “results for the supplementary categorical variables”
##12 “$quali.sup$coord” “coord. for the supplementary categories”
##13 “$quali.sup$v.test” “v-test of the supplementary categories”
##14 “$call” “summary statistics”
##15 “$call$centre” “mean of the variables”
##16 “$call$ecart.type” “standard error of the variables”
##17 “$call$row.w” “weights for the individuals”
##18 “$call$col.w” “weights for the variables”
res.PCA_impute2_sub$eig res.PCA_impute2_sub$var res.PCA_impute2_sub$var$contrib res.PCA_impute2_sub$var$cos2
sweep(res.PCA_impute2_sub$var$coord,2,sqrt(res.PCA_impute2_sub$eig[1:ncol(res.PCA_impute2_sub$var$coord),1]),FUN=“/”)
fviz_contrib(res.PCA_impute2_sub, choice = “var”, axes = 1)+labs(title = “Drainage Adjusted Model - Dim1”)+theme(text = element_text(size=17)) fviz_contrib(res.PCA_impute2_sub, choice = “var”, axes = 2)+labs(title = “Drainage Adjusted Model - Dim2”)+theme(text = element_text(size=17)) fviz_contrib(res.PCA_impute2_sub, choice = “var”, axes = 3) sweep(res.PCA_impute2_sub$var$coord,2,sqrt(res.PCA_impute2_sub$eig[1:ncol(res.PCA_impute2_sub$var$coord),1]),FUN=“/”)
fviz_pca_var(res.PCA_impute2_sub, col.var=“cos2”) + scale_color_gradient2(expression(“Cos”“2”), low=“white”, mid=“light green”, high=“dark green”, midpoint=0.4) + theme_minimal()+labs(title = “Drainage Adjusted Model”)+theme(text = element_text(size=19)) fviz_pca_var(res.PCA_impute2_sub, col.var=“contrib”,labelsize = 5, jitter = list(what = “label”, width = 0.1, height = 0.1)) + scale_color_gradient2(“Contribution (%)”,low=“white”, mid=“light green”, high=“dark green”, midpoint=0.2) + theme_minimal()+labs(title = “Drainage Adjusted Model”)+theme(text = element_text(size=19))
#DRAINAGE
plot.PCA(res.PCA_impute2_sub, axes=c(1, 2), choix=c(“var”), label=c(“all”) ,select=“all”, cex.main=1,col.var=“Black”)
plotellipses(res.PCA_impute2_sub,level = 0.95, cex=1, lwd=3,keepvar=c(7:8))