library(rpart)
## Warning: package 'rpart' was built under R version 3.3.3
library(corrplot)
## Warning: package 'corrplot' was built under R version 3.3.3
setwd("//aa.ad.epa.gov/ORD/CIN/USERS/MAIN/L-P/LSchifma/Net MyDocuments/PROJECTS/Schifman - Black Carbon/BC_R_Analysis")

CART_Layer1 <- read.csv("//aa.ad.epa.gov/ORD/CIN/USERS/MAIN/L-P/LSchifma/Net MyDocuments/PROJECTS/Schifman - Black Carbon/BC_R_Analysis/CART_Layer1.csv")
names(CART_Layer1)
##  [1] "NDVI_Avg"               "Elevation"             
##  [3] "Aspect"                 "Curvature"             
##  [5] "Slope_Percent"          "CityAge"               
##  [7] "Number_FuelDealers"     "Number_NewCarDealers"  
##  [9] "Number_UsedCarDealers"  "All_CarDealers"        
## [11] "Avg_AADT_1kmBuf"        "Avg_AADT_5kmBuf"       
## [13] "Avg_AADT_10kmBuf"       "Avg_AADT_15kmBuf"      
## [15] "AllFac_1m"              "AllFac_5km"            
## [17] "AllFac_10km"            "AllFac_15lm"           
## [19] "Unsaturated_K2cm_cmhr"  "Zinc1_ppm"             
## [21] "Copper1_ppm"            "Total_HorizonDepth_cm1"
## [23] "M3_CEC1"                "pred_Sand1"            
## [25] "pred_Silt1"             "pred_Clay1"            
## [27] "pHpH_Res_entry1"        "BC_mgg1"
CART_Layer1_2<-CART_Layer1[,-c(22)]
names(CART_Layer1_2)
##  [1] "NDVI_Avg"              "Elevation"            
##  [3] "Aspect"                "Curvature"            
##  [5] "Slope_Percent"         "CityAge"              
##  [7] "Number_FuelDealers"    "Number_NewCarDealers" 
##  [9] "Number_UsedCarDealers" "All_CarDealers"       
## [11] "Avg_AADT_1kmBuf"       "Avg_AADT_5kmBuf"      
## [13] "Avg_AADT_10kmBuf"      "Avg_AADT_15kmBuf"     
## [15] "AllFac_1m"             "AllFac_5km"           
## [17] "AllFac_10km"           "AllFac_15lm"          
## [19] "Unsaturated_K2cm_cmhr" "Zinc1_ppm"            
## [21] "Copper1_ppm"           "M3_CEC1"              
## [23] "pred_Sand1"            "pred_Silt1"           
## [25] "pred_Clay1"            "pHpH_Res_entry1"      
## [27] "BC_mgg1"
cor(CART_Layer1_2)->M
corrplot(M, method="ellipse")

toss<-CART_Layer1_2[,c(21,22,15,16,18,7,8,9,10,26,6,2)]
names(toss)
##  [1] "Copper1_ppm"           "M3_CEC1"              
##  [3] "AllFac_1m"             "AllFac_5km"           
##  [5] "AllFac_15lm"           "Number_FuelDealers"   
##  [7] "Number_NewCarDealers"  "Number_UsedCarDealers"
##  [9] "All_CarDealers"        "pHpH_Res_entry1"      
## [11] "CityAge"               "Elevation"
CART_Layer1_2<-CART_Layer1_2[,-c(21,22,15,16,18,7,8,9,10,26,6,2)]
names(CART_Layer1_2)
##  [1] "NDVI_Avg"              "Aspect"               
##  [3] "Curvature"             "Slope_Percent"        
##  [5] "Avg_AADT_1kmBuf"       "Avg_AADT_5kmBuf"      
##  [7] "Avg_AADT_10kmBuf"      "Avg_AADT_15kmBuf"     
##  [9] "AllFac_10km"           "Unsaturated_K2cm_cmhr"
## [11] "Zinc1_ppm"             "pred_Sand1"           
## [13] "pred_Silt1"            "pred_Clay1"           
## [15] "BC_mgg1"
spac.tree1 = rpart(BC_mgg1 ~ ., data = CART_Layer1_2)
names(spac.tree1)
##  [1] "frame"               "where"               "call"               
##  [4] "terms"               "cptable"             "method"             
##  [7] "parms"               "control"             "functions"          
## [10] "numresp"             "splits"              "variable.importance"
## [13] "y"                   "ordered"
spac.tree1$cptable
##           CP nsplit rel error   xerror      xstd
## 1 0.16589667      0 1.0000000 1.013746 0.4035955
## 2 0.09163935      1 0.8341033 1.322275 0.4356013
## 3 0.04161278      2 0.7424640 1.167651 0.4200402
## 4 0.01965938      4 0.6592384 1.252901 0.4410452
## 5 0.01470421      5 0.6395790 1.289290 0.4432701
## 6 0.01000000      6 0.6248748 1.273485 0.4433764
rsq.rpart(spac.tree1)
## 
## Regression tree:
## rpart(formula = BC_mgg1 ~ ., data = CART_Layer1_2)
## 
## Variables actually used in tree construction:
## [1] Avg_AADT_10kmBuf      Avg_AADT_1kmBuf       NDVI_Avg             
## [4] pred_Clay1            Unsaturated_K2cm_cmhr Zinc1_ppm            
## 
## Root node error: 19402/101 = 192.1
## 
## n= 101 
## 
##         CP nsplit rel error xerror    xstd
## 1 0.165897      0   1.00000 1.0137 0.40360
## 2 0.091639      1   0.83410 1.3223 0.43560
## 3 0.041613      2   0.74246 1.1677 0.42004
## 4 0.019659      4   0.65924 1.2529 0.44105
## 5 0.014704      5   0.63958 1.2893 0.44327
## 6 0.010000      6   0.62487 1.2735 0.44338

printcp(spac.tree1)
## 
## Regression tree:
## rpart(formula = BC_mgg1 ~ ., data = CART_Layer1_2)
## 
## Variables actually used in tree construction:
## [1] Avg_AADT_10kmBuf      Avg_AADT_1kmBuf       NDVI_Avg             
## [4] pred_Clay1            Unsaturated_K2cm_cmhr Zinc1_ppm            
## 
## Root node error: 19402/101 = 192.1
## 
## n= 101 
## 
##         CP nsplit rel error xerror    xstd
## 1 0.165897      0   1.00000 1.0137 0.40360
## 2 0.091639      1   0.83410 1.3223 0.43560
## 3 0.041613      2   0.74246 1.1677 0.42004
## 4 0.019659      4   0.65924 1.2529 0.44105
## 5 0.014704      5   0.63958 1.2893 0.44327
## 6 0.010000      6   0.62487 1.2735 0.44338
plot(spac.tree1,compress=T, branch=0.25,margin=0.1,main="Horizon 1")
text(spac.tree1, use.n=T,minlength=0.5)

spac.tree1$cptable[which.min(spac.tree1$cptable[,"xerror"]),"CP"]
## [1] 0.1658967
prune(spac.tree1, cp = 0.015)->spac.tree1_2

rsq.rpart(spac.tree1_2)
## 
## Regression tree:
## rpart(formula = BC_mgg1 ~ ., data = CART_Layer1_2)
## 
## Variables actually used in tree construction:
## [1] Avg_AADT_10kmBuf      NDVI_Avg              pred_Clay1           
## [4] Unsaturated_K2cm_cmhr Zinc1_ppm            
## 
## Root node error: 19402/101 = 192.1
## 
## n= 101 
## 
##         CP nsplit rel error xerror    xstd
## 1 0.165897      0   1.00000 1.0137 0.40360
## 2 0.091639      1   0.83410 1.3223 0.43560
## 3 0.041613      2   0.74246 1.1677 0.42004
## 4 0.019659      4   0.65924 1.2529 0.44105
## 5 0.015000      5   0.63958 1.2893 0.44327

printcp(spac.tree1_2)
## 
## Regression tree:
## rpart(formula = BC_mgg1 ~ ., data = CART_Layer1_2)
## 
## Variables actually used in tree construction:
## [1] Avg_AADT_10kmBuf      NDVI_Avg              pred_Clay1           
## [4] Unsaturated_K2cm_cmhr Zinc1_ppm            
## 
## Root node error: 19402/101 = 192.1
## 
## n= 101 
## 
##         CP nsplit rel error xerror    xstd
## 1 0.165897      0   1.00000 1.0137 0.40360
## 2 0.091639      1   0.83410 1.3223 0.43560
## 3 0.041613      2   0.74246 1.1677 0.42004
## 4 0.019659      4   0.65924 1.2529 0.44105
## 5 0.015000      5   0.63958 1.2893 0.44327
plot(spac.tree1_2,compress=T, branch=0.25,margin=0.1,main="Horizon 1")
text(spac.tree1_2, use.n=T,minlength=0.5)


library(ggplot2)
## Warning: package 'ggplot2' was built under R version 3.3.3

library(scales)
## Warning: package 'scales' was built under R version 3.3.3
Pho_unsat<-(c(8.9909,
              3.3000,
              15.0314,
              17.0298,
              5.6000,
              4.0000,
              6.4159,
              14.5820,
              14.5820))

Pho_BC<-(c(8.0460,
           15.6900,
           12.5500,
           8.9360,
           18.3100,
           22.4500,
           13.0400,
           13.9000,
           17.6100))

Pho<-as.data.frame(cbind(Pho_unsat,Pho_BC))

CART_Layer1$K_mmhr<-CART_Layer1$Unsaturated_K2cm_cmhr*10

p1<-ggplot(CART_Layer1, aes(x=BC_mgg1, y=K_mmhr)) 
p1 +
  geom_point(aes(size=BC_mgg1, col=K_mmhr)) +
  theme_bw() +
  labs(y="Infiltration Rate (mm/hr)", 
       x="Black Carbon (mg/g)", 
          color = expression(K[unsat]~ (mm~hr^{-1})),
       size="Black Carbon (mg/g)") +
  scale_color_gradientn(colours = c("red","light blue","dark blue"), 
                         values = rescale(c(0,13,150)),
                         guide = "colorbar", limits=c(0,190))+
  geom_point(data = Pho, mapping = aes(x=Pho_BC, y= Pho_unsat, alpha=0.15),show_guide=FALSE)
## Warning: `show_guide` has been deprecated. Please use `show.legend`
## instead.

ggsave(filename="Figure4.tiff", dpi=600)
## Saving 7 x 5 in image
#########################################LAYER 2
CART_Layer2 <- read.csv("//aa.ad.epa.gov/ORD/CIN/USERS/MAIN/L-P/LSchifma/Net MyDocuments/PROJECTS/Schifman - Black Carbon/BC_R_Analysis/CART_Layer2.csv")

names(CART_Layer2)
##  [1] "City1"                         "NDVI_Avg"                     
##  [3] "Elevation"                     "Aspect"                       
##  [5] "Curvature"                     "Slope_Percent"                
##  [7] "CityAge"                       "Number_FuelDealers"           
##  [9] "Number_NewCarDealers"          "Number_UsedCarDealers"        
## [11] "All_CarDealers"                "Avg_AADT_1kmBuf"              
## [13] "Avg_AADT_5kmBuf"               "Avg_AADT_10kmBuf"             
## [15] "Avg_AADT_15kmBuf"              "JoinCount_Ambient_1km"        
## [17] "JoinCount_Ambient_5km"         "JoinCount_Ambient_10km"       
## [19] "JoinCount_Ambient_15km"        "JoinCount_FacilitesOther_1km" 
## [21] "JoinCount_FacilitesOther_5km"  "JoinCount_FacilitesOther_10km"
## [23] "JoinCount_FacilitesOther_15km" "JoinCount_PowerPlant_1km"     
## [25] "JoinCount_PowerPlant_5km"      "JoinCount_PowerPlant_10km"    
## [27] "JoinCount_PowerPlant_15km"     "Unsaturated_K2cm_cmhr"        
## [29] "Zinc2_ppm"                     "Copper2_ppm"                  
## [31] "Total_HorizonDepth_cm2"        "M3_CEC2"                      
## [33] "pred_Sand2"                    "pred_Silt2"                   
## [35] "pred_Clay2"                    "pHpH_Res_entry2"              
## [37] "SOM_mgg2"                      "BC_mgg2"                      
## [39] "IC_mgg2"                       "BCfraction2"
CART_Layer1$Total_HorizonDepth_cm1->hzdepth1
cbind(CART_Layer2, hzdepth1)->CART_Layer2_2
names(CART_Layer2_2) 
##  [1] "City1"                         "NDVI_Avg"                     
##  [3] "Elevation"                     "Aspect"                       
##  [5] "Curvature"                     "Slope_Percent"                
##  [7] "CityAge"                       "Number_FuelDealers"           
##  [9] "Number_NewCarDealers"          "Number_UsedCarDealers"        
## [11] "All_CarDealers"                "Avg_AADT_1kmBuf"              
## [13] "Avg_AADT_5kmBuf"               "Avg_AADT_10kmBuf"             
## [15] "Avg_AADT_15kmBuf"              "JoinCount_Ambient_1km"        
## [17] "JoinCount_Ambient_5km"         "JoinCount_Ambient_10km"       
## [19] "JoinCount_Ambient_15km"        "JoinCount_FacilitesOther_1km" 
## [21] "JoinCount_FacilitesOther_5km"  "JoinCount_FacilitesOther_10km"
## [23] "JoinCount_FacilitesOther_15km" "JoinCount_PowerPlant_1km"     
## [25] "JoinCount_PowerPlant_5km"      "JoinCount_PowerPlant_10km"    
## [27] "JoinCount_PowerPlant_15km"     "Unsaturated_K2cm_cmhr"        
## [29] "Zinc2_ppm"                     "Copper2_ppm"                  
## [31] "Total_HorizonDepth_cm2"        "M3_CEC2"                      
## [33] "pred_Sand2"                    "pred_Silt2"                   
## [35] "pred_Clay2"                    "pHpH_Res_entry2"              
## [37] "SOM_mgg2"                      "BC_mgg2"                      
## [39] "IC_mgg2"                       "BCfraction2"                  
## [41] "hzdepth1"
toss<-CART_Layer2_2[,c(1,37,39,40,34,8,9,10,12,13,15,16,17,18,19,22,20,21,23)]
names(toss)
##  [1] "City1"                         "SOM_mgg2"                     
##  [3] "IC_mgg2"                       "BCfraction2"                  
##  [5] "pred_Silt2"                    "Number_FuelDealers"           
##  [7] "Number_NewCarDealers"          "Number_UsedCarDealers"        
##  [9] "Avg_AADT_1kmBuf"               "Avg_AADT_5kmBuf"              
## [11] "Avg_AADT_15kmBuf"              "JoinCount_Ambient_1km"        
## [13] "JoinCount_Ambient_5km"         "JoinCount_Ambient_10km"       
## [15] "JoinCount_Ambient_15km"        "JoinCount_FacilitesOther_10km"
## [17] "JoinCount_FacilitesOther_1km"  "JoinCount_FacilitesOther_5km" 
## [19] "JoinCount_FacilitesOther_15km"
CART_Layer2_2<-CART_Layer2_2[,-c(1,37,39,40,34,8,9,10,12,13,15,16,17,18,19,22,20,21,23)]

cor(CART_Layer2_2)->M2
corrplot(M2, method="ellipse")

spac.tree2 = rpart(BC_mgg2 ~ ., data = CART_Layer2_2)
names(spac.tree2)
##  [1] "frame"               "where"               "call"               
##  [4] "terms"               "cptable"             "method"             
##  [7] "parms"               "control"             "functions"          
## [10] "numresp"             "splits"              "variable.importance"
## [13] "y"                   "ordered"
spac.tree2$cptable[1:5, ]
##           CP nsplit rel error    xerror      xstd
## 1 0.22272114      0 1.0000000 1.0376462 0.2181727
## 2 0.11820858      1 0.7772789 0.9311949 0.2366134
## 3 0.01978570      2 0.6590703 0.9359502 0.2505813
## 4 0.01051655      3 0.6392846 0.9412725 0.2471113
## 5 0.01000000      5 0.6182515 0.9601708 0.2467840
rsq.rpart(spac.tree2)
## 
## Regression tree:
## rpart(formula = BC_mgg2 ~ ., data = CART_Layer2_2)
## 
## Variables actually used in tree construction:
## [1] Avg_AADT_10kmBuf      hzdepth1              NDVI_Avg             
## [4] pred_Clay2            Unsaturated_K2cm_cmhr
## 
## Root node error: 13831/101 = 136.94
## 
## n= 101 
## 
##         CP nsplit rel error  xerror    xstd
## 1 0.222721      0   1.00000 1.03765 0.21817
## 2 0.118209      1   0.77728 0.93119 0.23661
## 3 0.019786      2   0.65907 0.93595 0.25058
## 4 0.010517      3   0.63928 0.94127 0.24711
## 5 0.010000      5   0.61825 0.96017 0.24678

printcp(spac.tree2)
## 
## Regression tree:
## rpart(formula = BC_mgg2 ~ ., data = CART_Layer2_2)
## 
## Variables actually used in tree construction:
## [1] Avg_AADT_10kmBuf      hzdepth1              NDVI_Avg             
## [4] pred_Clay2            Unsaturated_K2cm_cmhr
## 
## Root node error: 13831/101 = 136.94
## 
## n= 101 
## 
##         CP nsplit rel error  xerror    xstd
## 1 0.222721      0   1.00000 1.03765 0.21817
## 2 0.118209      1   0.77728 0.93119 0.23661
## 3 0.019786      2   0.65907 0.93595 0.25058
## 4 0.010517      3   0.63928 0.94127 0.24711
## 5 0.010000      5   0.61825 0.96017 0.24678
plot(spac.tree2,uniform=T,compress=F, branch=0.25,margin=0.1,main="Horizon 2")
text(spac.tree2, use.n=T,minlength=0.5)

cp2<-spac.tree2$cptable[which.min(spac.tree2$cptable[,"xerror"]),"CP"]

prune(spac.tree2, cp = 0.015)->spac.tree2_2

rsq.rpart(spac.tree2_2)
## 
## Regression tree:
## rpart(formula = BC_mgg2 ~ ., data = CART_Layer2_2)
## 
## Variables actually used in tree construction:
## [1] Avg_AADT_10kmBuf hzdepth1         NDVI_Avg        
## 
## Root node error: 13831/101 = 136.94
## 
## n= 101 
## 
##         CP nsplit rel error  xerror    xstd
## 1 0.222721      0   1.00000 1.03765 0.21817
## 2 0.118209      1   0.77728 0.93119 0.23661
## 3 0.019786      2   0.65907 0.93595 0.25058
## 4 0.015000      3   0.63928 0.94127 0.24711

printcp(spac.tree2_2)
## 
## Regression tree:
## rpart(formula = BC_mgg2 ~ ., data = CART_Layer2_2)
## 
## Variables actually used in tree construction:
## [1] Avg_AADT_10kmBuf hzdepth1         NDVI_Avg        
## 
## Root node error: 13831/101 = 136.94
## 
## n= 101 
## 
##         CP nsplit rel error  xerror    xstd
## 1 0.222721      0   1.00000 1.03765 0.21817
## 2 0.118209      1   0.77728 0.93119 0.23661
## 3 0.019786      2   0.65907 0.93595 0.25058
## 4 0.015000      3   0.63928 0.94127 0.24711
plot(spac.tree2_2,uniform=T,compress=F, branch=0.25,margin=0.1,main="Horizon 2")
text(spac.tree2_2, use.n=T,minlength=0.5)