# R code for tables and figures of the manuscript "Measuring urban tree loss # dynamics across US residential landscapes" by Alessandro Ossola, Matthew Hopton, # US EPA/ORD/NRMRL/ORD/STD, 26 W. Martin Luther King Dr., Cincinnati, OH, 45268, USA. # Corresponding Author, Email: hopton.matthew@epa.gov, T: +1 513 569 7718 # version 2/27/2017 # data from "DataDenverMilwaukee.xlsx" file "toR_subset_FINAL" tab # data<-read.table("clipboard", header=T) # attach(data) # dataset preparation .libPaths("C:/users/aossola/R/library") den<-subset(data, City=="Denver" & MEANheight>0, select=c(Age:NcutStemsResAREA)) mil<-subset(data, City=="Milwaukee" & MEANheight>0, select=c(Age:NcutStemsResAREA)) # checking variable multicollinearity using Variance Inflation Factors (VIF) and excluding variables which have # a correlation higher than ~0.67 (VIF threshold =3). Denver (den) and # Milwaukee (mil) data are run separately. library(usdm) den.MC<-subset(den, select=c(Age:PercentRes)) den.MC = data.frame(den.MC) vif(den.MC) vifstep(den.MC, th=3) mil.MC<-subset(mil, select=c(Age:PercentRes)) mil.MC = data.frame(mil.MC) vif(mil.MC) vifstep(mil.MC, th=3) # bringing along only the non collinear variables from VIF selections. DEN<-subset(data, City=="Denver" & MEANheight>0, select=c(MEANheight, NcutStemsResAREA, Age , FAMnonRatio, VacantUnits., BelowPoverty, Gini, Unemployed20_64, Rent, Decade, TcanAREAstart, ParcelSize, HousingDensity, PercentRes)) MIL<-subset(data, City=="Milwaukee" & MEANheight>0, select=c(MEANheight, NcutStemsResAREA,Age , RaceDiv, FAMnonRatio, VacantUnits., Gini, Unemployed20_64, Rent, Decade, TcanAREAstart, HousingDensity, PercentRes)) # check dimensions datasets dim(DEN) dim(MIL) ####### running DENVER dataset ################################################### library(lmtest) # bptest for testing heteroscedasticity library(gvlma) # global validation of linear model assumptions ####### linear models predicting n. cut stems per census tract (DEN$NcutStemsResAREA) #global model with outliers lm.den.N.all<-lm(sqrt(DEN$NcutStemsResAREA)~ log(DEN$Age+1) + log(DEN$FAMnonRatio+1) + log(DEN$VacantUnits.+1) + sqrt(DEN$BelowPoverty) + DEN$Gini + sqrt(DEN$Unemployed20_64) + log(DEN$Rent+1) + (DEN$Decade) + (DEN$TcanAREAstart) + log(DEN$ParcelSize+1) + log(DEN$HousingDensity+1) + (DEN$PercentRes)) summary(lm.den.N.all) plot(lm.den.N.all, which=2) shapiro.test(residuals(lm.den.N.all)) gvlma(lm.den.N.all) #removing outliers (obs 79 and 83) which have the highest N cut stems DEN[c(79, 83), 1:12] DEN1<-DEN[-c(79, 83),] #global model without ouliers lm.den.N.all1<-lm(sqrt(DEN1$NcutStemsResAREA)~ log(DEN1$Age+1) + log(DEN1$FAMnonRatio+1) + log(DEN1$VacantUnits.+1) + sqrt(DEN1$BelowPoverty) + DEN1$Gini + sqrt(DEN1$Unemployed20_64) + log(DEN1$Rent+1) + (DEN1$Decade) + (DEN1$TcanAREAstart) + log(DEN1$ParcelSize+1) + log(DEN1$HousingDensity+1) + (DEN1$PercentRes)) summary(lm.den.N.all1) plot(lm.den.N.all1, which=2) shapiro.test(residuals(lm.den.N.all1)) gvlma(lm.den.N.all1) #urban morphology model without ouliers lm.den.N.env1<-lm(sqrt(DEN1$NcutStemsResAREA)~ (DEN1$Decade) + (DEN1$TcanAREAstart) + log(DEN1$ParcelSize+1) + log(DEN1$HousingDensity+1) + (DEN1$PercentRes)) summary(lm.den.N.env1) plot(lm.den.N.env1, which=2) shapiro.test(residuals(lm.den.N.env1)) gvlma(lm.den.N.env1) #socio-eco model without ouliers lm.den.N.socio1<-lm(sqrt(DEN1$NcutStemsResAREA)~ log(DEN1$Age+1) + log(DEN1$FAMnonRatio+1) + log(DEN1$VacantUnits.+1) + sqrt(DEN1$BelowPoverty) + DEN1$Gini + sqrt(DEN1$Unemployed20_64) + log(DEN1$Rent+1)) summary(lm.den.N.socio1) plot(lm.den.N.socio1, which=1) shapiro.test(residuals(lm.den.N.socio1)) gvlma(lm.den.N.socio1) ### selecting best model based on AICc library(AICcmodavg) Cand.models<-list() Cand.models[[1]]<-lm.den.N.all1 Cand.models[[2]]<-lm.den.N.env1 Cand.models[[3]]<-lm.den.N.socio1 aictab(cand.set = Cand.models, sort = TRUE) # best model is lm.den.N.env1 ####### linear models predicting stem height in each census tract (MEANheight) DEN<-subset(data, City=="Denver" & MEANheight>0, select=c(MEANheight, NcutStemsResAREA, Age , FAMnonRatio, VacantUnits., BelowPoverty, Gini, Unemployed20_64, Rent, Decade, TcanAREAstart, ParcelSize, HousingDensity, PercentRes)) #global model with outliers lm.den.H.all<-lm(DEN$MEANheight ~ log(DEN$Age+1) + log(DEN$FAMnonRatio+1) + log(DEN$VacantUnits.+1) + sqrt(DEN$BelowPoverty) + DEN$Gini + sqrt(DEN$Unemployed20_64) + log(DEN$Rent+1) + (DEN$Decade) + (DEN$TcanAREAstart) + log(DEN$ParcelSize+1) + log(DEN$HousingDensity+1) + (DEN$PercentRes)) summary(lm.den.H.all) plot(lm.den.H.all, which=2) shapiro.test(residuals(lm.den.H.all)) gvlma(lm.den.H.all) #Removed outliers 105, 157, 170, which have the highest height DEN[c(105, 157, 170), 1:12] DEN1<-DEN[-c(105, 157, 170),] #global model without outliers lm.den.H.all1<-lm(DEN1$MEANheight ~ log(DEN1$Age+1) + log(DEN1$FAMnonRatio+1) + log(DEN1$VacantUnits.+1) + sqrt(DEN1$BelowPoverty) + DEN1$Gini + sqrt(DEN1$Unemployed20_64) + log(DEN1$Rent+1) + (DEN1$Decade) + (DEN1$TcanAREAstart) + log(DEN1$ParcelSize+1) + log(DEN1$HousingDensity+1) + (DEN1$PercentRes)) summary(lm.den.H.all1) plot(lm.den.H.all1, which=2) shapiro.test(residuals(lm.den.H.all1)) gvlma(lm.den.H.all1) #urban morphology model without outliers lm.den.H.env1<-lm(DEN1$MEANheight ~ (DEN1$Decade) + (DEN1$TcanAREAstart) + log(DEN1$ParcelSize+1) + log(DEN1$HousingDensity+1) + (DEN1$PercentRes)) summary(lm.den.H.env1) plot(lm.den.H.env1, which=2) shapiro.test(residuals(lm.den.H.env1)) gvlma(lm.den.H.env1) #socio-eco model without outliers lm.den.H.socio1<-lm(DEN1$MEANheight ~ log(DEN1$Age+1) + log(DEN1$FAMnonRatio+1) + log(DEN1$VacantUnits.+1) + sqrt(DEN1$BelowPoverty) + DEN1$Gini + sqrt(DEN1$Unemployed20_64) + log(DEN1$Rent+1)) summary(lm.den.H.socio1) plot(lm.den.H.socio1, which=2) shapiro.test(residuals(lm.den.H.socio1)) gvlma(lm.den.H.socio1) library(AICcmodavg) Cand.models<-list() Cand.models[[1]]<-lm.den.H.all1 Cand.models[[2]]<-lm.den.H.env1 Cand.models[[3]]<-lm.den.H.socio1 aictab(cand.set = Cand.models, sort = TRUE) # best model is lm.den.H.all1 ####### running MILWAUKEE dataset ################################################### library(lmtest) # bptest for testing heteroscedasticity library(gvlma) # global validation of linear model assumptions ####### linear models predicting n. cut stems per census tract (DEN$NcutStemsResAREA) #global model with outliers lm.mil.N<-lm(log(MIL$NcutStemsResAREA+1)~ log(MIL$Age+1) + log(MIL$RaceDiv+1) + log(MIL$FAMnonRatio+1) + log(MIL$VacantUnits.+1) + log(MIL$Gini+1) + log(MIL$Unemployed20_64+1) + log(MIL$Rent+1) + (MIL$Decade) + log(MIL$TcanAREAstart+1)+ log(MIL$HousingDensity+1) + (MIL$PercentRes)) summary(lm.mil.N) plot(lm.mil.N, which=2) shapiro.test(residuals(lm.mil.N)) gvlma(lm.mil.N) #bptest(lm.mil.N) # Removed outliers (obs 72,108,118) which have the highest and lowest N cut stems MIL[c(72,108,118), 1:12] MIL1<-MIL[-c(72,108,118),] #global model without outliers lm.mil.N.all1<-lm(log(MIL1$NcutStemsResAREA+1)~ log(MIL1$Age+1) + log(MIL1$RaceDiv+1) + log(MIL1$FAMnonRatio+1) + log(MIL1$VacantUnits.+1) + log(MIL1$Gini+1) + log(MIL1$Unemployed20_64+1) + log(MIL1$Rent+1) + (MIL1$Decade) + log(MIL1$TcanAREAstart+1)+ log(MIL1$HousingDensity+1) + (MIL1$PercentRes)) summary(lm.mil.N.all1) plot(lm.mil.N.all1, which=2) shapiro.test(residuals(lm.mil.N.all1)) gvlma(lm.mil.N.all1) #urban morphology model without outliers lm.mil.N.env1<-lm(log(MIL1$NcutStemsResAREA+1)~ (MIL1$Decade) + log(MIL1$TcanAREAstart+1)+ log(MIL1$HousingDensity+1) + (MIL1$PercentRes)) summary(lm.mil.N.env1) plot(lm.mil.N.env1, which=2) shapiro.test(residuals(lm.mil.N.env1)) gvlma(lm.mil.N.env1) #socio-eco model without outliers lm.mil.N.socio1<-lm(log(MIL1$NcutStemsResAREA+1)~ log(MIL1$Age+1) + log(MIL1$RaceDiv+1) + log(MIL1$FAMnonRatio+1) + log(MIL1$VacantUnits.+1) + log(MIL1$Gini+1) + log(MIL1$Unemployed20_64+1) + log(MIL1$Rent+1)) summary(lm.mil.N.socio1) plot(lm.mil.N.socio1, which=2) shapiro.test(residuals(lm.mil.N.socio1)) gvlma(lm.mil.N.socio1) Cand.models<-list() Cand.models[[1]]<-lm.mil.N.all1 Cand.models[[2]]<-lm.mil.N.env1 Cand.models[[3]]<-lm.mil.N.socio1 aictab(cand.set = Cand.models, sort = TRUE) # best model is lm.den.H.all1 ####### linear models predicting stem height in each census tract (MEANheight) MIL<-subset(data, City=="Milwaukee" & MEANheight>0, select=c(MEANheight, NcutStemsResAREA,Age , RaceDiv, FAMnonRatio, VacantUnits., Gini, Unemployed20_64, Rent, Decade, TcanAREAstart, HousingDensity, PercentRes)) lm.mil.H<-lm((MIL$MEANheight)~ log(MIL$Age+1) + log(MIL$RaceDiv+1) + log(MIL$FAMnonRatio+1) + log(MIL$VacantUnits.+1) + log(MIL$Gini+1) + log(MIL$Unemployed20_64+1) + log(MIL$Rent+1) + (MIL$Decade) + log(MIL$TcanAREAstart+1)+ log(MIL$HousingDensity+1) + (MIL$PercentRes)) summary(lm.mil.H) plot(lm.mil.H, which=2) shapiro.test(residuals(lm.mil.H)) gvlma(lm.mil.H) #bptest(lm.mil.H) # Removed outliers (obs 41,108,118) which have the highest and lowest N cut stems MIL[c(41,108,118), 1:12] MIL1<-MIL[-c(41,108,118),] #global model without outliers lm.mil.H.all1<-lm((MIL1$MEANheight)~ log(MIL1$Age+1) + log(MIL1$RaceDiv+1) + log(MIL1$FAMnonRatio+1) + log(MIL1$VacantUnits.+1) + log(MIL1$Gini+1) + log(MIL1$Unemployed20_64+1) + log(MIL1$Rent+1) + (MIL1$Decade) + log(MIL1$TcanAREAstart+1)+ log(MIL1$HousingDensity+1) + (MIL1$PercentRes)) summary(lm.mil.H.all1) plot(lm.mil.H.all1, which=2) shapiro.test(residuals(lm.mil.H.all1)) gvlma(lm.mil.H.all1) #environmental model without outliers lm.mil.H.env1<-lm((MIL1$MEANheight)~ (MIL1$Decade) + log(MIL1$TcanAREAstart+1)+ log(MIL1$HousingDensity+1) + (MIL1$PercentRes)) summary(lm.mil.H.env1) plot(lm.mil.H.env1, which=2) shapiro.test(residuals(lm.mil.H.env1)) gvlma(lm.mil.H.env1) #socio-eco model without outliers lm.mil.H.socio1<-lm((MIL1$MEANheight)~ log(MIL1$Age+1) + log(MIL1$RaceDiv+1) + log(MIL1$FAMnonRatio+1) + log(MIL1$VacantUnits.+1) + log(MIL1$Gini+1) + log(MIL1$Unemployed20_64+1) + log(MIL1$Rent+1)) summary(lm.mil.H.socio1) plot(lm.mil.H.socio1, which=2) shapiro.test(residuals(lm.mil.H.socio1)) gvlma(lm.mil.H.socio1) library(AICcmodavg) Cand.models<-list() Cand.models[[1]]<-lm.mil.H.all1 Cand.models[[2]]<-lm.mil.H.env1 Cand.models[[3]]<-lm.mil.H.socio1 aictab(cand.set = Cand.models, sort = TRUE) # best model is lm.mil.H.all1 ####### code for producing Fig.4 ################################################################### library(ggplot2) den<-subset(data, City=="Denver") mil<-subset(data, City=="Milwaukee") dev.new(width=20, height=17) p <- ggplot(data = data, aes(y = NcutStemsResAREA, x = TvolAREAstart, fill=City)) p + geom_point(aes(fill = factor(City)), shape=21, alpha=0.3, size = 4) + scale_fill_manual(values=c("red", "cyan")) + geom_smooth(aes(color=City), method=lm, se=F) + theme_bw() + ylab(bquote('Tree stems removed (' *km^2*')')) + xlab(bquote('Vegetation volume ('*m^3~m^-2*') initial year')) + ggtitle (" ") lmNvol.den<-lm((den$NcutStemsResAREA)~den$TvolAREAstart) summary(lmNvol.den) plot(lmNvol.den) lmNvol.mil<-lm((mil$NcutStemsResAREA)~(mil$TvolAREAstart)) summary(lmNvol.mil) plot(lmNvol.mil) # den<-subset(data, City=="Denver") mil<-subset(data, City=="Milwaukee") dev.new(width=20, height=17) p <- ggplot(data = data, aes(y = NcutStemsResAREA, x = TcanAREAstart, fill=City)) p + geom_point(aes(fill = factor(City)), shape=21, alpha=0.3, size = 4) + scale_fill_manual(values=c("red", "cyan")) + geom_smooth(aes(color=City), method=lm, se=FALSE) + theme_bw() + ylab(bquote('Tree stems removed (' *km^2*')')) + xlab(bquote('Canopy cover ('*m^2~m^-2*') initial year')) + ggtitle (" ") lmNcan.den<-lm((den$NcutStemsResAREA)~den$TcanAREAstart) summary(lmNcan.den) plot(lmNcan.den) lmNcan.mil<-lm((mil$NcutStemsResAREA)~(mil$TcanAREAstart)) summary(lmNcan.mil) plot(lmNcan.mil) ####### code for producing Fig.5 ################################################################### library(ggplot2) ####################NcutStems~age Figure 5A myData <- aggregate(data$NcutStemsResAREA, by = list(Decade = Decade, City), FUN = function(x) c(mean = mean(x), sd = sd(x), n = length(x))) myData <- do.call(data.frame, myData) myData$se <- myData$x.sd / sqrt(myData$x.n) colnames(myData) <- c("Decade", "City", "mean", "sd", "n", "se") myData$labels <- c("1940s or earlier", "1950s or earlier", "1960s or earlier", "1970s or earlier", "1980s or earlier", "1990s or earlier", "2000s or earlier", "2005s or later", "1940s or earlier", "1950s or earlier", "1960s or earlier", "1970s or earlier", "1980s or earlier", "1990s or earlier") dodge <- position_dodge(width = 0.9) limits <- aes(ymax = myData$mean + myData$se, ymin = myData$mean - myData$se) dev.new(width=30, height=17) p <- ggplot(data = myData, aes(x = labels, y = mean, fill=City)) p + geom_bar(stat = "identity", col="black", position=position_dodge(), size=.3) + geom_errorbar(limits, position = dodge, width = 0.25) + scale_fill_manual(values=alpha(c("red", "cyan"), 0.2)) + theme_bw() + xlab("Decade of maximum housing development") + theme(axis.text.x = element_text(angle = 45, hjust = 1)) + ylab(bquote('Tree stems removed (' *km^2*')')) + ylim(0,170) + ggtitle (" ") + #ggtitle("Number of tree stems removed") + geom_text(aes(label=myData$n), position=position_dodge(width=0.9), vjust=-6, size=2) ####################MEANheight~age Figure 5B myData <- aggregate(data$MEANheight, by = list(Decade = Decade, City), FUN = function(x) c(mean = mean(x), sd = sd(x), n = length(x))) myData <- do.call(data.frame, myData) myData$se <- myData$x.sd / sqrt(myData$x.n) colnames(myData) <- c("Decade", "City", "mean", "sd", "n", "se") myData$labels <- c("1940s or earlier", "1950s or earlier", "1960s or earlier", "1970s or earlier", "1980s or earlier", "1990s or earlier", "2000s or earlier", "2005s or later", "1940s or earlier", "1950s or earlier", "1960s or earlier", "1970s or earlier", "1980s or earlier", "1990s or earlier") dodge <- position_dodge(width = 0.9) limits <- aes(ymax = myData$mean + myData$se, ymin = myData$mean - myData$se) dev.new(width=30, height=17) p <- ggplot(data = myData, aes(x = labels, y = mean, fill=City)) p + geom_bar(stat = "identity", col="black", position=position_dodge(), size=.3) + geom_errorbar(limits, position = dodge, width = 0.25) + scale_fill_manual(values=alpha(c("red", "cyan"), 0.2)) + theme_bw() + xlab("Decade of maximum housing development") + theme(axis.text.x = element_text(angle = 45, hjust = 1)) + ylab("Tree stem height (m)") + ylim(0,15) + ggtitle("Average stem height of trees removed") #+ #geom_text(aes(label=myData$n), position=position_dodge(width=0.9), vjust=-6, size=2) ####### code for producing Figure 3 ##################################################################### # data from "DataDenverMilwaukee.xlsx" file "treecut_all" tab # data<-read.table("clipboard", header=T) # attach(data) library(ggplot2) #now make your lovely plot ggplot(data, aes(Height_m, fill = City)) + geom_density(alpha = 0.2) + xlim(0,25) + xlab("Height (m)") + ylab("Frequency") + theme_bw() ####### code for producing Appendix B ################################################################### # data from "DataDenverMilwaukee.xlsx" file "CalibrationDEMsDSMs" tab # data<-read.table("clipboard", header=T) # attach(data) library(scales) par(mfrow=c(2,2)) plot(DEM2008, DEM2013, xlab="DEM 2008", ylab="DEM 2013", main="Denver", col=alpha("red", 0.3), pch=16, cex=1.4) cor.test(DEM2008, DEM2013) text(1640, 1740, expression(paste(rho, "=0.999, p<0.001"))) plot(nDSM2008, nDSM2013, xlab="nDSM 2008", ylab="nDSM 2013", main="Denver", xlim=c(3,14), ylim=c(3,14), col=alpha("red", 0.3), pch=16, cex=1.4) cor.test(nDSM2008, nDSM2013) text(7, 13, expression(paste(rho, "=0.986, p<0.001"))) plot(DEM2010, DEM2015, xlab="DEM 2010", ylab="DEM 2015", main="Milwaukee", col=alpha("cyan", 0.3), pch=16, cex=1.4) cor.test(DEM2010, DEM2015) text(205,245, expression(paste(rho, "=0.999, p<0.001"))) plot(nDSM2010, nDSM2015, xlab="nDSM 2010", ylab="nDSM 2015", main="Milwaukee", xlim=c(3,14), ylim=c(3,14), col=alpha("cyan", 0.3), pch=16, cex=1.4) cor.test(nDSM2010, nDSM2015) text(7,13, expression(paste(rho, "=0.976, p<0.001")))