library(spsurvey) #---------- # Read in data NRSA <- read.csv('NRSA_df.csv'); nrow(NRSA) # Prepare data for cdf and change analysis sites <- data.frame( siteID=NRSA$SITE_ID, All_Sites=rep(TRUE, nr)) subpop <- data.frame( siteID=NRSA$SITE_ID, All_Regions=NRSA$SURVEY) design <- data.frame( siteID=NRSA$SITE_ID, wgt=NRSA$WGT_SP_CORE, xcoord=NRSA$XCOORD, ycoord=NRSA$YCOORD) data.cont <- data.frame( siteID=NRSA$SITE_ID, StreamTN=NRSA$TN_PPM, StreamNO3=NRSA$NO3_PPM, StreamTON=NRSA$TON_PPM, SDEP_RATE =NRSA$S_Deposition, TNinput=NRSA$TN_RATE, DEPO_RATE=NRSA$DEPO_RATE, AGSUR_RATE2=NRSA$AGSUR_RATE2, HM_RATE=NRSA$HM_RATE, DOC = NRSA$DOC # Additional data # PRECIP=NRSA$Precip, # TEMP=NRSA$Tmean, # ANC = NRSA$ANC ) #---------- # Perform CDF for all regions (national CDF) cdftest <- cont.analysis(sites, subpop, design, data.cont) d1<- cdftest$CDF # Add cdf results for 4 regions sites <- data.frame( siteID=NRSA$SITE_ID, Sites=NRSA$Region == "SE_Coastal") subpop <- data.frame( siteID=NRSA$SITE_ID, Coastal=NRSA$SURVEY) cdftest <- cont.analysis(sites, subpop, design, data.cont) d2 <- cdftest$CDF d1 <- rbind(d1,d2) sites <- data.frame( siteID=NRSA$SITE_ID, Sites=NRSA$Region == "Eastern") subpop <- data.frame( siteID=NRSA$SITE_ID, Appalachians=NRSA$SURVEY) cdftest <- cont.analysis(sites, subpop, design, data.cont) d2 <- cdftest$CDF d1 <- rbind(d1,d2) sites <- data.frame( siteID=NRSA$SITE_ID, Sites=NRSA$Region == "Western") subpop <- data.frame( siteID=NRSA$SITE_ID, Western=NRSA$SURVEY) cdftest <- cont.analysis(sites, subpop, design, data.cont) d2 <- cdftest$CDF d1 <- rbind(d1,d2) sites <- data.frame( siteID=NRSA$SITE_ID, Sites=NRSA$Region == "Plains") subpop <- data.frame( siteID=NRSA$SITE_ID, Plains=NRSA$SURVEY) cdftest <- cont.analysis(sites, subpop, design, data.cont) d2 <- cdftest$CDF d1 <- rbind(d1,d2) # Write output file for CDF write.csv(d1, 'cdf_NRSA_2022.csv') #---------- # Change analysis: compare changes in variable values between two surveys # s1s2. Example: changes between first and second survey # Modify survey years in the next two lines for desired comparisons tst1 <- NRSA$SURVEY == "First" tst2 <- NRSA$SURVEY == "Second" sites <- data.frame( siteID = NRSA$SITE_ID, Survey1 = tst1, Survey2 = tst2) rep1 <- NRSA$UNIQUE_ID[tst1] %in% NRSA$UNIQUE_ID[tst2] rep2 <- NRSA$UNIQUE_ID[tst2] %in% NRSA$UNIQUE_ID[tst1] ID1 <- NRSA$SITE_ID[tst1][rep1] ID2 <- NRSA$SITE_ID[tst2][rep2] indx <- match(NRSA$UNIQUE_ID[tst2][rep2], NRSA$UNIQUE_ID[tst1][rep1]) ID1 <- ID1[indx] repeats <- data.frame( siteID_1 = ID1, siteID_2 = ID2) subpop <- data.frame( siteID=NRSA$SITE_ID, All_Regions=rep("All", nr), Region=NRSA$Region) Change_Estimates <- change.analysis(sites, repeats, subpop, design, NULL, data.cont, test=c("mean", "median")) temp <- names(Change_Estimates$contsum_mean) temp <- c(temp[1:8], "z_Score", "p_Value", temp[9:18]) Change_Estimates$contsum_mean$z_Score <- with(Change_Estimates$contsum_mean, abs(DiffEst / StdError)) Change_Estimates$contsum_mean$p_Value <- 2 * (1 - pnorm(Change_Estimates$contsum_mean$z_Score)) Change_Estimates$contsum_mean <- subset(Change_Estimates$contsum_mean, select=temp) temp <- names(Change_Estimates$contsum_median) temp <- c(temp[1:8], "z_Score", "p_Value", temp[9:30]) Change_Estimates$contsum_median$z_Score <- with(Change_Estimates$contsum_median, abs(DiffEst.P / StdError.P)) Change_Estimates$contsum_median$p_Value <- 2 * (1 - pnorm(Change_Estimates$contsum_median$z_Score)) Change_Estimates$contsum_median <- subset(Change_Estimates$contsum_median, select=temp) # Output csv files # Using mean values write.csv(Change_Estimates$contsum_mean, file="Change_Estimates_Mean_NRSA_s1s2.csv", row.names=FALSE) # Using median values write.csv(Change_Estimates$contsum_median, file="Change_Estimates_Median_NRSA_s1s2.csv", row.names=FALSE)