This file contains code and inputs for projections of temperature-related mortality based on historical data for 209 cities. The original modeling was performed by Joel Schwartz and Alina Vodonos Zilberg using data obtained directly from counties, and historical temperatures calculated by Abt.
lpath <- "/home/ad.abt.local/layc/R/library" # assign library for user w/o admin privileges on ACE3
.libPaths(lpath)
#library(chron)
library(tictoc) # for system time
library(readxl) # for reading in excel
library(readr) # for csv
library(survival) # from original vodonos code. Loaded for compatibility
library(splines) # for splines
library(MASS) # for mvmeta, likely
#library(nlme)
library(mgcv) # from original vodonos code. Loaded for compatibility
Loading required package: nlme
This is mgcv 1.8-25. For overview type 'help("mgcv-package")'.
library(dlnm) # from original vodonos code. Loaded for compatibility
This is dlnm 2.3.6. For details: help(dlnm) and vignette('dlnmOverview').
library(tsModel) # from original vodonos code. Loaded for compatibility
Time Series Modeling for Air Pollution and Health (0.6)
library(mvmeta) # from original vodonos code. Loaded for compatibility
This is mvmeta 0.4.11. For an overview type: help('mvmeta-package').
library(here) # for dynamic file paths
here() starts at /media/projects/Projects/Climecon_EPA/cira3-mortality
here <- here::here # to avoid overwrite
library(data.table) # fread for faster read (read.table takes >15 min)
data.table 1.11.8 Latest news: r-datatable.com
library(tidyverse)
── Attaching packages ──────────────────────────────────────────── tidyverse 1.2.1 ──
✔ ggplot2 3.1.0     ✔ purrr   0.2.5
✔ tibble  2.1.3     ✔ dplyr   0.8.3
✔ tidyr   0.8.2     ✔ stringr 1.3.1
✔ readr   1.1.1     ✔ forcats 0.3.0
── Conflicts ─────────────────────────────────────────────── tidyverse_conflicts() ──
✖ dplyr::between()   masks data.table::between()
✖ dplyr::collapse()  masks nlme::collapse()
✖ dplyr::filter()    masks stats::filter()
✖ dplyr::first()     masks data.table::first()
✖ dplyr::lag()       masks stats::lag()
✖ dplyr::last()      masks data.table::last()
✖ dplyr::select()    masks MASS::select()
✖ purrr::transpose() masks data.table::transpose()
select <- dplyr::select # re-assign select so dplyr::select wins over MASS::select, which is used more often in the script below
#library(RcppRoll) # rolling averages from vodonos? Likely not needed
library(purrr) # for iterative processing.
library(gridExtra) # for grid.arrange
Attaching package: ‘gridExtra’

The following object is masked from ‘package:dplyr’:

    combine
Inputs:
Degrees change by GCM and year (for RCP 8.5): data/GCM_Period_dT_LUT.xlsx
Cluster and county identity for each city: data/city_county_LUT.xlsx
Climate projections: //boufile01.corp.abtassoc.com/data1/Common/ENR/Project/Climecon4/COOLIT13_RiskComm/Mortality/Output/Trim/ and Data/Trim
Day-of-week deaths averaged over the last period (2003-2013) for cities: output/vodonos_out/historical.deaths/mean.deaths.dow.csv
Day-of-year deaths averaged over the last period (2003-2013) for cities: output/vodonos_out/historical.deaths/mean.deaths.doy.csv
Graphical results and a matrix linking the FIPS assigned by the Schwartz lab to each city: output/vodonos_out/Results-5-28-19.xlsx
Use 5 years on each side of the model period year to start (11 years in total for each GCM). Use RCP 8.5 only. Create two rows if the period spans the 2050 and 2090 GCM periods, as these need two separate Tmax projection files read in; a sketch of that split follows the chunk below.
gcm_dt_lut <- read_xlsx(here("data/GCM_Period_dT_LUT.xlsx"), sheet = "GCM_Period_dT_LUT")
gcm_dt_lut <- gcm_dt_lut %>%
rename (dt_year = model.period) %>%
mutate(min_dt_yr = dt_year - 5,
max_dt_yr = dt_year + 5,
)
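The two-row split is handled downstream; a minimal sketch of the idea, assuming the first set of Tmax projection files ends at 2050 (the boundary year and the tmax_file_period column are assumptions for illustration, not values from the LUT):
boundary_yr <- 2050 # assumption: the 2050-period and 2090-period Tmax files split here
spans <- gcm_dt_lut %>% filter(min_dt_yr <= boundary_yr, max_dt_yr > boundary_yr)
gcm_dt_lut_split <- bind_rows(
  gcm_dt_lut %>% anti_join(spans, by = c("gcm", "dt_year")), # windows within one file period
  spans %>% mutate(tmax_file_period = "2050"), # first half of a straddling window
  spans %>% mutate(tmax_file_period = "2090")  # second half of a straddling window
)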
options("object.size" = 2000000000000) # for bigger data limit
# nca_county_LUT <- fread(here("data/cira3/nca_county_LUT.csv"))
# nca_county_LUT <- nca_county_LUT %>% mutate(`FIPS_text` = FIPS)
mort_city_county_LUT <- read_xlsx(here("data/city_county_LUT.xlsx"))
head(mort_city_county_LUT)
mort_city_county_LUT <- mort_city_county_LUT %>% mutate(`FIPS_text` = paste("'", `FIPS-text`, "'", sep = ""))
cluster_LUT <- mort_city_county_LUT %>%
  mutate(CITYNAME = casefold(CITYNAME)) %>%
  select(Cluster, City, CITYNAME, STATENAM, FIPS_txt = `FIPS-text`) # dropped: VALUE, COUNT, fips_sm, FIPS
Use the city-county LUT to get the correct city/county values for the climate files Ron made in 2016. Those files are present on the J drive on ACE3 but require additional documentation for use: a description of the code used to produce them and of the aggregation method. The climate model output aggregated to degrees Celsius above baseline is available here on ACE3.
For the first projections, use observed deaths from death certificates (the sum over counties included in the Schwartz city boundary). These are directly tied to the total populations of the included counties. Use the sum of populations from the counties in the Schwartz city delimitation as the basis for converting RR to actual deaths. Later, apply the base RR and attributable RR to the overall population for cost calculations.
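A toy illustration of that conversion (all numbers are hypothetical; the real values come from the county sums described above):
rr_attr <- 0.04                 # example attributable RR for a given day/temperature
deaths_counties <- 12.3         # mean daily deaths in the Schwartz-delimited counties
pop_counties <- 5.2e5           # population of the included counties
pop_total <- 6.8e5              # overall city population, used later for cost calculations
an_counties <- rr_attr * deaths_counties             # attributable deaths in included counties
an_total <- an_counties * (pop_total / pop_counties) # scaled to the overall population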
dow <- read_csv(here::here("output/vodonos_out/historical.deaths/mean.deaths.dow.csv")) # Day of week mortality average for last period
Parsed with column specification:
cols(
dow = col_integer(),
dow.names = col_character(),
CITYNAME = col_character(),
tot.mean = col_double()
)
glimpse (dow)
Observations: 1,456
Variables: 4
$ dow       <int> 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, …
$ dow.names <chr> "Sunday", "Sunday", "Sunday", "Sunday", "Sunday", "Sunday", "Sunday", "Sunday", …
$ CITYNAME  <chr> "akron", "albany", "albuquerque", "allentown", "anaheim", "annandale", "annarbor", "atlanta", "atlantic city", "atzec", "augusta", "austin", "bakersfield", "baltimore", "bangor", "barnstable", …
$ tot.mean  <dbl> 13.7, 6.7, 11.0, 12.4, 42.7, 10.6, 4.9, 42.9, 6.1, 1.9, 4.6, 11.0, 12.7, 35.6, 3.5, 6.9, 2.3, 9.2, 26.9, 23.7, …
doy <- read_csv(here::here("output/vodonos_out/historical.deaths/mean.deaths.doy.csv")) # Day of year mortality average for last period
Parsed with column specification:
cols(
doy = col_integer(),
CITYNAME = col_character(),
tot.mean = col_double()
)
glimpse (doy)
Observations: 76,128
Variables: 3
$ doy      <int> 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, …
$ CITYNAME <chr> "akron", "albany", "albuquerque", "allentown", "anaheim", "annandale", "annarbor", "atlanta", "atlantic city", "atzec", "augusta", "austin", "bakersfield", "baltimore", "bangor", "barnstable", …
$ tot.mean <dbl> 15.0, 6.7, 12.6, 13.8, 48.5, 12.6, 5.1, 52.2, 6.4, 2.1, 5.6, 12.3, 13.0, 45.9, 3.6, 6.2, 3.2, 10.0, 33.1, 24.5, …
# County mean tmax (weighted by the number of grid points in each county) was used for the temperature relationship. Do the same going forward.
tmax_fips <- read_xlsx(here::here("output/vodonos_out/Results-5-28-19.xlsx"), "data.matrix")
tmax_fips <- tmax_fips %>%
select(CITYNAME, citycode) %>%
mutate (FIPS_txt = stringr::str_pad(as.character (citycode), 5, "left", "0"),
# Due to county selection within cities, some towns have no value in the original city-cluster-LUT table.
# This is only a problem for Youngstown, OH, which has no matching FIPS code due to an error in the data.matrix LUT from Schwartz.
FIPS_txt = gsub("39029", "39099", FIPS_txt)) # replace the Schwartz data.matrix code for Youngstown with a valid Youngstown FIPS
tmax_fips %>% anti_join(cluster_LUT , by = "FIPS_txt")
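A sketch of the grid-point-weighted aggregation described above, assuming a hypothetical grid_tmax table with one row per grid point per county per day (grid_tmax and its columns are illustrative, not loaded in this script):
city_tmax_wtd <- grid_tmax %>%                                # hypothetical input
  group_by(FIPS_txt, date) %>%
  summarise(tmax_c = mean(tasmax - 273.15), n_pts = n()) %>%  # county mean, Kelvin to C
  left_join(cluster_LUT, by = "FIPS_txt") %>%
  group_by(CITYNAME, date) %>%
  summarise(tmax_c = weighted.mean(tmax_c, w = n_pts))        # weight counties by grid points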
doy<- doy %>% left_join(tmax_fips) %>%
mutate(month = lubridate::month (as.Date(doy , origin = "2015-12-31")) ,
day = lubridate::day (as.Date(doy , origin = "2015-12-31")) ) %>%
left_join(cluster_LUT[, c("Cluster", "FIPS_txt")], by = "FIPS_txt") %>%
select (cluster = Cluster, FIPS_txt, CITYNAME, doy, month, day, total_mort = tot.mean)
Joining, by = "CITYNAME"
doy %>% glimpse()
Observations: 76,128
Variables: 7
$ cluster    <dbl> 2, 2, 9, 1, 5, 1, 2, 4, 1, 9, 4, 6, 8, 1, 2, 1, 2, 6, 1, 4, 9, 1, 2, 7, …
$ FIPS_txt   <chr> "39153", "36001", "35001", "42077", "06059", "51059", "26161", "13067", "34001", "35045", "13245", "48453", "06029", "24510", "23019", "25001", …
$ CITYNAME   <chr> "akron", "albany", "albuquerque", "allentown", "anaheim", "annandale", "annarbor", "atlanta", "atlantic city", "atzec", "augusta", "austin", …
$ doy        <int> 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, …
$ month      <dbl> 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, …
$ day        <int> 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, …
$ total_mort <dbl> 15.0, 6.7, 12.6, 13.8, 48.5, 12.6, 5.1, 52.2, 6.4, 2.1, 5.6, 12.3, 13.0, 45.9, 3.6, 6.2, 3.2, 10.0, 33.1, 24.5, …
The information regarding much of this processing is stored on the J drive under J:_Indicators (on ACE3). It may be more reproducible to pull current data directly from the USGS site, but initial tests suggested that would be quite slow, and using these previously developed datasets will be faster.
check_tmax <- read_csv(here("/data/Trim/CanESM2/rcp85/tasmax/tasmax_day_CanESM2_rcp85_r1i1p1_20060101-20061231.LOCA_2016-04-02.16th.csv"))
Parsed with column specification:
cols(
date = col_date(format = ""),
city = col_character(),
tasmax = col_double()
)
Check for alignment between cities in doy and cities in this file.
chk_doy = doy %>%
dplyr::select(CITYNAME ) %>%
unique () %>%
mutate(CITYNAME = casefold (CITYNAME))
chk_city_tmax = check_tmax %>%
dplyr::select(city) %>%
unique () %>%
mutate (CITYNAME = casefold(city))
chk_doy %>% anti_join(chk_city_tmax)
Joining, by = "CITYNAME"
chk_city_tmax %>% anti_join(chk_doy)
Joining, by = "CITYNAME"
Providence is present in the trimmed tmax file but not in doy. Santa Barbara is just missing a space; update santa barbara in doy during processing.
doy <- doy %>%
mutate (CITYNAME = gsub("santabarbara", "santa barbara", CITYNAME)) %>%
select (-month, - day) # month and day cause leap year problems
dow <- dow %>%
mutate (CITYNAME = gsub("santabarbara", "santa barbara", CITYNAME))
First, define the spline parameters that are the same across all places:
fun = "bs" ; degree = 2; cen_1 = 15.6
This finds the cluster results and saves them to a nested tibble for further use.
OPTIONS (in progress): Option 1 pulls in everything but keeps only the coefficients, vcov, boundaries, and knots, organized into two list columns (for the daily and moving-average arguments).
cluster_load <- function (c_cluster = 1,
out_folder = "output/vodonos_out/2019-06-13_mvcl_4_time/", nsim = 1000) {
#Load the contents of the saved Rdata for a cluster
require(here)
print (c_cluster)
cp_fname <- gsub("X", c_cluster,"/mv_clX/mv_clX/cp.clusterX.years4.Rdata")
cp_fname <- here::here(out_folder, cp_fname)
load(cp_fname)
mv_fname <- gsub("X", c_cluster,"/mv_clX/mv_clX/mv_clX_y4.Rdata")
mv_fname <- here::here(out_folder, mv_fname)
load(mv_fname)
if (exists("cp_15_years4")) {cp_05_years4 <- cp_15_years4}# corrects a naming issue in some vodonos output
daily_tmax0_arg = list ( c(
bound = list (c(range (cp_0_years4$predvar) )),
knots = list (c(knots_1)),
coeff = list (c(cp_0_years4$coefficients)),
varcov = list (c(cp_0_years4$vcov) ) ) )
ma15_tmax05_arg =list ( c(
bound = list (c(range (cp_05_years4$predvar) )),
knots = list (c(knots_2)),
coeff = list (c(cp_05_years4$coefficients)),
varcov = list (c(cp_05_years4$vcov) ) ))
cluster_0_fits <- tibble (cluster = c_cluster,
daily_tmax0_arg = daily_tmax0_arg )
cluster_05_fits <- tibble (cluster = c_cluster,
ma15_tmax05_arg = ma15_tmax05_arg )
cluster_x_fits <- cluster_0_fits %>% inner_join(cluster_05_fits)
# change to tibble for purrr operations later
return (cluster_x_fits)
}
cluster_fits <- seq(1,9) %>%
map_dfr(cluster_load,
out_folder = "output/vodonos_out/2019-06-13_mvcl_4_time/")
[1] 1
Joining, by = "cluster"
[1] 2
Joining, by = "cluster"
[1] 3
Joining, by = "cluster"
[1] 4
Joining, by = "cluster"
[1] 5
Joining, by = "cluster"
[1] 6
Joining, by = "cluster"
[1] 7
Joining, by = "cluster"
[1] 8
Joining, by = "cluster"
[1] 9
Joining, by = "cluster"
cluster_fits %>% glimpse()
Observations: 9
Variables: 3
$ cluster         <int> 1, 2, 3, 4, 5, 6, 7, 8, 9
$ daily_tmax0_arg <list> [[<-13.2, 31.0>, <-1.302139, 11.913495, 23.875105>, <0.02469060, 0.04119767, 0.10576454, 0.15605829, 0.21447051>, <0.0002373320, 0.0001821380, 0.0001965471, 0.0001674620, 0.0002109098, 0.000…
$ ma15_tmax05_arg <list> [[<-9.5, 29.1>, <-3.289712, 6.960163, 16.811502, 24.736101>, <-0.02309032, -0.05991974, -0.11095517, -0.19205325, -0.22130423, -0.20194653>, <7.596052e-05, 4.825860e-05, 6.411353e-05, 6.2144…
The previously processed data can be joined on city name.
cluster_fits_doy <- doy %>%
group_by(cluster, FIPS_txt, CITYNAME) %>%
nest(.key = "doy_av_mort") %>%
# ungroup() %>% group_by (cluster) %>%
# nest(.key = "city_doy_av_mort") %>%
left_join(cluster_fits)
Joining, by = "cluster"
cluster_fits_doy %>% glimpse()
Observations: 208
Variables: 6
$ cluster         <dbl> 2, 2, 9, 1, 5, 1, 2, 4, 1, 9, 4, 6, 8, 1, 2, 1, 2, 6, 1, 4, 9, 1, 2, 7, …
$ FIPS_txt        <chr> "39153", "36001", "35001", "42077", "06059", "51059", "26161", "13067", "34001", "35045", "13245", "48453", "06029", "24510", "23019", "25001", "36101", "22033", "34003", "01073", "16001", …
$ CITYNAME        <chr> "akron", "albany", "albuquerque", "allentown", "anaheim", "annandale", "annarbor", "atlanta", "atlantic city", "atzec", "augusta", "austin", "bakersfield", "baltimore", "bangor", "barnstable", …
$ doy_av_mort     <list> [<tbl_df[366 x 2]>, <tbl_df[366 x 2]>, <tbl_df[366 x 2]>, <tbl_df[366 x 2]>, <tbl_df[366 x 2]>, <tbl_df[366 x 2]>, <tbl_df[366 x 2]>, <tbl_df[366 x 2]>, …
$ daily_tmax0_arg <list> [[<-19.3, 29.6>, <1.656303, 17.759536>, <-0.02399588, 0.01688781, 0.06881664, 0.12780218>, <0.0001698283, 0.0001028722, 0.0001310583, 0.0001135017, 0.0001028722, 0.0001156094, 0.0001053619, …
$ ma15_tmax05_arg <list> [[<-15.8, 27.9>, <-4.973697, 9.766807, 22.087255>, <-0.002223833, -0.038615873, -0.096352729, -0.186909996, -0.150082280>, <0.0001845120, 0.0001403662, 0.0001586254, 0.0001548495, 0.00016076…
This function originally read the full gridded Tmax0 file and then subset it to free up memory, assigning the reduced data to the global environment for further use; it has been updated for the pre-processed by-city tmax data. Note that this may need to be updated, but it appears to link correctly to the cities shown above (with the exception of Providence, RI), and is from the 2016 download of LOCA data. It is not clear whether there have been updates since then. These modeled data were used for the last mortality manuscript.
# list.files(here("data/cira3/"))
# list.files(here('data'))
get_tmax <- function ( gcm = "CanESM2", rcp = "RCP85" , dt_yr = 2011, doy_frame = doy,
cira_folder =here("/data/Trim/"),... )
{### This function loads modeled temperature data, converts dates to day of year, and converts Kelvin to degrees C
## Updated for new input from previously processed data (too large to move to C drive project folder, and not in appropriate place for final output)
# require (here)
require (dplyr) # for data cleaning
require (tidyr) # for data cleaning
require (data.table) # for fread and %like%
require (purrr) # for map_dfr
yrs = seq ((dt_yr-5),(dt_yr+5) )
## Select out just the correct files
gcm_folder <- paste(cira_folder, gcm, "/",
casefold(rcp), "/tasmax/", sep = "")
gcm_files <- list.files(gcm_folder, full.names = TRUE)
gcm_files <- map_chr(.x = yrs,
.f = function (.x) {gcm_files[gcm_files %like% paste(.x, "0101-", .x, "1231", sep = "")]})
# print(yrs)
fnames = tibble (dt_yr = rep(dt_yr, 11),
gcm = gcm,
rcp = rcp,
yr = yrs) %>%
mutate(fname = gcm_files )
tmax_dt_yr <- map_dfr(fnames$fname, fread )
tmax_dt_yr <- tmax_dt_yr %>%
mutate(CITYNAME = casefold(city),
gcm = gcm, # may save memory to remove repeated column names, but inconvenient
rcp = rcp,
dt_yr = dt_yr,
year = lubridate::year(as.Date(date)) ,
#month = lubridate::month(as.Date(date)),
#day = lubridate::day(as.Date(date)),
doy = lubridate::yday(as.Date(date)),
tmean0 = tasmax - 273.15) %>%
group_by(year, city, CITYNAME) %>% #gcm, rcp, dt_yr,
nest(.key = "doy_tmean0")
return (tmax_dt_yr)
}
For each climate model and year for which an average change in temperature occurs, pull in the data with tmax_daily. Calculate the lagged maximum temperature (5-day moving average). Join to the cluster fits, and create a bounded variable that replaces maximum temperatures exceeding the bounds with the upper and lower bounds as needed. Use cluster-specific spline values and city-specific day-of-year deaths to calculate daily and tmax mortality values.
lag_calc <- function (tmax_2lag = tmax_daily$doy_tmean0[[1]] ) {
lagged_tmax <- tmax_2lag %>%
mutate(
tmeanL1 = ifelse (is.na(lag(tmean0)), tmean0, lag(tmean0)),
tmeanL2 = ifelse (is.na(lag(tmean0, 2)), tmean0, lag(tmean0, 2)),
tmeanL3 = ifelse (is.na(lag(tmean0, 3)), tmean0, lag(tmean0, 3)),
tmeanL4 = ifelse (is.na(lag(tmean0, 4)), tmean0, lag(tmean0, 4)),
tmean15 = (tmean0 + tmeanL1 + tmeanL2 + tmeanL3 + tmeanL4) / 5
) %>%
select(-starts_with("tmeanL"))
return (lagged_tmax)
}
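A quick check of lag_calc on a toy series (the first few days fall back to tmean0 wherever a lag is NA, so early moving averages are pulled toward the current day):
lag_calc(tibble(tmean0 = c(10, 12, 14, 16, 18, 20, 22)))
# returns tmean0 plus tmean15, the 5-day moving average, with the tmeanL* helper columns dropped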
join_doy_proj <- function (tmax_daily = tmax_daily,
cluster_param = cluster_fits_doy) {
# join cluster parameters to tmax and apply cluster-specific calculations
# some unnesting for convenience in merge
cl_doy <- cluster_param %>%
select(-ends_with("arg") ) %>%
unnest() # cluster_param
doy_tmax <- tmax_daily %>% #tmax_2join
select(CITYNAME, year, doy_tmax) %>%
unnest() %>%
right_join(cl_doy, by = c("CITYNAME", "doy")) %>%
group_by(CITYNAME, year, cluster, FIPS_txt) %>%
nest(.key = "day_tm")
cluster_param <- cluster_param %>%
select(-doy_av_mort)
tmax_daily <- tmax_daily %>% #SAVE MEMORY__DO column names LAST: tmax_daily <- tmax_daily %>% select (-gcm, -rcp, -dt_yr)
select(-doy_tmax) %>%
inner_join(doy_tmax,
by = c("year", "CITYNAME")) %>%
#To save memory, consider: ungroup() %>% group_by(cluster) %>% nest(.key = "city_doy_tmax) %>% inner_join(cluster_param, by = "cluster")
inner_join(cluster_param,
by = c("CITYNAME", "cluster", "FIPS_txt"))
return (tmax_daily)
}
bound_tmax <- function(tmax, bound) {
case_when (tmax < bound[1] ~ bound[1],# set tmax.bound to L bound
tmax > bound[2] ~ bound[2], # set tmax.bound to U bound
TRUE ~ tmax)
}
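For example, using the cluster 1 daily bounds shown in the cluster_fits glimpse above:
bound_tmax(c(-20, 10, 40), bound = c(-13.2, 31.0))
# [1] -13.2  10.0  31.0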
dlnm::crosspred produces predictions only over the range of the input basis, not for each row. We must therefore reproduce the prediction with a onebasis-style construction and a relative-risk calculation: make the basis for the temperature series, create a basis for the center point, then subtract the center-point basis from the temperature basis before applying the coefficients.
rr_calc <- function ( tmax = c(10, 20, 30),
baseline_projected_mort = c(10,15),
coef_mat = cp_0_years4$coefficients,
center_val = cen_1,
degree_val = 2,
knot_vec = knots_1,
bound_vec = bound_1 ){
## This function calculates relative risk from an uncentered basis matrix and fitted coefficients
# Currently implemented for EACH tmax value.
require( dlnm) ; require (splines)
  bs_ce <- bs(center_val, degree = degree_val, knots = knot_vec, Bound = bound_vec, intercept = FALSE) # Bound partially matches bs()'s Boundary.knots argument
  bs_var <- suppressWarnings(bs(tmax, degree = degree_val, knots = knot_vec, Bound = bound_vec, intercept = FALSE)) # suppress repetitive warnings about temperatures outside the original boundaries
bs_vc <- scale(bs_var, center = bs_ce, scale = FALSE)
RR_temp <- as.numeric ((1-exp(-bs_vc %*% coef_mat) ) )
#attr_death_n <- RR_temp * baseline_projected_mort
return ( RR_temp )
}
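A hedged usage sketch with made-up knots, bounds, and coefficients (the real values come from the cluster fits loaded above); with degree 2 and three interior knots, bs() yields five basis columns, so coef_mat must have length 5:
knots_demo <- c(0, 12, 24)                    # illustrative interior knots
bounds_demo <- c(-15, 35)                     # illustrative boundary knots
coef_demo <- c(0.02, 0.04, 0.11, 0.16, 0.21)  # illustrative fitted coefficients
rr_calc(tmax = c(5, 15.6, 30, 38),
        baseline_projected_mort = 1,          # currently unused inside rr_calc
        coef_mat = coef_demo, center_val = 15.6, degree_val = 2,
        knot_vec = knots_demo, bound_vec = bounds_demo)
# the returned RR is 0 at the center value by construction, since the centered basis row is zero there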
project_spline <- function (dframe = tmax_daily [1,],
cen = 15.6, deg = 2) {
day_tm <- dframe$day_tm[[1]]
param_t0 <- dframe$daily_tmax0_arg[[1]] # was cp_0_years4 in original scheme
param_ma15 <- dframe$ma15_tmax05_arg[[1]] # was cp_05_years4 in original scheme
#, fun = "bs" for rr_calc
day_tm <- day_tm %>%
mutate (year = dframe$year, city = dframe$city,
#CITYNAME = dframe$CITYNAME, cluster = dframe$CITYNAME, FIPS_txt = dframe$FIPS_txt,
# hacky solution, but purrr not mutating easily on nested tibble, and not clear how to resolve
tmean0_bound = bound_tmax(tmean0, param_t0$bound),
tmean15_bound = bound_tmax(tmean15, param_ma15$bound) ,
rr_tmean0 = rr_calc ( tmean0,
baseline_projected_mort = total_mort,
coef_mat = param_t0$coeff,
center_val = cen,
degree_val = deg,
knot_vec = param_t0$knots ,
bound_vec = param_t0$bound),
rr_tmean15 = rr_calc ( tmean15,
baseline_projected_mort = total_mort,
coef_mat = param_ma15$coeff,
center_val = cen,
degree_val = deg,
knot_vec = param_ma15$knots,
bound_vec = param_ma15$bound),
rr_tmean0_bnd = rr_calc ( tmean0_bound,
baseline_projected_mort = total_mort,
coef_mat = param_t0$coeff,
center_val = cen,
degree_val = deg,
knot_vec = param_t0$knots ,
bound_vec = param_t0$bound ) ,
rr_tmean15_bnd = rr_calc ( tmean15_bound,
baseline_projected_mort = total_mort,
coef_mat = param_ma15$coeff,
center_val = cen,
degree_val = deg,
knot_vec = param_ma15$knots,
bound_vec = param_ma15$bound) ,
an_tmean0 = rr_tmean0 * total_mort,
an_tmean15= rr_tmean15 * total_mort,
an_tmean0_bnd= rr_tmean0_bnd * total_mort,
an_tmean15_bnd= rr_tmean15_bnd * total_mort ) %>%
select (year, city, everything()) #, CITYNAME, cluster, FIPS_txt
return (day_tm)
}
Do this two ways. For cities, aggregate AN over the full 11-year period by day of year, getting a mean AN for each day for each city over the 11 years. For clusters, first aggregate by year to get the total increase in deaths each year.
Summarization functions for use in other functions. These functions currently do not weight values.
summarise_temp_c <- function(dframe , ...)
{ # allows different grouping variables, passed as variable-length arguments ...
group_var <- enquos(...)
print(group_var)
dframe <- dframe %>%
group_by(!!! group_var) %>% #!!! group_var
summarise(mean_daily_c = mean (tmean0),
sd_daily_c = sd(tmean0),
max_daily_c = max(tmean0),
min_daily_c = min(tmean0),
mean_ma15_c = mean(tmean15),
sd_ma15_c = sd(tmean15),
max_ma15_c = max(tmean15),
min_ma15_c = min(tmean15))
return (dframe)
}
summarise_mean_rr <- function(dframe = proj_rr_daily,
rr_vars = c(rr_tmean0 = "rr_tmean0", rr_tmean15 = "rr_tmean15", rr_tmean0_bnd= "rr_tmean0_bnd", rr_tmean15_bnd = "rr_tmean15_bnd"),
...)
{ # allows different grouping variables, passed as variable-length arguments ...
  group_var <- enquos(...)
  dframe <- dframe %>% rename(!!!rr_vars)
  if (any(!grepl("total", rr_vars))) { # add totals unless already supplied
    dframe <- dframe %>%
      mutate(rr_total = rr_tmean0 + rr_tmean15,
             rr_total_bnd = rr_tmean0_bnd + rr_tmean15_bnd)
  }
dframe <- dframe %>%
group_by(!!! group_var) %>%
summarise(
mean_rr_total = mean(rr_total),
sd_rr_total = sd(rr_total),
mean_rr_total_bnd = mean(rr_total_bnd),
sd_rr_total_bnd = sd(rr_total_bnd)
)
return (dframe)
}
summarise_mean_an <- function(dframe ,
an_vars = c(an_tmean0 = "an_tmean0", an_tmean15 ="an_tmean15", an_tmean0_bnd = "an_tmean0_bnd", an_tmean15_bnd ="an_tmean15_bnd") ,
...)
{ # allows different grouping variables, passed as variable-length arguments ...
  group_var <- enquos(...)
  dframe <- dframe %>% rename(!!!an_vars)
  if (any(!grepl("total", an_vars))) { # add totals unless already supplied
    dframe <- dframe %>%
      mutate(an_total = an_tmean0 + an_tmean15,
             an_total_bnd = an_tmean0_bnd + an_tmean15_bnd)
  }
dframe <- dframe %>%
group_by(!!! group_var) %>%
summarise(baseline_proj_mort = mean (total_mort), # use mean to extract original projected baseline again
mean_an_total = mean(an_total),
sd_an_total = sd(an_total),
mean_an_total_bnd = mean(an_total_bnd),
sd_an_total_bnd = sd(an_total_bnd)) }
summarise_sum_an <- function(dframe ,
                             an_vars = c(total_mort = "total_mort", # quoted, so rename(!!!an_vars) works
an_tmean0 = "an_tmean0", an_tmean15 ="an_tmean15",
an_tmean0_bnd = "an_tmean0_bnd", an_tmean15_bnd ="an_tmean15_bnd") ,
...)
{ # allows different grouping variables, passed as variable-length arguments ...
  group_var <- enquos(...)
  dframe <- dframe %>% rename(!!!an_vars)
  if (any(!grepl("an_total", an_vars))) { # add totals unless already supplied
    dframe <- dframe %>%
      mutate(an_total = an_tmean0 + an_tmean15,
             an_total_bnd = an_tmean0_bnd + an_tmean15_bnd)
  }
dframe <- dframe %>%
group_by(!!! group_var) %>%
summarise(baseline_proj_mort = sum(total_mort),
sum_an_total = sum(an_total),
sum_an_total_bnd = sum(an_total_bnd) ) }
Tabulate by city and day of year over the whole period. This calculates the mean number of attributable deaths over the 11-year period for a particular city and climate model.
Tabulate cluster day-of-year attributable deaths. This calculates the summed AN across all cities in a cluster for each year of the 11-year period for a GCM. The mean and sd temperatures are currently unweighted.
Tabulate cluster period attributable deaths. This calculates an average AN for the whole period, using the previously calculated total AN of all cities in a cluster for each year of the 11-year period for a GCM. The mean temperatures are not currently population- or area-weighted; see the sketch below.
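A sketch of the cluster-level tabulation, reusing summarise_sum_an with cluster/year grouping and then averaging over the period. This assumes the dTc_compiled_gcm_city frame used further below, and mirrors the city-level call rather than reproducing a function actually defined elsewhere:
sum_an_dt_cluster <- summarise_sum_an(dTc_compiled_gcm_city,
                                      an_vars = c(total_mort = "total_mort",
                                                  an_tmean0 = "an_tmean0", an_tmean15 = "an_tmean15",
                                                  an_tmean0_bnd = "an_tmean0_bnd", an_tmean15_bnd = "an_tmean15_bnd"),
                                      cluster, dT_c, year, gcm) %>% # yearly cluster totals
  ungroup() %>%
  group_by(cluster, dT_c) %>%                                       # then average over the 11-year period
  summarise(baseline_proj_mort = mean(baseline_proj_mort),
            sum_an_total = mean(sum_an_total),
            sum_an_total_bnd = mean(sum_an_total_bnd))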
dT_func_simple <- function (gcm_sel ="CCSM4" ,
dT_c_sel = 1,
rcp_sel = "RCP85" ,
dt_yr_sel = 2011,
cira_folder = here("/data/Trim/"),
output_folder = here::here ("output", format(Sys.Date(), "%F"))
) {
print (gcm_sel) ; print (dT_c_sel) ; print(rcp_sel); print(dt_yr_sel)
# get data
tic(msg = "get_tmax")
tmax_daily <- get_tmax (cira_folder = cira_folder,
gcm = gcm_sel , rcp = rcp_sel , dt_yr = dt_yr_sel )
toc()
# Lag calc
tic(msg = "calculate lagged tmax")
tmax_daily <- tmax_daily %>%
mutate(dT_c = dT_c_sel ,
doy_tmax = map(tmax_daily$doy_tmean0, lag_calc)) %>%
select (-doy_tmean0)
toc()
#join to cluster fits, apply bounds, and other calculations
tic(msg = "join_doy_proj")
tmax_daily <- join_doy_proj (tmax_daily = tmax_daily , cluster_param = cluster_fits_doy)
toc()
tic(msg = "day_tm operation")
## add projected mortality and RR to tmax_daily (trouble with list position means this may be more complicated than needed)
tmax_daily$day_tm <- tmax_daily %>%
ungroup () %>%
group_split( city, year ) %>% #CITYNAME, cluster, FIPS_txt,
map_dfr ( .f = project_spline, cen = 15.6, deg = 2) %>%
group_by(city, year) %>% #CITYNAME, cluster, FIPS_txt,
nest(.key = "day_tm") %>%
pull(day_tm )
toc()
return(tmax_daily)
}
## Get GCM list for dt to run (minus baseline)
gcm_dt_lut_proj <- gcm_dt_lut %>%
mutate (gcm = gsub("_", "-", gcm) ) %>%
filter (gcm != "baseline") %>% # remove the baseline row, which is stored elsewhere
mutate (rcp = "RCP85")
output_folder_path <- here::here("output", format(Sys.Date(), "%F") )
if(!dir.exists(output_folder_path)) {dir.create(output_folder_path, recursive = TRUE)}
compiled_city_mean_out <- pmap_dfr (.l = list (gcm_dt_lut_proj$gcm[1:2] , gcm_dt_lut_proj$dT[1:2] , gcm_dt_lut_proj$rcp[1:2] , gcm_dt_lut_proj$dt_year[1:2] ) ,
.f = dT_func_simple,
cira_folder =here("/data/Trim/"),
output_folder = output_folder_path )
[1] "CanESM2"
[1] 1
[1] "RCP85"
[1] 2011
get_tmax: 6.143 sec elapsed
calculate lagged tmax: 4.632 sec elapsed
join_doy_proj: 0.75 sec elapsed
day_tm operation: 11.596 sec elapsed
[1] "CanESM2"
[1] 2
[1] "RCP85"
[1] 2033
get_tmax: 6.115 sec elapsed
calculate lagged tmax: 4.672 sec elapsed
join_doy_proj: 0.733 sec elapsed
day_tm operation: 11.769 sec elapsed
save(compiled_city_mean_out, file = file.path(output_folder_path, "compiled_city.Rdata") )
Average over the included GCMs.
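The chunk that builds dTc_compiled_gcm_city is not shown; a plausible reconstruction, assuming it unnests the per-day results and drops the spline-argument list columns:
dTc_compiled_gcm_city <- compiled_city_mean_out %>%
  select(cluster, CITYNAME, FIPS_txt, dT_c, day_tm) %>% # drop the *_arg list columns and outer year/city, which day_tm repeats
  unnest(day_tm) # day_tm carries year, city, gcm, doy, total_mort, tmean*, rr_*, and an_* columns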
mean_c_dt_city_doy <- dTc_compiled_gcm_city %>%
summarise_temp_c( cluster, city, CITYNAME, FIPS_txt, dT_c, doy)
[[1]]
<quosure>
expr: ^cluster
env:  0x55c1273d3918

[[2]]
<quosure>
expr: ^city
env:  0x55c1273d3918

[[3]]
<quosure>
expr: ^CITYNAME
env:  0x55c1273d3918

[[4]]
<quosure>
expr: ^FIPS_txt
env:  0x55c1273d3918

[[5]]
<quosure>
expr: ^dT_c
env:  0x55c1273d3918

[[6]]
<quosure>
expr: ^doy
env:  0x55c1273d3918
mean_rr_dt_city_doy <- dTc_compiled_gcm_city %>% summarise_mean_rr(rr_vars = c(rr_tmean0 = "rr_tmean0", rr_tmean15 = "rr_tmean15", rr_tmean0_bnd= "rr_tmean0_bnd", rr_tmean15_bnd = "rr_tmean15_bnd"),
cluster, city, CITYNAME, FIPS_txt, dT_c, doy)
mean_an_dt_city_doy <- dTc_compiled_gcm_city %>% summarise_mean_an( an_vars = c(an_tmean0 = "an_tmean0", an_tmean15 ="an_tmean15", an_tmean0_bnd = "an_tmean0_bnd", an_tmean15_bnd ="an_tmean15_bnd") ,
cluster, city, CITYNAME, FIPS_txt, dT_c, doy)
city_tab_doy <- mean_c_dt_city_doy %>% inner_join(mean_rr_dt_city_doy) %>%
inner_join(mean_an_dt_city_doy)
Joining, by = c("cluster", "city", "CITYNAME", "FIPS_txt", "dT_c", "doy")
Joining, by = c("cluster", "city", "CITYNAME", "FIPS_txt", "dT_c", "doy")
city_tab_doy%>% head()
# Calculate the mean of the yearly summed attributable deaths over the 11-year period for a particular city and climate model.
tic(msg = "Summarize AN, grouped by dT, cluster and city")
sum_an_dt_city <- summarise_sum_an(dTc_compiled_gcm_city,
an_vars = c(total_mort = "total_mort",
an_tmean0 = "an_tmean0", an_tmean15 ="an_tmean15",
an_tmean0_bnd = "an_tmean0_bnd", an_tmean15_bnd ="an_tmean15_bnd") ,
cluster, city, CITYNAME, FIPS_txt, dT_c, year, gcm) %>%
ungroup() %>%
group_by(cluster, city, CITYNAME, FIPS_txt, dT_c) %>%
summarise(baseline_proj_mort = mean(baseline_proj_mort) , sum_an_total= mean(sum_an_total), sum_an_total_bnd=mean(sum_an_total_bnd))
sum_an_dt_city_fname <- file.path(output_folder_path, paste(format(Sys.Date(), "%F_"),
"city_dT_AN_summary.csv", sep = "") )
write_csv(sum_an_dt_city, path = sum_an_dt_city_fname)
toc()
Summarize AN, grouped by dT, cluster and city: 0.747 sec elapsed
sum_an_dt_city
For notes on the baseline data, see the readme for the baseline mortality used in the temperature-mortality work. Previous mortality baselines for other (Climecon 4 Task 004) mortality work relied on calculations from BenMAP.
The information was developed by Neal Fann of U.S. EPA, who took a shapefile of the cities and used it to export baseline mortality information for the 209 US cities covered in that analysis. A subset of these cities was addressed in the original Medina-Ramon and Schwartz paper.
Source file saved independently as
\boufile0114_8_CIRA2-mortality-mortality-rates-baseline_mortality_w_city.xlsx
The original email with files was sent to D. Mills on 7/2/14 by Neal Fann; see the Population-health-baseline-data folder in the Heat-Pops email directory.
devtools::session_info()
─ Session info ────────────────────────────────────────────────────────────────────────────────────
─ Packages ────────────────────────────────────────────────────────────────────────────────────────
package * version date lib source
assertthat 0.2.0 2017-04-11 [2] CRAN (R 3.5.1)
backports 1.1.2 2017-12-13 [2] CRAN (R 3.5.1)
base64enc 0.1-3 2015-07-28 [2] CRAN (R 3.5.1)
broom 0.5.0 2018-07-17 [2] CRAN (R 3.5.1)
callr 3.0.0 2018-08-24 [2] CRAN (R 3.5.1)
cellranger 1.1.0 2016-07-27 [2] CRAN (R 3.5.1)
cli 1.0.1 2018-09-25 [2] CRAN (R 3.5.1)
colorspace 1.3-2 2016-12-14 [2] CRAN (R 3.5.1)
crayon 1.3.4 2017-09-16 [2] CRAN (R 3.5.1)
data.table * 1.11.8 2018-09-30 [2] CRAN (R 3.5.1)
debugme 1.1.0 2017-10-22 [2] CRAN (R 3.5.1)
desc 1.2.0 2018-05-01 [2] CRAN (R 3.5.1)
devtools 2.0.1 2018-10-26 [2] CRAN (R 3.5.1)
digest 0.6.18 2018-10-10 [2] CRAN (R 3.5.1)
dlnm * 2.3.6 2018-09-12 [2] CRAN (R 3.5.1)
dplyr * 0.8.3 2019-07-04 [1] CRAN (R 3.5.1)
evaluate 0.12 2018-10-09 [2] CRAN (R 3.5.1)
fansi 0.4.0 2018-10-05 [2] CRAN (R 3.5.1)
forcats * 0.3.0 2018-02-19 [2] CRAN (R 3.5.1)
fs 1.2.6 2018-08-23 [2] CRAN (R 3.5.1)
ggplot2 * 3.1.0 2018-10-25 [2] CRAN (R 3.5.1)
glue 1.3.0 2018-07-17 [2] CRAN (R 3.5.1)
gridExtra * 2.3 2017-09-09 [2] CRAN (R 3.5.1)
gtable 0.2.0 2016-02-26 [2] CRAN (R 3.5.1)
haven 1.1.2 2018-06-27 [2] CRAN (R 3.5.1)
here * 0.1 2017-05-28 [1] CRAN (R 3.4.4)
hms 0.4.2 2018-03-10 [2] CRAN (R 3.5.1)
htmltools 0.3.6 2017-04-28 [2] CRAN (R 3.5.1)
httr 1.3.1 2017-08-20 [2] CRAN (R 3.5.1)
jsonlite 1.5 2017-06-01 [2] CRAN (R 3.5.1)
knitr 1.20 2018-02-20 [2] CRAN (R 3.5.1)
lattice 0.20-38 2018-11-04 [2] CRAN (R 3.5.1)
lazyeval 0.2.1 2017-10-29 [2] CRAN (R 3.5.1)
lubridate 1.7.4 2018-04-11 [2] CRAN (R 3.5.1)
magrittr 1.5 2014-11-22 [2] CRAN (R 3.5.1)
MASS * 7.3-51.1 2018-11-01 [2] CRAN (R 3.5.1)
Matrix 1.2-15 2018-11-01 [2] CRAN (R 3.5.1)
memoise 1.1.0 2017-04-21 [2] CRAN (R 3.5.1)
mgcv * 1.8-25 2018-10-26 [2] CRAN (R 3.5.1)
modelr 0.1.2 2018-05-11 [2] CRAN (R 3.5.1)
munsell 0.5.0 2018-06-12 [2] CRAN (R 3.5.1)
mvmeta * 0.4.11 2018-03-07 [2] CRAN (R 3.5.1)
nlme * 3.1-137 2018-04-07 [2] CRAN (R 3.5.1)
packrat 0.5.0 2018-11-14 [2] CRAN (R 3.5.1)
pillar 1.4.2 2019-06-29 [1] CRAN (R 3.5.1)
pkgbuild 1.0.2 2018-10-16 [2] CRAN (R 3.5.1)
pkgconfig 2.0.2 2018-08-16 [2] CRAN (R 3.5.1)
pkgload 1.0.2 2018-10-29 [2] CRAN (R 3.5.1)
plyr 1.8.4 2016-06-08 [2] CRAN (R 3.5.1)
prettyunits 1.0.2 2015-07-13 [2] CRAN (R 3.5.1)
processx 3.2.0 2018-08-16 [2] CRAN (R 3.5.1)
ps 1.2.1 2018-11-06 [2] CRAN (R 3.5.1)
purrr * 0.2.5 2018-05-29 [2] CRAN (R 3.5.1)
R6 2.3.0 2018-10-04 [2] CRAN (R 3.5.1)
Rcpp 1.0.2 2019-07-25 [1] CRAN (R 3.5.1)
readr * 1.1.1 2017-05-16 [2] CRAN (R 3.5.1)
readxl * 1.1.0 2018-04-20 [2] CRAN (R 3.5.1)
remotes 2.0.2 2018-10-30 [2] CRAN (R 3.5.1)
rlang 0.4.0 2019-06-25 [1] CRAN (R 3.5.1)
rmarkdown 1.10 2018-06-11 [2] CRAN (R 3.5.1)
rprojroot 1.3-2 2018-01-03 [2] CRAN (R 3.5.1)
rstudioapi 0.8 2018-10-02 [2] CRAN (R 3.5.1)
rvest 0.3.2 2016-06-17 [2] CRAN (R 3.5.1)
scales 1.0.0 2018-08-09 [2] CRAN (R 3.5.1)
sessioninfo 1.1.1 2018-11-05 [2] CRAN (R 3.5.1)
stringi 1.2.4 2018-07-20 [2] CRAN (R 3.5.1)
stringr * 1.3.1 2018-05-10 [2] CRAN (R 3.5.1)
survival * 2.43-1 2018-10-29 [2] CRAN (R 3.5.1)
testthat 2.0.1 2018-10-13 [2] CRAN (R 3.5.1)
tibble * 2.1.3 2019-06-06 [1] CRAN (R 3.5.1)
tictoc * 1.0 2014-06-17 [1] CRAN (R 3.5.1)
tidyr * 0.8.2 2018-10-28 [2] CRAN (R 3.5.1)
tidyselect 0.2.5 2018-10-11 [2] CRAN (R 3.5.1)
tidyverse * 1.2.1 2017-11-14 [2] CRAN (R 3.5.1)
tsModel * 0.6 2013-06-24 [2] CRAN (R 3.5.1)
tufte 0.4 2018-07-15 [2] CRAN (R 3.5.1)
usethis 1.4.0 2018-08-14 [2] CRAN (R 3.5.1)
utf8 1.1.4 2018-05-24 [2] CRAN (R 3.5.1)
vctrs 0.2.0 2019-07-05 [1] CRAN (R 3.5.1)
withr 2.1.2 2018-03-15 [2] CRAN (R 3.5.1)
xml2 1.2.0 2018-01-24 [2] CRAN (R 3.5.1)
yaml 2.2.0 2018-07-25 [2] CRAN (R 3.5.1)
zeallot 0.1.0 2018-01-28 [1] CRAN (R 3.4.4)
[1] /home/ad.abt.local/layc/R/library
[2] /opt/R/3.5.1/lib64/R/library