author: Havala O.T. Pye (pye.havala@epa.gov)
date: created 2025-02-28
last update 2025-5-13
%matplotlib inline
#!python -m pip install --user adjustText
#!python -m pip install --user textalloc
#!python -m pip install --user plotly
#!python -m pip install --user nbformat
# Libraries and packages (many optional/can comment out)
import cmaqsatproc as csp
import pandas as pd
import xarray as xr
import cartopy
import cartopy.crs as ccrs
import cartopy.feature
import cartopy.io.shapereader as shpreader
import matplotlib.pyplot as plt
import numpy as np
import glob
import netCDF4
from netCDF4 import Dataset
import PseudoNetCDF as pnc
from datetime import datetime, timedelta, date
import warnings
import scipy
from scipy.stats import bootstrap
import math
from IPython.display import display
import ast
import adjustText
import textalloc as ta
import plotly
import plotly.express as px
import nbformat
# print versions
print('csp version: ' +csp.__version__)
print('pandas version: ' +pd.__version__)
print('xr version: ' +xr.__version__)
print('cartopy version:' +cartopy.__version__)
print('Numpy version: ' +np.__version__)
print('Netcdf4 version:' +netCDF4.__version__)
csp version: 0.2.4 pandas version: 1.5.1 xr version: 2023.11.0 cartopy version:0.22.0 Numpy version: 1.22.3 Netcdf4 version:1.6.5
today = str(date.today())
today = today.replace('-','')
pd.options.display.max_columns = None
pd.options.display.max_rows = None
# Major file locations
workdir = '/work/MOD3DEV/has/2023cracmm_ages/analysis/scripts/' # where this script sits
datadir_base = '/work/MOD3DEV/has/2023cracmm_ages/runs/20250213cmaq55plus/data/POST/' # where the write site files are
datadir_nofire = '/work/MOD3DEV/has/2023cracmm_ages/runs/20250213cmaq55plus_nofire/data/POST/' # where the write site files are
outdir = workdir + '../output/' # for saving output and plots
# function to read write site output and put in pandas dataframe indexed by date+time
def get_df_fromwritesite(filenamewpath):
# assign column names with units
colnames = pd.read_csv(filenamewpath,skiprows=5,header=None).iloc[0,:].values
colunits = pd.read_csv(filenamewpath,skiprows=6,header=None).iloc[0,:].values
for i in np.arange(len(colnames)):
colnames[i] = colnames[i]+'_'+str(colunits[i])
# read file
dfcmaqdata = pd.read_csv(filenamewpath,skiprows=7,header=None)
dfcmaqdata = dfcmaqdata.rename(columns=dict(zip(dfcmaqdata.columns,colnames)))
# get full date and time (GMT)
dfcmaqdata['fulldatetime_%Y-%m-%d %H:%M:%S']= ( pd.to_datetime(
pd.to_datetime(dfcmaqdata['date_YYYY-MM-DD']).dt.strftime('%Y-%m-%d')+
' '+
pd.to_datetime(dfcmaqdata['Time_hh:mm:ss']).dt.strftime('%H:%M:%S'),
format='%Y-%m-%d %H:%M:%S'))
dfcmaqdata['date_YYYY-MM-DD']=pd.to_datetime(dfcmaqdata['date_YYYY-MM-DD']).dt.strftime('%Y-%m-%d')
dfcmaqdata['Time_hh:mm:ss']=pd.to_datetime(dfcmaqdata['Time_hh:mm:ss']).dt.strftime('%H:%M:%S')
return dfcmaqdata
# Full year of base data
for month in np.arange(1,13): #1-12 inclusive
# get month as 2 character string
if month <10:
MM = '0'+str(month)
else:
MM = str(month)
# monthly data file
filename = datadir_base + 'ng_GA_cmaq55plus_cracmm2_2023_12US4_2023'+MM+'.csv'
# read into data frame
if MM == '01':
dfcmaq_base = get_df_fromwritesite(filename)
else:
dfcmaqnew = get_df_fromwritesite(filename)
dfcmaq_base = pd.concat([dfcmaq_base,dfcmaqnew])
# Full year of simulation without fires
for month in np.arange(1,13): #1-12 inclusive
# get month as 2 character string
if month <10:
MM = '0'+str(month)
else:
MM = str(month)
# monthly data file
filename = datadir_nofire + 'ng_GA_cmaq55plus_nofire_cracmm2_2023_12US4_2023'+MM+'.csv'
# read into data frame
if MM == '01':
dfcmaq_nofire = get_df_fromwritesite(filename)
else:
dfcmaqnew = get_df_fromwritesite(filename)
dfcmaq_nofire = pd.concat([dfcmaq_nofire,dfcmaqnew])
var = 'AOMIJ_ug m-3'
fig, ax = plt.subplots(1,1,figsize=(7,1.5),gridspec_kw=dict(wspace=0.02),dpi=300) # size is width, height
sites = dfcmaq_base['siteid_nan'].unique()
for site in sites:
dfplot = dfcmaq_base[dfcmaq_base['siteid_nan']==site]
ax.plot(dfplot['fulldatetime_%Y-%m-%d %H:%M:%S'],dfplot[var],lw=0.2,marker='',label=site)
print('Mean concentration for '+site+': '+str(round(np.mean(dfplot[var]),2)))
ax.set_xlabel('Date (GMT)',fontsize=7,labelpad=1)
ax.set_ylabel(var,fontsize=7,labelpad=1)
ax.set_title('Predicted organic aerosol concentration',fontsize=7)
ax.xaxis.set_tick_params(labelsize=7,pad=1,length=2)
ax.yaxis.set_tick_params(labelsize=7,pad=1,length=2)
#ax.legend(fontsize=7)
Mean concentration for SODEKALB: 5.83
var = 'AOMIJ_ug m-3'
fig, ax = plt.subplots(1,1,figsize=(7,1.5),gridspec_kw=dict(wspace=0.02),dpi=300) # size is width, height
sites = dfcmaq_nofire['siteid_nan'].unique()
for site in sites:
dfplot = dfcmaq_nofire[dfcmaq_base['siteid_nan']==site]
ax.plot(dfplot['fulldatetime_%Y-%m-%d %H:%M:%S'],dfplot[var],lw=0.2,marker='',label=site)
print('Mean concentration for '+site+': '+str(round(np.mean(dfplot[var]),2)))
ax.set_xlabel('Date (GMT)',fontsize=7,labelpad=1)
ax.set_ylabel(var,fontsize=7,labelpad=1)
ax.set_title('Predicted organic aerosol concentration',fontsize=7)
ax.xaxis.set_tick_params(labelsize=7,pad=1,length=2)
ax.yaxis.set_tick_params(labelsize=7,pad=1,length=2)
#ax.legend(fontsize=7)
Mean concentration for SODEKALB: 4.57