author: Havala O.T. Pye (pye.havala@epa.gov)
date: created 2025-02-28
modified 2025-05-14
%matplotlib inline
#!python -m pip install --user adjustText
#!python -m pip install --user textalloc
#!python -m pip install --user plotly
#!python -m pip install --user nbformat
# Libraries and packages (many optional/can comment out)
import cmaqsatproc as csp
import pandas as pd
import xarray as xr
import cartopy
import cartopy.crs as ccrs
import cartopy.feature
import cartopy.io.shapereader as shpreader
import matplotlib.pyplot as plt
import numpy as np
import glob
import netCDF4
from netCDF4 import Dataset
import PseudoNetCDF as pnc
from datetime import datetime, timedelta, date
import warnings
import scipy
from scipy.stats import bootstrap
import math
from IPython.display import display
import ast
import adjustText
import textalloc as ta
import plotly
import plotly.express as px
import nbformat
# print versions
print('csp version: ' +csp.__version__)
print('pandas version: ' +pd.__version__)
print('xr version: ' +xr.__version__)
print('cartopy version:' +cartopy.__version__)
print('Numpy version: ' +np.__version__)
print('Netcdf4 version:' +netCDF4.__version__)
csp version: 0.2.4 pandas version: 1.5.1 xr version: 2023.11.0 cartopy version:0.22.0 Numpy version: 1.22.3 Netcdf4 version:1.6.5
today = str(date.today())
today = today.replace('-','')
pd.options.display.max_columns = None
pd.options.display.max_rows = None
# Major file locations
workdir = '/work/MOD3DEV/has/2023cracmm_ages/analysis/scripts/' # where this script sits
datadir = '/work/MOD3DEV/has/2023cracmm_ages/runs/20250213cmaq55plus/data/POST/' # where the write site files are
outdir = workdir + '../output/' # for saving output and plots
# function to read write site output and put in pandas dataframe indexed by date+time
def get_df_fromwritesite(filenamewpath):
# assign column names with units
colnames = pd.read_csv(filenamewpath,skiprows=5,header=None).iloc[0,:].values
colunits = pd.read_csv(filenamewpath,skiprows=6,header=None).iloc[0,:].values
for i in np.arange(len(colnames)):
colnames[i] = colnames[i]+'_'+str(colunits[i])
# read file
dfcmaqdata = pd.read_csv(filenamewpath,skiprows=7,header=None)
dfcmaqdata = dfcmaqdata.rename(columns=dict(zip(dfcmaqdata.columns,colnames)))
# get full date and time (GMT)
dfcmaqdata['fulldatetime_%Y-%m-%d %H:%M:%S']= ( pd.to_datetime(
pd.to_datetime(dfcmaqdata['date_YYYY-MM-DD']).dt.strftime('%Y-%m-%d')+
' '+
pd.to_datetime(dfcmaqdata['Time_hh:mm:ss']).dt.strftime('%H:%M:%S'),
format='%Y-%m-%d %H:%M:%S'))
dfcmaqdata['date_YYYY-MM-DD']=pd.to_datetime(dfcmaqdata['date_YYYY-MM-DD']).dt.strftime('%Y-%m-%d')
dfcmaqdata['Time_hh:mm:ss']=pd.to_datetime(dfcmaqdata['Time_hh:mm:ss']).dt.strftime('%H:%M:%S')
return dfcmaqdata
for month in np.arange(1,13): #1-12 inclusive
# get month as 2 character string
if month <10:
MM = '0'+str(month)
else:
MM = str(month)
# monthly data file
filename = datadir + 'decarlo_LA_v3_cmaq55plus_cracmm2_2023_12US4_2023'+MM+'.csv'
# read into data frame
if MM == '01':
dfcmaq = get_df_fromwritesite(filename)
else:
dfcmaqnew = get_df_fromwritesite(filename)
dfcmaq = pd.concat([dfcmaq,dfcmaqnew])
dfcmaq.columns.tolist #copy/paste to list of gases
<bound method IndexOpsMixin.tolist of Index(['siteid_nan', 'column_nan', 'row_nan', 'longitude_degrees',
'latitude_degrees', 'date_YYYY-MM-DD', 'Time_hh:mm:ss',
'ACETALDEHYDE_ppbV', 'ACROLEIN_ppbV', 'BUTADIENE13_ppbV',
'BENZENE_ppbV', 'FORMALDEHYDE_ppbV', 'TOLUENE_ppbV', 'ETHB_ppbV',
'STYRENE_ppbV', 'CO_ppbV', 'MOH_ppbV', 'MVK_ppbV', 'MACR_ppbV',
'ISOP_ppbV', 'fulldatetime_%Y-%m-%d %H:%M:%S'],
dtype='object')>
gases = ['ACETALDEHYDE_ppbV', 'ACROLEIN_ppbV', 'BUTADIENE13_ppbV',
'BENZENE_ppbV', 'FORMALDEHYDE_ppbV', 'TOLUENE_ppbV', 'ETHB_ppbV',
'STYRENE_ppbV', 'CO_ppbV', 'MOH_ppbV', 'MVK_ppbV', 'MACR_ppbV',
'ISOP_ppbV']
for var in gases:
fig, ax = plt.subplots(1,1,figsize=(7,1.5),gridspec_kw=dict(wspace=0.02),dpi=300) # size is width, height
# plot a line for each grid cell
for i in np.arange(0,15): #0-14 inclusive
if i <10:
site = 'LAMOBILE0'+str(i)
else:
site = 'LAMOBILE'+str(i)
dfplot=dfcmaq[dfcmaq['siteid_nan']==site].copy()
ax.plot(dfplot['fulldatetime_%Y-%m-%d %H:%M:%S'],dfplot[var],lw=0.5,marker=' ')
ax.set_xlabel('Date (GMT)',fontsize=7,labelpad=1)
ax.set_ylabel(var,fontsize=7,labelpad=1)
ax.set_title('Predicted concentration in select Louisiana locations',fontsize=7)
#ax.set_yscale('log')
ax.xaxis.set_tick_params(labelsize=7,pad=1,length=2)
ax.yaxis.set_tick_params(labelsize=7,pad=1,length=2)