%run ~/STILT_modules_v2.5_upd.ipynb
%run ~/ICOS_atmObs_modules_v2.5_upd.ipynb
%run ~/for_thesis.ipynb
#%run ~/ida_master_def_upd.ipynb
import matplotlib.pyplot as plt
import matplotlib
from matplotlib.pyplot import figure
from datetime import datetime
import netCDF4 as cdf
import numpy as np
import numpy
import logging
logging.getLogger().setLevel(logging.CRITICAL)
import datetime as dt
import os, fnmatch
import fnmatch
import pandas as pd
import matplotlib.pyplot as p
from mpl_toolkits.basemap import Basemap
import warnings
warnings.filterwarnings('ignore')
import statsmodels.api as sm
import statsmodels.formula.api as smf
from netCDF4 import Dataset
from matplotlib.pyplot import figure
stations = create_STILT_dictionary()
Modelled compared to measured CO2 concentrations with data from Globalver:
GAT344, HEI, LIN099, LUT, PRS, TRN180
With data from the carbon portal:
NOR100, HTM150, SVB150, PUY, OPE120, KRE250, JFJ, HPB131, SMR125
Need to run the cells in order:
import math
year_start=input("Choose start date year (vary for station, currently only 2017): ")
month_start=input("Choose start date month (write 1-12): ")
day_start=input("Choose start date day (write number): ")
start_date=dt.datetime(int(year_start),int(month_start),int(day_start),0)
year_end=input("Choose end date year (vary for station, currenltly only 2017): ")
month_end=input("Choose end date month (write 1-12): ")
day_end=input("Choose end date day (write number): ")
end_date=dt.datetime(int(year_end), int(month_end), int(day_end),0)-dt.timedelta(hours=3)
date_range = pd.date_range(start_date, end_date, freq='3H')
url="https://stilt.icos-cp.eu/viewer/stationinfo"
More information on the Globalview product: here
The date/times for each measured value are saved in separate lists. These are in turn saved in lists of lists that are accessed one at a time using index to access the correct lists corresponding to each station.
Only the stations HEI, TRN, PRS, LUT, LIN and GAT have measured concentrations for 2017 (not including the data for stations that can be accessed directly from the carbon portal, see next section)
#latest version of globalview:
GVp3='obspack_co2_1_GLOBALVIEWplus_v4.2.2_2019-06-05'
#sepcific for 2017 (names of datasets, and avaiability)
start_date=dt.datetime(2017,1,1,0)
end_date=dt.datetime(2018,1,1,0)
def read_nc(filename):
fid_nc=cdf.Dataset(filename)
value=fid_nc.variables['value'][:]
time=fid_nc.variables['time'][:]
date=cdf.num2date(time,units=fid_nc.variables["time"].units)
fid_nc.close()
df = pd.DataFrame({'value':value},index=date)
return df
!ls /opt/stiltdata/ObsPack/obspack_co2_1_GLOBALVIEWplus_v4.2.2_2019-06-05/data/nc
#the stations I found to have measured concentrations:
allstations=['HEI', 'TRN', 'PRS', 'LUT','LIN','GAT']
path_ObsPack = '/opt/stiltdata/ObsPack/'
path_GVp3 = path_ObsPack
allfiles_GVp3 = fnmatch.filter(os.listdir(path_GVp3+GVp3+'/data/nc/'), '*insitu*')
#reterived data into list. Appending to these lists continue in the cell with measured data reterived from ICOS
measured_2017_list_of_lists=[]
dates_2017_list_of_lists=[]
for file in allfiles_GVp3:
for station in allstations:
if station.lower() in file:
#if it is lin or gat, there are one out of many files I want to look at
if station=='LIN':
if file=='co2_lin_surface-insitu_425_allvalid.nc':
fig = p.figure(figsize=(15,15))
ax = fig.add_subplot(3,1,1)
version=GVp3
path=path_GVp3+version+'/data/nc/'
filename1=path+file
print(filename1)
df_GVp3 = read_nc(filename1)
df_GVp3.name = file
ax.plot(df_GVp3.index,df_GVp3["value"]*1000000,'.r',label='GVp3')
ax.set_xlim(start_date,end_date)
p.title(file)
ax.set_ylabel('CO$_2$ [ppm]')
p.legend()
ax.grid(axis='x')
p.show()
measured_2017_list=[]
dates_2017_list=[]
for index in range(0,len(df_GVp3)):
if df_GVp3.index.year[index]==2017:
measured_2017_list.append(df_GVp3.value[index]*1000000)
#need to save dates for each sperate station. Know which dates/times have measured concentration.
#date used to match the correct stilt data.
dates_2017_list.append(df_GVp3.index[index])
measured_2017_list_of_lists.append(measured_2017_list)
dates_2017_list_of_lists.append(dates_2017_list)
#for GAT: same as for LIN
elif station=='GAT':
if file=='co2_gat_surface-insitu_442_allvalid-341magl.nc':
fig = p.figure(figsize=(15,15))
ax = fig.add_subplot(3,1,1)
version=GVp3
path=path_GVp3+version+'/data/nc/'
filename1=path+file
print(filename1)
df_GVp3 = read_nc(filename1)
df_GVp3.name = file
ax.plot(df_GVp3.index,df_GVp3["value"]*1000000,'.r',label='GVp3')
ax.set_xlim(start_date,end_date)
p.title(file)
ax.set_ylabel('CO$_2$ [ppm]')
p.legend()
ax.grid(axis='x')
p.show()
measured_2017_list=[]
dates_2017_list=[]
for index in range(0,len(df_GVp3)):
#print('happening within')
if df_GVp3.index.year[index]==2017:
measured_2017_list.append(df_GVp3.value[index]*1000000)
dates_2017_list.append(df_GVp3.index[index])
measured_2017_list_of_lists.append(measured_2017_list)
dates_2017_list_of_lists.append(dates_2017_list)
#unless stations GAT or LIN
else:
fig = p.figure(figsize=(15,15))
ax = fig.add_subplot(3,1,1)
version=GVp3
path=path_GVp3+version+'/data/nc/'
filename1=path+file
print(filename1)
df_GVp3 = read_nc(filename1)
df_GVp3.name = file
ax.plot(df_GVp3.index,df_GVp3["value"]*1000000,'.r',label='GVp3')
ax.set_xlim(start_date,end_date)
p.title(file)
ax.set_ylabel('CO$_2$ [ppm]')
p.legend()
ax.grid(axis='x')
p.show()
measured_2017_list=[]
dates_2017_list=[]
for index in range(0,len(df_GVp3)):
if df_GVp3.index.year[index]==2017:
measured_2017_list.append(df_GVp3.value[index]*1000000)
dates_2017_list.append(df_GVp3.index[index])
measured_2017_list_of_lists.append(measured_2017_list)
dates_2017_list_of_lists.append(dates_2017_list)
In this cell, modeled concentrations are plotted together with measured concentrations. Below the time series is a time series with only the model data differences, the result from subtracting the measured concentration values from the modelled, are shown. The model data differences are saved in lists to be used in sections further down in the Notebook.
#the order in which the data was saved (see order in which the graphs were produced.)
all_stations=['GAT344', 'HEI', 'LIN099', 'LUT', 'PRS', 'TRN180']
station_iterator=iter(all_stations)
#list of lists of model data differences
list_of_list_diff_measurements_stilt_not_abs=[]
#the total number of NaN-values for each station. A NaN-value can be because of missing measured (most common),
#or missing modelled concentrations
list_count_nan=[]
list_stations=[]
#move to the next station data (measured data) using index on the lists of lists in which it is saved
index=0
for station in station_iterator:
#access the STILT modeled concentrations
df = read_stilt_timeseries(station, date_range)
#want the list of measurements to match those of STILT
list_measurements = np.empty(len(date_range))
list_measurements[:] = np.nan
#all the modeled concentrations into a list:
list_stilt_values = np.empty(len(date_range))
#unless a modelled value, need to be a NaN-value to ensure the list of modelled and measured concentrations are the same lenght.
list_stilt_values[:] = np.nan
#count for STILT-values. Added to for each value in df['co2.fuel']
count = 0
#do not want to start over looking for a match between modeled and measured concentrations.
#this will be zero only the first time attempting to match the first stilt modeled concentration
#then added to
start_count = 0
#for every modeled co2-value (could be any of the columns here the fuel column)
for value in df['co2.fuel']:
#access the year, month, day and hour for the current modeled concentration
stilt_year= df['co2.fuel'].index[count].year
stilt_month= df['co2.fuel'].index[count].month
stilt_day= df['co2.fuel'].index[count].day
stilt_hour= df['co2.fuel'].index[count].hour
#add the modeled concentration to replace the "nan" that is the default.
list_stilt_values[count] = df['co2.stilt'][count]
#counting the number of measurements looped over. used as index to access next measurement in df_stationdata
#have to access the correct measured concentration corresponding to the current modeled concentration.
count_measurements = 0
#does not happen the first time (start_count=0)
if start_count > 0:
count_measurements = start_count
#do not want to start from the beginning every time when looking for the correct measured concentration.
for value in range(start_count, len(dates_2017_list_of_lists[index]), 1):
#print('happening')
#find a match between modeled and measured concentration.
#only every three hours (that's what it is for modelled concentrations)
if (dates_2017_list_of_lists[index][count_measurements].year == stilt_year and dates_2017_list_of_lists[index][count_measurements].month== stilt_month \
and dates_2017_list_of_lists[index][count_measurements].day == stilt_day and dates_2017_list_of_lists[index][count_measurements].hour == stilt_hour):
#count --> added to for every new value of modelled concentartion
#index --> to access correct station
#count_measurements to access the correct measured data value (matching the measured with the modeled
#in the if-statement). Count_measurement is added to even if there is no match.
list_measurements[count]= measured_2017_list_of_lists[index][count_measurements]
#used to access the correct value... once for each measured concentration (df_stationdata)
count_measurements = count_measurements +1
#set start_count to the index "we are on" so can start there next time looking for a "match" between modelled and measured concnetrations.
start_count=count_measurements
#moving on to looking for the next matchig measurement once found it
#--> there will only be one "matching" measured concentration for each modeled
#concentration = mo need to keep going in loop.
break
else:
#just moving on to next one if there is no match with modeled concentraion.
count_measurements = count_measurements +1
#this will run the first time there is a match between modeled and measured concentrations (hour/date the same)
#same thing happening just start from the beginning when looking for match.
else:
for value in range(0, len(dates_2017_list_of_lists[index]), 1):
if (dates_2017_list_of_lists[index][count_measurements].year == stilt_year and dates_2017_list_of_lists[index][count_measurements].month== stilt_month \
and dates_2017_list_of_lists[index][count_measurements].day == stilt_day and dates_2017_list_of_lists[index][count_measurements].hour == stilt_hour):
list_measurements[count]= measured_2017_list_of_lists[index][count_measurements]
count_measurements = count_measurements +1
start_count=count_measurements
break
#this is counting measurements, adding to even if no match with modelled concentrations.
else:
count_measurements = count_measurements +1
#this is counting stilt-values (outer loop)
count=count+1
#disregard nan-values when EITHER measured concentrations or modeled concentrations are missing for
#a specific date/time. Count how many times this happens.
#index2 is used to check the corresponding modelled concentration to the "current" measured concentration -->
#possible because the lenght of the two lists are the same.
index2=0
count_nan = 0
for value in list_measurements:
if (math.isnan(value)== True or math.isnan(list_stilt_values[index2]==True)):
count_nan=count_nan+1
index2=index2+1
#once "checked" on station, append NaN-value.
list_count_nan.append(count_nan)
#np.array to be able to subtract values from one list from the values in another list
#positive value is modelled concentation is higher than measured (overestimate)
diff_measurements_stilt_not_abs = np.array(list_stilt_values) - np.array(list_measurements)
#average of the diff, not including the footprints that have no measurements (or modeled values in terms of stilt)
diff_measurements_stilt_best_average = np.nanmean(diff_measurements_stilt_not_abs)
#standard deviation, not including nan-values
#previously used the abs value - not anymore. want to caputre exactly how much it varies between over and underestimates
diff_measurements_stilt_best_stdev= np.nanstd(diff_measurements_stilt_not_abs)
#appends to the list:
list_of_list_diff_measurements_stilt_not_abs.append(diff_measurements_stilt_not_abs)
#plot:
matplotlib.rcParams.update({'font.size': 17})
fig = p.figure(figsize=(20,20))
ax = fig.add_subplot(6,1,1)
#plot modelled concentrations
#df.index = date range
p.plot(df.index,list_stilt_values,linestyle=':',color='b',label='Modeled concentrations')
#can use df-index for measurements too --> have same date_range.
p.plot(df.index,list_measurements,linestyle='-',color='k',label='Measured concentrations')
date_index_number = (len(date_range) - 1)
p.title(station + '\n' + str(date_range[0].year) + '-' + str(date_range[0].month) + '-' + str(date_range[0].day)\
+ ' to ' + str(date_range[date_index_number].year) + '-' + str(date_range[date_index_number].month) + '-' + str(date_range[date_index_number].day)\
+ '\n'+ 'Number of requested footprints: ' + str(count) + '\n' + 'Number of NaN values: ' + str(count_nan)+ '\n' +\
'Modeled concentration, average deviation from measured concentration: ' + str("%.2f" % diff_measurements_stilt_best_average) + ' ppm' + '\n' + \
'Standard deviation of deviation: ' + str("%.2f" % diff_measurements_stilt_best_stdev) + ' ppm')
ax.set_xlim(start_date,end_date)
ax.set_ylabel('CO$_2$ [ppm]')
ax.grid(axis='x')
ax.legend(loc='upper right')
ax = fig.add_subplot(6,1,2)
#new plot --> below. Showing the difference from the measured concentrations.
p.plot(df.index,diff_measurements_stilt_not_abs,linestyle=':',color='b',label='Deviation from measured values')
zeros_for_measured_concentration = np.zeros(len(date_range))
p.plot(df.index,zeros_for_measured_concentration,linestyle='-',color='k')
ax.set_xlim(start_date,end_date)
ax.set_ylabel('CO$_2$ [ppm]')
ax.grid(axis='x')
ax.legend(loc='upper right')
#both plots in one --> subplots.
p.show()
index=index+1
OBS: need to runt the two previous cells before running this one!
After this cell, all available staions have lists with deviations between modelled and measured concentartions that can be used in the Notebook cells that look at the deviations
all_stations_orig=['NOR100', 'HTM150', 'SVB150', 'PUY', 'OPE120', 'KRE250', 'JFJ', 'HPB131', 'SMR125']
#UNCOMMENT IF ONLY WANT DATA FROM ICOS (not running the Globalview cells)
#list_of_list_diff_measurements_stilt_not_abs=[]
#list_count_nan=[]
#list_stations=[]
#RE-run this without appening to the list (changed from "not abs" in terms of standard deviation)
icos_stations = [ist[0:3] for ist in all_stations_orig]
#get_ICOS_filename --> see above. here change to download into folder, default is not
ICOS_files = get_ICOS_filename(icos_stations,download=True)
station_iterator=iter(all_stations_orig)
for station in station_iterator:
#access the STILT modeled concentrations
df = read_stilt_timeseries(station, date_range)
if len(station) > 3:
searchName=station[0:3]+'_'+str(int(station[3:]))
else:
searchName=station
#check if measured concentraiton for station?
filenames = [i for i in ICOS_files.fileName if searchName in i]
if not filenames:
print ('No ICOS data available for station ',searchName)
else:
for filename in filenames:
#need df_stationdata --> measured concentrations
df_stationdata, df_stationmetadata = read_ICOS_atm_downloaded(filename)
if df.empty:
print ('No STILT data for the defined date_range for station: '), station
else:
#SAME PROCESS AS FOR GLOBALVIEW DATA
list_stations.append(station)
list_measurements = np.empty(len(date_range))
list_measurements[:] = np.nan
list_stilt_values = np.empty(len(date_range))
list_stilt_values[:] = np.nan
count = 0
start_count = 0
for value in df['co2.fuel']:
stilt_year= df['co2.fuel'].index[count].year
stilt_month= df['co2.fuel'].index[count].month
stilt_day= df['co2.fuel'].index[count].day
stilt_hour= df['co2.fuel'].index[count].hour
list_stilt_values[count] = df['co2.stilt'][count]
count_measurements = 0
if start_count > 0:
count_measurements = start_count
for value in range(start_count, len(df_stationdata), 1):
if (df_stationdata['Year'][count_measurements] == str(stilt_year) and df_stationdata['Month'][count_measurements]== str(stilt_month).zfill(2) \
and df_stationdata['Day'][count_measurements] == str(stilt_day).zfill(2) and df_stationdata['Hour'][count_measurements] == str(stilt_hour).zfill(2)):
list_measurements[count]= df_stationdata['CO2'][count_measurements]
count_measurements = count_measurements +1
start_count=count_measurements
break
else:
count_measurements = count_measurements +1
else:
for value in range(0, len(df_stationdata), 1):
if (df_stationdata['Year'][count_measurements] == str(stilt_year) and df_stationdata['Month'][count_measurements]== str(stilt_month).zfill(2) \
and df_stationdata['Day'][count_measurements] == str(stilt_day).zfill(2) and df_stationdata['Hour'][count_measurements] == str(stilt_hour).zfill(2)):
list_measurements[count]= df_stationdata['CO2'][count_measurements]
count_measurements = count_measurements +1
start_count=count_measurements
break
else:
count_measurements = count_measurements +1
count=count+1
index=0
count_nan = 0
for value in list_measurements:
if (math.isnan(value)== True or math.isnan(list_stilt_values[index]==True)):
count_nan=count_nan+1
index=index+1
list_count_nan.append(count_nan)
diff_measurements_stilt_not_abs = np.array(list_stilt_values) - np.array(list_measurements)
diff_measurements_stilt_best_average = np.nanmean(diff_measurements_stilt_not_abs)
#updated - previously stdev of abs values
diff_measurements_stilt_best_stdev= np.nanstd(diff_measurements_stilt_not_abs)
list_of_list_diff_measurements_stilt_not_abs.append(diff_measurements_stilt_not_abs)
matplotlib.rcParams.update({'font.size': 17})
fig = p.figure(figsize=(20,20))
ax = fig.add_subplot(6,1,1)
p.plot(df.index,df['co2.stilt'],linestyle=':',color='b',label='Modeled concentrations')
p.plot(df.index,list_measurements,linestyle='-',color='k',label='Measured concentrations')
date_index_number = (len(date_range) - 1)
p.title(station + '\n' + str(date_range[0].year) + '-' + str(date_range[0].month) + '-' + str(date_range[0].day)\
+ ' to ' + str(date_range[date_index_number].year) + '-' + str(date_range[date_index_number].month) + '-' + str(date_range[date_index_number].day)\
+ '\n'+ 'Number of requested footprints: ' + str(count) + '\n' + 'Number of NaN values: ' + str(count_nan)+ '\n' +\
'Modeled concentration, average deviation from measured concentration: ' + str("%.2f" % diff_measurements_stilt_best_average) + ' ppm' + '\n' + \
'Standard deviation of deviation: ' + str("%.2f" % diff_measurements_stilt_best_stdev) + ' ppm')
ax.set_xlim(start_date,end_date)
ax.set_ylabel('CO$_2$ [ppm]')
ax.grid(axis='x')
ax.legend(loc='upper right')
ax = fig.add_subplot(6,1,2)
p.plot(df.index,diff_measurements_stilt_not_abs,linestyle=':',color='b',label='Deviation from measured values')
zeros_for_measured_concentration = np.zeros(len(date_range))
p.plot(df.index,zeros_for_measured_concentration,linestyle='-',color='k')
ax.set_xlim(start_date,end_date)
ax.set_ylabel('CO$_2$ [ppm]')
ax.grid(axis='x')
ax.legend(loc='upper right')
p.show()
Using the data extracted in the first cell, which must be ran first. The deviation data is
sorted based on which month and which hour the deviation data was extracted from and displayed in seperate bar graphs. This is done seperatley for each station.
At the end, the averages by month and by hours for all stations are displayed in seperate graphs.
#use the data in the last section.
#show the average deviation by month or by hour:
index_station=0
#un-comment if only ICOS measured data:
#list_stations=['NOR100', 'HTM150', 'SVB150', 'PUY', 'OPE120', 'KRE250', 'JFJ', 'HPB131', 'SMR125']
#un-comment if only Globalview data:
#list_stations=['GAT344', 'HEI', 'LIN099', 'LUT', 'PRS', 'TRN180']
#using data from both Globalview and ICOS measured data:
list_stations=['GAT344', 'HEI', 'LIN099', 'LUT', 'PRS', 'TRN180', 'NOR100', 'HTM150', 'SVB150', 'PUY', 'OPE120', 'KRE250', 'JFJ', 'HPB131', 'SMR125']
#for the final aggregated comparison between the stations in the station list (lists of lists appended to for each station)
zero_list=[]
three_list=[]
six_list=[]
nine_list=[]
twelve_list=[]
fifteen_list=[]
eighteen_list=[]
twentyone_list=[]
jan_list=[]
feb_list=[]
mar_list=[]
apr_list=[]
may_list=[]
jun_list=[]
jul_list=[]
aug_list=[]
sep_list=[]
oct_list=[]
nov_list=[]
dec_list=[]
for station in list_stations:
#move on in the list of measured/modeled concentrations:
index=0
#go over the deviation values and their associated months and hours, put in correct list.
list_jan = []
list_feb = []
list_mar = []
list_apr = []
list_may = []
list_jun = []
list_jul = []
list_aug = []
list_sep = []
list_oct = []
list_nov = []
list_dec = []
list_0 = []
list_3 = []
list_6 = []
list_9 = []
list_12 = []
list_15 = []
list_18 = []
list_21 = []
#when displaying the data, important to show how many nan-values are in there.
count_jan_nan = 0
count_feb_nan = 0
count_mar_nan = 0
count_apr_nan = 0
count_may_nan = 0
count_jun_nan = 0
count_jul_nan = 0
count_aug_nan = 0
count_sep_nan = 0
count_oct_nan = 0
count_nov_nan = 0
count_dec_nan = 0
count_0_nan = 0
count_3_nan = 0
count_6_nan = 0
count_9_nan = 0
count_12_nan = 0
count_15_nan = 0
count_18_nan = 0
count_21_nan = 0
#....and compare it to how many possible values there are for the month/hours = need to keep a count:
count_jan = 0
count_feb = 0
count_mar = 0
count_apr = 0
count_may = 0
count_jun = 0
count_jul = 0
count_aug = 0
count_sep = 0
count_oct = 0
count_nov = 0
count_dec = 0
count_0 = 0
count_3 = 0
count_6 = 0
count_9 = 0
count_12 = 0
count_15 = 0
count_18 = 0
count_21 = 0
#count_measurements is added to for each no-nan value
count_measurements = 0
for value in list_of_list_diff_measurements_stilt_not_abs[index_station]:
month=date_range[index].month
hour=date_range[index].hour
if math.isnan(value) == False:
#only place for count_measurements
count_measurements = count_measurements + 1
if month == 1:
list_jan.append(value)
if math.isnan(value) == True:
count_jan_nan=count_jan_nan + 1
else:
count_jan=count_jan+1
if month == 2:
list_feb.append(value)
if math.isnan(value) == True:
count_feb_nan=count_feb_nan + 1
else:
count_feb=count_feb+1
if month == 3:
list_mar.append(value)
if math.isnan(value) == True:
count_mar_nan=count_mar_nan + 1
else:
count_mar=count_mar+1
if month == 4:
list_apr.append(value)
if math.isnan(value) == True:
count_apr_nan=count_apr_nan + 1
else:
count_apr=count_apr+1
if month == 5:
list_may.append(value)
if math.isnan(value) == True:
count_may_nan=count_may_nan + 1
else:
count_may=count_may+1
if month == 6:
list_jun.append(value)
if math.isnan(value) == True:
count_jun_nan=count_jun_nan + 1
else:
count_jun=count_jun+1
if month == 7:
list_jul.append(value)
if math.isnan(value) == True:
count_jul_nan=count_jul_nan + 1
else:
count_jul=count_jul+1
if month == 8:
list_aug.append(value)
if math.isnan(value) == True:
count_aug_nan=count_aug_nan + 1
else:
count_aug=count_aug+1
if month == 9:
list_sep.append(value)
if math.isnan(value) == True:
count_sep_nan=count_sep_nan + 1
else:
count_sep=count_sep+1
if month == 10:
list_oct.append(value)
if math.isnan(value) == True:
count_oct_nan=count_oct_nan + 1
else:
count_oct=count_oct+1
if month == 11:
list_nov.append(value)
if math.isnan(value) == True:
count_nov_nan=count_nov_nan + 1
else:
count_nov=count_nov+1
if month == 12:
list_dec.append(value)
if math.isnan(value) == True:
count_dec_nan=count_dec_nan + 1
else:
count_dec=count_dec+1
if hour == 0:
list_0.append(value)
if math.isnan(value) == True:
count_0_nan=count_0_nan + 1
else:
count_0=count_0+1
if hour == 3:
list_3.append(value)
if math.isnan(value) == True:
count_3_nan=count_3_nan + 1
else:
count_3=count_3+1
if hour == 6:
list_6.append(value)
if math.isnan(value) == True:
count_6_nan=count_6_nan + 1
else:
count_6=count_6+1
if hour == 9:
list_9.append(value)
if math.isnan(value) == True:
count_9_nan=count_9_nan + 1
else:
count_9=count_9+1
if hour == 12:
list_12.append(value)
if math.isnan(value) == True:
count_12_nan=count_12_nan + 1
else:
count_12=count_12+1
if hour == 15:
list_15.append(value)
if math.isnan(value) == True:
count_15_nan=count_15_nan + 1
else:
count_15=count_15+1
if hour == 18:
list_18.append(value)
if math.isnan(value) == True:
count_18_nan=count_18_nan + 1
else:
count_18=count_18+1
if hour == 21:
list_21.append(value)
if math.isnan(value) == True:
count_21_nan=count_21_nan + 1
else:
count_21=count_21+1
index = index + 1
#these are going into seperate graphs
averages_months= np.array((np.nanmean(list_jan), np.nanmean(list_feb), np.nanmean(list_mar), np.nanmean(list_apr), \
np.nanmean(list_may), np.nanmean(list_jun), np.nanmean(list_jun), np.nanmean(list_aug), np.nanmean(list_sep),\
np.nanmean(list_oct), np.nanmean(list_nov), np.nanmean(list_dec)))
#for aggregated graph at the end:
jan_list.append(averages_months[0])
feb_list.append(averages_months[1])
mar_list.append(averages_months[2])
apr_list.append(averages_months[3])
may_list.append(averages_months[4])
jun_list.append(averages_months[5])
jul_list.append(averages_months[6])
aug_list.append(averages_months[7])
sep_list.append(averages_months[8])
oct_list.append(averages_months[9])
nov_list.append(averages_months[10])
dec_list.append(averages_months[11])
averages_hours= np.array((np.nanmean(list_0), np.nanmean(list_3), np.nanmean(list_6), np.nanmean(list_9), \
np.nanmean(list_12), np.nanmean(list_15), np.nanmean(list_18), np.nanmean(list_21)))
#for aggregated graph at the end:
zero_list.append(averages_hours[0])
three_list.append(averages_hours[1])
six_list.append(averages_hours[2])
nine_list.append(averages_hours[3])
twelve_list.append(averages_hours[4])
fifteen_list.append(averages_hours[5])
eighteen_list.append(averages_hours[6])
twentyone_list.append(averages_hours[7])
count_nan_months = np.array((count_jan_nan, count_feb_nan, count_mar_nan, count_apr_nan, count_may_nan, count_jun_nan,\
count_jul_nan, count_aug_nan, count_sep_nan, count_oct_nan, count_nov_nan, count_dec_nan))
count_nan_hours = np.array((count_0_nan, count_3_nan, count_6_nan, count_9_nan, count_12_nan, count_15_nan,\
count_18_nan, count_21_nan))
std_months= np.array((np.nanstd(list_jan), np.nanstd(list_feb), np.nanstd(list_mar), np.nanstd(list_apr), \
np.nanstd(list_may), np.nanstd(list_jun), np.nanstd(list_jul), np.nanstd(list_aug), np.nanstd(list_sep),\
np.nanstd(list_oct), np.nanstd(list_nov), np.nanstd(list_dec)))
std_hours= np.array((np.nanstd(list_0), np.nanstd(list_3), np.nanstd(list_6), np.nanstd(list_9), \
np.nanstd(list_12), np.nanstd(list_15), np.nanstd(list_18), np.nanstd(list_21)))
#is there any data to display in graphs?
check_for_data = 0
for value in averages_months:
if math.isnan(value) == False:
check_for_data = check_for_data + 1
#if there is data in the lists, can move on.
#since running last cell, not really necessary
if check_for_data != 0:
N = len(averages_months)
ind = np.arange(N)
width = 0.35
#added figure stuff for size change
figure(num=None, figsize=(11, 8), dpi=80, facecolor='w', edgecolor='k')
ax = plt.subplot(111)
ax.yaxis.grid(True)
plt.bar(ind, averages_months, width, color='blue')
matplotlib.rcParams.update({'font.size': 17})
plt.ylabel('Average deviation from measured concentrations')
plt.xlabel('Month (count NaN)')
#added figsize
#for title --> use variables of last cell as well (ex number of no measurements. nan here can also just mean no cells for it)
plt.title(station +' performance modeled concentration by month' + '\n' + str(date_range[0].year) + '-' + str(date_range[0].month) + '-' + str(date_range[0].day)\
+ ' to ' + str(date_range[date_index_number].year) + '-' + str(date_range[date_index_number].month) + '-' + str(date_range[date_index_number].day)+ '\n'\
'Number of comparisons between modeled and measured concentrations: ' + str(count_measurements) + '\n')
plt.xticks(ind, ('Jan (' + str(count_nan_months[0]) + '/' + str(len(list_jan)) + ')' , 'Feb (' + str(count_nan_months[1]) + '/' + str(len(list_feb)) + ')', 'Mar ('+ str(count_nan_months[2]) + '/' + str(len(list_mar)) + ')',\
'Apr (' + str(count_nan_months[3]) + '/' + str(len(list_apr)) + ')', 'May ('+ str(count_nan_months[4]) + '/' + str(len(list_may)) + ')', 'Jun (' + str(count_nan_months[5]) + '/' + str(len(list_jun)) +')', \
'Jul (' + str(count_nan_months[6]) + '/' + str(len(list_jul)) + ')', 'Aug (' + str(count_nan_months[7]) + '/' + str(len(list_aug)) +')', 'Sep (' + str(count_nan_months[8]) + '/' + str(len(list_sep)) +')',\
'Oct (' + str(count_nan_months[9]) + '/' + str(len(list_oct)) +')', 'Nov (' + str(count_nan_months[10]) + '/' + str(len(list_nov)) +')', 'Dec (' + str(count_nan_months[11]) + '/' + str(len(list_dec)) +')'), rotation='vertical')
plt.axhline(0, color='black', lw=2)
plt.show()
std_months= std_months.tolist()
std_months_dic=["Jan", "Feb", "Mar", "Apr", "May", "Jun", "Jul", "Aug", "Sep", "Oct", "Nov", "Dec"]
index=0
for value in std_months:
print ('Standard deviation for', std_months_dic[index], 'month:', ("%.2f" %std_months[index]))
index=index+1
N = len(averages_hours)
ind = np.arange(N)
width = 0.35
figure(num=None, figsize=(11, 8), dpi=80, facecolor='w', edgecolor='k')
ax = plt.subplot(111)
ax.yaxis.grid(True)
plt.bar(ind, averages_hours, width, color='g')
plt.ylabel('Average deviation from measured concentration')
plt.xlabel('Hour of the day (count NaN)')
#for title --> use variables of last cell as well (ex number of no measurements. nan here can also just mean no cells for it)
plt.title(station + ' performance modeled concentration by hour' + '\n' + str(date_range[0].year) + '-' + str(date_range[0].month) + '-' + str(date_range[0].day)\
+ ' to ' + str(date_range[date_index_number].year) + '-' + str(date_range[date_index_number].month) + '-' + str(date_range[date_index_number].day)+ '\n'\
'Number of comparisons between modeled and measured concentrations: ' + str(count_measurements)+ '\n')
plt.xticks(ind, ('0 (' + str(count_nan_hours[0]) + '/' + str(len(list_0))+ ')' , '3 (' + str(count_nan_hours[1]) + '/' + str(len(list_3))+ ')' , '6 (' + str(count_nan_hours[2]) + '/' + str(len(list_9))+ ')',\
'9 (' + str(count_nan_hours[3]) + '/' + str(len(list_9))+ ')', '12 (' + str(count_nan_hours[4]) + '/' + str(len(list_12))+ ')', '15 (' + str(count_nan_hours[5]) + '/' + str(len(list_15))+ ')',\
'18 (' + str(count_nan_hours[6]) + '/' + str(len(list_18))+ ')', '21 (' + str(count_nan_hours[7]) + '/' + str(len(list_21))+')'), rotation='vertical')
plt.axhline(0, color='black', lw=2)
plt.show()
std_hours= std_hours.tolist()
std_hours_dic=["0", "3", "6", "9", "12", "15", "18", "21"]
index=0
for value in std_hours:
print ('Standard deviation at', std_hours_dic[index], 'hour:', ("%.2f" %std_hours[index]))
index=index+1
else:
print('no measuremets for this station')
index_station=index_station+1
#add comparison? All in one bar graph at the bottom? maybe only in this cell, not next.
y_pos = np.arange(len(list_stations))
figure(num=None, figsize=(20, 10), dpi=80, facecolor='w', edgecolor='k')
ax = plt.subplot(111)
p1 = ax.bar(y_pos-0.4, zero_list ,width=0.03,color='r',align='center')
p2 = ax.bar(y_pos-0.3, three_list,width=0.03,color='g',align='center')
p3 = ax.bar(y_pos-0.2, six_list,width=0.03,color='g',align='center')
p4 = ax.bar(y_pos-0.1, nine_list,width=0.03,color='g',align='center')
p5 = ax.bar(y_pos+0, twelve_list ,width=0.03,color='g',align='center')
p6 = ax.bar(y_pos +0.1, fifteen_list ,width=0.03,color='g',align='center')
p7 = ax.bar(y_pos+0.2, eighteen_list,width=0.03,color='g',align='center')
p8 = ax.bar(y_pos+0.3, twentyone_list,width=0.03,color='y',align='center')
ax.yaxis.grid(True)
ax.legend((p1[0], p2[0], p3[0], p4[0], p5[0], p6[0], p7[0], p8[0]), ('0', '3', '6', '9','12', '15', '18', '21') )
plt.xticks(y_pos, list_stations, rotation='vertical')
plt.axhline(0, color='black', lw=2)
#get the index of the last date (start at 0) to access it in the titles
date_index_number = (len(date_range) - 1)
plt.title('Average disagreement by hour' + '\n' + str(date_range[0].year) + '-' + str(date_range[0].month) + '-' + str(date_range[0].day)\
+ ' to ' + str(date_range[date_index_number].year) + '-' + str(date_range[date_index_number].month) + '-' + str(date_range[date_index_number].day)+ '\n' +\
'Hour(s): all')
plt.ylabel('ppm')
plt.show()
y_pos = np.arange(len(list_stations))
figure(num=None, figsize=(20, 10), dpi=80, facecolor='w', edgecolor='k')
ax = plt.subplot(111)
p1 = ax.bar(y_pos-0.3, jan_list ,width=0.03,color='r',align='center')
p2 = ax.bar(y_pos-0.25, feb_list,width=0.03,color='b',align='center')
p3 = ax.bar(y_pos-0.20, mar_list,width=0.03,color='b',align='center')
p4 = ax.bar(y_pos-0.15, apr_list,width=0.03,color='b',align='center')
p5 = ax.bar(y_pos-0.10, may_list ,width=0.03,color='b',align='center')
p6 = ax.bar(y_pos-0.05, jun_list ,width=0.03,color='b',align='center')
p7 = ax.bar(y_pos+0.0, jul_list,width=0.03,color='b',align='center')
p8 = ax.bar(y_pos+0.05, aug_list,width=0.03,color='b',align='center')
p9 = ax.bar(y_pos+0.10, sep_list ,width=0.03,color='b',align='center')
p10 = ax.bar(y_pos +0.15, oct_list ,width=0.03,color='b',align='center')
p11 = ax.bar(y_pos+0.20, nov_list,width=0.03,color='b',align='center')
p12 = ax.bar(y_pos+0.25, dec_list,width=0.03,color='y',align='center')
ax.legend((p1[0], p2[0], p3[0], p4[0], p5[0], p6[0], p7[0], p8[0], p9[0], p10[0], p11[0], p12[0]), ('Jan', 'Feb', 'Mar', 'Apr','May', 'Jun', 'Jul', 'Aug', 'Sep', 'Oct', 'Nov', 'Dec'), ncol=2)
ax.yaxis.grid(True)
plt.xticks(y_pos, list_stations, rotation='vertical')
plt.axhline(0, color='black', lw=2)
#get the index of the last date (start at 0) to access it in the titles
date_index_number = (len(date_range) - 1)
plt.title('Average disagreement by month' + '\n' + str(date_range[0].year) + '-' + str(date_range[0].month) + '-' + str(date_range[0].day)\
+ ' to ' + str(date_range[date_index_number].year) + '-' + str(date_range[date_index_number].month) + '-' + str(date_range[date_index_number].day)+ '\n' +\
'Hour(s): all')
plt.ylabel('ppm')
plt.show()
The deviation data extracted in the first cell is needed to run this cell.
The user chooses the threshold by writing a positive integer when prompted upon running the cell.
For each station is all_stations, one bar graph with the count over the threshold and one bar graph
with count for under the threshold is output.
At the end there are four aggregated bar graphs with all stations:
One for count of overestimations are shown by month, one for underestimating by month, and the same set of graphs
for the different hours.
threshold_input= input("Write threshold ppm for display in graphs(positve integer): ")
thershold = int(threshold_input)
thershold_neg= -1* int(threshold_input)
#for the final aggregated comparison between the stations in the station list
zero_list_positive=[]
three_list_positive=[]
six_list_positive=[]
nine_list_positive=[]
twelve_list_positive=[]
fifteen_list_positive=[]
eighteen_list_positive=[]
twentyone_list_positive=[]
jan_list_positive=[]
feb_list_positive=[]
mar_list_positive=[]
apr_list_positive=[]
may_list_positive=[]
jun_list_positive=[]
jul_list_positive=[]
aug_list_positive=[]
sep_list_positive=[]
oct_list_positive=[]
nov_list_positive=[]
dec_list_positive=[]
zero_list_negative=[]
three_list_negative=[]
six_list_negative=[]
nine_list_negative=[]
twelve_list_negative=[]
fifteen_list_negative=[]
eighteen_list_negative=[]
twentyone_list_negative=[]
jan_list_negative=[]
feb_list_negative=[]
mar_list_negative=[]
apr_list_negative=[]
may_list_negative=[]
jun_list_negative=[]
jul_list_negative=[]
aug_list_negative=[]
sep_list_negative=[]
oct_list_negative=[]
nov_list_negative=[]
dec_list_negative=[]
index_station=0
for station in list_stations:
#one set of bar graphs (by month and by hour), for each threshold
index = 0
#by hour
list_of_indexes_positive = np.empty(len(date_range))
list_of_indexes_positive[:] = np.nan
list_of_indexes_negative = np.empty(len(date_range))
list_of_indexes_negative[:] = np.nan
#by month
list_of_indexes_positive_month = np.empty(len(date_range))
list_of_indexes_positive_month[:] = np.nan
list_of_indexes_negative_month= np.empty(len(date_range))
list_of_indexes_negative_month[:] = np.nan
list_of_nan = np.empty(len(date_range))
list_of_nan[:] = np.nan
list_of_nan_month = np.empty(len(date_range))
list_of_nan_month[:] = np.nan
#a list where the "month numbers" are added to.
count_months = []
for value in list_of_list_diff_measurements_stilt_not_abs[index_station]:
count_months.append(date_range[index].month)
#also, add to the lists depending on if over or under the thresholds:
if value > int(thershold):
#the list is as long as the date_range
list_of_indexes_positive[index] = date_range[index].hour
list_of_indexes_positive_month[index] = date_range[index].month
if value < int(thershold_neg):
list_of_indexes_negative[index] = date_range[index].hour
list_of_indexes_negative_month[index] = date_range[index].month
if math.isnan(value) == True:
list_of_nan[index] = date_range[index].hour
list_of_nan_month[index] = date_range[index].month
#move to the next deviation value.
index = index + 1
#how many of these values are there for each month?
count_jan_month = 0
count_feb_month = 0
count_mar_month = 0
count_apr_month = 0
count_may_month = 0
count_jun_month = 0
count_jul_month = 0
count_aug_month = 0
count_sep_month = 0
count_oct_month = 0
count_nov_month = 0
count_dec_month = 0
#how many total in each month:
for month in count_months:
if month == 1:
count_jan_month = count_jan_month + 1
if month == 2:
count_feb_month = count_feb_month + 1
if month == 3:
count_mar_month = count_mar_month + 1
if month == 4:
count_apr_month = count_apr_month + 1
if month == 5:
count_may_month = count_may_month + 1
if month == 6:
count_jun_month = count_jun_month + 1
if month == 7:
count_jul_month = count_jul_month + 1
if month == 8:
count_aug_month = count_aug_month + 1
if month == 9:
count_sep_month = count_sep_month + 1
if month == 10:
count_oct_month = count_oct_month + 1
if month == 11:
count_nov_month = count_nov_month + 1
if month == 12:
count_dec_month = count_dec_month + 1
count_0 = 0
count_3 = 0
count_6= 0
count_9 = 0
count_12 = 0
count_15 = 0
count_18 = 0
count_21 = 0
#hours, overestimating more than the threshold.
for value in list_of_indexes_positive:
if value == 0:
count_0 = count_0 + 1
if value == 3:
count_3 = count_3 + 1
if value == 6:
count_6 = count_6 + 1
if value == 9:
count_9 = count_9 + 1
if value == 12:
count_12 = count_12 + 1
if value == 15:
count_15 = count_15 + 1
if value == 18:
count_18 = count_18 + 1
if value == 21:
count_21 = count_21 + 1
#for aggregated bar graph at the end
zero_list_positive.append(count_0)
three_list_positive.append(count_3)
six_list_positive.append(count_6)
nine_list_positive.append(count_9)
twelve_list_positive.append(count_12)
fifteen_list_positive.append(count_15)
eighteen_list_positive.append(count_18)
twentyone_list_positive.append(count_21)
count_jan = 0
count_feb = 0
count_mar= 0
count_apr = 0
count_may= 0
count_jun = 0
count_jul = 0
count_aug = 0
count_sep = 0
count_oct = 0
count_nov= 0
count_dec = 0
for value in list_of_indexes_positive_month:
if value == 1:
count_jan = count_jan + 1
if value == 2:
count_feb = count_feb + 1
if value == 3:
count_mar = count_mar + 1
if value == 4:
count_apr = count_apr + 1
if value == 5:
count_may = count_may + 1
if value == 6:
count_jun = count_jun + 1
if value == 7:
count_jul = count_jul + 1
if value == 8:
count_aug = count_aug + 1
if value == 9:
count_sep = count_sep + 1
if value == 10:
count_oct = count_oct + 1
if value == 11:
count_nov = count_nov + 1
if value == 12:
count_dec = count_dec + 1
jan_list_positive.append(count_jan)
feb_list_positive.append(count_feb)
mar_list_positive.append(count_mar)
apr_list_positive.append(count_apr)
may_list_positive.append(count_may)
jun_list_positive.append(count_jun)
jul_list_positive.append(count_jul)
aug_list_positive.append(count_aug)
sep_list_positive.append(count_sep)
oct_list_positive.append(count_oct)
nov_list_positive.append(count_nov)
dec_list_positive.append(count_dec)
count_0_neg = 0
count_3_neg = 0
count_6_neg = 0
count_9_neg = 0
count_12_neg = 0
count_15_neg = 0
count_18_neg = 0
count_21_neg = 0
for value in list_of_indexes_negative:
if value == 0:
count_0_neg = count_0_neg + 1
if value == 3:
count_3_neg = count_3_neg + 1
if value == 6:
count_6_neg = count_6_neg+ 1
if value == 9:
count_9_neg = count_9_neg + 1
if value == 12:
count_12_neg = count_12_neg + 1
if value == 15:
count_15_neg = count_15_neg + 1
if value == 18:
count_18_neg = count_18_neg + 1
if value == 21:
count_21_neg = count_21_neg + 1
zero_list_negative.append(count_0_neg)
three_list_negative.append(count_3_neg)
six_list_negative.append(count_6_neg)
nine_list_negative.append(count_9_neg)
twelve_list_negative.append(count_12_neg)
fifteen_list_negative.append(count_15_neg)
eighteen_list_negative.append(count_18_neg)
twentyone_list_negative.append(count_21_neg)
count_jan_neg = 0
count_feb_neg = 0
count_mar_neg= 0
count_apr_neg = 0
count_may_neg= 0
count_jun_neg = 0
count_jul_neg = 0
count_aug_neg = 0
count_sep_neg = 0
count_oct_neg = 0
count_nov_neg= 0
count_dec_neg = 0
for value in list_of_indexes_negative_month:
if value == 1:
count_jan_neg = count_jan_neg + 1
if value == 2:
count_feb_neg = count_feb_neg + 1
if value == 3:
count_mar_neg = count_mar_neg + 1
if value == 4:
count_apr_neg = count_apr_neg + 1
if value == 5:
count_may_neg = count_may_neg + 1
if value == 6:
count_jun_neg = count_jun_neg + 1
if value == 7:
count_jul_neg = count_jul_neg + 1
if value == 8:
count_aug_neg = count_aug_neg + 1
if value == 9:
count_sep_neg = count_sep_neg + 1
if value == 10:
count_oct_neg = count_oct_neg + 1
if value == 11:
count_nov_neg = count_nov_neg + 1
if value == 12:
count_dec_neg = count_dec_neg + 1
jan_list_negative.append(count_jan_neg)
feb_list_negative.append(count_feb_neg)
mar_list_negative.append(count_mar_neg)
apr_list_negative.append(count_apr_neg)
may_list_negative.append(count_may_neg)
jun_list_negative.append(count_jun_neg)
jul_list_negative.append(count_jul_neg)
aug_list_negative.append(count_aug_neg)
sep_list_negative.append(count_sep_neg)
oct_list_negative.append(count_oct_neg)
nov_list_negative.append(count_nov_neg)
dec_list_negative.append(count_dec_neg)
count_0_nan = 0
count_3_nan = 0
count_6_nan = 0
count_9_nan = 0
count_12_nan = 0
count_15_nan = 0
count_18_nan = 0
count_21_nan = 0
for value in list_of_nan:
if value == 0:
count_0_nan = count_0_nan + 1
if value == 3:
count_3_nan = count_3_nan + 1
if value == 6:
count_6_nan = count_6_nan+ 1
if value == 9:
count_9_nan = count_9_nan + 1
if value == 12:
count_12_nan = count_12_nan + 1
if value == 15:
count_15_nan = count_15_nan + 1
if value == 18:
count_18_nan = count_18_nan + 1
if value == 21:
count_21_nan = count_21_nan + 1
count_jan_nan = 0
count_feb_nan = 0
count_mar_nan = 0
count_apr_nan = 0
count_may_nan = 0
count_jun_nan = 0
count_jul_nan = 0
count_aug_nan = 0
count_sep_nan = 0
count_oct_nan = 0
count_nov_nan = 0
count_dec_nan = 0
for value in list_of_nan_month:
if value == 1:
count_jan_nan = count_jan_nan + 1
if value == 2:
count_feb_nan = count_feb_nan + 1
if value == 3:
count_mar_nan = count_mar_nan+ 1
if value == 4:
count_apr_nan = count_apr_nan + 1
if value == 5:
count_may_nan = count_may_nan + 1
if value == 6:
count_jun_nan = count_jun_nan + 1
if value == 7:
count_jul_nan = count_jul_nan + 1
if value == 8:
count_aug_nan = count_aug_nan + 1
if value == 9:
count_sep_nan = count_sep_nan + 1
if value == 10:
count_oct_nan = count_oct_nan + 1
if value == 11:
count_nov_nan = count_nov_nan + 1
if value == 12:
count_dec_nan = count_dec_nan + 1
list_negatives = [count_0_neg, count_3_neg, count_6_neg, count_9_neg, count_12_neg, count_15_neg, count_18_neg ,count_21_neg]
list_positives= [count_0, count_3, count_6,count_9, count_12, count_15, count_18,count_21]
list_nan = [count_0_nan, count_3_nan, count_6_nan, count_9_nan, count_12_nan, count_15_nan, count_18_nan ,count_21_nan]
#here by month:
list_positives_month = [count_jan, count_feb, count_mar, count_apr, count_may, count_jun, count_jul, count_aug, count_sep, count_oct, count_nov, count_dec]
list_negatives_month = [count_jan_neg, count_feb_neg, count_mar_neg, count_apr_neg, count_may_neg, count_jun_neg, count_jul_neg, count_aug_neg, count_sep_neg, count_oct_neg, count_nov_neg, count_dec_neg]
list_nan_month = [count_jan_nan, count_feb_nan, count_mar_nan, count_apr_nan, count_may_nan, count_jun_nan, count_jul_nan, count_aug_nan, count_sep_nan, count_oct_nan, count_nov_nan, count_dec_nan]
#here count for each month (total footprints by month to use in % calculation):
count_month_list = [count_jan_month, count_feb_month, count_mar_month, count_apr_month, count_may_month, count_jun_month, count_jul_month, count_aug_month, count_sep_month, count_oct_month, count_nov_month, count_dec_month]
total_negatives = 0
total_positives = 0
for value in list_negatives:
total_negatives=total_negatives + int(value)
for value in list_positives:
total_positives = total_positives + int(value)
#changed here... don't want to display empty graphs.
if total_negatives>0 or total_positives>0:
#MAKING bar graph for hour of the day:
count_positives = np.array((count_0, count_3, count_6, count_9, count_12, count_15, count_18, count_21))
count_negatives = np.array((count_0_neg, count_3_neg, count_6_neg, count_9_neg, count_12_neg, count_15_neg, count_18_neg, count_21_neg))
count_nan = np.array((count_0_nan, count_3_nan, count_6_nan, count_9_nan, count_12_nan, count_15_nan, count_18_nan ,count_21_nan))
#how many values there are:
N = len(count_positives)
ind = np.arange(N)
width = 0.35
matplotlib.rcParams.update({'font.size': 17})
figure(num=None, figsize=(11, 8), dpi=80, facecolor='w', edgecolor='k')
ax = plt.subplot(111)
ax.yaxis.grid(True)
p1 = plt.bar(ind, count_positives, width, color='red')
p2 = plt.bar(ind, count_negatives, width, bottom=count_positives, color='green')
#p3 = plt.bar(ind,count_nan, width, bottom=count_positives+count_negatives, color= 'grey')
plt.ylabel('Count')
plt.xlabel('Hour of the day (count NaN)')
#for title --> use variables of last cell as well (ex number of no measurements. nan here can also just mean no cells for it)
plt.title(station + '\n' + 'Deviations over and under measured concentrations' + '\n' + 'Thershold: +-' + str(thershold) + ' ppm' + '\n' + str(date_range[0].year) + '-' + str(date_range[0].month) + '-' + str(date_range[0].day)\
+ ' to ' + str(date_range[date_index_number].year) + '-' + str(date_range[date_index_number].month) + '-' + str(date_range[date_index_number].day)+ '\n'\
'Footprints within date range: ' + str(count)+ '\n')
plt.xticks(ind, ('0 (' + str(count_nan[0]) + ')', '3 (' + str(count_nan[1]) + ')', '6 (' + str(count_nan[2]) + ')', '9 (' + str(count_nan[3]) + ')', '12 (' + str(count_nan[4]) + ')', '15 (' + str(count_nan[5]) + ')', '18 (' + str(count_nan[6]) + ')', '21 (' + str(count_nan[7]) + ')'), rotation='vertical')
plt.legend((p1[0], p2[0], p3[0]), ('>' + str(thershold) + ' ppm over: ' + str(sum(count_positives)) , '>' + str(thershold_neg) + 'ppm below: ' + str(sum(count_negatives))))
plt.legend(loc=9, bbox_to_anchor=(0.5, -0.1))
#pylab.legend(loc=9, bbox_to_anchor=(0.5, -0.1))
plt.show()
#NEW: graph for by month:
count_positives_month = np.array((count_jan, count_feb, count_mar, count_apr, count_may, count_jun, count_jul, count_aug, count_sep, count_oct, count_nov, count_dec))
count_negatives_month = np.array((count_jan_neg, count_feb_neg, count_mar_neg, count_apr_neg, count_may_neg, count_jun_neg, count_jul_neg, count_aug_neg, count_sep_neg, count_oct_neg, count_nov_neg, count_dec_neg))
count_nan_month = np.array((count_jan_nan, count_feb_nan, count_mar_nan, count_apr_nan, count_may_nan, count_jun_nan, count_jul_nan, count_aug_nan, count_sep_nan, count_oct_nan, count_nov_nan, count_dec_nan))
count_nan_month_total=sum(count_nan_month)
figure(num=None, figsize=(11, 8), dpi=80, facecolor='w', edgecolor='k')
matplotlib.rcParams.update({'font.size': 17})
N = len(count_positives_month)
ax = plt.subplot(111)
ax.yaxis.grid(True)
ind = np.arange(N)
width = 0.35
p1 = plt.bar(ind, count_positives_month, width, color='red')
p2 = plt.bar(ind, count_negatives_month, width, bottom=count_positives_month, color='blue')
#p3 = plt.bar(ind,count_nan_month, width, bottom=count_positives_month+count_negatives_month, color= 'grey')
plt.ylabel('Count')
plt.xlabel('Month (count NaN)')
#for title --> use variables of last cell as well (ex number of no measurements. nan here can also just mean no cells for it)
plt.title(station + '\n' + 'Deviation over and under measured concentrations' + '\n' + 'Thershold: +-' + str(thershold) + ' ppm' + '\n' + str(date_range[0].year) + '-' + str(date_range[0].month) + '-' + str(date_range[0].day)\
+ ' to ' + str(date_range[date_index_number].year) + '-' + str(date_range[date_index_number].month) + '-' + str(date_range[date_index_number].day)+ '\n'\
'Footprints within date range: ' + str(count)+ '\n' + 'Count NaN: ' + str( count_nan_month_total))
plt.xticks(ind, ('Jan (' + str(count_nan_month[0]) + ')', 'Feb (' + str(count_nan_month[1]) + ')', 'Mar (' + str(count_nan_month[2]) + ')', 'Apr (' + str(count_nan_month[3]) + ')', 'May (' + str(count_nan_month[4]) + ')', 'Jun (' + str(count_nan_month[5]) + ')', 'Jul (' + str(count_nan_month[6]) + ')', 'Aug (' + str(count_nan_month[7]) + ')', 'Sep (' + str(count_nan_month[8]) + ')', 'Oct (' + str(count_nan_month[9]) + ')', 'Nov (' + str(count_nan_month[10]) + ')', 'Dec (' + str(count_nan_month[11]) + ')'), rotation='vertical')
plt.legend((p1[0], p2[0]), ('>' + str(thershold) + ' ppm over: ' + str(sum(count_positives)) , '>' + str(thershold_neg) + 'ppm below: ' + str(sum(count_negatives))))
plt.legend(loc=9, bbox_to_anchor=(0.5, -0.1))
#pylab.legend(loc=9, bbox_to_anchor=(0.5, -0.1))
plt.show()
else:
print ('Modeled concentrations are never disagreeing with measured concentrations above or below threshold ', threshold)
index_station=index_station+1
#aggregated bar all stations at the end.
#one bar for overestimation ("positive") and one for under estimation ("negatives")
#overestimating more than threshold by month
y_pos = np.arange(len(list_stations))
figure(num=None, figsize=(20, 10), dpi=80, facecolor='w', edgecolor='k')
ax = plt.subplot(111)
p1 = ax.bar(y_pos-0.3, jan_list_positive ,width=0.03,color='r',align='center')
p2 = ax.bar(y_pos-0.25, feb_list_positive,width=0.03,color='b',align='center')
p3 = ax.bar(y_pos-0.20, mar_list_positive,width=0.03,color='b',align='center')
p4 = ax.bar(y_pos-0.15, apr_list_positive,width=0.03,color='b',align='center')
p5 = ax.bar(y_pos-0.10, may_list_positive,width=0.03,color='b',align='center')
p6 = ax.bar(y_pos-0.05, jun_list_positive,width=0.03,color='b',align='center')
p7 = ax.bar(y_pos+0.0, jul_list_positive,width=0.03,color='b',align='center')
p8 = ax.bar(y_pos+0.05, aug_list_positive,width=0.03,color='b',align='center')
p9 = ax.bar(y_pos+0.10, sep_list_positive,width=0.03,color='b',align='center')
p10 = ax.bar(y_pos +0.15, oct_list_positive,width=0.03,color='b',align='center')
p11 = ax.bar(y_pos+0.20, nov_list_positive,width=0.03,color='b',align='center')
p12 = ax.bar(y_pos+0.25, dec_list_positive,width=0.03,color='g',align='center')
ax.legend((p1[0], p2[0], p3[0], p4[0], p5[0], p6[0], p7[0], p8[0], p9[0], p10[0], p11[0], p12[0]), ('Jan', 'Feb', 'Mar', 'Apr','May', 'Jun', 'Jul', 'Aug', 'Sep', 'Oct', 'Nov', 'Dec'), ncol=2)
ax.yaxis.grid(True)
plt.xticks(y_pos, list_stations, rotation='vertical')
plt.axhline(0, color='black', lw=2)
#get the index of the last date (start at 0) to access it in the titles
date_index_number = (len(date_range) - 1)
plt.title('Count overestimating more than ' + str(thershold) + 'ppm' + '\n' + str(date_range[0].year) + '-' + str(date_range[0].month) + '-' + str(date_range[0].day)\
+ ' to ' + str(date_range[date_index_number].year) + '-' + str(date_range[date_index_number].month) + '-' + str(date_range[date_index_number].day)+ '\n' +\
'Hour(s): all')
plt.ylabel('count')
plt.show()
#underestimation more than threshold by month
y_pos = np.arange(len(list_stations))
figure(num=None, figsize=(20, 10), dpi=80, facecolor='w', edgecolor='k')
ax = plt.subplot(111)
p1 = ax.bar(y_pos-0.3, jan_list_negative ,width=0.03,color='r',align='center')
p2 = ax.bar(y_pos-0.25, feb_list_negative,width=0.03,color='b',align='center')
p3 = ax.bar(y_pos-0.20, mar_list_negative,width=0.03,color='b',align='center')
p4 = ax.bar(y_pos-0.15, apr_list_negative,width=0.03,color='b',align='center')
p5 = ax.bar(y_pos-0.10, may_list_negative,width=0.03,color='b',align='center')
p6 = ax.bar(y_pos-0.05, jun_list_negative,width=0.03,color='b',align='center')
p7 = ax.bar(y_pos+0.0, jul_list_negative,width=0.03,color='b',align='center')
p8 = ax.bar(y_pos+0.05, aug_list_negative,width=0.03,color='b',align='center')
p9 = ax.bar(y_pos+0.10, sep_list_negative,width=0.03,color='b',align='center')
p10 = ax.bar(y_pos +0.15, oct_list_negative,width=0.03,color='b',align='center')
p11 = ax.bar(y_pos+0.20, nov_list_negative,width=0.03,color='b',align='center')
p12 = ax.bar(y_pos+0.25, dec_list_negative,width=0.03,color='g',align='center')
ax.legend((p1[0], p2[0], p3[0], p4[0], p5[0], p6[0], p7[0], p8[0], p9[0], p10[0], p11[0], p12[0]), ('Jan', 'Feb', 'Mar', 'Apr','May', 'Jun', 'Jul', 'Aug', 'Sep', 'Oct', 'Nov', 'Dec'), ncol=2)
ax.yaxis.grid(True)
plt.xticks(y_pos, list_stations, rotation='vertical')
plt.axhline(0, color='black', lw=2)
#get the index of the last date (start at 0) to access it in the titles
date_index_number = (len(date_range) - 1)
plt.title('Count underestimating more than ' + str(thershold) + 'ppm' + '\n' + str(date_range[0].year) + '-' + str(date_range[0].month) + '-' + str(date_range[0].day)\
+ ' to ' + str(date_range[date_index_number].year) + '-' + str(date_range[date_index_number].month) + '-' + str(date_range[date_index_number].day)+ '\n' +\
'Hour(s): all')
plt.ylabel('count')
plt.show()
#overestimating more than threshold by hour
y_pos = np.arange(len(list_stations))
figure(num=None, figsize=(20, 10), dpi=80, facecolor='w', edgecolor='k')
ax = plt.subplot(111)
p1 = ax.bar(y_pos-0.4, zero_list_positive ,width=0.03,color='r',align='center')
p2 = ax.bar(y_pos-0.3, three_list_positive,width=0.03,color='g',align='center')
p3 = ax.bar(y_pos-0.2, six_list_positive,width=0.03,color='g',align='center')
p4 = ax.bar(y_pos-0.1, nine_list_positive,width=0.03,color='g',align='center')
p5 = ax.bar(y_pos+0, twelve_list_positive,width=0.03,color='g',align='center')
p6 = ax.bar(y_pos +0.1, fifteen_list_positive,width=0.03,color='g',align='center')
p7 = ax.bar(y_pos+0.2, eighteen_list_positive,width=0.03,color='g',align='center')
p8 = ax.bar(y_pos+0.3, twentyone_list_positive,width=0.03,color='y',align='center')
ax.yaxis.grid(True)
ax.legend((p1[0], p2[0], p3[0], p4[0], p5[0], p6[0], p7[0], p8[0]), ('0', '3', '6', '9','12', '15', '18', '21') )
plt.xticks(y_pos, list_stations, rotation='vertical')
plt.axhline(0, color='black', lw=2)
#get the index of the last date (start at 0) to access it in the titles
date_index_number = (len(date_range) - 1)
plt.title('Count overestimating more than ' + str(thershold) + 'ppm' + '\n' + str(date_range[0].year) + '-' + str(date_range[0].month) + '-' + str(date_range[0].day)\
+ ' to ' + str(date_range[date_index_number].year) + '-' + str(date_range[date_index_number].month) + '-' + str(date_range[date_index_number].day)+ '\n' +\
'Hour(s): all')
plt.ylabel('count')
plt.show()
#underestimating more than threshold by hour
y_pos = np.arange(len(list_stations))
figure(num=None, figsize=(20, 10), dpi=80, facecolor='w', edgecolor='k')
ax = plt.subplot(111)
p1 = ax.bar(y_pos-0.4, zero_list_negative ,width=0.03,color='r',align='center')
p2 = ax.bar(y_pos-0.3, three_list_negative,width=0.03,color='g',align='center')
p3 = ax.bar(y_pos-0.2, six_list_negative,width=0.03,color='g',align='center')
p4 = ax.bar(y_pos-0.1, nine_list_negative,width=0.03,color='g',align='center')
p5 = ax.bar(y_pos+0, twelve_list_negative ,width=0.03,color='g',align='center')
p6 = ax.bar(y_pos +0.1, fifteen_list_negative ,width=0.03,color='g',align='center')
p7 = ax.bar(y_pos+0.2, eighteen_list_negative,width=0.03,color='g',align='center')
p8 = ax.bar(y_pos+0.3, twentyone_list_negative,width=0.03,color='y',align='center')
ax.yaxis.grid(True)
ax.legend((p1[0], p2[0], p3[0], p4[0], p5[0], p6[0], p7[0], p8[0]), ('0', '3', '6', '9','12', '15', '18', '21'))
plt.xticks(y_pos, list_stations, rotation='vertical')
plt.axhline(0, color='black', lw=2)
#get the index of the last date (start at 0) to access it in the titles
date_index_number = (len(date_range) - 1)
plt.title('Count underestimating more than ' + str(thershold) + 'ppm' + '\n' + str(date_range[0].year) + '-' + str(date_range[0].month) + '-' + str(date_range[0].day)\
+ ' to ' + str(date_range[date_index_number].year) + '-' + str(date_range[date_index_number].month) + '-' + str(date_range[date_index_number].day)+ '\n' +\
'Hour(s): all')
plt.ylabel('count')
plt.show()
The deviation data extracted in the first cell is needed to run this cell.
The user chooses the threshold by writing a positive integer when prompted upon running the cell.
For each station is all_stations, one bar graph with the count over the threshold and one bar graph
with count for under the threshold is output.
At the end there are four aggregated bar graphs with all stations:
One for count of overestimations are shown by month, one for underestimating by month, and the same set of graphs
for the different hours.
threshold_input= input("Write number of standard deviations for display in graph (ex '1' or '2'): ")
number_standard_deviations= float(threshold_input)
list_stations=['GAT344', 'HEI', 'LIN099', 'LUT', 'PRS', 'TRN180', 'NOR100', 'HTM150', 'SVB150', 'PUY', 'OPE120', 'KRE250', 'JFJ', 'HPB131', 'SMR125']
matplotlib.rcParams.update({'font.size': 17})
#for the final aggregated comparison between the stations in the station list
zero_list_positive=[]
three_list_positive=[]
six_list_positive=[]
nine_list_positive=[]
twelve_list_positive=[]
fifteen_list_positive=[]
eighteen_list_positive=[]
twentyone_list_positive=[]
jan_list_positive=[]
feb_list_positive=[]
mar_list_positive=[]
apr_list_positive=[]
may_list_positive=[]
jun_list_positive=[]
jul_list_positive=[]
aug_list_positive=[]
sep_list_positive=[]
oct_list_positive=[]
nov_list_positive=[]
dec_list_positive=[]
zero_list_negative=[]
three_list_negative=[]
six_list_negative=[]
nine_list_negative=[]
twelve_list_negative=[]
fifteen_list_negative=[]
eighteen_list_negative=[]
twentyone_list_negative=[]
jan_list_negative=[]
feb_list_negative=[]
mar_list_negative=[]
apr_list_negative=[]
may_list_negative=[]
jun_list_negative=[]
jul_list_negative=[]
aug_list_negative=[]
sep_list_negative=[]
oct_list_negative=[]
nov_list_negative=[]
dec_list_negative=[]
index_station=0
for station in list_stations:
#want the threshold to be one standard deviation away:
#get standard devation based on diff list:
standard_deviation=(numpy.nanstd(list_of_list_diff_measurements_stilt_not_abs[index_station]))
#standard_deviation=numpy.nanstd(list_of_list_diff_measurements_stilt_not_abs[index_station])
#the threshold is based on number of standard deviations defined
thershold = float(standard_deviation*number_standard_deviations)
thershold_neg= -1* float(standard_deviation*number_standard_deviations)
#one set of bar graphs (by month and by hour), for each threshold
index = 0
#by hour
list_of_indexes_positive = np.empty(len(date_range))
list_of_indexes_positive[:] = np.nan
list_of_indexes_negative = np.empty(len(date_range))
list_of_indexes_negative[:] = np.nan
#by month
list_of_indexes_positive_month = np.empty(len(date_range))
list_of_indexes_positive_month[:] = np.nan
list_of_indexes_negative_month= np.empty(len(date_range))
list_of_indexes_negative_month[:] = np.nan
list_of_nan = np.empty(len(date_range))
list_of_nan[:] = np.nan
list_of_nan_month = np.empty(len(date_range))
list_of_nan_month[:] = np.nan
#a list where the "month numbers" are added to.
count_months = []
for value in list_of_list_diff_measurements_stilt_not_abs[index_station]:
count_months.append(date_range[index].month)
#also, add to the lists depending on if over or under the thresholds:
if value > int(thershold):
#the list is as long as the date_range
list_of_indexes_positive[index] = date_range[index].hour
list_of_indexes_positive_month[index] = date_range[index].month
if value < int(thershold_neg):
list_of_indexes_negative[index] = date_range[index].hour
list_of_indexes_negative_month[index] = date_range[index].month
if math.isnan(value) == True:
list_of_nan[index] = date_range[index].hour
list_of_nan_month[index] = date_range[index].month
#move to the next deviation value.
index = index + 1
#how many of these values are there for each month?
count_jan_month = 0
count_feb_month = 0
count_mar_month = 0
count_apr_month = 0
count_may_month = 0
count_jun_month = 0
count_jul_month = 0
count_aug_month = 0
count_sep_month = 0
count_oct_month = 0
count_nov_month = 0
count_dec_month = 0
#how many total in each month:
for month in count_months:
if month == 1:
count_jan_month = count_jan_month + 1
if month == 2:
count_feb_month = count_feb_month + 1
if month == 3:
count_mar_month = count_mar_month + 1
if month == 4:
count_apr_month = count_apr_month + 1
if month == 5:
count_may_month = count_may_month + 1
if month == 6:
count_jun_month = count_jun_month + 1
if month == 7:
count_jul_month = count_jul_month + 1
if month == 8:
count_aug_month = count_aug_month + 1
if month == 9:
count_sep_month = count_sep_month + 1
if month == 10:
count_oct_month = count_oct_month + 1
if month == 11:
count_nov_month = count_nov_month + 1
if month == 12:
count_dec_month = count_dec_month + 1
count_0 = 0
count_3 = 0
count_6= 0
count_9 = 0
count_12 = 0
count_15 = 0
count_18 = 0
count_21 = 0
#hours, overestimating more than the threshold.
for value in list_of_indexes_positive:
if value == 0:
count_0 = count_0 + 1
if value == 3:
count_3 = count_3 + 1
if value == 6:
count_6 = count_6 + 1
if value == 9:
count_9 = count_9 + 1
if value == 12:
count_12 = count_12 + 1
if value == 15:
count_15 = count_15 + 1
if value == 18:
count_18 = count_18 + 1
if value == 21:
count_21 = count_21 + 1
#for aggregated bar graph at the end
zero_list_positive.append(count_0)
three_list_positive.append(count_3)
six_list_positive.append(count_6)
nine_list_positive.append(count_9)
twelve_list_positive.append(count_12)
fifteen_list_positive.append(count_15)
eighteen_list_positive.append(count_18)
twentyone_list_positive.append(count_21)
count_jan = 0
count_feb = 0
count_mar= 0
count_apr = 0
count_may= 0
count_jun = 0
count_jul = 0
count_aug = 0
count_sep = 0
count_oct = 0
count_nov= 0
count_dec = 0
for value in list_of_indexes_positive_month:
if value == 1:
count_jan = count_jan + 1
if value == 2:
count_feb = count_feb + 1
if value == 3:
count_mar = count_mar + 1
if value == 4:
count_apr = count_apr + 1
if value == 5:
count_may = count_may + 1
if value == 6:
count_jun = count_jun + 1
if value == 7:
count_jul = count_jul + 1
if value == 8:
count_aug = count_aug + 1
if value == 9:
count_sep = count_sep + 1
if value == 10:
count_oct = count_oct + 1
if value == 11:
count_nov = count_nov + 1
if value == 12:
count_dec = count_dec + 1
jan_list_positive.append(count_jan)
feb_list_positive.append(count_feb)
mar_list_positive.append(count_mar)
apr_list_positive.append(count_apr)
may_list_positive.append(count_may)
jun_list_positive.append(count_jun)
jul_list_positive.append(count_jul)
aug_list_positive.append(count_aug)
sep_list_positive.append(count_sep)
oct_list_positive.append(count_oct)
nov_list_positive.append(count_nov)
dec_list_positive.append(count_dec)
count_0_neg = 0
count_3_neg = 0
count_6_neg = 0
count_9_neg = 0
count_12_neg = 0
count_15_neg = 0
count_18_neg = 0
count_21_neg = 0
for value in list_of_indexes_negative:
if value == 0:
count_0_neg = count_0_neg + 1
if value == 3:
count_3_neg = count_3_neg + 1
if value == 6:
count_6_neg = count_6_neg+ 1
if value == 9:
count_9_neg = count_9_neg + 1
if value == 12:
count_12_neg = count_12_neg + 1
if value == 15:
count_15_neg = count_15_neg + 1
if value == 18:
count_18_neg = count_18_neg + 1
if value == 21:
count_21_neg = count_21_neg + 1
zero_list_negative.append(count_0_neg)
three_list_negative.append(count_3_neg)
six_list_negative.append(count_6_neg)
nine_list_negative.append(count_9_neg)
twelve_list_negative.append(count_12_neg)
fifteen_list_negative.append(count_15_neg)
eighteen_list_negative.append(count_18_neg)
twentyone_list_negative.append(count_21_neg)
count_jan_neg = 0
count_feb_neg = 0
count_mar_neg= 0
count_apr_neg = 0
count_may_neg= 0
count_jun_neg = 0
count_jul_neg = 0
count_aug_neg = 0
count_sep_neg = 0
count_oct_neg = 0
count_nov_neg= 0
count_dec_neg = 0
for value in list_of_indexes_negative_month:
if value == 1:
count_jan_neg = count_jan_neg + 1
if value == 2:
count_feb_neg = count_feb_neg + 1
if value == 3:
count_mar_neg = count_mar_neg + 1
if value == 4:
count_apr_neg = count_apr_neg + 1
if value == 5:
count_may_neg = count_may_neg + 1
if value == 6:
count_jun_neg = count_jun_neg + 1
if value == 7:
count_jul_neg = count_jul_neg + 1
if value == 8:
count_aug_neg = count_aug_neg + 1
if value == 9:
count_sep_neg = count_sep_neg + 1
if value == 10:
count_oct_neg = count_oct_neg + 1
if value == 11:
count_nov_neg = count_nov_neg + 1
if value == 12:
count_dec_neg = count_dec_neg + 1
jan_list_negative.append(count_jan_neg)
feb_list_negative.append(count_feb_neg)
mar_list_negative.append(count_mar_neg)
apr_list_negative.append(count_apr_neg)
may_list_negative.append(count_may_neg)
jun_list_negative.append(count_jun_neg)
jul_list_negative.append(count_jul_neg)
aug_list_negative.append(count_aug_neg)
sep_list_negative.append(count_sep_neg)
oct_list_negative.append(count_oct_neg)
nov_list_negative.append(count_nov_neg)
dec_list_negative.append(count_dec_neg)
count_0_nan = 0
count_3_nan = 0
count_6_nan = 0
count_9_nan = 0
count_12_nan = 0
count_15_nan = 0
count_18_nan = 0
count_21_nan = 0
for value in list_of_nan:
if value == 0:
count_0_nan = count_0_nan + 1
if value == 3:
count_3_nan = count_3_nan + 1
if value == 6:
count_6_nan = count_6_nan+ 1
if value == 9:
count_9_nan = count_9_nan + 1
if value == 12:
count_12_nan = count_12_nan + 1
if value == 15:
count_15_nan = count_15_nan + 1
if value == 18:
count_18_nan = count_18_nan + 1
if value == 21:
count_21_nan = count_21_nan + 1
count_jan_nan = 0
count_feb_nan = 0
count_mar_nan = 0
count_apr_nan = 0
count_may_nan = 0
count_jun_nan = 0
count_jul_nan = 0
count_aug_nan = 0
count_sep_nan = 0
count_oct_nan = 0
count_nov_nan = 0
count_dec_nan = 0
for value in list_of_nan_month:
if value == 1:
count_jan_nan = count_jan_nan + 1
if value == 2:
count_feb_nan = count_feb_nan + 1
if value == 3:
count_mar_nan = count_mar_nan+ 1
if value == 4:
count_apr_nan = count_apr_nan + 1
if value == 5:
count_may_nan = count_may_nan + 1
if value == 6:
count_jun_nan = count_jun_nan + 1
if value == 7:
count_jul_nan = count_jul_nan + 1
if value == 8:
count_aug_nan = count_aug_nan + 1
if value == 9:
count_sep_nan = count_sep_nan + 1
if value == 10:
count_oct_nan = count_oct_nan + 1
if value == 11:
count_nov_nan = count_nov_nan + 1
if value == 12:
count_dec_nan = count_dec_nan + 1
list_negatives = [count_0_neg, count_3_neg, count_6_neg, count_9_neg, count_12_neg, count_15_neg, count_18_neg ,count_21_neg]
list_positives= [count_0, count_3, count_6,count_9, count_12, count_15, count_18,count_21]
list_nan = [count_0_nan, count_3_nan, count_6_nan, count_9_nan, count_12_nan, count_15_nan, count_18_nan ,count_21_nan]
#here by month:
list_positives_month = [count_jan, count_feb, count_mar, count_apr, count_may, count_jun, count_jul, count_aug, count_sep, count_oct, count_nov, count_dec]
list_negatives_month = [count_jan_neg, count_feb_neg, count_mar_neg, count_apr_neg, count_may_neg, count_jun_neg, count_jul_neg, count_aug_neg, count_sep_neg, count_oct_neg, count_nov_neg, count_dec_neg]
list_nan_month = [count_jan_nan, count_feb_nan, count_mar_nan, count_apr_nan, count_may_nan, count_jun_nan, count_jul_nan, count_aug_nan, count_sep_nan, count_oct_nan, count_nov_nan, count_dec_nan]
#here count for each month (total footprints by month to use in % calculation):
count_month_list = [count_jan_month, count_feb_month, count_mar_month, count_apr_month, count_may_month, count_jun_month, count_jul_month, count_aug_month, count_sep_month, count_oct_month, count_nov_month, count_dec_month]
total_negatives = 0
total_positives = 0
for value in list_negatives:
total_negatives=total_negatives + int(value)
for value in list_positives:
total_positives = total_positives + int(value)
#changed here... don't want to display empty graphs.
if total_negatives>0 or total_positives>0:
#MAKING bar graph for hour of the day:
count_positives = np.array((count_0, count_3, count_6, count_9, count_12, count_15, count_18, count_21))
count_negatives = np.array((count_0_neg, count_3_neg, count_6_neg, count_9_neg, count_12_neg, count_15_neg, count_18_neg, count_21_neg))
count_nan = np.array((count_0_nan, count_3_nan, count_6_nan, count_9_nan, count_12_nan, count_15_nan, count_18_nan ,count_21_nan))
count_nan_total=sum(count_nan)
#how many values there are:
N = len(count_positives)
ind = np.arange(N)
width = 0.35
matplotlib.rcParams.update({'font.size': 17})
figure(num=None, figsize=(11, 8), dpi=80, facecolor='w', edgecolor='k')
ax = plt.subplot(111)
ax.yaxis.grid(True)
p1 = plt.bar(ind, count_positives, width, color='red')
p2 = plt.bar(ind, count_negatives, width, bottom=count_positives, color='green')
#p3 = plt.bar(ind,count_nan, width, bottom=count_positives+count_negatives, color= 'grey')
plt.ylabel('Count')
plt.xlabel('Hour of the day (count NaN)')
#for title --> use variables of last cell as well (ex number of no measurements. nan here can also just mean no cells for it)
plt.title(station + '\n' + 'Model data differences greater than ' + str(number_standard_deviations) + ' standard deviation(s): +-' + str("%.2f" % thershold) + ' ppm' + '\n' + str(date_range[0].year) + '-' + str(date_range[0].month) + '-' + str(date_range[0].day)\
+ ' to ' + str(date_range[date_index_number].year) + '-' + str(date_range[date_index_number].month) + '-' + str(date_range[date_index_number].day)+ '\n'\
'Footprints within date range: ' + str(count)+ '\n' + 'Count NaN: ' + str(count_nan_total))
plt.xticks(ind, ('0 (' + str(count_nan[0]) + ')', '3 (' + str(count_nan[1]) + ')', '6 (' + str(count_nan[2]) + ')', '9 (' + str(count_nan[3]) + ')', '12 (' + str(count_nan[4]) + ')', '15 (' + str(count_nan[5]) + ')', '18 (' + str(count_nan[6]) + ')', '21 (' + str(count_nan[7]) + ')'), rotation='vertical')
plt.legend((p1[0], p2[0]), ('>' + str(thershold) + ' ppm over: ' + str(sum(count_positives)) , '>' + str(thershold_neg) + 'ppm below: ' + str(sum(count_negatives))))
plt.legend(loc=9, bbox_to_anchor=(0.5, -0.1))
#pylab.legend(loc=9, bbox_to_anchor=(0.5, -0.1))
plt.show()
#NEW: graph for by month:
count_positives_month = np.array((count_jan, count_feb, count_mar, count_apr, count_may, count_jun, count_jul, count_aug, count_sep, count_oct, count_nov, count_dec))
count_negatives_month = np.array((count_jan_neg, count_feb_neg, count_mar_neg, count_apr_neg, count_may_neg, count_jun_neg, count_jul_neg, count_aug_neg, count_sep_neg, count_oct_neg, count_nov_neg, count_dec_neg))
count_nan_month = np.array((count_jan_nan, count_feb_nan, count_mar_nan, count_apr_nan, count_may_nan, count_jun_nan, count_jul_nan, count_aug_nan, count_sep_nan, count_oct_nan, count_nov_nan, count_dec_nan))
figure(num=None, figsize=(11, 8), dpi=80, facecolor='w', edgecolor='k')
matplotlib.rcParams.update({'font.size': 17})
N = len(count_positives_month)
ax = plt.subplot(111)
ax.yaxis.grid(True)
ind = np.arange(N)
width = 0.35
p1 = plt.bar(ind, count_positives_month, width, color='red')
p2 = plt.bar(ind, count_negatives_month, width, bottom=count_positives_month, color='blue')
#p3 = plt.bar(ind,count_nan_month, width, bottom=count_positives_month+count_negatives_month, color= 'grey')
plt.ylabel('Count')
plt.xlabel('Month (count NaN)')
#for title --> use variables of last cell as well (ex number of no measurements. nan here can also just mean no cells for it)
plt.title(station + '\n' + 'Model data differences greater than ' + str(number_standard_deviations) + ' standard deviation(s): +-' + str("%.2f" % thershold) + ' ppm' + '\n' + str(date_range[0].year) + '-' + str(date_range[0].month) + '-' + str(date_range[0].day)\
+ ' to ' + str(date_range[date_index_number].year) + '-' + str(date_range[date_index_number].month) + '-' + str(date_range[date_index_number].day)+ '\n'\
'Footprints within date range: ' + str(count)+ '\n' + 'Count NaN: ' + str( count_nan_total))
plt.xticks(ind, ('Jan (' + str(count_nan_month[0]) + ')', 'Feb (' + str(count_nan_month[1]) + ')', 'Mar (' + str(count_nan_month[2]) + ')', 'Apr (' + str(count_nan_month[3]) + ')', 'May (' + str(count_nan_month[4]) + ')', 'Jun (' + str(count_nan_month[5]) + ')', 'Jul (' + str(count_nan_month[6]) + ')', 'Aug (' + str(count_nan_month[7]) + ')', 'Sep (' + str(count_nan_month[8]) + ')', 'Oct (' + str(count_nan_month[9]) + ')', 'Nov (' + str(count_nan_month[10]) + ')', 'Dec (' + str(count_nan_month[11]) + ')'), rotation='vertical')
plt.legend((p1[0], p2[0]), ('>' + str(thershold) + ' ppm over: ' + str(sum(count_positives)) , '>' + str(thershold_neg) + 'ppm below: ' + str(sum(count_negatives))))
plt.legend(loc=9, bbox_to_anchor=(0.5, -0.1))
#pylab.legend(loc=9, bbox_to_anchor=(0.5, -0.1))
plt.show()
else:
print ('Modeled concentrations are never disagreeing with measured concentrations above or below threshold ', threshold)
index_station=index_station+1
#aggregated bar all stations at the end.
#one bar for overestimation ("positive") and one for under estimation ("negatives")
#overestimating more than threshold by month
y_pos = np.arange(len(list_stations))
figure(num=None, figsize=(20, 10), dpi=80, facecolor='w', edgecolor='k')
ax = plt.subplot(111)
p1 = ax.bar(y_pos-0.3, jan_list_positive ,width=0.03,color='r',align='center')
p2 = ax.bar(y_pos-0.25, feb_list_positive,width=0.03,color='b',align='center')
p3 = ax.bar(y_pos-0.20, mar_list_positive,width=0.03,color='b',align='center')
p4 = ax.bar(y_pos-0.15, apr_list_positive,width=0.03,color='b',align='center')
p5 = ax.bar(y_pos-0.10, may_list_positive,width=0.03,color='b',align='center')
p6 = ax.bar(y_pos-0.05, jun_list_positive,width=0.03,color='b',align='center')
p7 = ax.bar(y_pos+0.0, jul_list_positive,width=0.03,color='b',align='center')
p8 = ax.bar(y_pos+0.05, aug_list_positive,width=0.03,color='b',align='center')
p9 = ax.bar(y_pos+0.10, sep_list_positive,width=0.03,color='b',align='center')
p10 = ax.bar(y_pos +0.15, oct_list_positive,width=0.03,color='b',align='center')
p11 = ax.bar(y_pos+0.20, nov_list_positive,width=0.03,color='b',align='center')
p12 = ax.bar(y_pos+0.25, dec_list_positive,width=0.03,color='g',align='center')
ax.legend((p1[0], p2[0], p3[0], p4[0], p5[0], p6[0], p7[0], p8[0], p9[0], p10[0], p11[0], p12[0]), ('Jan', 'Feb', 'Mar', 'Apr','May', 'Jun', 'Jul', 'Aug', 'Sep', 'Oct', 'Nov', 'Dec'), ncol=2)
ax.yaxis.grid(True)
plt.xticks(y_pos, list_stations, rotation='vertical')
plt.axhline(0, color='black', lw=2)
#get the index of the last date (start at 0) to access it in the titles
date_index_number = (len(date_range) - 1)
plt.title('Count model data difference greater than ' + str(number_standard_deviations) + ' standard deviation(s) over' + '\n' + str(date_range[0].year) + '-' + str(date_range[0].month) + '-' + str(date_range[0].day)\
+ ' to ' + str(date_range[date_index_number].year) + '-' + str(date_range[date_index_number].month) + '-' + str(date_range[date_index_number].day)+ '\n' +\
'Hour(s): all')
plt.ylabel('count')
plt.show()
#underestimation more than threshold by month
y_pos = np.arange(len(list_stations))
figure(num=None, figsize=(20, 10), dpi=80, facecolor='w', edgecolor='k')
ax = plt.subplot(111)
p1 = ax.bar(y_pos-0.3, jan_list_negative ,width=0.03,color='r',align='center')
p2 = ax.bar(y_pos-0.25, feb_list_negative,width=0.03,color='b',align='center')
p3 = ax.bar(y_pos-0.20, mar_list_negative,width=0.03,color='b',align='center')
p4 = ax.bar(y_pos-0.15, apr_list_negative,width=0.03,color='b',align='center')
p5 = ax.bar(y_pos-0.10, may_list_negative,width=0.03,color='b',align='center')
p6 = ax.bar(y_pos-0.05, jun_list_negative,width=0.03,color='b',align='center')
p7 = ax.bar(y_pos+0.0, jul_list_negative,width=0.03,color='b',align='center')
p8 = ax.bar(y_pos+0.05, aug_list_negative,width=0.03,color='b',align='center')
p9 = ax.bar(y_pos+0.10, sep_list_negative,width=0.03,color='b',align='center')
p10 = ax.bar(y_pos +0.15, oct_list_negative,width=0.03,color='b',align='center')
p11 = ax.bar(y_pos+0.20, nov_list_negative,width=0.03,color='b',align='center')
p12 = ax.bar(y_pos+0.25, dec_list_negative,width=0.03,color='g',align='center')
ax.legend((p1[0], p2[0], p3[0], p4[0], p5[0], p6[0], p7[0], p8[0], p9[0], p10[0], p11[0], p12[0]), ('Jan', 'Feb', 'Mar', 'Apr','May', 'Jun', 'Jul', 'Aug', 'Sep', 'Oct', 'Nov', 'Dec'), ncol=2)
ax.yaxis.grid(True)
plt.xticks(y_pos, list_stations, rotation='vertical')
plt.axhline(0, color='black', lw=2)
#get the index of the last date (start at 0) to access it in the titles
date_index_number = (len(date_range) - 1)
plt.title('Count model data difference greater than ' + str(number_standard_deviations) + ' standard deviation(s) below' + '\n' + str(date_range[0].year) + '-' + str(date_range[0].month) + '-' + str(date_range[0].day)\
+ ' to ' + str(date_range[date_index_number].year) + '-' + str(date_range[date_index_number].month) + '-' + str(date_range[date_index_number].day)+ '\n' +\
'Hour(s): all')
plt.ylabel('count')
plt.show()
#overestimating more than threshold by hour
y_pos = np.arange(len(list_stations))
figure(num=None, figsize=(20, 10), dpi=80, facecolor='w', edgecolor='k')
ax = plt.subplot(111)
p1 = ax.bar(y_pos-0.4, zero_list_positive ,width=0.03,color='r',align='center')
p2 = ax.bar(y_pos-0.3, three_list_positive,width=0.03,color='g',align='center')
p3 = ax.bar(y_pos-0.2, six_list_positive,width=0.03,color='g',align='center')
p4 = ax.bar(y_pos-0.1, nine_list_positive,width=0.03,color='g',align='center')
p5 = ax.bar(y_pos+0, twelve_list_positive,width=0.03,color='g',align='center')
p6 = ax.bar(y_pos +0.1, fifteen_list_positive,width=0.03,color='g',align='center')
p7 = ax.bar(y_pos+0.2, eighteen_list_positive,width=0.03,color='g',align='center')
p8 = ax.bar(y_pos+0.3, twentyone_list_positive,width=0.03,color='y',align='center')
ax.yaxis.grid(True)
ax.legend((p1[0], p2[0], p3[0], p4[0], p5[0], p6[0], p7[0], p8[0]), ('0', '3', '6', '9','12', '15', '18', '21') )
plt.xticks(y_pos, list_stations, rotation='vertical')
plt.axhline(0, color='black', lw=2)
#get the index of the last date (start at 0) to access it in the titles
date_index_number = (len(date_range) - 1)
plt.title('Count model data difference greater than ' + str(number_standard_deviations) + ' standard deviation(s) over' + '\n' + str(date_range[0].year) + '-' + str(date_range[0].month) + '-' + str(date_range[0].day)\
+ ' to ' + str(date_range[date_index_number].year) + '-' + str(date_range[date_index_number].month) + '-' + str(date_range[date_index_number].day)+ '\n' +\
'Hour(s): all')
plt.ylabel('count')
plt.show()
#underestimating more than threshold by hour
y_pos = np.arange(len(list_stations))
figure(num=None, figsize=(20, 10), dpi=80, facecolor='w', edgecolor='k')
ax = plt.subplot(111)
p1 = ax.bar(y_pos-0.4, zero_list_negative ,width=0.03,color='r',align='center')
p2 = ax.bar(y_pos-0.3, three_list_negative,width=0.03,color='g',align='center')
p3 = ax.bar(y_pos-0.2, six_list_negative,width=0.03,color='g',align='center')
p4 = ax.bar(y_pos-0.1, nine_list_negative,width=0.03,color='g',align='center')
p5 = ax.bar(y_pos+0, twelve_list_negative ,width=0.03,color='g',align='center')
p6 = ax.bar(y_pos +0.1, fifteen_list_negative ,width=0.03,color='g',align='center')
p7 = ax.bar(y_pos+0.2, eighteen_list_negative,width=0.03,color='g',align='center')
p8 = ax.bar(y_pos+0.3, twentyone_list_negative,width=0.03,color='y',align='center')
ax.yaxis.grid(True)
ax.legend((p1[0], p2[0], p3[0], p4[0], p5[0], p6[0], p7[0], p8[0]), ('0', '3', '6', '9','12', '15', '18', '21'))
plt.xticks(y_pos, list_stations, rotation='vertical')
plt.axhline(0, color='black', lw=2)
#get the index of the last date (start at 0) to access it in the titles
date_index_number = (len(date_range) - 1)
plt.title('Count model data difference greater than ' + str(number_standard_deviations) + ' standard deviation(s) under' + '\n' + str(date_range[0].year) + '-' + str(date_range[0].month) + '-' + str(date_range[0].day)\
+ ' to ' + str(date_range[date_index_number].year) + '-' + str(date_range[date_index_number].month) + '-' + str(date_range[date_index_number].day)+ '\n' +\
'Hour(s): all')
plt.ylabel('count')
plt.show()
In this cell, the footprints are subset into footprints that have a model data difference over and under one standard deviation.
The resulting subsets are divided by the average footprint for year 2017 at the stations and multiplied by 100. This give a percentage value for each footprint cell in the resulting relative footprint map.
A value over 100 means that the cell has seen more sensitivity when the model over- or underestimated the concentrations at the stations (separate maps).
A future development will be the possibility to export the date and times of the footprints with over- or underestimates to be used in the other Notebooks. Also, exporting the date and times of the "better performing" footprints is another option.
index_station=0
dates_over=[]
date_range[index_in_diff]
pathFP='/opt/stiltdata/fsicos2/stiltweb/'
#list_stations rather than all_stations in a few places
for station in list_stations:
count_diff_neg=0
count_diff_pos=0
index_in_diff=0
count_all=0
for value in list_of_list_diff_measurements_stilt_not_abs[index_station]:
filename=(pathFP+'slots/'+stations[station]['locIdent']+'/'+str(date_range[index_in_diff].year)+'/'+str(date_range[index_in_diff].month).zfill(2)+'/'
+str(date_range[index_in_diff].year)+'x'+str(date_range[index_in_diff].month).zfill(2)+'x'+str(date_range[index_in_diff].day).zfill(2)+'x'+str(date_range[index_in_diff].hour).zfill(2)+'/foot')
#for title. Not all stations in all_stations necessarily "valid" (both modeled and measured results)
current_station_for_title=station[0:3]
current_station=station
#the loop over all footprints:
if math.isnan(value)== False:
if count_all==0:
f_fp = cdf.Dataset(filename)
fp_all_summed=f_fp.variables['foot'][:,:,:]
lon=f_fp.variables['lon'][:]
lat=f_fp.variables['lat'][:]
count_all=count_all +1
else:
#no need to define lat and lon over and over ^
f_fp = cdf.Dataset(filename)
fp=f_fp.variables['foot'][:,:,:]
fp_all_summed=fp_all_summed + fp
count_all=count_all+1
#possibly change
stdev= np.nanstd(list_of_list_diff_measurements_stilt_not_abs[index_station])
#when under - do not forget minus! make interactive.
if value<-stdev:
if os.path.isfile(filename):
if count_diff_neg==0:
f_fp = cdf.Dataset(filename)
fp_summed_under=f_fp.variables['foot'][:,:,:]
#lon=f_fp.variables['lon'][:]
#lat=f_fp.variables['lat'][:]
count_diff_neg=count_diff_neg +1
else:
f_fp = cdf.Dataset(filename)
fp=f_fp.variables['foot'][:,:,:]
fp_summed_under=fp_summed_under + fp
count_diff_neg=count_diff_neg +1
else:
print ('file does not exist: ',filename)
#overestimates
if value>stdev:
if os.path.isfile(filename):
if count_diff_pos==0:
f_fp = cdf.Dataset(filename)
fp_summed_over=f_fp.variables['foot'][:,:,:]
#lon=f_fp.variables['lon'][:]
#lat=f_fp.variables['lat'][:]
count_diff_pos=count_diff_pos +1
else:
f_fp = cdf.Dataset(filename)
fp=f_fp.variables['foot'][:,:,:]
fp_summed_over=fp_summed_over + fp
count_diff_pos=count_diff_pos +1
else:
print ('file does not exist: ',filename)
#future potential: subset all "good values"
#if value>-stdev and value<stdev:
#dates_within_one_std.append(date_range[index_in_diff])
#adding for every value in diff_measurement_stilt_best_not_abs (one for each footprint in defined date range)
index_in_diff=index_in_diff+1
average_under=fp_summed_under/count_diff_neg
average_over=fp_summed_over/count_diff_pos
average_all=fp_all_summed/count_all
#if it is the same it will be 0...
#what where average_under is bigger. + value means more of underestimation
#diff= average_under-average_all
diff_under=(average_under/average_all)*100
diff_over=(average_over/average_all)*100
date_index_number = (len(date_range) - 1)
#unit before: ppm / ($\mu$mol / m$^{2}$s)
for_title=(str(current_station_for_title) + ' association with underestimation' + '\n' + 'Average over ' + str("%.2f" % np.nanstd(list_of_list_diff_measurements_stilt_not_abs[index_station])) + '(' + str(count_diff_neg) + ')' + \
' - average all valid footprints (' + str(count_all) + ')' + '\n' + \
str(date_range[0].year) + '-' + str(date_range[0].month) + '-' + str(date_range[0].day) + ' to ' + str(date_range[date_index_number].year) +\
'-' + str(date_range[date_index_number].month) + '-' + str(date_range[date_index_number].day)+ '\n' + 'Hour(s): all')
plot_maps_diff(diff_under, lon, lat, title=for_title, label='deviation from average', unit='[%]', linlog='linear', station=[current_station], zoom=current_station)
for_title_over=(str(current_station_for_title) + ' association with overestimation' + '\n' + 'Average over ' + str("%.2f" % np.nanstd(list_of_list_diff_measurements_stilt_not_abs[index_station])) + '(' + str(count_diff_pos) + ')' + \
' - average all valid footprints (' + str(count_all) + ')' + '\n' + \
str(date_range[0].year) + '-' + str(date_range[0].month) + '-' + str(date_range[0].day) + ' to ' + str(date_range[date_index_number].year) +\
'-' + str(date_range[date_index_number].month) + '-' + str(date_range[date_index_number].day)+ '\n' + 'Hour(s): all')
plot_maps_diff(diff_over, lon, lat, title=for_title_over, label='deviation from average', unit='[%]', linlog='linear', station=[current_station], zoom=current_station)
index_station=index_station+1
#potential for future (make for all stations! --> save in seperate lists)
#pickle date range for use in other notebooks.
#f = open("custom_date_range.pkl","wb")
#pickle.dump(dates_over,f)
#f.close()