Import modules and functions

In [40]:
%run ~/STILT_modules_v2.5_upd.ipynb
%run ~/ICOS_atmObs_modules_v2.5_upd.ipynb
%run ~/for_thesis.ipynb
#%run ~/ida_master_def_upd.ipynb

import matplotlib.pyplot as plt
import matplotlib
from matplotlib.pyplot import figure

from datetime import datetime

import netCDF4 as cdf
import numpy as np
import numpy
import logging
logging.getLogger().setLevel(logging.CRITICAL)
import datetime as dt
import os, fnmatch
import fnmatch
import pandas as pd
import matplotlib.pyplot as p
from mpl_toolkits.basemap import Basemap
import warnings
warnings.filterwarnings('ignore')
import statsmodels.api as sm
import statsmodels.formula.api as smf
from netCDF4 import Dataset
from matplotlib.pyplot import figure

stations = create_STILT_dictionary()
Functions defined for handling STILT output:
add_old_STILTid
atc_query
atc_stationlist
available_STILT_dictionary
byte2string
checklib
createDatetimeObjList
create_STILT_dictionary
date_range_date_hour
display
figure
found_fit
func_YYYY_to_YY
func_date_format_str
func_list_datetimeObj
get_ICOS_filename
get_ICOS_list
get_station_class
icosDatadf
icosMetadatadf
is_number
lonlat_2_ixjy
plot_available_STILT
plot_icos_stilt_timeseries
plot_maps
plot_maps_basemap
plot_maps_diff
plot_maps_land_cover
plot_maps_population
plot_stilt_timeseries
plot_stilt_timeseries_components
read_ICOS_atm_downloaded
read_ICOS_zipfile
read_STILT_dictionary
read_aggreg_footprints
read_emissions
read_nc
read_stilt_raw_timeseries
read_stilt_timeseries
read_stilt_ts
read_stilt_ts_old
select_land_cover_classes
select_stations
select_stations_anthro_order
select_stations_measured
str2dataframe
unzip
what_stations_see
what_stations_see_dic2
what_stations_see_return_all
Functions defined for handling ICOS time series:
add_old_STILTid
atc_query
atc_stationlist
available_STILT_dictionary
byte2string
checklib
createDatetimeObjList
create_STILT_dictionary
date_range_date_hour
display
figure
found_fit
func_YYYY_to_YY
func_date_format_str
func_list_datetimeObj
get_ICOS_filename
get_ICOS_list
get_station_class
icosDatadf
icosMetadatadf
is_number
lonlat_2_ixjy
plot_available_STILT
plot_icos_stilt_timeseries
plot_maps
plot_maps_basemap
plot_maps_diff
plot_maps_land_cover
plot_maps_population
plot_stilt_timeseries
plot_stilt_timeseries_components
read_ICOS_atm_downloaded
read_ICOS_zipfile
read_STILT_dictionary
read_aggreg_footprints
read_emissions
read_nc
read_stilt_raw_timeseries
read_stilt_timeseries
read_stilt_ts
read_stilt_ts_old
select_land_cover_classes
select_stations
select_stations_anthro_order
select_stations_measured
str2dataframe
unzip
what_stations_see
what_stations_see_dic2
what_stations_see_return_all

List of available stations

Modelled compared to measured CO2 concentrations with data from Globalview:
GAT344, HEI, LIN099, LUT, PRS, TRN180
With data from the carbon portal:
NOR100, HTM150, SVB150, PUY, OPE120, KRE250, JFJ, HPB131, SMR125
Need to run the cells in order:

  • Accessing measured concentrations from Globalview

  • Time series showing the modeled and measured concentrations for Globalview

  • Accessing measured concentrations from the carbon portal and time series showing modelled and measured concentrations

  • Average deviation by hour and by month

  • Count of times the deviation is over and below user defined threshold, by hour and by month

  • Set the date range and the hour(s) for the analysis

    In [2]:
    import math

    # --- User-defined analysis period ---
    # The start/end dates are read interactively; STILT output is 3-hourly,
    # so the date range below steps in 3-hour increments.
    year_start=input("Choose start date year (vary for station, currently only 2017): ")
    month_start=input("Choose start date month (write 1-12): ")
    day_start=input("Choose start date day (write number): ")
    
    start_date=dt.datetime(int(year_start),int(month_start),int(day_start),0)
    
    year_end=input("Choose end date year (vary for station, currently only 2017): ")
    month_end=input("Choose end date month (write 1-12): ")
    day_end=input("Choose end date day (write number): ")
    
    # Subtract 3 hours so the end date itself is excluded (last time step is
    # the final 3-hour slot of the previous day).
    end_date=dt.datetime(int(year_end), int(month_end), int(day_end),0)-dt.timedelta(hours=3)
    
    
    # 3-hourly timestamps matching the STILT model output resolution
    date_range = pd.date_range(start_date, end_date, freq='3H')
    
    # STILT viewer page listing available stations (for reference)
    url="https://stilt.icos-cp.eu/viewer/stationinfo"
    
    Choose start date year (vary for station, currently only 2017): 2017
    Choose start date month (write 1-12): 1
    Choose start date day (write number): 1
    Choose end date year (vary for station, currently only 2017): 2018
    Choose end date month (write 1-12): 1
    Choose end date day (write number): 1
    

    Accessing measured concentrations from Globalview

    More information on the Globalview product: here
    The date/times for each measured value are saved in separate lists. These are in turn saved in lists of lists that are accessed one at a time using index to access the correct lists corresponding to each station. Only the stations HEI, TRN, PRS, LUT, LIN and GAT have measured concentrations for 2017 (not including the data for stations that can be accessed directly from the carbon portal, see next section)

    In [3]:
    #latest version of globalview:
    GVp3='obspack_co2_1_GLOBALVIEWplus_v4.2.2_2019-06-05'
    
    #specific for 2017 (names of datasets, and availability)
    start_date=dt.datetime(2017,1,1,0)
    end_date=dt.datetime(2018,1,1,0)
    
    def read_nc(filename):
        """Read an ObsPack netCDF file into a date-indexed DataFrame.

        Parameters
        ----------
        filename : str
            Full path to the ObsPack netCDF file.

        Returns
        -------
        pandas.DataFrame
            Single column 'value' (CO2 mole fraction; callers below multiply
            by 1e6 to get ppm), indexed by the decoded observation datetimes.
        """
        fid_nc = cdf.Dataset(filename)
        try:
            value = fid_nc.variables['value'][:]
            time = fid_nc.variables['time'][:]
            date = cdf.num2date(time, units=fid_nc.variables["time"].units)
        finally:
            # Release the file handle even if a variable is missing/unreadable.
            fid_nc.close()
        return pd.DataFrame({'value': value}, index=date)
    
    !ls /opt/stiltdata/ObsPack/obspack_co2_1_GLOBALVIEWplus_v4.2.2_2019-06-05/data/nc
    
    #the stations I found to have measured concentrations:
    allstations=['HEI', 'TRN', 'PRS', 'LUT','LIN','GAT']
    path_ObsPack = '/opt/stiltdata/ObsPack/'
    path_GVp3 = path_ObsPack
    allfiles_GVp3 = fnmatch.filter(os.listdir(path_GVp3+GVp3+'/data/nc/'), '*insitu*')
    
    #reterived data into list. Appending to these lists continue in the cell with measured data reterived from ICOS
    measured_2017_list_of_lists=[]
    dates_2017_list_of_lists=[]
     
    for file in allfiles_GVp3:
        for station in allstations:
            if station.lower() in file:
                
                #if it is lin or gat, there are one out of many files I want to look at
                if station=='LIN':
                    if file=='co2_lin_surface-insitu_425_allvalid.nc':
    
                        fig = p.figure(figsize=(15,15))
                        ax = fig.add_subplot(3,1,1)
    
                        version=GVp3
                        path=path_GVp3+version+'/data/nc/'
                        filename1=path+file
                        print(filename1)
                        df_GVp3 = read_nc(filename1)
                        df_GVp3.name = file
                        ax.plot(df_GVp3.index,df_GVp3["value"]*1000000,'.r',label='GVp3')
    
    
                        ax.set_xlim(start_date,end_date)
                        p.title(file)
                        ax.set_ylabel('CO$_2$  [ppm]')
                        p.legend()
                        ax.grid(axis='x')
    
                        p.show()
    
                        measured_2017_list=[]
                        dates_2017_list=[]
                        for index in range(0,len(df_GVp3)): 
                            if df_GVp3.index.year[index]==2017:
                                measured_2017_list.append(df_GVp3.value[index]*1000000)
                                
                                #need to save dates for each sperate station. Know which dates/times have measured concentration.
                                #date used to match the correct stilt data. 
                                dates_2017_list.append(df_GVp3.index[index])
    
                        measured_2017_list_of_lists.append(measured_2017_list)
                        dates_2017_list_of_lists.append(dates_2017_list)
                        
                #for GAT: same as for LIN     
                elif station=='GAT':
                    if file=='co2_gat_surface-insitu_442_allvalid-341magl.nc':
                        fig = p.figure(figsize=(15,15))
                        ax = fig.add_subplot(3,1,1)
    
                        version=GVp3
                        path=path_GVp3+version+'/data/nc/'
                        filename1=path+file
                        print(filename1)
                        df_GVp3 = read_nc(filename1)
                        df_GVp3.name = file
                        ax.plot(df_GVp3.index,df_GVp3["value"]*1000000,'.r',label='GVp3')
    
    
                        ax.set_xlim(start_date,end_date)
                        p.title(file)
                        ax.set_ylabel('CO$_2$  [ppm]')
                        p.legend()
                        ax.grid(axis='x')
    
                        p.show()
    
                        measured_2017_list=[]
                        dates_2017_list=[]
                        for index in range(0,len(df_GVp3)):
                            #print('happening within')
                            if df_GVp3.index.year[index]==2017:
    
                                measured_2017_list.append(df_GVp3.value[index]*1000000)
                                dates_2017_list.append(df_GVp3.index[index])
    
                        measured_2017_list_of_lists.append(measured_2017_list)
                        dates_2017_list_of_lists.append(dates_2017_list)
                
                #unless stations GAT or LIN
                else:
                    
                    fig = p.figure(figsize=(15,15))
                    ax = fig.add_subplot(3,1,1)
    
                    version=GVp3
                    path=path_GVp3+version+'/data/nc/'
                    filename1=path+file
                    print(filename1)
                    df_GVp3 = read_nc(filename1)
                    df_GVp3.name = file
                    ax.plot(df_GVp3.index,df_GVp3["value"]*1000000,'.r',label='GVp3')
    
    
                    ax.set_xlim(start_date,end_date)
                    p.title(file)
                    ax.set_ylabel('CO$_2$  [ppm]')
                    p.legend()
                    ax.grid(axis='x')
    
                    p.show()
    
                    measured_2017_list=[]
                    dates_2017_list=[]
                    for index in range(0,len(df_GVp3)): 
                        if df_GVp3.index.year[index]==2017:
                            measured_2017_list.append(df_GVp3.value[index]*1000000)
                            dates_2017_list.append(df_GVp3.index[index])
    
                    measured_2017_list_of_lists.append(measured_2017_list)
                    dates_2017_list_of_lists.append(dates_2017_list)
    
    co2_aac_surface-insitu_60_allhours.nc
    co2_aao_aircraft-pfp_1_allvalid.nc
    co2_above_aircraft-insitu_1_allvalid.nc
    co2_abp_surface-flask_1_representative.nc
    co2_abp_surface-flask_26_marine.nc
    co2_abt_surface-insitu_6_allvalid.nc
    co2_acg_aircraft-pfp_1_allvalid.nc
    co2_acr_surface-insitu_60_allhours.nc
    co2_act_aircraft-insitu_428_allvalid-b200.nc
    co2_act_aircraft-insitu_428_allvalid-c130.nc
    co2_act_aircraft-pfp_1_allvalid-b200.nc
    co2_act_aircraft-pfp_1_allvalid-c130.nc
    co2_acv_surface-insitu_60_allhours.nc
    co2_aia_aircraft-flask_2_representative.nc
    co2_alf_aircraft-pfp_26_representative.nc
    co2_alt_surface-flask_1_representative.nc
    co2_alt_surface-flask_2_representative.nc
    co2_alt_surface-flask_426_representative.nc
    co2_alt_surface-flask_4_representative.nc
    co2_alt_surface-insitu_6_allvalid.nc
    co2_ame_surface-insitu_60_allhours.nc
    co2_ams_surface-flask_1_representative.nc
    co2_ams_surface-insitu_11_representative.nc
    co2_amt_surface-pfp_1_allvalid-107magl.nc
    co2_amt_tower-insitu_1_allvalid-107magl.nc
    co2_amt_tower-insitu_1_allvalid-12magl.nc
    co2_amt_tower-insitu_1_allvalid-30magl.nc
    co2_amy_surface-flask_1_representative.nc
    co2_aoa_aircraft-flask_19_allvalid.nc
    co2_aoz_surface-insitu_60_allhours.nc
    co2_ara_surface-flask_2_representative.nc
    co2_arcpac2008_aircraft-insitu_114_allvalid.nc
    co2_arctas_aircraft-insitu_428_allvalid-dc8.nc
    co2_asc_surface-flask_1_representative.nc
    co2_ask_surface-flask_1_representative.nc
    co2_avi_surface-flask_1_representative.nc
    co2_azr_surface-flask_1_representative.nc
    co2_bal_surface-flask_1_representative.nc
    co2_bao_surface-pfp_1_allvalid-300magl.nc
    co2_bao_tower-insitu_1_allvalid-100magl.nc
    co2_bao_tower-insitu_1_allvalid-22magl.nc
    co2_bao_tower-insitu_1_allvalid-300magl.nc
    co2_bck_surface-insitu_6_allvalid.nc
    co2_bcs_surface-flask_426_representative.nc
    co2_bgi_aircraft-pfp_1_allvalid.nc
    co2_bgu_surface-flask_11_representative.nc
    co2_bhd_surface-flask_1_representative.nc
    co2_bhd_surface-flask_426_representative.nc
    co2_bhd_surface-insitu_15_baseline.nc
    co2_bir_surface-insitu_56_allvalid.nc
    co2_bkt_surface-flask_1_representative.nc
    co2_bme_surface-flask_1_representative.nc
    co2_bmw_surface-flask_1_representative.nc
    co2_bne_aircraft-pfp_1_allvalid.nc
    co2_bra_surface-insitu_6_allvalid.nc
    co2_brm_tower-insitu_49_allvalid-12magl.nc
    co2_brm_tower-insitu_49_allvalid-132magl.nc
    co2_brm_tower-insitu_49_allvalid-212magl.nc
    co2_brm_tower-insitu_49_allvalid-45magl.nc
    co2_brm_tower-insitu_49_allvalid-72magl.nc
    co2_brw_surface-flask_1_representative.nc
    co2_brw_surface-flask_426_representative.nc
    co2_brw_surface-insitu_1_allvalid.nc
    co2_bsc_surface-flask_1_representative.nc
    co2_bsd_tower-insitu_160_allvalid-108magl.nc
    co2_bsd_tower-insitu_160_allvalid-248magl.nc
    co2_bsd_tower-insitu_160_allvalid-42magl.nc
    co2_calnex2010_aircraft-insitu_114_allvalid.nc
    co2_car_aircraft-pfp_1_allvalid.nc
    co2_cba_surface-flask_1_representative.nc
    co2_cba_surface-flask_4_representative.nc
    co2_cby_surface-insitu_6_allvalid.nc
    co2_cdl_surface-insitu_6_allvalid.nc
    co2_ces_tower-insitu_52_allvalid-120magl.nc
    co2_ces_tower-insitu_52_allvalid-200magl.nc
    co2_ces_tower-insitu_52_allvalid-20magl.nc
    co2_ces_tower-insitu_52_allvalid-60magl.nc
    co2_cfa_surface-flask_2_representative.nc
    co2_cgo_surface-flask_1_representative.nc
    co2_cgo_surface-flask_2_representative.nc
    co2_cgo_surface-flask_4_representative.nc
    co2_chl_surface-insitu_6_allvalid.nc
    co2_chm_surface-insitu_6_allvalid.nc
    co2_chr_surface-flask_1_representative.nc
    co2_chr_surface-flask_426_representative.nc
    co2_cib_surface-flask_1_representative.nc
    co2_cit_surface-insitu_115_allhours-200magl.nc
    co2_cma_aircraft-pfp_1_allvalid.nc
    co2_cmo_surface-flask_1_representative.nc
    co2_cob2003b_aircraft-insitu_59_allvalid.nc
    co2_cob2004_aircraft-insitu_59_allvalid.nc
    co2_cob_aircraft-flask_1_allvalid.nc
    co2_cob_aircraft-pfp_1_allvalid.nc
    co2_con_aircraft-flask_20_allvalid.nc
    co2_con_aircraft-insitu_20_allvalid.nc
    co2_cop_tower-insitu_59_allhours.nc
    co2_cpa_surface-flask_2_representative.nc
    co2_cps_surface-insitu_6_allvalid.nc
    co2_cpt_surface-flask_1_representative.nc
    co2_cpt_surface-insitu_36_marine.nc
    co2_cri_surface-flask_2_representative.nc
    co2_crv_aircraft-pfp_1_allvalid.nc
    co2_crv_surface-pfp_1_allvalid-32magl.nc
    co2_crv_tower-insitu_1_allvalid-17magl.nc
    co2_crv_tower-insitu_1_allvalid-32magl.nc
    co2_crv_tower-insitu_1_allvalid-5magl.nc
    co2_crz_surface-flask_1_representative.nc
    co2_cya_surface-flask_2_representative.nc
    co2_dc3_aircraft-insitu_428_allvalid-dc8.nc
    co2_dc3_aircraft-insitu_428_allvalid-falcon.nc
    co2_dec_surface-insitu_431_allvalid.nc
    co2_discover-aq_aircraft-insitu_428_allvalid-c130-co.nc
    co2_discover-aq_aircraft-insitu_428_allvalid-p3b-ca.nc
    co2_discover-aq_aircraft-insitu_428_allvalid-p3b-co.nc
    co2_discover-aq_aircraft-insitu_428_allvalid-p3b-tx.nc
    co2_dnd_aircraft-pfp_1_allvalid.nc
    co2_drp_shipboard-flask_1_representative.nc
    co2_dsi_surface-flask_1_representative.nc
    co2_dvv_tower-insitu_60_allvalid.nc
    co2_eec_surface-insitu_431_allvalid.nc
    co2_egb_surface-insitu_6_allvalid.nc
    co2_eic_surface-flask_1_representative.nc
    co2_ell_surface-flask_431_representative.nc
    co2_ena_surface-insitu_64_allvalid-10magl.nc
    co2_esp_aircraft-pfp_1_allvalid.nc
    co2_esp_surface-flask_2_representative.nc
    co2_esp_surface-insitu_6_allvalid.nc
    co2_est_surface-insitu_6_allvalid.nc
    co2_etl_aircraft-pfp_1_allvalid.nc
    co2_etl_surface-insitu_6_allvalid.nc
    co2_fkl_surface-flask_11_representative.nc
    co2_fne_surface-insitu_6_allvalid.nc
    co2_fpk_surface-insitu_60_allhours.nc
    co2_fsd_surface-insitu_6_allvalid.nc
    co2_ftl_aircraft-pfp_1_allvalid.nc
    co2_fwi_aircraft-pfp_1_allvalid.nc
    co2_gat_surface-insitu_425_allvalid-341magl.nc
    co2_gat_surface-insitu_442_allvalid-132magl.nc
    co2_gat_surface-insitu_442_allvalid-216magl.nc
    co2_gat_surface-insitu_442_allvalid-30magl.nc
    co2_gat_surface-insitu_442_allvalid-341magl.nc
    co2_gat_surface-insitu_442_allvalid-60magl.nc
    co2_gci01_tower-insitu_60_allvalid.nc
    co2_gci02_tower-insitu_60_allvalid.nc
    co2_gci03_tower-insitu_60_allvalid.nc
    co2_gci04_tower-insitu_60_allvalid.nc
    co2_gci05_tower-insitu_60_allvalid.nc
    co2_gic_surface-insitu_431_allvalid.nc
    co2_gif_surface-insitu_11_allvalid.nc
    co2_gmi_surface-flask_1_representative.nc
    co2_goz_surface-flask_1_representative.nc
    co2_gpa_surface-flask_2_representative.nc
    co2_gsfc_aircraft-insitu_430_allvalid.nc
    co2_haa_aircraft-pfp_1_allvalid.nc
    co2_hba_surface-flask_1_representative.nc
    co2_hdp_surface-insitu_3_nonlocal.nc
    co2_hei_surface-insitu_22_allvalid.nc
    co2_hfm_aircraft-pfp_1_allvalid.nc
    co2_hfm_tower-insitu_59_allhours.nc
    co2_hil_aircraft-pfp_1_allvalid.nc
    co2_hip_aircraft-insitu_59_allvalid.nc
    co2_hnp_surface-insitu_6_allvalid.nc
    co2_hpb_surface-flask_1_representative.nc
    co2_hpb_surface-insitu_425_allvalid-131magl.nc
    co2_hpb_surface-insitu_442_allvalid-131magl.nc
    co2_hpb_surface-insitu_442_allvalid-50magl.nc
    co2_hpb_surface-insitu_442_allvalid-93magl.nc
    co2_hsu_surface-flask_1_representative.nc
    co2_htm_surface-insitu_442_allvalid-150magl.nc
    co2_htm_surface-insitu_442_allvalid-30magl.nc
    co2_htm_surface-insitu_442_allvalid-70magl.nc
    co2_htm_tower-insitu_424_allvalid-150magl.nc
    co2_htm_tower-insitu_424_allvalid-30magl.nc
    co2_htm_tower-insitu_424_allvalid-70magl.nc
    co2_hun_surface-flask_1_representative.nc
    co2_hun_tower-insitu_35_allvalid-10magl.nc
    co2_hun_tower-insitu_35_allvalid-115magl.nc
    co2_hun_tower-insitu_35_allvalid-48magl.nc
    co2_hun_tower-insitu_35_allvalid-82magl.nc
    co2_ice_surface-flask_1_representative.nc
    co2_intex-b_aircraft-insitu_428_allvalid-dc8.nc
    co2_intex-na_aircraft-insitu_428_allvalid-dc8.nc
    co2_inu_surface-insitu_6_allvalid.nc
    co2_inx01_surface-insitu_60_allhours.nc
    co2_inx02_surface-insitu_60_allhours.nc
    co2_inx03_surface-insitu_60_allhours.nc
    co2_inx04_surface-insitu_60_allhours.nc
    co2_inx05_surface-insitu_60_allhours.nc
    co2_inx06_surface-insitu_60_allhours.nc
    co2_inx07_surface-insitu_60_allhours.nc
    co2_inx08_surface-insitu_60_allhours.nc
    co2_inx09_surface-insitu_60_allhours.nc
    co2_inx10_surface-insitu_60_allhours.nc
    co2_inx11_surface-insitu_60_allhours.nc
    co2_inx12_surface-insitu_60_allhours.nc
    co2_inx13_surface-insitu_60_allhours.nc
    co2_inx14_surface-insitu_60_allhours.nc
    co2_inx_aircraft-pfp_1_allvalid.nc
    co2_inx_surface-pfp_1_allvalid-121magl.nc
    co2_inx_surface-pfp_1_allvalid-125magl.nc
    co2_inx_surface-pfp_1_allvalid-129magl.nc
    co2_inx_surface-pfp_1_allvalid-130magl.nc
    co2_inx_surface-pfp_1_allvalid-137magl.nc
    co2_inx_surface-pfp_1_allvalid-39magl.nc
    co2_inx_surface-pfp_1_allvalid-40magl.nc
    co2_inx_surface-pfp_1_allvalid-54magl.nc
    co2_inx_surface-pfp_1_allvalid.nc
    co2_izo_surface-flask_1_representative.nc
    co2_izo_surface-insitu_27_allvalid.nc
    co2_jfj_surface-insitu_442_allvalid-5magl.nc
    co2_jfj_surface-insitu_49_allvalid.nc
    co2_jfj_surface-insitu_5_allvalid.nc
    co2_kas_surface-insitu_53_allvalid.nc
    co2_kcmp_tower-insitu_102_allhours-200magl.nc
    co2_key_surface-flask_1_representative.nc
    co2_korus-aq_aircraft-insitu_428_allvalid-dc8.nc
    co2_kre_surface-insitu_442_allvalid-10magl.nc
    co2_kre_surface-insitu_442_allvalid-125magl.nc
    co2_kre_surface-insitu_442_allvalid-250magl.nc
    co2_kre_surface-insitu_442_allvalid-50magl.nc
    co2_kum_surface-flask_1_representative.nc
    co2_kum_surface-flask_426_representative.nc
    co2_kum_surface-flask_4_representative.nc
    co2_kzd_surface-flask_1_representative.nc
    co2_kzm_surface-flask_1_representative.nc
    co2_lef_aircraft-pfp_1_allvalid.nc
    co2_lef_surface-pfp_1_allvalid-244magl.nc
    co2_lef_surface-pfp_1_allvalid-396magl.nc
    co2_lef_tower-insitu_1_allvalid-11magl.nc
    co2_lef_tower-insitu_1_allvalid-122magl.nc
    co2_lef_tower-insitu_1_allvalid-244magl.nc
    co2_lef_tower-insitu_1_allvalid-30magl.nc
    co2_lef_tower-insitu_1_allvalid-396magl.nc
    co2_lef_tower-insitu_1_allvalid-76magl.nc
    co2_lew_surface-pfp_1_allvalid-95magl.nc
    co2_lin_surface-insitu_425_allvalid.nc
    co2_ljo_surface-flask_426_representative.nc
    co2_ljo_surface-flask_4_representative.nc
    co2_llb_surface-flask_1_representative.nc
    co2_llb_surface-insitu_6_allvalid.nc
    co2_lln_surface-flask_1_representative.nc
    co2_lmp_surface-flask_1_representative.nc
    co2_lmu_surface-insitu_431_allvalid.nc
    co2_lut_surface-insitu_44_allvalid.nc
    co2_maa_surface-flask_2_representative.nc
    co2_mbc_surface-flask_1_representative.nc
    co2_mbo_surface-pfp_1_allvalid-11magl.nc
    co2_mbo_tower-insitu_1_allvalid-11magl.nc
    co2_mci_aircraft-pfp_1_allvalid.nc
    co2_mex_surface-flask_1_representative.nc
    co2_mhd_surface-flask_1_representative.nc
    co2_mhd_surface-insitu_11_representative.nc
    co2_mid_surface-flask_1_representative.nc
    co2_mkn_surface-flask_1_representative.nc
    co2_mlo_surface-flask_1_representative.nc
    co2_mlo_surface-flask_2_representative.nc
    co2_mlo_surface-flask_426_representative.nc
    co2_mlo_surface-flask_4_representative.nc
    co2_mlo_surface-insitu_1_allvalid.nc
    co2_mnm_surface-insitu_19_representative.nc
    co2_mqa_surface-flask_2_representative.nc
    co2_mrc_aircraft-pfp_1_allvalid.nc
    co2_mrc_surface-pfp_1_allvalid-59magl.nc
    co2_mrc_surface-pfp_1_allvalid-61magl.nc
    co2_mrc_tower-insitu_60_allvalid.nc
    co2_msh_surface-pfp_1_allvalid-46magl.nc
    co2_mvy_surface-insitu_1_allvalid.nc
    co2_mwo_surface-pfp_1_allvalid-46magl.nc
    co2_nat_surface-flask_1_representative.nc
    co2_nat_surface-flask_26_marine.nc
    co2_nha_aircraft-pfp_1_allvalid.nc
    co2_nmb_surface-flask_1_representative.nc
    co2_nor_surface-insitu_442_allvalid-100magl.nc
    co2_nor_surface-insitu_442_allvalid-32magl.nc
    co2_nor_surface-insitu_442_allvalid-59magl.nc
    co2_nor_tower-insitu_424_allvalid-100magl.nc
    co2_nor_tower-insitu_424_allvalid-32magl.nc
    co2_nor_tower-insitu_424_allvalid-59magl.nc
    co2_nwr_surface-flask_1_representative.nc
    co2_nwr_surface-insitu_3_nonlocal.nc
    co2_nwr_surface-pfp_1_allvalid-3magl.nc
    co2_obn_surface-flask_1_representative.nc
    co2_ofr_surface-insitu_68_allhours.nc
    co2_oil_aircraft-pfp_1_allvalid.nc
    co2_oli_surface-insitu_64_allvalid-10magl.nc
    co2_omp_surface-insitu_68_allhours.nc
    co2_omt_surface-insitu_68_allhours.nc
    co2_ong_surface-insitu_68_allhours.nc
    co2_ope_surface-insitu_442_allvalid-10magl.nc
    co2_ope_surface-insitu_442_allvalid-120magl.nc
    co2_ope_surface-insitu_442_allvalid-50magl.nc
    co2_ope_tower-insitu_11_allvalid-120magl.nc
    co2_opw_surface-flask_1_representative.nc
    co2_orc_aircraft-insitu_3_allvalid-merge10.nc
    co2_osi_tower-insitu_68_allhours-269magl.nc
    co2_osi_tower-insitu_68_allhours-31magl.nc
    co2_ota_surface-flask_2_representative.nc
    co2_owa_surface-insitu_68_allhours.nc
    co2_oxk_surface-flask_1_representative.nc
    co2_oyq_surface-insitu_68_allhours.nc
    co2_pal_surface-flask_1_representative.nc
    co2_pal_surface-insitu_30_continental.nc
    co2_pal_surface-insitu_30_marine.nc
    co2_pal_surface-insitu_30_nonlocal.nc
    co2_pdm_surface-flask_11_representative.nc
    co2_pdm_surface-insitu_11_allvalid.nc
    co2_pfa_aircraft-pfp_1_allvalid.nc
    co2_poc_shipboard-flask_1_representative.nc
    co2_prs_surface-insitu_21_allvalid.nc
    co2_psa_surface-flask_1_representative.nc
    co2_psa_surface-flask_4_representative.nc
    co2_pta_surface-flask_1_representative.nc
    co2_puy_surface-insitu_11_allvalid.nc
    co2_puy_surface-insitu_442_allvalid-10magl.nc
    co2_pv_surface-insitu_115_allhours-200magl.nc
    co2_rba-b_aircraft-pfp_26_representative.nc
    co2_rba_surface-insitu_3_nonlocal.nc
    co2_rce_surface-insitu_60_allhours.nc
    co2_rgl_tower-insitu_160_allvalid-45magl.nc
    co2_rgl_tower-insitu_160_allvalid-90magl.nc
    co2_rgv_surface-insitu_60_allhours.nc
    co2_rk1_surface-flask_426_representative.nc
    co2_rkw_surface-insitu_60_allhours.nc
    co2_rmm_surface-insitu_60_allhours.nc
    co2_rpb_surface-flask_1_representative.nc
    co2_rrl_surface-insitu_60_allhours.nc
    co2_rta_aircraft-pfp_1_allvalid.nc
    co2_ryo_surface-insitu_19_representative.nc
    co2_sam_aircraft-pfp_1_allvalid.nc
    co2_san_aircraft-pfp_1_allvalid.nc
    co2_san_aircraft-pfp_26_representative.nc
    co2_sca_aircraft-pfp_1_allvalid.nc
    co2_scs_shipboard-flask_1_representative.nc
    co2_sct_surface-pfp_1_allvalid-305magl.nc
    co2_sct_tower-insitu_1_allvalid-305magl.nc
    co2_sct_tower-insitu_1_allvalid-31magl.nc
    co2_sct_tower-insitu_1_allvalid-61magl.nc
    co2_sdz_surface-flask_1_representative.nc
    co2_seac4rs_aircraft-insitu_428_allvalid-ER2.nc
    co2_seac4rs_aircraft-insitu_428_allvalid-dc8.nc
    co2_senex2013_aircraft-insitu_114_allvalid.nc
    co2_sey_surface-flask_1_representative.nc
    co2_sgi_surface-flask_1_representative.nc
    co2_sgp_aircraft-pfp_1_allvalid.nc
    co2_sgp_surface-flask_1_representative.nc
    co2_sgp_surface-insitu_64_allvalid-60magl.nc
    co2_shm_surface-flask_1_representative.nc
    co2_sis_surface-flask_2_representative.nc
    co2_smo_surface-flask_1_representative.nc
    co2_smo_surface-flask_426_representative.nc
    co2_smo_surface-flask_4_representative.nc
    co2_smo_surface-insitu_1_allvalid.nc
    co2_smr_surface-insitu_442_allvalid-125magl.nc
    co2_smr_surface-insitu_442_allvalid-17magl.nc
    co2_smr_surface-insitu_442_allvalid-67magl.nc
    co2_smr_tower-insitu_421_allvalid-125magl.nc
    co2_smr_tower-insitu_421_allvalid-17magl.nc
    co2_smr_tower-insitu_421_allvalid-67magl.nc
    co2_snp_tower-insitu_1_allvalid-10magl.nc
    co2_snp_tower-insitu_1_allvalid-17magl.nc
    co2_snp_tower-insitu_1_allvalid-5magl.nc
    co2_songnex2015_aircraft-insitu_114_allvalid.nc
    co2_spl_surface-insitu_3_nonlocal.nc
    co2_spo_surface-flask_1_representative.nc
    co2_spo_surface-flask_2_representative.nc
    co2_spo_surface-flask_426_representative.nc
    co2_spo_surface-flask_4_representative.nc
    co2_spo_surface-insitu_1_allvalid.nc
    co2_ssc_surface-insitu_431_allvalid.nc
    co2_ssl_surface-insitu_107_allvalid.nc
    co2_start08_aircraft-insitu_59_allvalid.nc
    co2_stc_surface-flask_1_representative.nc
    co2_stm_surface-flask_1_representative.nc
    co2_stp_surface-flask_426_representative.nc
    co2_str_surface-pfp_1_allvalid-232magl.nc
    co2_sum_surface-flask_1_representative.nc
    co2_svb_surface-insitu_442_allvalid-150magl.nc
    co2_svb_surface-insitu_442_allvalid-35magl.nc
    co2_svb_surface-insitu_442_allvalid-85magl.nc
    co2_syo_surface-flask_1_representative.nc
    co2_syo_surface-insitu_8_representative.nc
    co2_tab_aircraft-pfp_26_representative.nc
    co2_tac_surface-flask_1_representative.nc
    co2_tac_tower-insitu_160_allvalid-100magl.nc
    co2_tac_tower-insitu_160_allvalid-185magl.nc
    co2_tac_tower-insitu_160_allvalid-54magl.nc
    co2_tap_surface-flask_1_representative.nc
    co2_texaqs2006_aircraft-insitu_114_allvalid.nc
    co2_tgc_aircraft-pfp_1_allvalid.nc
    co2_thd_aircraft-pfp_1_allvalid.nc
    co2_thd_surface-flask_1_representative.nc
    co2_tik_surface-flask_1_representative.nc
    co2_tom_aircraft-insitu_1_allvalid.nc
    co2_tpd_surface-insitu_6_allvalid.nc
    co2_trace-p_aircraft-insitu_428_allvalid-dc8.nc
    co2_trace-p_aircraft-insitu_428_allvalid-p3b.nc
    co2_trn_tower-insitu_11_allvalid-180magl.nc
    co2_tta_tower-insitu_160_allvalid-222magl.nc
    co2_ulb_aircraft-pfp_1_allvalid.nc
    co2_ush_surface-flask_1_representative.nc
    co2_uta_surface-flask_1_representative.nc
    co2_utdbk_tower-insitu_432_allvalid.nc
    co2_utmsa_tower-insitu_432_allvalid.nc
    co2_utmur_tower-insitu_432_allvalid.nc
    co2_utrpk_tower-insitu_432_allvalid.nc
    co2_utsug_tower-insitu_432_allvalid.nc
    co2_utuou_tower-insitu_432_allvalid.nc
    co2_uum_surface-flask_1_representative.nc
    co2_vac_surface-insitu_431_allvalid.nc
    co2_wao_surface-insitu_13_allvalid.nc
    co2_wbi_aircraft-pfp_1_allvalid.nc
    co2_wbi_surface-pfp_1_allvalid-379magl.nc
    co2_wbi_tower-insitu_1_allvalid-31magl.nc
    co2_wbi_tower-insitu_1_allvalid-379magl.nc
    co2_wbi_tower-insitu_1_allvalid-99magl.nc
    co2_wgc_surface-pfp_1_allvalid-483magl.nc
    co2_wgc_surface-pfp_1_allvalid-91magl.nc
    co2_wgc_tower-insitu_1_allvalid-30magl.nc
    co2_wgc_tower-insitu_1_allvalid-483magl.nc
    co2_wgc_tower-insitu_1_allvalid-91magl.nc
    co2_wis_surface-flask_1_representative.nc
    co2_wkt_surface-pfp_1_allvalid-122magl.nc
    co2_wkt_surface-pfp_1_allvalid-457magl.nc
    co2_wkt_tower-insitu_1_allvalid-122magl.nc
    co2_wkt_tower-insitu_1_allvalid-244magl.nc
    co2_wkt_tower-insitu_1_allvalid-30magl.nc
    co2_wkt_tower-insitu_1_allvalid-457magl.nc
    co2_wkt_tower-insitu_1_allvalid-62magl.nc
    co2_wkt_tower-insitu_1_allvalid-9magl.nc
    co2_wlg_surface-flask_1_representative.nc
    co2_wpc_shipboard-flask_1_representative.nc
    co2_wsa_surface-insitu_6_allvalid.nc
    co2_wsd_tower-insitu_60_allvalid.nc
    co2_xic_surface-insitu_431_allvalid.nc
    co2_yon_surface-insitu_19_representative.nc
    co2_zep_surface-flask_1_representative.nc
    co2_zep_surface-insitu_442_allvalid-15magl.nc
    co2_zep_surface-insitu_56_allvalid.nc
    /opt/stiltdata/ObsPack/obspack_co2_1_GLOBALVIEWplus_v4.2.2_2019-06-05/data/nc/co2_gat_surface-insitu_442_allvalid-341magl.nc
    
    /opt/stiltdata/ObsPack/obspack_co2_1_GLOBALVIEWplus_v4.2.2_2019-06-05/data/nc/co2_hei_surface-insitu_22_allvalid.nc
    
    /opt/stiltdata/ObsPack/obspack_co2_1_GLOBALVIEWplus_v4.2.2_2019-06-05/data/nc/co2_lin_surface-insitu_425_allvalid.nc
    
    /opt/stiltdata/ObsPack/obspack_co2_1_GLOBALVIEWplus_v4.2.2_2019-06-05/data/nc/co2_lut_surface-insitu_44_allvalid.nc
    
    /opt/stiltdata/ObsPack/obspack_co2_1_GLOBALVIEWplus_v4.2.2_2019-06-05/data/nc/co2_prs_surface-insitu_21_allvalid.nc
    
    /opt/stiltdata/ObsPack/obspack_co2_1_GLOBALVIEWplus_v4.2.2_2019-06-05/data/nc/co2_trn_tower-insitu_11_allvalid-180magl.nc
    

    Time series showing the modeled and measured concentrations for Globalview

In this cell, modelled concentrations are plotted together with measured concentrations. Below that time series, a second time series shows only the model–data differences, i.e. the result of subtracting the measured concentration values from the modelled ones. The model–data differences are saved in lists to be used in sections further down in the notebook.

    In [41]:
    #stations with Globalview (ObsPack) reference data, in the order their
    #measurement lists were stored earlier in the notebook (see the order in
    #which the graphs were produced)
    all_stations = ['GAT344', 'HEI', 'LIN099', 'LUT', 'PRS', 'TRN180']
    station_iterator = iter(all_stations)

    #accumulators filled once per station in the loop below:
    #per-station arrays of signed (modelled - measured) differences
    list_of_list_diff_measurements_stilt_not_abs = []

    #per-station totals of NaN values; a NaN can stem from a missing measured
    #value (most common) or a missing modelled concentration
    list_count_nan = []
    list_stations = []

    #position in the pre-built lists of lists of measured data; advanced once
    #per station
    index = 0
    
    for station in station_iterator:

        #STILT modelled concentration time series for this station
        df = read_stilt_timeseries(station, date_range)

        #measured values aligned to date_range; NaN wherever no measurement
        #matches a modelled time step
        list_measurements = np.empty(len(date_range))
        list_measurements[:] = np.nan

        #modelled values aligned the same way (NaN where missing) so both
        #arrays have the same length and can be subtracted element-wise
        list_stilt_values = np.empty(len(date_range))
        list_stilt_values[:] = np.nan

        #index of the current modelled value within df / date_range
        count = 0

        #index into the measured-data lists where the previous match was found;
        #the search for the next match resumes there instead of rescanning from
        #the beginning (both series are time-ordered)
        start_count = 0

        #one iteration per modelled 3-hourly value (any column would do; the
        #fuel component only drives the loop)
        for _ in df['co2.fuel']:

            #timestamp of the current modelled concentration
            stilt_year = df['co2.fuel'].index[count].year
            stilt_month = df['co2.fuel'].index[count].month
            stilt_day = df['co2.fuel'].index[count].day
            stilt_hour = df['co2.fuel'].index[count].hour

            #replace the NaN placeholder with the modelled value
            list_stilt_values[count] = df['co2.stilt'][count]

            #resume the measured-data scan after the last match (0 on the
            #first pass); the original had two identical branches for
            #start_count == 0 and start_count > 0 -- merged here
            count_measurements = start_count

            for _ in range(start_count, len(dates_2017_list_of_lists[index]), 1):

                measured_date = dates_2017_list_of_lists[index][count_measurements]

                #a measured value matches when its date and hour equal the
                #modelled timestamp (modelled output is 3-hourly)
                if (measured_date.year == stilt_year and measured_date.month == stilt_month \
                    and measured_date.day == stilt_day and measured_date.hour == stilt_hour):

                    #index selects the station, count_measurements the matching
                    #measured value, count the slot in the aligned output array
                    list_measurements[count] = measured_2017_list_of_lists[index][count_measurements]

                    count_measurements = count_measurements + 1

                    #next search resumes after this match: at most one measured
                    #value matches each modelled timestamp, so stop looking
                    start_count = count_measurements
                    break
                else:
                    #no match yet; advance to the next measured value
                    count_measurements = count_measurements + 1

            #advance to the next modelled value (outer loop)
            count = count + 1

        #count time steps where EITHER the measured or the modelled value is
        #missing (NaN); those are excluded from the statistics below
        index2 = 0
        count_nan = 0
        for value in list_measurements:
            #BUGFIX: the original tested math.isnan(list_stilt_values[index2] == True),
            #i.e. isnan of a boolean, which is always False -- missing modelled
            #values were never counted
            if math.isnan(value) or math.isnan(list_stilt_values[index2]):
                count_nan = count_nan + 1
            index2 = index2 + 1

        #once this station is checked, store its NaN total
        list_count_nan.append(count_nan)

        #element-wise difference; positive means the model overestimates
        diff_measurements_stilt_not_abs = np.array(list_stilt_values) - np.array(list_measurements)

        #mean and spread of the deviation, ignoring NaN entries; signed (not
        #absolute) values are used so over- and underestimates can cancel in
        #the mean yet still show up in the standard deviation
        diff_measurements_stilt_best_average = np.nanmean(diff_measurements_stilt_not_abs)
        diff_measurements_stilt_best_stdev = np.nanstd(diff_measurements_stilt_not_abs)

        list_of_list_diff_measurements_stilt_not_abs.append(diff_measurements_stilt_not_abs)

        #--- plot: modelled vs measured on top, deviation panel below ---
        matplotlib.rcParams.update({'font.size': 17})

        fig = p.figure(figsize=(20,20))

        ax = fig.add_subplot(6,1,1)

        #df.index equals date_range, so it serves as the x-axis for both the
        #modelled and the measured series
        p.plot(df.index,list_stilt_values,linestyle=':',color='b',label='Modeled concentrations')
        p.plot(df.index,list_measurements,linestyle='-',color='k',label='Measured concentrations')

        date_index_number = (len(date_range) - 1)

        p.title(station + '\n' + str(date_range[0].year) + '-' + str(date_range[0].month) + '-' + str(date_range[0].day)\
               + ' to ' + str(date_range[date_index_number].year) + '-' + str(date_range[date_index_number].month) + '-' + str(date_range[date_index_number].day)\
               +  '\n'+ 'Number of requested footprints: ' + str(count) + '\n' + 'Number of NaN values: ' + str(count_nan)+  '\n' +\
                'Modeled concentration, average deviation from measured concentration: ' + str("%.2f" % diff_measurements_stilt_best_average) + ' ppm' + '\n' + \
                'Standard deviation of deviation: ' + str("%.2f" % diff_measurements_stilt_best_stdev) + ' ppm')

        ax.set_xlim(start_date,end_date)
        ax.set_ylabel('CO$_2$  [ppm]')
        ax.grid(axis='x')
        ax.legend(loc='upper right')
        ax = fig.add_subplot(6,1,2)

        #deviation panel with a zero reference line (= perfect agreement)
        p.plot(df.index,diff_measurements_stilt_not_abs,linestyle=':',color='b',label='Deviation from measured values')

        zeros_for_measured_concentration = np.zeros(len(date_range))
        p.plot(df.index,zeros_for_measured_concentration,linestyle='-',color='k')

        ax.set_xlim(start_date,end_date)
        ax.set_ylabel('CO$_2$ [ppm]')
        ax.grid(axis='x')
        ax.legend(loc='upper right')

        #both panels in one figure
        p.show()

        #move on to the next station's measured-data lists
        index = index + 1
    

    Accessing measured concentrations from the carbon portal and time series showing modelled and measured concentrations

    OBS: the two previous cells need to be run before running this one!
    After this cell, all available stations have lists with deviations between modelled and measured concentrations that can be used in the Notebook cells that look at the deviations.

    In [42]:
    #ICOS stations: three-letter code plus optional sampling height (m a.g.l.)
    all_stations_orig = ['NOR100', 'HTM150', 'SVB150', 'PUY', 'OPE120', 'KRE250', 'JFJ', 'HPB131', 'SMR125']

    #UNCOMMENT IF ONLY WANT DATA FROM ICOS (not running the Globalview cells)
    #list_of_list_diff_measurements_stilt_not_abs=[]
    #list_count_nan=[]
    #list_stations=[]

    #the carbon-portal lookup expects the plain three-letter station codes
    icos_stations = [station_id[:3] for station_id in all_stations_orig]

    #get_ICOS_filename --> see above; download=True fetches the data files into
    #the local folder (the default is lookup only)
    ICOS_files = get_ICOS_filename(icos_stations, download=True)

    station_iterator = iter(all_stations_orig)
    
    for station in station_iterator:

        #STILT modelled concentration time series for this station
        df = read_stilt_timeseries(station, date_range)

        #ICOS file names embed the sampling height as e.g. 'NOR_100'; build the
        #matching search string from the station id
        if len(station) > 3:
            searchName = station[0:3] + '_' + str(int(station[3:]))
        else:
            searchName = station

        #any downloaded ICOS files with measured concentrations for this station?
        filenames = [i for i in ICOS_files.fileName if searchName in i]

        if not filenames:
            print ('No ICOS data available for station ',searchName)
        else:
            for filename in filenames:

                #measured concentrations (metadata frame is not used below)
                df_stationdata, df_stationmetadata = read_ICOS_atm_downloaded(filename)

                if df.empty:
                    #BUGFIX: the original "print(...), station" (a Python-2
                    #leftover) never printed the station name
                    print ('No STILT data for the defined date_range for station: ', station)

                else:

                    #SAME PROCESS AS FOR GLOBALVIEW DATA
                    list_stations.append(station)

                    #measured and modelled values aligned to date_range,
                    #NaN where missing so both arrays stay the same length
                    list_measurements = np.empty(len(date_range))
                    list_measurements[:] = np.nan

                    list_stilt_values = np.empty(len(date_range))
                    list_stilt_values[:] = np.nan

                    #count: index of the current modelled value
                    count = 0

                    #start_count: where in the measured data the previous match
                    #was found; the next search resumes there (time-ordered data)
                    start_count = 0

                    for _ in df['co2.fuel']:

                        #timestamp of the current modelled concentration
                        stilt_year = df['co2.fuel'].index[count].year
                        stilt_month = df['co2.fuel'].index[count].month
                        stilt_day = df['co2.fuel'].index[count].day
                        stilt_hour = df['co2.fuel'].index[count].hour

                        list_stilt_values[count] = df['co2.stilt'][count]

                        #the original had two identical branches for
                        #start_count == 0 and start_count > 0 -- merged here
                        count_measurements = start_count

                        for _ in range(start_count, len(df_stationdata), 1):

                            #ICOS date columns are zero-padded strings
                            if (df_stationdata['Year'][count_measurements] == str(stilt_year) and df_stationdata['Month'][count_measurements] == str(stilt_month).zfill(2) \
                                and df_stationdata['Day'][count_measurements] == str(stilt_day).zfill(2) and df_stationdata['Hour'][count_measurements] == str(stilt_hour).zfill(2)):

                                list_measurements[count] = df_stationdata['CO2'][count_measurements]

                                count_measurements = count_measurements + 1

                                #at most one measurement matches each modelled
                                #timestamp, so resume after it next time
                                start_count = count_measurements
                                break
                            else:
                                count_measurements = count_measurements + 1

                        count = count + 1

                    #count steps with a missing measured or modelled value
                    index = 0
                    count_nan = 0
                    for value in list_measurements:
                        #BUGFIX: the original tested isnan(list_stilt_values[index] == True),
                        #which is always False, so missing modelled values were
                        #never counted
                        if math.isnan(value) or math.isnan(list_stilt_values[index]):
                            count_nan = count_nan + 1
                        index = index + 1
                    list_count_nan.append(count_nan)

                    #positive difference = model overestimate
                    diff_measurements_stilt_not_abs = np.array(list_stilt_values) - np.array(list_measurements)

                    diff_measurements_stilt_best_average = np.nanmean(diff_measurements_stilt_not_abs)

                    #signed (not absolute) values: the spread captures over-
                    #and underestimates alike
                    diff_measurements_stilt_best_stdev = np.nanstd(diff_measurements_stilt_not_abs)

                    list_of_list_diff_measurements_stilt_not_abs.append(diff_measurements_stilt_not_abs)

                    #--- plot: modelled vs measured on top, deviation below ---
                    matplotlib.rcParams.update({'font.size': 17})

                    fig = p.figure(figsize=(20,20))

                    ax = fig.add_subplot(6,1,1)

                    p.plot(df.index,df['co2.stilt'],linestyle=':',color='b',label='Modeled concentrations')

                    p.plot(df.index,list_measurements,linestyle='-',color='k',label='Measured concentrations')

                    date_index_number = (len(date_range) - 1)

                    p.title(station + '\n' + str(date_range[0].year) + '-' + str(date_range[0].month) + '-' + str(date_range[0].day)\
                           + ' to ' + str(date_range[date_index_number].year) + '-' + str(date_range[date_index_number].month) + '-' + str(date_range[date_index_number].day)\
                           +  '\n'+ 'Number of requested footprints: ' + str(count) + '\n' + 'Number of NaN values: ' + str(count_nan)+  '\n' +\
                            'Modeled concentration, average deviation from measured concentration: ' + str("%.2f" % diff_measurements_stilt_best_average) + ' ppm' + '\n' + \
                            'Standard deviation of deviation: ' + str("%.2f" % diff_measurements_stilt_best_stdev) + ' ppm')

                    ax.set_xlim(start_date,end_date)
                    ax.set_ylabel('CO$_2$  [ppm]')
                    ax.grid(axis='x')
                    ax.legend(loc='upper right')
                    ax = fig.add_subplot(6,1,2)

                    #deviation panel with a zero reference line
                    p.plot(df.index,diff_measurements_stilt_not_abs,linestyle=':',color='b',label='Deviation from measured values')

                    zeros_for_measured_concentration = np.zeros(len(date_range))
                    p.plot(df.index,zeros_for_measured_concentration,linestyle='-',color='k')

                    ax.set_xlim(start_date,end_date)

                    ax.set_ylabel('CO$_2$ [ppm]')
                    ax.grid(axis='x')
                    ax.legend(loc='upper right')

                    p.show()
                    
                    
    
    download all files:  0      ICOS_ATC_L2_L2-2019.1_HPB_93.0_CTS_CO2.zip
    1       ICOS_ATC_L2_L2-2019.1_JFJ_5.0_CTS_CO2.zip
    2      ICOS_ATC_L2_L2-2019.1_KRE_50.0_CTS_CO2.zip
    3      ICOS_ATC_L2_L2-2019.1_NOR_32.0_CTS_CO2.zip
    4     ICOS_ATC_L2_L2-2019.1_NOR_100.0_CTS_CO2.zip
    5     ICOS_ATC_L2_L2-2019.1_KRE_125.0_CTS_CO2.zip
    6     ICOS_ATC_L2_L2-2019.1_OPE_120.0_CTS_CO2.zip
    7      ICOS_ATC_L2_L2-2019.1_NOR_59.0_CTS_CO2.zip
    8      ICOS_ATC_L2_L2-2019.1_OPE_50.0_CTS_CO2.zip
    9      ICOS_ATC_L2_L2-2019.1_HTM_70.0_CTS_CO2.zip
    10     ICOS_ATC_L2_L2-2019.1_HTM_30.0_CTS_CO2.zip
    11    ICOS_ATC_L2_L2-2019.1_HPB_131.0_CTS_CO2.zip
    12    ICOS_ATC_L2_L2-2019.1_KRE_250.0_CTS_CO2.zip
    13    ICOS_ATC_L2_L2-2019.1_HTM_150.0_CTS_CO2.zip
    14     ICOS_ATC_L2_L2-2019.1_KRE_10.0_CTS_CO2.zip
    15     ICOS_ATC_L2_L2-2019.1_OPE_10.0_CTS_CO2.zip
    16     ICOS_ATC_L2_L2-2019.1_HPB_50.0_CTS_CO2.zip
    17     ICOS_ATC_L2_L2-2019.1_PUY_10.0_CTS_CO2.zip
    18    ICOS_ATC_L2_L2-2019.1_SMR_125.0_CTS_CO2.zip
    19     ICOS_ATC_L2_L2-2019.1_SMR_16.8_CTS_CO2.zip
    20     ICOS_ATC_L2_L2-2019.1_SMR_67.2_CTS_CO2.zip
    21    ICOS_ATC_L2_L2-2019.1_SVB_150.0_CTS_CO2.zip
    22     ICOS_ATC_L2_L2-2019.1_SVB_35.0_CTS_CO2.zip
    23     ICOS_ATC_L2_L2-2019.1_SVB_85.0_CTS_CO2.zip
    Name: fileName, dtype: object
    file already exists, skip...ICOS_ATC_L2_L2-2019.1_HPB_93.0_CTS_CO2.zip
    file already exists, skip...ICOS_ATC_L2_L2-2019.1_JFJ_5.0_CTS_CO2.zip
    file already exists, skip...ICOS_ATC_L2_L2-2019.1_KRE_50.0_CTS_CO2.zip
    file already exists, skip...ICOS_ATC_L2_L2-2019.1_NOR_32.0_CTS_CO2.zip
    file already exists, skip...ICOS_ATC_L2_L2-2019.1_NOR_100.0_CTS_CO2.zip
    file already exists, skip...ICOS_ATC_L2_L2-2019.1_KRE_125.0_CTS_CO2.zip
    file already exists, skip...ICOS_ATC_L2_L2-2019.1_OPE_120.0_CTS_CO2.zip
    file already exists, skip...ICOS_ATC_L2_L2-2019.1_NOR_59.0_CTS_CO2.zip
    file already exists, skip...ICOS_ATC_L2_L2-2019.1_OPE_50.0_CTS_CO2.zip
    file already exists, skip...ICOS_ATC_L2_L2-2019.1_HTM_70.0_CTS_CO2.zip
    file already exists, skip...ICOS_ATC_L2_L2-2019.1_HTM_30.0_CTS_CO2.zip
    file already exists, skip...ICOS_ATC_L2_L2-2019.1_HPB_131.0_CTS_CO2.zip
    file already exists, skip...ICOS_ATC_L2_L2-2019.1_KRE_250.0_CTS_CO2.zip
    file already exists, skip...ICOS_ATC_L2_L2-2019.1_HTM_150.0_CTS_CO2.zip
    file already exists, skip...ICOS_ATC_L2_L2-2019.1_KRE_10.0_CTS_CO2.zip
    file already exists, skip...ICOS_ATC_L2_L2-2019.1_OPE_10.0_CTS_CO2.zip
    file already exists, skip...ICOS_ATC_L2_L2-2019.1_HPB_50.0_CTS_CO2.zip
    file already exists, skip...ICOS_ATC_L2_L2-2019.1_PUY_10.0_CTS_CO2.zip
    file already exists, skip...ICOS_ATC_L2_L2-2019.1_SMR_125.0_CTS_CO2.zip
    file already exists, skip...ICOS_ATC_L2_L2-2019.1_SMR_16.8_CTS_CO2.zip
    file already exists, skip...ICOS_ATC_L2_L2-2019.1_SMR_67.2_CTS_CO2.zip
    file already exists, skip...ICOS_ATC_L2_L2-2019.1_SVB_150.0_CTS_CO2.zip
    file already exists, skip...ICOS_ATC_L2_L2-2019.1_SVB_35.0_CTS_CO2.zip
    file already exists, skip...ICOS_ATC_L2_L2-2019.1_SVB_85.0_CTS_CO2.zip
    all done
    Zipfile:  ICOS_ATC_L2_L2-2019.1_NOR_100.0_CTS_CO2.zip
    Filename:  ICOS_ATC_L2_L2-2019.1_NOR_100.0_CTS.CO2
    
    Zipfile:  ICOS_ATC_L2_L2-2019.1_HTM_150.0_CTS_CO2.zip
    Filename:  ICOS_ATC_L2_L2-2019.1_HTM_150.0_CTS.CO2
    
    Zipfile:  ICOS_ATC_L2_L2-2019.1_SVB_150.0_CTS_CO2.zip
    Filename:  ICOS_ATC_L2_L2-2019.1_SVB_150.0_CTS.CO2
    
    Zipfile:  ICOS_ATC_L2_L2-2019.1_PUY_10.0_CTS_CO2.zip
    Filename:  ICOS_ATC_L2_L2-2019.1_PUY_10.0_CTS.CO2
    
    Zipfile:  ICOS_ATC_L2_L2-2019.1_OPE_120.0_CTS_CO2.zip
    Filename:  ICOS_ATC_L2_L2-2019.1_OPE_120.0_CTS.CO2
    
    Zipfile:  ICOS_ATC_L2_L2-2019.1_KRE_250.0_CTS_CO2.zip
    Filename:  ICOS_ATC_L2_L2-2019.1_KRE_250.0_CTS.CO2
    
    Zipfile:  ICOS_ATC_L2_L2-2019.1_JFJ_5.0_CTS_CO2.zip
    Filename:  ICOS_ATC_L2_L2-2019.1_JFJ_5.0_CTS.CO2
    
    Zipfile:  ICOS_ATC_L2_L2-2019.1_HPB_131.0_CTS_CO2.zip
    Filename:  ICOS_ATC_L2_L2-2019.1_HPB_131.0_CTS.CO2
    
    Zipfile:  ICOS_ATC_L2_L2-2019.1_SMR_125.0_CTS_CO2.zip
    Filename:  ICOS_ATC_L2_L2-2019.1_SMR_125.0_CTS.CO2
    

    Average deviation by hour and by month

    Using the data extracted in the first cell, which must be run first. The deviation data is sorted based on which month and which hour it was extracted from, and is displayed in separate bar graphs. This is done separately for each station.
    At the end, the averages by month and by hour for all stations are displayed in separate graphs.

    In [9]:
    #use the deviation data produced in the sections above to show the average
    #deviation by month and by hour; index_station walks
    #list_of_list_diff_measurements_stilt_not_abs
    index_station = 0

    #un-comment if only ICOS measured data:
    #list_stations=['NOR100', 'HTM150', 'SVB150', 'PUY', 'OPE120', 'KRE250', 'JFJ', 'HPB131', 'SMR125']

    #un-comment if only Globalview data:
    #list_stations=['GAT344', 'HEI', 'LIN099', 'LUT', 'PRS', 'TRN180']

    #using data from both Globalview and ICOS measured data, in the order the
    #deviation lists were appended
    list_stations = ['GAT344', 'HEI', 'LIN099', 'LUT', 'PRS', 'TRN180', 'NOR100', 'HTM150', 'SVB150', 'PUY', 'OPE120', 'KRE250', 'JFJ', 'HPB131', 'SMR125']

    #per-hour station averages for the final aggregated comparison
    #(one entry appended per station)
    zero_list, three_list, six_list, nine_list = [], [], [], []
    twelve_list, fifteen_list, eighteen_list, twentyone_list = [], [], [], []

    #per-month station averages for the final aggregated comparison
    jan_list, feb_list, mar_list, apr_list = [], [], [], []
    may_list, jun_list, jul_list, aug_list = [], [], [], []
    sep_list, oct_list, nov_list, dec_list = [], [], [], []
      
    for station in list_stations:
    
        #position within this station's deviation array (same positions as
        #date_range); advanced once per value in the loop below
        index = 0

        #per-month and per-hour buckets for this station's deviation values
        list_jan, list_feb, list_mar, list_apr = [], [], [], []
        list_may, list_jun, list_jul, list_aug = [], [], [], []
        list_sep, list_oct, list_nov, list_dec = [], [], [], []

        list_0, list_3, list_6, list_9 = [], [], [], []
        list_12, list_15, list_18, list_21 = [], [], [], []

        #NaN counts per month/hour bucket -- shown alongside the averages so
        #the reader can judge how much data lies behind each bar
        count_jan_nan = count_feb_nan = count_mar_nan = count_apr_nan = 0
        count_may_nan = count_jun_nan = count_jul_nan = count_aug_nan = 0
        count_sep_nan = count_oct_nan = count_nov_nan = count_dec_nan = 0

        count_0_nan = count_3_nan = count_6_nan = count_9_nan = 0
        count_12_nan = count_15_nan = count_18_nan = count_21_nan = 0

        #...and, for comparison, the number of valid values per bucket
        count_jan = count_feb = count_mar = count_apr = 0
        count_may = count_jun = count_jul = count_aug = 0
        count_sep = count_oct = count_nov = count_dec = 0

        count_0 = count_3 = count_6 = count_9 = 0
        count_12 = count_15 = count_18 = count_21 = 0

        #total number of valid (non-NaN) deviation values for this station
        count_measurements = 0
        
        #sort each deviation value into its month bucket and (3-hourly) hour
        #bucket; lookup tables replace the original's 20 copy-pasted if-blocks
        month_lists = {1: list_jan, 2: list_feb, 3: list_mar, 4: list_apr,
                       5: list_may, 6: list_jun, 7: list_jul, 8: list_aug,
                       9: list_sep, 10: list_oct, 11: list_nov, 12: list_dec}
        hour_lists = {0: list_0, 3: list_3, 6: list_6, 9: list_9,
                      12: list_12, 15: list_15, 18: list_18, 21: list_21}

        for value in list_of_list_diff_measurements_stilt_not_abs[index_station]:

            #month/hour this deviation value belongs to (array positions match
            #date_range)
            month = date_range[index].month
            hour = date_range[index].hour

            if math.isnan(value) == False:
                #counts every valid value, regardless of bucket
                count_measurements = count_measurements + 1

            month_lists[month].append(value)

            #modelled output is 3-hourly; the guard keeps behaviour identical
            #for any other hour (the original if-blocks simply never matched)
            if hour in hour_lists:
                hour_lists[hour].append(value)

            index = index + 1

        def _tally(values):
            #number of valid (non-NaN) and NaN entries in one bucket
            valid = sum(1 for v in values if not math.isnan(v))
            return valid, len(values) - valid

        #same per-bucket totals the original accumulated inside the loop
        count_jan, count_jan_nan = _tally(list_jan)
        count_feb, count_feb_nan = _tally(list_feb)
        count_mar, count_mar_nan = _tally(list_mar)
        count_apr, count_apr_nan = _tally(list_apr)
        count_may, count_may_nan = _tally(list_may)
        count_jun, count_jun_nan = _tally(list_jun)
        count_jul, count_jul_nan = _tally(list_jul)
        count_aug, count_aug_nan = _tally(list_aug)
        count_sep, count_sep_nan = _tally(list_sep)
        count_oct, count_oct_nan = _tally(list_oct)
        count_nov, count_nov_nan = _tally(list_nov)
        count_dec, count_dec_nan = _tally(list_dec)
        count_0, count_0_nan = _tally(list_0)
        count_3, count_3_nan = _tally(list_3)
        count_6, count_6_nan = _tally(list_6)
        count_9, count_9_nan = _tally(list_9)
        count_12, count_12_nan = _tally(list_12)
        count_15, count_15_nan = _tally(list_15)
        count_18, count_18_nan = _tally(list_18)
        count_21, count_21_nan = _tally(list_21)
    
        #per-month average deviation for this station (NaNs ignored)
        #BUGFIX: the original averaged list_jun twice and never list_jul, so
        #July's bar actually showed June's average
        averages_months= np.array((np.nanmean(list_jan), np.nanmean(list_feb), np.nanmean(list_mar), np.nanmean(list_apr), \
                                  np.nanmean(list_may), np.nanmean(list_jun), np.nanmean(list_jul), np.nanmean(list_aug), np.nanmean(list_sep),\
                                  np.nanmean(list_oct), np.nanmean(list_nov), np.nanmean(list_dec)))
        
        #append this station's monthly averages for the aggregated graph at the end
        jan_list.append(averages_months[0])
        feb_list.append(averages_months[1]) 
        mar_list.append(averages_months[2])
        apr_list.append(averages_months[3])
        may_list.append(averages_months[4])
        jun_list.append(averages_months[5])
        jul_list.append(averages_months[6])
        aug_list.append(averages_months[7])
        sep_list.append(averages_months[8])
        oct_list.append(averages_months[9])
        nov_list.append(averages_months[10])
        dec_list.append(averages_months[11])

        #per-hour average deviation for this station (NaNs ignored)
        averages_hours= np.array((np.nanmean(list_0), np.nanmean(list_3), np.nanmean(list_6), np.nanmean(list_9), \
                                  np.nanmean(list_12), np.nanmean(list_15), np.nanmean(list_18), np.nanmean(list_21)))
        
        #append this station's hourly averages for the aggregated graph at the end
        zero_list.append(averages_hours[0])
        three_list.append(averages_hours[1])
        six_list.append(averages_hours[2])
        nine_list.append(averages_hours[3])
        twelve_list.append(averages_hours[4])
        fifteen_list.append(averages_hours[5])
        eighteen_list.append(averages_hours[6])
        twentyone_list.append(averages_hours[7])

        #NaN counts per bucket, reported next to the averages
        count_nan_months = np.array((count_jan_nan, count_feb_nan, count_mar_nan, count_apr_nan, count_may_nan, count_jun_nan,\
                                    count_jul_nan, count_aug_nan, count_sep_nan, count_oct_nan, count_nov_nan, count_dec_nan))

        count_nan_hours = np.array((count_0_nan, count_3_nan, count_6_nan, count_9_nan, count_12_nan, count_15_nan,\
                                    count_18_nan, count_21_nan))

        #spread of the deviation per bucket (NaNs ignored); note the signed
        #values are used, not absolute ones
        std_months= np.array((np.nanstd(list_jan), np.nanstd(list_feb), np.nanstd(list_mar), np.nanstd(list_apr), \
                                  np.nanstd(list_may), np.nanstd(list_jun), np.nanstd(list_jul), np.nanstd(list_aug), np.nanstd(list_sep),\
                                  np.nanstd(list_oct), np.nanstd(list_nov), np.nanstd(list_dec)))

        std_hours= np.array((np.nanstd(list_0), np.nanstd(list_3), np.nanstd(list_6), np.nanstd(list_9), \
                                  np.nanstd(list_12), np.nanstd(list_15), np.nanstd(list_18), np.nanstd(list_21)))
        
        # Count how many monthly averages are actual numbers (non-NaN);
        # zero means there is nothing to plot for this station.
        check_for_data = sum(1 for monthly_avg in averages_months if not math.isnan(monthly_avg))
        
    
        #if there is data in the lists, can move on.
        #since running last cell, not really necessary
        if check_for_data != 0:
            
            # Bar chart 1: this station's mean deviation per calendar month.
            N = len(averages_months)

            ind = np.arange(N)
            width = 0.35

            #added figure stuff for size change


            figure(num=None, figsize=(11, 8), dpi=80, facecolor='w', edgecolor='k')

            ax = plt.subplot(111)
            ax.yaxis.grid(True)

            plt.bar(ind, averages_months, width, color='blue')
            matplotlib.rcParams.update({'font.size': 17})
            plt.ylabel('Average deviation from measured concentrations')

            plt.xlabel('Month (count NaN)')
            #added figsize


            #for title --> use variables of last cell as well (ex number of no measurements. nan here can also just mean no cells for it)
            plt.title(station +' performance modeled concentration by month' + '\n' + str(date_range[0].year) + '-' + str(date_range[0].month) + '-' + str(date_range[0].day)\
                     + ' to ' + str(date_range[date_index_number].year) + '-' + str(date_range[date_index_number].month) + '-' + str(date_range[date_index_number].day)+ '\n'\
                     'Number of comparisons between modeled and measured concentrations: ' + str(count_measurements) + '\n')
            # x labels: month name (NaN count / total samples in that month).
            plt.xticks(ind, ('Jan (' + str(count_nan_months[0]) + '/' + str(len(list_jan)) + ')' , 'Feb (' + str(count_nan_months[1]) + '/' + str(len(list_feb)) + ')', 'Mar ('+ str(count_nan_months[2]) + '/'  + str(len(list_mar)) + ')',\
                             'Apr (' + str(count_nan_months[3]) + '/' + str(len(list_apr)) + ')', 'May ('+ str(count_nan_months[4]) + '/' + str(len(list_may)) + ')', 'Jun (' + str(count_nan_months[5]) + '/' + str(len(list_jun)) +')', \
                             'Jul (' + str(count_nan_months[6]) + '/' + str(len(list_jul)) + ')', 'Aug (' + str(count_nan_months[7]) + '/' + str(len(list_aug)) +')', 'Sep (' + str(count_nan_months[8]) + '/' + str(len(list_sep)) +')',\
                             'Oct (' + str(count_nan_months[9]) + '/' + str(len(list_oct)) +')', 'Nov (' + str(count_nan_months[10]) + '/' + str(len(list_nov)) +')', 'Dec (' + str(count_nan_months[11]) + '/' + str(len(list_dec)) +')'),  rotation='vertical')

            # Zero line: model and measurements agree on average.
            plt.axhline(0, color='black', lw=2)

            plt.show()

            # Print the per-month standard deviation below the chart.
            std_months= std_months.tolist()

            std_months_dic=["Jan", "Feb", "Mar", "Apr", "May", "Jun", "Jul", "Aug", "Sep", "Oct", "Nov", "Dec"]
            index=0
            for value in std_months:
                print ('Standard deviation for', std_months_dic[index], 'month:', ("%.2f" %std_months[index]))

                index=index+1
    
            # Bar chart 2: this station's mean deviation per 3-hourly slot.
            N = len(averages_hours)

            ind = np.arange(N)
            width = 0.35


            figure(num=None, figsize=(11, 8), dpi=80, facecolor='w', edgecolor='k')

            ax = plt.subplot(111)
            ax.yaxis.grid(True)

            plt.bar(ind, averages_hours, width, color='g')


            plt.ylabel('Average deviation from measured concentration')

            plt.xlabel('Hour of the day (count NaN)')

            #for title --> use variables of last cell as well (ex number of no measurements. nan here can also just mean no cells for it)
            plt.title(station + ' performance modeled concentration by hour' + '\n' + str(date_range[0].year) + '-' + str(date_range[0].month) + '-' + str(date_range[0].day)\
                     + ' to ' + str(date_range[date_index_number].year) + '-' + str(date_range[date_index_number].month) + '-' + str(date_range[date_index_number].day)+ '\n'\
                     'Number of comparisons between modeled and measured concentrations: ' + str(count_measurements)+ '\n')
            # x labels: hour (NaN count / total samples at that hour).
            # BUG FIX: the '6' label's denominator previously used len(list_9)
            # (copy-paste error); it now correctly uses len(list_6).
            plt.xticks(ind, ('0 (' + str(count_nan_hours[0]) + '/' + str(len(list_0))+ ')' , '3 (' + str(count_nan_hours[1])  + '/' + str(len(list_3))+ ')' , '6 (' + str(count_nan_hours[2])  + '/' + str(len(list_6))+ ')',\
                             '9 (' + str(count_nan_hours[3])  + '/' + str(len(list_9))+ ')', '12 (' + str(count_nan_hours[4])  + '/' + str(len(list_12))+ ')', '15 (' + str(count_nan_hours[5])  + '/' + str(len(list_15))+ ')',\
                             '18 (' + str(count_nan_hours[6])  + '/' + str(len(list_18))+ ')', '21 (' + str(count_nan_hours[7])  + '/' + str(len(list_21))+')'), rotation='vertical')
    
            # Zero line: model and measurements agree on average.
            plt.axhline(0, color='black', lw=2)


            plt.show()

            # Print the per-hour standard deviation below the chart.
            std_hours= std_hours.tolist()

            std_hours_dic=["0", "3", "6", "9", "12", "15", "18", "21"]

            index=0

            for value in std_hours:
                print ('Standard deviation at', std_hours_dic[index], 'hour:', ("%.2f" %std_hours[index]))

                index=index+1
    
        else:
            print('no measuremets for this station')
        index_station=index_station+1
        
    #add comparison? All in one bar graph at the bottom? maybe only in this cell, not next.

    # Aggregated chart: average deviation per hour, grouped by station.
    # NOTE(review): bars for hours 3-18 all share color 'g', so the legend entries
    # are visually indistinguishable -- confirm this is intended.
    y_pos = np.arange(len(list_stations))
    figure(num=None, figsize=(20, 10), dpi=80, facecolor='w', edgecolor='k')
    ax = plt.subplot(111)
    p1 = ax.bar(y_pos-0.4, zero_list ,width=0.03,color='r',align='center')
    p2 = ax.bar(y_pos-0.3, three_list,width=0.03,color='g',align='center')
    p3 = ax.bar(y_pos-0.2, six_list,width=0.03,color='g',align='center')
    p4 = ax.bar(y_pos-0.1, nine_list,width=0.03,color='g',align='center')
    p5 = ax.bar(y_pos+0, twelve_list ,width=0.03,color='g',align='center')
    p6 = ax.bar(y_pos +0.1, fifteen_list ,width=0.03,color='g',align='center')
    p7 = ax.bar(y_pos+0.2, eighteen_list,width=0.03,color='g',align='center')
    p8 = ax.bar(y_pos+0.3, twentyone_list,width=0.03,color='y',align='center')

    ax.yaxis.grid(True)
    ax.legend((p1[0], p2[0], p3[0], p4[0], p5[0], p6[0], p7[0], p8[0]), ('0', '3', '6', '9','12', '15', '18', '21') )

    plt.xticks(y_pos, list_stations, rotation='vertical')
    plt.axhline(0, color='black', lw=2)

    #get the index of the last date (start at 0) to access it in the titles
    date_index_number = (len(date_range) - 1)
    plt.title('Average disagreement by hour' + '\n'  +  str(date_range[0].year) + '-' + str(date_range[0].month) + '-' + str(date_range[0].day)\
                         + ' to ' + str(date_range[date_index_number].year) + '-' + str(date_range[date_index_number].month) + '-' + str(date_range[date_index_number].day)+ '\n' +\
             'Hour(s): all')
    plt.ylabel('ppm')
    plt.show()

    # Aggregated chart: average deviation per month, grouped by station.
    # NOTE(review): months Feb-Nov all share color 'b' in the legend as well.
    y_pos = np.arange(len(list_stations))
    figure(num=None, figsize=(20, 10), dpi=80, facecolor='w', edgecolor='k')
    ax = plt.subplot(111)
    p1 = ax.bar(y_pos-0.3, jan_list ,width=0.03,color='r',align='center')
    p2 = ax.bar(y_pos-0.25, feb_list,width=0.03,color='b',align='center')
    p3 = ax.bar(y_pos-0.20, mar_list,width=0.03,color='b',align='center')
    p4 = ax.bar(y_pos-0.15, apr_list,width=0.03,color='b',align='center')
    p5 = ax.bar(y_pos-0.10, may_list ,width=0.03,color='b',align='center')
    p6 = ax.bar(y_pos-0.05, jun_list ,width=0.03,color='b',align='center')
    p7 = ax.bar(y_pos+0.0, jul_list,width=0.03,color='b',align='center')
    p8 = ax.bar(y_pos+0.05, aug_list,width=0.03,color='b',align='center')
    p9 = ax.bar(y_pos+0.10, sep_list ,width=0.03,color='b',align='center')
    p10 = ax.bar(y_pos +0.15, oct_list ,width=0.03,color='b',align='center')
    p11 = ax.bar(y_pos+0.20, nov_list,width=0.03,color='b',align='center')
    p12 = ax.bar(y_pos+0.25, dec_list,width=0.03,color='y',align='center')
    ax.legend((p1[0], p2[0], p3[0], p4[0], p5[0], p6[0], p7[0], p8[0], p9[0], p10[0], p11[0], p12[0]), ('Jan', 'Feb', 'Mar', 'Apr','May', 'Jun', 'Jul', 'Aug', 'Sep', 'Oct', 'Nov', 'Dec'),  ncol=2)

    ax.yaxis.grid(True)
    plt.xticks(y_pos, list_stations, rotation='vertical')
    plt.axhline(0, color='black', lw=2)

    #get the index of the last date (start at 0) to access it in the titles
    date_index_number = (len(date_range) - 1)
    plt.title('Average disagreement by month' + '\n'  +  str(date_range[0].year) + '-' + str(date_range[0].month) + '-' + str(date_range[0].day)\
                         + ' to ' + str(date_range[date_index_number].year) + '-' + str(date_range[date_index_number].month) + '-' + str(date_range[date_index_number].day)+ '\n' +\
             'Hour(s): all')
    plt.ylabel('ppm')
    plt.show()
    
    Standard deviation for Jan month: nan
    Standard deviation for Feb month: nan
    Standard deviation for Mar month: nan
    Standard deviation for Apr month: 1.95
    Standard deviation for May month: 4.97
    Standard deviation for Jun month: 4.13
    Standard deviation for Jul month: 4.31
    Standard deviation for Aug month: 3.93
    Standard deviation for Sep month: 4.13
    Standard deviation for Oct month: 3.73
    Standard deviation for Nov month: 4.31
    Standard deviation for Dec month: 3.54
    
    Standard deviation at 0 hour: 3.88
    Standard deviation at 3 hour: 4.03
    Standard deviation at 6 hour: 4.47
    Standard deviation at 9 hour: 4.46
    Standard deviation at 12 hour: 3.94
    Standard deviation at 15 hour: 4.27
    Standard deviation at 18 hour: 3.49
    Standard deviation at 21 hour: 3.65
    
    Standard deviation for Jan month: 32.73
    Standard deviation for Feb month: 49.05
    Standard deviation for Mar month: 35.34
    Standard deviation for Apr month: 45.90
    Standard deviation for May month: 13.80
    Standard deviation for Jun month: 23.46
    Standard deviation for Jul month: 18.86
    Standard deviation for Aug month: 37.43
    Standard deviation for Sep month: 27.16
    Standard deviation for Oct month: 49.06
    Standard deviation for Nov month: 28.66
    Standard deviation for Dec month: 10.78
    
    Standard deviation at 0 hour: 49.72
    Standard deviation at 3 hour: 41.60
    Standard deviation at 6 hour: 36.34
    Standard deviation at 9 hour: 17.78
    Standard deviation at 12 hour: 11.22
    Standard deviation at 15 hour: 8.81
    Standard deviation at 18 hour: 13.53
    Standard deviation at 21 hour: 57.55
    
    Standard deviation for Jan month: 9.10
    Standard deviation for Feb month: 12.07
    Standard deviation for Mar month: 3.87
    Standard deviation for Apr month: 4.94
    Standard deviation for May month: 8.39
    Standard deviation for Jun month: 7.84
    Standard deviation for Jul month: 9.37
    Standard deviation for Aug month: 14.23
    Standard deviation for Sep month: 7.23
    Standard deviation for Oct month: 11.36
    Standard deviation for Nov month: 6.80
    Standard deviation for Dec month: 7.74
    
    Standard deviation at 0 hour: 8.15
    Standard deviation at 3 hour: 9.43
    Standard deviation at 6 hour: 13.05
    Standard deviation at 9 hour: 11.38
    Standard deviation at 12 hour: 6.40
    Standard deviation at 15 hour: 6.20
    Standard deviation at 18 hour: 6.12
    Standard deviation at 21 hour: 7.86
    
    Standard deviation for Jan month: 7.59
    Standard deviation for Feb month: 3.09
    Standard deviation for Mar month: 5.51
    Standard deviation for Apr month: 5.58
    Standard deviation for May month: 8.87
    Standard deviation for Jun month: 8.22
    Standard deviation for Jul month: 9.04
    Standard deviation for Aug month: 9.13
    Standard deviation for Sep month: 10.22
    Standard deviation for Oct month: 8.08
    Standard deviation for Nov month: 5.99
    Standard deviation for Dec month: 7.20
    
    Standard deviation at 0 hour: 8.55
    Standard deviation at 3 hour: 10.28
    Standard deviation at 6 hour: 9.80
    Standard deviation at 9 hour: 7.30
    Standard deviation at 12 hour: 5.32
    Standard deviation at 15 hour: 5.14
    Standard deviation at 18 hour: 5.91
    Standard deviation at 21 hour: 6.87
    
    Standard deviation for Jan month: 1.38
    Standard deviation for Feb month: 1.53
    Standard deviation for Mar month: 1.73
    Standard deviation for Apr month: 1.56
    Standard deviation for May month: 2.16
    Standard deviation for Jun month: 3.82
    Standard deviation for Jul month: 2.45
    Standard deviation for Aug month: 2.01
    Standard deviation for Sep month: 1.57
    Standard deviation for Oct month: 2.36
    Standard deviation for Nov month: 1.65
    Standard deviation for Dec month: 1.11
    
    Standard deviation at 0 hour: 2.60
    Standard deviation at 3 hour: 2.21
    Standard deviation at 6 hour: 2.46
    Standard deviation at 9 hour: 2.14
    Standard deviation at 12 hour: 1.98
    Standard deviation at 15 hour: 2.00
    Standard deviation at 18 hour: 2.21
    Standard deviation at 21 hour: 2.60
    
    Standard deviation for Jan month: 7.59
    Standard deviation for Feb month: 4.61
    Standard deviation for Mar month: 2.81
    Standard deviation for Apr month: 5.01
    Standard deviation for May month: 7.52
    Standard deviation for Jun month: 5.24
    Standard deviation for Jul month: 4.83
    Standard deviation for Aug month: 5.41
    Standard deviation for Sep month: 6.36
    Standard deviation for Oct month: 4.27
    Standard deviation for Nov month: 4.83
    Standard deviation for Dec month: 4.38
    
    Standard deviation at 0 hour: 4.43
    Standard deviation at 3 hour: 6.46
    Standard deviation at 6 hour: 6.79
    Standard deviation at 9 hour: 6.04
    Standard deviation at 12 hour: 5.19
    Standard deviation at 15 hour: 4.94
    Standard deviation at 18 hour: 4.89
    Standard deviation at 21 hour: 5.15
    
    Standard deviation for Jan month: nan
    Standard deviation for Feb month: nan
    Standard deviation for Mar month: nan
    Standard deviation for Apr month: 1.78
    Standard deviation for May month: 3.69
    Standard deviation for Jun month: 5.01
    Standard deviation for Jul month: 4.34
    Standard deviation for Aug month: 5.77
    Standard deviation for Sep month: 5.04
    Standard deviation for Oct month: 3.47
    Standard deviation for Nov month: 2.79
    Standard deviation for Dec month: 5.02
    
    Standard deviation at 0 hour: 4.46
    Standard deviation at 3 hour: 4.89
    Standard deviation at 6 hour: 6.62
    Standard deviation at 9 hour: 3.18
    Standard deviation at 12 hour: 2.84
    Standard deviation at 15 hour: 2.94
    Standard deviation at 18 hour: 3.02
    Standard deviation at 21 hour: 3.28
    
    Standard deviation for Jan month: nan
    Standard deviation for Feb month: nan
    Standard deviation for Mar month: nan
    Standard deviation for Apr month: 1.48
    Standard deviation for May month: 4.38
    Standard deviation for Jun month: 4.01
    Standard deviation for Jul month: 3.88
    Standard deviation for Aug month: 4.43
    Standard deviation for Sep month: 3.60
    Standard deviation for Oct month: 3.12
    Standard deviation for Nov month: 2.73
    Standard deviation for Dec month: 2.97
    
    Standard deviation at 0 hour: 3.39
    Standard deviation at 3 hour: 3.36
    Standard deviation at 6 hour: 4.11
    Standard deviation at 9 hour: 3.94
    Standard deviation at 12 hour: 3.45
    Standard deviation at 15 hour: 3.12
    Standard deviation at 18 hour: 3.04
    Standard deviation at 21 hour: 3.44
    
    Standard deviation for Jan month: nan
    Standard deviation for Feb month: nan
    Standard deviation for Mar month: nan
    Standard deviation for Apr month: nan
    Standard deviation for May month: nan
    Standard deviation for Jun month: 2.18
    Standard deviation for Jul month: 3.73
    Standard deviation for Aug month: 3.83
    Standard deviation for Sep month: 3.50
    Standard deviation for Oct month: 2.48
    Standard deviation for Nov month: 1.82
    Standard deviation for Dec month: 2.10
    
    Standard deviation at 0 hour: 2.83
    Standard deviation at 3 hour: 2.91
    Standard deviation at 6 hour: 3.49
    Standard deviation at 9 hour: 3.04
    Standard deviation at 12 hour: 3.10
    Standard deviation at 15 hour: 2.87
    Standard deviation at 18 hour: 2.71
    Standard deviation at 21 hour: 2.90
    
    Standard deviation for Jan month: 3.39
    Standard deviation for Feb month: 1.90
    Standard deviation for Mar month: 2.37
    Standard deviation for Apr month: 2.72
    Standard deviation for May month: 4.12
    Standard deviation for Jun month: 5.22
    Standard deviation for Jul month: 3.69
    Standard deviation for Aug month: 3.43
    Standard deviation for Sep month: 3.64
    Standard deviation for Oct month: 2.92
    Standard deviation for Nov month: 2.16
    Standard deviation for Dec month: 1.75
    
    Standard deviation at 0 hour: 3.31
    Standard deviation at 3 hour: 3.05
    Standard deviation at 6 hour: 3.82
    Standard deviation at 9 hour: 3.59
    Standard deviation at 12 hour: 3.27
    Standard deviation at 15 hour: 3.35
    Standard deviation at 18 hour: 3.76
    Standard deviation at 21 hour: 3.35
    
    Standard deviation for Jan month: 6.44
    Standard deviation for Feb month: 5.27
    Standard deviation for Mar month: 2.89
    Standard deviation for Apr month: 3.78
    Standard deviation for May month: 5.69
    Standard deviation for Jun month: 5.11
    Standard deviation for Jul month: 4.89
    Standard deviation for Aug month: 5.36
    Standard deviation for Sep month: 3.94
    Standard deviation for Oct month: 4.32
    Standard deviation for Nov month: 4.95
    Standard deviation for Dec month: 5.43
    
    Standard deviation at 0 hour: 5.16
    Standard deviation at 3 hour: 5.59
    Standard deviation at 6 hour: 5.80
    Standard deviation at 9 hour: 5.34
    Standard deviation at 12 hour: 4.72
    Standard deviation at 15 hour: 4.48
    Standard deviation at 18 hour: 4.69
    Standard deviation at 21 hour: 4.61
    
    Standard deviation for Jan month: nan
    Standard deviation for Feb month: nan
    Standard deviation for Mar month: nan
    Standard deviation for Apr month: 3.33
    Standard deviation for May month: 4.66
    Standard deviation for Jun month: 5.22
    Standard deviation for Jul month: 5.31
    Standard deviation for Aug month: 4.76
    Standard deviation for Sep month: 3.61
    Standard deviation for Oct month: 4.14
    Standard deviation for Nov month: 4.62
    Standard deviation for Dec month: 3.98
    
    Standard deviation at 0 hour: 4.25
    Standard deviation at 3 hour: 4.50
    Standard deviation at 6 hour: 4.81
    Standard deviation at 9 hour: 5.68
    Standard deviation at 12 hour: 4.54
    Standard deviation at 15 hour: 4.50
    Standard deviation at 18 hour: 3.83
    Standard deviation at 21 hour: 4.10
    
    Standard deviation for Jan month: 1.39
    Standard deviation for Feb month: 1.59
    Standard deviation for Mar month: 1.35
    Standard deviation for Apr month: 2.26
    Standard deviation for May month: 2.84
    Standard deviation for Jun month: 4.08
    Standard deviation for Jul month: 2.64
    Standard deviation for Aug month: 2.52
    Standard deviation for Sep month: 2.63
    Standard deviation for Oct month: 1.63
    Standard deviation for Nov month: 1.83
    Standard deviation for Dec month: 2.58
    
    Standard deviation at 0 hour: 2.81
    Standard deviation at 3 hour: 3.04
    Standard deviation at 6 hour: 2.95
    Standard deviation at 9 hour: 2.77
    Standard deviation at 12 hour: 3.12
    Standard deviation at 15 hour: 2.95
    Standard deviation at 18 hour: 3.68
    Standard deviation at 21 hour: 3.21
    
    Standard deviation for Jan month: 8.36
    Standard deviation for Feb month: 4.19
    Standard deviation for Mar month: 2.71
    Standard deviation for Apr month: 3.14
    Standard deviation for May month: 5.26
    Standard deviation for Jun month: 6.06
    Standard deviation for Jul month: 5.67
    Standard deviation for Aug month: 6.78
    Standard deviation for Sep month: 5.28
    Standard deviation for Oct month: 3.17
    Standard deviation for Nov month: 3.86
    Standard deviation for Dec month: 3.77
    
    Standard deviation at 0 hour: 5.57
    Standard deviation at 3 hour: 4.81
    Standard deviation at 6 hour: 5.60
    Standard deviation at 9 hour: 6.19
    Standard deviation at 12 hour: 5.46
    Standard deviation at 15 hour: 5.70
    Standard deviation at 18 hour: 6.03
    Standard deviation at 21 hour: 6.00
    
    Standard deviation for Jan month: 2.27
    Standard deviation for Feb month: 2.02
    Standard deviation for Mar month: 1.15
    Standard deviation for Apr month: 1.27
    Standard deviation for May month: 1.73
    Standard deviation for Jun month: 2.96
    Standard deviation for Jul month: 4.63
    Standard deviation for Aug month: 5.04
    Standard deviation for Sep month: 5.17
    Standard deviation for Oct month: 2.57
    Standard deviation for Nov month: 2.00
    Standard deviation for Dec month: 2.20
    
    Standard deviation at 0 hour: 3.13
    Standard deviation at 3 hour: 3.40
    Standard deviation at 6 hour: 3.41
    Standard deviation at 9 hour: 3.26
    Standard deviation at 12 hour: 2.78
    Standard deviation at 15 hour: 2.71
    Standard deviation at 18 hour: 2.83
    Standard deviation at 21 hour: 2.83
    

    Count of times the deviation is over and below user defined threshold, by hour and by month

    The deviation data extracted in the first cell is needed to run this cell.
    The user chooses the threshold by writing a positive integer when prompted upon running the cell. For each station in all_stations, one bar graph with the count of deviations over the threshold and one bar graph with the count under the threshold are output.
    At the end there are four aggregated bar graphs covering all stations: one showing the count of overestimations by month, one showing underestimations by month, and the same pair of graphs broken down by hour of day.

    In [48]:
    # Ask the user for the deviation threshold (in ppm) used by the graphs below.
    # BUG FIX: corrected typo in the user-facing prompt ('positve' -> 'positive').
    threshold_input= input("Write threshold ppm for display in graphs(positive integer):  ")

    # NOTE: the 'thershold' misspelling is kept deliberately -- code further down
    # (and possibly later cells) references these exact names.
    thershold = int(threshold_input)
    thershold_neg= -1* int(threshold_input)
    
    #for the final aggregated comparison between the stations in the station list
    # Per-hour counts (one entry appended per station) of deviations above +threshold.
    (zero_list_positive, three_list_positive, six_list_positive,
     nine_list_positive, twelve_list_positive, fifteen_list_positive,
     eighteen_list_positive, twentyone_list_positive) = ([] for _ in range(8))

    # Per-month counts of deviations above +threshold.
    (jan_list_positive, feb_list_positive, mar_list_positive, apr_list_positive,
     may_list_positive, jun_list_positive, jul_list_positive, aug_list_positive,
     sep_list_positive, oct_list_positive, nov_list_positive, dec_list_positive) = ([] for _ in range(12))

    # Per-hour counts of deviations below -threshold.
    (zero_list_negative, three_list_negative, six_list_negative,
     nine_list_negative, twelve_list_negative, fifteen_list_negative,
     eighteen_list_negative, twentyone_list_negative) = ([] for _ in range(8))

    # Per-month counts of deviations below -threshold.
    (jan_list_negative, feb_list_negative, mar_list_negative, apr_list_negative,
     may_list_negative, jun_list_negative, jul_list_negative, aug_list_negative,
     sep_list_negative, oct_list_negative, nov_list_negative, dec_list_negative) = ([] for _ in range(12))
    
    index_station=0
    for station in list_stations:

        #one set of bar graphs (by month and by hour), for each threshold

        index = 0

        # Marker arrays, one slot per timestamp in date_range, initialised to NaN.
        # A slot is overwritten with the timestamp's hour (or month) when the
        # deviation crosses the threshold, so counting non-NaN matches later
        # gives the per-hour / per-month exceedance counts.
        #by hour
        list_of_indexes_positive = np.empty(len(date_range))
        list_of_indexes_positive[:] = np.nan

        list_of_indexes_negative = np.empty(len(date_range))
        list_of_indexes_negative[:] = np.nan
        #by month
        list_of_indexes_positive_month = np.empty(len(date_range))
        list_of_indexes_positive_month[:] = np.nan

        list_of_indexes_negative_month= np.empty(len(date_range))
        list_of_indexes_negative_month[:] = np.nan

        list_of_nan = np.empty(len(date_range))
        list_of_nan[:] = np.nan

        list_of_nan_month = np.empty(len(date_range))
        list_of_nan_month[:] = np.nan

        #a list where the "month numbers" are added to.
        count_months = []

        # Scan this station's (model - measurement) deviation series.
        for value in list_of_list_diff_measurements_stilt_not_abs[index_station]:
            count_months.append(date_range[index].month)

            #also, add to the lists depending on if over or under the thresholds:
            if value > int(thershold):
                #the list is as long as the date_range

                list_of_indexes_positive[index] = date_range[index].hour
                list_of_indexes_positive_month[index] = date_range[index].month

            if value < int(thershold_neg):
                list_of_indexes_negative[index] = date_range[index].hour
                list_of_indexes_negative_month[index] = date_range[index].month

            # Missing comparison (NaN deviation): track separately.
            if math.isnan(value) == True:

                list_of_nan[index] = date_range[index].hour

                list_of_nan_month[index] = date_range[index].month

            #move to the next deviation value.
            index = index + 1
    
    
        # Total number of deviation values falling in each calendar month --
        # these are the denominators for the per-month graphs. count_months is a
        # plain list of ints, so list.count gives the same tallies as the
        # original per-month if-chain loop.
        count_jan_month = count_months.count(1)
        count_feb_month = count_months.count(2)
        count_mar_month = count_months.count(3)
        count_apr_month = count_months.count(4)
        count_may_month = count_months.count(5)
        count_jun_month = count_months.count(6)
        count_jul_month = count_months.count(7)
        count_aug_month = count_months.count(8)
        count_sep_month = count_months.count(9)
        count_oct_month = count_months.count(10)
        count_nov_month = count_months.count(11)
        count_dec_month = count_months.count(12)
    
        # Count, per 3-hourly slot, how often the model overestimated by more
        # than the threshold. NaN markers never equal a dict key, so they are
        # skipped -- exactly as the original equality tests skipped them.
        positive_hour_tally = {0: 0, 3: 0, 6: 0, 9: 0, 12: 0, 15: 0, 18: 0, 21: 0}
        for hour_mark in list_of_indexes_positive:
            if hour_mark in positive_hour_tally:
                positive_hour_tally[hour_mark] += 1

        count_0 = positive_hour_tally[0]
        count_3 = positive_hour_tally[3]
        count_6 = positive_hour_tally[6]
        count_9 = positive_hour_tally[9]
        count_12 = positive_hour_tally[12]
        count_15 = positive_hour_tally[15]
        count_18 = positive_hour_tally[18]
        count_21 = positive_hour_tally[21]
                
        #for aggregated bar graph at the end
        # One entry per station: over-threshold counts for each 3-hourly slot.
        zero_list_positive.append(count_0)
        three_list_positive.append(count_3)
        six_list_positive.append(count_6)
        nine_list_positive.append(count_9)
        twelve_list_positive.append(count_12)
        fifteen_list_positive.append(count_15)
        eighteen_list_positive.append(count_18)
        twentyone_list_positive.append(count_21)
    
        # Count, per calendar month, how often the model overestimated by more
        # than the threshold (NaN markers match no key and are skipped).
        positive_month_tally = {m: 0 for m in range(1, 13)}
        for month_mark in list_of_indexes_positive_month:
            if month_mark in positive_month_tally:
                positive_month_tally[month_mark] += 1

        count_jan = positive_month_tally[1]
        count_feb = positive_month_tally[2]
        count_mar = positive_month_tally[3]
        count_apr = positive_month_tally[4]
        count_may = positive_month_tally[5]
        count_jun = positive_month_tally[6]
        count_jul = positive_month_tally[7]
        count_aug = positive_month_tally[8]
        count_sep = positive_month_tally[9]
        count_oct = positive_month_tally[10]
        count_nov = positive_month_tally[11]
        count_dec = positive_month_tally[12]
                
        # One entry per station: over-threshold counts for each month.
        jan_list_positive.append(count_jan)
        feb_list_positive.append(count_feb)
        mar_list_positive.append(count_mar)
        apr_list_positive.append(count_apr)
        may_list_positive.append(count_may)
        jun_list_positive.append(count_jun)
        jul_list_positive.append(count_jul)
        aug_list_positive.append(count_aug)
        sep_list_positive.append(count_sep)
        oct_list_positive.append(count_oct)
        nov_list_positive.append(count_nov)
        dec_list_positive.append(count_dec)
    
        # Count, per 3-hourly slot, how often the model underestimated by more
        # than the threshold (deviation below -threshold).
        negative_hour_tally = {0: 0, 3: 0, 6: 0, 9: 0, 12: 0, 15: 0, 18: 0, 21: 0}
        for hour_mark in list_of_indexes_negative:
            if hour_mark in negative_hour_tally:
                negative_hour_tally[hour_mark] += 1

        count_0_neg = negative_hour_tally[0]
        count_3_neg = negative_hour_tally[3]
        count_6_neg = negative_hour_tally[6]
        count_9_neg = negative_hour_tally[9]
        count_12_neg = negative_hour_tally[12]
        count_15_neg = negative_hour_tally[15]
        count_18_neg = negative_hour_tally[18]
        count_21_neg = negative_hour_tally[21]
        
        # One entry per station: under-threshold counts for each 3-hourly slot.
        zero_list_negative.append(count_0_neg)
        three_list_negative.append(count_3_neg)
        six_list_negative.append(count_6_neg)
        nine_list_negative.append(count_9_neg)
        twelve_list_negative.append(count_12_neg)
        fifteen_list_negative.append(count_15_neg)
        eighteen_list_negative.append(count_18_neg)
        twentyone_list_negative.append(count_21_neg)
    
    
        # Per-month tallies of under-estimations beyond the threshold.
        (count_jan_neg, count_feb_neg, count_mar_neg, count_apr_neg,
         count_may_neg, count_jun_neg, count_jul_neg, count_aug_neg,
         count_sep_neg, count_oct_neg, count_nov_neg, count_dec_neg) = (
            sum(1 for v in list_of_indexes_negative_month if v == m)
            for m in range(1, 13))

        # accumulate for the aggregated per-station graphs at the end
        jan_list_negative.append(count_jan_neg)
        feb_list_negative.append(count_feb_neg)
        mar_list_negative.append(count_mar_neg)
        apr_list_negative.append(count_apr_neg)
        may_list_negative.append(count_may_neg)
        jun_list_negative.append(count_jun_neg)
        jul_list_negative.append(count_jul_neg)
        aug_list_negative.append(count_aug_neg)
        sep_list_negative.append(count_sep_neg)
        oct_list_negative.append(count_oct_neg)
        nov_list_negative.append(count_nov_neg)
        dec_list_negative.append(count_dec_neg)
    
        # Per-hour tallies of time steps whose deviation is NaN
        # (no measurement and/or no modelled value for that hour).
        (count_0_nan, count_3_nan, count_6_nan, count_9_nan,
         count_12_nan, count_15_nan, count_18_nan, count_21_nan) = (
            sum(1 for v in list_of_nan if v == h)
            for h in (0, 3, 6, 9, 12, 15, 18, 21))
    
        # Per-month tallies of NaN deviations.
        (count_jan_nan, count_feb_nan, count_mar_nan, count_apr_nan,
         count_may_nan, count_jun_nan, count_jul_nan, count_aug_nan,
         count_sep_nan, count_oct_nan, count_nov_nan, count_dec_nan) = (
            sum(1 for v in list_of_nan_month if v == m)
            for m in range(1, 13))
    
        # Collect the tallies into ordered lists (hours 0..21, months Jan..Dec).
        list_negatives = [count_0_neg, count_3_neg, count_6_neg, count_9_neg,
                          count_12_neg, count_15_neg, count_18_neg, count_21_neg]
        list_positives = [count_0, count_3, count_6, count_9,
                          count_12, count_15, count_18, count_21]
        list_nan = [count_0_nan, count_3_nan, count_6_nan, count_9_nan,
                    count_12_nan, count_15_nan, count_18_nan, count_21_nan]

        # here by month:
        list_positives_month = [count_jan, count_feb, count_mar, count_apr,
                                count_may, count_jun, count_jul, count_aug,
                                count_sep, count_oct, count_nov, count_dec]
        list_negatives_month = [count_jan_neg, count_feb_neg, count_mar_neg, count_apr_neg,
                                count_may_neg, count_jun_neg, count_jul_neg, count_aug_neg,
                                count_sep_neg, count_oct_neg, count_nov_neg, count_dec_neg]
        list_nan_month = [count_jan_nan, count_feb_nan, count_mar_nan, count_apr_nan,
                          count_may_nan, count_jun_nan, count_jul_nan, count_aug_nan,
                          count_sep_nan, count_oct_nan, count_nov_nan, count_dec_nan]

        # total footprints per month (denominator for % calculations)
        count_month_list = [count_jan_month, count_feb_month, count_mar_month, count_apr_month,
                            count_may_month, count_jun_month, count_jul_month, count_aug_month,
                            count_sep_month, count_oct_month, count_nov_month, count_dec_month]

        # grand totals decide whether this station gets its own graphs
        total_negatives = sum(int(v) for v in list_negatives)
        total_positives = sum(int(v) for v in list_positives)
    
    
        #changed here... don't want to display empty graphs. 
        if total_negatives>0 or total_positives>0:
    
    
            #MAKING bar graph for hour of the day:       
            count_positives = np.array((count_0, count_3, count_6, count_9, count_12, count_15, count_18, count_21))
            count_negatives = np.array((count_0_neg, count_3_neg, count_6_neg, count_9_neg, count_12_neg, count_15_neg, count_18_neg, count_21_neg))
            count_nan = np.array((count_0_nan, count_3_nan, count_6_nan, count_9_nan, count_12_nan, count_15_nan, count_18_nan ,count_21_nan))
    
            #how many values there are:
            N = len(count_positives)
    
            ind = np.arange(N)    
            width = 0.35     
    
    
            matplotlib.rcParams.update({'font.size': 17})
            figure(num=None, figsize=(11, 8), dpi=80, facecolor='w', edgecolor='k')
            ax = plt.subplot(111)
            ax.yaxis.grid(True)
    
            p1 = plt.bar(ind, count_positives, width, color='red')
            p2 = plt.bar(ind, count_negatives, width, bottom=count_positives, color='green')
            #p3 = plt.bar(ind,count_nan, width, bottom=count_positives+count_negatives, color= 'grey')
    
            plt.ylabel('Count')
    
            plt.xlabel('Hour of the day (count NaN)')
    
            #for title --> use variables of last cell as well (ex number of no measurements. nan here can also just mean no cells for it)
            plt.title(station + '\n' + 'Deviations over and under measured concentrations' + '\n' + 'Thershold: +-' + str(thershold) + ' ppm' + '\n' + str(date_range[0].year) + '-' + str(date_range[0].month) + '-' + str(date_range[0].day)\
                     + ' to ' + str(date_range[date_index_number].year) + '-' + str(date_range[date_index_number].month) + '-' + str(date_range[date_index_number].day)+ '\n'\
                     'Footprints within date range: ' + str(count)+ '\n')
            plt.xticks(ind, ('0 (' + str(count_nan[0]) + ')', '3 (' + str(count_nan[1]) + ')', '6 (' + str(count_nan[2]) + ')', '9 (' + str(count_nan[3]) + ')', '12 (' + str(count_nan[4]) + ')', '15 (' + str(count_nan[5]) + ')', '18 (' + str(count_nan[6]) + ')', '21 (' + str(count_nan[7]) + ')'), rotation='vertical')
    
    
            plt.legend((p1[0], p2[0], p3[0]), ('>' + str(thershold) +  ' ppm over: ' + str(sum(count_positives)) , '>' + str(thershold_neg) + 'ppm below: ' + str(sum(count_negatives))))
    
            plt.legend(loc=9, bbox_to_anchor=(0.5, -0.1))
            #pylab.legend(loc=9, bbox_to_anchor=(0.5, -0.1))
    
            plt.show()
    
    
            #NEW: graph for by month:
    
            count_positives_month = np.array((count_jan, count_feb, count_mar, count_apr, count_may, count_jun, count_jul, count_aug, count_sep, count_oct, count_nov, count_dec))
            count_negatives_month = np.array((count_jan_neg, count_feb_neg, count_mar_neg, count_apr_neg, count_may_neg, count_jun_neg, count_jul_neg, count_aug_neg, count_sep_neg, count_oct_neg, count_nov_neg, count_dec_neg))
            count_nan_month = np.array((count_jan_nan, count_feb_nan, count_mar_nan, count_apr_nan, count_may_nan, count_jun_nan, count_jul_nan, count_aug_nan, count_sep_nan, count_oct_nan, count_nov_nan, count_dec_nan))
            
            count_nan_month_total=sum(count_nan_month)
            figure(num=None, figsize=(11, 8), dpi=80, facecolor='w', edgecolor='k')
            matplotlib.rcParams.update({'font.size': 17})
    
            N = len(count_positives_month)
            ax = plt.subplot(111)
            ax.yaxis.grid(True)
    
            ind = np.arange(N)    
            width = 0.35       
    
            p1 = plt.bar(ind, count_positives_month, width, color='red')
            p2 = plt.bar(ind, count_negatives_month, width, bottom=count_positives_month, color='blue')
            #p3 = plt.bar(ind,count_nan_month, width, bottom=count_positives_month+count_negatives_month, color= 'grey')
    
            plt.ylabel('Count')
    
            plt.xlabel('Month (count NaN)')
    
            #for title --> use variables of last cell as well (ex number of no measurements. nan here can also just mean no cells for it)
            plt.title(station + '\n' + 'Deviation over and under measured concentrations' + '\n' + 'Thershold: +-' + str(thershold) + ' ppm' + '\n' + str(date_range[0].year) + '-' + str(date_range[0].month) + '-' + str(date_range[0].day)\
                     + ' to ' + str(date_range[date_index_number].year) + '-' + str(date_range[date_index_number].month) + '-' + str(date_range[date_index_number].day)+ '\n'\
                     'Footprints within date range: ' + str(count)+ '\n' + 'Count NaN: ' + str( count_nan_month_total))
            plt.xticks(ind, ('Jan (' + str(count_nan_month[0]) + ')', 'Feb (' + str(count_nan_month[1]) + ')', 'Mar (' + str(count_nan_month[2]) + ')', 'Apr (' + str(count_nan_month[3]) + ')', 'May (' + str(count_nan_month[4]) + ')', 'Jun (' + str(count_nan_month[5]) + ')', 'Jul (' + str(count_nan_month[6]) + ')', 'Aug (' + str(count_nan_month[7]) + ')', 'Sep (' + str(count_nan_month[8]) + ')', 'Oct (' + str(count_nan_month[9]) + ')', 'Nov (' + str(count_nan_month[10]) + ')', 'Dec (' + str(count_nan_month[11]) + ')'), rotation='vertical')
    
            plt.legend((p1[0], p2[0]), ('>' + str(thershold) +  ' ppm over: ' + str(sum(count_positives)) , '>' + str(thershold_neg) + 'ppm below: ' + str(sum(count_negatives))))
    
            plt.legend(loc=9, bbox_to_anchor=(0.5, -0.1))
            #pylab.legend(loc=9, bbox_to_anchor=(0.5, -0.1))
    
            plt.show()
        else:
            print ('Modeled concentrations are never disagreeing with measured concentrations above or below threshold ', threshold)
    
        index_station=index_station+1
    
    #aggregated bar all stations at the end.
    #one bar for overestimation ("positive") and one for under estimation ("negatives")

    # Over-estimations (> threshold) per station, one thin bar per month.
    y_pos = np.arange(len(list_stations))
    figure(num=None, figsize=(20, 10), dpi=80, facecolor='w', edgecolor='k')
    ax = plt.subplot(111)
    month_labels = ('Jan', 'Feb', 'Mar', 'Apr', 'May', 'Jun', 'Jul', 'Aug', 'Sep', 'Oct', 'Nov', 'Dec')
    month_series = (jan_list_positive, feb_list_positive, mar_list_positive, apr_list_positive,
                    may_list_positive, jun_list_positive, jul_list_positive, aug_list_positive,
                    sep_list_positive, oct_list_positive, nov_list_positive, dec_list_positive)
    month_offsets = (-0.3, -0.25, -0.20, -0.15, -0.10, -0.05, 0.0, 0.05, 0.10, 0.15, 0.20, 0.25)
    # January red, December green, everything else blue (same palette as before)
    month_colors = ('r',) + ('b',) * 10 + ('g',)
    month_bars = [ax.bar(y_pos + off, series, width=0.03, color=col, align='center')
                  for off, series, col in zip(month_offsets, month_series, month_colors)]
    ax.legend([bars[0] for bars in month_bars], month_labels, ncol=2)

    ax.yaxis.grid(True)
    plt.xticks(y_pos, list_stations, rotation='vertical')
    plt.axhline(0, color='black', lw=2)

    # index of the last date (0-based) so it can be used in the title
    date_index_number = (len(date_range) - 1)
    plt.title('Count overestimating more than ' + str(thershold) + 'ppm' + '\n'
              + str(date_range[0].year) + '-' + str(date_range[0].month) + '-' + str(date_range[0].day)
              + ' to ' + str(date_range[date_index_number].year) + '-' + str(date_range[date_index_number].month)
              + '-' + str(date_range[date_index_number].day) + '\n' + 'Hour(s): all')
    plt.ylabel('count')
    plt.show()
    
    # Under-estimations (beyond the negative threshold) per station, one thin
    # bar per month.
    y_pos = np.arange(len(list_stations))
    figure(num=None, figsize=(20, 10), dpi=80, facecolor='w', edgecolor='k')
    ax = plt.subplot(111)
    month_labels = ('Jan', 'Feb', 'Mar', 'Apr', 'May', 'Jun', 'Jul', 'Aug', 'Sep', 'Oct', 'Nov', 'Dec')
    month_series = (jan_list_negative, feb_list_negative, mar_list_negative, apr_list_negative,
                    may_list_negative, jun_list_negative, jul_list_negative, aug_list_negative,
                    sep_list_negative, oct_list_negative, nov_list_negative, dec_list_negative)
    month_offsets = (-0.3, -0.25, -0.20, -0.15, -0.10, -0.05, 0.0, 0.05, 0.10, 0.15, 0.20, 0.25)
    # January red, December green, everything else blue (same palette as before)
    month_colors = ('r',) + ('b',) * 10 + ('g',)
    month_bars = [ax.bar(y_pos + off, series, width=0.03, color=col, align='center')
                  for off, series, col in zip(month_offsets, month_series, month_colors)]
    ax.legend([bars[0] for bars in month_bars], month_labels, ncol=2)

    ax.yaxis.grid(True)
    plt.xticks(y_pos, list_stations, rotation='vertical')
    plt.axhline(0, color='black', lw=2)

    # index of the last date (0-based) so it can be used in the title
    date_index_number = (len(date_range) - 1)
    plt.title('Count underestimating more than ' + str(thershold) + 'ppm' + '\n'
              + str(date_range[0].year) + '-' + str(date_range[0].month) + '-' + str(date_range[0].day)
              + ' to ' + str(date_range[date_index_number].year) + '-' + str(date_range[date_index_number].month)
              + '-' + str(date_range[date_index_number].day) + '\n' + 'Hour(s): all')
    plt.ylabel('count')
    plt.show()
    
    # Over-estimations per station, one thin bar per 3-hourly slot.
    y_pos = np.arange(len(list_stations))
    figure(num=None, figsize=(20, 10), dpi=80, facecolor='w', edgecolor='k')
    ax = plt.subplot(111)
    hour_labels = ('0', '3', '6', '9', '12', '15', '18', '21')
    hour_series = (zero_list_positive, three_list_positive, six_list_positive, nine_list_positive,
                   twelve_list_positive, fifteen_list_positive, eighteen_list_positive,
                   twentyone_list_positive)
    hour_offsets = (-0.4, -0.3, -0.2, -0.1, 0, 0.1, 0.2, 0.3)
    # first slot red, last yellow, the rest green (same palette as before)
    hour_colors = ('r',) + ('g',) * 6 + ('y',)
    hour_bars = [ax.bar(y_pos + off, series, width=0.03, color=col, align='center')
                 for off, series, col in zip(hour_offsets, hour_series, hour_colors)]

    ax.yaxis.grid(True)
    ax.legend([bars[0] for bars in hour_bars], hour_labels)

    plt.xticks(y_pos, list_stations, rotation='vertical')
    plt.axhline(0, color='black', lw=2)

    # index of the last date (0-based) so it can be used in the title
    date_index_number = (len(date_range) - 1)
    plt.title('Count overestimating more than ' + str(thershold) + 'ppm' + '\n'
              + str(date_range[0].year) + '-' + str(date_range[0].month) + '-' + str(date_range[0].day)
              + ' to ' + str(date_range[date_index_number].year) + '-' + str(date_range[date_index_number].month)
              + '-' + str(date_range[date_index_number].day) + '\n' + 'Hour(s): all')
    plt.ylabel('count')
    plt.show()
    
    # Under-estimations per station, one thin bar per 3-hourly slot.
    y_pos = np.arange(len(list_stations))
    figure(num=None, figsize=(20, 10), dpi=80, facecolor='w', edgecolor='k')
    ax = plt.subplot(111)
    hour_labels = ('0', '3', '6', '9', '12', '15', '18', '21')
    hour_series = (zero_list_negative, three_list_negative, six_list_negative, nine_list_negative,
                   twelve_list_negative, fifteen_list_negative, eighteen_list_negative,
                   twentyone_list_negative)
    hour_offsets = (-0.4, -0.3, -0.2, -0.1, 0, 0.1, 0.2, 0.3)
    # first slot red, last yellow, the rest green (same palette as before)
    hour_colors = ('r',) + ('g',) * 6 + ('y',)
    hour_bars = [ax.bar(y_pos + off, series, width=0.03, color=col, align='center')
                 for off, series, col in zip(hour_offsets, hour_series, hour_colors)]

    ax.yaxis.grid(True)
    ax.legend([bars[0] for bars in hour_bars], hour_labels)

    plt.xticks(y_pos, list_stations, rotation='vertical')
    plt.axhline(0, color='black', lw=2)

    # index of the last date (0-based) so it can be used in the title
    date_index_number = (len(date_range) - 1)
    plt.title('Count underestimating more than ' + str(thershold) + 'ppm' + '\n'
              + str(date_range[0].year) + '-' + str(date_range[0].month) + '-' + str(date_range[0].day)
              + ' to ' + str(date_range[date_index_number].year) + '-' + str(date_range[date_index_number].month)
              + '-' + str(date_range[date_index_number].day) + '\n' + 'Hour(s): all')
    plt.ylabel('count')
    plt.show()
    
    Write threshold ppm for display in graphs (positive integer):  10
    

    Back to top

    Count of times the deviation is over and the standard deviation(s) of deviations

    The deviation data extracted in the first cell is needed to run this cell.
    The user chooses the threshold by writing a positive integer when prompted upon running the cell. For each station in all_stations, one bar graph with the count over the threshold and one bar graph with the count under the threshold are output.
    At the end there are four aggregated bar graphs with all stations: One for count of overestimations are shown by month, one for underestimating by month, and the same set of graphs for the different hours.

    In [47]:
    threshold_input = input("Write number of standard deviations for display in graph (ex '1' or '2'):  ")

    # threshold = this many standard deviations of each station's deviations
    number_standard_deviations = float(threshold_input)

    list_stations = ['GAT344', 'HEI', 'LIN099', 'LUT', 'PRS', 'TRN180', 'NOR100', 'HTM150',
                     'SVB150', 'PUY', 'OPE120', 'KRE250', 'JFJ', 'HPB131', 'SMR125']

    matplotlib.rcParams.update({'font.size': 17})

    # Accumulators for the final aggregated comparison between the stations in
    # the station list: one entry per station, per 3-hourly slot ...
    (zero_list_positive, three_list_positive, six_list_positive, nine_list_positive,
     twelve_list_positive, fifteen_list_positive, eighteen_list_positive,
     twentyone_list_positive) = ([] for _ in range(8))

    # ... and per month (over-estimations)
    (jan_list_positive, feb_list_positive, mar_list_positive, apr_list_positive,
     may_list_positive, jun_list_positive, jul_list_positive, aug_list_positive,
     sep_list_positive, oct_list_positive, nov_list_positive,
     dec_list_positive) = ([] for _ in range(12))

    # same accumulators for under-estimations
    (zero_list_negative, three_list_negative, six_list_negative, nine_list_negative,
     twelve_list_negative, fifteen_list_negative, eighteen_list_negative,
     twentyone_list_negative) = ([] for _ in range(8))

    (jan_list_negative, feb_list_negative, mar_list_negative, apr_list_negative,
     may_list_negative, jun_list_negative, jul_list_negative, aug_list_negative,
     sep_list_negative, oct_list_negative, nov_list_negative,
     dec_list_negative) = ([] for _ in range(12))

    index_station = 0
    for station in list_stations:

        # Threshold for this station: N standard deviations of its
        # model-minus-measurement deviation series (NaNs ignored).
        standard_deviation = (numpy.nanstd(list_of_list_diff_measurements_stilt_not_abs[index_station]))

        thershold = float(standard_deviation*number_standard_deviations)
        thershold_neg = -1* float(standard_deviation*number_standard_deviations)

        # one set of bar graphs (by month and by hour) per station

        index = 0

        # For every time step in date_range, record the hour (and month) when
        # the deviation is over/under the threshold, or NaN.  The arrays are
        # as long as date_range; untouched slots stay NaN and are ignored by
        # the later equality-based counting.
        list_of_indexes_positive = np.empty(len(date_range))
        list_of_indexes_positive[:] = np.nan

        list_of_indexes_negative = np.empty(len(date_range))
        list_of_indexes_negative[:] = np.nan

        list_of_indexes_positive_month = np.empty(len(date_range))
        list_of_indexes_positive_month[:] = np.nan

        list_of_indexes_negative_month = np.empty(len(date_range))
        list_of_indexes_negative_month[:] = np.nan

        list_of_nan = np.empty(len(date_range))
        list_of_nan[:] = np.nan

        list_of_nan_month = np.empty(len(date_range))
        list_of_nan_month[:] = np.nan

        # a list where the "month numbers" are added to
        count_months = []

        for value in list_of_list_diff_measurements_stilt_not_abs[index_station]:
            count_months.append(date_range[index].month)

            # BUG FIX: the thresholds were truncated with int() before the
            # comparison, so e.g. a sigma-based threshold of 7.8 ppm actually
            # counted every deviation above 7 ppm while the graph titles
            # claimed 7.8.  Compare against the float thresholds directly.
            if value > thershold:
                list_of_indexes_positive[index] = date_range[index].hour
                list_of_indexes_positive_month[index] = date_range[index].month

            if value < thershold_neg:
                list_of_indexes_negative[index] = date_range[index].hour
                list_of_indexes_negative_month[index] = date_range[index].month

            # np.isnan instead of math.isnan: `math` is never imported in this
            # notebook's import cell, so math.isnan risked a NameError on a
            # fresh kernel.
            if np.isnan(value):
                list_of_nan[index] = date_range[index].hour
                list_of_nan_month[index] = date_range[index].month

            # move to the next deviation value
            index = index + 1
    
    
        #how many of these values are there for each month?
        count_jan_month = 0
        count_feb_month = 0
        count_mar_month = 0
        count_apr_month = 0
        count_may_month = 0
        count_jun_month = 0
        count_jul_month = 0
        count_aug_month = 0
        count_sep_month = 0
        count_oct_month = 0
        count_nov_month = 0
        count_dec_month = 0
    
        #how many total in each month:
        for month in count_months:
            if month == 1:
                count_jan_month = count_jan_month + 1
            if month == 2:
                count_feb_month = count_feb_month + 1        
            if month == 3:
                count_mar_month = count_mar_month + 1
            if month == 4:
                count_apr_month = count_apr_month + 1        
            if month == 5:
                count_may_month = count_may_month + 1 
            if month == 6:
                count_jun_month = count_jun_month + 1
            if month == 7:
                count_jul_month = count_jul_month + 1    
            if month == 8:
                count_aug_month = count_aug_month + 1 
            if month == 9:
                count_sep_month = count_sep_month + 1
            if month == 10:
                count_oct_month = count_oct_month + 1
            if month == 11:
                count_nov_month = count_nov_month + 1        
            if month == 12:
                count_dec_month = count_dec_month + 1
    
        # Hours of day at which the model OVER-estimates by more than the
        # threshold.  NaN placeholders never compare equal to an hour, so only
        # real hits are counted.
        (count_0, count_3, count_6, count_9,
         count_12, count_15, count_18, count_21) = (
            sum(1 for v in list_of_indexes_positive if v == h)
            for h in (0, 3, 6, 9, 12, 15, 18, 21))

        # for the aggregated bar graph at the end
        zero_list_positive.append(count_0)
        three_list_positive.append(count_3)
        six_list_positive.append(count_6)
        nine_list_positive.append(count_9)
        twelve_list_positive.append(count_12)
        fifteen_list_positive.append(count_15)
        eighteen_list_positive.append(count_18)
        twentyone_list_positive.append(count_21)
    
        # Per-month tallies of over-estimations beyond the threshold.
        (count_jan, count_feb, count_mar, count_apr,
         count_may, count_jun, count_jul, count_aug,
         count_sep, count_oct, count_nov, count_dec) = (
            sum(1 for v in list_of_indexes_positive_month if v == m)
            for m in range(1, 13))

        # accumulate for the aggregated per-station graphs at the end
        jan_list_positive.append(count_jan)
        feb_list_positive.append(count_feb)
        mar_list_positive.append(count_mar)
        apr_list_positive.append(count_apr)
        may_list_positive.append(count_may)
        jun_list_positive.append(count_jun)
        jul_list_positive.append(count_jul)
        aug_list_positive.append(count_aug)
        sep_list_positive.append(count_sep)
        oct_list_positive.append(count_oct)
        nov_list_positive.append(count_nov)
        dec_list_positive.append(count_dec)
    
        # Hours of day at which the model UNDER-estimates by more than the
        # threshold; NaN placeholders are skipped automatically.
        (count_0_neg, count_3_neg, count_6_neg, count_9_neg,
         count_12_neg, count_15_neg, count_18_neg, count_21_neg) = (
            sum(1 for v in list_of_indexes_negative if v == h)
            for h in (0, 3, 6, 9, 12, 15, 18, 21))

        # accumulate this station's per-hour counts for the aggregated graphs
        zero_list_negative.append(count_0_neg)
        three_list_negative.append(count_3_neg)
        six_list_negative.append(count_6_neg)
        nine_list_negative.append(count_9_neg)
        twelve_list_negative.append(count_12_neg)
        fifteen_list_negative.append(count_15_neg)
        eighteen_list_negative.append(count_18_neg)
        twentyone_list_negative.append(count_21_neg)
    
    
        # Per-month tallies of under-estimations beyond the threshold.
        (count_jan_neg, count_feb_neg, count_mar_neg, count_apr_neg,
         count_may_neg, count_jun_neg, count_jul_neg, count_aug_neg,
         count_sep_neg, count_oct_neg, count_nov_neg, count_dec_neg) = (
            sum(1 for v in list_of_indexes_negative_month if v == m)
            for m in range(1, 13))

        # accumulate for the aggregated per-station graphs at the end
        jan_list_negative.append(count_jan_neg)
        feb_list_negative.append(count_feb_neg)
        mar_list_negative.append(count_mar_neg)
        apr_list_negative.append(count_apr_neg)
        may_list_negative.append(count_may_neg)
        jun_list_negative.append(count_jun_neg)
        jul_list_negative.append(count_jul_neg)
        aug_list_negative.append(count_aug_neg)
        sep_list_negative.append(count_sep_neg)
        oct_list_negative.append(count_oct_neg)
        nov_list_negative.append(count_nov_neg)
        dec_list_negative.append(count_dec_neg)
    
        # Per-hour tallies of time steps whose deviation is NaN
        # (no measurement and/or no modelled value for that hour).
        (count_0_nan, count_3_nan, count_6_nan, count_9_nan,
         count_12_nan, count_15_nan, count_18_nan, count_21_nan) = (
            sum(1 for v in list_of_nan if v == h)
            for h in (0, 3, 6, 9, 12, 15, 18, 21))
    
        # Tally NaN model-data differences by calendar month (1 = Jan ... 12 = Dec).
        monthly_nan_counts = [
            sum(1 for month_index in list_of_nan_month if month_index == month)
            for month in range(1, 13)
        ]

        (count_jan_nan, count_feb_nan, count_mar_nan, count_apr_nan,
         count_may_nan, count_jun_nan, count_jul_nan, count_aug_nan,
         count_sep_nan, count_oct_nan, count_nov_nan, count_dec_nan) = monthly_nan_counts
    
        # Per-hour summaries (eight 3-hourly slots: 0, 3, ..., 21).
        list_negatives = [count_0_neg, count_3_neg, count_6_neg, count_9_neg,
                          count_12_neg, count_15_neg, count_18_neg, count_21_neg]
        list_positives = [count_0, count_3, count_6, count_9,
                          count_12, count_15, count_18, count_21]
        list_nan = [count_0_nan, count_3_nan, count_6_nan, count_9_nan,
                    count_12_nan, count_15_nan, count_18_nan, count_21_nan]

        # Per-month summaries (Jan ... Dec).
        list_positives_month = [count_jan, count_feb, count_mar, count_apr,
                                count_may, count_jun, count_jul, count_aug,
                                count_sep, count_oct, count_nov, count_dec]
        list_negatives_month = [count_jan_neg, count_feb_neg, count_mar_neg, count_apr_neg,
                                count_may_neg, count_jun_neg, count_jul_neg, count_aug_neg,
                                count_sep_neg, count_oct_neg, count_nov_neg, count_dec_neg]
        list_nan_month = [count_jan_nan, count_feb_nan, count_mar_nan, count_apr_nan,
                          count_may_nan, count_jun_nan, count_jul_nan, count_aug_nan,
                          count_sep_nan, count_oct_nan, count_nov_nan, count_dec_nan]

        # Total footprints per month (used elsewhere for percentage calculations).
        count_month_list = [count_jan_month, count_feb_month, count_mar_month, count_apr_month,
                            count_may_month, count_jun_month, count_jul_month, count_aug_month,
                            count_sep_month, count_oct_month, count_nov_month, count_dec_month]

        # Grand totals decide below whether there is anything worth plotting.
        total_negatives = sum(int(count) for count in list_negatives)
        total_positives = sum(int(count) for count in list_positives)
    
    
        #changed here... don't want to display empty graphs.
        # Only draw the per-station graphs when at least one footprint exceeded
        # the threshold in either direction.
        if total_negatives > 0 or total_positives > 0:

            #MAKING bar graph for hour of the day:
            count_positives = np.array((count_0, count_3, count_6, count_9, count_12, count_15, count_18, count_21))
            count_negatives = np.array((count_0_neg, count_3_neg, count_6_neg, count_9_neg, count_12_neg, count_15_neg, count_18_neg, count_21_neg))
            count_nan = np.array((count_0_nan, count_3_nan, count_6_nan, count_9_nan, count_12_nan, count_15_nan, count_18_nan, count_21_nan))

            count_nan_total = sum(count_nan)
            #how many values there are:
            N = len(count_positives)

            ind = np.arange(N)
            width = 0.35

            matplotlib.rcParams.update({'font.size': 17})
            figure(num=None, figsize=(11, 8), dpi=80, facecolor='w', edgecolor='k')
            ax = plt.subplot(111)
            ax.yaxis.grid(True)

            p1 = plt.bar(ind, count_positives, width, color='red')
            p2 = plt.bar(ind, count_negatives, width, bottom=count_positives, color='green')
            #p3 = plt.bar(ind,count_nan, width, bottom=count_positives+count_negatives, color= 'grey')

            plt.ylabel('Count')

            plt.xlabel('Hour of the day (count NaN)')

            #for title --> use variables of last cell as well (ex number of no measurements. nan here can also just mean no cells for it)
            # NOTE: 'thershold' (sic) is the variable name defined earlier in the
            # notebook; keep the spelling for consistency with the defining cell.
            plt.title(station + '\n' + 'Model data differences greater than ' + str(number_standard_deviations) + ' standard deviation(s): +-' + str("%.2f" % thershold) + ' ppm' + '\n' + str(date_range[0].year) + '-' + str(date_range[0].month) + '-' + str(date_range[0].day)\
                     + ' to ' + str(date_range[date_index_number].year) + '-' + str(date_range[date_index_number].month) + '-' + str(date_range[date_index_number].day)+ '\n'\
                     'Footprints within date range: ' + str(count)+ '\n' + 'Count NaN: ' + str(count_nan_total))
            plt.xticks(ind, ('0 (' + str(count_nan[0]) + ')', '3 (' + str(count_nan[1]) + ')', '6 (' + str(count_nan[2]) + ')', '9 (' + str(count_nan[3]) + ')', '12 (' + str(count_nan[4]) + ')', '15 (' + str(count_nan[5]) + ')', '18 (' + str(count_nan[6]) + ')', '21 (' + str(count_nan[7]) + ')'), rotation='vertical')

            # FIX: plt.legend was previously called twice; the second call
            # (loc/bbox arguments only) replaced the first legend and dropped
            # its handles and labels. One call keeps the labelled legend at the
            # intended position below the axes.
            plt.legend((p1[0], p2[0]),
                       ('>' + str(thershold) +  ' ppm over: ' + str(sum(count_positives)),
                        '>' + str(thershold_neg) + 'ppm below: ' + str(sum(count_negatives))),
                       loc=9, bbox_to_anchor=(0.5, -0.1))

            plt.show()


            #NEW: graph for by month:

            count_positives_month = np.array((count_jan, count_feb, count_mar, count_apr, count_may, count_jun, count_jul, count_aug, count_sep, count_oct, count_nov, count_dec))
            count_negatives_month = np.array((count_jan_neg, count_feb_neg, count_mar_neg, count_apr_neg, count_may_neg, count_jun_neg, count_jul_neg, count_aug_neg, count_sep_neg, count_oct_neg, count_nov_neg, count_dec_neg))
            count_nan_month = np.array((count_jan_nan, count_feb_nan, count_mar_nan, count_apr_nan, count_may_nan, count_jun_nan, count_jul_nan, count_aug_nan, count_sep_nan, count_oct_nan, count_nov_nan, count_dec_nan))

            figure(num=None, figsize=(11, 8), dpi=80, facecolor='w', edgecolor='k')
            matplotlib.rcParams.update({'font.size': 17})

            N = len(count_positives_month)
            ax = plt.subplot(111)
            ax.yaxis.grid(True)

            ind = np.arange(N)
            width = 0.35

            p1 = plt.bar(ind, count_positives_month, width, color='red')
            p2 = plt.bar(ind, count_negatives_month, width, bottom=count_positives_month, color='blue')
            #p3 = plt.bar(ind,count_nan_month, width, bottom=count_positives_month+count_negatives_month, color= 'grey')

            plt.ylabel('Count')

            plt.xlabel('Month (count NaN)')

            #for title --> use variables of last cell as well (ex number of no measurements. nan here can also just mean no cells for it)
            plt.title(station + '\n' + 'Model data differences greater than ' + str(number_standard_deviations) + ' standard deviation(s): +-' + str("%.2f" % thershold) + ' ppm' + '\n' + str(date_range[0].year) + '-' + str(date_range[0].month) + '-' + str(date_range[0].day)\
                     + ' to ' + str(date_range[date_index_number].year) + '-' + str(date_range[date_index_number].month) + '-' + str(date_range[date_index_number].day)+ '\n'\
                     'Footprints within date range: ' + str(count)+ '\n' + 'Count NaN: ' + str( count_nan_total))
            plt.xticks(ind, ('Jan (' + str(count_nan_month[0]) + ')', 'Feb (' + str(count_nan_month[1]) + ')', 'Mar (' + str(count_nan_month[2]) + ')', 'Apr (' + str(count_nan_month[3]) + ')', 'May (' + str(count_nan_month[4]) + ')', 'Jun (' + str(count_nan_month[5]) + ')', 'Jul (' + str(count_nan_month[6]) + ')', 'Aug (' + str(count_nan_month[7]) + ')', 'Sep (' + str(count_nan_month[8]) + ')', 'Oct (' + str(count_nan_month[9]) + ')', 'Nov (' + str(count_nan_month[10]) + ')', 'Dec (' + str(count_nan_month[11]) + ')'), rotation='vertical')

            # FIX: same double-legend problem as in the hourly graph above.
            plt.legend((p1[0], p2[0]),
                       ('>' + str(thershold) +  ' ppm over: ' + str(sum(count_positives)),
                        '>' + str(thershold_neg) + 'ppm below: ' + str(sum(count_negatives))),
                       loc=9, bbox_to_anchor=(0.5, -0.1))

            plt.show()

        else:
            # FIX: this message previously referenced an undefined name
            # 'threshold' (the notebook's variable is spelled 'thershold'),
            # raising a NameError whenever a station had no exceedances.
            print ('Modeled concentrations are never disagreeing with measured concentrations above or below threshold ', thershold)

        # FIX: this increment was dedented out of the per-station loop, so every
        # station was processed with index_station == 0. It must advance once
        # per station, matching the analogous loop later in this notebook.
        index_station = index_station + 1
    
    #aggregated bar all stations at the end.
    #one bar for overestimation ("positive") and one for under estimation ("negatives")

    #overestimating more than threshold by month
    y_pos = np.arange(len(list_stations))
    figure(num=None, figsize=(20, 10), dpi=80, facecolor='w', edgecolor='k')
    ax = plt.subplot(111)

    # One grouped bar per month per station; offsets and colours are identical
    # to the original twelve hand-written ax.bar calls.
    monthly_positive_lists = (jan_list_positive, feb_list_positive, mar_list_positive,
                              apr_list_positive, may_list_positive, jun_list_positive,
                              jul_list_positive, aug_list_positive, sep_list_positive,
                              oct_list_positive, nov_list_positive, dec_list_positive)
    month_offsets = (-0.3, -0.25, -0.20, -0.15, -0.10, -0.05,
                     0.0, 0.05, 0.10, 0.15, 0.20, 0.25)
    month_colors = ('r', 'b', 'b', 'b', 'b', 'b', 'b', 'b', 'b', 'b', 'b', 'g')

    month_bars = [ax.bar(y_pos + offset, month_list, width=0.03, color=bar_color, align='center')
                  for month_list, offset, bar_color
                  in zip(monthly_positive_lists, month_offsets, month_colors)]

    ax.legend(tuple(bars[0] for bars in month_bars),
              ('Jan', 'Feb', 'Mar', 'Apr','May', 'Jun', 'Jul', 'Aug', 'Sep', 'Oct', 'Nov', 'Dec'),
              ncol=2)

    ax.yaxis.grid(True)
    plt.xticks(y_pos, list_stations, rotation='vertical')
    plt.axhline(0, color='black', lw=2)

    #get the index of the last date (start at 0) to access it in the titles
    date_index_number = (len(date_range) - 1)
    plt.title('Count model data difference greater than ' + str(number_standard_deviations) + ' standard deviation(s) over' + '\n'  +  str(date_range[0].year) + '-' + str(date_range[0].month) + '-' + str(date_range[0].day)\
                         + ' to ' + str(date_range[date_index_number].year) + '-' + str(date_range[date_index_number].month) + '-' + str(date_range[date_index_number].day)+ '\n' +\
             'Hour(s): all')
    plt.ylabel('count')
    plt.show()
    
    #underestimation more than threshold by month
    y_pos = np.arange(len(list_stations))
    figure(num=None, figsize=(20, 10), dpi=80, facecolor='w', edgecolor='k')
    ax = plt.subplot(111)

    # Same grouped-bar layout as the overestimation chart above, driven by the
    # per-month underestimation lists.
    monthly_negative_lists = (jan_list_negative, feb_list_negative, mar_list_negative,
                              apr_list_negative, may_list_negative, jun_list_negative,
                              jul_list_negative, aug_list_negative, sep_list_negative,
                              oct_list_negative, nov_list_negative, dec_list_negative)
    month_offsets = (-0.3, -0.25, -0.20, -0.15, -0.10, -0.05,
                     0.0, 0.05, 0.10, 0.15, 0.20, 0.25)
    month_colors = ('r', 'b', 'b', 'b', 'b', 'b', 'b', 'b', 'b', 'b', 'b', 'g')

    month_bars = [ax.bar(y_pos + offset, month_list, width=0.03, color=bar_color, align='center')
                  for month_list, offset, bar_color
                  in zip(monthly_negative_lists, month_offsets, month_colors)]

    ax.legend(tuple(bars[0] for bars in month_bars),
              ('Jan', 'Feb', 'Mar', 'Apr','May', 'Jun', 'Jul', 'Aug', 'Sep', 'Oct', 'Nov', 'Dec'),
              ncol=2)

    ax.yaxis.grid(True)
    plt.xticks(y_pos, list_stations, rotation='vertical')
    plt.axhline(0, color='black', lw=2)

    #get the index of the last date (start at 0) to access it in the titles
    date_index_number = (len(date_range) - 1)
    plt.title('Count model data difference greater than ' + str(number_standard_deviations) + ' standard deviation(s) below' + '\n'  +  str(date_range[0].year) + '-' + str(date_range[0].month) + '-' + str(date_range[0].day)\
                         + ' to ' + str(date_range[date_index_number].year) + '-' + str(date_range[date_index_number].month) + '-' + str(date_range[date_index_number].day)+ '\n' +\
             'Hour(s): all')
    plt.ylabel('count')
    plt.show()
    
    #overestimating more than threshold by hour
    y_pos = np.arange(len(list_stations))
    figure(num=None, figsize=(20, 10), dpi=80, facecolor='w', edgecolor='k')
    ax = plt.subplot(111)

    # One grouped bar per 3-hourly slot per station; offsets and colours match
    # the original eight hand-written ax.bar calls.
    hourly_positive_lists = (zero_list_positive, three_list_positive, six_list_positive,
                             nine_list_positive, twelve_list_positive, fifteen_list_positive,
                             eighteen_list_positive, twentyone_list_positive)
    hour_offsets = (-0.4, -0.3, -0.2, -0.1, 0.0, 0.1, 0.2, 0.3)
    hour_colors = ('r', 'g', 'g', 'g', 'g', 'g', 'g', 'y')

    hour_bars = [ax.bar(y_pos + offset, hour_list, width=0.03, color=bar_color, align='center')
                 for hour_list, offset, bar_color
                 in zip(hourly_positive_lists, hour_offsets, hour_colors)]

    ax.yaxis.grid(True)
    ax.legend(tuple(bars[0] for bars in hour_bars),
              ('0', '3', '6', '9','12', '15', '18', '21'))

    plt.xticks(y_pos, list_stations, rotation='vertical')
    plt.axhline(0, color='black', lw=2)

    #get the index of the last date (start at 0) to access it in the titles
    date_index_number = (len(date_range) - 1)
    plt.title('Count model data difference greater than ' + str(number_standard_deviations) + ' standard deviation(s) over' + '\n'  +  str(date_range[0].year) + '-' + str(date_range[0].month) + '-' + str(date_range[0].day)\
                         + ' to ' + str(date_range[date_index_number].year) + '-' + str(date_range[date_index_number].month) + '-' + str(date_range[date_index_number].day)+ '\n' +\
             'Hour(s): all')
    plt.ylabel('count')
    plt.show()
    
    #underestimating more than threshold by hour
    y_pos = np.arange(len(list_stations))
    figure(num=None, figsize=(20, 10), dpi=80, facecolor='w', edgecolor='k')
    ax = plt.subplot(111)

    # Same grouped-bar layout as the hourly overestimation chart, driven by
    # the per-hour underestimation lists.
    hourly_negative_lists = (zero_list_negative, three_list_negative, six_list_negative,
                             nine_list_negative, twelve_list_negative, fifteen_list_negative,
                             eighteen_list_negative, twentyone_list_negative)
    hour_offsets = (-0.4, -0.3, -0.2, -0.1, 0.0, 0.1, 0.2, 0.3)
    hour_colors = ('r', 'g', 'g', 'g', 'g', 'g', 'g', 'y')

    hour_bars = [ax.bar(y_pos + offset, hour_list, width=0.03, color=bar_color, align='center')
                 for hour_list, offset, bar_color
                 in zip(hourly_negative_lists, hour_offsets, hour_colors)]

    ax.yaxis.grid(True)
    ax.legend(tuple(bars[0] for bars in hour_bars),
              ('0', '3', '6', '9','12', '15', '18', '21'))

    plt.xticks(y_pos, list_stations, rotation='vertical')
    plt.axhline(0, color='black', lw=2)

    #get the index of the last date (start at 0) to access it in the titles
    date_index_number = (len(date_range) - 1)
    plt.title('Count model data difference greater than ' + str(number_standard_deviations) + ' standard deviation(s) under' + '\n'  +  str(date_range[0].year) + '-' + str(date_range[0].month) + '-' + str(date_range[0].day)\
                         + ' to ' + str(date_range[date_index_number].year) + '-' + str(date_range[date_index_number].month) + '-' + str(date_range[date_index_number].day)+ '\n' +\
             'Hour(s): all')
    plt.ylabel('count')
    plt.show()
    
    Write number of standard deviations for display in graph (ex '1' or '2'):  1
    

    Back to top

    Relative footprint maps: areas associated with overestimates or areas associated with underestimates

    In this cell, the footprints are subset into footprints that have a model-data difference over and under one standard deviation. The resulting subsets are divided by the average footprint for year 2017 at the stations and multiplied by 100. This gives a percentage value for each footprint cell in the resulting relative footprint map. A value over 100 means that the cell has seen more sensitivity when the model over- or underestimated the concentrations at the stations (separate maps).
    A future development will be the possibility of exporting the dates and times of the footprints with over- or underestimates for use in the other Notebooks. Exporting the dates and times of the "better performing" footprints is another option.

    In [39]:
    index_station = 0

    dates_over = []
    # FIX: removed a stray bare expression `date_range[index_in_diff]` that
    # stood here. index_in_diff is not defined until inside the loop below, so
    # the lookup raised a NameError on Restart & Run All (it only "worked" via
    # kernel state leaked from a previous run), and its result was discarded.
    pathFP = '/opt/stiltdata/fsicos2/stiltweb/'
    
    
    
    #list_stations rather than all_stations in a few places
    for station in list_stations:
        
        
        count_diff_neg=0
        
        count_diff_pos=0
        
        index_in_diff=0
        count_all=0
    
        for value in list_of_list_diff_measurements_stilt_not_abs[index_station]:
            filename=(pathFP+'slots/'+stations[station]['locIdent']+'/'+str(date_range[index_in_diff].year)+'/'+str(date_range[index_in_diff].month).zfill(2)+'/'
                    +str(date_range[index_in_diff].year)+'x'+str(date_range[index_in_diff].month).zfill(2)+'x'+str(date_range[index_in_diff].day).zfill(2)+'x'+str(date_range[index_in_diff].hour).zfill(2)+'/foot')
    
            #for title. Not all stations in all_stations necessarily "valid" (both modeled and measured results)
            current_station_for_title=station[0:3]
            current_station=station
    
            #the loop over all footprints:
            if math.isnan(value)== False:
                if count_all==0:
    
                    f_fp = cdf.Dataset(filename)
                    fp_all_summed=f_fp.variables['foot'][:,:,:]
                    lon=f_fp.variables['lon'][:]
                    lat=f_fp.variables['lat'][:]
                    count_all=count_all +1
    
                else:
                    #no need to define lat and lon over and over ^
                    f_fp = cdf.Dataset(filename)
    
                    fp=f_fp.variables['foot'][:,:,:]
    
                    fp_all_summed=fp_all_summed + fp
    
                    count_all=count_all+1
    
            #possibly change
            stdev= np.nanstd(list_of_list_diff_measurements_stilt_not_abs[index_station])
            
            #when under - do not forget minus! make interactive. 
            if value<-stdev:
    
                if os.path.isfile(filename):
    
                    if count_diff_neg==0:
                        f_fp = cdf.Dataset(filename)
                        fp_summed_under=f_fp.variables['foot'][:,:,:]
                        #lon=f_fp.variables['lon'][:]
                        #lat=f_fp.variables['lat'][:]
                        count_diff_neg=count_diff_neg +1
    
                    else:
    
                        f_fp = cdf.Dataset(filename)
    
    
                        fp=f_fp.variables['foot'][:,:,:]
    
    
    
                        fp_summed_under=fp_summed_under + fp
                        count_diff_neg=count_diff_neg +1
    
    
                else:
                    print ('file does not exist: ',filename)
             
            #overestimates
            if value>stdev:
    
                if os.path.isfile(filename):
    
                    if count_diff_pos==0:
                        f_fp = cdf.Dataset(filename)
                        fp_summed_over=f_fp.variables['foot'][:,:,:]
                        #lon=f_fp.variables['lon'][:]
                        #lat=f_fp.variables['lat'][:]
                        count_diff_pos=count_diff_pos +1
    
                    else:
    
                        f_fp = cdf.Dataset(filename)
    
    
                        fp=f_fp.variables['foot'][:,:,:]
    
    
                        fp_summed_over=fp_summed_over + fp
                        count_diff_pos=count_diff_pos +1
    
    
                else:
                    print ('file does not exist: ',filename)
            
            #future potential: subset all "good values"
            #if value>-stdev and value<stdev:
                #dates_within_one_std.append(date_range[index_in_diff])
                
                    
            #adding for every value in diff_measurement_stilt_best_not_abs (one for each footprint in defined date range)  
            index_in_diff=index_in_diff+1
    
        average_under=fp_summed_under/count_diff_neg
        
        average_over=fp_summed_over/count_diff_pos
    
        average_all=fp_all_summed/count_all
    
        #if it is the same it will be 0... 
        #what where average_under is bigger. + value means more of underestimation
    
    
        #diff= average_under-average_all
    
        diff_under=(average_under/average_all)*100
        
        diff_over=(average_over/average_all)*100
        
        date_index_number = (len(date_range) - 1)
    
        #unit before: ppm / ($\mu$mol / m$^{2}$s)
    
        for_title=(str(current_station_for_title) + ' association with underestimation' + '\n' + 'Average over ' + str("%.2f" % np.nanstd(list_of_list_diff_measurements_stilt_not_abs[index_station])) + '(' + str(count_diff_neg) + ')' + \
                   ' - average all valid footprints (' + str(count_all) + ')' + '\n' + \
                   str(date_range[0].year) + '-' + str(date_range[0].month) + '-' + str(date_range[0].day) + ' to ' + str(date_range[date_index_number].year) +\
                   '-' + str(date_range[date_index_number].month) + '-' + str(date_range[date_index_number].day)+ '\n' + 'Hour(s): all')
        
        plot_maps_diff(diff_under, lon, lat, title=for_title, label='deviation from average', unit='[%]', linlog='linear', station=[current_station], zoom=current_station)
        
        for_title_over=(str(current_station_for_title) + ' association with overestimation' + '\n' + 'Average over ' + str("%.2f" % np.nanstd(list_of_list_diff_measurements_stilt_not_abs[index_station])) + '(' + str(count_diff_pos) + ')' + \
                   ' - average all valid footprints (' + str(count_all) + ')' + '\n' + \
                   str(date_range[0].year) + '-' + str(date_range[0].month) + '-' + str(date_range[0].day) + ' to ' + str(date_range[date_index_number].year) +\
                   '-' + str(date_range[date_index_number].month) + '-' + str(date_range[date_index_number].day)+ '\n' + 'Hour(s): all')
        
        plot_maps_diff(diff_over, lon, lat, title=for_title_over, label='deviation from average', unit='[%]', linlog='linear', station=[current_station], zoom=current_station)
    
        index_station=index_station+1
    
    #potential for future (make for all stations! --> save in seperate lists)
    #pickle date range for use in other notebooks.
    #f = open("custom_date_range.pkl","wb")
    #pickle.dump(dates_over,f)
    #f.close()