%run ~/STILT_modules_v2.5.ipynb
%run ~/ICOS_atmObs_modules_v2.5.ipynb
%run ~/for_thesis.ipynb
stations = create_STILT_dictionary()
import pandas as pd
import matplotlib.pyplot as plt
from matplotlib.pyplot import figure
import matplotlib
import math
import numpy as np
import numpy
from mpl_toolkits.basemap import Basemap
import xlrd
from netCDF4 import Dataset
#os, netCDF4 (as cdf) and pylab (as p) are used further down; they may already be provided
#by the %run notebooks above, but are imported here so this section is self-contained:
import os
import netCDF4 as cdf
import pylab as p
#path to footprints used many places:
pathFP='/opt/stiltdata/fsicos2/stiltweb/'
list_stations_for_analysis=select_stations()
It is also possible to write the names of one or several additional stations to base the analysis on.
only_selection_stations=list_stations_for_analysis.value
all_stations=[]
for value in only_selection_stations:
all_stations.append(value)
additional_station=input('Write additional station(s) here, otherwise leave blank. If several, separate by whitespace: ')
list_additional_station = additional_station.split()
for item in list_additional_station:
all_stations.append(str(item))
print('Your selection is: ', all_stations)
icos_stations = [ist[0:3] for ist in all_stations]
ICOS_files = get_ICOS_filename(icos_stations,download=True)
date_range, timeselect, start_date, end_date = date_range_date_hour()
date_index_number = (len(date_range) - 1)
This cell looks at how much of the sensitivity at the stations falls within user-defined distances.
The equirectangular approximation is used to calculate the distances from the stations to the STILT cells, to determine whether the cell values should be included or not.
The user defines three distances and the sensitivity within each of them is summed up and displayed in bar graphs.
The two output bar graphs contain the same data, but in the second one the result for each station is stacked. A minimal sketch of the distance calculation is given below.
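The sketch below is only an illustration of the equirectangular approximation used in this cell (the helper function and its example call are not part of the original code); coordinates are assumed to be in degrees and the result is in km, with the same Earth radius value as used further down.
#sketch of the distance check used in this cell (illustration only, not the cell's exact code)
def equirectangular_distance_km(lat1, lon1, lat2, lon2, R=6373.8):
    #equirectangular approximation: small-angle projection around the mean latitude
    x = math.radians(lon1 - lon2) * math.cos(math.radians(lat1 + lat2) / 2)
    y = math.radians(lat1 - lat2)
    return math.sqrt(x * x + y * y) * R
#example: a cell would be included in the innermost circle if
#equirectangular_distance_km(lat1, lon1, lat3, lon3) < distances_pbl_list[0]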
#save in a dictionary:
dictionary_sensitivity_area={}
import time
#user put in distances
distances_pbl = input("Choose THREE distances from the station to check influence, print smallest to largest. Space between each entry: ")
update_dictionaries=input('Do you want to update the dictionaries with the current selections? Type yes to confirm: ')
distances_pbl_list = distances_pbl.split()
distances_pbl_list = [int(a) for a in distances_pbl_list]
start = time.time()
#excel file with the cols/rows of stations.
ExcelFileName= 'stilt_stations_col_row.xls'
workbook = xlrd.open_workbook(ExcelFileName)
worksheet = workbook.sheet_by_name('stilt_stations_col_row')
num_rows = worksheet.nrows
num_cols = worksheet.ncols
#for each of the stations, add to these lists:
station_list=[]
average_footprint_1_list=[]
average_footprint_2_list=[]
average_footprint_3_list=[]
#the total of all cells
averaged_summed_footprint_list=[]
#just for the lat/lon values:
f_fp= Dataset('final_netcdf_point_source_emission.nc')
dictionary_sensitivity_area['info']={}
dictionary_sensitivity_area['info']['date_min']=(str(date_range[0].year) + '-' + str(date_range[0].month) + '-' + str(date_range[0].day))
dictionary_sensitivity_area['info']['date_max']=(str(date_range[date_index_number].year) + '-' + str(date_range[date_index_number].month) + '-' + str(date_range[date_index_number].day))
dictionary_sensitivity_area['info']['hours']=(str(timeselect))
dictionary_sensitivity_area['info']['stations']=all_stations
dictionary_sensitivity_area['info']['distances']=distances_pbl_list
#loop over all selected stations:
my_iterator=iter(all_stations)
for ist in my_iterator:
dictionary_sensitivity_area[ist]={}
dictionary_sensitivity_area[ist]['percent']={}
dictionary_sensitivity_area[ist]['absolute']={}
#lists to collect how many cells away each user-defined distance is exceeded (the minimum found is used later).
#Done in the same loop for all three distances --> probably only the largest distance is needed.
value_1_row=[]
value_2_row=[]
value_3_row=[]
value_1_col=[]
value_2_col=[]
value_3_col=[]
count = 0
within_distance_1_total=0
within_distance_2_total=0
within_distance_3_total=0
#probably remove
summed_footprint_total = 0
#for each of the dates for the current station, add to these lists:
date_list=[]
percent_1_list=[]
percent_2_list=[]
percent_3_list=[]
within_distance_1_list=[]
within_distance_2_list=[]
within_distance_3_list=[]
summed_footprint_list=[]
#this is to locate the station column/row (=also lat/lon) in the excel file:
for curr_row in range(0, num_rows, 1):
#the station names are in the second (index 1) column:
if worksheet.cell_value(curr_row, 1) == ist:
#the "matched station" associated column and row in the footprint matrix is in column 3 (index 2) and column 4 (index 3)
station_col = worksheet.cell_value(curr_row, 2)
station_row = worksheet.cell_value(curr_row, 3)
#point one (measure from) is the station cell. Measure from the "bottom right" corner of the station cell?
#The distances are measured to the bottom right of the other cells in that case, so that is probably fine.
#there is code elsewhere to "move" the lat/lon dimensions halfway through the cell in each direction otherwise.
lon1= f_fp.variables['lon'][station_col]
lat1= f_fp.variables['lat'][station_row]
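#If cell-centre rather than corner coordinates were wanted, a half-cell shift could be applied.
#Sketch only (commented out, hypothetical variable names; grid spacing taken from the lon/lat arrays):
#lon_step = float(f_fp.variables['lon'][1] - f_fp.variables['lon'][0])
#lat_step = float(f_fp.variables['lat'][1] - f_fp.variables['lat'][0])
#lon1_centre = lon1 + lon_step/2
#lat1_centre = lat1 + lat_step/2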
#how many cells away from the station cell. Which one to loop to.
for value in range(1,399,1):
#station col+value must be within STILT domain.
if (station_col+value)>399:
break
#check how many cells away the distance is exceeded, by checking the distance to increasingly distant columns:
#lat2=lat1 --> same row (hence lat1+lat1 in the equation); just moving along the columns, checking the radius of the circle.
#if the index goes over 399 the loop breaks above; a message to the user should probably be added as well.
#check distance in longitude (the column index is the only one that changes here).
lon2=f_fp.variables['lon'][station_col+value]
R = 6373.8
#latitude remains the same... just moving to the furthest-away points
x = math.radians(lon1-lon2)*math.cos(math.radians(lat1+lat1)/2)
y=math.radians(lat1-lat1)
distance_col = math.sqrt((x*x)+(y*y)) * R
if distance_col>int(distances_pbl_list[2]):
value_3_col.append(value)
break
#these values decide how "far away" to loop to reach the furthest end of the circle.
#the lowest value still "passes" the distance, so subtract 1 to stay within it.
loop_to_col_3= (min(value_3_col)-1)
#the distance per row is always the same --> would only need to be done once for all stations (it will of course change with the distances the user puts in)
for value in range(1,479,1):
#make sure staying within the domain
if (station_row+value)>479:
break
lat2=f_fp.variables['lat'][station_row+value]
R = 6373.8
x = math.radians(lon1-lon1)*math.cos(math.radians(lat1+lat2)/2)
y=math.radians(lat1-lat2)
distance_row = math.sqrt((x*x)+(y*y)) * R
if distance_row>int(distances_pbl_list[2]):
value_3_row.append(value)
break
#easiest way to get the summed footprint values: the aggregate. Do not have it for individual footprints.
#used in the presentation, but also to calculate percentage.
#the "fp" here also contains all the cell values that will be added within the different circles to determine the sensitivity.
nfp, fp, lon, lat, title2 = read_aggreg_footprints(ist, date_range, timeselect=timeselect)
summed_footprint= fp[0].sum()
loop_to_row_3= (min(value_3_row)-1)
#Have a "square" defined in which the "circle" around the station is.
#loop over all the cells in the square and add to the correct "within distance" the user defined.
within_distance_1= 0
within_distance_2= 0
within_distance_3= 0
#loop over all the cells in the square.
#range loops up to, but not including, the stop value --> hence the +1.
for row in range(int(station_row-loop_to_row_3), (int(station_row+loop_to_row_3)+1), 1):
for col in range(int(station_col-loop_to_col_3), (int(station_col + loop_to_col_3)+1),1):
#add here --> check distance to the cell within the rectangle.
lon3=f_fp.variables['lon'][col]
#now, moving in different latitude directions also, not just on a line like when determining the square.
lat3=f_fp.variables['lat'][row]
x = math.radians(lon1-lon3)*math.cos(math.radians(lat1+lat3)/2)
y=math.radians(lat1-lat3)
distance_within_rectangle_1 = math.sqrt((x*x)+(y*y)) * R
#if less than this! --> add
if distance_within_rectangle_1<int(distances_pbl_list[0]):
within_distance_1= within_distance_1 + fp[0][row][col]
if distance_within_rectangle_1<int(distances_pbl_list[1]):
within_distance_2= within_distance_2 + fp[0][row][col]
if distance_within_rectangle_1<int(distances_pbl_list[2]):
within_distance_3= within_distance_3 + fp[0][row][col]
#percent values, used only in the map representation that is commented out below; can be changed and used.
if within_distance_1 > 0 and summed_footprint>0:
percent_1= (within_distance_1/summed_footprint)*100
else:
percent_1=0
if within_distance_2 > 0 and summed_footprint>0:
percent_2= (within_distance_2/summed_footprint)*100
else:
percent_2=0
if within_distance_3 > 0 and summed_footprint>0:
percent_3= (within_distance_3/summed_footprint)*100
else:
percent_3=0
average_footprint_1_list.append(within_distance_1)
average_footprint_2_list.append(within_distance_2)
average_footprint_3_list.append(within_distance_3)
#put in dictionary
dictionary_sensitivity_area[ist]['percent'][distances_pbl_list[0]]=percent_1
dictionary_sensitivity_area[ist]['percent'][distances_pbl_list[1]]=percent_2
dictionary_sensitivity_area[ist]['percent'][distances_pbl_list[2]]=percent_3
dictionary_sensitivity_area[ist]['absolute'][distances_pbl_list[0]]=within_distance_1
dictionary_sensitivity_area[ist]['absolute'][distances_pbl_list[1]]=within_distance_2
dictionary_sensitivity_area[ist]['absolute'][distances_pbl_list[2]]=within_distance_3
dictionary_sensitivity_area[ist]['absolute']['total']=summed_footprint
averaged_summed_footprint_list.append(summed_footprint)
lat_station= stations[ist]['lat']
lon_station= stations[ist]['lon']
title = ('Average footprint ' + str(ist) + '\n' + str(start_date.year)+ '-'+ str(start_date.month) + '-' + str(start_date.day) + ' to ' + str(end_date.year) + '-' + str(end_date.month) + '-' + str(end_date.day)+ \
', Hour(s): ' + str(timeselect) + '\n' + 'Average sensitivity to whole domain: ' + str("%.2f" % summed_footprint) + '\n' + 'Average within '+ str(distances_pbl_list[0]) + ' km: ' + str("%.2f" % within_distance_1) \
+ ' (' + str("%.2f" % percent_1) + '%)' + '\n' + 'Average within ' + str(distances_pbl_list[1]) + ' km: ' + str("%.2f" % within_distance_2) + ' (' + str("%.2f" % percent_2) + '%)' + '\n' + 'Average within ' +\
str(distances_pbl_list[2]) + ' km: ' + str("%.2f" % within_distance_3) + ' (' + str("%.2f" % percent_3) + '%)')
#plot_maps_population(fp, lon, lat, lon_station, lat_station, title=title, label='surface influence', unit='[ppm / ($\mu$mol / m$^{2}$s)]', linlog='linear', vmin=-6, vmax=-2, station=[ist], zoom=ist)
plot_maps_population(fp, lon, lat, lon_station, lat_station, title=title, label='surface influence', unit='[ppm / ($\mu$mol / m$^{2}$s)]', linlog='linear', station=[ist], zoom=ist)
sorted_list_stations = [x for _,x in sorted(zip(averaged_summed_footprint_list,icos_stations))]
sorted_averaged_summed_footprint_list = sorted(averaged_summed_footprint_list)
sorted_average_footprint_1_list = [x for _,x in sorted(zip(averaged_summed_footprint_list,average_footprint_1_list))]
sorted_average_footprint_2_list = [x for _,x in sorted(zip(averaged_summed_footprint_list,average_footprint_2_list))]
sorted_average_footprint_3_list = [x for _,x in sorted(zip(averaged_summed_footprint_list,average_footprint_3_list))]
matplotlib.rcParams.update({'font.size': 16})
figure(num=None, figsize=(25, 12), dpi=80, facecolor='w', edgecolor='k')
y_pos = np.arange(len(sorted_list_stations))
ax = plt.subplot(111)
p1 = ax.bar(y_pos-0.3, sorted_averaged_summed_footprint_list,width=0.2,color='gainsboro',align='center')
p2 = ax.bar(y_pos-0.1, sorted_average_footprint_3_list,width=0.2,color='darkgrey',align='center')
p3 = ax.bar(y_pos+0.1, sorted_average_footprint_2_list,width=0.2,color='dimgrey',align='center')
p4 = ax.bar(y_pos+0.3, sorted_average_footprint_1_list,width=0.2,color='black',align='center')
ax.legend((p1[0], p2[0], p3[0], p4[0]), ('average summed footprints', 'average within ' + str(distances_pbl_list[2]) + ' km', 'average within ' + str(distances_pbl_list[1]) + ' km', 'average within ' + str(distances_pbl_list[0]) + ' km'))
plt.xticks(y_pos, sorted_list_stations)
plt.ylabel('All footprint cells summed')
date_index_number = (len(date_range) - 1)
plt.title('STILT Model estimated sensitivity to the model domain' + '\n' + str(date_range[0].year) + '-' + str(date_range[0].month) + '-' + str(date_range[0].day)\
+ ' to ' + str(date_range[date_index_number].year) + '-' + str(date_range[date_index_number].month) + '-' + str(date_range[date_index_number].day)+\
' Hour(s): ' + timeselect)
plt.show()
#stacked bar, same as above in terms of data:
#heights of the stacked segments: each distance band minus the band inside it
for_2_reduced=[x - y for x, y in zip(sorted_average_footprint_2_list,sorted_average_footprint_1_list)]
for_3_reduced=[x - y for x, y in zip(sorted_average_footprint_3_list,sorted_average_footprint_2_list)]
for_all_reduced= [x - y for x, y in zip(sorted_averaged_summed_footprint_list,sorted_average_footprint_3_list)]
figure(num=None, figsize=(20, 10), dpi=80, facecolor='w', edgecolor='k')
y_pos = np.arange(len(sorted_list_stations))
ax = plt.subplot(111)
p1= plt.bar(y_pos, for_all_reduced, bottom= sorted_average_footprint_3_list, width=0.25,color='gainsboro',align='center')
p2= plt.bar(y_pos, for_3_reduced, bottom=sorted_average_footprint_2_list, width=0.25,color='darkgrey',align='center')
p3= plt.bar(y_pos, for_2_reduced, bottom=sorted_average_footprint_1_list, width=0.25,color='dimgrey',align='center')
p4= plt.bar(y_pos, sorted_average_footprint_1_list,width=0.25,color='black',align='center')
ax.legend((p1[0], p2[0], p3[0], p4[0]), ('average summed footprints', 'average within ' + str(distances_pbl_list[2]) + ' km', 'average within ' + str(distances_pbl_list[1]) + ' km', 'average within ' + str(distances_pbl_list[0]) + ' km'))
plt.xticks(y_pos, sorted_list_stations, rotation='vertical')
plt.ylabel('All footprint cells summed')
plt.title('STILT Model estimated sensitivity to the model domain' + '\n' + str(date_range[0].year) + '-' + str(date_range[0].month) + '-' + str(date_range[0].day)\
+ ' to ' + str(date_range[date_index_number].year) + '-' + str(date_range[date_index_number].month) + '-' + str(date_range[date_index_number].day)+\
' Hour(s): ' + timeselect)
plt.show()
import pickle
end = time.time()
#print(end - start)
if update_dictionaries=='yes':
#dict = dictionary_point_source
f = open("sensitivity_area_evaluation_custom.pkl","wb")
pickle.dump(dictionary_sensitivity_area,f)
f.close()
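As a usage note, the saved dictionary can be read back in a later session; the sketch below is an illustration only and runs just when the pickle file written above exists in the working directory.
#sketch: reload the saved sensitivity dictionary (illustration, not part of the original analysis)
if os.path.isfile("sensitivity_area_evaluation_custom.pkl"):
    with open("sensitivity_area_evaluation_custom.pkl", "rb") as f_in:
        dictionary_sensitivity_area_loaded = pickle.load(f_in)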
In this cell, the stations are ranked by the percentage of their sensitivity that falls in the outermost part of the domain, defined as the rows and columns with the lowest and highest indices, i.e. the edges of the footprint grid. A minimal sketch of the summation is given right below.
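The sketch below only illustrates the outer-ring summation (the helper function is not part of the original code); fp is assumed to be a footprint array of shape (1, 480, 400) as read in this cell, and the four corner cells end up in both a row sum and a column sum, matching the computation below.
#sketch: percentage of one footprint's sensitivity located in the outermost rows and columns
def outer_ring_percent(fp):
    total = fp[0].sum()
    outer = fp[0, 0, :].sum() + fp[0, 479, :].sum() + fp[0, :, 0].sum() + fp[0, :, 399].sum()
    return 100 * outer / total if total > 0 else 0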
list_valid_stations=[]
list_percent_outer_by_station=[]
my_iterator=iter(all_stations)
for station in my_iterator:
count=0
tot_by_station=0
tot_whole_by_station=0
for date in date_range:
filename=(pathFP+'slots/'+stations[station]['locIdent']+'/'+str(date.year)+'/'+str(date.month).zfill(2)+'/'
+str(date.year)+'x'+str(date.month).zfill(2)+'x'+str(date.day).zfill(2)+'x'+str(date.hour).zfill(2)+'/foot')
if os.path.isfile(filename):
#access the footprint given the date/time.
f_fp = cdf.Dataset(filename)
fp=f_fp.variables['foot'][:,:,:]
#sum of all cells - to calculate percentage
tot_whole_by_date=fp.sum()
bottom=fp[0, 0, :].sum()
top=fp[0, 479, :].sum()
#note that fp[0][:][0] would just select row 0 again, so the columns are sliced explicitly:
left=fp[0, :, 0].sum()
right=fp[0, :, 399].sum()
tot_by_date=bottom+top+left+right
tot_whole_by_station=tot_whole_by_station+tot_whole_by_date
tot_by_station=tot_by_station+tot_by_date
#for average
count=count+1
if count>0:
list_valid_stations.append(station)
average_whole_by_station=tot_whole_by_station/count
average_by_station=tot_by_station/count
percent_outer_by_station=(average_by_station/average_whole_by_station)*100
list_percent_outer_by_station.append(percent_outer_by_station)
sorted_list_valid_stations = [x for _,x in sorted(zip(list_percent_outer_by_station,list_valid_stations))]
sorted_list_percent_by_station=[x for _,x in sorted(zip(list_percent_outer_by_station,list_percent_outer_by_station))]
matplotlib.rcParams.update({'font.size': 19})
x_pos = numpy.arange(len(sorted_list_valid_stations))
fig, ax = plt.subplots(figsize=(25,10))
p1 = ax.bar(x_pos, sorted_list_percent_by_station,color='tomato',align='center')
ax.set_ylabel('Outer cells % of total sensitivity')
ax.set_xticks(x_pos)
ax.set_xticklabels(sorted_list_valid_stations,rotation='vertical')
ax.set_title('Average % sensitivity to the outermost cell rows and columns')
ax.yaxis.grid(True)
plt.show()
Here, the sensitivity values of each individual footprint are considered. The user defines a distance within which the sensitivity is summed. The values are
displayed in a time series as well as averaged by hour and by month in bar graphs. The same is done for the footprints' sensitivities to the whole domain.
One final aspect considered is the relationship between the total sensitivity values and the anthropogenic component of each footprint, which is set up as a linear regression model.
The resulting R2 values (how much of the variation in the anthropogenic component can be explained by differences in total sensitivity) are often high, indicating that the two variables are positively correlated. A sketch of the regression step is given below.
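To make the regression step concrete, the sketch below fits the same kind of ordinary least squares model on hypothetical toy values (sensitivity_example and anthro_example are made up for illustration); the real cell uses the per-footprint values collected further down.
#sketch of the regression step (toy values, illustration only)
import statsmodels.api as sm
sensitivity_example = np.array([0.8, 1.1, 1.5, 2.0, 2.6])  #summed footprint per date (toy values)
anthro_example = np.array([0.5, 0.7, 1.0, 1.4, 1.7])       #anthropogenic CO2 component (toy values)
#anthropogenic component as dependent variable, total sensitivity as regressor,
#fitted through the origin since no constant is added (as in the cell below)
toy_results = sm.OLS(anthro_example, sensitivity_example).fit()
print("toy R2:", toy_results.rsquared)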
distances_pbl = input("Choose the distance to check for time series (km): ")
distances_pbl_list = distances_pbl.split()
distances_pbl_list = [int(a) for a in distances_pbl_list]
#excel file with the cols/rows of stations.
ExcelFileName= 'stilt_stations_col_row.xls'
workbook = xlrd.open_workbook(ExcelFileName)
worksheet = workbook.sheet_by_name('stilt_stations_col_row')
num_rows = worksheet.nrows
num_cols = worksheet.ncols
#for each of the stations, add to these lists:
station_list=[]
my_iterator=iter(all_stations)
#just for the lat/lon values:
f_fp= Dataset('class_522_forgotten.nc')
#for each station...
for ist in my_iterator:
average_footprint_1_list=[]
averaged_summed_footprint_list=[]
list_anthro_value=[]
list_dates_antho=[]
#lists to collect how many cells away the user-defined distance is exceeded (the minimum found is used later).
value_1_row=[]
value_1_col=[]
count = 0
within_distance_1_total=0
summed_footprint_total = 0
#for each of the dates for the current station, add to these lists:
date_list=[]
percent_1_list=[]
within_distance_1_list=[]
summed_footprint_list=[]
#by month and by hour:
#within defined distance
jan_count=0
jan_tot_list=[]
feb_count=0
feb_tot_list=[]
mar_count=0
mar_tot_list=[]
apr_count=0
apr_tot_list=[]
may_count=0
may_tot_list=[]
jun_count=0
jun_tot_list=[]
jul_count=0
jul_tot_list=[]
aug_count=0
aug_tot_list=[]
sep_count=0
sep_tot_list=[]
oct_count=0
oct_tot_list=[]
nov_count=0
nov_tot_list=[]
dec_count=0
dec_tot_list=[]
#whole stilt domain
jan_count_whole=0
jan_tot_list_whole=[]
feb_count_whole=0
feb_tot_list_whole=[]
mar_count_whole=0
mar_tot_list_whole=[]
apr_count_whole=0
apr_tot_list_whole=[]
may_count_whole=0
may_tot_list_whole=[]
jun_count_whole=0
jun_tot_list_whole=[]
jul_count_whole=0
jul_tot_list_whole=[]
aug_count_whole=0
aug_tot_list_whole=[]
sep_count_whole=0
sep_tot_list_whole=[]
oct_count_whole=0
oct_tot_list_whole=[]
nov_count_whole=0
nov_tot_list_whole=[]
dec_count_whole=0
dec_tot_list_whole=[]
#by hour
count_0=0
tot_list_0=[]
count_3=0
tot_list_3=[]
count_6=0
tot_list_6=[]
count_9=0
tot_list_9=[]
count_12=0
tot_list_12=[]
count_15=0
tot_list_15=[]
count_18=0
tot_list_18=[]
count_21=0
tot_list_21=[]
#whole stilt domain
count_whole_0=0
tot_list_whole_0=[]
count_whole_3=0
tot_list_whole_3=[]
count_whole_6=0
tot_list_whole_6=[]
count_whole_9=0
tot_list_whole_9=[]
count_whole_12=0
tot_list_whole_12=[]
count_whole_15=0
tot_list_whole_15=[]
count_whole_18=0
tot_list_whole_18=[]
count_whole_21=0
tot_list_whole_21=[]
#count nan --> number of nan-values by month (will be in labels for the different months)
count_nan_jan=0
count_nan_feb=0
count_nan_mar=0
count_nan_apr=0
count_nan_may=0
count_nan_jun=0
count_nan_jul=0
count_nan_aug=0
count_nan_sep=0
count_nan_oct=0
count_nan_nov=0
count_nan_dec=0
count_nan_0=0
count_nan_3=0
count_nan_6=0
count_nan_9=0
count_nan_12=0
count_nan_15=0
count_nan_18=0
count_nan_21=0
#this is to locate the station column/row in the excel file:
for curr_row in range(0, num_rows, 1):
#the station names are in the second (index 1) column:
if worksheet.cell_value(curr_row, 1) == ist:
#the "matched station" associated column and row in the footprint matrix is in column 3 (index 2) and column 4 (index 3)
station_col = worksheet.cell_value(curr_row, 2)
station_row = worksheet.cell_value(curr_row, 3)
#point one (measure from) is the station cell. Measure from the "bottom right" corner of the station cell?
#The distances are measured to the bottom right of the other cells in that case, so that is probably fine.
#there is code elsewhere to "move" the lat/lon dimensions halfway through the cell in each direction otherwise.
lon1= f_fp.variables['lon'][station_col]
lat1= f_fp.variables['lat'][station_row]
#if distances_pbl_list[0]==1000:
# print ('Calculating the total')
#else:
#what column value is the furthest away within the distance?
for value in range(1,399,1):
#station col+value must be within STILT domain.
if (station_col+value)>399:
break
#check how many cells away the distance is exceeded:
#lat2=lat1 --> same row when checking for the "maximum" distance away - the radius of the circle.
#if the index goes over 399 the loop breaks above; a message to the user should probably be added as well.
#check distance in longitude (the column index is the only one that should change; the lat lookup could be moved outside the loop since it is the same for every column).
lon2=f_fp.variables['lon'][station_col+value]
R = 6373.8
x = math.radians(lon1-lon2)*math.cos(math.radians(lat1+lat1)/2)
y=math.radians(lat1-lat1)
distance_col = math.sqrt((x*x)+(y*y)) * R
#only interested in the furthest away?
if distance_col>int(distances_pbl_list[0]):
#then do the min in this list!
value_1_col.append(value)
break
#these are the values that decide how "far away" to loop.
loop_to_col_1= min(value_1_col)
#always the same distance in row --> only need to do it once
for value in range(1,479,1):
#check how many cells away:
if (station_row+value)>479:
break
lat2=f_fp.variables['lat'][station_row+value]
R = 6373.8
x = math.radians(lon1-lon1)*math.cos(math.radians(lat1+lat2)/2)
y=math.radians(lat1-lat2)
distance_row = math.sqrt((x*x)+(y*y)) * R
if distance_row>int(distances_pbl_list[0]):
#then do the min in this list!
value_1_row.append(value)
break
loop_to_row_1= min(value_1_row)
#now that we know how far away to loop, do it for each date in the date_range (for each station)
#####################################################
#for each footprint in that date range:
for date in date_range:
filename=(pathFP+'slots/'+stations[ist]['locIdent']+'/'+str(date.year)+'/'+str(date.month).zfill(2)+'/'
+str(date.year)+'x'+str(date.month).zfill(2)+'x'+str(date.day).zfill(2)+'x'+str(date.hour).zfill(2)+'/foot')
if os.path.isfile(filename):
#access the footprint given the date/time.
f_fp = cdf.Dataset(filename)
date_list.append(date)
#this is new --> check the sensitivity for each separate footprint
fp=f_fp.variables['foot'][:,:,:]
summed_footprint= fp[0].sum()
#by month
if date.month==1:
jan_count_whole=jan_count_whole+1
jan_tot_list_whole.append(summed_footprint)
if date.month==2:
feb_count_whole=feb_count_whole+1
feb_tot_list_whole.append(summed_footprint)
if date.month==3:
mar_count_whole=mar_count_whole+1
mar_tot_list_whole.append(summed_footprint)
if date.month==4:
apr_count_whole=apr_count_whole+1
apr_tot_list_whole.append(summed_footprint)
if date.month==5:
may_count_whole=may_count_whole+1
may_tot_list_whole.append(summed_footprint)
if date.month==6:
jun_count_whole=jun_count_whole+1
jun_tot_list_whole.append(summed_footprint)
if date.month==7:
jul_count_whole=jul_count_whole+1
jul_tot_list_whole.append(summed_footprint)
if date.month==8:
aug_count_whole=aug_count_whole+1
aug_tot_list_whole.append(summed_footprint)
if date.month==9:
sep_count_whole=sep_count_whole+1
sep_tot_list_whole.append(summed_footprint)
if date.month==10:
oct_count_whole=oct_count_whole+1
oct_tot_list_whole.append(summed_footprint)
if date.month==11:
nov_count_whole=nov_count_whole+1
nov_tot_list_whole.append(summed_footprint)
if date.month==12:
dec_count_whole=dec_count_whole+1
dec_tot_list_whole.append(summed_footprint)
#by hour
if date.hour==0:
count_whole_0=count_whole_0+1
tot_list_whole_0.append(summed_footprint)
if date.hour==3:
count_whole_3=count_whole_3+1
tot_list_whole_3.append(summed_footprint)
if date.hour==6:
count_whole_6=count_whole_6+1
tot_list_whole_6.append(summed_footprint)
if date.hour==9:
count_whole_9=count_whole_9+1
tot_list_whole_9.append(summed_footprint)
if date.hour==12:
count_whole_12=count_whole_12+1
tot_list_whole_12.append(summed_footprint)
if date.hour==15:
count_whole_15=count_whole_15+1
tot_list_whole_15.append(summed_footprint)
if date.hour==18:
count_whole_18=count_whole_18+1
tot_list_whole_18.append(summed_footprint)
if date.hour==21:
count_whole_21=count_whole_21+1
tot_list_whole_21.append(summed_footprint)
within_distance_1= 0
#note: unlike the first cell, this sums all cells in the bounding square around the station rather than
#checking the exact distance to each cell; the ranges could also be restricted to that square directly
#instead of looping over the whole grid.
#happens once for every date:
for row in range(fp.shape[1]):
for col in range(fp.shape[2]):
if row>(station_row-loop_to_row_1) and row<(station_row+loop_to_row_1+1) and col>(station_col-loop_to_col_1) and col<(station_col+loop_to_col_1+1):
within_distance_1= within_distance_1 + fp[0][row][col]
if date.month==1:
jan_count=jan_count+1
jan_tot_list.append(within_distance_1)
if date.month==2:
feb_count=feb_count+1
feb_tot_list.append(within_distance_1)
if date.month==3:
mar_count=mar_count+1
mar_tot_list.append(within_distance_1)
if date.month==4:
apr_count=apr_count+1
apr_tot_list.append(within_distance_1)
if date.month==5:
may_count=may_count+1
may_tot_list.append(within_distance_1)
if date.month==6:
jun_count=jun_count+1
jun_tot_list.append(within_distance_1)
if date.month==7:
jul_count=jul_count+1
jul_tot_list.append(within_distance_1)
if date.month==8:
aug_count=aug_count+1
aug_tot_list.append(within_distance_1)
if date.month==9:
sep_count=sep_count+1
sep_tot_list.append(within_distance_1)
if date.month==10:
oct_count=oct_count+1
oct_tot_list.append(within_distance_1)
if date.month==11:
nov_count=nov_count+1
nov_tot_list.append(within_distance_1)
if date.month==12:
dec_count=dec_count+1
dec_tot_list.append(within_distance_1)
#by hour
if date.hour==0:
count_0=count_0+1
tot_list_0.append(within_distance_1)
if date.hour==3:
count_3=count_3+1
tot_list_3.append(within_distance_1)
if date.hour==6:
count_6=count_6+1
tot_list_6.append(within_distance_1)
if date.hour==9:
count_9=count_9+1
tot_list_9.append(within_distance_1)
if date.hour==12:
count_12=count_12+1
tot_list_12.append(within_distance_1)
if date.hour==15:
count_15=count_15+1
tot_list_15.append(within_distance_1)
if date.hour==18:
count_18=count_18+1
tot_list_18.append(within_distance_1)
if date.hour==21:
count_21=count_21+1
tot_list_21.append(within_distance_1)
average_footprint_1_list.append(within_distance_1)
averaged_summed_footprint_list.append(summed_footprint)
#if there is no footprint file for this date, add to the NaN counts instead:
else:
if date.month==1:
count_nan_jan=count_nan_jan+1
if date.month==2:
count_nan_feb=count_nan_feb+1
if date.month==3:
count_nan_mar=count_nan_mar+1
if date.month==4:
count_nan_apr=count_nan_apr+1
if date.month==5:
count_nan_may=count_nan_may+1
if date.month==6:
count_nan_jun=count_nan_jun+1
if date.month==7:
count_nan_jul=count_nan_jul+1
if date.month==8:
count_nan_aug=count_nan_aug+1
if date.month==9:
count_nan_sep=count_nan_sep+1
if date.month==10:
count_nan_oct=count_nan_oct+1
if date.month==11:
count_nan_nov=count_nan_nov+1
if date.month==12:
count_nan_dec=count_nan_dec+1
#nan by hour
if date.hour==0:
count_nan_0=count_nan_0+1
if date.hour==3:
count_nan_3=count_nan_3+1
if date.hour==6:
count_nan_6=count_nan_6+1
if date.hour==9:
count_nan_9=count_nan_9+1
if date.hour==12:
count_nan_12=count_nan_12+1
if date.hour==15:
count_nan_15=count_nan_15+1
if date.hour==18:
count_nan_18=count_nan_18+1
if date.hour==21:
count_nan_21=count_nan_21+1
#also get standard deviation values!
standard_deviation_within_distance = numpy.nanstd(average_footprint_1_list)
standard_deviation_whole_summed = numpy.nanstd(averaged_summed_footprint_list)
average_within_distance= numpy.nanmean(average_footprint_1_list)
average_deviation_whole_summed= numpy.nanmean(averaged_summed_footprint_list)
#time series: summed footprints within defined distance.
matplotlib.rcParams.update({'font.size': 17})
fig = p.figure(figsize=(25,10))
ax = fig.add_subplot(111)
p.plot(date_list, average_footprint_1_list, linestyle='-',color='g')
p.axhline(0, color='black', lw=1)
date_index_number = (len(date_range) - 1)
p.title('Sensitivity within ' + str(distances_pbl_list[0]) + ' km, by footprint, for station ' + str(ist) + '\n' + str(date_range[0].year) + '-' + str(date_range[0].month) + '-' + str(date_range[0].day)\
+ ' to ' + str(date_range[date_index_number].year) + '-' + str(date_range[date_index_number].month) + '-' + str(date_range[date_index_number].day)\
+ ', Hour(s): ' + timeselect + '\n' + 'Average: ' + str("%.2f" % average_within_distance) + '\n' + 'Standard deviation: ' + str("%.2f" % standard_deviation_within_distance))
#ax.set_xlim(start_date,end_date)
ax.set_ylabel('All footprints summarized ')
ax.grid(axis='x')
p.show()
#prepare data for the bar graph showing, by month, the sensitivity within the defined distance:
by_month_averages=[]
by_month_names_for_label=[]
if jan_count>0:
by_month_averages.append(numpy.mean(jan_tot_list))
by_month_names_for_label.append('Jan' + ' (' + str(count_nan_jan) + ')' + '\n' + 'Std: ' + str("%.2f" % numpy.std(jan_tot_list)))
if feb_count>0:
by_month_averages.append(numpy.mean(feb_tot_list))
by_month_names_for_label.append('Feb' + ' (' + str(count_nan_feb) + ')' + '\n' + 'Std: ' + str("%.2f" % numpy.std(feb_tot_list)))
if mar_count>0:
by_month_averages.append(numpy.mean(mar_tot_list))
by_month_names_for_label.append('Mar' + ' (' + str(count_nan_mar) + ')' + '\n' + 'Std: ' + str("%.2f" % numpy.std(mar_tot_list)))
if apr_count>0:
by_month_averages.append(numpy.mean(apr_tot_list))
by_month_names_for_label.append('Apr' + ' (' + str(count_nan_apr) + ')' + '\n' + 'Std: ' + str("%.2f" % numpy.std(apr_tot_list)))
if may_count>0:
by_month_averages.append(numpy.mean(may_tot_list))
by_month_names_for_label.append('May' + ' (' + str(count_nan_may) + ')' + '\n' + 'Std: ' + str("%.2f" % numpy.std(may_tot_list)))
if jun_count>0:
by_month_averages.append(numpy.mean(jun_tot_list))
by_month_names_for_label.append('Jun' + ' (' + str(count_nan_jun) + ')' + '\n' + 'Std: ' + str("%.2f" % numpy.std(jun_tot_list)))
if jul_count>0:
by_month_averages.append(numpy.mean(jul_tot_list))
by_month_names_for_label.append('Jul' + ' (' + str(count_nan_jul) + ')' + '\n' + 'Std: ' + str("%.2f" % numpy.std(jul_tot_list)))
if aug_count>0:
by_month_averages.append(numpy.mean(aug_tot_list))
by_month_names_for_label.append('Aug' + ' (' + str(count_nan_aug) + ')' + '\n' + 'Std: ' + str("%.2f" % numpy.std(aug_tot_list)))
if sep_count>0:
by_month_averages.append(numpy.mean(sep_tot_list))
by_month_names_for_label.append('Sep' + ' (' + str(count_nan_sep) + ')' + '\n' + 'Std: ' + str("%.2f" % numpy.std(sep_tot_list)))
if oct_count>0:
by_month_averages.append(numpy.mean(oct_tot_list))
by_month_names_for_label.append('Oct' + ' (' + str(count_nan_oct) + ')' + '\n' + 'Std: ' + str("%.2f" % numpy.std(oct_tot_list)))
if nov_count>0:
by_month_averages.append(numpy.mean(nov_tot_list))
by_month_names_for_label.append('Nov' + ' (' + str(count_nan_nov) + ')' + '\n' + 'Std: ' + str("%.2f" % numpy.std(nov_tot_list)))
if dec_count>0:
by_month_averages.append(numpy.mean(dec_tot_list))
by_month_names_for_label.append('Dec' + ' (' + str(count_nan_dec) + ')' + '\n' + 'Std: ' + str("%.2f" % numpy.std(dec_tot_list)))
#display by month/by hour for within distance:
N = len(by_month_averages)
ind = np.arange(N)
width = 0.35
#added figure stuff for size change
from matplotlib.pyplot import figure
figure(num=None, figsize=(11, 7), dpi=80, facecolor='w', edgecolor='k')
ax = plt.subplot(111)
ax.yaxis.grid(True)
plt.bar(ind, by_month_averages, width, color='blue')
matplotlib.rcParams.update({'font.size': 12})
plt.ylabel('All footprints summed')
plt.xlabel('Month (count NaN)')
#added figsize
#for title --> use variables of last cell as well (ex number of no measurements. nan here can also just mean no cells for it)
plt.title(ist +' summed footprint cells within ' + str(distances_pbl_list[0]) + ' km' + '\n' + str(date_range[0].year) + '-' + str(date_range[0].month) + '-' + str(date_range[0].day)\
+ ' to ' + str(date_range[date_index_number].year) + '-' + str(date_range[date_index_number].month) + '-' + str(date_range[date_index_number].day) + ', Hour(s): ' + timeselect)
plt.xticks(ind, by_month_names_for_label, rotation='vertical')
plt.axhline(0, color='black', lw=2)
plt.show()
#prepare to show the same, within defined distance but by hour:
by_hour_averages=[]
by_hour_names_for_label=[]
if count_0>0:
by_hour_averages.append(numpy.mean(tot_list_0))
by_hour_names_for_label.append('0' + ' (' + str(count_nan_0) + ')' + '\n' + 'Std: ' + str("%.2f" % numpy.std(tot_list_0)))
if count_3>0:
by_hour_averages.append(numpy.mean(tot_list_3))
by_hour_names_for_label.append('3' + ' (' + str(count_nan_3) + ')' + '\n' + 'Std: ' + str("%.2f" % numpy.std(tot_list_3)))
if count_6>0:
by_hour_averages.append(numpy.mean(tot_list_6))
by_hour_names_for_label.append('6' + ' (' + str(count_nan_6) + ')' + '\n' + 'Std: ' + str("%.2f" % numpy.std(tot_list_6)))
if count_9>0:
by_hour_averages.append(numpy.mean(tot_list_9))
by_hour_names_for_label.append('9' + ' (' + str(count_nan_9) + ')' + '\n' + 'Std: ' + str("%.2f" % numpy.std(tot_list_9)))
if count_12>0:
by_hour_averages.append(numpy.mean(tot_list_12))
by_hour_names_for_label.append('12' + ' (' + str(count_nan_12) + ')' + '\n' + 'Std: ' + str("%.2f" % numpy.std(tot_list_12)))
if count_15>0:
by_hour_averages.append(numpy.mean(tot_list_15))
by_hour_names_for_label.append('15' + ' (' + str(count_nan_15) + ')' + '\n' + 'Std: ' + str("%.2f" % numpy.std(tot_list_15)))
if count_18>0:
by_hour_averages.append(numpy.mean(tot_list_18))
by_hour_names_for_label.append('18' + ' (' + str(count_nan_18) + ')' + '\n' + 'Std: ' + str("%.2f" % numpy.std(tot_list_18)))
if count_21>0:
by_hour_averages.append(numpy.mean(tot_list_21))
by_hour_names_for_label.append('21' + ' (' + str(count_nan_21) + ')' + '\n' + 'Std: ' + str("%.2f" % numpy.std(tot_list_21)))
N = len(by_hour_averages)
ind = np.arange(N)
width = 0.35
#added figure stuff for size change
from matplotlib.pyplot import figure
figure(num=None, figsize=(11, 7), dpi=80, facecolor='w', edgecolor='k')
ax = plt.subplot(111)
ax.yaxis.grid(True)
plt.bar(ind, by_hour_averages, width, color='green')
matplotlib.rcParams.update({'font.size': 12})
plt.ylabel('All footprints summed')
plt.xlabel('Hour (count NaN)')
#added figsize
#for title --> use variables of last cell as well (ex number of no measurements. nan here can also just mean no cells for it)
plt.title(ist +' summed footprint cells within ' + str(distances_pbl_list[0]) + ' km' + '\n' + str(date_range[0].year) + '-' + str(date_range[0].month) + '-' + str(date_range[0].day)\
+ ' to ' + str(date_range[date_index_number].year) + '-' + str(date_range[date_index_number].month) + '-' + str(date_range[date_index_number].day) + ', Hour(s): ' + timeselect)
plt.xticks(ind, by_hour_names_for_label, rotation='vertical')
plt.axhline(0, color='black', lw=2)
plt.show()
#showing the summarized cells:
matplotlib.rcParams.update({'font.size': 17})
fig = p.figure(figsize=(25,10))
ax = fig.add_subplot(111)
p.plot(date_list, averaged_summed_footprint_list, linestyle='-',color='g')
p.axhline(0, color='black', lw=1)
date_index_number = (len(date_range) - 1)
p.title('Sensitivity to the whole domain, by footprint, for station ' + str(ist) + '\n' + str(date_range[0].year) + '-' + str(date_range[0].month) + '-' + str(date_range[0].day)\
+ ' to ' + str(date_range[date_index_number].year) + '-' + str(date_range[date_index_number].month) + '-' + str(date_range[date_index_number].day)\
+ ', Hour(s): ' + timeselect + '\n' + 'Average: ' + str("%.2f" % average_deviation_whole_summed) + '\n' + 'Standard deviation: ' + str("%.2f" % standard_deviation_whole_summed))
#ax.set_xlim(start_date,end_date)
ax.set_ylabel('All footprints summarized ')
ax.grid(axis='x')
#ax.legend(loc='upper right')
#ax.tick_params(axis='y', labelcolor='r')
p.show()
#by month, for whole:
by_month_averages_whole=[]
by_month_names_for_label_whole=[]
if jan_count_whole>0:
by_month_averages_whole.append(numpy.mean(jan_tot_list_whole))
by_month_names_for_label_whole.append('Jan' + ' (' + str(count_nan_jan) + ')' + '\n' + 'Std: ' + str("%.2f" % numpy.std(jan_tot_list_whole)))
if feb_count_whole>0:
by_month_averages_whole.append(numpy.mean(feb_tot_list_whole))
by_month_names_for_label_whole.append('Feb' + ' (' + str(count_nan_feb) + ')' + '\n' + 'Std: ' + str("%.2f" % numpy.std(feb_tot_list_whole)))
if mar_count_whole>0:
by_month_averages_whole.append(numpy.mean(mar_tot_list_whole))
by_month_names_for_label_whole.append('Mar' + ' (' + str(count_nan_mar) + ')' + '\n' + 'Std: ' + str("%.2f" % numpy.std(mar_tot_list_whole)))
if apr_count_whole>0:
by_month_averages_whole.append(numpy.mean(apr_tot_list_whole))
by_month_names_for_label_whole.append('Apr' + ' (' + str(count_nan_apr) + ')' + '\n' + 'Std: ' + str("%.2f" % numpy.std(apr_tot_list_whole)))
if may_count_whole>0:
by_month_averages_whole.append(numpy.mean(may_tot_list_whole))
by_month_names_for_label_whole.append('May' + ' (' + str(count_nan_may) + ')' + '\n' + 'Std: ' + str("%.2f" % numpy.std(may_tot_list_whole)))
if jun_count_whole>0:
by_month_averages_whole.append(numpy.mean(jun_tot_list_whole))
by_month_names_for_label_whole.append('Jun' + ' (' + str(count_nan_jun) + ')' + '\n' + 'Std: ' + str("%.2f" % numpy.std(jun_tot_list_whole)))
if jul_count_whole>0:
by_month_averages_whole.append(numpy.mean(jul_tot_list_whole))
by_month_names_for_label_whole.append('Jul' + ' (' + str(count_nan_jul) + ')' + '\n' + 'Std: ' + str("%.2f" % numpy.std(jul_tot_list_whole)))
if aug_count_whole>0:
by_month_averages_whole.append(numpy.mean(aug_tot_list_whole))
by_month_names_for_label_whole.append('Aug' + ' (' + str(count_nan_aug) + ')' + '\n' + 'Std: ' + str("%.2f" % numpy.std(aug_tot_list_whole)))
if sep_count_whole>0:
by_month_averages_whole.append(numpy.mean(sep_tot_list_whole))
by_month_names_for_label_whole.append('Sep' + ' (' + str(count_nan_sep) + ')' + '\n' + 'Std: ' + str("%.2f" % numpy.std(sep_tot_list_whole)))
if oct_count_whole>0:
by_month_averages_whole.append(numpy.mean(oct_tot_list_whole))
by_month_names_for_label_whole.append('Oct' + ' (' + str(count_nan_oct) + ')' + '\n' + 'Std: ' + str("%.2f" % numpy.std(oct_tot_list_whole)))
if nov_count_whole>0:
by_month_averages_whole.append(numpy.mean(nov_tot_list_whole))
by_month_names_for_label_whole.append('Nov' + ' (' + str(count_nan_nov) + ')' + '\n' + 'Std: ' + str("%.2f" % numpy.std(nov_tot_list_whole)))
if dec_count_whole>0:
by_month_averages_whole.append(numpy.mean(dec_tot_list_whole))
by_month_names_for_label_whole.append('Dec' + ' (' + str(count_nan_dec) + ')' + '\n' + 'Std: ' + str("%.2f" % numpy.std(dec_tot_list_whole)))
print('whole, by month: ', by_month_averages_whole)
N = len(by_month_averages_whole)
ind = np.arange(N)
width = 0.35
#added figure stuff for size change
from matplotlib.pyplot import figure
figure(num=None, figsize=(11, 7), dpi=80, facecolor='w', edgecolor='k')
ax = plt.subplot(111)
ax.yaxis.grid(True)
plt.bar(ind, by_month_averages_whole, width, color='blue')
matplotlib.rcParams.update({'font.size': 12})
plt.ylabel('All footprints summed')
plt.xlabel('Month (count NaN)')
#added figsize
#for title --> use variables of last cell as well (ex number of no measurements. nan here can also just mean no cells for it)
plt.title(ist +' summed footprint cells within the whole STILT domain' + '\n' + str(date_range[0].year) + '-' + str(date_range[0].month) + '-' + str(date_range[0].day)\
+ ' to ' + str(date_range[date_index_number].year) + '-' + str(date_range[date_index_number].month) + '-' + str(date_range[date_index_number].day) + ', Hour(s): ' + timeselect)
plt.xticks(ind, by_month_names_for_label_whole, rotation='vertical')
plt.axhline(0, color='black', lw=2)
plt.show()
#by hour, whole:
by_hour_averages_whole=[]
by_hour_names_for_label_whole=[]
if count_whole_0>0:
by_hour_averages_whole.append(numpy.mean(tot_list_whole_0))
by_hour_names_for_label_whole.append('0' + ' (' + str(count_nan_0) + ')' + '\n' + 'Std: ' + str("%.2f" % numpy.std(tot_list_whole_0)))
if count_whole_3>0:
by_hour_averages_whole.append(numpy.mean(tot_list_whole_3))
by_hour_names_for_label_whole.append('3' + ' (' + str(count_nan_3) + ')' + '\n' + 'Std: ' + str("%.2f" % numpy.std(tot_list_whole_3)))
if count_whole_6>0:
by_hour_averages_whole.append(numpy.mean(tot_list_whole_6))
by_hour_names_for_label_whole.append('6' + ' (' + str(count_nan_6) + ')' + '\n' + 'Std: ' + str("%.2f" % numpy.std(tot_list_whole_6)))
if count_whole_9>0:
by_hour_averages_whole.append(numpy.mean(tot_list_whole_9))
by_hour_names_for_label_whole.append('9' + ' (' + str(count_nan_9) + ')' + '\n' + 'Std: ' + str("%.2f" % numpy.std(tot_list_whole_9)))
if count_whole_12>0:
by_hour_averages_whole.append(numpy.mean(tot_list_whole_12))
by_hour_names_for_label_whole.append('12' + ' (' + str(count_nan_12) + ')' + '\n' + 'Std: ' + str("%.2f" % numpy.std(tot_list_whole_12)))
if count_whole_15>0:
by_hour_averages_whole.append(numpy.mean(tot_list_whole_15))
by_hour_names_for_label_whole.append('15' + ' (' + str(count_nan_15) + ')' + '\n' + 'Std: ' + str("%.2f" % numpy.std(tot_list_whole_15)))
if count_whole_18>0:
by_hour_averages_whole.append(numpy.mean(tot_list_whole_18))
by_hour_names_for_label_whole.append('18' + ' (' + str(count_nan_18) + ')' + '\n' + 'Std: ' + str("%.2f" % numpy.std(tot_list_whole_18)))
if count_whole_21>0:
by_hour_averages_whole.append(numpy.mean(tot_list_whole_21))
by_hour_names_for_label_whole.append('21' + ' (' + str(count_nan_21) + ')' + '\n' + 'Std: ' + str("%.2f" % numpy.std(tot_list_whole_21)))
N = len(by_hour_averages_whole)
ind = np.arange(N)
width = 0.35
#added figure stuff for size change
from matplotlib.pyplot import figure
figure(num=None, figsize=(11, 7), dpi=80, facecolor='w', edgecolor='k')
ax = plt.subplot(111)
ax.yaxis.grid(True)
plt.bar(ind, by_hour_averages_whole, width, color='green')
matplotlib.rcParams.update({'font.size': 12})
plt.ylabel('All footprints summed')
plt.xlabel('Hour (count NaN)')
#added figsize
#for title --> use variables of last cell as well (ex number of no measurements. nan here can also just mean no cells for it)
plt.title(ist +' summed footprint cells within the whole STILT domain' + '\n' + str(date_range[0].year) + '-' + str(date_range[0].month) + '-' + str(date_range[0].day)\
+ ' to ' + str(date_range[date_index_number].year) + '-' + str(date_range[date_index_number].month) + '-' + str(date_range[date_index_number].day)+ ', Hour(s): ' + timeselect)
plt.xticks(ind, by_hour_names_for_label_whole, rotation='vertical')
plt.axhline(0, color='black', lw=2)
plt.show()
#compare to anthro data!
df = read_stilt_timeseries(ist, date_range)
if df.empty:
print('no STILT data for defined date range for ', ist)
else:
date_index_number = (len(date_range) - 1)
for value in df['co2.fuel']:
if df['co2.fuel'].index[count] in date_range:
date= df['co2.fuel'].index[count]
if math.isnan(value)==False:
back_val=df['co2.background'][count]
stilt_val=df['co2.stilt'][count]
bio_val=df['co2.bio'][count]
#to get anthropogenic total:
industry_val_orig =df['co2.industry'][count]
energy_val_orig=df['co2.energy'][count]
transport_val_orig=df['co2.transport'][count]
others_val_orig=df['co2.others'][count]
anthropogenic_val_orig = industry_val_orig + energy_val_orig + transport_val_orig + others_val_orig
list_anthro_value.append(anthropogenic_val_orig)
list_dates_antho.append(date)
count=count+1
anthro_valid_both=[]
sensitivity_valid_both=[]
index_for_date=0
for value in list_anthro_value:
if math.isnan(value)==False:
if list_dates_antho[index_for_date] in date_list:
index_correct=date_list.index(list_dates_antho[index_for_date])
if math.isnan(averaged_summed_footprint_list[index_correct])==False:
anthro_valid_both.append(value)
sensitivity_valid_both.append(averaged_summed_footprint_list[index_correct])
index_for_date=index_for_date+1
import numpy as np
import statsmodels.api as sm
import statsmodels.formula.api as smf
#anthropogenic component as the dependent variable (y), total footprint sensitivity as the regressor (x);
#no constant is added, so this is a regression through the origin
y=anthro_valid_both
x=sensitivity_valid_both
results = sm.OLS(y, x).fit()
print(ist, 'Anthropogenic emission as dependent on sensitivity of footprint:')
print(results.summary())
print('Min sensitivity: ', min(averaged_summed_footprint_list))
print('Max sensitivity: ', max(averaged_summed_footprint_list))
print('Min anthropogenic: ', min(list_anthro_value))
print('Max anthropogenic: ', max(list_anthro_value))