# Notebook export. "%run" is an IPython magic that executes the referenced
# notebooks in this namespace; they provide the helper functions used below
# (create_STILT_dictionary, select_stations, select_land_cover_classes,
# get_ICOS_filename, date_range_date_hour, plot_maps_population).
%run ~/STILT_modules_v2.5.ipynb
%run ~/ICOS_atmObs_modules_v2.5.ipynb
%run ~/for_thesis.ipynb
# Metadata for all STILT stations; later cells read stations[id]['locIdent'],
# stations[id]['lat'] and stations[id]['lon'].
stations = create_STILT_dictionary()
# Root directory of the pre-computed STILT footprint files on the server.
pathFP='/opt/stiltdata/fsicos2/stiltweb/'
import math
import os
import pickle
import statistics

import matplotlib
import matplotlib.pyplot as plt
import netCDF4 as cdf
import numpy
import pandas as pd
from matplotlib.pyplot import figure
from mpl_toolkits.basemap import Basemap, cm
from netCDF4 import Dataset
# Interactive widget (defined in the %run notebooks) for picking the STILT
# stations that the analysis will be based on; its .value is read in the next cell.
list_stations_for_analysis=select_stations()
It is also possible to write the name of one, or several, more stations to base the analysis on (for instance potential stations)
Footprints for the stations, for the date range of interest, need to be available. Go here to examine footprint availability: STILT result viewer
To generate footprints for new date range and/or station: STILT calculation service
#stations in drop down menu of last cell, appended to "all_stations" used throughout the Notebook
only_selection_stations=list_stations_for_analysis.value
all_stations = list(only_selection_stations)
#user input, ex new stations. Add here.
additional_station=input('Write additional station(s) here, otherwise leave blank. If several, separate by whitespace: ')
#if user input many additional stations, split on whitespace (empty input -> empty list).
list_additional_station = additional_station.split()
#split() already yields str tokens, so no per-item str() conversion is needed
all_stations.extend(list_additional_station)
print('Your selection is: ', all_stations)
#download the data associated with stations in "all_stations"
#the first three characters of a STILT station id form the ICOS station code
icos_stations = [station_id[:3] for station_id in all_stations]
ICOS_files = get_ICOS_filename(icos_stations, download=True)
#date range, the selected hours and the start/end dates come from a helper widget
date_range, timeselect, start_date, end_date = date_range_date_hour()
#index of the last entry in date_range (used for titles and metadata below)
date_index_number = len(date_range) - 1
NOTE: need files "all_corine_except_ocean.nc" and "oceans_finalized.nc" uploaded to the server
For information on the dataset and the meaning of the different classes, follow this link
# CORINE land cover data (all classes except ocean), pre-aggregated to the STILT grid.
all_corine_classes= Dataset('all_corine_except_ocean.nc')
#the "oceans_finalized" dataset is separate: CORINE class 523 (oceans) did not extend beyond the exclusive zone
#complemented with Natural Earth data.
#CORINE does not cover the whole area, "nodata" area is never ocean, rather land-based data.
oceans_finalized= Dataset('oceans_finalized.nc')
#define lat and lon if want to display the data on a map:
lon=all_corine_classes.variables['lon'][:]
lat=all_corine_classes.variables['lat'][:]
#access all the different land cover classes in the .nc files.
#Each CORINE class is stored as variable "area_<code>" (area of that class per
#STILT grid cell). Bind each one to a module-level name fp_<code>, exactly as
#the original one-assignment-per-class cell did, so the aggregation cell below
#can keep referring to fp_111, fp_112, ... individually.
_corine_codes = [
    111, 112, 121, 122, 123, 124, 131, 132, 133, 141, 142,
    211, 212, 213, 221, 222, 223, 231, 241, 242, 243, 244,
    311, 312, 313, 321, 322, 323, 324, 331, 332, 333, 334, 335,
    411, 412, 421, 422, 423, 511, 512, 521, 522,
]
for _code in _corine_codes:
    globals()['fp_' + str(_code)] = all_corine_classes.variables['area_' + str(_code)][:, :]
#CORINE combined with natural earth data for oceans:
fp_523 = oceans_finalized.variables['ocean_ar2'][:,:]
#have a variable that represents the whole area of the cell,
#used to get a percentage breakdown of each corine class.
fp_total_area = all_corine_classes.variables['area_stilt'][:,:]
#19 aggregated classes (these are used in the current bar graphs but can be updated by each user)
#each aggregate is the element-wise sum of the underlying CORINE class areas per grid cell
urban = fp_111+fp_112+fp_141+fp_142
industrial = fp_131 + fp_133 + fp_121
road_and_rail = fp_122
#note: the misspelled name ("apirports") is kept because later cells refer to it
ports_and_apirports= fp_123+fp_124
dump_sites = fp_132
staple_cropland_not_rice = fp_211 + fp_212 + fp_241 + fp_242 + fp_243
rice_fields = fp_213
cropland_fruit_berry_grapes_olives = fp_221 + fp_222 + fp_223
pastures = fp_231
broad_leaved_forest = fp_311
coniferous_forest = fp_312
mixed_forest = fp_313 + fp_244
natural_grasslands = fp_321 + fp_322
transitional_woodland_shrub= fp_323 + fp_324
bare_natural_areas = fp_331 + fp_332 + fp_333 + fp_334
glaciers_prepetual_snow = fp_335
wet_area= fp_411 + fp_412 + fp_421 + fp_422
inland_water_bodies = fp_423 + fp_511 + fp_512 + fp_521 + fp_522
oceans = fp_523
NOTE: need to run the above cell first to import all the data
There are a few options that can be answered upon running the cell:
Should there be one bar graph per station (absolute and percent)?
Do you want to show a bar graph with the result of all selected categories in one? If so, a dropdown with selection will come up at the end of answering the questions
Do you want to save the output?
#ask which outputs to produce (fixed "Ouput" -> "Output" typos in the prompts)
output_one_for_each=input("Output one bar graph per station (both absolute and percent)? Print yes for yes")
output_aggregate=input("Output one bar graph with only selected categories (absolute) for all stations in the defined station list? Print yes for yes")
save_figs = input("Save the figures? Print yes for yes ")
if output_aggregate== 'yes':
    #widget (from the %run notebooks) to pick a subset of land cover classes
    land_cover_classes= select_land_cover_classes()
else:
    #default: all 19 aggregated classes
    list_only_selected_land_cover_classes= ['Urban', 'Industrial', 'Roads and railroads', 'Ports and airports', 'Dump sites', 'Cropland: Staple except rice', 'Rice fields', 'Cropland: fruits, berries, grapes and olives', 'Pastures', 'Broad leaved forest', 'Coniferous forest', 'Mixed forest', 'Natural grasslands', 'Transitional woodlands and shrub', 'Natural bare areas', 'Glaciers and prepetual snow', 'Wet areas', 'Inland water bodies', 'Ocean']
This will finalize the selection and warn if there are too many land cover classes/stations to include in the same graph.
#put in next cell once verified
#Only read the widget value when the aggregate graph was requested: the widget
#variable land_cover_classes exists only in that case (previous cell), so the
#original unconditional land_cover_classes.value raised a NameError otherwise.
#In the "no" case list_only_selected_land_cover_classes already holds all classes.
if output_aggregate== 'yes':
    list_only_selected_land_cover_classes = list(land_cover_classes.value)
print('Your selection is: ', list_only_selected_land_cover_classes)
#"No data" (area not covered by any class) is always shown as its own bar
list_only_selected_land_cover_classes.append('No data')
total_bars=len(all_stations)*len(list_only_selected_land_cover_classes)
if total_bars>200:
    print('\n' + 'WARNING, there will be many bars! consider running the setup again with fewer stations and/or land cover categories' + '\n' +\
          'You will have ' + str(total_bars) + ' bars and no more than 200 is recommended.')
update_dictionaries=input('Do you want to update the dictionaries with the current selections? yes for yes')
#everything in a cell not covered by any CORINE/ocean class is counted as "No data"
no_data=fp_total_area-urban -industrial-road_and_rail-ports_and_apirports-dump_sites-staple_cropland_not_rice-rice_fields-cropland_fruit_berry_grapes_olives-pastures-broad_leaved_forest-coniferous_forest-mixed_forest-natural_grasslands-\
transitional_woodland_shrub-bare_natural_areas-glaciers_prepetual_snow-wet_area-inland_water_bodies-oceans
#map each display name to its data array ('name') and bar color ('color');
#used by the bar-graph cells below
dictionary = {'Urban': {'name': urban, 'color': 'grey'}, 'Industrial': {'name': industrial, 'color': 'firebrick'}, 'Roads and railroads': {'name': road_and_rail, 'color': 'black'},\
'Ports and airports': {'name': ports_and_apirports, 'color': 'darkgrey'}, 'Dump sites': {'name': dump_sites, 'color': 'gold'},\
'Cropland: Staple except rice': {'name': staple_cropland_not_rice, 'color': 'darkgoldenrod'}, 'Rice fields': {'name': rice_fields, 'color': 'palegoldenrod'},\
'Cropland: fruits, berries, grapes and olives': {'name': cropland_fruit_berry_grapes_olives, 'color':'darkorange'}, 'Pastures': {'name':pastures,'color': 'yellow'},\
'Broad leaved forest':{'name': broad_leaved_forest, 'color': 'green'}, 'Coniferous forest': {'name': coniferous_forest, 'color':'lightgreen'}, 'Mixed forest': {'name': mixed_forest, 'color':'yellowgreen'}, \
'Natural grasslands': {'name': natural_grasslands, 'color':'khaki'}, 'Transitional woodlands and shrub': {'name': transitional_woodland_shrub, 'color':'brown'},\
'Natural bare areas':{'name':bare_natural_areas,'color':'silver'}, 'Glaciers and prepetual snow': {'name':glaciers_prepetual_snow,'color':'snow'},\
'Wet areas': {'name': wet_area,'color':'lightskyblue'}, 'Inland water bodies': {'name' :inland_water_bodies, 'color':'deepskyblue'},\
'Ocean': {'name':oceans, 'color':'blue'}, 'No data': {'name': no_data, 'color':'purple'}}
#for aggregated graph
list_of_lists_selected_land_cover = []
list_stations = []
station_iterator = iter(all_stations)
#results dictionary; the 'info' entry records the settings this run was made with
first_date = date_range[0]
last_date = date_range[date_index_number]
dictionary_land_cover = {}
dictionary_land_cover['info'] = {
    'date_min': f'{first_date.year}-{first_date.month}-{first_date.day}',
    'date_max': f'{last_date.year}-{last_date.month}-{last_date.day}',
    'hours': str(timeselect),
    'stations': all_stations,
    'selection': list_only_selected_land_cover_classes,
}
#For every station and every selected land cover class: average
#(footprint * land cover area) over all available footprints in the date
#range, plus the percent of the total-area signal and the standard deviation
#over the individual date/times. Optionally one absolute and one percent bar
#graph per station. (Indentation reconstructed from the notebook export.)
for station in station_iterator:
    count_nan_stations = 0
    list_stations.append(station)
    #each selected land cover class appended to this list:
    list_selected_land_cover=[]
    #for percent:
    list_percent_selected_land_cover=[]
    list_selected_land_cover_stdev=[]
    list_colors=[]
    #for standard deviation:
    list_selected_land_cover_total_one_per_date_time=[]
    dictionary_land_cover[station]={}
    for selection in list_only_selected_land_cover_classes:
        dictionary_land_cover[station][selection]={}
        list_selected_land_cover_multiplied=[]
        fp_selected_land_cover=dictionary[selection]['name']
        #NOTE(review): re-assigning station_iterator here does not affect the
        #running outer for-loop (it holds its own iterator reference) -- looks
        #like a leftover; confirm it can be removed.
        station_iterator=iter(all_stations)
        for_average_selected_land_cover = 0
        #to calculate the percentage.
        for_average_fp_total_area_multiplied_total = 0
        number_valid_dates = 0
        #for standard deviation: one summed value per date/time
        list_selected_land_cover_total_one_per_date_time=[]
        for date in date_range:
            #path of the STILT footprint file for this station, date and hour
            filename=(pathFP+'slots/'+stations[station]['locIdent']+'/'+str(date.year)+'/'+str(date.month).zfill(2)+'/'
                      +str(date.year)+'x'+str(date.month).zfill(2)+'x'+str(date.day).zfill(2)+'x'+str(date.hour).zfill(2)+'/foot')
            if os.path.isfile(filename):
                #be able to display in bar graph
                number_valid_dates = number_valid_dates + 1
                f_fp = cdf.Dataset(filename)
                #three dimensions, but same size of cells since only using one slice (for particular date/time)
                fp_current=f_fp.variables['foot'][:,:,:]
                lon=f_fp.variables['lon'][:]
                lat=f_fp.variables['lat'][:]
                #multiply land cover with current footprint (one footprint per date/time in date_range)
                fp_selected_land_cover_multiplied = fp_selected_land_cover*fp_current
                #compare to the max, for percentage:
                fp_total_area_multiplied= fp_total_area*fp_current
                list_selected_land_cover_multiplied.append(fp_selected_land_cover_multiplied)
                #totals for this particular date/time:
                selected_land_cover_total = fp_selected_land_cover_multiplied.sum()
                fp_total_area_multiplied_total = fp_total_area_multiplied.sum()
                #for standard deviation: need one value for each date/time
                list_selected_land_cover_total_one_per_date_time.append(selected_land_cover_total)
                dictionary_land_cover[station][selection][(selection + ' (color)')]=dictionary[selection]['color']
                for_average_selected_land_cover= for_average_selected_land_cover + selected_land_cover_total
                for_average_fp_total_area_multiplied_total=for_average_fp_total_area_multiplied_total+fp_total_area_multiplied_total
            else:
                #print ('file does not exist: ',filename)
                count_nan_stations = count_nan_stations + 1
        if number_valid_dates>0:
            #colors --> fetch for specific land cover type from dictionary
            list_colors.append(dictionary[selection]['color'])
            average_selected_land_cover = for_average_selected_land_cover/number_valid_dates
            dictionary_land_cover[station][selection][(selection + ' (absolute)')]=average_selected_land_cover
            #to calculate percentage:
            average_fp_total_area_multiplied_total= for_average_fp_total_area_multiplied_total/number_valid_dates
            #total values for land cover
            list_selected_land_cover.append(average_selected_land_cover)
            percent_selected_land_cover=(average_selected_land_cover/average_fp_total_area_multiplied_total)*100
            dictionary_land_cover[station][selection][(selection + ' (percent)')]=percent_selected_land_cover
            list_percent_selected_land_cover.append(percent_selected_land_cover)
            #standard deviation over the per-date/time totals:
            selection_land_cover_stdev=numpy.nanstd(list_selected_land_cover_total_one_per_date_time)
            list_selected_land_cover_stdev.append(selection_land_cover_stdev)
            summed_footprints = (sum(list_selected_land_cover_multiplied))
            average_footprint = summed_footprints/number_valid_dates
        else:
            print ('no valid dates for station: ', station)
    #for the aggregate:
    list_of_lists_selected_land_cover.append(list_selected_land_cover)
    #create bar graphs for each station, if chosen to do so.
    if output_one_for_each=='yes':
        matplotlib.rcParams.update({'font.size': 17})
        x_pos = numpy.arange(len(list_only_selected_land_cover_classes))
        fig, ax = plt.subplots(figsize=(20,10))
        ax.bar(x_pos, list_selected_land_cover,
               yerr=list_selected_land_cover_stdev,
               align='center',
               alpha=0.5,
               color=list_colors,
               capsize=10)
        ax.set_ylabel('absolute interaction' + '\n' + ' (average of each footrpint * area (km$^2$) underlaying land cover type)')
        ax.set_xticks(x_pos)
        ax.set_xticklabels(list_only_selected_land_cover_classes, rotation='vertical')
        date_index_number = (len(date_range) - 1)
        ax.set_title('Station: ' + str(station) + '\n' + 'Interaction with different land cover types' + '\n' + 'Absolute interaction with standard deviation error bar' + '\n' + str(date_range[0].year) + '-' + str(date_range[0].month) + '-' + str(date_range[0].day)\
                     + ' to ' + str(date_range[date_index_number].year) + '-' + str(date_range[date_index_number].month) + '-' + str(date_range[date_index_number].day)+\
                     ' Hour(s): ' + timeselect + '\n' + 'NaN footprints: ' + str(count_nan_stations))
        ax.yaxis.grid(True)
        plt.show()
        #graph, same as above but percent:
        matplotlib.rcParams.update({'font.size': 17})
        x_pos = numpy.arange(len(list_only_selected_land_cover_classes))
        fig, ax = plt.subplots(figsize=(20,10))
        ax.bar(x_pos, list_percent_selected_land_cover,
               align='center',
               color=list_colors)
        ax.set_ylabel('Percent interaction')
        ax.set_xticks(x_pos)
        ax.set_xticklabels(list_only_selected_land_cover_classes, rotation='vertical')
        date_index_number = (len(date_range) - 1)
        ax.set_title('Station: ' + str(station) + '\n' + 'Interaction with different land cover types' + '\n' + 'Percent interaction' + '\n' + str(date_range[0].year) + '-' + str(date_range[0].month) + '-' + str(date_range[0].day)\
                     + ' to ' + str(date_range[date_index_number].year) + '-' + str(date_range[date_index_number].month) + '-' + str(date_range[date_index_number].day)+\
                     ' Hour(s): ' + timeselect + '\n' + 'NaN footprints: ' + str(count_nan_stations))
        ax.yaxis.grid(True)
        plt.show()
#bar graph with all stations and their selected land cover classes.
if output_aggregate=='yes':
    matplotlib.rcParams.update({'font.size': 17})
    list_of_list_per_land_cover_class=[]
    list_per_land_cover_class=[]
    #in list_of_lists_selected_land_cover, have one list per station with all land cover types and their absolute interaction.
    #to display in the aggregated bar graph, need to make lists by land cover type rather than by station. Order determines
    #which station each value belongs to:
    index_land_cover_in_order=0
    for item in range(len(list_of_lists_selected_land_cover[0])):
        list_per_land_cover_class=[item[index_land_cover_in_order] for item in list_of_lists_selected_land_cover]
        list_of_list_per_land_cover_class.append(list_per_land_cover_class)
        index_land_cover_in_order=index_land_cover_in_order+1
    figure(num=None, figsize=(22, 10), dpi=80, facecolor='w', edgecolor='k')
    y_pos = numpy.arange(len(list_stations))
    ax = plt.subplot(111)
    #make sure that the spacing between the bars is okay;
    #the number of land cover classes determines it.
    half_count_list_selected_land_cover_classes = 0.5 * len(list_only_selected_land_cover_classes)
    if len(list_only_selected_land_cover_classes)>9:
        start_ax = -(0.05 * half_count_list_selected_land_cover_classes)
    else:
        start_ax = -(0.1 * half_count_list_selected_land_cover_classes)
    #NOTE(review): count is never incremented and name_list is never used
    #afterwards -- this loop appears to be dead code.
    count=1
    name_list=[]
    for each_class in list_only_selected_land_cover_classes:
        name_list.append('p' + str(count))
    index_start_ax = 0
    index=0
    list_for_legend=[]
    list_for_legend_names=[]
    #one group of bars per land cover class, offset per station position
    for each_class in list_only_selected_land_cover_classes:
        if len(list_only_selected_land_cover_classes)>9:
            #narrower bars when there are many classes
            #name_for_class= ax.bar(y_pos + (start_ax + index_start_ax), list_percentages[index], width=0.05, color=dictionary[each_class]['color'], align='center')
            name_for_class= ax.bar(y_pos + (start_ax + index_start_ax), list_of_list_per_land_cover_class[index], width=0.05, color=dictionary[each_class]['color'], align='center')
            list_for_legend.append(name_for_class[0])
            list_for_legend_names.append(each_class)
            index=index + 1
            index_start_ax = index_start_ax + 0.05
        else:
            name_for_class= ax.bar(y_pos + (start_ax + index_start_ax), list_of_list_per_land_cover_class[index], width=0.1, color=dictionary[each_class]['color'], align='center')
            list_for_legend.append(name_for_class[0])
            list_for_legend_names.append(each_class)
            index=index + 1
            index_start_ax = index_start_ax + 0.1
    if len(list_only_selected_land_cover_classes)>9:
        ax.legend(list_for_legend, list_for_legend_names, ncol=2)
    else:
        ax.legend(list_for_legend, list_for_legend_names)
    ax.yaxis.grid(True)
    plt.xticks(y_pos, list_stations, rotation='vertical')
    plt.ylabel('absolute interaction' + '\n' + ' (average of each footrpint * area (km$^2$) underlaying land cover type)')
    date_index_number = (len(date_range) - 1)
    plt.title('Interaction with different land cover types' + '\n' + str(date_range[0].year) + '-' + str(date_range[0].month) + '-' + str(date_range[0].day)\
              + ' to ' + str(date_range[date_index_number].year) + '-' + str(date_range[date_index_number].month) + '-' + str(date_range[date_index_number].day)+\
              ' Hour(s): ' + timeselect)
    plt.show()
#optionally pickle the land cover results so other notebooks can re-use them
if update_dictionaries=='yes':
    #context manager guarantees the file is closed even if pickling fails
    with open("land_cover_breakdown_custom.pkl","wb") as f:
        pickle.dump(dictionary_land_cover,f)
Make a selection of the land cover classes for which you want to see the spatial distribution of the contribution. A selection has to be made here even if one was already made for section 2B.
#widget for choosing the land cover classes whose spatial contribution maps to show
land_cover_classes_spatial_dist= select_land_cover_classes()
only_selected_land_cover_classes_spatial_dist=land_cover_classes_spatial_dist.value
#materialize the widget selection as a plain list
list_only_selected_land_cover_classes_spatial_dist = list(only_selected_land_cover_classes_spatial_dist)
print('Your selection is: ', list_only_selected_land_cover_classes_spatial_dist)
#create a dictionary where the key is the land cover class name and the value its data array.
#NOTE: this re-binds the name "dictionary" used by the bar-graph section above
#(there it maps to {'name':..., 'color':...}); re-run that cell before re-running section 2B.
dictionary = {'Urban': urban, 'Industrial': industrial, 'Roads and railroads': road_and_rail,
              'Ports and airports':ports_and_apirports, 'Dump sites':dump_sites,
              'Cropland: Staple except rice': staple_cropland_not_rice, 'Rice fields': rice_fields,
              'Cropland: fruits, berries, grapes and olives': cropland_fruit_berry_grapes_olives,
              'Pastures': pastures,'Broad leaved forest':broad_leaved_forest,
              'Coniferous forest': coniferous_forest, 'Mixed forest': mixed_forest,
              'Natural grasslands':natural_grasslands, 'Transitional woodlands and shrub': transitional_woodland_shrub,
              'Natural bare areas':bare_natural_areas,
              'Glaciers and prepetual snow': glaciers_prepetual_snow,'Wet areas': wet_area,
              'Inland water bodies': inland_water_bodies, 'Ocean':oceans}
matplotlib.rcParams.update({'font.size': 17})
#lat/lon of the CORINE grid (used for the maps):
lon=all_corine_classes.variables['lon'][:]
lat=all_corine_classes.variables['lat'][:]
#One influence map per station and selected land cover class: average
#(footprint * land cover) field over all available footprints in the date
#range. (Indentation reconstructed from the notebook export.)
station_iterator=iter(all_stations)
for station in station_iterator:
    for selection in list_only_selected_land_cover_classes_spatial_dist:
        fp_selected_land_cover=dictionary[selection]
        #want to look at the maps for all the stations
        list_selected_land_cover=[]
        list_stations=[]
        #NOTE(review): re-assigning station_iterator has no effect on the running
        #outer loop -- looks like a leftover from copying the bar-graph cell.
        station_iterator=iter(all_stations)
        #NOTE(review): count is reset to 0 for every selection and only incremented
        #once per selection at the bottom, so icos_stations[count] below is always
        #icos_stations[0] -- confirm intended (list_stations is not used further here).
        count=0
        for_average_selected_land_cover = 0
        number_valid_dates = 0
        #add each footprint * land cover field to this list (one entry per date/time):
        list_selected_land_cover_multiplied =[]
        for date in date_range:
            filename=(pathFP+'slots/'+stations[station]['locIdent']+'/'+str(date.year)+'/'+str(date.month).zfill(2)+'/'
                      +str(date.year)+'x'+str(date.month).zfill(2)+'x'+str(date.day).zfill(2)+'x'+str(date.hour).zfill(2)+'/foot')
            if os.path.isfile(filename):
                #be able to display in bar graph
                number_valid_dates = number_valid_dates + 1
                f_fp = cdf.Dataset(filename)
                #three dimensions, but same size of cells since only using one slice (for particular date/time)
                fp_current=f_fp.variables['foot'][:,:,:]
                lon=f_fp.variables['lon'][:]
                lat=f_fp.variables['lat'][:]
                fp_selected_land_cover_multiplied = fp_selected_land_cover*fp_current
                list_selected_land_cover_multiplied.append(fp_selected_land_cover_multiplied)
                fp_current_total= fp_current[0].sum()
                selected_land_cover_total = fp_selected_land_cover_multiplied[0].sum()
                for_average_selected_land_cover= for_average_selected_land_cover + selected_land_cover_total
            #else:
                #print ('file does not exist: ',filename)
        if number_valid_dates>0:
            list_stations.append(icos_stations[count])
            average_selected_land_cover = for_average_selected_land_cover/number_valid_dates
            list_selected_land_cover.append(average_selected_land_cover)
            #station lat long into point on map:
            lat_station= stations[station]['lat']
            lon_station= stations[station]['lon']
            summed_footprints = (sum(list_selected_land_cover_multiplied))
            average_footprint = summed_footprints/number_valid_dates
            date_index_number = (len(date_range) - 1)
            for_title = (str(selection) + ' seen at ' + str(station) + '\n' + str(date_range[0].year) + '-' + str(date_range[0].month) + '-' + str(date_range[0].day)\
                         + ' to ' + str(date_range[date_index_number].year) + '-' + str(date_range[date_index_number].month) + '-' + str(date_range[date_index_number].day)+\
                         ', hour(s): ' + timeselect)
            plot_maps_population(average_footprint, lon, lat, lon_station, lat_station, title=for_title, unit='Average (footprint * land cover (km$^2$) per cell)', vmin=0, station=[station], zoom=station)
        else:
            print ('no valid dates for station: ', station)
        count=count+1
Need to have the file "point_with_pop_data.nc" uploaded to the server
#gridded population data; variable "Sum_TOT_P" is presumably the total population
#per STILT cell (used below as "total population in footprint cells")
pop_data= Dataset('point_with_pop_data.nc')
fp_pop=pop_data.variables['Sum_TOT_P'][:,:]
Note that, if choosing to output the maps for each station, the scale bar changes depending on maximum cell influence for that particular station
for the given date range and hour(s). It makes it hard to compare the maps.
#settings for the population sensitivity analysis
#(fixed "dictornaries" -> "dictionaries" and "lowerst" -> "lowest" typos in the prompts)
update_dictionaries=input('Do you want to update the dictionaries with the current selections? yes for yes')
matplotlib.rcParams.update({'font.size': 17})
#per-station accumulators for the bar graph:
list_pop=[]
list_stations=[]
list_stdev=[]
station_iterator=iter(all_stations)
count=0
sorting=input('Sort from lowest to highest interaction? Print yes for yes ')
show_maps=input('Show the influence map for each station? Print yes for yes ')
#results dictionary; the 'info' entry records the settings this run was made with
population_sensitivity={}
population_sensitivity['info']={}
population_sensitivity['info']['date_min']=(str(date_range[0].year) + '-' + str(date_range[0].month) + '-' + str(date_range[0].day))
population_sensitivity['info']['date_max']=(str(date_range[date_index_number].year) + '-' + str(date_range[date_index_number].month) + '-' + str(date_range[date_index_number].day))
population_sensitivity['info']['hours']=(str(timeselect))
population_sensitivity['info']['stations']=all_stations
#Population sensitivity per station: average (footprint * population) over all
#available footprints in the date range, with the standard deviation over the
#individual date/times. (Indentation reconstructed from the notebook export.)
for station in station_iterator:
    list_fp_pop_total_for_std=[]
    population_sensitivity[station]={}
    for_average_pop = 0
    number_valid_dates = 0
    #add each footprint * population to list. One list for each station.
    list_fp_pop_multiplied =[]
    for date in date_range:
        filename=(pathFP+'slots/'+stations[station]['locIdent']+'/'+str(date.year)+'/'+str(date.month).zfill(2)+'/'
                  +str(date.year)+'x'+str(date.month).zfill(2)+'x'+str(date.day).zfill(2)+'x'+str(date.hour).zfill(2)+'/foot')
        if os.path.isfile(filename):
            #be able to display in bar graph
            number_valid_dates = number_valid_dates + 1
            f_fp = cdf.Dataset(filename)
            #three dimensions, but same size of cells since only using one slice (for particular date/time)
            fp_current=f_fp.variables['foot'][:,:,:]
            lon=f_fp.variables['lon'][:]
            lat=f_fp.variables['lat'][:]
            fp_pop_multiplied = fp_pop*fp_current
            list_fp_pop_multiplied.append(fp_pop_multiplied)
            fp_current_total= fp_current[0].sum()
            fp_pop_total = fp_pop_multiplied[0].sum()
            list_fp_pop_total_for_std.append(fp_pop_total)
            for_average_pop = for_average_pop + fp_pop_total
        #else:
            #print ('file does not exist: ',filename)
    if number_valid_dates>0:
        #NOTE(review): statistics.stdev raises StatisticsError for fewer than two
        #data points -- a date range with a single valid footprint will crash here.
        stdev_pop=statistics.stdev(list_fp_pop_total_for_std)
        list_stdev.append(stdev_pop)
        list_stations.append(icos_stations[count])
        average_pop = for_average_pop/number_valid_dates
        population_sensitivity[station]['average']=average_pop
        population_sensitivity[station]['stdev']=stdev_pop
        list_pop.append(average_pop)
        #station lat long into point on map!
        if show_maps=='yes':
            lat_station= stations[station]['lat']
            lon_station= stations[station]['lon']
            summed_footprints = (sum(list_fp_pop_multiplied))
            average_footprint = summed_footprints/number_valid_dates
            date_index_number = (len(date_range) - 1)
            for_title = (str(station) + '\n' + str(date_range[0].year) + '-' + str(date_range[0].month) + '-' + str(date_range[0].day)\
                         + ' to ' + str(date_range[date_index_number].year) + '-' + str(date_range[date_index_number].month) + '-' + str(date_range[date_index_number].day)+\
                         ', hour(s): ' + timeselect + '\n' + 'Total average population seen: ' + str("%.0f" % average_pop)+ '\n' + 'Standard deviation: ' + str("%.0f" % stdev_pop))
            plot_maps_population(average_footprint, lon, lat, lon_station, lat_station, title=for_title, unit='Average (footprint cells * population per cell)', station=[station], zoom=station)
    else:
        print ('no valid dates for station: ', station)
    count=count+1
#bar graph of average population sensitivity per station, optionally sorted
#by value. The original sorted(zip(list_pop, list_pop)) was a convoluted
#sorted(list_pop); one paired sort keeps values and station codes aligned.
if sorting=='yes':
    sorted_pairs = sorted(zip(list_pop, list_stations))
    list_pop_sorted = [value for value, _ in sorted_pairs]
    list_stations = [name for _, name in sorted_pairs]
figure(num=None, figsize=(20, 10), dpi=80, facecolor='w', edgecolor='k')
y_pos = numpy.arange(len(list_stations))
ax = plt.subplot(111)
if sorting=='yes':
    p1 = ax.bar(y_pos, list_pop_sorted,width=0.5,color='purple',align='center')
else:
    p1 = ax.bar(y_pos, list_pop,width=0.5,color='purple',align='center')
plt.xticks(y_pos, list_stations, rotation='vertical')
plt.ylabel('average footprint * total population in footprint cells')
plt.title('Station interaction with population' + '\n' + str(date_range[0].year) + '-' + str(date_range[0].month) + '-' + str(date_range[0].day)\
          + ' to ' + str(date_range[date_index_number].year) + '-' + str(date_range[date_index_number].month) + '-' + str(date_range[date_index_number].day)+\
          ' Hour(s): ' + timeselect)
ax.yaxis.grid(True)
plt.show()
#optionally pickle the population sensitivity results for re-use elsewhere
if update_dictionaries=='yes':
    #context manager guarantees the file is closed even if pickling fails
    with open("sensitivity_pop_custom.pkl","wb") as f:
        pickle.dump(population_sensitivity,f)
Need to have the file "final_netcdf_point_source_emission.nc" uploaded to the server.
Different from the population data: the emissions within each STILT cell can be translated into their effect on the final CO2 concentrations at the stations.
They just need to be converted to the right unit (micromoles/m2s) and multiplied by the individual or averaged footprints.
#CO2 point source emissions (kg/year per STILT cell), from facilities reporting
#to the European Pollutant Release and Transfer Register (see note above).
point_source_data= Dataset('final_netcdf_point_source_emission.nc')
lon=point_source_data.variables['lon'][:]
lat=point_source_data.variables['lat'][:]
#emissions in kg/year in the variable "Sum_Tota_1"
fp_point_source=point_source_data.variables['Sum_Tota_1'][:,:]
#different from population data: can translate the emissions within each stilt cell to the effect on the final CO2 concentrations at the stations.
#just need to get it in the right unit (micromole/m2s) and multiply by the individual or aggregated footprints
#divide by the molar weight of CO2 in kg: 12 (C) + 16 (O) + 16 (O) = 44 g = 0.044 kg per mole --> moles per year
fp_point_source_moles_C=fp_point_source/0.044
#1 mole = 1000000 micromoles
fp_point_source_micromoles_C=fp_point_source_moles_C*1000000
#a NetCDF file with the grid cell sizes in m2
f_gridarea = cdf.Dataset('gridareaSTILT.nc')
#area stored in "cell_area"
gridarea = f_gridarea.variables['cell_area'][:]
#micromoles per m2 and year
fp_point_source_m2= fp_point_source_micromoles_C/gridarea
#per second (yearly data; 31536000 = 365*24*3600 seconds in a year)
fp_point_source_m2_s= fp_point_source_m2/31536000
Need to have the file "final_netcdf_point_source_emission" uploaded to the server.
The data consists of CO2 emission from 2149 facilities that have reported to the European Pollutant Release and Transfer Register.
#settings for the point source analysis
#(fixed "dictornaries" -> "dictionaries" and "lowerst" -> "lowest" typos in the prompts)
update_dictionaries=input('Do you want to update the dictionaries with the current selections? yes for yes')
matplotlib.rcParams.update({'font.size': 16})
sorting=input('Sort from lowest to highest interaction? Print yes for yes ')
show_maps=input('Show the influence map for each station? Print yes for yes ')
#per-station accumulators for the bar graph:
list_carbon=[]
list_stations=[]
station_iterator=iter(all_stations)
count=0
#NOTE(review): this list is shared by all stations (the original comment already
#questioned the placement); the averaged footprint map in the loop below will
#therefore also include footprints of previously processed stations -- it should
#probably be re-initialised per station. Left unchanged here.
list_fp_carbon_multiplied=[]
#populate the dictionary (will be used if choose to update dictionaries)
dictionary_point_source={}
dictionary_point_source['info']={}
dictionary_point_source['info']['date_min']=(str(date_range[0].year) + '-' + str(date_range[0].month) + '-' + str(date_range[0].day))
dictionary_point_source['info']['date_max']=(str(date_range[date_index_number].year) + '-' + str(date_range[date_index_number].month) + '-' + str(date_range[date_index_number].day))
dictionary_point_source['info']['hours']=(str(timeselect))
dictionary_point_source['info']['stations']=all_stations
#Per station: average contribution (ppm) of point source emissions to the CO2
#mixing ratio over all available footprints in the date range.
#(Indentation reconstructed from the notebook export.)
for station in station_iterator:
    #append each per-date total here for the standard deviation
    list_carbob_total_for_std=[]
    dictionary_point_source[station]={}
    for_average_carbon = 0
    number_valid_dates = 0
    list_dates=[]
    for date in date_range:
        filename=(pathFP+'slots/'+stations[station]['locIdent']+'/'+str(date.year)+'/'+str(date.month).zfill(2)+'/'
                  +str(date.year)+'x'+str(date.month).zfill(2)+'x'+str(date.day).zfill(2)+'x'+str(date.hour).zfill(2)+'/foot')
        #reset every date, so the "+" accumulation below only ever adds one term
        fp_carbon_total=0
        if os.path.isfile(filename):
            list_dates.append(date)
            number_valid_dates = number_valid_dates + 1
            f_fp = cdf.Dataset(filename)
            fp_current=f_fp.variables['foot'][:,:,:]
            lon=f_fp.variables['lon'][:]
            lat=f_fp.variables['lat'][:]
            #footprint * emissions (micromole/m2s) --> per-cell ppm contribution field
            fp_carbon_multiplied = fp_point_source_m2_s*fp_current
            #NOTE(review): list_fp_carbon_multiplied is initialised outside the
            #station loop and never reset, so average_footprint below also mixes
            #in earlier stations' footprints -- confirm and reset per station.
            list_fp_carbon_multiplied.append(fp_carbon_multiplied)
            fp_carbon_total = fp_carbon_total + fp_carbon_multiplied[0].sum()
            list_carbob_total_for_std.append(fp_carbon_total)
            for_average_carbon = for_average_carbon + fp_carbon_total
        #else:
            #print ('file does not exist: ',filename)
    if number_valid_dates>0:
        stdev_carbon=statistics.stdev(list_carbob_total_for_std)
        #NOTE(review): count is never incremented in this visible cell (unlike the
        #population cell, which does count=count+1 at the end of the loop), so
        #icos_stations[count] always picks the same entry -- confirm against the
        #full notebook.
        list_stations.append(icos_stations[count])
        average_carbon = for_average_carbon/number_valid_dates
        list_carbon.append(average_carbon)
        dictionary_point_source[station]['average']=average_carbon
        dictionary_point_source[station]['stdev']=stdev_carbon
        summed_footprints = (sum(list_fp_carbon_multiplied))
        average_footprint = summed_footprints/number_valid_dates
        if show_maps=='yes':
            lat_station= stations[station]['lat']
            lon_station= stations[station]['lon']
            date_index_number = (len(date_range) - 1)
            for_title = (str(station) + '\n' + str(date_range[0].year) + '-' + str(date_range[0].month) + '-' + str(date_range[0].day)\
                         + ' to ' + str(date_range[date_index_number].year) + '-' + str(date_range[date_index_number].month) + '-' + str(date_range[date_index_number].day)+\
                         ', Hour(s): ' + timeselect + '\n' + 'Average contribution CO2 from point source emissions: ' + str("%.3f" % average_carbon)+ ' ppm' + '\n' + 'Standard deviation: ' + str("%.3f" % stdev_carbon) + ' ppm')
            #same for point source and for population
            plot_maps_population(average_footprint, lon, lat, lon_station, lat_station, title=for_title, unit='ppm', station=[station], zoom=station)
# Bar chart: one bar per station with at least one valid footprint.
# FIX: the original sorted 'sorted(zip(list_carbon, list_carbon))' — a
# redundant self-zip — and tested sorting=='yes' twice; one sort of the
# (value, station) pairs yields both orderings.
if sorting == 'yes':
    # sort stations by their average contribution, ascending
    order = sorted(zip(list_carbon, list_stations))
    bar_values = [carbon for carbon, _ in order]
    list_stations = [name for _, name in order]
else:
    bar_values = list_carbon
figure(num=None, figsize=(25, 10), dpi=80, facecolor='w', edgecolor='k')
y_pos = numpy.arange(len(list_stations))
ax = plt.subplot(111)
p1 = ax.bar(y_pos, bar_values, width=0.5, color='goldenrod', align='center')
plt.xticks(y_pos, list_stations, rotation='vertical')
plt.ylabel('ppm')
date_index_number = (len(date_range) - 1)
plt.title('Contribution from point source emissions to the CO2 mixing ratio' + '\n' + str(date_range[0].year) + '-' + str(date_range[0].month) + '-' + str(date_range[0].day)\
    + ' to ' + str(date_range[date_index_number].year) + '-' + str(date_range[date_index_number].month) + '-' + str(date_range[date_index_number].day)+\
    ', Hour(s): ' + timeselect)
ax.yaxis.grid(True)
plt.show()
# Persist the results for comparison between runs.
# FIX: use a with-statement so the file handle is closed even if pickling
# raises (the original open/close pair leaked the handle on error).
if update_dictionaries == 'yes':
    with open("sensitivity_point_source_custom.pkl", "wb") as f:
        pickle.dump(dictionary_point_source, f)
The user specifies a threshold: a facility's average contribution over the specified date range must exceed this threshold for the facility to be included in the graph.
import xlrd
#excel file with contribution from each facility, the ratio of the individual facility's contribution to the cell total,
#and row/col they are located in.
# NOTE(review): recent xlrd versions (>=2.0) only read legacy .xls files,
# which matches this filename — confirm the installed version if upgrading.
ExcelFileName= 'point_source_facilities_names_ratios.xls'
workbook = xlrd.open_workbook(ExcelFileName)
worksheet = workbook.sheet_by_name('point_source_facilities_names_r')
#num_rows to loop over all facilities in later step.
# (row 0 is assumed to be a header; later loops start at row 1)
num_rows = worksheet.nrows
# fresh iterator: the previous one was exhausted by the earlier station loop
station_iterator=iter(all_stations)
# minimum average contribution (ppm) for a facility to appear in the plot;
# read as a string here and converted with float() where it is compared
threshold_timeseries=input('Threshold for time series inclusion of individual facilities (in ppm): ')
# For each station: build the time-averaged footprint, select facilities whose
# average contribution exceeds the user threshold, then plot a per-facility
# time series plus the per-date domain total.
for station in station_iterator:
    # ----- aggregated (then averaged) footprint over all available dates ----
    fp = []
    nfp = 0
    first = True
    for dd in date_range:
        filename = (pathFP+'slots/'+stations[station]['locIdent']+'/'+str(dd.year)+'/'+str(dd.month).zfill(2)+'/'
                    +str(dd.year)+'x'+str(dd.month).zfill(2)+'x'+str(dd.day).zfill(2)+'x'+str(dd.hour).zfill(2)+'/foot')
        if os.path.isfile(filename):
            f_fp = cdf.Dataset(filename)
            if (first):
                fp = f_fp.variables['foot'][:,:,:]
                lon = f_fp.variables['lon'][:]
                lat = f_fp.variables['lat'][:]
                first = False
            else:
                fp = fp + f_fp.variables['foot'][:,:,:]
            f_fp.close()
            nfp += 1
    if nfp > 0:
        fp = fp/nfp
    else:
        print('no footprints found')
        # FIX: without footprints 'fp' is still an empty list and the
        # arithmetic below would fail — skip this station.
        continue
    # average contribution grid (ppm) and its domain total
    average_footprint = fp*fp_point_source_m2_s
    average_contribution = average_footprint[0].sum()
    # lat and long of station, used when plotting on a map
    lat_station = stations[station]['lat']
    lon_station = stations[station]['lon']
    date_index_number = (len(date_range) - 1)
    # ----- facilities whose average contribution exceeds the threshold ------
    threshold = float(threshold_timeseries)  # hoisted: convert once, not per row
    list_name_facility = []
    list_ratio = []
    list_row_facility = []
    list_col_facility = []
    for curr_row in range(1, num_rows, 1):
        row_facility = int(worksheet.cell_value(curr_row, 2))
        col_facility = int(worksheet.cell_value(curr_row, 1))
        ratio = worksheet.cell_value(curr_row, 3)
        # multiply by ratio: only this facility's share of the cell total
        if average_footprint[0][row_facility][col_facility]*ratio > threshold:
            list_name_facility.append(worksheet.cell_value(curr_row, 0))
            list_ratio.append(ratio)
            list_row_facility.append(row_facility)
            list_col_facility.append(col_facility)
    # ----- per-date values for the time series ------------------------------
    list_dates = []
    list_of_list_values_by_date = []
    list_total_by_date = []
    for date in date_range:
        filename = (pathFP+'slots/'+stations[station]['locIdent']+'/'+str(date.year)+'/'+str(date.month).zfill(2)+'/'
                    +str(date.year)+'x'+str(date.month).zfill(2)+'x'+str(date.day).zfill(2)+'x'+str(date.hour).zfill(2)+'/foot')
        if os.path.isfile(filename):
            list_dates.append(date)
            f_fp = cdf.Dataset(filename)
            # footprint for this specific date and time
            fp_current = f_fp.variables['foot'][:,:,:]
            f_fp.close()  # FIX: release the netCDF file handle
            # grid with contribution (ppm) for this date and time
            fp_carbon_multiplied = fp_point_source_m2_s*fp_current
            # FIX: the original did 'list_total_by_date = ...sum()', replacing
            # the list with a scalar every iteration; append to build the
            # time series of per-date domain totals.
            list_total_by_date.append(fp_carbon_multiplied[0].sum())
            # one value per selected facility for this date/time, in the same
            # order as list_name_facility
            list_values_by_date = []
            for index in range(len(list_name_facility)):
                value_by_date = fp_carbon_multiplied[0][list_row_facility[index]][list_col_facility[index]]*list_ratio[index]
                list_values_by_date.append(value_by_date)
            # one inner list per date/time
            list_of_list_values_by_date.append(list_values_by_date)
    # ----- plot -------------------------------------------------------------
    if len(list_name_facility) > 0:
        matplotlib.rcParams.update({'font.size': 17})
        fig = p.figure(figsize=(20,10))
        ax = fig.add_subplot(111)
        for index_facility in range(len(list_name_facility)):
            # values were appended in the same facility order for every date
            list_by_facility = [values[index_facility] for values in list_of_list_values_by_date]
            p.plot(list_dates,
                   list_by_facility, linestyle='-',
                   # randomly generated colour: different per facility (hopefully)
                   color=numpy.random.rand(3,),
                   # label: facility name plus its whole-period average
                   label=(str(list_name_facility[index_facility]) + ', average: ' + str("%.4f" %numpy.nanmean(list_by_facility)) + ' ppm'))
        # FIX: the black "total" curve previously re-plotted the last
        # facility's series under the total's label; plot the per-date
        # domain totals collected above instead.
        p.plot(list_dates,
               list_total_by_date, linestyle='-',
               color='black',
               label=('Total contribution point source emissions average: ' + str("%.4f" %numpy.nanmean(list_total_by_date)) + ' ppm'))
        p.axhline(0, color='black', lw=1)
        date_index_number = (len(date_range) - 1)
        p.title('Shift in CO2 attributed to different point source facilities at ' + str(station) + '\n' + str(date_range[0].year) + '-' + str(date_range[0].month) + '-' + str(date_range[0].day)\
            + ' to ' + str(date_range[date_index_number].year) + '-' + str(date_range[date_index_number].month) + '-' + str(date_range[date_index_number].day)\
            + ', Hour(s): ' + timeselect + '\n' + 'Threshold for inclusion: ' + str(threshold_timeseries) + ' ppm')
        ax.set_xlim(start_date,end_date)
        ax.set_ylabel('ppm')
        ax.legend(loc='upper left')
        ax.tick_params(axis='y')
        ax.yaxis.grid(True)
        p.show()
    else:
        print('No facilities contributing over' , str(threshold_timeseries) , ' ppm for ' , station)