-
Notifications
You must be signed in to change notification settings - Fork 0
/
Copy pathplot_inventory.py
207 lines (145 loc) · 6.96 KB
/
plot_inventory.py
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
202
203
204
205
206
207
#!/usr/local/bin python
import os
import datetime as dt
import numpy as np
import pandas as pd
import matplotlib as mpl
# use the Agg environment to generate an image rather than outputting to screen
mpl.use('Agg')
import matplotlib.pyplot as plt
import setup
import qc_utils as utils
# what is available
START_YEAR = 1800
END_YEAR = dt.date.today().year
DAYS_IN_AVERAGE_MONTH = [31, 28, 31, 30, 31, 30, 31, 31, 30, 31, 30, 31]
TODAY = dt.datetime.strftime(dt.datetime.now(), "%Y%m%d")
#*********************************************
def read_stations() -> None:
"""
Process the GHCNH history file
Select stations which have defined lat, lon and elevation;
at least N years of data (using start/end dates). Also do additional
processing for Canadian (71*) and German (09* and 10*) stations.
:returns: list of station objects
"""
all_stations = []
try:
# process the station list
station_list = pd.read_fwf(setup.STATION_LIST, widths=(11, 9, 10, 7, 3, 40, 5),
header=None, names=("id", "latitude", "longitude", "elevation", "state", "name", "wmo"))
station_IDs = station_list.id
for st, station_id in enumerate(station_IDs):
# print(f"{dt.datetime.now()} {station_id:11s} ({st+1}/{station_IDs.shape[0]})")
station = utils.Station(station_id, station_list.latitude[st], station_list.longitude[st],
station_list.elevation[st])
if station.lat != "" and station.lon != "" and station.elev != "" and float(station.elev) != -999.9:
# test if elevation is known (above Dead-Sea shore in Jordan at -423m)
if float(station.elev) > -430.0:
station.name = station_list.name[st]
station.wmo = station_list.wmo[st]
station.mergers = []
all_stations += [station]
except OSError:
print(f"{setup.STATION_LIST:s} does not exist.")
raise OSError
print(f"{len(station_IDs)} stations in full GHCNH")
print(f"{len(all_stations)} stations with defined metadata")
return np.array(all_stations) # read_stations
#*********************************************
def extract_inventory(station: utils.Station, inventory: np.ndarray, data_start: int,
data_end: int, do_mergers:bool = True) -> np.ndarray:
"""
Extract the information from the ISD inventory file
:param station station: station object to extract data for
:param array inventory: the full ISD inventory to extract information from
:param int data_start: first year of data
:param int data_end: last year of data
:param bool do_mergers: include the information from merged stations when doing selection cuts
:returns: array of Nyears x 12 months containing number of observation records in each month
"""
monthly_obs = np.zeros([END_YEAR - START_YEAR + 1, 12])
# are WMO IDs sufficient?
locs, = np.where(inventory[:, 0] == station.id)
if len(locs) == 0:
return []
else:
this_station = inventory[locs, :]
locs, = np.where(this_station[:, 2].astype(int) != 0)
station.start = this_station[locs[0], 1].astype(int)
station.end = this_station[locs[-1], 1].astype(int)
for year in this_station:
monthly_obs[int(year[1]) - START_YEAR, :] = year[3:].astype(int)
if do_mergers:
if station.mergers != []:
for mstation in station.mergers:
merger_station = inventory[inventory[:, 0] == mstation, :]
for year in merger_station:
# only replace if have more observations
# can't tell how the merge will work with interleaving obs, so just take the max
locs = np.where(year[3:].astype(int) > monthly_obs[int(year[1]) - START_YEAR, :])
monthly_obs[int(year[1]) - START_YEAR, locs] = year[3:][locs].astype(int)
# if restricted data period
if data_start != START_YEAR:
monthly_obs = monthly_obs[(data_start - START_YEAR):, :]
if data_end != END_YEAR:
monthly_obs = monthly_obs[:(data_end - END_YEAR), :]
return monthly_obs # extract_inventory
#*********************************************
def process_inventory(candidate_stations: list, data_start: int, data_end: int) -> None:
"""
Process the ISD inventory file
:param list candidate_stations: list of station objects to match up
:param int data_start: first year of data
:param int data_end: last year of data
"""
print("reading GHCNH inventory")
# read in the inventory
try:
inventory = np.genfromtxt(os.path.join(setup.SUBDAILY_METADATA_DIR, setup.INVENTORY),
skip_header=8, dtype=str)
except OSError:
pass
all_counts = np.zeros((len(candidate_stations), 12*(data_end-data_start+1)))
name_labels = []
last_station = "-"
# spin through each station
for s, station in enumerate(candidate_stations):
# print(f"{s}/{len(candidate_stations)}")
if station.id[0] != last_station:
name_labels += [[station.id[0], f"{s}"]]
last_station = station.id[0]
print(last_station)
# extract the observations in each month for this station
monthly_obs = extract_inventory(station, inventory, data_start, data_end, do_mergers=True)
if len(monthly_obs) != 0:
all_counts[s, :] = monthly_obs.reshape(-1)
# if s > 1000: break
# hide the zero-count months
all_counts = np.ma.masked_where(all_counts <= 0, all_counts)
name_labels = np.array(name_labels)
# now plot this
station_counter = np.arange(len(candidate_stations))
years = np.arange(data_start, data_end+1, 1/12)
print("plotting GHCNH inventory")
plt.figure(figsize=(8, 25))
plt.clf()
ax1 = plt.axes([0.1, 0.02, 0.84, 0.95])
plt.pcolormesh(years, station_counter, all_counts, cmap=plt.cm.viridis,
norm=mpl.colors.Normalize(vmin=0, vmax=1000))
plt.colorbar(orientation="horizontal", label="Monthly obs counts",
extend="max", ticks=np.arange(0, 1100, 100),
pad=0.02, aspect=30, fraction=0.02)
plt.ylabel("Station sequence number")
ax2 = ax1.twinx()
ax2.set_ylim([None, len(station_counter)])
ax2.set_yticks(name_labels[:, 1].astype(int), name_labels[:, 0])
ax2.set_ylabel("Station start letter")
plt.savefig(os.path.join(setup.SUBDAILY_IMAGE_DIR, f"station_plot_{setup.DATESTAMP[:-1]}.png"), dpi=300)
return # process_inventory
if __name__ == "__main__":
# parse text file into candidate list, with lat, lon, elev and time span limits applied
all_stations = read_stations()
data_start = 1800
data_end = dt.datetime.now().year
process_inventory(all_stations, data_start, data_end)