-
Notifications
You must be signed in to change notification settings - Fork 1
/
Copy pathcompliance_checker.py
580 lines (508 loc) · 43 KB
/
compliance_checker.py
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
202
203
204
205
206
207
208
209
210
211
212
213
214
215
216
217
218
219
220
221
222
223
224
225
226
227
228
229
230
231
232
233
234
235
236
237
238
239
240
241
242
243
244
245
246
247
248
249
250
251
252
253
254
255
256
257
258
259
260
261
262
263
264
265
266
267
268
269
270
271
272
273
274
275
276
277
278
279
280
281
282
283
284
285
286
287
288
289
290
291
292
293
294
295
296
297
298
299
300
301
302
303
304
305
306
307
308
309
310
311
312
313
314
315
316
317
318
319
320
321
322
323
324
325
326
327
328
329
330
331
332
333
334
335
336
337
338
339
340
341
342
343
344
345
346
347
348
349
350
351
352
353
354
355
356
357
358
359
360
361
362
363
364
365
366
367
368
369
370
371
372
373
374
375
376
377
378
379
380
381
382
383
384
385
386
387
388
389
390
391
392
393
394
395
396
397
398
399
400
401
402
403
404
405
406
407
408
409
410
411
412
413
414
415
416
417
418
419
420
421
422
423
424
425
426
427
428
429
430
431
432
433
434
435
436
437
438
439
440
441
442
443
444
445
446
447
448
449
450
451
452
453
454
455
456
457
458
459
460
461
462
463
464
465
466
467
468
469
470
471
472
473
474
475
476
477
478
479
480
481
482
483
484
485
486
487
488
489
490
491
492
493
494
495
496
497
498
499
500
501
502
503
504
505
506
507
508
509
510
511
512
513
514
515
516
517
518
519
520
521
522
523
524
525
526
527
528
529
530
531
532
533
534
535
536
537
538
539
540
541
542
543
544
545
546
547
548
549
550
551
552
553
554
555
556
557
558
559
560
561
562
563
564
565
566
567
568
569
570
571
572
573
574
575
576
577
578
579
580
import os
import xarray as xr
import pandas as pd
import datetime
import numpy as np
#decoding time units and variable values in a netCDF file conforming to the Climate and Forecasting (CF) netCDF conventions.
import cftime
# progress bar
from tqdm import tqdm
import subprocess
### Commit number:
# Record the short hash of the current git commit so that the log file can
# state which version of the checker produced it.
commit_num = ''
try:
    # The arguments are passed as a list: no shell is involved, so quoting %h
    # would send the quote characters to git and they would end up in the hash.
    process = subprocess.run(
        ["git", "log", "--pretty=format:%h", "-n", "1"],
        stdout=subprocess.PIPE,
        stderr=subprocess.DEVNULL,
    )
    # A non-zero return code means git ran but found no repository/commit.
    if process.returncode == 0:
        commit_num = process.stdout.decode("UTF-8").strip()
except (OSError, UnicodeDecodeError):
    # git is not installed (or its output is unreadable): handled just below.
    commit_num = ''
if not commit_num:
    print('Unable to identify the commit number associated with this code. Is there a .git in this directory ?')
    commit_num = 'No commit number identified.'
#######################################
#### specify your source path
#######################################
source_path = './test'
#######################################
# Compliance values to be monitored
#######################################
workdir = os.getcwd()
# The criteria table is expected next to the working directory of the run.
criteria_path = os.path.join(workdir, 'ismip6_criteria.csv')
try:
    # load csv : ';' separates the columns and ',' is the decimal mark
    ismip = pd.read_csv(criteria_path, delimiter=';', decimal=",")
except IOError:
    print('ERROR: Unable to open the compliance criteria file (.csv required with ; as delimiter and , for decimal.). Is the path to the file correct ? ' + criteria_path)
    # Nothing can be checked without the criteria: stop here with the real
    # cause instead of failing later on an undefined name.
    raise
# one dict per row of the criteria table
ismip_meta = ismip.to_dict('records')
# get the list of variables
ismip_var = [dic['variable'] for dic in ismip_meta]
# get the mandatory variables (rows flagged with mandatory == 1)
ismip_mandatory_var = ismip['variable'][ismip.mandatory==1].tolist()
variables = ismip_var
mandatory_variables = ismip_mandatory_var
# experiments ISMIP6 extension (2300) setup
# Every extension run shares the same start window, the same upper end bound
# and the same duration; only the control run (ctrlAE) has a different lower
# end bound, so the table is generated instead of being spelled out.
experiments_ismip6_ext = [
    {
        'experiment': label,
        'startinf': datetime.datetime(2015, 1, 1),
        'startsup': datetime.datetime(2016, 1, 2),
        'endinf': (datetime.datetime(2300, 7, 1) if label == 'ctrlAE'
                   else datetime.datetime(2300, 6, 30)),
        'endsup': datetime.datetime(2301, 1, 1),
        'duration': 286,
    }
    for label in ['ctrlAE'] + ['expAE%02d' % number for number in range(1, 15)]
]
# experiments ISMIP6 setup
# Each row is (experiment, startinf, startsup, endinf, endsup, duration).
# The historical and control runs are listed by hand; exp01..exp13 all share
# the same time window and are generated.
experiments_ismip6 = [
    {
        'experiment': label,
        'startinf': start_lower,
        'startsup': start_upper,
        'endinf': end_lower,
        'endsup': end_upper,
        'duration': years,
    }
    for label, start_lower, start_upper, end_lower, end_upper, years in (
        [
            ('hist', datetime.datetime(1979, 6, 30), datetime.datetime(1980, 1, 1),
             datetime.datetime(2014, 6, 30), datetime.datetime(2015, 1, 1), 35),
            ('ctrl', datetime.datetime(1979, 6, 30), datetime.datetime(1980, 1, 1),
             datetime.datetime(2100, 6, 30), datetime.datetime(2101, 1, 1), 120),
            ('ctrl_proj', datetime.datetime(2015, 1, 1), datetime.datetime(2016, 1, 2),
             datetime.datetime(2100, 7, 1), datetime.datetime(2101, 1, 1), 86),
        ]
        + [
            ('exp%02d' % number, datetime.datetime(2015, 1, 1), datetime.datetime(2016, 1, 2),
             datetime.datetime(2100, 6, 30), datetime.datetime(2101, 1, 1), 86)
            for number in range(1, 14)
        ]
    )
]
# names of the scalar (non-gridded) ISMIP6 variables
scalar_variables_ismip6 = [
    'lim',
    'limnsw',
    'iareagr',
    'iareafl',
    'tendacabf',
    'tendlibmassbf',
    'tendlibmassbffl',
    'tendlicalvf',
    'tendlifmassbf',
    'tendligroundf',
]
scalar_variables = scalar_variables_ismip6
# Select the table matching your runs: experiments_ismip6_ext for the ISMIP6
# extension (2300) OR experiments_ismip6 for ISMIP6 (2100).
experiments = experiments_ismip6
# List the direct content of a directory, split by kind: the sub-directories
# (= experiments) and the regular files (= variables).
def files_and_subdirectories(path):
    """Return ``(directories, files)``: the names found directly under *path*.

    Names are reported without the *path* prefix, in the order given by
    ``os.listdir``. Entries that are neither a file nor a directory are ignored.
    """
    directories, files = [], []
    for entry in os.listdir(path):
        full_path = os.path.join(path, entry)
        if os.path.isfile(full_path):
            files.append(entry)
        elif os.path.isdir(full_path):
            directories.append(entry)
    return directories, files
# check monotonicity of a sequence (used to check the time series)
def strictly_increasing(L):
    """Return True when every element of *L* is smaller than the one after it.

    Sequences with fewer than two elements are considered increasing.
    """
    for current, following in zip(L, L[1:]):
        if not current < following:
            return False
    return True
###############################################
# create the compliance_checker_log.txt file
###############################################
def _check_numerical(log, data, criteria, region):
    """Run the unit / minimum / maximum tests on one variable.

    log      -- writable log file receiving the test report
    data     -- the variable read from the netCDF file (``ds[ivar]``)
    criteria -- the row of the criteria table (a dict) describing the variable
    region   -- region tag taken from the file name ('AIS' or 'GIS', any case)

    Returns the number of errors found.
    """
    errors = 0
    log.write('NUMERICAL Tests \n')
    # check the unit (a missing 'units' attribute is reported, not fatal)
    unit = data.attrs.get('units')
    if unit == criteria['units']:
        log.write(' - The unit is correct: ' + str(unit) + '\n')
    else:
        log.write(' - ERROR: The unit of the variable is ' + str(unit) + ' and should be ' + str(criteria['units']) + ' \n')
        errors += 1
    # the min/max tests are meaningless when the array is full of NaN values
    if bool(data.notnull().any()):
        min_allowed = criteria['min_value_' + region.lower()]
        max_allowed = criteria['max_value_' + region.lower()]
        lowest = data.min(skipna=True).item()
        highest = data.max(skipna=True).item()
        # check the min value
        if lowest >= min_allowed:
            log.write(' - The minimum value successfully verified.\n')
        else:
            log.write(' - ERROR: The minimum value (' + str(lowest) + ') is out of range. Min value accepted: ' + str(min_allowed) + '\n')
            errors += 1
        # check the max value
        if highest <= max_allowed:
            log.write(' - The maximum value successfully verified.\n')
        else:
            log.write(' - ERROR: The maximum value (' + str(highest) + ') is out of range. Max value accepted: ' + str(max_allowed) + '\n')
            errors += 1
    else:
        log.write(' - ERROR: The array only contains Nan values.\n')
        errors += 1
    return errors


def _check_spatial(log, ds, grid_extent, possible_resolution, grid_resolution):
    """Check the extent and the resolution of the x/y grid of a dataset.

    log                 -- writable log file receiving the test report
    ds                  -- opened dataset owning the 'x' and 'y' coordinates
    grid_extent         -- expected [xmin, ymin, xmax, ymax]
    possible_resolution -- authorized resolutions (same unit as grid_resolution)
    grid_resolution     -- resolution announced by the experiment directory name

    Returns the number of errors found.
    """
    errors = 0
    log.write('SPATIAL Tests \n')
    x_coord = ds.coords['x']
    y_coord = ds.coords['y']
    Xbottomleft = int(x_coord.min().item())
    Ybottomleft = int(y_coord.min().item())
    Xtopright = int(x_coord.max().item())
    Ytopright = int(y_coord.max().item())
    # 'and' is required here: '&' binds tighter than '==' and would turn the
    # test into a chained comparison on a bitwise result.
    if Xbottomleft == grid_extent[0] and Ybottomleft == grid_extent[1]:
        log.write(' - Grid: Lowest left corner is well defined.\n')
    else:
        log.write(' - ERROR: Lowest left corner of the grid [' + str(Xbottomleft) + ',' + str(Ybottomleft) + '] is not correctly defined. [' + str(grid_extent[0]) + ',' + str(grid_extent[1]) + '] Expected\n')
        errors += 1
    if Xtopright == grid_extent[2] and Ytopright == grid_extent[3]:
        log.write(' - Grid: Upper right corner is well defined.\n')
    else:
        log.write(' - ERROR: Upper rigth corner of the grid [' + str(Xtopright) + ',' + str(Ytopright) + '] is not correctly defined. [' + str(grid_extent[2]) + ',' + str(grid_extent[3]) + '] Expected\n')
        errors += 1
    # spatial resolution: spacing of the first two nodes, divided by 1000
    Xresolution = round((x_coord[1].values - x_coord[0].values) / 1000, 0)
    Yresolution = round((y_coord[1].values - y_coord[0].values) / 1000, 0)
    if Xresolution in set(possible_resolution) and Yresolution in set(possible_resolution):
        if Xresolution == grid_resolution and Yresolution == grid_resolution:
            log.write(' - The grid resolution (' + str(Xresolution) + ') was successfully verified.\n')
        else:
            log.write(' - ERROR: The grid resolution ( ' + str(Xresolution) + ' or ' + str(Yresolution) + ') is different of ' + str(grid_resolution) + 'declared in the file name.\n')
            errors += 1
    else:
        log.write(' - Error: x: ' + str(Xresolution) + ',y: ' + str(Yresolution) + ' is not an authorized grid resolution.\n')
        errors += 1
    return errors


def _check_time(log, ds, dim, experiment):
    """Check the time axis of a dataset against the expected experiment setup.

    log        -- writable log file receiving the test report
    ds         -- opened dataset
    dim        -- set of the coordinate names of the dataset
    experiment -- entry of the experiments table (start/end windows, duration)

    Returns the number of errors found.
    """
    log.write('TIME Tests \n')
    # the time axis may be called 'time' or 't': use the name actually present
    if 'time' in dim:
        time_name = 'time'
    elif 't' in dim:
        time_name = 't'
    else:
        log.write(' - ERROR: The time dimensions is missing. Time Tests have been ignored.\n')
        return 1
    time_coord = ds.coords[time_name]
    iteration = len(time_coord)
    start_exp = min(time_coord).values.astype("datetime64[D]")
    end_exp = max(time_coord).values.astype("datetime64[D]")
    avgyear = 365  # length of a year, in days, used to convert the duration
    # test if start_exp and end_exp are datetime format
    if not (np.issubdtype(start_exp.dtype, np.datetime64) and np.issubdtype(end_exp.dtype, np.datetime64)):
        log.write(' - ERROR: the time format of the Netcdf file is not recognized.Time Tests have been ignored.\n')
        return 1
    # check Monotonicity of the time serie
    if not strictly_increasing(time_coord):
        log.write(' - ERROR: the time serie is not monotonous. Time segments have probably been concatenate in a wrong order.\n')
        return 1
    if iteration < 2:
        # a time step needs two dates; without it the duration is unknown too
        log.write(' - ERROR: the time serie contains fewer than two time steps. The time step cannot be verified.\n')
        return 1
    errors = 0
    # test Time step : should be 360<timestep<367
    step = time_coord.values[1] - time_coord.values[0]
    if isinstance(step, datetime.timedelta):
        time_step = step.days
    elif isinstance(step, np.timedelta64):
        time_step = np.timedelta64(step, 'D') / np.timedelta64(1, 'D')
    else:
        time_step = step
    if 360 <= time_step <= 367:
        log.write(' - Time step: ' + str(time_step) + ' days' + '\n')
    else:
        log.write(' - ERROR: the time step(' + str(time_step) + ') should be comprised between [360,367].\n')
        errors += 1
    # test duration (iteration = length of the time coordinate)
    duration_days = pd.to_timedelta(time_step * iteration, 'D')
    duration_years = round(pd.to_numeric(duration_days.days / avgyear))
    if duration_years == experiment['duration']:
        log.write(" - Experiment lasts " + str(duration_years) + ' years.\n')
        # test Starting date
        first_day = start_exp.item()
        dateformat_start_exp = datetime.datetime(first_day.year, first_day.month, first_day.day)
        if experiment['startinf'] <= dateformat_start_exp <= experiment['startsup']:
            log.write(' - Experiment starts correctly on ' + first_day.strftime('%Y-%m-%d') + '.\n')
        else:
            log.write(' - ERROR: the experiment starts the ' + first_day.strftime('%Y-%m-%d') + '. The date should be comprised between ' + experiment['startinf'].strftime('%Y-%m-%d') + ' and ' + experiment['startsup'].strftime('%Y-%m-%d') + '\n')
            errors += 1
        # test Ending date
        last_day = end_exp.item()
        dateformat_end_exp = datetime.datetime(last_day.year, last_day.month, last_day.day)
        if experiment['endinf'] <= dateformat_end_exp <= experiment['endsup']:
            log.write(' - Experiment ends correctly on ' + last_day.strftime('%Y-%m-%d') + '.\n')
        else:
            log.write(' - ERROR: the experiment ends on ' + last_day.strftime('%Y-%m-%d') + '. The date should be comprised between ' + experiment['endinf'].strftime('%Y-%m-%d') + ' and ' + experiment['endsup'].strftime('%Y-%m-%d') + '\n')
            errors += 1
    else:
        # expected end date, computed from the duration of THIS experiment
        end_date = start_exp + np.timedelta64(experiment['duration'] * 365, 'D')
        log.write(' - ERROR: the experiment lasts ' + str(duration_years) + ' years. The duration should be ' + str(experiment['duration']) + ' years\n')
        log.write(' - As the experiment started on ' + start_exp.item().strftime('%Y-%m-%d') + ' , it should end on ' + end_date.item().strftime('%Y-%m-%d') + '\n')
        errors += 1
    return errors


# stop the checker if Typeerror occurs.
try:
    log_path = os.path.join(source_path, 'compliance_checker_log.txt')
    experiment_names = [dic['experiment'] for dic in experiments]
    with open(log_path, "w") as f:
        print('-> Checking ' + source_path)
        print( )
        experiment_directories, files = files_and_subdirectories(source_path)
        today = datetime.date.today()
        f.write('************************************************************************************\n')
        f.write('************* Ice Sheet Model Simulations - Compliance Checker *************\n')
        f.write('************************************************************************************\n')
        f.write(f'Commit Number: {commit_num} \n')
        f.write('verification criteria: ismip6_criteria.csv \n')
        f.write('date: ' + today.strftime("%Y/%m/%d") + '\n')
        f.write('source: https://github.com/jbbarre/ISM_SimulationChecker \n')
        f.write(' \n')
        f.write('------------------------------------------------------------------------------------\n')
        f.write('Verified directory: ' + source_path + ' \n')
        f.write('------------------------------------------------------------------------------------\n')
        f.write(' \n')
        f.write(' \n')
        f.write(' \n')
        f.write(' \n')
        f.write('====================================================================================\n')
        f.write('================ DETAILED RESULTS ================\n')
        f.write('====================================================================================\n')
        f.write('Tips: Use Cltr+F to look for specific problems. \n')
        f.write(' \n')
        ###############################################
        # Start the compliance checker
        ###############################################
        # totals for the entire compliance check, per family of tests
        total_errors = 0
        total_warnings = 0
        total_naming_errors = 0
        total_num_errors = 0
        total_spatial_errors = 0
        total_time_errors = 0
        # errors related to missing mandatory files (= mandatory variables)
        total_file_errors = 0
        # gather all the naming issues to report in the synthesis.
        report_naming_issues = []
        # number of files and of experiments visited
        file_counter = 0
        exp_counter = 0
        for xp in experiment_directories:
            exp_counter += 1
            _, exp_files = files_and_subdirectories(os.path.join(source_path, xp))
            exp_files = [name for name in exp_files if name.split('.')[-1] == 'nc']
            # error counters of the experiment, per family of tests
            exp_naming_errors = 0
            exp_num_errors = 0
            exp_spatial_errors = 0
            exp_time_errors = 0
            exp_file_errors = 0
            # list of the missing mandatory variables - could be empty -.
            # Built as a new list so that the reference list stays intact for
            # the next experiments.
            present_variables = {name.split('_')[0] for name in exp_files}
            missing_mandatory_var = [var for var in mandatory_variables if var not in present_variables]
            # split the experiment directory name on its LAST underscore:
            # <experiment>_<resolution>, the experiment itself may contain
            # underscores (ctrl_proj).
            name_part, _, resolution_part = xp.rpartition('_')
            if name_part and resolution_part.isdecimal():
                # get the experiment name (example: exp05)
                experiment_name = name_part
                # get the resolution as integer
                grid_resolution = int(resolution_part)
            else:
                experiment_name = xp
                grid_resolution = 0
                print('Error in the naming of the experiment ',xp,'. Should be similar to expXXX_RES')
            f.write('\n ')
            f.write('**********************************************************\n')
            f.write(' ** Experiment: ' + experiment_name + ' \n ')
            f.write('**********************************************************\n')
            f.write('\n ')
            if experiment_name in experiment_names:
                experiment = experiments[experiment_names.index(experiment_name)]
                if not missing_mandatory_var:
                    f.write('Mandatory variables Test: ' + xp + ' : all mandatory variables exist. \n')
                else:
                    f.write('ERROR: In experiment ' + xp + ', these mandatory variable(s) is (are) missing: ' + str(missing_mandatory_var) + '\n')
                    exp_file_errors += len(missing_mandatory_var)
                for file in tqdm(exp_files):
                    file_counter += 1
                    # counters of the file, per family of tests
                    var_naming_errors = 0
                    var_num_errors = 0
                    var_spatial_errors = 0
                    var_time_errors = 0
                    var_warnings = 0
                    file_name = os.path.normpath(file).split(os.sep)[-1]
                    # expected name: <variable>_<region>_<group>_<model>_<experiment>.nc
                    # (maxsplit keeps an experiment containing '_' in one piece)
                    file_name_split = file_name.split('_', 4)
                    if len(file_name_split) != 5:
                        # NAMING TEST
                        f.write(' - ERROR: the file name ' + file_name + ' do not follow the naming convention.\n')
                        report_naming_issues.append('Compliance check ignored: file ' + file_name + ' do not follow the naming convention.')
                        var_naming_errors += 1
                    else:
                        considered_variable = file_name_split[0]
                        region = file_name_split[1]
                        # name of the experiment in the file name ('.nc' removed)
                        experiment_varname = file_name_split[4][:-3]
                        if experiment_varname != experiment_name:
                            # NAMING TEST
                            f.write(' - ERROR: in the file name ' + file_name + ', the experiment name (' + experiment_varname + ') do not match the directory name: ' + experiment_name + '.\n')
                            report_naming_issues.append('Compliance check ignored: in the file name ' + file_name + ', the experiment name (' + experiment_varname + ') do not match the directory name: ' + experiment_name + '.\n')
                            var_naming_errors += 1
                        elif considered_variable in variables:
                            # files of variables absent from the criteria table
                            # (e.g. scalar variables) are skipped silently
                            f.write (' \n')
                            f.write('Experiment: ' + experiment_name + ' - File: ' + file_name + '\n')
                            f.write(' \n')
                            # the context manager closes the netcdf file once checked
                            with xr.open_dataset(os.path.join(source_path, xp, file)) as ds:
                                dim = set(ds.coords)
                                # perform compliance even if time is missing:
                                # the time axis is handled by the time tests
                                if not {'x', 'y'}.issubset(dim):
                                    f.write('- ERROR: Compliance check ignored: x or y in the mandatory dimensions (x,y,t) is missing.\n')
                                    f.write(' Only ' + str(list(ds.coords)) + ' has been detected.\n')
                                    report_naming_issues.append('Compliance check ignored: x or y in the mandatory dimensions (x,y,t) is missing in ' + file_name )
                                    var_naming_errors += 1
                                elif region.upper() not in ['AIS', 'GIS']:
                                    # NAMING TEST
                                    f.write('- ERROR: Region ' + region + ' not recognized. It should be AIS or GIS. The compliance check has been interrupted for this variable.\n')
                                    report_naming_issues.append('Compliance check ignored: region (AIS/GIS) not identified in the file ' + file_name + ' due to wrong naming.')
                                    var_naming_errors += 1
                                else:
                                    if region.upper() == 'AIS':
                                        # AIS Grid
                                        grid_extent = [-3040000,-3040000,3040000,3040000]
                                        possible_resolution = [1,2,4,8,16,32]
                                    else:
                                        # GIS Grid
                                        grid_extent = [-720000,-3450000,960000,-570000]
                                        possible_resolution = [1,2,4,5,10,20]
                                    for ivar in list(ds.data_vars):
                                        if ivar in ismip_var:
                                            f.write('** Tested Variable: ' + ivar + '\n')
                                            f.write (' \n')
                                            criteria = ismip_meta[ismip_var.index(ivar)]
                                            var_num_errors += _check_numerical(f, ds[ivar], criteria, region)
                                            var_spatial_errors += _check_spatial(f, ds, grid_extent, possible_resolution, grid_resolution)
                                            var_time_errors += _check_time(f, ds, dim, experiment)
                            var_errors = var_naming_errors + var_num_errors + var_spatial_errors + var_time_errors
                            f.write('\n')
                            f.write('----------------------------------------------------------\n')
                            f.write(experiment_name + ' - ' + considered_variable + ' - File:' + file_name + '\n')
                            if var_errors > 0:
                                f.write(str(var_errors) + ' error(s). Please review before sharing.' + '\n')
                            else:
                                f.write('No errors. Good job !' + '\n')
                            if var_warnings > 0:
                                f.write(str(var_warnings) + ' warning(s). Please review before sharing.' + '\n')
                            else:
                                f.write('No warnings.' + '\n')
                            f.write('----------------------------------------------------------\n')
                    exp_naming_errors += var_naming_errors
                    exp_num_errors += var_num_errors
                    exp_spatial_errors += var_spatial_errors
                    exp_time_errors += var_time_errors
                    total_warnings += var_warnings
            else:
                # str() is required: concatenating a list to a string raises
                # the TypeError that aborts the whole check
                f.write('ERROR: The compliance check is ignored for experiment ' + experiment_name + ' as it is not in ' + str(experiment_names) + '. \n')
                exp_naming_errors += 1
                report_naming_issues.append('Compliance check ignored : experiment ' + experiment_name + ' not in the experiments list.')
            # computed once per experiment so that missing mandatory files are
            # counted even when the directory holds no file at all
            exp_errors = exp_time_errors + exp_spatial_errors + exp_num_errors + exp_naming_errors + exp_file_errors
            print(experiment_name,': compliance check processed.')
            if exp_errors > 0:
                print('Found' , exp_errors , 'errors. Check compliance_checker_log.txt for details.')
            else:
                print('Successfully verified with no errors')
            print( )
            # Update counters.
            total_naming_errors += exp_naming_errors
            total_num_errors += exp_num_errors
            total_spatial_errors += exp_spatial_errors
            total_time_errors += exp_time_errors
            total_file_errors += exp_file_errors
        total_errors = total_naming_errors + total_num_errors + total_spatial_errors + total_time_errors + total_file_errors
    #feedback terminal
    print('-------------------------------------------------------------------------')
    print(source_path,': compliance check processed.')
    if total_errors > 0:
        print('Found a total of' , total_errors , 'errors. Check compliance_checker_log.txt for details.')
    else:
        print('Successfully verified with no errors')
    print('-------------------------------------------------------------------------')
    ###################################################
    # insert synthesis at the top of the log file
    ###################################################
    # The log is closed at this point: read it back, insert the synthesis in
    # the blank area left after the 'Verified directory' banner, rewrite it.
    with open(log_path, "r") as f:
        contents = f.readlines()
    # lines insert position
    iline = 11
    contents.insert(iline, str(exp_counter) + ' experiments checked.\n')
    iline += 1
    contents.insert(iline, str(file_counter) + ' files checked (Scalar files are ignored).\n')
    iline += 2
    contents.insert(iline, str(total_errors) + ' error(s) detected.\n')
    iline += 1
    contents.insert(iline, ' - Mandatory variables: ' + str(total_file_errors) + ' error(s)\n')
    iline += 1
    contents.insert(iline, ' - Naming Tests : ' + str(total_naming_errors) + ' error(s)\n')
    iline += 1
    contents.insert(iline, ' - Numerical Tests : ' + str(total_num_errors) + ' error(s)\n')
    iline += 1
    contents.insert(iline, ' - Spatial Tests : ' + str(total_spatial_errors) + ' error(s)\n')
    iline += 1
    contents.insert(iline, ' - Time Tests : ' + str(total_time_errors) + ' error(s)\n')
    iline += 2
    contents.insert(iline, str(total_warnings) + ' warning(s) detected.\n')
    iline += 2
    if total_naming_errors > 0 :
        contents.insert(iline, 'Naming tests errors report: \n' )
        iline += 1
        # one line per naming issue, in the order they were detected
        for offset, issue in enumerate(report_naming_issues):
            contents.insert(iline + offset, ' - ' + issue + '\n')
        contents.insert(iline + len(report_naming_issues), '\n')
    with open(log_path, "w") as f:
        f.writelines(contents)
except TypeError as err:
    print('Something went wrong with your dataset. Please, check your file(s) carrefully. Erreur :', err)