Skip to content

Commit

Permalink
- Associated fields now correctly used to set quality flag.
Browse files Browse the repository at this point in the history
- Fix for broken DAYCLI sequence and phenomenonTime.
- First order statistic appended to observed property when present.
  • Loading branch information
david-i-berry committed Nov 20, 2024
1 parent 66ff091 commit b361379
Show file tree
Hide file tree
Showing 2 changed files with 155 additions and 31 deletions.
104 changes: 73 additions & 31 deletions bufr2geojson/__init__.py
Original file line number Diff line number Diff line change
Expand Up @@ -77,6 +77,9 @@
"Pa": "hPa"
}

# Lookup table for associated field significance (BUFR code table
# 0 31 021), keyed by the significance value as a string. Each entry
# carries a 'label', a 'type' and either 'values' or 'units'.
# JSON is UTF-8 by specification, so do not rely on the locale default.
with open(f"{RESOURCES}{os.sep}031021.json", encoding="utf-8") as fh:
    ASSOCIATED_FIELDS = json.load(fh)

# list of BUFR attributes
ATTRIBUTES = ['code', 'units', 'scale', 'reference', 'width']

Expand Down Expand Up @@ -184,6 +187,7 @@ def __init__(self, raise_on_error=False):
"09": {}, # reserved
"22": {}, # some sst sensors in class 22
"25": {}, # processing information
"31": {}, # associated field significance
"33": {}, # BUFR/CREX quality information
"35": {} # data monitoring information
}
Expand Down Expand Up @@ -258,7 +262,7 @@ def get_qualifiers(self) -> list:
"""

classes = ("01", "02", "03", "04", "05", "06",
"07", "08", "22", "25", "35")
"07", "08", "22", "25", "31", "33", "35")

identification = {}
wigos_md = {}
Expand Down Expand Up @@ -797,6 +801,10 @@ def as_geojson(self, bufr_handle: int, id: str,
while codes_bufr_keys_iterator_next(key_iterator):
# get key
key = codes_bufr_keys_iterator_get_name(key_iterator)
if "associatedField" in key: # we've already processed, skip
last_key = key
continue

# identify what we are processing
if key in (HEADERS + ECMWF_HEADERS + UNEXPANDED_DESCRIPTORS):
continue
Expand All @@ -812,6 +820,41 @@ def as_geojson(self, bufr_handle: int, id: str,
f = int(fxxyyy[0:1])
xx = int(fxxyyy[1:3])
yyy = int(fxxyyy[3:6])

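# because of the way eccodes works we need to check for associated
# fields here. The key iterator returns them after the element they
# belong to, so they are looked up now and skipped when reached later.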
associated_field = None
try:
associated_field_value = codes_get(bufr_handle, f"{key}->associatedField")
associated_field = codes_get(bufr_handle, f"{key}->associatedField->associatedFieldSignificance")
associated_field = f"{associated_field}"
associated_field = ASSOCIATED_FIELDS.get(associated_field)
except:
pass

if associated_field is not None:
flabel = associated_field.get('label','')
ftype = associated_field.get('type','')
if ftype == 'int':
associated_field_value = f"{int(associated_field_value)}"
associated_field_value = \
associated_field.get('values',{}).get(associated_field_value,'')
else:
funits = associated_field.get('units', '')
associated_field_value = f"{associated_field_value} {funits}"
quality_flag = {
'inScheme': "https://codes.wmo.int/bufr4/codeflag/0-31-021",
'flag': flabel,
'flagValue': associated_field_value
}
else:
quality_flag = {
'inScheme': None,
'flag': None,
'flagValue': None
}

assert f == 0
# get value and attributes
# get as array and convert to scalar if required
value = codes_get_array(bufr_handle, key)
Expand Down Expand Up @@ -891,14 +934,23 @@ def as_geojson(self, bufr_handle: int, id: str,

# determine whether we have data or metadata
append = False
if xx < 9:
if xx < 9: # metadata / significance qualifiers
if ((xx >= 4) and (xx < 8)) and (key == last_key):
append = True
self.set_qualifier(fxxyyy, key, value, description,

if fxxyyy == "004023" and sequence == "307075": # fix for broken DAYCLI sequence
self.set_qualifier(fxxyyy, key, value, description,
attributes, append)
self.set_qualifier(fxxyyy, key, value+1, description,
attributes, append)
else:
self.set_qualifier(fxxyyy, key, value, description,
attributes, append)
last_key = key
continue
elif xx == 31:
if yyy in (12, 31):
raise NotImplementedError
last_key = key
continue
elif xx in (25, 33, 35):
Expand All @@ -920,16 +972,6 @@ def as_geojson(self, bufr_handle: int, id: str,
# self.get_identification()
metadata = self.get_qualifiers()
metadata["BUFR_element"] = fxxyyy
# metadata["provenance"] = headers.copy()


#metadata_hash = hashlib.md5(json.dumps(metadata).encode("utf-8")).hexdigest() # noqa
#md = {
# "id": metadata_hash,
# "metadata": list()
#}
#for idx in range(len(metadata)):
# md["metadata"].append(metadata[idx])

observing_procedure = "http://codes.wmo.int/wmdr/SourceOfObservation/unknown" # noqa

Expand Down Expand Up @@ -957,6 +999,13 @@ def as_geojson(self, bufr_handle: int, id: str,
else:
result_time = phenomenon_time

# check if we have statistic, if so modify observed_property
fos = self.get_qualifier("08","first_order_statistics",None)
observed_property = f"{key}"
if fos is not None:
fos = fos.get("description","")
observed_property = f"{key} ({fos.lower()})"

data = {
"geojson": {
"id": feature_id,
Expand All @@ -971,7 +1020,7 @@ def as_geojson(self, bufr_handle: int, id: str,
"host": host_id, # noqa
"observer": None,
"observationType": observation_type, # noqa
"observedProperty": key,
"observedProperty": observed_property,
"observingProcedure": observing_procedure,
"phenomenonTime": phenomenon_time,
"resultTime": result_time,
Expand All @@ -982,11 +1031,7 @@ def as_geojson(self, bufr_handle: int, id: str,
"standardUncertainty": None
},
"resultQuality": [
{
"inScheme": None,
"flag": None,
"flagValue": None
}
quality_flag
],
"parameter": {
"hasProvenance": None,
Expand All @@ -1013,8 +1058,7 @@ def as_geojson(self, bufr_handle: int, id: str,
"_meta": {
"data_date": self.get_time(),
"identifier": feature_id,
"geometry": self.get_location() # ,
# "metadata_hash": metadata_hash
"geometry": self.get_location()
},
"_headers": headers.copy()
}
Expand All @@ -1040,16 +1084,14 @@ def transform(data: bytes, guess_wsi: bool = False,


# get message
bulletins = []
position = 0
print(len(data))
while position < len(data):
bulletin_start = data.find(b"BUFR", position)
bulletin_end = data.find(b"7777", position)
print(bulletin_start, bulletin_end)
position = bulletin_end + 4
if -1 in (bulletin_start, bulletin_end):
break
#bulletins = []
#position = 0
#while position < len(data):
# bulletin_start = data.find(b"BUFR", position)
# bulletin_end = data.find(b"7777", position)
# position = bulletin_end + 4
# if -1 in (bulletin_start, bulletin_end):
# break

# eccodes needs to read from a file, create a temporary file
tmp = tempfile.NamedTemporaryFile()
Expand Down
82 changes: 82 additions & 0 deletions bufr2geojson/resources/031021.json
Original file line number Diff line number Diff line change
@@ -0,0 +1,82 @@
{
"1": {
"type": "int",
"label": "1-bit indicator of quality",
"values": {
"0": "Good",
"1": "Suspect or bad"
}
},
"2": {
"type": "int",
"label": "2-bit indicator of quality",
"values": {
"0" : "Good",
"1" : "Slightly suspect",
"2" : "Highly suspect",
"3" : "Bad"
}
},
"5": {
"type": "int",
"label": "8-bit indicator of quality",
"values": {
"0": "Data checked and declared good",
"1": "Data checked and declared suspect",
"2": "Data checked and declared aggregated",
"3": "Data checked and declared out of instrument range",
"4": "Data checked, declared aggregated, and out of instrument range",
"5": "Parameter is not measured at the station",
"6": "Daily value not provided",
"7": "Data unchecked",
"255": "Missing (QC info not available)"
}
},
"6": {
"type": "int",
"label": "4-bit indicator of quality control class according to GTSPP",
"values": {
"0": "Unqualified",
"1": "Correct value (all checks passed)",
"2": "Probably good but value inconsistent with statistics (differ from climatology)",
"3": "Probably bad (spike, gradient, ... if other tests passed)",
"4": "Bad value, impossible value (out of scale, vertical instability, constant profile)",
"5": "Value modified during quality control",
"8": "Interpolated value",
"9": "Missing value"
}
},
"7": {
"type": "float",
"label": "Percentage confidence",
"units": "%"
},
"8": {
"type": "int",
"label": "2-bit indicator of quality (alt)",
"values": {
"0": "Not suspected",
"1": "Suspected",
"2": "Reserved",
"3": "Information not required"
}
},
"9": {
"type": "int",
"label": "Status of ancillary data",
"values": {
"0": "Data present, good, collocated",
"1": "Data available but of degraded quality and not used",
"2": "No spatiotemporally collocated data available",
"15": "Missing value"
}
},
"21": {
"type": "int",
"label": "1-bit indicator of correction",
"values": {
"0": "Original value",
"1": "Substituted/corrected value"
}
}
}

0 comments on commit b361379

Please sign in to comment.