diff --git a/.github/workflows/tests.yml b/.github/workflows/tests.yml index 3bc81ea..179f562 100644 --- a/.github/workflows/tests.yml +++ b/.github/workflows/tests.yml @@ -7,7 +7,7 @@ jobs: runs-on: ubuntu-latest strategy: matrix: - python-version: [3.7, 3.8, 3.9] + python-version: ["3.9", "3.10", "3.11"] steps: - uses: actions/checkout@v2 @@ -36,6 +36,7 @@ jobs: python3 setup.py install - name: run tests ⚙️ run: | + export ECCODES_DEFINITION_PATH=/usr/share/miniconda/envs/__setup_conda/share/eccodes/definitions cd tests pytest - name: run flake8 ⚙️ diff --git a/Dockerfile b/Dockerfile index cdada7a..8482e13 100644 --- a/Dockerfile +++ b/Dockerfile @@ -19,28 +19,22 @@ # ############################################################################### -#FROM ubuntu:latest -FROM wmoim/dim_eccodes_baseimage:2.28.0 - +FROM wmoim/dim_eccodes_baseimage:jammy-2.36.0 ENV DEBIAN_FRONTEND="noninteractive" \ TZ="Etc/UTC" \ ECCODES_DIR=/opt/eccodes \ + ECCODES_DEFINITION_PATH=/opt/eccodes/share/eccodes/definitions \ PATH="${PATH}:/opt/eccodes/bin" -RUN echo "Acquire::Check-Valid-Until \"false\";\nAcquire::Check-Date \"false\";" | cat > /etc/apt/apt.conf.d/10no--check-valid-until \ - && apt-get update -y \ - && apt-get install -y ${BUILD_PACKAGES} libudunits2-0 \ - && apt-get remove --purge -y ${BUILD_PACKAGES} \ - && apt autoremove -y \ - && apt-get -q clean \ - && rm -rf /var/lib/apt/lists/* - - -COPY . /tmp/bufr2geojson +# Install additional packges +RUN echo apt-get update -y \ + && apt-get upgrade -y \ + && apt-get install -y libudunits2-0 curl -RUN cd /tmp/bufr2geojson && python3 setup.py install -# clean up -RUN cd /tmp && rm -r bufr2geojson +WORKDIR /tmp +COPY . /tmp +RUN cd /tmp && python3 setup.py install +RUN cd /tmp && rm -r ./* -WORKDIR / +WORKDIR /local diff --git a/bufr2geojson/__init__.py b/bufr2geojson/__init__.py index 83f0f61..b38660d 100644 --- a/bufr2geojson/__init__.py +++ b/bufr2geojson/__init__.py @@ -26,6 +26,7 @@ import csv from datetime import datetime, timedelta import hashlib +from io import BytesIO import json import logging import os @@ -35,10 +36,9 @@ import tempfile from typing import Iterator, Union - from cfunits import Units from eccodes import (codes_bufr_new_from_file, codes_clone, - codes_get_array, codes_set, + codes_get_array, codes_set, codes_write, codes_release, codes_get, CODES_MISSING_LONG, CODES_MISSING_DOUBLE, codes_bufr_keys_iterator_new, @@ -50,17 +50,16 @@ LOGGER = logging.getLogger(__name__) -# some 'constants' +# some 'constants' / env variables SUCCESS = True NUMBERS = (float, int, complex) MISSING = ("NA", "NaN", "NAN", "None") -NULLIFY_INVALID = True # TODO: move to env. variable - -BUFR_TABLE_VERSION = 37 # default BUFR table version +NULLIFY_INVALID = os.environ.get("BUFR2GEOJSON_NULLIFY_INVALID", True) THISDIR = os.path.dirname(os.path.realpath(__file__)) RESOURCES = f"{THISDIR}{os.sep}resources" +ASSOCIATED_FIELDS_FILE = f"{RESOURCES}{os.sep}031021.json" CODETABLES = {} - +FLAGTABLES = {} ECCODES_DEFINITION_PATH = codes_definition_path() if not os.path.exists(ECCODES_DEFINITION_PATH): LOGGER.debug('ecCodes definition path does not exist, trying environment') @@ -68,18 +67,31 @@ LOGGER.debug(f'ECCODES_DEFINITION_PATH: {ECCODES_DEFINITION_PATH}') if ECCODES_DEFINITION_PATH is None: raise EnvironmentError('Cannot find ecCodes definition path') - TABLEDIR = Path(ECCODES_DEFINITION_PATH) / 'bufr' / 'tables' / '0' / 'wmo' +# TODO - read preferred units from config file # PREFERRED UNITS PREFERRED_UNITS = { "K": "Celsius", "Pa": "hPa" } +# The following is required as the code table from ECMWF is incomplete +# and that from github/wmo-im not very usable. +try: + with open(ASSOCIATED_FIELDS_FILE) as fh: + ASSOCIATED_FIELDS = json.load(fh) +except Exception as e: + LOGGER.error(f"Error loading associated field table (031021) - {e}") + raise e + # list of BUFR attributes ATTRIBUTES = ['code', 'units', 'scale', 'reference', 'width'] +# Dictionary to store attributes for each element, caching is more +# efficient +_ATTRIBUTES_ = {} + # list of ecCodes keys for BUFR headers HEADERS = ["edition", "masterTableNumber", "bufrHeaderCentre", "bufrHeaderSubCentre", "updateSequenceNumber", "dataCategory", @@ -92,6 +104,7 @@ UNEXPANDED_DESCRIPTORS = ["unexpandedDescriptors"] +# list of headers added by ECMWF and ecCodes ECMWF_HEADERS = ["rdb", "rdbType", "oldSubtype", "localYear", "localMonth", "localDay", "localHour", "localMinute", "localSecond", "rdbtimeDay", "rdbtimeHour", "rdbtimeMinute", @@ -106,8 +119,19 @@ LOCATION_DESCRIPTORS = ["latitude", "latitude_increment", "latitude_displacement", "longitude", - "longitude_increment", "longitude_displacement", - "height_of_station_ground_above_mean_sea_level"] + "longitude_increment", "longitude_displacement"] + +ZLOCATION_DESCRIPTORS = ["height", "flight_level", "grid_point_altitude"] + +RELATIVE_OBS_HEIGHT = ["height_above_station", + "height_of_sensor_above_local_ground_or_deck_of_marine_platform", # noqa, land only + "height_of_sensor_above_water_surface", # noqa, marine only + "depth_below_land_surface", + "depth_below_water_surface" + ] + +OTHER_Z_DESCRIPTORS = ["geopotential", "pressure", "geopotential_height", + "water_pressure"] TIME_DESCRIPTORS = ["year", "month", "day", "hour", "minute", "second", "time_increment", "time_period"] @@ -121,6 +145,44 @@ "wigos_identifier_series", "wigos_issuer_of_identifier", "wigos_issue_number", "wigos_local_identifier_character"] +WSI_DESCRIPTORS = ["wigos_identifier_series", "wigos_issuer_of_identifier", + "wigos_issue_number", "wigos_local_identifier_character"] + +IDENTIFIERS_BY_TYPE = { + # 0 surface data (land) + "0": { + "0": ["block_number", "station_number"], + "1": ["block_number", "station_number"], + "2": ["block_number", "station_number"], + "3": ["ship_or_mobile_land_station_identifier"], + "4": ["ship_or_mobile_land_station_identifier"], + "5": ["ship_or_mobile_land_station_identifier"], + "default": ["block_number", "station_number"] + }, + + # 1 surface data (sea) + "1": { + "0": ["ship_or_mobile_land_station_identifier"], + "6": ["ship_or_mobile_land_station_identifier"], + "7": ["ship_or_mobile_land_station_identifier"], + "15": ["ship_or_mobile_land_station_identifier"], + "25": [], + "ship": ["ship_or_mobile_land_station_identifier"], + "buoy_5digit": ["region_number", "wmo_region_sub_area", "buoy_or_platform_identifier"], # noqa + "buoy_7digit": ["stationary_buoy_platform_identifier_e_g_c_man_buoys"] + # 7 digit id, 5 digit id (region, subarea, buoy id) + }, + # 2 vertical sounding (other than satellite) + "2": { + "default": ["block_number", "station_number"] + }, + # 31 oceanographic + "31": { + "default": "" + } +} + + # dictionary to store jsonpath parsers, these are compiled the first time that # they are used. jsonpath_parsers = dict() @@ -132,7 +194,8 @@ def __init__(self, raise_on_error=False): self.raise_on_error = raise_on_error - # dict to store qualifiers in force and for accounting + # dict to store qualifiers in force and for accounting, strictly only + # those < 9 remain in force but some others in practice are assumed to self.qualifiers = { "01": {}, # identification "02": {}, # instrumentation @@ -143,7 +206,11 @@ def __init__(self, raise_on_error=False): "07": {}, # location (vertical) "08": {}, # significance qualifiers "09": {}, # reserved - "22": {} # some sst sensors in class 22 + "22": {}, # some sst sensors in class 22 + "25": {}, # processing information + "31": {}, # associated field significance + "33": {}, # BUFR/CREX quality information + "35": {} # data monitoring information } def set_qualifier(self, fxxyyy: str, key: str, value: Union[NUMBERS], @@ -162,28 +229,29 @@ def set_qualifier(self, fxxyyy: str, key: str, value: Union[NUMBERS], :returns: None """ - - # get class of descriptor - xx = fxxyyy[1:3] - # first check whether the value is None, if so remove and exit - if [value, description] == [None, None]: - if key in self.qualifiers[xx]: - del self.qualifiers[xx][key] - else: - if key in self.qualifiers[xx] and append: - self.qualifiers[xx][key]["value"] = \ - [self.qualifiers[xx][key]["value"], value] + try: + # get class of descriptor + xx = fxxyyy[1:3] + # first check whether the value is None, if so remove and exit + if [value, description] == [None, None]: + if key in self.qualifiers[xx]: + del self.qualifiers[xx][key] else: - self.qualifiers[xx][key] = { - "code": fxxyyy, - "key": key, - "value": value, - "attributes": attributes, - "description": description - } - - def set_time_displacement(self, key, value, append=False): - raise NotImplementedError() + if key in self.qualifiers[xx] and append: + self.qualifiers[xx][key]["value"] = \ + [self.qualifiers[xx][key]["value"], value] + else: + self.qualifiers[xx][key] = { + "code": fxxyyy, + "key": key, + "value": value, + "attributes": attributes, + "description": description + } + except Exception as e: + LOGGER.error(f"Error in BUFRParser.set_qualifier: {e}") + if self.raise_on_error: + raise e def get_qualifier(self, xx: str, key: str, default=None) -> Union[NUMBERS]: """ @@ -207,16 +275,25 @@ def get_qualifier(self, xx: str, key: str, default=None) -> Union[NUMBERS]: return value - def get_qualifiers(self) -> list: + def get_qualifiers(self) -> dict: """ Function to return all qualifiers set (excluding special qualifiers such as date and time) - :returns: List containing qualifiers, their values and units + :returns: Dictionary containing qualifiers, their values and units - + grouped by class. """ - classes = ("01", "02", "03", "04", "05", "06", "07", "08", "22") - result = list() + classes = list(self.qualifiers.keys()) + + identification = {} + wigos_md = {} + qualifiers = {} + processing = {} + monitoring = {} + quality = {} + associated_field = {} + # name, value, units for c in classes: for k in self.qualifiers[c]: @@ -227,8 +304,9 @@ def get_qualifiers(self) -> list: continue if k in ID_DESCRIPTORS: continue + if c in ("04", "05", "06"): # , "07"): + LOGGER.warning(f"Unhandled location information {k}") # now remaining qualifiers - name = k value = self.qualifiers[c][k]["value"] units = self.qualifiers[c][k]["attributes"]["units"] description = self.qualifiers[c][k]["description"] @@ -236,16 +314,52 @@ def get_qualifiers(self) -> list: description = strip2(description) except AttributeError: pass - q = { - "name": name, - "value": value, - "units": units, - "description": description - } - result.append(q) + except Exception as e: + LOGGER.error(f"{e}") + + # set the qualifier value, result depends on type + if units in ("CODE TABLE", "FLAG TABLE"): + q = { + "value": value.copy() + } + elif units == "CCITT IA5": + q = {"value": description} + else: + q = { + "value": value, + "units": units, + "description": description + } + + # now assign to type of qualifier + if c == "01": + identification[k] = q.copy() + if c in ("02", "03", "07", "22"): + wigos_md[k] = q.copy() + if c in ("08", "09"): + qualifiers[k] = q.copy() + if c == "25": + processing[k] = q.copy() + if c == "31": + associated_field[k] = q.copy() + if c == "33": + quality[k] = q.copy() + if c == "35": + monitoring[k] = q.copy() + + result = { + "identification": identification, + "instrumentation": wigos_md, + "qualifiers": qualifiers, + "processing": processing, + "monitoring": monitoring, + "quality": quality, + "associated_field": associated_field + } + return result - def get_location(self) -> Union[dict, None]: + def get_location(self, bufr_class: int = None) -> Union[dict, None]: """ Function to get location from qualifiers and to apply any displacements or increments @@ -256,7 +370,6 @@ def get_location(self) -> Union[dict, None]: """ # first get latitude - #if not (("005001" in self.qualifiers["05"]) ^ ("005002" in self.qualifiers["05"])): # noqa if "latitude" not in self.qualifiers["05"]: LOGGER.warning("Invalid location in BUFR message, no latitude") LOGGER.warning(self.qualifiers["05"]) @@ -270,8 +383,7 @@ def get_location(self) -> Union[dict, None]: if "latitude_displacement" in self.qualifiers["05"]: # noqa y_displacement = deepcopy(self.qualifiers["05"]["latitude_displacement"]) # noqa latitude["value"] += y_displacement["value"] - latitude = round(latitude["value"], - latitude["attributes"]["scale"]) + latitude = round(latitude["value"], latitude["attributes"]["scale"]) # noqa # now get longitude if "longitude" not in self.qualifiers["06"]: @@ -290,13 +402,8 @@ def get_location(self) -> Union[dict, None]: # round to avoid extraneous digits longitude = round(longitude["value"], longitude["attributes"]["scale"]) # noqa - # now station elevation - if "height_of_station_ground_above_mean_sea_level" in self.qualifiers["07"]: # noqa - elevation = deepcopy(self.qualifiers["07"]["height_of_station_ground_above_mean_sea_level"]) # noqa - elevation = round(elevation["value"], elevation["attributes"]["scale"]) # noqa - else: - elevation = None - # no elevation displacement in BUFR + z = self.get_zcoordinate(bufr_class) + height = z.get('z_amsl', {}).get('value') # check for increments, not yet implemented if "005011" in self.qualifiers["05"] or \ @@ -307,8 +414,8 @@ def get_location(self) -> Union[dict, None]: location = [longitude, latitude] - if elevation is not None: - location.append(elevation) + if height is not None: + location.append(height) if None in location: LOGGER.debug('geometry contains null values; setting to None') @@ -318,6 +425,103 @@ def get_location(self) -> Union[dict, None]: "coordinates": location } + def get_zcoordinate(self, bufr_class: int = None) -> Union[dict, None]: + # class 07 gives vertical coordinate + result = {} + + # 1) Height of sensor above local ground + height of station AMSL + # 2) Height of barometer AMSL + # 3) Height or altitude + # 4) Geopotential + # 5) Pressure + # 6) Height above station + height of station AMSL + # 7) Height + # 8) Geopotential height + + + station_ground = self.qualifiers["07"].get("height_of_station_ground_above_mean_sea_level",None) # noqa + + abs_height = [] + if bufr_class == 10: + if "height_of_barometer_above_mean_sea_level" in self.qualifiers["07"]: # noqa + abs_height.append("height_of_barometer_above_mean_sea_level") + else: + for k in ZLOCATION_DESCRIPTORS: + if k in self.qualifiers["07"]: + abs_height.append(k) + + rel_height = [] + for k in RELATIVE_OBS_HEIGHT: + if k in self.qualifiers["07"]: + rel_height.append(k) + + other_height = [] + for k in OTHER_Z_DESCRIPTORS: + if k in self.qualifiers["07"]: + other_height.append(k) + + # if we have other heights we want to nullify abs and rel + if len(other_height) == 1: + abs_height = [] + rel_height = [] + + # check we have as many heights as expected + if len(abs_height) > 1: + LOGGER.warning("Multiple absolute heights found, setting to None. See metadata") # noqa + abs_height = [] + + if len(rel_height) > 1: + LOGGER.warning("Multiple relative heights found, setting to None. See metadata") # noqa + rel_height = [] + + if len(other_height) > 1: + LOGGER.warning("Multiple other heights found, setting to None. See metadata") # noqa + other_height = [] + + z_amsl = None + z_alg = None + z_other = None + + if len(rel_height) == 1 and station_ground is not None: + assert station_ground.get('attributes').get('units') == self.qualifiers["07"].get(rel_height[0]).get('attributes').get('units') # noqa + z_amsl = station_ground.get('value') + self.qualifiers["07"].get(rel_height[0], {}).get('value') # noqa + z_alg = self.qualifiers["07"].get(rel_height[0], {}).get('value') + if 'depth' in rel_height[0]: + z_alg = -1 * z_alg + elif len(abs_height) == 1 and station_ground is not None: + z_amsl = self.qualifiers["07"].get(abs_height[0], {}).get('value') + z_alg = z_amsl - station_ground.get('value') + else: + if len(abs_height) == 1: + z_amsl = self.qualifiers["07"].get(abs_height[0], {}).get('value') # noqa + if len(rel_height) == 1: + z_alg = self.qualifiers["07"].get(rel_height[0], {}).get('value') # noqa + + if len(other_height) == 1: + z_other = self.qualifiers["07"].get(other_height[0], {}) + + if z_amsl is not None: + result['z_amsl'] = { + 'name': 'height_above_mean_sea_level', + 'value': z_amsl, + 'units': 'm' + } + + if z_other is not None: + result['z'] = { + 'name': z_other.get('key'), + 'value': z_other.get('value'), + 'units': z_other.get('attributes').get('units') + } + elif z_alg is not None: + result['z'] = { + 'name': 'height_above_local_ground', + 'value': z_alg, + 'units': 'm' + } + + return result + def get_time(self) -> str: """ Function to get time from qualifiers and to apply any displacements or @@ -351,8 +555,15 @@ def get_time(self) -> str: else: offset = 0 time_ = f"{year:04d}-{month:02d}-{day:02d} {hour:02d}:{minute:02d}:{second:02d}" # noqa - time_ = datetime.strptime(time_, "%Y-%m-%d %H:%M:%S") - time_ = time_ + timedelta(days=offset) + + try: + time_ = datetime.strptime(time_, "%Y-%m-%d %H:%M:%S") + time_ = time_ + timedelta(days=offset) + except Exception as e: + LOGGER.error(e) + LOGGER.debug(time_) + raise e + time_list = None # check if we have any increment descriptors, not yet supported @@ -392,7 +603,9 @@ def get_time(self) -> str: value = [value, 0] else: value = [0, value] + time_list = [None] * len(value) + for tidx in range(len(value)): time_list[tidx] = deepcopy(time_) if units not in ("years", "months"): @@ -545,7 +758,7 @@ def get_identification(self, guess_wsi: bool = False) -> dict: # 7 digit buoy number # 001087 - _type = "marine_observing_platform_identifier" + _type = "7_digit_marine_observing_platform_identifier" if _type in self.qualifiers["01"]: id_ = self.get_qualifier("01", _type) tsi = strip2(id_) @@ -559,7 +772,7 @@ def get_identification(self, guess_wsi: bool = False) -> dict: return { "wsi": wsi, "tsi": tsi, - "type": "7_digit_marine_observing_platform_identifier" + "type": _type } # flag if we do not have WSI @@ -598,8 +811,37 @@ def get_code_value(self, fxxyyy: str, code: int) -> str: return decoded + def get_flag_value(self, fxxyyy: str, flags: str) -> str: + if flags is None: + return None + table = int(fxxyyy) + if self.table_version not in FLAGTABLES: + FLAGTABLES[self.table_version] = {} + + if fxxyyy not in FLAGTABLES[self.table_version]: + FLAGTABLES[self.table_version][fxxyyy] = {} + tablefile = TABLEDIR / str(self.table_version) / 'codetables' / f'{table}.table' # noqa + with tablefile.open() as csvfile: + reader = csv.reader(csvfile, delimiter=" ") + for row in reader: + FLAGTABLES[self.table_version][fxxyyy][int(row[0])] = " ".join(row[2:]) # noqa + + flag_table = FLAGTABLES[self.table_version][fxxyyy] + + bits = [int(flag) for flag in flags] + nbits = len(bits) + values = [] + for idx in range(nbits): + if bits[idx]: + key = idx+1 + value = flag_table.get(key) + if value is not None: + values.append(value) + + return values + def as_geojson(self, bufr_handle: int, id: str, - serialize: bool = False, guess_wsi: bool = False) -> dict: + guess_wsi: bool = False) -> dict: """ Function to return GeoJSON representation of BUFR message @@ -644,21 +886,22 @@ def as_geojson(self, bufr_handle: int, id: str, headers[header] = codes_get(bufr_handle, header) except Exception as e: if header == "subsetNumber": - LOGGER.warning("subsetNumber not found, continuing") continue LOGGER.error(f"Error reading {header}") raise e - characteristic_date = headers["typicalDate"] - characteristic_time = headers["typicalTime"] + self.reportType = headers.get('dataCategory') + + # characteristic_date = headers["typicalDate"] + # characteristic_time = headers["typicalTime"] try: sequence = codes_get_array(bufr_handle, UNEXPANDED_DESCRIPTORS[0]) + sequence = sequence.tolist() except Exception as e: LOGGER.error(f"Error reading {UNEXPANDED_DESCRIPTORS}") raise e - - sequence = sequence.tolist() + # convert to string sequence = [f"{descriptor}" for descriptor in sequence] sequence = ",".join(sequence) headers["sequence"] = sequence @@ -668,7 +911,6 @@ def as_geojson(self, bufr_handle: int, id: str, key_iterator = codes_bufr_keys_iterator_new(bufr_handle) # set up data structures - data = {} last_key = None index = 0 @@ -676,6 +918,10 @@ def as_geojson(self, bufr_handle: int, id: str, while codes_bufr_keys_iterator_next(key_iterator): # get key key = codes_bufr_keys_iterator_get_name(key_iterator) + if "associatedField" in key: # we've already processed, skip + last_key = key + continue + # identify what we are processing if key in (HEADERS + ECMWF_HEADERS + UNEXPANDED_DESCRIPTORS): continue @@ -687,10 +933,49 @@ def as_geojson(self, bufr_handle: int, id: str, continue # get class + # get class etc + f = int(fxxyyy[0:1]) xx = int(fxxyyy[1:3]) + yyy = int(fxxyyy[3:6]) + + # because of the way eccode works we need to check for associated + # fields. These are returned after + associated_field = None + try: + associated_field_value = codes_get(bufr_handle, f"{key}->associatedField") # noqa + associated_field = codes_get(bufr_handle, f"{key}->associatedField->associatedFieldSignificance") # noqa + associated_field = f"{associated_field}" + associated_field = ASSOCIATED_FIELDS.get(associated_field) + except Exception: + pass + + if associated_field is not None: + flabel = associated_field.get('label', '') + ftype = associated_field.get('type', '') + if ftype == 'int': + associated_field_value = f"{int(associated_field_value)}" + associated_field_value = \ + associated_field.get('values',{}).get(associated_field_value, '') # noqa + else: + funits = associated_field.get('units', '') + associated_field_value = f"{associated_field_value} {funits}" # noqa + quality_flag = { + 'inScheme': "https://codes.wmo.int/bufr4/codeflag/0-31-021", # noqa + 'flag': flabel, + 'flagValue': associated_field_value + } + else: + quality_flag = { + 'inScheme': None, + 'flag': None, + 'flagValue': None + } + + assert f == 0 # get value and attributes # get as array and convert to scalar if required value = codes_get_array(bufr_handle, key) + _value = None if (len(value) == 1) and (not isinstance(value, str)): value = value[0] if value in (CODES_MISSING_DOUBLE, CODES_MISSING_LONG): @@ -706,24 +991,49 @@ def as_geojson(self, bufr_handle: int, id: str, # get attributes attributes = {} - for attribute in ATTRIBUTES: - attribute_key = f"{key}->{attribute}" - try: - attribute_value = codes_get(bufr_handle, attribute_key) - except Exception as e: - LOGGER.warning(f"Error reading {attribute_key}: {e}") - attribute_value = None - if attribute_value is not None: - attributes[attribute] = attribute_value + if fxxyyy in _ATTRIBUTES_: + attributes = _ATTRIBUTES_[fxxyyy] + attributes = attributes.copy() + else: + for attribute in ATTRIBUTES: + attribute_key = f"{key}->{attribute}" + try: + attribute_value = codes_get(bufr_handle, attribute_key) + except Exception as e: + LOGGER.warning(f"Error reading {attribute_key}: {e}") + attribute_value = None + if attribute_value is not None: + attributes[attribute] = attribute_value + _ATTRIBUTES_[fxxyyy] = attributes.copy() units = attributes["units"] + # scale = attributes["scale"] + # next decoded value if from code table description = None - if attributes["units"] == "CODE TABLE": + observation_type = "http://www.opengis.net/def/observationType/OGC-OM/2.0/OM_Measurement" # noqa default type + if attributes["units"] == "CODE TABLE" and value is not None: description = self.get_code_value(attributes["code"], value) + observation_type = "http//www.opengis.net/def/observationType/OGC-OM/2.0/OM_CategoryObservation" # noqa + _value = { + 'codetable': f"http://codes.wmo.int/bufr4/codeflag/{f:1}-{xx:02}-{yyy:03}", # noqa + 'entry': f"{value}", # noqa + 'description': description + } + elif attributes["units"] == "FLAG TABLE" and value is not None: + observation_type = "http//www.opengis.net/def/observationType/OGC-OM/2.0/OM_CategoryObservation" # noqa + nbits = attributes['width'] + description = self.get_flag_value(attributes["code"], "{0:0{1}b}".format(value, nbits)) # noqa + _value = { + 'flagtable': f"http://codes.wmo.int/bufr4/codeflag/{f:1}-{xx:02}-{yyy:03}", # noqa + 'entry': "{0:0{1}b}".format(value, nbits), + 'description': description + } elif attributes["units"] == "CCITT IA5": description = value value = None + observation_type = "http//www.opengis.net/def/observationType/OGC-OM/2.0/OM_Observation" # noqa + if (units in PREFERRED_UNITS) and (value is not None): value = Units.conform(value, Units(units), Units(PREFERRED_UNITS[units])) @@ -732,98 +1042,155 @@ def as_geojson(self, bufr_handle: int, id: str, value = round(value, 6) units = PREFERRED_UNITS[units] attributes["units"] = units - # now process - # first process key to something more sensible + + if _value is not None: + value = _value.copy() + + # now process, convert key to snake case key = re.sub("#[0-9]+#", "", key) key = re.sub("([a-z])([A-Z])", r"\1_\2", key) key = key.lower() + + # determine whether we have data or metadata append = False - if xx < 9: + if xx < 9 and fxxyyy != '004053': # noqa - metadata / significance qualifiers. 0040552 is misplaced, it is not a time coordinate! if ((xx >= 4) and (xx < 8)) and (key == last_key): append = True - self.set_qualifier(fxxyyy, key, value, description, - attributes, append) - elif xx == 31: - pass - else: - if fxxyyy == "022067": - append = False + + if fxxyyy == "004023" and sequence == "307075": # noqa fix for broken DAYCLI sequence self.set_qualifier(fxxyyy, key, value, description, attributes, append) - continue - if value is not None: - # self.get_identification() - metadata = self.get_qualifiers() - metadata_hash = hashlib.md5(json.dumps(metadata).encode("utf-8")).hexdigest() # noqa - md = { - "id": metadata_hash, - "metadata": list() - } - for idx in range(len(metadata)): - md["metadata"].append(metadata[idx]) - wsi = self.get_wsi(guess_wsi) - feature_id = f"WIGOS_{wsi}_{characteristic_date}T{characteristic_time}" # noqa - feature_id = f"{feature_id}{id}-{index}" + self.set_qualifier(fxxyyy, key, value+1, description, + attributes, append) + else: + self.set_qualifier(fxxyyy, key, value, description, + attributes, append) + last_key = key + continue + elif xx == 31: + if yyy in (12, 31): + raise NotImplementedError + last_key = key + continue + elif xx in (25, 33, 35): + self.set_qualifier(fxxyyy, key, value, description, + attributes, append) + last_key = key + continue + + if fxxyyy == ("022067", "022055", "022056", "022060", + "022068", "022080", "022081", "022078", + "022094", "022096"): + append = False + self.set_qualifier(fxxyyy, key, value, description, + attributes, append) + last_key = key + continue + + if value is not None: + # self.get_identification() + metadata = self.get_qualifiers() + metadata["BUFR_element"] = fxxyyy + z = self.get_zcoordinate(bufr_class=xx) + if z is not None: + metadata["zCoordinate"] = z.get('z') + metadata['BUFRheaders'] = headers + observing_procedure = "http://codes.wmo.int/wmdr/SourceOfObservation/unknown" # noqa + + wsi = self.get_wsi(guess_wsi) + host_id = wsi + if wsi is None: + wsi = "UNKNOWN" # + host_id = self.get_tsi() + feature_id = f"{index}" + + try: phenomenon_time = self.get_time() - if "/" in phenomenon_time: - result_time = phenomenon_time.split("/") - result_time = result_time[1] - else: - result_time = phenomenon_time - data[feature_id] = { - "geojson": { - "id": feature_id, - "conformsTo": ["http://www.wmo.int/spec/om-profile-1/1.0/req/geojson"], # noqa - "reportId": f"WIGOS_{wsi}_{characteristic_date}T{characteristic_time}{id}", # noqa - "type": "Feature", - "geometry": self.get_location(), - "properties": { - # "identifier": feature_id, - "wigos_station_identifier": wsi, - "phenomenonTime": phenomenon_time, - "resultTime": result_time, - "name": key, + except Exception as e: + LOGGER.warning( + f"Error getting phenomenon time, skipping ({e})") + continue + + result_time = datetime.now().strftime('%Y-%m-%d %H:%M') + + # check if we have statistic, if so modify observed_property + fos = self.get_qualifier("08", "first_order_statistics", None) + observed_property = f"{key}" + if fos is not None: + fos = fos.get("description", "") + observed_property = f"{key} ({fos.lower()})" + + data = { + "geojson": { + "id": feature_id, + "conformsTo": ["https://wis.wmo.int/spec/wccdm-obs/1/conf/observation"], # noqa + "type": "Feature", + "geometry": self.get_location(bufr_class=xx), + "properties": { + "host": host_id, # noqa + "observer": None, + "observationType": observation_type, # noqa + "observedProperty": observed_property, + "observingProcedure": observing_procedure, + "phenomenonTime": phenomenon_time, + "resultTime": result_time, + "validTime": None, + "result": { "value": value, "units": attributes["units"], - "description": description, - "metadata": metadata, - "index": index, - "fxxyyy": fxxyyy - } - }, - "_meta": { - "data_date": self.get_time(), - "identifier": feature_id, - "geometry": self.get_location(), - "metadata_hash": metadata_hash - }, - "_headers": deepcopy(headers) + "standardUncertainty": None + }, + "resultQuality": [ + quality_flag + ], + "parameter": { + "hasProvenance": None, + "status": None, + "version": 0, + "comment": None, + "reportType": f"{headers['dataCategory']:03}{headers['internationalDataSubCategory']:03}", # noqa + "reportIdentifier": f"{id}", + "isMemberOf": None, + "additionalProperties": metadata + }, + "featureOfInterest": [ + { + "id": None, + "label": None, + "relation": None + } + ], + "index": index, } - else: - pass - last_key = key - index += 1 + }, + "_meta": { + "data_date": self.get_time(), + "identifier": feature_id, + "geometry": self.get_location() + }, + "_headers": headers + } + yield data + last_key = key + index += 1 codes_bufr_keys_iterator_delete(key_iterator) - if serialize: - data = json.dumps(data, indent=4) - return data -def transform(data: bytes, serialize: bool = False, - guess_wsi: bool = False) -> Iterator[dict]: +def transform(data: bytes, guess_wsi: bool = False, + source_identifier: str = "") -> Iterator[dict]: """ Main transformation :param data: byte string of BUFR data - :param serialize: whether to return as JSON string (default is False) - :param guess_wsi: whether to 'guess' WSI based on TSI and allocaiotn rules + :param guess_wsi: whether to 'guess' WSI based on TSI and allocation rules + :param source_identifier: identifier of the source (eg. filename ( file ID) :returns: `generator` of GeoJSON features """ error = False - # FIXME: figure out how to pass a bytestring to ecCodes BUFR reader + # eccodes needs to read from a file, create a temporary fiole tmp = tempfile.NamedTemporaryFile() with open(tmp.name, 'wb') as f: f.write(data) @@ -853,38 +1220,84 @@ def transform(data: bytes, serialize: bool = False, if not error: nsubsets = codes_get(bufr_handle, "numberOfSubsets") LOGGER.info(f"{nsubsets} subsets") - collections = dict() + for idx in range(nsubsets): - LOGGER.debug(f"Extracting subset {idx}") - codes_set(bufr_handle, "extractSubset", idx+1) - codes_set(bufr_handle, "doExtractSubsets", 1) - LOGGER.debug("Cloning subset to new message") + # reportIdentifier = None + if nsubsets > 1: # noqa this is only required if more than one subset (and will crash if only 1) + LOGGER.debug(f"Extracting subset {idx+1} of {nsubsets}") # noqa + codes_set(bufr_handle, "extractSubset", idx+1) + codes_set(bufr_handle, "doExtractSubsets", 1) + LOGGER.debug("Cloning subset to new message") + single_subset = codes_clone(bufr_handle) + + with BytesIO() as bufr_bytes: + codes_write(single_subset, bufr_bytes) + bufr_bytes.seek(0) + bhash = hashlib.md5() + bhash.update(bufr_bytes.getvalue()) + reportIdentifier = bhash.hexdigest() + LOGGER.debug("Unpacking") codes_set(single_subset, "unpack", True) parser = BUFRParser() - # only include tag if more than 1 subset in file - tag = "" - if nsubsets > 1: - tag = f"-{idx}" + + tag = reportIdentifier try: data = parser.as_geojson(single_subset, id=tag, - serialize=serialize, guess_wsi=guess_wsi) # noqa except Exception as e: LOGGER.error("Error parsing BUFR to GeoJSON, no data written") # noqa LOGGER.error(e) data = {} - del parser - collections = deepcopy(data) - yield collections + for obs in data: + # noqa set identifier, and report id (prepending file and subset numbers) + id = obs.get('geojson', {}).get('id', {}) + if source_identifier in ("", None): + source_identifier = obs.get('geojson', {}).get('properties',{}).get('host', "") # noqa + obs['geojson']['id'] = f"{reportIdentifier}-{id}" # noqa update feature id to include report id + # now set prov data + prov = { + "prefix": { + "prov": "http://www.w3.org/ns/prov#", + "schema": "https://schema.org/" + }, + "entity": { + f"{source_identifier}": { + "prov:type": "schema:DigitalDocument", + "prov:label": "Input data file", + "schema:encodingFormat": "application/bufr" + }, + f"{obs['geojson']['id']}": { + "prov:type": "observation", + "prov:label": f"Observation {id} from subset {idx} of message {imsg}" # noqa + } + }, + "wasDerivedFrom": { + "_:wdf": { + "prov:generatedEntity": f"{obs['geojson']['id']}", # noqa + "prov:usedEntity": f"{source_identifier}", + "prov:activity": "_:bufr2geojson" + } + }, + "activity": { + "_:bufr2geojson": { + "prov:type": "prov:Activity", + "prov:label": f"Data transformation using version {__version__} of bufr2geojson", # noqa + "prov:endTime": datetime.now().strftime('%Y-%m-%d %H:%M:%S') # noqa + } + } + } + obs['geojson']['properties']['parameter']['hasProvenance'] = prov.copy() # noqa + yield obs + + del parser codes_release(single_subset) else: - collections = {} - yield collections + yield {} if not error: codes_release(bufr_handle) @@ -908,14 +1321,12 @@ def strip2(value) -> str: return None if isinstance(value, str): - space = ' ' + pass # space = ' ' elif isinstance(value, bytes): - space = b' ' + # space = b' ' + pass else: # make sure we have a string - space = ' ' + # space = ' ' value = f"{value}" - if value.startswith(space) or value.endswith(space): - LOGGER.warning(f"value '{value}' is space padded; upstream data should be fixed") # noqa - return value.strip() diff --git a/bufr2geojson/cli.py b/bufr2geojson/cli.py index 1f832df..8fdf717 100644 --- a/bufr2geojson/cli.py +++ b/bufr2geojson/cli.py @@ -70,13 +70,14 @@ def data(): @cli_option_verbosity def transform(ctx, bufr_file, output_dir, verbosity): click.echo(f"Transforming {bufr_file.name} to geojson") - result = as_geojson(bufr_file.read()) + result = as_geojson(bufr_file.read(), source_identifier=bufr_file.name) for collection in result: for key, item in collection.items(): - outfile = f"{output_dir}{os.sep}{key}.json" - data = item['geojson'] - with open(outfile, "w") as fh: - fh.write(json.dumps(data, indent=4)) + if key == "geojson": + identifier = item['id'] + outfile = f"{output_dir}{os.sep}{identifier}.json" + with open(outfile, "w") as fh: + fh.write(json.dumps(item, indent=4)) click.echo("Done") diff --git a/bufr2geojson/resources/031021.json b/bufr2geojson/resources/031021.json new file mode 100644 index 0000000..0266539 --- /dev/null +++ b/bufr2geojson/resources/031021.json @@ -0,0 +1,82 @@ +{ + "1": { + "type": "int", + "label": "1-bit indicator of quality", + "values": { + "0": "Good", + "1": "Suspect or bad" + } + }, + "2": { + "type": "int", + "label": "2-bit indicator of quality", + "values": { + "0" : "Good", + "1" : "Slightly suspect", + "2" : "Highly suspect", + "3" : "Bad" + } + }, + "5": { + "type": "int", + "label": "8-bit indicator of quality", + "values": { + "0": "Data checked and declared good", + "1": "Data checked and declared suspect", + "2": "Data checked and declared aggregated", + "3": "Data checked and declared out of instrument range", + "4": "Data checked, declared aggregated, and out of instrument range", + "5": "Parameter is not measured at the station", + "6": "Daily value not provided", + "7": "Data unchecked", + "255": "Missing (QC info not available)" + } + }, + "6": { + "type": "int", + "label": "4-bit indicator of quality control class according to GTSPP", + "values": { + "0": "Unqualified", + "1": "Correct value (all checks passed)", + "2": "Probably good but value inconsistent with statistics (differ from climatology)", + "3": "Probably bad (spike, gradient, ... if other tests passed)", + "4": "Bad value, impossible value (out of scale, vertical instability, constant profile)", + "5": "Value modified during quality control", + "8": "Interpolated value", + "9": "Missing value" + } + }, + "7": { + "type": "float", + "label": "Percentage confidence", + "units": "%" + }, + "8": { + "type": "int", + "label": "2-bit indicator of quality (alt)", + "values": { + "0": "Not suspected", + "1": "Suspected", + "2": "Reserved", + "3": "Information not required" + } + }, + "9": { + "type": "int", + "label": "Status of ancillary data", + "values": { + "0": "Data present, good, collocated", + "1": "Data available but of degraded quality and not used", + "2": "No spatiotemporally collocated data available", + "15": "Missing value" + } + }, + "21": { + "type": "int", + "label": "1-bit indicator of correction", + "values": { + "0": "Original value", + "1": "Substituted/corrected value" + } + } +} \ No newline at end of file diff --git a/bufr2geojson/resources/schemas/wccdm-obs.json b/bufr2geojson/resources/schemas/wccdm-obs.json new file mode 100644 index 0000000..5dd3e04 --- /dev/null +++ b/bufr2geojson/resources/schemas/wccdm-obs.json @@ -0,0 +1,320 @@ +{ + "$schema": "https://json-schema.org/draft/2020-12/schema", + "$id": "http://schemas.wmo.int/cdm-obs/2024/wccdm-obs.json", + "title": "WCCDM-OBS record definition", + "description": "JSON schema for the representation of WCCDM-OBS", + "definitions": { + "TM_Period": { + "type": "string", + "pattern": "^(\\d{4}-\\d{2}-\\d{2}[ tT]{1}\\d{2}:\\d{2}:\\d{2}[zZ]{0,1})/(\\d{4}-\\d{2}-\\d{2}[ tT]{1}\\d{2}:\\d{2}:\\d{2}[zZ]{0,1})$" + }, + "TM_Instant": { + "anyOf": [ + { + "type": "string", + "format": "date-time" + }, + { + "type": "string", + "format": "date" + } + ] + }, + "httpURI": { + "oneOf": [ + { + "type": "string", + "format": "uri" + }, + { + "type": "null" + } + ] + }, + "Collection": { + "type": "object", + "properties": { + "type": { + "type": "string", + "enum": [ + "FeatureCollection" + ] + }, + "features": { + "type": "array", + "items": { + "$ref": "#/definitions/Observation" + } + } + } + }, + "Observation": { + "type": "object", + "required": [ + "conformsTo", + "id", + "type", + "geometry", + "properties" + ], + "properties": { + "conformsTo": { + "type": "array", + "items": { + "$ref": "#/definitions/httpURI" + }, + "minItems": 1 + }, + "id": { + "$ref": "#/definitions/httpURI" + }, + "type": { + "type": "string", + "enum": [ + "Feature" + ] + }, + "geometry": { + "$ref": "#/definitions/Geometry" + }, + "properties": { + "$ref": "#/definitions/Properties" + } + } + }, + "Geometry": { + "type": "object", + "required": [ + "type", + "coordinates" + ], + "properties": { + "type": { + "type": "string", + "enum": [ + "Point" + ] + }, + "coordinates": { + "type": "array", + "items": { + "type": "number" + }, + "minItems": 2 + } + } + }, + "QualityFlag": { + "type": "object", + "required": [ + "inScheme", + "flag", + "flagValue" + ], + "properties": { + "flag": { + "$ref": "#/definitions/httpURI" + }, + "flagValue": { + "$ref": "#/definitions/httpURI" + }, + "inScheme": { + "$ref": "#/definitions/httpURI" + } + } + }, + "Properties": { + "type": "object", + "required": [ + "host", + "observedProperty", + "observingProcedure", + "phenomenonTime", + "resultTime", + "resultQuality", + "result", + "parameter", + "featureOfInterest" + ], + "properties": { + "host": { + "$ref": "#/definitions/httpURI" + }, + "observer": { + "$ref": "#/definitions/httpURI" + }, + "observationType": { + "$ref": "#/definitions/httpURI" + }, + "observedProperty": { + "$ref": "#/definitions/httpURI" + }, + "observingProcedure": { + "$ref": "#/definitions/httpURI" + }, + "phenomenonTime": { + "anyOf": [ + { + "$ref": "#/definitions/TM_Period" + }, + { + "$ref": "#/definitions/TM_Instant" + } + ] + }, + "resultTime": { + "$ref": "#/definitions/TM_Instant" + }, + "validTime": { + "oneOf": [ + {"$ref": "#/definitions/TM_Period"}, + { "type": "null"} + ] + }, + "resultQuality": { + "type": "array", + "items": { + "$ref": "#/definitions/QualityFlag" + }, + "minItems": 1 + }, + "result": { + "$ref": "#/definitions/Result" + }, + "parameter": { + "$ref": "#/definitions/Parameter" + }, + "featureOfInterest": { + "type": "array", + "items": { + "$ref": "#/definitions/Feature" + }, + "minItems": 1 + } + } + }, + "Result": { + "type": "object", + "required": [ + "value", + "units" + ], + "properties": { + "value": {}, + "units": { + "$ref": "#/definitions/httpURI" + }, + "standardUncertainty": { + "oneOf": [ + {"type": "number"}, + {"type": "null"} + ] + } + } + }, + "Parameter": { + "type": "object", + "required": [ + "status", + "version", + "isMemberOf" + ], + "properties": { + "hasProvenance": { + "oneOf": [ + {"$ref": "#/definitions/httpURI"}, + {"type": "object"} + ] + }, + "status": { + "$ref": "#/definitions/httpURI" + }, + "version": { + "type": "integer" + }, + "comment": { + "oneOf": [ + {"type": "string"}, + {"type": "null"} + ] + }, + "reportType": { + "$ref": "#/definitions/httpURI" + }, + "reportIdentifier": { + "$ref": "#/definitions/httpURI" + }, + "isMemberOf": { + "$ref": "#/definitions/httpURI" + }, + "additionalProperties": {} + } + }, + "Feature": { + "type": "object", + "required": [ + "id", + "label", + "relation" + ], + "properties": { + "id": { + "$ref": "#/definitions/httpURI" + }, + "label": { + "oneOf": [ + { + "type": "string" + }, + { + "type": "null" + } + ] + }, + "relation": { + "oneOf": [ + { + "type": "string", + "enum": ["proximate", "ultimate"] + }, + { + "type": "null" + } + ] + } + } + } + }, + "type": "object", + "required": [ + "conformsTo", + "id", + "type", + "geometry", + "properties" + ], + "properties": { + "conformsTo": { + "type": "array", + "items": { + "$ref": "#/definitions/httpURI" + }, + "minItems": 1 + }, + "id": { + "$ref": "#/definitions/httpURI" + }, + "type": { + "type": "string", + "enum": [ + "Feature" + ] + }, + "geometry": { + "$ref": "#/definitions/Geometry" + }, + "properties": { + "$ref": "#/definitions/Properties" + } + } +} \ No newline at end of file diff --git a/bufr2geojson/resources/schemas/wmo-om-profile-geojson.yaml b/bufr2geojson/resources/schemas/wmo-om-profile-geojson.yaml deleted file mode 100644 index 234c6fa..0000000 --- a/bufr2geojson/resources/schemas/wmo-om-profile-geojson.yaml +++ /dev/null @@ -1,102 +0,0 @@ -$schema: 'https://json-schema.org/draft/2020-12/schema' -$id: 'https://raw.githubusercontent.com/wmo-im/bufr2geojson/main/bufr2geojson/resources/schemas/wmo-om-profile-geojson.yaml' -title: WMO GeoJSON profile for observations and measurements -description: WMO GeoJSON profile for observations and measurements -allOf: - - $ref: 'https://raw.githubusercontent.com/opengeospatial/ogcapi-features/master/core/openapi/schemas/featureGeoJSON.yaml' -properties: - conformsTo: - items: - enum: - - 'http://www.wmo.int/spec/om-profile-1/1.0/req/geojson' - properties: - type: object - properties: - identifier: - type: string - description: feature identifier - wigos_station_identifier: - type: string - description: WIGOS station identifier for station making observations - phenomenonTime: - type: string - format: date-time - description: Date and time that feature was observed or measured - resultTime: - type: string - format: date-time - description: Date and time result first became available - name: - type: string - description: Name of parameter observed or measured - description: - type: - - string - - 'null' - description: Text representation of value if non-numeric - units: - type: string - description: Units that value is reported in - value: - type: number - description: 'Value of the parameter reported if numeric, use description if non-numeric' - metadata: - type: array - items: - type: object - properties: - name: - type: string - description: Name of metadata associated with observation - description: - type: - - string - - 'null' - description: Text representation of value if non-numeric - units: - type: string - description: Units value report in if numeric - value: - type: - - number - - 'null' - description: 'Numerical value of metadata element, use description field if non-numeric' - required: - - phenomenonTime - - name - - value - - units - _meta: - type: object -examples: - - conformsTo: - - 'http://www.wmo.int/spec/om-profile-1/1.0/req/geojson' - properties: - identifier: string - wigos_station_identifier: string - phenomenonTime: '2019-08-24T14:15:22Z' - resultTime: '2019-08-24T14:15:22Z' - name: string - description: string - units: string - value: 0 - metadata: - - name: string - description: string - units: string - value: 0 - _meta: {} - type: Feature - geometry: - type: Point - coordinates: - - 0 - - 0 - id: string - links: - - href: 'http://data.example.com/buildings/123' - rel: alternate - type: application/geo+json - hreflang: en - title: 'Trierer Strasse 70, 53115 Bonn' - length: 0 \ No newline at end of file diff --git a/requirements.txt b/requirements.txt index 2b58ede..892ccf9 100644 --- a/requirements.txt +++ b/requirements.txt @@ -1,3 +1,4 @@ +attrs>=22.2.0 cfunits click eccodes diff --git a/tests/test_bufr2geojson.py b/tests/test_bufr2geojson.py index 0a9f086..1bc5b19 100644 --- a/tests/test_bufr2geojson.py +++ b/tests/test_bufr2geojson.py @@ -22,10 +22,10 @@ from __future__ import annotations import base64 import itertools +import json from jsonschema import validate, FormatChecker import pytest -import yaml from bufr2geojson import RESOURCES, strip2, transform @@ -74,55 +74,154 @@ def multimsg_bufr(): @pytest.fixture def geojson_schema(): - with open(f"{RESOURCES}/schemas/wmo-om-profile-geojson.yaml") as fh: - return yaml.load(fh, Loader=yaml.SafeLoader) + with open(f"{RESOURCES}/schemas/wccdm-obs.json") as fh: + return json.load(fh) @pytest.fixture def geojson_output(): return { - 'id': 'WIGOS_0-20000-0-03951_20220320T210000-0-13', - 'conformsTo': ['http://www.wmo.int/spec/om-profile-1/1.0/req/geojson'], - 'reportId': 'WIGOS_0-20000-0-03951_20220320T210000-0', - 'type': 'Feature', - 'geometry': { - 'type': 'Point', - 'coordinates': [ + "id": "1ec58338aab209c8ab22f05309315b71-0", + "conformsTo": ["https://wis.wmo.int/spec/wccdm-obs/1/conf/observation"], # noqa + "type": "Feature", + "geometry": { + "type": "Point", + "coordinates": [ -9.42, 51.47, - 20.0 + 21.0 ] }, - 'properties': { - 'wigos_station_identifier': '0-20000-0-03951', - 'phenomenonTime': '2022-03-20T21:00:00Z', - 'resultTime': '2022-03-20T21:00:00Z', - 'name': 'non_coordinate_pressure', - 'value': 1019.3, - 'units': 'hPa', - 'description': None, - 'metadata': [ + "properties": { + "host": "0-20000-0-03951", + "observer": None, + "observationType": "http://www.opengis.net/def/observationType/OGC-OM/2.0/OM_Measurement", # noqa + "observedProperty": "non_coordinate_pressure", + "observingProcedure": "http://codes.wmo.int/wmdr/SourceOfObservation/unknown", # noqa + "phenomenonTime": "2022-03-20T21:00:00Z", + "resultTime": "2024-12-19 00:00:00", + "validTime": None, + "result": { + "value": 1019.3, + "units": "hPa", + "standardUncertainty": None + }, + "resultQuality": [ { - 'name': 'station_or_site_name', - 'value': None, - 'units': 'CCITT IA5', - 'description': 'SHERKIN ISLAND' - }, - { - 'name': 'station_type', - 'value': 0, - 'units': 'CODE TABLE', - 'description': 'AUTOMATIC STATION' + "inScheme": None, + "flag": None, + "flagValue": None + } + ], + "parameter": { + "hasProvenance": { + "prefix": { + "prov": "http://www.w3.org/ns/prov#", + "schema": "https://schema.org/" + }, + "entity": { + "0-20000-0-03951": { + "prov:type": "schema:DigitalDocument", + "prov:label": "Input data file", + "schema:encodingFormat": "application/bufr" + }, + "1ec58338aab209c8ab22f05309315b71-0": { + "prov:type": "observation", + "prov:label": "Observation 0 from subset 0 of message 1" # noqa + } + }, + "wasDerivedFrom": { + "_:wdf": { + "prov:generatedEntity": "1ec58338aab209c8ab22f05309315b71-0", # noqa + "prov:usedEntity": "0-20000-0-03951", + "prov:activity": "_:bufr2geojson" + } + }, + "activity": { + "_:bufr2geojson": { + "prov:type": "prov:Activity", + "prov:label": "Data transformation using version 0.7.dev0 of bufr2geojson", # noqa + "prov:endTime": "2024-12-19 00:00:00" + } + } }, + "status": None, + "version": 0, + "comment": None, + "reportType": "000001", + "reportIdentifier": "1ec58338aab209c8ab22f05309315b71", + "isMemberOf": None, + "additionalProperties": { + "identification": { + "station_or_site_name": { + "value": "SHERKIN ISLAND" + } + }, + "instrumentation": { + "station_type": { + "value": { + "codetable": "http://codes.wmo.int/bufr4/codeflag/0-02-001", # noqa + "entry": "0", + "description": "AUTOMATIC STATION" + } + }, + "height_of_station_ground_above_mean_sea_level": { + "value": 20.0, + "units": "m", + "description": None + }, + "height_of_barometer_above_mean_sea_level": { + "value": 21.0, + "units": "m", + "description": None + } + }, + "qualifiers": {}, + "processing": {}, + "monitoring": {}, + "quality": {}, + "associated_field": {}, + "BUFR_element": "010004", + "zCoordinate": { + "name": "height_above_local_ground", + "value": 1.0, + "units": "m" + }, + "BUFRheaders": { + "edition": 4, + "masterTableNumber": 0, + "bufrHeaderCentre": 233, + "bufrHeaderSubCentre": 0, + "updateSequenceNumber": 0, + "dataCategory": 0, + "internationalDataSubCategory": 1, + "dataSubCategory": 0, + "masterTablesVersionNumber": 14, + "localTablesVersionNumber": 0, + "typicalYear": 2022, + "typicalMonth": 3, + "typicalDay": 20, + "typicalHour": 21, + "typicalMinute": 0, + "typicalSecond": 0, + "typicalDate": "20220320", + "typicalTime": "210000", + "numberOfSubsets": 1, + "observedData": 1, + "compressedData": 0, + "subsetNumber": 1, + "sequence": "307080" + } + } + }, + "featureOfInterest": [ { - 'name': 'height_of_barometer_above_mean_sea_level', - 'value': 21.0, - 'units': 'm', - 'description': None + "id": None, + "label": None, + "relation": None } ], - 'index': 13, - 'fxxyyy': '010004' + "index": 0 } } @@ -133,6 +232,8 @@ def test_multi(multimsg_bufr): icount = 0 for res in results: for key, val in res.items(): + if key != "geojson": + continue icount += 1 assert icount == 48 @@ -145,20 +246,24 @@ def test_transform(geojson_schema, geojson_output): # validate against JSON Schema for message in messages1: - geojson_dict = list(message.values())[0]['geojson'] + geojson_dict = message['geojson'] assert isinstance(geojson_dict, dict) - print("Validating GeoJSON") _ = validate(geojson_dict, geojson_schema, format_checker=WSI_FORMATCHECKER) + print("==========================================") print("Messages validated against schema") + print("==========================================") # validate content message = next(messages2) - assert 'WIGOS_0-20000-0-03951_20220320T210000-0-13' in message - print("Message found in result") - geojson = message['WIGOS_0-20000-0-03951_20220320T210000-0-13']['geojson'] # noqa + geojson = message['geojson'] # noqa + geojson['properties']['parameter']['hasProvenance']['activity']['_:bufr2geojson']['prov:endTime'] = "2024-12-19 00:00:00" # noqa + geojson['properties']['resultTime'] = "2024-12-19 00:00:00" + for k, v in geojson.items(): + assert v == geojson_output[k] assert geojson == geojson_output + print("Message matches expected value")