Skip to content

Commit

Permalink
Restructured dataset checks for no vars
Browse files: browse the repository at this point in the history
  • Loading branch information
phynes-sensiblecode committed Aug 22, 2022
1 parent a46aa39 commit de35dc7
Show file tree
Hide file tree
Showing 6 changed files with 111 additions and 35 deletions.
58 changes: 28 additions & 30 deletions bin/ons_csv_to_ctb_json_load.py
Original file line number Diff line number Diff line change
Expand Up @@ -305,21 +305,6 @@ def datasets(self):
dataset_variables = dataset_to_variables.get(
dataset_mnemonic, DatasetVariables([], [], []))

first_source_database = dataset_variables.databases[0] if \
dataset_variables.databases else None
database_mnemonic = pre_built_database \
if pre_built_database else first_source_database

if database_mnemonic:
if database_mnemonic not in database_observation_type:
database_observation_type[database_mnemonic] = observation_type_code
elif database_observation_type[database_mnemonic] != observation_type_code:
self.recoverable_error(
f'Reading {self.full_filename(filename)}:{row_num} {dataset_mnemonic} '
f'has different observation type {observation_type_code} from other '
f'datasets in database {database_mnemonic}: '
f'{database_observation_type[database_mnemonic]}')

dataset['Related_Datasets'] = dataset_to_related_datasets.get(dataset_mnemonic, [])
dataset['Census_Releases'] = dataset_to_releases.get(dataset_mnemonic, [])
dataset['Publications'] = dataset_to_publications.get(dataset_mnemonic, [])
Expand All @@ -329,23 +314,25 @@ def datasets(self):
dataset['Alternate_Geographic_Variables'] = alternate_geog_variables
all_classifications = dataset_variables.classifications + alternate_geog_variables

# If the dataset is public then ensure that there is at least one classification and
# that all the classifications are also public.
if dataset['Security_Mnemonic'] == PUBLIC_SECURITY_MNEMONIC:
if not dataset_variables.classifications:
if not dataset_variables.classifications:
self.recoverable_error(
f'Reading {self.full_filename(filename)}:{row_num} {dataset_mnemonic} '
'has no associated classifications or geographic variable')
drop_dataset = True
else:
# dataset_variables.databases will not be empty if
# dataset_variables.classifications is not empty
database_mnemonic = pre_built_database \
if pre_built_database else dataset_variables.databases[0]

if database_mnemonic not in database_observation_type:
database_observation_type[database_mnemonic] = observation_type_code
elif database_observation_type[database_mnemonic] != observation_type_code:
self.recoverable_error(
f'Reading {self.full_filename(filename)}:{row_num} {dataset_mnemonic} '
'has no associated classifications or geographic variable')
drop_dataset = True

for classification in all_classifications:
if self.classifications[classification].private['Security_Mnemonic'] != \
PUBLIC_SECURITY_MNEMONIC:
self.recoverable_error(
f'Reading {self.full_filename(filename)}:{row_num} Public ONS '
f'dataset {dataset_mnemonic} has non-public classification '
f'{classification}')
drop_dataset = True
f'has different observation type {observation_type_code} from other '
f'datasets in database {database_mnemonic}: '
f'{database_observation_type[database_mnemonic]}')

if not pre_built_database and len(dataset_variables.databases) > 1:
self.recoverable_error(
Expand All @@ -354,6 +341,17 @@ def datasets(self):
'Destination_Pre_Built_Database_Mnemonic and has classifications '
f'from multiple databases: {dataset_variables.databases}')

# If the dataset is public then ensure that all the classifications are also public
if dataset['Security_Mnemonic'] == PUBLIC_SECURITY_MNEMONIC:
for classification in all_classifications:
if self.classifications[classification].private['Security_Mnemonic'] != \
PUBLIC_SECURITY_MNEMONIC:
self.recoverable_error(
f'Reading {self.full_filename(filename)}:{row_num} Public ONS '
f'dataset {dataset_mnemonic} has non-public classification '
f'{classification}')
drop_dataset = True

if drop_dataset:
logging.warning(
f'Reading {self.full_filename(filename)}:{row_num} dropping record')
Expand Down
73 changes: 72 additions & 1 deletion test/expected/table-metadata.json
Original file line number Diff line number Diff line change
Expand Up @@ -379,5 +379,76 @@
}
}
]
},
{
"name": "DS_TAB2",
"datasetName": "DB_TAB",
"vars": [
"CLASS1 (Codebook)"
],
"ref": [
{
"lang": "en",
"label": "DS_TAB2 Title",
"description": "DS_TAB2 Description",
"meta": {
"Dataset_Mnemonic_2011": null,
"Last_Updated": null,
"Version": "1",
"Geographic_Coverage": "Everywhere",
"Dataset_Population": "Everyone",
"Statistical_Unit": {
"Statistical_Unit": "Houses",
"Statistical_Unit_Description": "House Description"
},
"Contact": null,
"Observation_Type": {
"Observation_Type_Code": "AMT",
"Observation_Type_Label": "Amount",
"Observation_Type_Description": "A financial amount",
"Decimal_Places": "2",
"Prefix": "\u00a3",
"Suffix": "GBP",
"FillTrailingSpaces": "Y",
"NegativeSign": "-"
},
"Related_Datasets": [],
"Census_Releases": [],
"Publications": [],
"Alternate_Geographic_Variables": []
}
},
{
"lang": "cy",
"label": "DS_TAB2 Title",
"description": "DS_TAB2 Description",
"meta": {
"Dataset_Mnemonic_2011": null,
"Last_Updated": null,
"Version": "1",
"Geographic_Coverage": "Everywhere",
"Dataset_Population": "Everyone",
"Statistical_Unit": {
"Statistical_Unit": "Houses",
"Statistical_Unit_Description": "House Description"
},
"Contact": null,
"Observation_Type": {
"Observation_Type_Code": "AMT",
"Observation_Type_Label": "Amount",
"Observation_Type_Description": "A financial amount",
"Decimal_Places": "2",
"Prefix": "\u00a3",
"Suffix": "GBP",
"FillTrailingSpaces": "Y",
"NegativeSign": "-"
},
"Related_Datasets": [],
"Census_Releases": [],
"Publications": [],
"Alternate_Geographic_Variables": []
}
}
]
}
]
]
12 changes: 8 additions & 4 deletions test/test_dataset.py
Original file line number Diff line number Diff line change
Expand Up @@ -64,7 +64,8 @@ def test_private_classification(self):
{'Dataset_Mnemonic': 'DS2', 'Security_Mnemonic': 'PUB', **COMMON_FIELDS},
{'Dataset_Mnemonic': 'DS3', 'Security_Mnemonic': 'PUB', **COMMON_FIELDS},
{'Dataset_Mnemonic': 'DS4', 'Security_Mnemonic': 'PUB', **COMMON_FIELDS},
{'Dataset_Mnemonic': 'DS_TAB', 'Security_Mnemonic': 'PUB', **COMMON_FIELDS}],
{'Dataset_Mnemonic': 'DS_TAB', 'Security_Mnemonic': 'PUB', **COMMON_FIELDS},
{'Dataset_Mnemonic': 'DS_TAB2', 'Security_Mnemonic': 'PUB', **COMMON_FIELDS}],
f'^Reading {FILENAME}:2 Public ONS dataset DS_PRIV has non-public classification CLASS_PRIV$')

def test_no_variables(self):
Expand All @@ -75,7 +76,8 @@ def test_no_variables(self):
{'Dataset_Mnemonic': 'DS3', 'Security_Mnemonic': 'PUB', **COMMON_FIELDS},
{'Dataset_Mnemonic': 'DS4', 'Security_Mnemonic': 'PUB', **COMMON_FIELDS},
{'Dataset_Mnemonic': 'DS5', 'Security_Mnemonic': 'PUB', **COMMON_FIELDS},
{'Dataset_Mnemonic': 'DS_TAB', 'Security_Mnemonic': 'PUB', **COMMON_FIELDS}],
{'Dataset_Mnemonic': 'DS_TAB', 'Security_Mnemonic': 'PUB', **COMMON_FIELDS},
{'Dataset_Mnemonic': 'DS_TAB2', 'Security_Mnemonic': 'PUB', **COMMON_FIELDS}],
f'^Reading {FILENAME}:7 DS5 has no associated classifications or geographic variable$')

def test_pre_built_database_is_not_tabular(self):
Expand All @@ -85,7 +87,8 @@ def test_pre_built_database_is_not_tabular(self):
{'Dataset_Mnemonic': 'DS2', 'Security_Mnemonic': 'PUB', **COMMON_FIELDS},
{'Dataset_Mnemonic': 'DS3', 'Security_Mnemonic': 'PUB', **COMMON_FIELDS},
{'Dataset_Mnemonic': 'DS4', 'Security_Mnemonic': 'PUB', **COMMON_FIELDS},
{'Dataset_Mnemonic': 'DS_TAB', 'Security_Mnemonic': 'PUB', 'Destination_Pre_Built_Database_Mnemonic': 'DB1', **COMMON_FIELDS}],
{'Dataset_Mnemonic': 'DS_TAB', 'Security_Mnemonic': 'PUB', 'Destination_Pre_Built_Database_Mnemonic': 'DB1', **COMMON_FIELDS},
{'Dataset_Mnemonic': 'DS_TAB2', 'Security_Mnemonic': 'PUB', **COMMON_FIELDS}],
f'^Reading {FILENAME}:7 DS_TAB has Destination_Pre_Built_Database_Mnemonic DB1 which has invalid Database_Type_Code: MICRODATA$')

def test_different_observation_type_code(self):
Expand All @@ -108,7 +111,8 @@ def test_multiple_databases_not_pre_built(self):
{'Dataset_Mnemonic': 'DS2', 'Security_Mnemonic': 'PUB', **COMMON_FIELDS},
{'Dataset_Mnemonic': 'DS3', 'Security_Mnemonic': 'PUB', **COMMON_FIELDS},
{'Dataset_Mnemonic': 'DS4', 'Security_Mnemonic': 'PUB', **COMMON_FIELDS},
{'Dataset_Mnemonic': 'DS_TAB', 'Security_Mnemonic': 'PUB', **COMMON_FIELDS}],
{'Dataset_Mnemonic': 'DS_TAB', 'Security_Mnemonic': 'PUB', **COMMON_FIELDS},
{'Dataset_Mnemonic': 'DS_TAB2', 'Security_Mnemonic': 'PUB', **COMMON_FIELDS}],
f"^Reading {FILENAME}:7 DS_TAB has an empty value for Destination_Pre_Built_Database_Mnemonic and has classifications from multiple databases: \['DB1', 'DB2']$")


Expand Down
1 change: 1 addition & 0 deletions test/testdata/Dataset.csv
Original file line number Diff line number Diff line change
Expand Up @@ -5,3 +5,4 @@ DS3,3,DS3 Title,DS3 Title (Welsh),DS3 Description,DS3 Description (Welsh),People
DS_PRIV,4,DS_PRIV Title,DS_PRIV Title (Welsh),DS_PRIV Description,DS_PRIV Description (Welsh),People,DS_PRIV 2011,Everywhere,Everywhere (Welsh),Everyone,Everyone (Welsh),Today,CLASS,Y,1,1,,
DS4,5,DS4 Title,,DS4 Description,,Houses,,Everywhere,,Everyone,,,PUB,N,,1,,
DS_TAB,6,DS_TAB Title,,DS_TAB Description,,Houses,,Everywhere,,Everyone,,,PUB,N,,1,AMT,DB_TAB
DS_TAB2,7,DS_TAB2 Title,,DS_TAB2 Description,,Houses,,Everywhere,,Everyone,,,PUB,N,,1,AMT,DB_TAB
1 change: 1 addition & 0 deletions test/testdata/Dataset_Variable.csv
Original file line number Diff line number Diff line change
Expand Up @@ -11,3 +11,4 @@ CLASS2,DS4,5,1,VAR2,N,DB1
,DS1,10,,GEO2,N,DB1
CLASS1,DS_TAB,11,1,VAR1,,DB1
CLASS4A,DS_TAB,12,2,VAR4,,DB2
CLASS1,DS_TAB2,13,1,VAR1,,DB1
1 change: 1 addition & 0 deletions test/testdata/Observation_Type.csv
Original file line number Diff line number Diff line change
@@ -1,2 +1,3 @@
Id,Observation_Type_Code,Observation_Type_Label,Observation_Type_Description,Decimal_Places,Prefix,Suffix,FillTrailingSpaces,NegativeSign
1,AMT,Amount,A financial amount,2,£,GBP,Y,-
2,PCT,Percentage,A percentage amount,2,,%,Y,-

0 comments on commit de35dc7

Please sign in to comment.