diff --git a/common/dataset_statistics.py b/common/dataset_statistics.py index 36a63e9..7a081d1 100644 --- a/common/dataset_statistics.py +++ b/common/dataset_statistics.py @@ -50,6 +50,8 @@ def __init__( def get_status(self): self.public = "N" if self["private"] else "Y" + self.internal_resources = 0 + self.external_resources = 0 self.data_link = "" self.data_type = "" requestable = self.dataset.is_requestable() @@ -62,6 +64,11 @@ def get_status(self): resource = resources[0] self.data_link = resource["url"] self.data_type = resource["url_type"] + for resource in resources: + if resource["url_type"] == "api": + self.external_resources += 1 + else: + self.internal_resources += 1 self.archived = "Y" if self["archived"] else "N" if self.public == "N" or self.requestable == "Y" or self.archived == "Y": self.exclude_from_stats = "Y" diff --git a/get_org_stats.py b/get_org_stats.py index b12debb..7c9aadd 100644 --- a/get_org_stats.py +++ b/get_org_stats.py @@ -47,6 +47,8 @@ def main(downloads, output_dir, **ignore): logger.info("Obtaining organisations data") organisations = downloads.get_all_organisations() total_public = 0 + total_public_internal = 0 + total_public_external = 0 total_updated_by_cod = 0 total_updated_by_script = 0 total_lm_fresh = 0 @@ -156,6 +158,8 @@ def main(downloads, output_dir, **ignore): organisation["public datasets"] += 1 total_public += 1 is_public_not_requestable_archived = True + total_public_internal += datasetstats.internal_resources + total_public_external += datasetstats.external_resources downloads_last_3months = dataset_3m_downloads.get(dataset["id"], 0) organisation["downloads last 90 days"] += downloads_last_3months @@ -426,7 +430,9 @@ def get_number_percentage(organisation, key): filepath = join(output_dir, "total_stats.csv") logger.info(f"Writing totals to {filepath}") headers = [ - "Public - request & archive", + "Public - Request & Archive", + "Public Internal Resources", + "Public External Resources", "Updated by COD", "Updated by Script", "Quarterly % API OKR", @@ -440,6 +446,8 @@ def get_number_percentage(organisation, key): rows = [ [ total_public, + total_public_internal, + total_public_external, total_updated_by_cod, total_updated_by_script, quarterly_api_okr, diff --git a/requirements.txt b/requirements.txt index ace3d70..5281875 100644 --- a/requirements.txt +++ b/requirements.txt @@ -1,2 +1,2 @@ -hdx-python-api==6.3.5 +hdx-python-api==6.3.7 mixpanel-utils diff --git a/tests/fixtures/total_stats.csv b/tests/fixtures/total_stats.csv index c299289..54f4efc 100644 --- a/tests/fixtures/total_stats.csv +++ b/tests/fixtures/total_stats.csv @@ -1,2 +1,2 @@ -Public - request & archive,Updated by COD,Updated by Script,Quarterly % API OKR,Last Modified Fresh,Last Modified Not Fresh,Quarterly % Last Modified Fresh OKR,End Date Up to Date,End Date Out Of Date,Quarterly % End Date Up To Date OKR -20823,583,16594,80,12685,8135,61,5955,9987,37 +Public - Request & Archive,Public Internal Resources,Public External Resources,Updated by COD,Updated by Script,Quarterly % API OKR,Last Modified Fresh,Last Modified Not Fresh,Quarterly % Last Modified Fresh OKR,End Date Up to Date,End Date Out Of Date,Quarterly % End Date Up To Date OKR +20823,54050,110729,583,16594,80,12685,8135,61,5955,9987,37