From 0b0b3a4e20bb655c8a53fd887db21089a3a68d7d Mon Sep 17 00:00:00 2001 From: Prashanth R Date: Fri, 24 Feb 2023 17:23:46 -0800 Subject: [PATCH] Support both absolute and relative rankings for time-delta queries (#2322) Also, reduce the number of tiles before "show more" from 6 to 3. ![image](https://user-images.githubusercontent.com/4375037/221324822-a2bdaa95-94ec-40ff-9918-c7cfcca4cff6.png) ![image](https://user-images.githubusercontent.com/4375037/221324812-55e502f8-4ab3-4705-a3c9-2ee429cd3d1e.png) --- .../query_6/chart_config.json | 542 +++++++++++++++++- .../demo_feb2023/query_5/chart_config.json | 116 +++- .../fulfillment/time_delta_across_places.py | 47 +- .../nl/fulfillment/time_delta_across_vars.py | 47 +- server/lib/nl/utils.py | 96 +++- server/tests/lib/nl/fulfiller_test.py | 18 +- server/tests/lib/nl/test_utterance.py | 79 ++- server/tests/lib/nl/utils_test.py | 18 +- .../constants/app/nl_interface_constants.ts | 2 +- 9 files changed, 873 insertions(+), 92 deletions(-) diff --git a/server/integration_tests/test_data/demo2_cities_feb2023/query_6/chart_config.json b/server/integration_tests/test_data/demo2_cities_feb2023/query_6/chart_config.json index 7f710057e1..f3b613c2c5 100644 --- a/server/integration_tests/test_data/demo2_cities_feb2023/query_6/chart_config.json +++ b/server/integration_tests/test_data/demo2_cities_feb2023/query_6/chart_config.json @@ -3,6 +3,175 @@ "categories": [ { "blocks": [ + { + "columns": [ + { + "tiles": [ + { + "placeDcidOverride": "country/DZA", + "statVarKey": [ + "LifeExpectancy_Person" + ], + "title": "Life Expectancy in Algeria", + "type": "LINE" + }, + { + "placeDcidOverride": "country/LBY", + "statVarKey": [ + "LifeExpectancy_Person" + ], + "title": "Life Expectancy in Libya", + "type": "LINE" + }, + { + "placeDcidOverride": "country/MAR", + "statVarKey": [ + "LifeExpectancy_Person" + ], + "title": "Life Expectancy in Morocco", + "type": "LINE" + }, + { + "placeDcidOverride": "country/TUN", + "statVarKey": [ + "LifeExpectancy_Person" + ], + "title": "Life Expectancy in Tunisia", + "type": "LINE" + }, + { + "placeDcidOverride": "country/ERI", + "statVarKey": [ + "LifeExpectancy_Person" + ], + "title": "Life Expectancy in Eritrea", + "type": "LINE" + }, + { + "placeDcidOverride": "country/MLI", + "statVarKey": [ + "LifeExpectancy_Person" + ], + "title": "Life Expectancy in Mali", + "type": "LINE" + }, + { + "placeDcidOverride": "country/MWI", + "statVarKey": [ + "LifeExpectancy_Person" + ], + "title": "Life Expectancy in Malawi", + "type": "LINE" + }, + { + "placeDcidOverride": "country/SEN", + "statVarKey": [ + "LifeExpectancy_Person" + ], + "title": "Life Expectancy in Senegal", + "type": "LINE" + }, + { + "placeDcidOverride": "country/GMB", + "statVarKey": [ + "LifeExpectancy_Person" + ], + "title": "Life Expectancy in Gambia", + "type": "LINE" + }, + { + "placeDcidOverride": "country/SSD", + "statVarKey": [ + "LifeExpectancy_Person" + ], + "title": "Life Expectancy in South Sudan", + "type": "LINE" + }, + { + "placeDcidOverride": "country/CMR", + "statVarKey": [ + "LifeExpectancy_Person" + ], + "title": "Life Expectancy in Cameroon", + "type": "LINE" + }, + { + "placeDcidOverride": "country/SLE", + "statVarKey": [ + "LifeExpectancy_Person" + ], + "title": "Life Expectancy in Sierra Leone", + "type": "LINE" + }, + { + "placeDcidOverride": "country/EGY", + "statVarKey": [ + "LifeExpectancy_Person" + ], + "title": "Life Expectancy in Egypt", + "type": "LINE" + }, + { + "placeDcidOverride": "country/CPV", + "statVarKey": [ + "LifeExpectancy_Person" + ], + "title": "Life Expectancy in Cape Verde", + "type": "LINE" + }, + { + "placeDcidOverride": "country/NER", + "statVarKey": [ + "LifeExpectancy_Person" + ], + "title": "Life Expectancy in Niger", + "type": "LINE" + }, + { + "placeDcidOverride": "country/ETH", + "statVarKey": [ + "LifeExpectancy_Person" + ], + "title": "Life Expectancy in Ethiopia", + "type": "LINE" + }, + { + "placeDcidOverride": "country/STP", + "statVarKey": [ + "LifeExpectancy_Person" + ], + "title": "Life Expectancy in S\u00e3o Tom\u00e9 and Pr\u00edncipe", + "type": "LINE" + }, + { + "placeDcidOverride": "country/COM", + "statVarKey": [ + "LifeExpectancy_Person" + ], + "title": "Life Expectancy in Comoros", + "type": "LINE" + }, + { + "placeDcidOverride": "country/AGO", + "statVarKey": [ + "LifeExpectancy_Person" + ], + "title": "Life Expectancy in Angola", + "type": "LINE" + }, + { + "placeDcidOverride": "country/BFA", + "statVarKey": [ + "LifeExpectancy_Person" + ], + "title": "Life Expectancy in Burkina Faso", + "type": "LINE" + } + ] + } + ], + "title": "Increase over time (by absolute change)" + }, { "columns": [ { @@ -169,7 +338,177 @@ } ] } - ] + ], + "title": "Increase over time (by percent change)" + }, + { + "columns": [ + { + "tiles": [ + { + "placeDcidOverride": "country/TUN", + "statVarKey": [ + "LifeExpectancy_Person_Female" + ], + "title": "Female Life Expectancy in Tunisia", + "type": "LINE" + }, + { + "placeDcidOverride": "country/LBY", + "statVarKey": [ + "LifeExpectancy_Person_Female" + ], + "title": "Female Life Expectancy in Libya", + "type": "LINE" + }, + { + "placeDcidOverride": "country/DZA", + "statVarKey": [ + "LifeExpectancy_Person_Female" + ], + "title": "Female Life Expectancy in Algeria", + "type": "LINE" + }, + { + "placeDcidOverride": "country/SEN", + "statVarKey": [ + "LifeExpectancy_Person_Female" + ], + "title": "Female Life Expectancy in Senegal", + "type": "LINE" + }, + { + "placeDcidOverride": "country/MLI", + "statVarKey": [ + "LifeExpectancy_Person_Female" + ], + "title": "Female Life Expectancy in Mali", + "type": "LINE" + }, + { + "placeDcidOverride": "country/MWI", + "statVarKey": [ + "LifeExpectancy_Person_Female" + ], + "title": "Female Life Expectancy in Malawi", + "type": "LINE" + }, + { + "placeDcidOverride": "country/GMB", + "statVarKey": [ + "LifeExpectancy_Person_Female" + ], + "title": "Female Life Expectancy in Gambia", + "type": "LINE" + }, + { + "placeDcidOverride": "country/ETH", + "statVarKey": [ + "LifeExpectancy_Person_Female" + ], + "title": "Female Life Expectancy in Ethiopia", + "type": "LINE" + }, + { + "placeDcidOverride": "country/ERI", + "statVarKey": [ + "LifeExpectancy_Person_Female" + ], + "title": "Female Life Expectancy in Eritrea", + "type": "LINE" + }, + { + "placeDcidOverride": "country/LBR", + "statVarKey": [ + "LifeExpectancy_Person_Female" + ], + "title": "Female Life Expectancy in Liberia", + "type": "LINE" + }, + { + "placeDcidOverride": "country/MAR", + "statVarKey": [ + "LifeExpectancy_Person_Female" + ], + "title": "Female Life Expectancy in Morocco", + "type": "LINE" + }, + { + "placeDcidOverride": "country/NER", + "statVarKey": [ + "LifeExpectancy_Person_Female" + ], + "title": "Female Life Expectancy in Niger", + "type": "LINE" + }, + { + "placeDcidOverride": "country/MDG", + "statVarKey": [ + "LifeExpectancy_Person_Female" + ], + "title": "Female Life Expectancy in Madagascar", + "type": "LINE" + }, + { + "placeDcidOverride": "country/GAB", + "statVarKey": [ + "LifeExpectancy_Person_Female" + ], + "title": "Female Life Expectancy in Gabon", + "type": "LINE" + }, + { + "placeDcidOverride": "country/RWA", + "statVarKey": [ + "LifeExpectancy_Person_Female" + ], + "title": "Female Life Expectancy in Rwanda", + "type": "LINE" + }, + { + "placeDcidOverride": "country/CPV", + "statVarKey": [ + "LifeExpectancy_Person_Female" + ], + "title": "Female Life Expectancy in Cape Verde", + "type": "LINE" + }, + { + "placeDcidOverride": "country/BFA", + "statVarKey": [ + "LifeExpectancy_Person_Female" + ], + "title": "Female Life Expectancy in Burkina Faso", + "type": "LINE" + }, + { + "placeDcidOverride": "country/SSD", + "statVarKey": [ + "LifeExpectancy_Person_Female" + ], + "title": "Female Life Expectancy in South Sudan", + "type": "LINE" + }, + { + "placeDcidOverride": "country/GIN", + "statVarKey": [ + "LifeExpectancy_Person_Female" + ], + "title": "Female Life Expectancy in Guinea", + "type": "LINE" + }, + { + "placeDcidOverride": "country/BEN", + "statVarKey": [ + "LifeExpectancy_Person_Female" + ], + "title": "Female Life Expectancy in Benin", + "type": "LINE" + } + ] + } + ], + "title": "Increase over time (by absolute change)" }, { "columns": [ @@ -337,7 +676,177 @@ } ] } - ] + ], + "title": "Increase over time (by percent change)" + }, + { + "columns": [ + { + "tiles": [ + { + "placeDcidOverride": "country/TUN", + "statVarKey": [ + "LifeExpectancy_Person_Male" + ], + "title": "Male Life Expectancy in Tunisia", + "type": "LINE" + }, + { + "placeDcidOverride": "country/MLI", + "statVarKey": [ + "LifeExpectancy_Person_Male" + ], + "title": "Male Life Expectancy in Mali", + "type": "LINE" + }, + { + "placeDcidOverride": "country/LBR", + "statVarKey": [ + "LifeExpectancy_Person_Male" + ], + "title": "Male Life Expectancy in Liberia", + "type": "LINE" + }, + { + "placeDcidOverride": "country/DZA", + "statVarKey": [ + "LifeExpectancy_Person_Male" + ], + "title": "Male Life Expectancy in Algeria", + "type": "LINE" + }, + { + "placeDcidOverride": "country/GMB", + "statVarKey": [ + "LifeExpectancy_Person_Male" + ], + "title": "Male Life Expectancy in Gambia", + "type": "LINE" + }, + { + "placeDcidOverride": "country/LBY", + "statVarKey": [ + "LifeExpectancy_Person_Male" + ], + "title": "Male Life Expectancy in Libya", + "type": "LINE" + }, + { + "placeDcidOverride": "country/MAR", + "statVarKey": [ + "LifeExpectancy_Person_Male" + ], + "title": "Male Life Expectancy in Morocco", + "type": "LINE" + }, + { + "placeDcidOverride": "country/SEN", + "statVarKey": [ + "LifeExpectancy_Person_Male" + ], + "title": "Male Life Expectancy in Senegal", + "type": "LINE" + }, + { + "placeDcidOverride": "country/ETH", + "statVarKey": [ + "LifeExpectancy_Person_Male" + ], + "title": "Male Life Expectancy in Ethiopia", + "type": "LINE" + }, + { + "placeDcidOverride": "country/BFA", + "statVarKey": [ + "LifeExpectancy_Person_Male" + ], + "title": "Male Life Expectancy in Burkina Faso", + "type": "LINE" + }, + { + "placeDcidOverride": "country/ERI", + "statVarKey": [ + "LifeExpectancy_Person_Male" + ], + "title": "Male Life Expectancy in Eritrea", + "type": "LINE" + }, + { + "placeDcidOverride": "country/GIN", + "statVarKey": [ + "LifeExpectancy_Person_Male" + ], + "title": "Male Life Expectancy in Guinea", + "type": "LINE" + }, + { + "placeDcidOverride": "country/NER", + "statVarKey": [ + "LifeExpectancy_Person_Male" + ], + "title": "Male Life Expectancy in Niger", + "type": "LINE" + }, + { + "placeDcidOverride": "country/MDG", + "statVarKey": [ + "LifeExpectancy_Person_Male" + ], + "title": "Male Life Expectancy in Madagascar", + "type": "LINE" + }, + { + "placeDcidOverride": "country/GAB", + "statVarKey": [ + "LifeExpectancy_Person_Male" + ], + "title": "Male Life Expectancy in Gabon", + "type": "LINE" + }, + { + "placeDcidOverride": "country/SSD", + "statVarKey": [ + "LifeExpectancy_Person_Male" + ], + "title": "Male Life Expectancy in South Sudan", + "type": "LINE" + }, + { + "placeDcidOverride": "country/RWA", + "statVarKey": [ + "LifeExpectancy_Person_Male" + ], + "title": "Male Life Expectancy in Rwanda", + "type": "LINE" + }, + { + "placeDcidOverride": "country/MWI", + "statVarKey": [ + "LifeExpectancy_Person_Male" + ], + "title": "Male Life Expectancy in Malawi", + "type": "LINE" + }, + { + "placeDcidOverride": "country/BEN", + "statVarKey": [ + "LifeExpectancy_Person_Male" + ], + "title": "Male Life Expectancy in Benin", + "type": "LINE" + }, + { + "placeDcidOverride": "country/SLE", + "statVarKey": [ + "LifeExpectancy_Person_Male" + ], + "title": "Male Life Expectancy in Sierra Leone", + "type": "LINE" + } + ] + } + ], + "title": "Increase over time (by absolute change)" }, { "columns": [ @@ -505,7 +1014,25 @@ } ] } - ] + ], + "title": "Increase over time (by percent change)" + }, + { + "columns": [ + { + "tiles": [ + { + "placeDcidOverride": "country/SYC", + "statVarKey": [ + "Count_Death_AsAFractionOfCount_Person" + ], + "title": "Deaths Per Capita in Seychelles", + "type": "LINE" + } + ] + } + ], + "title": "Increase over time (by absolute change)" }, { "columns": [ @@ -521,7 +1048,8 @@ } ] } - ] + ], + "title": "Increase over time (by percent change)" } ], "statVarSpec": { @@ -546,15 +1074,15 @@ ], "metadata": { "placeDcid": [ - "country/MLI" + "country/DZA" ] } }, "context": {}, "debug": {}, "place": { - "dcid": "country/MLI", - "name": "Mali", + "dcid": "country/DZA", + "name": "Algeria", "place_type": "Country" } } \ No newline at end of file diff --git a/server/integration_tests/test_data/demo_feb2023/query_5/chart_config.json b/server/integration_tests/test_data/demo_feb2023/query_5/chart_config.json index 2fac992864..13960437cd 100644 --- a/server/integration_tests/test_data/demo_feb2023/query_5/chart_config.json +++ b/server/integration_tests/test_data/demo_feb2023/query_5/chart_config.json @@ -3,6 +3,120 @@ "categories": [ { "blocks": [ + { + "columns": [ + { + "tiles": [ + { + "statVarKey": [ + "Count_Worker_NAICSHealthCareSocialAssistance" + ], + "title": "Health Care and Social Assistance Industry in Placer County", + "type": "LINE" + }, + { + "statVarKey": [ + "dc/p69tpsldf99h7" + ], + "title": "Retail Trade in Placer County", + "type": "LINE" + }, + { + "statVarKey": [ + "Count_Worker_NAICSConstruction" + ], + "title": "Construction Industry in Placer County", + "type": "LINE" + }, + { + "statVarKey": [ + "Count_Worker_NAICSAccommodationFoodServices" + ], + "title": "Accommodation and Food Services Industry in Placer County", + "type": "LINE" + }, + { + "statVarKey": [ + "Count_Worker_NAICSAdministrativeSupportWasteManagementRemediationServices" + ], + "title": "Administrative and Support and Waste Management Services Industry in Placer County", + "type": "LINE" + }, + { + "statVarKey": [ + "Count_Worker_NAICSProfessionalScientificTechnicalServices" + ], + "title": "Professional, Scientific, and Technical Services in Placer County", + "type": "LINE" + }, + { + "statVarKey": [ + "Count_Worker_NAICSEducationalServices" + ], + "title": "Educational Services Industry in Placer County", + "type": "LINE" + }, + { + "statVarKey": [ + "Count_Worker_NAICSFinanceInsurance" + ], + "title": "Finance and Insurance Industry in Placer County", + "type": "LINE" + }, + { + "statVarKey": [ + "Count_Worker_NAICSOtherServices" + ], + "title": "Other Services, Except Public Administration in Placer County", + "type": "LINE" + }, + { + "statVarKey": [ + "Count_Worker_NAICSArtsEntertainmentRecreation" + ], + "title": "Arts, Entertainment, and Recreation Industry in Placer County", + "type": "LINE" + }, + { + "statVarKey": [ + "Count_Worker_NAICSRealEstateRentalLeasing" + ], + "title": "Real Estate and Rental and Leasing in Placer County", + "type": "LINE" + }, + { + "statVarKey": [ + "Count_Worker_NAICSWholesaleTrade" + ], + "title": "Wholesale Trade in Placer County", + "type": "LINE" + }, + { + "statVarKey": [ + "Count_Worker_NAICSPublicAdministration" + ], + "title": "Public Administration in Placer County", + "type": "LINE" + }, + { + "statVarKey": [ + "Count_Worker_NAICSInformation" + ], + "title": "Information Industry in Placer County", + "type": "LINE" + }, + { + "statVarKey": [ + "Count_Worker_NAICSMiningQuarryingOilGasExtraction" + ], + "title": "Mining, Quarrying, and Oil and Gas Extraction Industry in Placer County", + "type": "LINE" + } + ] + } + ], + "title": "Increase over time (by absolute change)" + }, { "columns": [ { @@ -115,7 +229,7 @@ ] } ], - "title": "Categories of Jobs" + "title": "Increase over time (by percent change)" } ], "statVarSpec": { diff --git a/server/lib/nl/fulfillment/time_delta_across_places.py b/server/lib/nl/fulfillment/time_delta_across_places.py index 8db5e1c246..7e3b2fdd4c 100644 --- a/server/lib/nl/fulfillment/time_delta_across_places.py +++ b/server/lib/nl/fulfillment/time_delta_across_places.py @@ -17,6 +17,7 @@ from server.lib.nl import utils from server.lib.nl.detection import Place +from server.lib.nl.detection import TimeDeltaType from server.lib.nl.fulfillment.base import add_chart_to_utterance from server.lib.nl.fulfillment.base import ChartVars from server.lib.nl.fulfillment.base import populate_charts @@ -88,26 +89,38 @@ def _populate_cb(state: PopulateState, chart_vars: ChartVars, dcid2place = {c.dcid: c for c in child_places} dcids = list(dcid2place.keys()) - ranked_child_dcids = utils.rank_places_by_growth_rate( + direction = state.time_delta_types[0] + ranked_children = utils.rank_places_by_series_growth( places=dcids, sv=chart_vars.svs[0], - growth_direction=state.time_delta_types[0], + growth_direction=direction, rank_order=rank_order) - utils.update_counter(state.uttr.counters, 'time-delta_reranked_places', { - 'orig': dcids, - 'ranked': ranked_child_dcids, - }) - ranked_child_places = [] - for d in ranked_child_dcids: - ranked_child_places.append(dcid2place[d]) + utils.update_counter( + state.uttr.counters, 'time-delta_reranked_places', { + 'orig': dcids, + 'ranked_abs': ranked_children.abs, + 'ranked_pct': ranked_children.pct, + }) + block_id = chart_vars.block_id + i = 0 + for ranked_dcids in [ranked_children.abs, ranked_children.pct]: + ranked_places = [] + for d in ranked_dcids: + ranked_places.append(dcid2place[d]) - # No per-capita charts. - chart_vars.include_percapita = False - # Override the "main-place" (i.e., parent) with the child place. - chart_vars.set_place_override_for_line = True - for p in ranked_child_places[:_MAX_PLACES_TO_RETURN]: - logging.info('Processing %s' % p.name) - found |= add_chart_to_utterance(ChartType.TIMELINE_CHART, state, chart_vars, - [p], chart_origin) + # No per-capita charts. + chart_vars.include_percapita = False + chart_vars.block_id = block_id + # Override the "main-place" (i.e., parent) with the child place. + chart_vars.set_place_override_for_line = True + chart_vars.title = utils.get_time_delta_title( + direction=direction, is_absolute=True if i == 0 else False) + for p in ranked_places[:_MAX_PLACES_TO_RETURN]: + found |= add_chart_to_utterance(ChartType.TIMELINE_CHART, state, + chart_vars, [p], chart_origin) + # Avoid having the second set of charts use the same block_id than + # others. + block_id += 10 + i += 1 return found diff --git a/server/lib/nl/fulfillment/time_delta_across_vars.py b/server/lib/nl/fulfillment/time_delta_across_vars.py index e0f04ee1be..855dce4209 100644 --- a/server/lib/nl/fulfillment/time_delta_across_vars.py +++ b/server/lib/nl/fulfillment/time_delta_across_vars.py @@ -72,21 +72,34 @@ def _populate_cb(state: PopulateState, chart_vars: ChartVars, # Compute time-delta ranks. rank_order = state.ranking_types[0] if state.ranking_types else None logging.info('Attempting to compute growth rate stats') - ranked_svs = utils.rank_svs_by_growth_rate( - place=places[0].dcid, - svs=chart_vars.svs, - growth_direction=state.time_delta_types[0], - rank_order=rank_order) - utils.update_counter(state.uttr.counters, - 'time-delta-across-vars_reranked_svs', { - 'orig': chart_vars.svs, - 'ranked': ranked_svs, - }) - for sv in ranked_svs: - cv = chart_vars - cv.svs = [sv] - cv.response_type = "growth chart" - # TODO: desc string should take into account rank order - found |= add_chart_to_utterance(ChartType.TIMELINE_CHART, state, cv, places, - chart_origin) + + direction = state.time_delta_types[0] + ranked_lists = utils.rank_svs_by_series_growth(place=places[0].dcid, + svs=chart_vars.svs, + growth_direction=direction, + rank_order=rank_order) + + utils.update_counter( + state.uttr.counters, 'time-delta-across-vars_reranked_svs', { + 'orig': chart_vars.svs, + 'ranked_abs': ranked_lists.abs, + 'ranked_pct': ranked_lists.pct, + }) + + block_id = chart_vars.block_id + i = 0 + for ranked_svs in [ranked_lists.abs, ranked_lists.pct]: + for sv in ranked_svs: + cv = chart_vars + cv.svs = [sv] + cv.block_id = block_id + cv.title = utils.get_time_delta_title( + direction=direction, is_absolute=True if i == 0 else False) + found |= add_chart_to_utterance(ChartType.TIMELINE_CHART, state, cv, + places, chart_origin) + # Avoid having the second set of charts use the same block_id than + # others. + block_id += 10 + i += 1 + return found diff --git a/server/lib/nl/utils.py b/server/lib/nl/utils.py index c3babffbc9..e2f3c0409a 100644 --- a/server/lib/nl/utils.py +++ b/server/lib/nl/utils.py @@ -20,7 +20,7 @@ import os import random import re -from typing import Dict, List, Set, Union +from typing import Dict, List, NamedTuple, Set, Union import server.lib.nl.constants as constants import server.lib.nl.detection as detection @@ -328,11 +328,23 @@ def has_series_with_single_datapoint(place: str, svs: List[str]): return False +# List of vars or places ranked by abs and pct growth. +class GrowthRankedLists(NamedTuple): + abs: List[str] + pct: List[str] + + +# Raw abs and pct growth +class GrowthRanks(NamedTuple): + abs: float + pct: float + + # Given an SV and list of places, this API ranks the places # per the growth rate of the time-series. -def rank_places_by_growth_rate(places: List[str], sv: str, - growth_direction: detection.TimeDeltaType, - rank_order: detection.RankingType) -> List[str]: +def rank_places_by_series_growth( + places: List[str], sv: str, growth_direction: detection.TimeDeltaType, + rank_order: detection.RankingType) -> GrowthRankedLists: series_data = util.series_core(entities=places, variables=[sv], all_facets=False) @@ -347,31 +359,37 @@ def rank_places_by_growth_rate(places: List[str], sv: str, continue try: - net_growth_rate = compute_growth_rate(series) + net_growth = compute_series_growth(series) except Exception as e: logging.error('Growth rate computation failed: %s', str(e)) continue - if net_growth_rate > 0 and growth_direction != detection.TimeDeltaType.INCREASE: + if net_growth.abs > 0 and growth_direction != detection.TimeDeltaType.INCREASE: continue - if net_growth_rate < 0 and growth_direction != detection.TimeDeltaType.DECREASE: + if net_growth.abs < 0 and growth_direction != detection.TimeDeltaType.DECREASE: continue - places_with_vals.append((place, net_growth_rate)) + places_with_vals.append((place, net_growth)) - places_with_vals = sorted(places_with_vals, - key=lambda pair: pair[1], - reverse=_TIME_DELTA_SORT_MAP[(growth_direction, - rank_order)]) - logging.info(places_with_vals) - return [p for p, _ in places_with_vals] + places_with_vals_by_abs = sorted( + places_with_vals, + key=lambda pair: pair[1].abs, + reverse=_TIME_DELTA_SORT_MAP[(growth_direction, rank_order)]) + places_with_vals_by_pct = sorted( + places_with_vals, + key=lambda pair: pair[1].pct, + reverse=_TIME_DELTA_SORT_MAP[(growth_direction, rank_order)]) + return GrowthRankedLists( + abs=[sv for sv, _ in places_with_vals_by_abs], + pct=[sv for sv, _ in places_with_vals_by_pct], + ) # Given a place and a list of existing SVs, this API ranks the SVs # per the growth rate of the time-series. -def rank_svs_by_growth_rate(place: str, svs: List[str], - growth_direction: detection.TimeDeltaType, - rank_order: detection.RankingType) -> List[str]: +def rank_svs_by_series_growth( + place: str, svs: List[str], growth_direction: detection.TimeDeltaType, + rank_order: detection.RankingType) -> GrowthRankedLists: series_data = util.series_core(entities=[place], variables=svs, all_facets=False) @@ -385,28 +403,35 @@ def rank_svs_by_growth_rate(place: str, svs: List[str], continue try: - net_growth_rate = compute_growth_rate(series) + net_growth = compute_series_growth(series) except Exception as e: logging.error('Growth rate computation failed: %s', str(e)) continue - if net_growth_rate > 0 and growth_direction != detection.TimeDeltaType.INCREASE: + if net_growth.abs > 0 and growth_direction != detection.TimeDeltaType.INCREASE: continue - if net_growth_rate < 0 and growth_direction != detection.TimeDeltaType.DECREASE: + if net_growth.abs < 0 and growth_direction != detection.TimeDeltaType.DECREASE: continue - svs_with_vals.append((sv, net_growth_rate)) + svs_with_vals.append((sv, net_growth)) - svs_with_vals = sorted(svs_with_vals, - key=lambda pair: pair[1], - reverse=_TIME_DELTA_SORT_MAP[(growth_direction, - rank_order)]) - logging.info(svs_with_vals) - return [sv for sv, _ in svs_with_vals] + svs_with_vals_by_abs = sorted(svs_with_vals, + key=lambda pair: pair[1].abs, + reverse=_TIME_DELTA_SORT_MAP[(growth_direction, + rank_order)]) + svs_with_vals_by_pct = sorted(svs_with_vals, + key=lambda pair: pair[1].pct, + reverse=_TIME_DELTA_SORT_MAP[(growth_direction, + rank_order)]) + return GrowthRankedLists( + abs=[sv for sv, _ in svs_with_vals_by_abs], + pct=[sv for sv, _ in svs_with_vals_by_pct], + ) # Computes net growth-rate for a time-series including only recent (since 2012) observations. -def compute_growth_rate(series: List[Dict]) -> float: +# Returns a pair of +def compute_series_growth(series: List[Dict]) -> GrowthRanks: latest = None earliest = None # TODO: Apparently series is ordered, so simplify. @@ -427,7 +452,7 @@ def compute_growth_rate(series: List[Dict]) -> float: def _compute_growth(earliest: Dict, latest: Dict, - series: List[Dict]) -> datetime.date: + series: List[Dict]) -> GrowthRanks: eparts = earliest['date'].split('-') lparts = latest['date'].split('-') @@ -453,7 +478,9 @@ def _compute_growth(earliest: Dict, latest: Dict, earliest['date']) # Compute % growth per day start = 0.000001 if earliest['value'] == 0 else earliest['value'] - return float(val_delta) / (float(date_delta.days) * start) + pct = float(val_delta) / (float(date_delta.days) * start) + abs = float(val_delta) / float(date_delta.days) + return GrowthRanks(abs=abs, pct=pct) def _datestr_to_date(datestr: str) -> datetime.date: @@ -808,3 +835,12 @@ def new_session_id() -> str: rand = random.randrange(1000) # Prefix randomness since session_id gets used as BT key return str(rand) + '_' + str(micros) + + +def get_time_delta_title(direction: detection.TimeDeltaType, + is_absolute: bool) -> str: + return ' '.join([ + 'Increase' if direction == detection.TimeDeltaType.INCREASE else + 'Decrease', 'over time', + '(by absolute change)' if is_absolute else '(by percent change)' + ]) diff --git a/server/tests/lib/nl/fulfiller_test.py b/server/tests/lib/nl/fulfiller_test.py index 74d9da1989..e65a592888 100644 --- a/server/tests/lib/nl/fulfiller_test.py +++ b/server/tests/lib/nl/fulfiller_test.py @@ -359,7 +359,7 @@ def test_ranking_across_svs(self, mock_sv_existence, mock_single_datapoint, # This follows up on test_simple(). It relies on topic as well. # Example: [what are the most grown agricultural things?] @patch.object(variable, 'extend_svs') - @patch.object(utils, 'rank_svs_by_growth_rate') + @patch.object(utils, 'rank_svs_by_series_growth') @patch.object(base, '_build_chart_vars') @patch.object(utils, 'sv_existence_for_places') def test_time_delta(self, mock_sv_existence, mock_topic_to_svs, mock_rank_svs, @@ -385,11 +385,17 @@ def test_time_delta(self, mock_sv_existence, mock_topic_to_svs, mock_rank_svs, 'FarmInventory_Rice', 'FarmInventory_Wheat', 'FarmInventory_Barley' ]] # Differently order result - mock_rank_svs.return_value = [ - 'FarmInventory_Barley', - 'FarmInventory_Rice', - 'FarmInventory_Wheat', - ] + mock_rank_svs.return_value = utils.GrowthRankedLists( + pct=[ + 'FarmInventory_Barley', + 'FarmInventory_Rice', + 'FarmInventory_Wheat', + ], + abs=[ + 'FarmInventory_Rice', + 'FarmInventory_Barley', + 'FarmInventory_Wheat', + ]) # Pass in just simple utterance got = _run(detection, [SIMPLE_UTTR]) diff --git a/server/tests/lib/nl/test_utterance.py b/server/tests/lib/nl/test_utterance.py index 51a262977b..76d77df114 100644 --- a/server/tests/lib/nl/test_utterance.py +++ b/server/tests/lib/nl/test_utterance.py @@ -541,62 +541,125 @@ 'ranked_charts': [{ 'attr': { 'block_id': 2, - 'chart_type': 'growth chart', + 'chart_type': '', 'class': ChartOriginType.PRIMARY_CHART, 'description': '', 'include_percapita': False, 'place_type': None, 'ranking_types': [], 'source_topic': 'dc/topic/Agriculture', - 'title': '' + 'title': 'Increase over time (by absolute ' + 'change)' }, 'chart_type': ChartType.TIMELINE_CHART, + 'event': None, 'places': [{ 'dcid': 'geoId/06', 'name': 'Foo Place', 'place_type': 'State' }], + 'svs': ['FarmInventory_Rice'] + }, { + 'attr': { + 'block_id': 2, + 'chart_type': '', + 'class': ChartOriginType.PRIMARY_CHART, + 'description': '', + 'include_percapita': False, + 'place_type': None, + 'ranking_types': [], + 'source_topic': 'dc/topic/Agriculture', + 'title': 'Increase over time (by absolute ' + 'change)' + }, + 'chart_type': ChartType.TIMELINE_CHART, 'event': None, + 'places': [{ + 'dcid': 'geoId/06', + 'name': 'Foo Place', + 'place_type': 'State' + }], 'svs': ['FarmInventory_Barley'] }, { 'attr': { 'block_id': 2, - 'chart_type': 'growth chart', + 'chart_type': '', 'class': ChartOriginType.PRIMARY_CHART, 'description': '', 'include_percapita': False, 'place_type': None, 'ranking_types': [], 'source_topic': 'dc/topic/Agriculture', - 'title': '' + 'title': 'Increase over time (by absolute ' + 'change)' + }, + 'chart_type': ChartType.TIMELINE_CHART, + 'event': None, + 'places': [{ + 'dcid': 'geoId/06', + 'name': 'Foo Place', + 'place_type': 'State' + }], + 'svs': ['FarmInventory_Wheat'] + }, { + 'attr': { + 'block_id': 12, + 'chart_type': '', + 'class': ChartOriginType.PRIMARY_CHART, + 'description': '', + 'include_percapita': False, + 'place_type': None, + 'ranking_types': [], + 'source_topic': 'dc/topic/Agriculture', + 'title': 'Increase over time (by percent change)' }, 'chart_type': ChartType.TIMELINE_CHART, + 'event': None, 'places': [{ 'dcid': 'geoId/06', 'name': 'Foo Place', 'place_type': 'State' }], + 'svs': ['FarmInventory_Barley'] + }, { + 'attr': { + 'block_id': 12, + 'chart_type': '', + 'class': ChartOriginType.PRIMARY_CHART, + 'description': '', + 'include_percapita': False, + 'place_type': None, + 'ranking_types': [], + 'source_topic': 'dc/topic/Agriculture', + 'title': 'Increase over time (by percent change)' + }, + 'chart_type': ChartType.TIMELINE_CHART, 'event': None, + 'places': [{ + 'dcid': 'geoId/06', + 'name': 'Foo Place', + 'place_type': 'State' + }], 'svs': ['FarmInventory_Rice'] }, { 'attr': { - 'block_id': 2, - 'chart_type': 'growth chart', + 'block_id': 12, + 'chart_type': '', 'class': ChartOriginType.PRIMARY_CHART, 'description': '', 'include_percapita': False, 'place_type': None, 'ranking_types': [], 'source_topic': 'dc/topic/Agriculture', - 'title': '' + 'title': 'Increase over time (by percent change)' }, 'chart_type': ChartType.TIMELINE_CHART, + 'event': None, 'places': [{ 'dcid': 'geoId/06', 'name': 'Foo Place', 'place_type': 'State' }], - 'event': None, 'svs': ['FarmInventory_Wheat'] }], 'svs': ['dc/topic/AgricultureProduction'], diff --git a/server/tests/lib/nl/utils_test.py b/server/tests/lib/nl/utils_test.py index ce92d7676b..c6fe966b86 100644 --- a/server/tests/lib/nl/utils_test.py +++ b/server/tests/lib/nl/utils_test.py @@ -133,7 +133,9 @@ def test_year(self): }, ] # (20 - 10) / (2 years * 10) - self.assertEqual(0.0013698630136986301, utils.compute_growth_rate(s)) + gr = utils.compute_series_growth(s) + self.assertEqual(0.0013698630136986301, gr.pct) + self.assertEqual(0.0136986301369863, gr.abs) def test_month_unadjusted(self): s = [ @@ -151,7 +153,9 @@ def test_month_unadjusted(self): }, ] # (10 - 20) / (24 months * 20) - self.assertEqual(-0.0006849315068493151, utils.compute_growth_rate(s)) + gr = utils.compute_series_growth(s) + self.assertEqual(-0.0006849315068493151, gr.pct) + self.assertEqual(-0.0136986301369863, gr.abs) # Here we will pick 2017-06 instead of 2017-01 to match the latest month (2017-06), # and thus same result as before. @@ -183,7 +187,9 @@ def test_month_adjusted(self): }, ] # (10 - 20) / (24 months * 20) - self.assertEqual(-0.0006849315068493151, utils.compute_growth_rate(s)) + gr = utils.compute_series_growth(s) + self.assertEqual(-0.0006849315068493151, gr.pct) + self.assertEqual(-0.0136986301369863, gr.abs) def test_day(self): s = [ @@ -201,7 +207,9 @@ def test_day(self): }, ] # (20 - 10) / (2 years * 10) - self.assertEqual(0.0013698630136986301, utils.compute_growth_rate(s)) + gr = utils.compute_series_growth(s) + self.assertEqual(0.0013698630136986301, gr.pct) + self.assertEqual(0.0136986301369863, gr.abs) def test_error(self): s = [ @@ -219,6 +227,6 @@ def test_error(self): }, ] with self.assertRaises(ValueError) as context: - utils.compute_growth_rate(s) + utils.compute_series_growth(s) self.assertTrue( 'Dates have different granularity' in str(context.exception)) diff --git a/static/js/constants/app/nl_interface_constants.ts b/static/js/constants/app/nl_interface_constants.ts index b8fc3f36ed..110ca185ed 100644 --- a/static/js/constants/app/nl_interface_constants.ts +++ b/static/js/constants/app/nl_interface_constants.ts @@ -22,7 +22,7 @@ export const NL_SMALL_TILE_CLASS = "tile-sm"; export const NL_MED_TILE_CLASS = "tile-md"; export const NL_LARGE_TILE_CLASS = "tile-lg"; // Number of tiles to show. -export const NL_NUM_TILES_SHOWN = 6; +export const NL_NUM_TILES_SHOWN = 3; export const NL_SOURCE_REPLACEMENTS = { "https://www.datacommons.org/": "https://www.google.com", "https://datacommons.org/": "https://www.google.com",