From 6927767771d6f41ae84530fdd65b465fa8a0dd44 Mon Sep 17 00:00:00 2001 From: lindsay stevens Date: Fri, 10 Jan 2025 05:17:48 +1100 Subject: [PATCH] chg: improve choices handling and instances generation - for choices, an Itemset class is introduced which allows calculating and storing metadata about the choice list more consistently. In particular, working out whether itext is required only once. - the initial motivation was the if block in survey.py line 391, which for a form with many choice lists resulted in a large slow down due to something approaching O(n*m) + costs from string join/split. - this allowed removing: - "_itemset*" properties from MultipleChoiceQuestion - _search_lists property from Survey - functions is_label_dynamic and has_dynamic_label - in the course of implementing this it became clear that actually the Tag class never has choices so code for that is removed. The Tag class is like an Option with respect to OsmUploadQuestion. Added some OSM-related test cases to check it's not broken. - similarly since choices/itext are handled by Survey, the MultipleChoiceQuestion class doesn't need choices children either, just a reference to the Itemset for XML and JSON output. - for instances generation, the above choices changes allowed simplification of _generate_static_instances, and otherwise the changes are mainly to avoid repeating checks or using intermediate lists by instead using generators/yield as much as possible. - test_j2x_creation.py / test_j2x_question.py / strings.ini - updated these tests since they are using internal APIs in a way that diverges significantly from what xls2json currently emits - For example test_select_one_question_multilingual had multi-lang choice labels but the expected XML string had a reference like "/test/qname/a:label" which implies choice itemsets aren't shared which has not been the case for a while. - tried to make these tests more useful by adding xpath assertions, and unlike other tests using ss_structure they may be useful for validating/showing what dict structure xlsforms can be provided as. - test_j2x_xform_build_preparation.py - removed this test since it's not clear what the expectation is. If it was intended to check that identical choice lists from separate questions are merged, then that functionality doesn't exist, and the choices should not be provided separately per question anyway. - test_dynamic_default.py / test_translations.py - updated performance test figures. - translation test benefits most from the choices changes because it has lots of choice lists. Increase in memory most likely due to Itemset instances that now wrap each choice list. - dynamic_default slightly faster due to the instances changes and earlier commits today (e.g. not calling xml_label_or_hint twice for input/text questions, etc). --- pyxform/builder.py | 24 +- pyxform/question.py | 198 ++++------ pyxform/survey.py | 423 +++++++++------------- pyxform/survey_element.py | 16 +- pyxform/utils.py | 21 +- tests/fixtures/strings.ini | 4 - tests/test_dynamic_default.py | 12 +- tests/test_j2x_creation.py | 27 +- tests/test_j2x_question.py | 194 ++++++---- tests/test_j2x_xform_build_preparation.py | 42 --- tests/test_osm.py | 180 +++++++++ tests/test_translations.py | 12 +- 12 files changed, 603 insertions(+), 550 deletions(-) delete mode 100644 tests/test_j2x_xform_build_preparation.py diff --git a/pyxform/builder.py b/pyxform/builder.py index 67189f1a..2b699de9 100644 --- a/pyxform/builder.py +++ b/pyxform/builder.py @@ -146,16 +146,20 @@ def _create_question_from_dict( ) if question_class: - if const.CHOICES in d and choices: - return question_class( - question_type_dictionary=question_type_dictionary, - choices=choices.get(d[const.ITEMSET], d[const.CHOICES]), - **{k: v for k, v in d.items() if k != const.CHOICES}, - ) - else: - return question_class( - question_type_dictionary=question_type_dictionary, **d - ) + if choices: + d_choices = d.get(const.CHOICES, d.get(const.CHILDREN)) + if d_choices: + return question_class( + question_type_dictionary=question_type_dictionary, + **{ + k: v + for k, v in d.items() + if k not in {const.CHOICES, const.CHILDREN} + }, + choices=choices.get(d[const.ITEMSET], d_choices), + ) + + return question_class(question_type_dictionary=question_type_dictionary, **d) return () diff --git a/pyxform/question.py b/pyxform/question.py index a5ceb9f2..6ebac4c4 100644 --- a/pyxform/question.py +++ b/pyxform/question.py @@ -3,6 +3,7 @@ """ import os.path +import re from collections.abc import Callable, Generator, Iterable from itertools import chain from typing import TYPE_CHECKING @@ -16,12 +17,12 @@ EXTERNAL_INSTANCE_EXTENSIONS, ) from pyxform.errors import PyXFormError +from pyxform.parsing.expression import RE_ANY_PYXFORM_REF from pyxform.question_type_dictionary import QUESTION_TYPE_DICT from pyxform.survey_element import SURVEY_ELEMENT_FIELDS, SurveyElement from pyxform.utils import ( PYXFORM_REFERENCE_REGEX, DetachableElement, - coalesce, combine_lists, default_is_dynamic, node, @@ -32,9 +33,6 @@ QUESTION_EXTRA_FIELDS = ( - "_itemset_dyn_label", - "_itemset_has_media", - "_itemset_multi_language", "_qtd_defaults", "_qtd_kwargs", "action", @@ -55,7 +53,7 @@ QUESTION_FIELDS = (*SURVEY_ELEMENT_FIELDS, *QUESTION_EXTRA_FIELDS) SELECT_QUESTION_EXTRA_FIELDS = ( - constants.CHILDREN, + constants.CHOICES, constants.ITEMSET, constants.LIST_NAME_U, ) @@ -65,15 +63,12 @@ OSM_QUESTION_FIELDS = (*QUESTION_FIELDS, *SELECT_QUESTION_EXTRA_FIELDS) OPTION_EXTRA_FIELDS = ( - "_choice_itext_id", + "_choice_itext_ref", constants.MEDIA, "sms_option", ) OPTION_FIELDS = (*SURVEY_ELEMENT_FIELDS, *OPTION_EXTRA_FIELDS) -TAG_EXTRA_FIELDS = (constants.CHILDREN,) -TAG_FIELDS = (*SURVEY_ELEMENT_FIELDS, *TAG_EXTRA_FIELDS) - class Question(SurveyElement): __slots__ = QUESTION_EXTRA_FIELDS @@ -291,33 +286,18 @@ def __init__( sms_option: str | None = None, **kwargs, ): - self._choice_itext_id: str | None = None + self._choice_itext_ref: str | None = None self.media: dict | None = media self.sms_option: str | None = sms_option super().__init__(name=name, label=label, **kwargs) - def xml_value(self): - return node("value", self.name) - - def xml(self, survey: "Survey"): - item = node("item") - item.appendChild(self.xml_label(survey=survey)) - item.appendChild(self.xml_value()) - - return item - def validate(self): pass def xml_control(self, survey: "Survey"): raise NotImplementedError() - def _translation_path(self, display_element): - if self._choice_itext_id is not None: - return self._choice_itext_id - return super()._translation_path(display_element=display_element) - def to_json_dict(self, delete_keys: Iterable[str] | None = None) -> dict: to_delete = (k for k in self.get_slot_names() if k.startswith("_")) if delete_keys is not None: @@ -325,6 +305,41 @@ def to_json_dict(self, delete_keys: Iterable[str] | None = None) -> dict: return super().to_json_dict(delete_keys=to_delete) +class Itemset: + """Itemset details and metadata detection.""" + + __slots__ = ("name", "options", "requires_itext", "used_by_search") + + def __init__(self, name: str, choices: Iterable[dict]): + self.requires_itext: bool = False + self.used_by_search: bool = False + self.name: str = name + self.options: tuple[Option, ...] = tuple(o for o in self.get_options(choices)) + + def get_options(self, choices: Iterable[dict]) -> Generator[Option, None, None]: + requires_itext = False + for c in choices: + option = Option(**c) + if not requires_itext: + # Media: dict of image, audio, etc. Defaults to None. + if option.media: + requires_itext = True + else: + choice_label = option.label + label_is_dict = isinstance(choice_label, dict) + # Multi-language: dict of labels etc per language. Can be just a string. + if label_is_dict: + requires_itext = True + # Dynamic label: string contains a pyxform reference. + elif ( + choice_label + and re.search(RE_ANY_PYXFORM_REF, choice_label) is not None + ): + requires_itext = True + yield option + self.requires_itext = requires_itext + + class MultipleChoiceQuestion(Question): __slots__ = SELECT_QUESTION_EXTRA_FIELDS @@ -335,53 +350,21 @@ def get_slot_names() -> tuple[str, ...]: def __init__( self, itemset: str | None = None, list_name: str | None = None, **kwargs ): - # Internals - self._itemset_dyn_label: bool = False - self._itemset_has_media: bool = False - self._itemset_multi_language: bool = False + if not itemset and not list_name: + raise PyXFormError( + "Arguments 'itemset' and 'list_name' must not both be None or empty." + ) # Structure - self.children: tuple[Option, ...] | None = None + self.choices: Itemset | None = None self.itemset: str | None = itemset self.list_name: str | None = list_name - # Notice that choices can be specified under choices or children. - # I'm going to try to stick to just choices. - # Aliases in the json format will make it more difficult - # to use going forward. - kw_choices = kwargs.pop(constants.CHOICES, None) - kw_children = kwargs.pop(constants.CHILDREN, None) - choices = coalesce(kw_choices, kw_children) - if isinstance(choices, tuple) and isinstance(next(iter(choices)), Option): - self.children = choices - elif choices: - self.children = tuple( - Option(**c) for c in combine_lists(kw_choices, kw_children) - ) + choices = kwargs.pop(constants.CHOICES, None) + if isinstance(choices, Itemset): + self.choices = choices super().__init__(**kwargs) - def validate(self): - Question.validate(self) - if self.children: - for child in self.children: - child.validate() - - def iter_descendants( - self, - condition: Callable[["SurveyElement"], bool] | None = None, - iter_into_section_items: bool = False, - ) -> Generator["SurveyElement", None, None]: - if condition is None: - yield self - elif condition(self): - yield self - if iter_into_section_items and self.children: - for e in self.children: - yield from e.iter_descendants( - condition=condition, - iter_into_section_items=iter_into_section_items, - ) - def build_xml(self, survey: "Survey"): if self.bind["type"] not in {"string", "odk:rank"}: raise PyXFormError("""Invalid value for `self.bind["type"]`.""") @@ -403,21 +386,18 @@ def build_xml(self, survey: "Survey"): itemset_value_ref = self.parameters.get("value", itemset_value_ref) itemset_label_ref = self.parameters.get("label", itemset_label_ref) - multi_language = self._itemset_multi_language - has_media = self._itemset_has_media - has_dyn_label = self._itemset_dyn_label is_previous_question = bool(PYXFORM_REFERENCE_REGEX.search(self.itemset)) if file_extension in EXTERNAL_INSTANCE_EXTENSIONS: pass - elif not multi_language and not has_media and not has_dyn_label: + elif self.choices and self.choices.requires_itext: itemset = self.itemset + itemset_label_ref = "jr:itext(itextId)" else: itemset = self.itemset - itemset_label_ref = "jr:itext(itextId)" choice_filter = self.choice_filter - if choice_filter is not None: + if choice_filter: choice_filter = survey.insert_xpaths( choice_filter, self, True, is_previous_question ) @@ -460,63 +440,43 @@ def build_xml(self, survey: "Survey"): nodeset += ")" - itemset_children = [ - node("value", ref=itemset_value_ref), - node("label", ref=itemset_label_ref), - ] - result.appendChild(node("itemset", *itemset_children, nodeset=nodeset)) - elif self.children: - for child in self.children: - result.appendChild(child.xml(survey=survey)) + result.appendChild( + node( + "itemset", + node("value", ref=itemset_value_ref), + node("label", ref=itemset_label_ref), + nodeset=nodeset, + ) + ) + elif self.choices: + # Options processing specific to XLSForms using the "search()" function. + # The _choice_itext_ref is prepared by Survey._redirect_is_search_itext. + itemset = self.choices + if itemset.used_by_search: + for option in itemset.options: + if itemset.requires_itext: + label_node = node("label", ref=option._choice_itext_ref) + elif self.label: + label, output_inserted = survey.insert_output_values( + option.label, option + ) + label_node = node("label", label, toParseString=output_inserted) + else: + label_node = node("label") + result.appendChild( + node("item", label_node, node("value", option.name)) + ) return result class Tag(SurveyElement): - __slots__ = TAG_EXTRA_FIELDS - @staticmethod def get_slot_names() -> tuple[str, ...]: - return TAG_FIELDS - - def __init__(self, name: str, label: str | dict | None = None, **kwargs): - self.children: tuple[Option, ...] | None = None - - kw_choices = kwargs.pop(constants.CHOICES, None) - kw_children = kwargs.pop(constants.CHILDREN, None) - choices = coalesce(kw_choices, kw_children) - if isinstance(choices, tuple) and isinstance(next(iter(choices)), Option): - self.children = choices - elif choices: - self.children = tuple( - Option(**c) for c in combine_lists(kw_choices, kw_children) - ) - super().__init__(name=name, label=label, **kwargs) - - def iter_descendants( - self, - condition: Callable[["SurveyElement"], bool] | None = None, - iter_into_section_items: bool = False, - ) -> Generator["SurveyElement", None, None]: - if condition is None: - yield self - elif condition(self): - yield self - if iter_into_section_items and self.children: - for e in self.children: - yield from e.iter_descendants( - condition=condition, - iter_into_section_items=iter_into_section_items, - ) + return SURVEY_ELEMENT_FIELDS def xml(self, survey: "Survey"): - result = node("tag", key=self.name) - result.appendChild(self.xml_label(survey=survey)) - if self.children: - for choice in self.children: - result.appendChild(choice.xml(survey=survey)) - - return result + return node("tag", self.xml_label(survey=survey), key=self.name) def validate(self): pass diff --git a/pyxform/survey.py b/pyxform/survey.py index a36d60fd..5e29740a 100644 --- a/pyxform/survey.py +++ b/pyxform/survey.py @@ -20,7 +20,7 @@ from pyxform.instance import SurveyInstance from pyxform.parsing.expression import has_last_saved from pyxform.parsing.instance_expression import replace_with_output -from pyxform.question import MultipleChoiceQuestion, Option, Question, Tag +from pyxform.question import Itemset, MultipleChoiceQuestion, Option, Question, Tag from pyxform.section import SECTION_EXTRA_FIELDS, Section from pyxform.survey_element import SURVEY_ELEMENT_FIELDS, SurveyElement from pyxform.utils import ( @@ -28,7 +28,6 @@ LAST_SAVED_INSTANCE_NAME, DetachableElement, escape_text_for_xml, - has_dynamic_label, node, ) from pyxform.validators import enketo_validate, odk_validate @@ -43,6 +42,7 @@ ) RE_PULLDATA = re.compile(r"(pulldata\s*\(\s*)(.*?),") SEARCH_FUNCTION_REGEX = re.compile(r"search\(.*?\)") +SELECT_TYPES = set(aliases.select) class InstanceInfo: @@ -170,22 +170,12 @@ def _get_steps_and_target_xpath(context_parent, xpath_parent, include_parent=Fal return (None, None) -@lru_cache(maxsize=128) -def is_label_dynamic(label: str) -> bool: - return ( - label is not None - and isinstance(label, str) - and re.search(BRACKETED_TAG_REGEX, label) is not None - ) - - def recursive_dict(): return defaultdict(recursive_dict) SURVEY_EXTRA_FIELDS = ( "_created", - "_search_lists", "_translations", "_xpath", "add_none_option", @@ -236,14 +226,13 @@ def get_slot_names() -> tuple[str, ...]: def __init__(self, **kwargs): # Internals self._created: datetime.now = datetime.now() - self._search_lists: set = set() self._translations: recursive_dict = recursive_dict() self._xpath: dict[str, Section | Question | None] = {} # Structure # attribute is for custom instance attrs from settings e.g. attribute::abc:xyz self.attribute: dict | None = None - self.choices: dict[str, tuple[Option, ...]] | None = None + self.choices: dict[str, Itemset] | None = None self.entity_features: list[str] | None = None self.setgeopoint_by_triggering_ref: dict[str, list[str]] = {} self.setvalues_by_triggering_ref: dict[str, list[str]] = {} @@ -279,11 +268,9 @@ def __init__(self, **kwargs): self.sms_separator: str | None = None choices = kwargs.pop("choices", None) - if choices is not None: + if choices and isinstance(choices, dict): self.choices = { - list_name: tuple( - c if isinstance(c, Option) else Option(**c) for c in values - ) + list_name: Itemset(name=list_name, choices=values) for list_name, values in choices.items() } kwargs[constants.TYPE] = constants.SURVEY @@ -380,80 +367,60 @@ def get_trigger_values_for_question_name(self, question_name: str, trigger_type: elif trigger_type == "setgeopoint": return self.setgeopoint_by_triggering_ref.get(f"${{{question_name}}}") - def _generate_static_instances(self, list_name, choice_list) -> InstanceInfo: + def _generate_static_instances( + self, list_name: str, itemset: Itemset + ) -> InstanceInfo: """ Generate elements for static data (e.g. choices for selects) """ - instance_element_list = [] - has_media = bool(choice_list[0].get("media")) - has_dyn_label = has_dynamic_label(choice_list) - multi_language = False - if isinstance(self._translations, dict): - choices = ( - True - for items in self._translations.values() - for k, v in items.items() - if v.get(constants.TYPE, "") == constants.CHOICE - and "-".join(k.split("-")[:-1]) == list_name - ) - try: - if next(choices): - multi_language = True - except StopIteration: - pass - for idx, choice in enumerate(choice_list): - choice_element_list = [] + def choice_nodes(idx, choice): # Add a unique id to the choice element in case there are itext references - if multi_language or has_media or has_dyn_label: - itext_id = f"{list_name}-{idx}" - choice_element_list.append(node("itextId", itext_id)) - - for name, value in choice.items(): - if not value: - continue - elif name != "label" and isinstance(value, str): - choice_element_list.append(node(name, value)) - elif name == "extra_data" and isinstance(value, dict): - for k, v in value.items(): - choice_element_list.append(node(k, v)) - elif ( - not multi_language - and not has_media - and not has_dyn_label - and isinstance(value, str) - and name == "label" - ): - choice_element_list.append(node(name, value)) - - instance_element_list.append(node("item", *choice_element_list)) + if itemset.requires_itext: + yield node("itextId", f"{list_name}-{idx}") + yield node(constants.NAME, choice.name) + choice_label = choice.label + if not itemset.requires_itext and isinstance(choice_label, str): + yield node(constants.LABEL, choice_label) + choice_extra_data = choice.extra_data + if choice_extra_data and isinstance(choice_extra_data, dict): + for k, v in choice_extra_data.items(): + yield node(k, v) + choice_sms_option = choice.sms_option + if choice_sms_option and isinstance(choice_sms_option, str): + yield node("sms_option", choice_sms_option) + + def instance_nodes(choices): + for idx, choice in enumerate(choices): + yield node("item", choice_nodes(idx, choice)) return InstanceInfo( type="choice", context="survey", name=list_name, src=None, - instance=node("instance", node("root", *instance_element_list), id=list_name), + instance=node( + "instance", + node("root", instance_nodes(itemset.options)), + id=list_name, + ), ) @staticmethod - def _generate_external_instances(element: SurveyElement) -> InstanceInfo | None: - if isinstance(element, ExternalInstance): - name = element["name"] - extension = element["type"].split("-")[0] - prefix = "file-csv" if extension == "csv" else "file" - src = f"jr://{prefix}/{name}.{extension}" - return InstanceInfo( - type="external", - context="[type: {t}, name: {n}]".format( - t=element["parent"]["type"], n=element["parent"]["name"] - ), - name=name, - src=src, - instance=node("instance", id=name, src=src), - ) - - return None + def _generate_external_instances(element: ExternalInstance) -> InstanceInfo: + name = element["name"] + extension = element["type"].split("-")[0] + prefix = "file-csv" if extension == "csv" else "file" + src = f"jr://{prefix}/{name}.{extension}" + return InstanceInfo( + type="external", + context="[type: {t}, name: {n}]".format( + t=element["parent"]["type"], n=element["parent"]["name"] + ), + name=name, + src=src, + instance=node("instance", id=name, src=src), + ) @staticmethod def _validate_external_instances(instances) -> None: @@ -483,14 +450,14 @@ def _validate_external_instances(instances) -> None: raise ValidationError("\n".join(errors)) @staticmethod - def _generate_pulldata_instances(element: SurveyElement) -> list[InstanceInfo] | None: + def _generate_pulldata_instances( + element: Question | Section, + ) -> Generator[InstanceInfo, None, None]: def get_pulldata_functions(element): """ Returns a list of different pulldata(... function strings if pulldata function is defined at least once for any of: calculate, constraint, readonly, required, relevant - - :param: element (pyxform.survey.Survey): """ functions_present = [] for formula_name in constants.EXTERNAL_INSTANCES: @@ -515,24 +482,20 @@ def get_pulldata_functions(element): return functions_present - def get_instance_info(element, file_id): + def get_instance_info(elem, file_id): uri = f"jr://file-csv/{file_id}.csv" + parent = elem.parent return InstanceInfo( type="pulldata", - context="[type: {t}, name: {n}]".format( - t=element["parent"]["type"], n=element["parent"]["name"] - ), + context=f"[type: {parent.type}, name: {parent.name}]", name=file_id, src=uri, instance=node("instance", id=file_id, src=uri), ) - if isinstance(element, Option | ExternalInstance | Tag | Survey): - return None pulldata_usages = get_pulldata_functions(element) if len(pulldata_usages) > 0: - pulldata_instances = [] for usage in pulldata_usages: for call_match in re.finditer(RE_PULLDATA, usage): groups = call_match.groups() @@ -540,40 +503,32 @@ def get_instance_info(element, file_id): first_argument = ( # first argument to pulldata() groups[1].replace("'", "").replace('"', "").strip() ) - pulldata_instances.append( - get_instance_info(element, first_argument) - ) - return pulldata_instances - return None + yield get_instance_info(element, first_argument) @staticmethod - def _generate_from_file_instances(element: SurveyElement) -> InstanceInfo | None: - if not isinstance(element, MultipleChoiceQuestion) or element.itemset is None: + def _generate_from_file_instances( + element: MultipleChoiceQuestion, + ) -> InstanceInfo | None: + itemset = element.itemset + if not itemset: return None - itemset = element.get("itemset") file_id, ext = os.path.splitext(itemset) if itemset and ext in EXTERNAL_INSTANCE_EXTENSIONS: file_ext = "file" if ext in {".xml", ".geojson"} else f"file-{ext[1:]}" uri = f"jr://{file_ext}/{itemset}" return InstanceInfo( type="file", - context="[type: {t}, name: {n}]".format( - t=element["parent"]["type"], n=element["parent"]["name"] - ), + context=f"[type: {element.parent.type}, name: {element.parent.name}]", name=file_id, src=uri, instance=node("instance", id=file_id, src=uri), ) - return None - @staticmethod - def _generate_last_saved_instance(element: SurveyElement) -> bool: + def _generate_last_saved_instance(element: Question) -> bool: """ True if a last-saved instance should be generated, false otherwise. """ - if not isinstance(element, Question): - return False if has_last_saved(element.default): return True if has_last_saved(element.choice_filter): @@ -633,49 +588,59 @@ def _generate_instances(self) -> Generator[DetachableElement, None, None]: - `select_one_external`: implicitly relies on a `itemsets.csv` file and uses XPath-like expressions for querying. """ - instances = [] - generate_last_saved = False - for i in self.iter_descendants(): - i_ext = self._generate_external_instances(element=i) - i_pull = self._generate_pulldata_instances(element=i) - i_file = self._generate_from_file_instances(element=i) - if not generate_last_saved: - generate_last_saved = self._generate_last_saved_instance(element=i) - for x in [i_ext, i_pull, i_file]: - if x is not None: - instances += x if isinstance(x, list) else [x] - - if generate_last_saved: - instances += [self._get_last_saved_instance()] - - # Append last so the choice instance is excluded on a name clash. - if self.choices: - for name, value in self.choices.items(): - if name not in self._search_lists: - instances += [ - self._generate_static_instances(list_name=name, choice_list=value) - ] + + def get_element_instances(): + generate_last_saved = False + for i in self.iter_descendants(): + if isinstance(i, Question): + yield from self._generate_pulldata_instances(element=i) + if isinstance(i, MultipleChoiceQuestion): + i_file = self._generate_from_file_instances(element=i) + if i_file: + yield i_file + if not generate_last_saved: + generate_last_saved = self._generate_last_saved_instance( + element=i + ) + elif isinstance(i, Section): + yield from self._generate_pulldata_instances(element=i) + elif isinstance(i, ExternalInstance): + yield self._generate_external_instances(element=i) + + if generate_last_saved: + yield self._get_last_saved_instance() + + # Append last so the choice instance is excluded on a name clash. + if self.choices: + for k, v in self.choices.items(): + if not v.used_by_search: + yield self._generate_static_instances(list_name=k, itemset=v) + + instances = tuple(get_element_instances()) # Check that external instances have unique names. if instances: - ext_only = [x for x in instances if x.type == "external"] - self._validate_external_instances(instances=ext_only) + self._validate_external_instances( + instances=(x for x in instances if x.type == "external") + ) seen = {} for i in instances: - if i.name in seen and seen[i.name].src != i.src: - # Instance id exists with different src URI -> error. - msg = ( - "The same instance id will be generated for different " - "external instance source URIs. Please check the form." - f" Instance name: '{i.name}', Existing type: '{seen[i.name].type}', " - f"Existing URI: '{seen[i.name].src}', Duplicate type: '{i.type}', " - f"Duplicate URI: '{i.src}', Duplicate context: '{i.context}'." - ) - raise PyXFormError(msg) - elif i.name in seen and seen[i.name].src == i.src: - # Instance id exists with same src URI -> ok, don't duplicate. - continue + prior = seen.get(i.name) + if prior: + if prior.src != i.src: + # Instance id exists with different src URI -> error. + msg = ( + "The same instance id will be generated for different " + "external instance source URIs. Please check the form." + f" Instance name: '{i.name}', Existing type: '{prior.type}', " + f"Existing URI: '{prior.src}', Duplicate type: '{i.type}', " + f"Duplicate URI: '{i.src}', Duplicate context: '{i.context}'." + ) + raise PyXFormError(msg) + else: + # Instance id exists with same src URI -> ok, don't duplicate. + continue else: # Instance doesn't exist yet -> add it. yield i.instance @@ -786,7 +751,7 @@ def _add_to_nested_dict(self, dicty, path, value): dicty[path[0]] = {} self._add_to_nested_dict(dicty[path[0]], path[1:], value) - def _redirect_is_search_itext(self, element: Question) -> bool: + def _redirect_is_search_itext(self, element: MultipleChoiceQuestion) -> bool: """ For selects using the "search()" function, redirect itext for in-line items. @@ -801,29 +766,29 @@ def _redirect_is_search_itext(self, element: Question) -> bool: :param element: A select type question. :return: If True, the element uses the search function. """ + is_search = False try: - is_search = bool( - SEARCH_FUNCTION_REGEX.search( - element[constants.CONTROL][constants.APPEARANCE] - ) - ) + appearance = element.control[constants.APPEARANCE] + if appearance and len(appearance) > 7: + is_search = bool(SEARCH_FUNCTION_REGEX.search(appearance)) except (KeyError, TypeError): - is_search = False + pass if is_search: - file_id, ext = os.path.splitext(element[constants.ITEMSET]) - if ext in EXTERNAL_INSTANCE_EXTENSIONS: + ext = os.path.splitext(element.itemset)[1] + if ext and ext in EXTERNAL_INSTANCE_EXTENSIONS: msg = ( - f"Question '{element[constants.NAME]}' is a select from file type, " + f"Question '{element.name}' is a select from file type, " "using 'search()'. This combination is not supported. " "Remove the 'search()' usage, or change the select type." ) raise PyXFormError(msg) - if self.choices: - element.children = self.choices.get(element[constants.ITEMSET], None) - element[constants.ITEMSET] = "" - if element.children is not None: - for i, opt in enumerate(element.children): - opt["_choice_itext_id"] = f"{element[constants.LIST_NAME_U]}-{i}" + + element.itemset = "" + itemset = element.choices + if not itemset.used_by_search: + itemset.used_by_search = True + for i, opt in enumerate(itemset.options): + opt._choice_itext_ref = f"jr:itext('{itemset.name}-{i}')" return is_search def _setup_translations(self): @@ -832,58 +797,35 @@ def _setup_translations(self): setup media and itext functions """ - def _setup_choice_translations( - name, choice_value, itext_id - ) -> Generator[tuple[list[str], str], None, None]: - for media_or_lang, value in choice_value.items(): - if isinstance(value, dict): - for language, val in value.items(): - yield ([language, itext_id, media_or_lang], val) - elif name == constants.MEDIA: - yield ([self.default_language, itext_id, media_or_lang], value) - else: - yield ([media_or_lang, itext_id, "long"], value) + def get_choice_content(name, idx, choice): + itext_id = f"{name}-{idx}" - itemsets_multi_language = set() - itemsets_has_media = set() - itemsets_has_dyn_label = set() + choice_label = choice.label + if choice_label: + if isinstance(choice_label, dict): + for lang, value in choice_label.items(): + if isinstance(value, dict): + for language, val in value.items(): + yield ([language, itext_id, lang], val) + else: + yield ([lang, itext_id, "long"], value) + else: + yield ([self.default_language, itext_id, "long"], choice_label) + + choice_media = choice.media + if choice_media: + for media, value in choice_media.items(): + if isinstance(value, dict): + for language, val in value.items(): + yield ([language, itext_id, media], val) + else: + yield ([self.default_language, itext_id, media], value) def get_choices(): - for list_name, choice_list in self.choices.items(): - multi_language = False - has_media = False - dyn_label = False - choices = [] - for idx, choice in enumerate(choice_list): - for col_name, choice_value in choice.items(): - lang_choice = None - if not choice_value: - continue - if col_name == constants.MEDIA: - has_media = True - lang_choice = choice_value - elif col_name == constants.LABEL: - if isinstance(choice_value, dict): - lang_choice = choice_value - multi_language = True - else: - lang_choice = {self.default_language: choice_value} - if is_label_dynamic(choice_value): - dyn_label = True - if lang_choice is not None: - # e.g. (label, {"default": "Yes"}, "consent", 0) - choices.append((col_name, lang_choice, list_name, idx)) - if multi_language or has_media or dyn_label: - if multi_language: - itemsets_multi_language.add(list_name) - if has_media: - itemsets_has_media.add(list_name) - if dyn_label: - itemsets_has_dyn_label.add(list_name) - for c in choices: - yield from _setup_choice_translations( - c[0], c[1], f"{c[2]}-{c[3]}" - ) + for name, itemset in self.choices.items(): + if itemset.requires_itext: + for idx, choice in enumerate(itemset.options): + yield from get_choice_content(name, idx, choice) if self.choices: for path, value in get_choices(): @@ -891,53 +833,40 @@ def get_choices(): leaf_value = {last_path: value, constants.TYPE: constants.CHOICE} self._add_to_nested_dict(self._translations, path, leaf_value) - select_types = set(aliases.select) search_lists = set() non_search_lists = set() for element in self.iter_descendants( condition=lambda i: isinstance(i, Question | Section) ): if isinstance(element, MultipleChoiceQuestion): - if element.itemset is not None: - element._itemset_multi_language = ( - element.itemset in itemsets_multi_language - ) - element._itemset_has_media = element.itemset in itemsets_has_media - element._itemset_dyn_label = element.itemset in itemsets_has_dyn_label - - if element.type in select_types: - select_ref = (element[constants.NAME], element[constants.LIST_NAME_U]) - if self._redirect_is_search_itext(element=element): - search_lists.add(select_ref) - self._search_lists.add(element[constants.LIST_NAME_U]) - else: - non_search_lists.add(select_ref) - - # Skip creation of translations for choices in selects. The creation of these - # translations is done above in this function. - parent = element.get("parent") - if parent is not None and parent[constants.TYPE] not in select_types: - for d in element.get_translations(self.default_language): - translation_path = d["path"] - form = "long" - - if "guidance_hint" in d["path"]: - translation_path = d["path"].replace("guidance_hint", "hint") - form = "guidance" - - self._translations[d["lang"]][translation_path] = self._translations[ - d["lang"] - ].get(translation_path, {}) - - self._translations[d["lang"]][translation_path].update( - { - form: { - "text": d["text"], - "output_context": d["output_context"], - }, - constants.TYPE: constants.QUESTION, - } - ) + select_ref = (element.name, element.list_name) + if self._redirect_is_search_itext(element=element): + search_lists.add(select_ref) + else: + non_search_lists.add(select_ref) + + # Create translations questions. + for d in element.get_translations(self.default_language): + translation_path = d["path"] + form = "long" + + if "guidance_hint" in d["path"]: + translation_path = d["path"].replace("guidance_hint", "hint") + form = "guidance" + + self._translations[d["lang"]][translation_path] = self._translations[ + d["lang"] + ].get(translation_path, {}) + + self._translations[d["lang"]][translation_path].update( + { + form: { + "text": d["text"], + "output_context": d["output_context"], + }, + constants.TYPE: constants.QUESTION, + } + ) for q_name, list_name in search_lists: choice_refs = [f"'{q}'" for q, c in non_search_lists if c == list_name] diff --git a/pyxform/survey_element.py b/pyxform/survey_element.py index a15bbaf0..509228f8 100644 --- a/pyxform/survey_element.py +++ b/pyxform/survey_element.py @@ -299,10 +299,18 @@ def to_json_dict(self, delete_keys: Iterable[str] | None = None) -> dict: ] choices = result.pop("choices", None) if choices: - result["choices"] = { - list_name: [o.to_json_dict(delete_keys=("parent",)) for o in options] - for list_name, options in choices.items() - } + if isinstance(choices, dict): + result["choices"] = { + list_name: [ + o.to_json_dict(delete_keys=("parent",)) for o in itemset.options + ] + for list_name, itemset in choices.items() + } + else: + result["children"] = [ + o.to_json_dict(delete_keys=("parent",)) for o in choices.options + ] + # Translation items with "output_context" have circular references. if "_translations" in result: for lang in result["_translations"].values(): diff --git a/pyxform/utils.py b/pyxform/utils.py index 3ca4ab93..f5562ba5 100644 --- a/pyxform/utils.py +++ b/pyxform/utils.py @@ -43,7 +43,7 @@ class DetachableElement(Element): """ def __init__(self, *args, **kwargs): - Element.__init__(self, *args, **kwargs) + super().__init__(*args, **kwargs) self.ownerDocument = None def writexml(self, writer, indent="", addindent="", newl=""): @@ -226,7 +226,7 @@ def default_is_dynamic(element_default, element_type=None): * Contains arithmetic operator, including 'div' and 'mod' (except '-' for 'date' type). * Contains brackets, parentheses or braces. """ - if not isinstance(element_default, str): + if not element_default or not isinstance(element_default, str): return False tokens, _ = parse_expression(element_default) @@ -251,23 +251,6 @@ def default_is_dynamic(element_default, element_type=None): return False -def has_dynamic_label(choice_list: "list[dict[str, str]]") -> bool: - """ - If the first or second choice label includes a reference, we must use itext. - - Check the first two choices in case first is something like "Other". - """ - for c in choice_list[:2]: - choice_label = c.get("label") - if ( - choice_label is not None - and isinstance(choice_label, str) - and re.search(BRACKETED_TAG_REGEX, choice_label) is not None - ): - return True - return False - - def levenshtein_distance(a: str, b: str) -> int: """ Calculate Levenshtein distance between two strings. diff --git a/tests/fixtures/strings.ini b/tests/fixtures/strings.ini index 90dfedd3..5ca67d8a 100644 --- a/tests/fixtures/strings.ini +++ b/tests/fixtures/strings.ini @@ -5,14 +5,10 @@ test_answers_can_be_imported_from_xml =