From 6927767771d6f41ae84530fdd65b465fa8a0dd44 Mon Sep 17 00:00:00 2001
From: lindsay stevens <lstevens@getodk.org>
Date: Fri, 10 Jan 2025 05:17:48 +1100
Subject: [PATCH] chg: improve choices handling and instances generation

- for choices, an Itemset class is introduced which allows calculating
  and storing metadata about the choice list more consistently. In
  particular, working out whether itext is required only once.
  - the initial motivation was the if block in survey.py line 391, which
    for a form with many choice lists resulted in a large slow down due
    to something approaching O(n*m) + costs from string join/split.
  - this allowed removing:
    - "_itemset*" properties from MultipleChoiceQuestion
    - _search_lists property from Survey
    - functions is_label_dynamic and has_dynamic_label
  - in the course of implementing this it became clear that actually
    the Tag class never has choices so code for that is removed. The
    Tag class is like an Option with respect to OsmUploadQuestion. Added
    some OSM-related test cases to check it's not broken.
  - similarly since choices/itext are handled by Survey, the
    MultipleChoiceQuestion class doesn't need choices children either,
    just a reference to the Itemset for XML and JSON output.
- for instances generation, the above choices changes allowed
  simplification of _generate_static_instances, and otherwise the
  changes are mainly to avoid repeating checks or using intermediate
  lists by instead using generators/yield as much as possible.
- test_j2x_creation.py / test_j2x_question.py / strings.ini
  - updated these tests since they are using internal APIs in a way that
    diverges significantly from what xls2json currently emits
    - For example test_select_one_question_multilingual had multi-lang
      choice labels but the expected XML string had a reference like
      "/test/qname/a:label" which implies choice itemsets aren't shared
      which has not been the case for a while.
  - tried to make these tests more useful by adding xpath assertions,
    and unlike other tests using ss_structure they may be useful for
    validating/showing what dict structure xlsforms can be provided as.
- test_j2x_xform_build_preparation.py
  - removed this test since it's not clear what the expectation is. If
    it was intended to check that identical choice lists from separate
    questions are merged, then that functionality doesn't exist, and
    the choices should not be provided separately per question anyway.
- test_dynamic_default.py / test_translations.py
  - updated performance test figures.
  - translation test benefits most from the choices changes because it
    has lots of choice lists. Increase in memory most likely due to
    Itemset instances that now wrap each choice list.
  - dynamic_default slightly faster due to the instances changes and
    earlier commits today (e.g. not calling xml_label_or_hint twice for
    input/text questions, etc).
---
 pyxform/builder.py                        |  24 +-
 pyxform/question.py                       | 198 ++++------
 pyxform/survey.py                         | 423 +++++++++-------------
 pyxform/survey_element.py                 |  16 +-
 pyxform/utils.py                          |  21 +-
 tests/fixtures/strings.ini                |   4 -
 tests/test_dynamic_default.py             |  12 +-
 tests/test_j2x_creation.py                |  27 +-
 tests/test_j2x_question.py                | 194 ++++++----
 tests/test_j2x_xform_build_preparation.py |  42 ---
 tests/test_osm.py                         | 180 +++++++++
 tests/test_translations.py                |  12 +-
 12 files changed, 603 insertions(+), 550 deletions(-)
 delete mode 100644 tests/test_j2x_xform_build_preparation.py

diff --git a/pyxform/builder.py b/pyxform/builder.py
index 67189f1a..2b699de9 100644
--- a/pyxform/builder.py
+++ b/pyxform/builder.py
@@ -146,16 +146,20 @@ def _create_question_from_dict(
         )
 
         if question_class:
-            if const.CHOICES in d and choices:
-                return question_class(
-                    question_type_dictionary=question_type_dictionary,
-                    choices=choices.get(d[const.ITEMSET], d[const.CHOICES]),
-                    **{k: v for k, v in d.items() if k != const.CHOICES},
-                )
-            else:
-                return question_class(
-                    question_type_dictionary=question_type_dictionary, **d
-                )
+            if choices:
+                d_choices = d.get(const.CHOICES, d.get(const.CHILDREN))
+                if d_choices:
+                    return question_class(
+                        question_type_dictionary=question_type_dictionary,
+                        **{
+                            k: v
+                            for k, v in d.items()
+                            if k not in {const.CHOICES, const.CHILDREN}
+                        },
+                        choices=choices.get(d[const.ITEMSET], d_choices),
+                    )
+
+            return question_class(question_type_dictionary=question_type_dictionary, **d)
 
         return ()
 
diff --git a/pyxform/question.py b/pyxform/question.py
index a5ceb9f2..6ebac4c4 100644
--- a/pyxform/question.py
+++ b/pyxform/question.py
@@ -3,6 +3,7 @@
 """
 
 import os.path
+import re
 from collections.abc import Callable, Generator, Iterable
 from itertools import chain
 from typing import TYPE_CHECKING
@@ -16,12 +17,12 @@
     EXTERNAL_INSTANCE_EXTENSIONS,
 )
 from pyxform.errors import PyXFormError
+from pyxform.parsing.expression import RE_ANY_PYXFORM_REF
 from pyxform.question_type_dictionary import QUESTION_TYPE_DICT
 from pyxform.survey_element import SURVEY_ELEMENT_FIELDS, SurveyElement
 from pyxform.utils import (
     PYXFORM_REFERENCE_REGEX,
     DetachableElement,
-    coalesce,
     combine_lists,
     default_is_dynamic,
     node,
@@ -32,9 +33,6 @@
 
 
 QUESTION_EXTRA_FIELDS = (
-    "_itemset_dyn_label",
-    "_itemset_has_media",
-    "_itemset_multi_language",
     "_qtd_defaults",
     "_qtd_kwargs",
     "action",
@@ -55,7 +53,7 @@
 QUESTION_FIELDS = (*SURVEY_ELEMENT_FIELDS, *QUESTION_EXTRA_FIELDS)
 
 SELECT_QUESTION_EXTRA_FIELDS = (
-    constants.CHILDREN,
+    constants.CHOICES,
     constants.ITEMSET,
     constants.LIST_NAME_U,
 )
@@ -65,15 +63,12 @@
 OSM_QUESTION_FIELDS = (*QUESTION_FIELDS, *SELECT_QUESTION_EXTRA_FIELDS)
 
 OPTION_EXTRA_FIELDS = (
-    "_choice_itext_id",
+    "_choice_itext_ref",
     constants.MEDIA,
     "sms_option",
 )
 OPTION_FIELDS = (*SURVEY_ELEMENT_FIELDS, *OPTION_EXTRA_FIELDS)
 
-TAG_EXTRA_FIELDS = (constants.CHILDREN,)
-TAG_FIELDS = (*SURVEY_ELEMENT_FIELDS, *TAG_EXTRA_FIELDS)
-
 
 class Question(SurveyElement):
     __slots__ = QUESTION_EXTRA_FIELDS
@@ -291,33 +286,18 @@ def __init__(
         sms_option: str | None = None,
         **kwargs,
     ):
-        self._choice_itext_id: str | None = None
+        self._choice_itext_ref: str | None = None
         self.media: dict | None = media
         self.sms_option: str | None = sms_option
 
         super().__init__(name=name, label=label, **kwargs)
 
-    def xml_value(self):
-        return node("value", self.name)
-
-    def xml(self, survey: "Survey"):
-        item = node("item")
-        item.appendChild(self.xml_label(survey=survey))
-        item.appendChild(self.xml_value())
-
-        return item
-
     def validate(self):
         pass
 
     def xml_control(self, survey: "Survey"):
         raise NotImplementedError()
 
-    def _translation_path(self, display_element):
-        if self._choice_itext_id is not None:
-            return self._choice_itext_id
-        return super()._translation_path(display_element=display_element)
-
     def to_json_dict(self, delete_keys: Iterable[str] | None = None) -> dict:
         to_delete = (k for k in self.get_slot_names() if k.startswith("_"))
         if delete_keys is not None:
@@ -325,6 +305,41 @@ def to_json_dict(self, delete_keys: Iterable[str] | None = None) -> dict:
         return super().to_json_dict(delete_keys=to_delete)
 
 
+class Itemset:
+    """Itemset details and metadata detection."""
+
+    __slots__ = ("name", "options", "requires_itext", "used_by_search")
+
+    def __init__(self, name: str, choices: Iterable[dict]):
+        self.requires_itext: bool = False
+        self.used_by_search: bool = False
+        self.name: str = name
+        self.options: tuple[Option, ...] = tuple(o for o in self.get_options(choices))
+
+    def get_options(self, choices: Iterable[dict]) -> Generator[Option, None, None]:
+        requires_itext = False
+        for c in choices:
+            option = Option(**c)
+            if not requires_itext:
+                # Media: dict of image, audio, etc. Defaults to None.
+                if option.media:
+                    requires_itext = True
+                else:
+                    choice_label = option.label
+                    label_is_dict = isinstance(choice_label, dict)
+                    # Multi-language: dict of labels etc per language. Can be just a string.
+                    if label_is_dict:
+                        requires_itext = True
+                    # Dynamic label: string contains a pyxform reference.
+                    elif (
+                        choice_label
+                        and re.search(RE_ANY_PYXFORM_REF, choice_label) is not None
+                    ):
+                        requires_itext = True
+            yield option
+        self.requires_itext = requires_itext
+
+
 class MultipleChoiceQuestion(Question):
     __slots__ = SELECT_QUESTION_EXTRA_FIELDS
 
@@ -335,53 +350,21 @@ def get_slot_names() -> tuple[str, ...]:
     def __init__(
         self, itemset: str | None = None, list_name: str | None = None, **kwargs
     ):
-        # Internals
-        self._itemset_dyn_label: bool = False
-        self._itemset_has_media: bool = False
-        self._itemset_multi_language: bool = False
+        if not itemset and not list_name:
+            raise PyXFormError(
+                "Arguments 'itemset' and 'list_name' must not both be None or empty."
+            )
 
         # Structure
-        self.children: tuple[Option, ...] | None = None
+        self.choices: Itemset | None = None
         self.itemset: str | None = itemset
         self.list_name: str | None = list_name
 
-        # Notice that choices can be specified under choices or children.
-        # I'm going to try to stick to just choices.
-        # Aliases in the json format will make it more difficult
-        # to use going forward.
-        kw_choices = kwargs.pop(constants.CHOICES, None)
-        kw_children = kwargs.pop(constants.CHILDREN, None)
-        choices = coalesce(kw_choices, kw_children)
-        if isinstance(choices, tuple) and isinstance(next(iter(choices)), Option):
-            self.children = choices
-        elif choices:
-            self.children = tuple(
-                Option(**c) for c in combine_lists(kw_choices, kw_children)
-            )
+        choices = kwargs.pop(constants.CHOICES, None)
+        if isinstance(choices, Itemset):
+            self.choices = choices
         super().__init__(**kwargs)
 
-    def validate(self):
-        Question.validate(self)
-        if self.children:
-            for child in self.children:
-                child.validate()
-
-    def iter_descendants(
-        self,
-        condition: Callable[["SurveyElement"], bool] | None = None,
-        iter_into_section_items: bool = False,
-    ) -> Generator["SurveyElement", None, None]:
-        if condition is None:
-            yield self
-        elif condition(self):
-            yield self
-        if iter_into_section_items and self.children:
-            for e in self.children:
-                yield from e.iter_descendants(
-                    condition=condition,
-                    iter_into_section_items=iter_into_section_items,
-                )
-
     def build_xml(self, survey: "Survey"):
         if self.bind["type"] not in {"string", "odk:rank"}:
             raise PyXFormError("""Invalid value for `self.bind["type"]`.""")
@@ -403,21 +386,18 @@ def build_xml(self, survey: "Survey"):
                 itemset_value_ref = self.parameters.get("value", itemset_value_ref)
                 itemset_label_ref = self.parameters.get("label", itemset_label_ref)
 
-            multi_language = self._itemset_multi_language
-            has_media = self._itemset_has_media
-            has_dyn_label = self._itemset_dyn_label
             is_previous_question = bool(PYXFORM_REFERENCE_REGEX.search(self.itemset))
 
             if file_extension in EXTERNAL_INSTANCE_EXTENSIONS:
                 pass
-            elif not multi_language and not has_media and not has_dyn_label:
+            elif self.choices and self.choices.requires_itext:
                 itemset = self.itemset
+                itemset_label_ref = "jr:itext(itextId)"
             else:
                 itemset = self.itemset
-                itemset_label_ref = "jr:itext(itextId)"
 
             choice_filter = self.choice_filter
-            if choice_filter is not None:
+            if choice_filter:
                 choice_filter = survey.insert_xpaths(
                     choice_filter, self, True, is_previous_question
                 )
@@ -460,63 +440,43 @@ def build_xml(self, survey: "Survey"):
 
                     nodeset += ")"
 
-            itemset_children = [
-                node("value", ref=itemset_value_ref),
-                node("label", ref=itemset_label_ref),
-            ]
-            result.appendChild(node("itemset", *itemset_children, nodeset=nodeset))
-        elif self.children:
-            for child in self.children:
-                result.appendChild(child.xml(survey=survey))
+            result.appendChild(
+                node(
+                    "itemset",
+                    node("value", ref=itemset_value_ref),
+                    node("label", ref=itemset_label_ref),
+                    nodeset=nodeset,
+                )
+            )
+        elif self.choices:
+            # Options processing specific to XLSForms using the "search()" function.
+            # The _choice_itext_ref is prepared by Survey._redirect_is_search_itext.
+            itemset = self.choices
+            if itemset.used_by_search:
+                for option in itemset.options:
+                    if itemset.requires_itext:
+                        label_node = node("label", ref=option._choice_itext_ref)
+                    elif self.label:
+                        label, output_inserted = survey.insert_output_values(
+                            option.label, option
+                        )
+                        label_node = node("label", label, toParseString=output_inserted)
+                    else:
+                        label_node = node("label")
+                    result.appendChild(
+                        node("item", label_node, node("value", option.name))
+                    )
 
         return result
 
 
 class Tag(SurveyElement):
-    __slots__ = TAG_EXTRA_FIELDS
-
     @staticmethod
     def get_slot_names() -> tuple[str, ...]:
-        return TAG_FIELDS
-
-    def __init__(self, name: str, label: str | dict | None = None, **kwargs):
-        self.children: tuple[Option, ...] | None = None
-
-        kw_choices = kwargs.pop(constants.CHOICES, None)
-        kw_children = kwargs.pop(constants.CHILDREN, None)
-        choices = coalesce(kw_choices, kw_children)
-        if isinstance(choices, tuple) and isinstance(next(iter(choices)), Option):
-            self.children = choices
-        elif choices:
-            self.children = tuple(
-                Option(**c) for c in combine_lists(kw_choices, kw_children)
-            )
-        super().__init__(name=name, label=label, **kwargs)
-
-    def iter_descendants(
-        self,
-        condition: Callable[["SurveyElement"], bool] | None = None,
-        iter_into_section_items: bool = False,
-    ) -> Generator["SurveyElement", None, None]:
-        if condition is None:
-            yield self
-        elif condition(self):
-            yield self
-        if iter_into_section_items and self.children:
-            for e in self.children:
-                yield from e.iter_descendants(
-                    condition=condition,
-                    iter_into_section_items=iter_into_section_items,
-                )
+        return SURVEY_ELEMENT_FIELDS
 
     def xml(self, survey: "Survey"):
-        result = node("tag", key=self.name)
-        result.appendChild(self.xml_label(survey=survey))
-        if self.children:
-            for choice in self.children:
-                result.appendChild(choice.xml(survey=survey))
-
-        return result
+        return node("tag", self.xml_label(survey=survey), key=self.name)
 
     def validate(self):
         pass
diff --git a/pyxform/survey.py b/pyxform/survey.py
index a36d60fd..5e29740a 100644
--- a/pyxform/survey.py
+++ b/pyxform/survey.py
@@ -20,7 +20,7 @@
 from pyxform.instance import SurveyInstance
 from pyxform.parsing.expression import has_last_saved
 from pyxform.parsing.instance_expression import replace_with_output
-from pyxform.question import MultipleChoiceQuestion, Option, Question, Tag
+from pyxform.question import Itemset, MultipleChoiceQuestion, Option, Question, Tag
 from pyxform.section import SECTION_EXTRA_FIELDS, Section
 from pyxform.survey_element import SURVEY_ELEMENT_FIELDS, SurveyElement
 from pyxform.utils import (
@@ -28,7 +28,6 @@
     LAST_SAVED_INSTANCE_NAME,
     DetachableElement,
     escape_text_for_xml,
-    has_dynamic_label,
     node,
 )
 from pyxform.validators import enketo_validate, odk_validate
@@ -43,6 +42,7 @@
 )
 RE_PULLDATA = re.compile(r"(pulldata\s*\(\s*)(.*?),")
 SEARCH_FUNCTION_REGEX = re.compile(r"search\(.*?\)")
+SELECT_TYPES = set(aliases.select)
 
 
 class InstanceInfo:
@@ -170,22 +170,12 @@ def _get_steps_and_target_xpath(context_parent, xpath_parent, include_parent=Fal
     return (None, None)
 
 
-@lru_cache(maxsize=128)
-def is_label_dynamic(label: str) -> bool:
-    return (
-        label is not None
-        and isinstance(label, str)
-        and re.search(BRACKETED_TAG_REGEX, label) is not None
-    )
-
-
 def recursive_dict():
     return defaultdict(recursive_dict)
 
 
 SURVEY_EXTRA_FIELDS = (
     "_created",
-    "_search_lists",
     "_translations",
     "_xpath",
     "add_none_option",
@@ -236,14 +226,13 @@ def get_slot_names() -> tuple[str, ...]:
     def __init__(self, **kwargs):
         # Internals
         self._created: datetime.now = datetime.now()
-        self._search_lists: set = set()
         self._translations: recursive_dict = recursive_dict()
         self._xpath: dict[str, Section | Question | None] = {}
 
         # Structure
         # attribute is for custom instance attrs from settings e.g. attribute::abc:xyz
         self.attribute: dict | None = None
-        self.choices: dict[str, tuple[Option, ...]] | None = None
+        self.choices: dict[str, Itemset] | None = None
         self.entity_features: list[str] | None = None
         self.setgeopoint_by_triggering_ref: dict[str, list[str]] = {}
         self.setvalues_by_triggering_ref: dict[str, list[str]] = {}
@@ -279,11 +268,9 @@ def __init__(self, **kwargs):
         self.sms_separator: str | None = None
 
         choices = kwargs.pop("choices", None)
-        if choices is not None:
+        if choices and isinstance(choices, dict):
             self.choices = {
-                list_name: tuple(
-                    c if isinstance(c, Option) else Option(**c) for c in values
-                )
+                list_name: Itemset(name=list_name, choices=values)
                 for list_name, values in choices.items()
             }
         kwargs[constants.TYPE] = constants.SURVEY
@@ -380,80 +367,60 @@ def get_trigger_values_for_question_name(self, question_name: str, trigger_type:
         elif trigger_type == "setgeopoint":
             return self.setgeopoint_by_triggering_ref.get(f"${{{question_name}}}")
 
-    def _generate_static_instances(self, list_name, choice_list) -> InstanceInfo:
+    def _generate_static_instances(
+        self, list_name: str, itemset: Itemset
+    ) -> InstanceInfo:
         """
         Generate <instance> elements for static data (e.g. choices for selects)
         """
-        instance_element_list = []
-        has_media = bool(choice_list[0].get("media"))
-        has_dyn_label = has_dynamic_label(choice_list)
-        multi_language = False
-        if isinstance(self._translations, dict):
-            choices = (
-                True
-                for items in self._translations.values()
-                for k, v in items.items()
-                if v.get(constants.TYPE, "") == constants.CHOICE
-                and "-".join(k.split("-")[:-1]) == list_name
-            )
-            try:
-                if next(choices):
-                    multi_language = True
-            except StopIteration:
-                pass
 
-        for idx, choice in enumerate(choice_list):
-            choice_element_list = []
+        def choice_nodes(idx, choice):
             # Add a unique id to the choice element in case there are itext references
-            if multi_language or has_media or has_dyn_label:
-                itext_id = f"{list_name}-{idx}"
-                choice_element_list.append(node("itextId", itext_id))
-
-            for name, value in choice.items():
-                if not value:
-                    continue
-                elif name != "label" and isinstance(value, str):
-                    choice_element_list.append(node(name, value))
-                elif name == "extra_data" and isinstance(value, dict):
-                    for k, v in value.items():
-                        choice_element_list.append(node(k, v))
-                elif (
-                    not multi_language
-                    and not has_media
-                    and not has_dyn_label
-                    and isinstance(value, str)
-                    and name == "label"
-                ):
-                    choice_element_list.append(node(name, value))
-
-            instance_element_list.append(node("item", *choice_element_list))
+            if itemset.requires_itext:
+                yield node("itextId", f"{list_name}-{idx}")
+            yield node(constants.NAME, choice.name)
+            choice_label = choice.label
+            if not itemset.requires_itext and isinstance(choice_label, str):
+                yield node(constants.LABEL, choice_label)
+            choice_extra_data = choice.extra_data
+            if choice_extra_data and isinstance(choice_extra_data, dict):
+                for k, v in choice_extra_data.items():
+                    yield node(k, v)
+            choice_sms_option = choice.sms_option
+            if choice_sms_option and isinstance(choice_sms_option, str):
+                yield node("sms_option", choice_sms_option)
+
+        def instance_nodes(choices):
+            for idx, choice in enumerate(choices):
+                yield node("item", choice_nodes(idx, choice))
 
         return InstanceInfo(
             type="choice",
             context="survey",
             name=list_name,
             src=None,
-            instance=node("instance", node("root", *instance_element_list), id=list_name),
+            instance=node(
+                "instance",
+                node("root", instance_nodes(itemset.options)),
+                id=list_name,
+            ),
         )
 
     @staticmethod
-    def _generate_external_instances(element: SurveyElement) -> InstanceInfo | None:
-        if isinstance(element, ExternalInstance):
-            name = element["name"]
-            extension = element["type"].split("-")[0]
-            prefix = "file-csv" if extension == "csv" else "file"
-            src = f"jr://{prefix}/{name}.{extension}"
-            return InstanceInfo(
-                type="external",
-                context="[type: {t}, name: {n}]".format(
-                    t=element["parent"]["type"], n=element["parent"]["name"]
-                ),
-                name=name,
-                src=src,
-                instance=node("instance", id=name, src=src),
-            )
-
-        return None
+    def _generate_external_instances(element: ExternalInstance) -> InstanceInfo:
+        name = element["name"]
+        extension = element["type"].split("-")[0]
+        prefix = "file-csv" if extension == "csv" else "file"
+        src = f"jr://{prefix}/{name}.{extension}"
+        return InstanceInfo(
+            type="external",
+            context="[type: {t}, name: {n}]".format(
+                t=element["parent"]["type"], n=element["parent"]["name"]
+            ),
+            name=name,
+            src=src,
+            instance=node("instance", id=name, src=src),
+        )
 
     @staticmethod
     def _validate_external_instances(instances) -> None:
@@ -483,14 +450,14 @@ def _validate_external_instances(instances) -> None:
             raise ValidationError("\n".join(errors))
 
     @staticmethod
-    def _generate_pulldata_instances(element: SurveyElement) -> list[InstanceInfo] | None:
+    def _generate_pulldata_instances(
+        element: Question | Section,
+    ) -> Generator[InstanceInfo, None, None]:
         def get_pulldata_functions(element):
             """
             Returns a list of different pulldata(... function strings if
             pulldata function is defined at least once for any of:
             calculate, constraint, readonly, required, relevant
-
-            :param: element (pyxform.survey.Survey):
             """
             functions_present = []
             for formula_name in constants.EXTERNAL_INSTANCES:
@@ -515,24 +482,20 @@ def get_pulldata_functions(element):
 
             return functions_present
 
-        def get_instance_info(element, file_id):
+        def get_instance_info(elem, file_id):
             uri = f"jr://file-csv/{file_id}.csv"
+            parent = elem.parent
 
             return InstanceInfo(
                 type="pulldata",
-                context="[type: {t}, name: {n}]".format(
-                    t=element["parent"]["type"], n=element["parent"]["name"]
-                ),
+                context=f"[type: {parent.type}, name: {parent.name}]",
                 name=file_id,
                 src=uri,
                 instance=node("instance", id=file_id, src=uri),
             )
 
-        if isinstance(element, Option | ExternalInstance | Tag | Survey):
-            return None
         pulldata_usages = get_pulldata_functions(element)
         if len(pulldata_usages) > 0:
-            pulldata_instances = []
             for usage in pulldata_usages:
                 for call_match in re.finditer(RE_PULLDATA, usage):
                     groups = call_match.groups()
@@ -540,40 +503,32 @@ def get_instance_info(element, file_id):
                         first_argument = (  # first argument to pulldata()
                             groups[1].replace("'", "").replace('"', "").strip()
                         )
-                        pulldata_instances.append(
-                            get_instance_info(element, first_argument)
-                        )
-            return pulldata_instances
-        return None
+                        yield get_instance_info(element, first_argument)
 
     @staticmethod
-    def _generate_from_file_instances(element: SurveyElement) -> InstanceInfo | None:
-        if not isinstance(element, MultipleChoiceQuestion) or element.itemset is None:
+    def _generate_from_file_instances(
+        element: MultipleChoiceQuestion,
+    ) -> InstanceInfo | None:
+        itemset = element.itemset
+        if not itemset:
             return None
-        itemset = element.get("itemset")
         file_id, ext = os.path.splitext(itemset)
         if itemset and ext in EXTERNAL_INSTANCE_EXTENSIONS:
             file_ext = "file" if ext in {".xml", ".geojson"} else f"file-{ext[1:]}"
             uri = f"jr://{file_ext}/{itemset}"
             return InstanceInfo(
                 type="file",
-                context="[type: {t}, name: {n}]".format(
-                    t=element["parent"]["type"], n=element["parent"]["name"]
-                ),
+                context=f"[type: {element.parent.type}, name: {element.parent.name}]",
                 name=file_id,
                 src=uri,
                 instance=node("instance", id=file_id, src=uri),
             )
 
-        return None
-
     @staticmethod
-    def _generate_last_saved_instance(element: SurveyElement) -> bool:
+    def _generate_last_saved_instance(element: Question) -> bool:
         """
         True if a last-saved instance should be generated, false otherwise.
         """
-        if not isinstance(element, Question):
-            return False
         if has_last_saved(element.default):
             return True
         if has_last_saved(element.choice_filter):
@@ -633,49 +588,59 @@ def _generate_instances(self) -> Generator[DetachableElement, None, None]:
         - `select_one_external`: implicitly relies on a `itemsets.csv` file and
           uses XPath-like expressions for querying.
         """
-        instances = []
-        generate_last_saved = False
-        for i in self.iter_descendants():
-            i_ext = self._generate_external_instances(element=i)
-            i_pull = self._generate_pulldata_instances(element=i)
-            i_file = self._generate_from_file_instances(element=i)
-            if not generate_last_saved:
-                generate_last_saved = self._generate_last_saved_instance(element=i)
-            for x in [i_ext, i_pull, i_file]:
-                if x is not None:
-                    instances += x if isinstance(x, list) else [x]
-
-        if generate_last_saved:
-            instances += [self._get_last_saved_instance()]
-
-        # Append last so the choice instance is excluded on a name clash.
-        if self.choices:
-            for name, value in self.choices.items():
-                if name not in self._search_lists:
-                    instances += [
-                        self._generate_static_instances(list_name=name, choice_list=value)
-                    ]
+
+        def get_element_instances():
+            generate_last_saved = False
+            for i in self.iter_descendants():
+                if isinstance(i, Question):
+                    yield from self._generate_pulldata_instances(element=i)
+                    if isinstance(i, MultipleChoiceQuestion):
+                        i_file = self._generate_from_file_instances(element=i)
+                        if i_file:
+                            yield i_file
+                    if not generate_last_saved:
+                        generate_last_saved = self._generate_last_saved_instance(
+                            element=i
+                        )
+                elif isinstance(i, Section):
+                    yield from self._generate_pulldata_instances(element=i)
+                elif isinstance(i, ExternalInstance):
+                    yield self._generate_external_instances(element=i)
+
+            if generate_last_saved:
+                yield self._get_last_saved_instance()
+
+            # Append last so the choice instance is excluded on a name clash.
+            if self.choices:
+                for k, v in self.choices.items():
+                    if not v.used_by_search:
+                        yield self._generate_static_instances(list_name=k, itemset=v)
+
+        instances = tuple(get_element_instances())
 
         # Check that external instances have unique names.
         if instances:
-            ext_only = [x for x in instances if x.type == "external"]
-            self._validate_external_instances(instances=ext_only)
+            self._validate_external_instances(
+                instances=(x for x in instances if x.type == "external")
+            )
 
         seen = {}
         for i in instances:
-            if i.name in seen and seen[i.name].src != i.src:
-                # Instance id exists with different src URI -> error.
-                msg = (
-                    "The same instance id will be generated for different "
-                    "external instance source URIs. Please check the form."
-                    f" Instance name: '{i.name}', Existing type: '{seen[i.name].type}', "
-                    f"Existing URI: '{seen[i.name].src}', Duplicate type: '{i.type}', "
-                    f"Duplicate URI: '{i.src}', Duplicate context: '{i.context}'."
-                )
-                raise PyXFormError(msg)
-            elif i.name in seen and seen[i.name].src == i.src:
-                # Instance id exists with same src URI -> ok, don't duplicate.
-                continue
+            prior = seen.get(i.name)
+            if prior:
+                if prior.src != i.src:
+                    # Instance id exists with different src URI -> error.
+                    msg = (
+                        "The same instance id will be generated for different "
+                        "external instance source URIs. Please check the form."
+                        f" Instance name: '{i.name}', Existing type: '{prior.type}', "
+                        f"Existing URI: '{prior.src}', Duplicate type: '{i.type}', "
+                        f"Duplicate URI: '{i.src}', Duplicate context: '{i.context}'."
+                    )
+                    raise PyXFormError(msg)
+                else:
+                    # Instance id exists with same src URI -> ok, don't duplicate.
+                    continue
             else:
                 # Instance doesn't exist yet -> add it.
                 yield i.instance
@@ -786,7 +751,7 @@ def _add_to_nested_dict(self, dicty, path, value):
             dicty[path[0]] = {}
         self._add_to_nested_dict(dicty[path[0]], path[1:], value)
 
-    def _redirect_is_search_itext(self, element: Question) -> bool:
+    def _redirect_is_search_itext(self, element: MultipleChoiceQuestion) -> bool:
         """
         For selects using the "search()" function, redirect itext for in-line items.
 
@@ -801,29 +766,29 @@ def _redirect_is_search_itext(self, element: Question) -> bool:
         :param element: A select type question.
         :return: If True, the element uses the search function.
         """
+        is_search = False
         try:
-            is_search = bool(
-                SEARCH_FUNCTION_REGEX.search(
-                    element[constants.CONTROL][constants.APPEARANCE]
-                )
-            )
+            appearance = element.control[constants.APPEARANCE]
+            if appearance and len(appearance) > 7:
+                is_search = bool(SEARCH_FUNCTION_REGEX.search(appearance))
         except (KeyError, TypeError):
-            is_search = False
+            pass
         if is_search:
-            file_id, ext = os.path.splitext(element[constants.ITEMSET])
-            if ext in EXTERNAL_INSTANCE_EXTENSIONS:
+            ext = os.path.splitext(element.itemset)[1]
+            if ext and ext in EXTERNAL_INSTANCE_EXTENSIONS:
                 msg = (
-                    f"Question '{element[constants.NAME]}' is a select from file type, "
+                    f"Question '{element.name}' is a select from file type, "
                     "using 'search()'. This combination is not supported. "
                     "Remove the 'search()' usage, or change the select type."
                 )
                 raise PyXFormError(msg)
-            if self.choices:
-                element.children = self.choices.get(element[constants.ITEMSET], None)
-                element[constants.ITEMSET] = ""
-                if element.children is not None:
-                    for i, opt in enumerate(element.children):
-                        opt["_choice_itext_id"] = f"{element[constants.LIST_NAME_U]}-{i}"
+
+            element.itemset = ""
+            itemset = element.choices
+            if not itemset.used_by_search:
+                itemset.used_by_search = True
+                for i, opt in enumerate(itemset.options):
+                    opt._choice_itext_ref = f"jr:itext('{itemset.name}-{i}')"
         return is_search
 
     def _setup_translations(self):
@@ -832,58 +797,35 @@ def _setup_translations(self):
         setup media and itext functions
         """
 
-        def _setup_choice_translations(
-            name, choice_value, itext_id
-        ) -> Generator[tuple[list[str], str], None, None]:
-            for media_or_lang, value in choice_value.items():
-                if isinstance(value, dict):
-                    for language, val in value.items():
-                        yield ([language, itext_id, media_or_lang], val)
-                elif name == constants.MEDIA:
-                    yield ([self.default_language, itext_id, media_or_lang], value)
-                else:
-                    yield ([media_or_lang, itext_id, "long"], value)
+        def get_choice_content(name, idx, choice):
+            itext_id = f"{name}-{idx}"
 
-        itemsets_multi_language = set()
-        itemsets_has_media = set()
-        itemsets_has_dyn_label = set()
+            choice_label = choice.label
+            if choice_label:
+                if isinstance(choice_label, dict):
+                    for lang, value in choice_label.items():
+                        if isinstance(value, dict):
+                            for language, val in value.items():
+                                yield ([language, itext_id, lang], val)
+                        else:
+                            yield ([lang, itext_id, "long"], value)
+                else:
+                    yield ([self.default_language, itext_id, "long"], choice_label)
+
+            choice_media = choice.media
+            if choice_media:
+                for media, value in choice_media.items():
+                    if isinstance(value, dict):
+                        for language, val in value.items():
+                            yield ([language, itext_id, media], val)
+                    else:
+                        yield ([self.default_language, itext_id, media], value)
 
         def get_choices():
-            for list_name, choice_list in self.choices.items():
-                multi_language = False
-                has_media = False
-                dyn_label = False
-                choices = []
-                for idx, choice in enumerate(choice_list):
-                    for col_name, choice_value in choice.items():
-                        lang_choice = None
-                        if not choice_value:
-                            continue
-                        if col_name == constants.MEDIA:
-                            has_media = True
-                            lang_choice = choice_value
-                        elif col_name == constants.LABEL:
-                            if isinstance(choice_value, dict):
-                                lang_choice = choice_value
-                                multi_language = True
-                            else:
-                                lang_choice = {self.default_language: choice_value}
-                                if is_label_dynamic(choice_value):
-                                    dyn_label = True
-                        if lang_choice is not None:
-                            # e.g. (label, {"default": "Yes"}, "consent", 0)
-                            choices.append((col_name, lang_choice, list_name, idx))
-                if multi_language or has_media or dyn_label:
-                    if multi_language:
-                        itemsets_multi_language.add(list_name)
-                    if has_media:
-                        itemsets_has_media.add(list_name)
-                    if dyn_label:
-                        itemsets_has_dyn_label.add(list_name)
-                    for c in choices:
-                        yield from _setup_choice_translations(
-                            c[0], c[1], f"{c[2]}-{c[3]}"
-                        )
+            for name, itemset in self.choices.items():
+                if itemset.requires_itext:
+                    for idx, choice in enumerate(itemset.options):
+                        yield from get_choice_content(name, idx, choice)
 
         if self.choices:
             for path, value in get_choices():
@@ -891,53 +833,40 @@ def get_choices():
                 leaf_value = {last_path: value, constants.TYPE: constants.CHOICE}
                 self._add_to_nested_dict(self._translations, path, leaf_value)
 
-        select_types = set(aliases.select)
         search_lists = set()
         non_search_lists = set()
         for element in self.iter_descendants(
             condition=lambda i: isinstance(i, Question | Section)
         ):
             if isinstance(element, MultipleChoiceQuestion):
-                if element.itemset is not None:
-                    element._itemset_multi_language = (
-                        element.itemset in itemsets_multi_language
-                    )
-                    element._itemset_has_media = element.itemset in itemsets_has_media
-                    element._itemset_dyn_label = element.itemset in itemsets_has_dyn_label
-
-                if element.type in select_types:
-                    select_ref = (element[constants.NAME], element[constants.LIST_NAME_U])
-                    if self._redirect_is_search_itext(element=element):
-                        search_lists.add(select_ref)
-                        self._search_lists.add(element[constants.LIST_NAME_U])
-                    else:
-                        non_search_lists.add(select_ref)
-
-            # Skip creation of translations for choices in selects. The creation of these
-            # translations is done above in this function.
-            parent = element.get("parent")
-            if parent is not None and parent[constants.TYPE] not in select_types:
-                for d in element.get_translations(self.default_language):
-                    translation_path = d["path"]
-                    form = "long"
-
-                    if "guidance_hint" in d["path"]:
-                        translation_path = d["path"].replace("guidance_hint", "hint")
-                        form = "guidance"
-
-                    self._translations[d["lang"]][translation_path] = self._translations[
-                        d["lang"]
-                    ].get(translation_path, {})
-
-                    self._translations[d["lang"]][translation_path].update(
-                        {
-                            form: {
-                                "text": d["text"],
-                                "output_context": d["output_context"],
-                            },
-                            constants.TYPE: constants.QUESTION,
-                        }
-                    )
+                select_ref = (element.name, element.list_name)
+                if self._redirect_is_search_itext(element=element):
+                    search_lists.add(select_ref)
+                else:
+                    non_search_lists.add(select_ref)
+
+            # Create translations questions.
+            for d in element.get_translations(self.default_language):
+                translation_path = d["path"]
+                form = "long"
+
+                if "guidance_hint" in d["path"]:
+                    translation_path = d["path"].replace("guidance_hint", "hint")
+                    form = "guidance"
+
+                self._translations[d["lang"]][translation_path] = self._translations[
+                    d["lang"]
+                ].get(translation_path, {})
+
+                self._translations[d["lang"]][translation_path].update(
+                    {
+                        form: {
+                            "text": d["text"],
+                            "output_context": d["output_context"],
+                        },
+                        constants.TYPE: constants.QUESTION,
+                    }
+                )
 
         for q_name, list_name in search_lists:
             choice_refs = [f"'{q}'" for q, c in non_search_lists if c == list_name]
diff --git a/pyxform/survey_element.py b/pyxform/survey_element.py
index a15bbaf0..509228f8 100644
--- a/pyxform/survey_element.py
+++ b/pyxform/survey_element.py
@@ -299,10 +299,18 @@ def to_json_dict(self, delete_keys: Iterable[str] | None = None) -> dict:
             ]
         choices = result.pop("choices", None)
         if choices:
-            result["choices"] = {
-                list_name: [o.to_json_dict(delete_keys=("parent",)) for o in options]
-                for list_name, options in choices.items()
-            }
+            if isinstance(choices, dict):
+                result["choices"] = {
+                    list_name: [
+                        o.to_json_dict(delete_keys=("parent",)) for o in itemset.options
+                    ]
+                    for list_name, itemset in choices.items()
+                }
+            else:
+                result["children"] = [
+                    o.to_json_dict(delete_keys=("parent",)) for o in choices.options
+                ]
+
         # Translation items with "output_context" have circular references.
         if "_translations" in result:
             for lang in result["_translations"].values():
diff --git a/pyxform/utils.py b/pyxform/utils.py
index 3ca4ab93..f5562ba5 100644
--- a/pyxform/utils.py
+++ b/pyxform/utils.py
@@ -43,7 +43,7 @@ class DetachableElement(Element):
     """
 
     def __init__(self, *args, **kwargs):
-        Element.__init__(self, *args, **kwargs)
+        super().__init__(*args, **kwargs)
         self.ownerDocument = None
 
     def writexml(self, writer, indent="", addindent="", newl=""):
@@ -226,7 +226,7 @@ def default_is_dynamic(element_default, element_type=None):
     * Contains arithmetic operator, including 'div' and 'mod' (except '-' for 'date' type).
     * Contains brackets, parentheses or braces.
     """
-    if not isinstance(element_default, str):
+    if not element_default or not isinstance(element_default, str):
         return False
 
     tokens, _ = parse_expression(element_default)
@@ -251,23 +251,6 @@ def default_is_dynamic(element_default, element_type=None):
     return False
 
 
-def has_dynamic_label(choice_list: "list[dict[str, str]]") -> bool:
-    """
-    If the first or second choice label includes a reference, we must use itext.
-
-    Check the first two choices in case first is something like "Other".
-    """
-    for c in choice_list[:2]:
-        choice_label = c.get("label")
-        if (
-            choice_label is not None
-            and isinstance(choice_label, str)
-            and re.search(BRACKETED_TAG_REGEX, choice_label) is not None
-        ):
-            return True
-    return False
-
-
 def levenshtein_distance(a: str, b: str) -> int:
     """
     Calculate Levenshtein distance between two strings.
diff --git a/tests/fixtures/strings.ini b/tests/fixtures/strings.ini
index 90dfedd3..5ca67d8a 100644
--- a/tests/fixtures/strings.ini
+++ b/tests/fixtures/strings.ini
@@ -5,14 +5,10 @@ test_answers_can_be_imported_from_xml = <?xml version='1.0' ?><data id="build_Wa
 [Json2XformQuestionValidationTests]
 test_question_type_string_control = <input ref="/test/enumerator_name"><label ref="jr:itext('/test/enumerator_name:label')"/></input>
 test_question_type_string_binding = <bind nodeset="/test/enumerator_name" type="string"/>
-test_select_one_question_multilingual_control = <select1 ref="/test/qname"><label ref="jr:itext('/test/qname:label')"/><item><label ref="jr:itext('/test/qname/a:label')"/><value>a</value></item><item><label ref="jr:itext('/test/qname/b:label')"/><value>b</value></item></select1>
-test_select_one_question_multilingual_binding = <bind nodeset="/test/qname" type="string"/>
 test_simple_integer_question_type_multilingual_control = <input ref="/test/integer_q"><label ref="jr:itext('/test/integer_q:label')"/></input>
 test_simple_integer_question_type_multilingual_binding = <bind nodeset="/test/integer_q" type="int"/>
 test_simple_date_question_type_multilingual_control = <input ref="/test/date_q"><label ref="jr:itext('/test/date_q:label')"/></input>
 test_simple_date_question_type_multilingual_binding = <bind nodeset="/test/date_q" type="date"/>
-test_simple_select_all_question_multilingual_control = <select ref="/test/select_all_q"><label ref="jr:itext('/test/select_all_q:label')"/><item><label ref="jr:itext('/test/select_all_q/f:label')"/><value>f</value></item><item><label ref="jr:itext('/test/select_all_q/g:label')"/><value>g</value></item><item><label ref="jr:itext('/test/select_all_q/h:label')"/><value>h</value></item></select>
-test_simple_select_all_question_multilingual_binding = <bind nodeset="/test/select_all_q" type="string"/>
 test_simple_decimal_question_multilingual_control = <input ref="/test/decimal_q"><label ref="jr:itext('/test/decimal_q:label')"/></input>
 test_simple_decimal_question_multilingual_binding = <bind nodeset="/test/decimal_q" type="decimal"/>
 
diff --git a/tests/test_dynamic_default.py b/tests/test_dynamic_default.py
index f737a97a..fcda82b4 100644
--- a/tests/test_dynamic_default.py
+++ b/tests/test_dynamic_default.py
@@ -779,11 +779,11 @@ def test_dynamic_default_performance__time(self):
         Results with Python 3.10.14 on VM with 2vCPU (i7-7700HQ) 1GB RAM, x questions
         each, average of 10 runs (seconds), with and without the check, per question:
         | num   | with   | without | peak RSS MB |
-        |   500 | 0.1626 |  0.1886 |          60 |
-        |  1000 | 0.3330 |  0.3916 |          63 |
-        |  2000 | 0.8675 |  0.7823 |          70 |
-        |  5000 | 1.7051 |  1.5653 |          91 |
-        | 10000 | 3.1097 |  3.8525 |         137 |
+        |   500 | 0.2203 |  0.1610 |          59 |
+        |  1000 | 0.2851 |  0.2580 |          63 |
+        |  2000 | 0.5001 |  0.5330 |          71 |
+        |  5000 | 1.2762 |  1.2931 |          92 |
+        | 10000 | 2.6226 |  2.6001 |         132 |
         """
         survey_header = """
         | survey |            |          |          |               |
@@ -793,7 +793,7 @@ def test_dynamic_default_performance__time(self):
         |        | text       | q{i}     | Q{i}     | if(../t2 = 'test', 1, 2) + 15 - int(1.2) |
         """
         process = Process(getpid())
-        for count in (500, 1000, 2000):
+        for count in (500, 1000, 2000, 5000, 10000):
             questions = "\n".join(question.format(i=i) for i in range(count))
             md = "".join((survey_header, questions))
 
diff --git a/tests/test_j2x_creation.py b/tests/test_j2x_creation.py
index f4b69989..7bb2f173 100644
--- a/tests/test_j2x_creation.py
+++ b/tests/test_j2x_creation.py
@@ -11,30 +11,27 @@
 
 class Json2XformVerboseSurveyCreationTests(TestCase):
     def test_survey_can_be_created_in_a_slightly_less_verbose_manner(self):
-        option_dict_array = [
-            {"name": "red", "label": "Red"},
-            {"name": "blue", "label": "Blue"},
-        ]
-
+        choices = {
+            "test": [
+                {"name": "red", "label": "Red"},
+                {"name": "blue", "label": "Blue"},
+            ]
+        }
+        s = Survey(name="Roses_are_Red", choices=choices)
         q = MultipleChoiceQuestion(
-            name="Favorite_Color", type="select one", choices=option_dict_array
+            name="Favorite_Color",
+            type="select one",
+            list_name="test",
         )
-        s = Survey(name="Roses_are_Red")
         s.add_child(q)
 
         expected_dict = {
             "name": "Roses_are_Red",
             "type": "survey",
             "children": [
-                {
-                    "name": "Favorite_Color",
-                    "type": "select one",
-                    "children": [
-                        {"label": "Red", "name": "red"},
-                        {"label": "Blue", "name": "blue"},
-                    ],
-                }
+                {"name": "Favorite_Color", "type": "select one", "list_name": "test"}
             ],
+            "choices": choices,
         }
 
         self.assertEqual(expected_dict, s.to_json_dict())
diff --git a/tests/test_j2x_question.py b/tests/test_j2x_question.py
index c2229d90..ed114b98 100644
--- a/tests/test_j2x_question.py
+++ b/tests/test_j2x_question.py
@@ -3,14 +3,14 @@
 """
 
 from collections.abc import Generator
-from unittest import TestCase
 
 from pyxform import Survey
 from pyxform.builder import create_survey_element_from_dict
 
+from tests.pyxform_test_case import PyxformTestCase
 from tests.utils import prep_class_config
-
-TESTING_BINDINGS = True
+from tests.xpath_helpers.choices import xpc
+from tests.xpath_helpers.questions import xpq
 
 
 def ctw(control):
@@ -25,7 +25,7 @@ def ctw(control):
     return control.toxml()
 
 
-class Json2XformQuestionValidationTests(TestCase):
+class Json2XformQuestionValidationTests(PyxformTestCase):
     maxDiff = None
     config = None
     cls_name = None
@@ -59,47 +59,91 @@ def test_question_type_string(self):
 
         self.s.add_child(q)
         self.assertEqual(ctw(q.xml_control(survey=self.s)), expected_string_control_xml)
-
-        if TESTING_BINDINGS:
-            self.assertEqual(
-                ctw(q.xml_bindings(survey=self.s)), expected_string_binding_xml
-            )
+        self.assertEqual(ctw(q.xml_bindings(survey=self.s)), expected_string_binding_xml)
 
     def test_select_one_question_multilingual(self):
-        """
-        Test the lowest common denominator of question types.
-        """
-        simple_select_one_json = {
-            "label": {"f": "ftext", "e": "etext"},
-            "type": "select one",
-            "name": "qname",
-            "choices": [
-                {"label": {"f": "fa", "e": "ea"}, "name": "a"},
-                {"label": {"f": "fb", "e": "eb"}, "name": "b"},
+        """Should be able to build a valid XForm from a dict with a multi-language select."""
+        survey = {
+            "type": "survey",
+            "name": "test_name",
+            "id_string": "data",
+            "children": [
+                {
+                    "label": {"f": "ftext", "e": "etext"},
+                    "type": "select one",
+                    "name": "q1",
+                    "itemset": "c1",
+                }
             ],
+            "choices": {
+                "c1": [
+                    {"label": {"f": "fa", "e": "ea"}, "name": "a"},
+                    {"label": {"f": "fb", "e": "eb"}, "name": "b"},
+                ]
+            },
         }
-
-        # I copied the response in, since this is not our method of testing
-        # valid return values.
-        expected_select_one_control_xml = self.config.get(
-            self.cls_name, "test_select_one_question_multilingual_control"
-        )
-
-        expected_select_one_binding_xml = self.config.get(
-            self.cls_name, "test_select_one_question_multilingual_binding"
+        self.assertPyxformXform(
+            survey=create_survey_element_from_dict(survey),
+            xml__xpath_match=[
+                # question has data binding, control, and itemset reference.
+                xpq.model_instance_item("q1"),
+                xpq.model_instance_bind("q1", "string"),
+                xpq.body_label_itext("select1", "q1"),
+                # itext has secondary choices instance, and labels for each language.
+                xpq.model_instance_exists("c1"),
+                xpq.model_itext_label("q1", "f", "ftext"),
+                xpq.model_itext_label("q1", "e", "etext"),
+                xpc.model_instance_choices_itext("c1", ("a", "b")),
+                xpc.model_itext_choice_text_label_by_pos("f", "c1", ("fa", "fb")),
+                xpc.model_itext_choice_text_label_by_pos("e", "c1", ("ea", "eb")),
+            ],
         )
 
-        q = create_survey_element_from_dict(simple_select_one_json)
-        self.s.add_child(q)
-        self.assertEqual(
-            ctw(q.xml_control(survey=self.s)), expected_select_one_control_xml
+    def test_select_one_question_multilingual__common_choices(self):
+        """Should be able to build a valid XForm from a dict where 2 questions use 1 choice list."""
+        survey = {
+            "type": "survey",
+            "name": "test_name",
+            "id_string": "data",
+            "children": [
+                {
+                    "label": "Q1",
+                    "type": "select one",
+                    "name": "q1",
+                    "itemset": "c1",
+                },
+                {
+                    "label": "Q2",
+                    "type": "select one",
+                    "name": "q2",
+                    "itemset": "c1",
+                },
+            ],
+            "choices": {
+                "c1": [
+                    {"label": {"f": "fa", "e": "ea"}, "name": "a"},
+                    {"label": {"f": "fb", "e": "eb"}, "name": "b"},
+                ]
+            },
+        }
+        self.assertPyxformXform(
+            survey=create_survey_element_from_dict(survey),
+            xml__xpath_match=[
+                # question has data binding, control, and itemset reference.
+                xpq.model_instance_item("q1"),
+                xpq.model_instance_bind("q1", "string"),
+                xpq.body_label_inline("select1", "q1", "Q1"),
+                xpq.model_instance_item("q2"),
+                xpq.model_instance_bind("q2", "string"),
+                xpq.body_label_inline("select1", "q2", "Q2"),
+                # itext has secondary choices instance, and labels for each language.
+                xpq.model_instance_exists("c1"),
+                xpc.model_instance_choices_itext("c1", ("a", "b")),
+                xpc.model_itext_choice_text_label_by_pos("f", "c1", ("fa", "fb")),
+                xpc.model_itext_choice_text_label_by_pos("e", "c1", ("ea", "eb")),
+            ],
         )
 
-        if TESTING_BINDINGS:
-            self.assertEqual(
-                ctw(q.xml_bindings(survey=self.s)), expected_select_one_binding_xml
-            )
-
     def test_simple_integer_question_type_multilingual(self):
         """
         not sure how integer questions should show up.
@@ -124,11 +168,7 @@ def test_simple_integer_question_type_multilingual(self):
         self.s.add_child(q)
 
         self.assertEqual(ctw(q.xml_control(survey=self.s)), expected_integer_control_xml)
-
-        if TESTING_BINDINGS:
-            self.assertEqual(
-                ctw(q.xml_bindings(survey=self.s)), expected_integer_binding_xml
-            )
+        self.assertEqual(ctw(q.xml_bindings(survey=self.s)), expected_integer_binding_xml)
 
     def test_simple_date_question_type_multilingual(self):
         """
@@ -152,11 +192,7 @@ def test_simple_date_question_type_multilingual(self):
         q = create_survey_element_from_dict(simple_date_question)
         self.s.add_child(q)
         self.assertEqual(ctw(q.xml_control(survey=self.s)), expected_date_control_xml)
-
-        if TESTING_BINDINGS:
-            self.assertEqual(
-                ctw(q.xml_bindings(survey=self.s)), expected_date_binding_xml
-            )
+        self.assertEqual(ctw(q.xml_bindings(survey=self.s)), expected_date_binding_xml)
 
     def test_simple_phone_number_question_type_multilingual(self):
         """
@@ -199,36 +235,42 @@ def test_simple_select_all_question_multilingual(self):
         """
         not sure how select all questions should show up...
         """
-        simple_select_all_question = {
-            "label": {"f": "f choisit", "e": "e choose"},
-            "type": "select all that apply",
-            "name": "select_all_q",
-            "choices": [
-                {"label": {"f": "ff", "e": "ef"}, "name": "f"},
-                {"label": {"f": "fg", "e": "eg"}, "name": "g"},
-                {"label": {"f": "fh", "e": "eh"}, "name": "h"},
+        survey = {
+            "type": "survey",
+            "name": "test_name",
+            "id_string": "data",
+            "children": [
+                {
+                    "label": {"f": "ftext", "e": "etext"},
+                    "type": "select all that apply",
+                    "name": "q1",
+                    "itemset": "c1",
+                }
             ],
+            "choices": {
+                "c1": [
+                    {"label": {"f": "fa", "e": "ea"}, "name": "a"},
+                    {"label": {"f": "fb", "e": "eb"}, "name": "b"},
+                ]
+            },
         }
-
-        expected_select_all_control_xml = self.config.get(
-            self.cls_name, "test_simple_select_all_question_multilingual_control"
-        )
-
-        expected_select_all_binding_xml = self.config.get(
-            self.cls_name, "test_simple_select_all_question_multilingual_binding"
-        )
-
-        q = create_survey_element_from_dict(simple_select_all_question)
-        self.s.add_child(q)
-        self.assertEqual(
-            ctw(q.xml_control(survey=self.s)), expected_select_all_control_xml
+        self.assertPyxformXform(
+            survey=create_survey_element_from_dict(survey),
+            xml__xpath_match=[
+                # question has data binding, control, and itemset reference.
+                xpq.model_instance_item("q1"),
+                xpq.model_instance_bind("q1", "string"),
+                xpq.body_label_itext("select", "q1"),
+                # itext has secondary choices instance, and labels for each language.
+                xpq.model_instance_exists("c1"),
+                xpq.model_itext_label("q1", "f", "ftext"),
+                xpq.model_itext_label("q1", "e", "etext"),
+                xpc.model_instance_choices_itext("c1", ("a", "b")),
+                xpc.model_itext_choice_text_label_by_pos("f", "c1", ("fa", "fb")),
+                xpc.model_itext_choice_text_label_by_pos("e", "c1", ("ea", "eb")),
+            ],
         )
 
-        if TESTING_BINDINGS:
-            self.assertEqual(
-                ctw(q.xml_bindings(survey=self.s)), expected_select_all_binding_xml
-            )
-
     def test_simple_decimal_question_multilingual(self):
         """
         not sure how decimal should show up.
@@ -251,8 +293,4 @@ def test_simple_decimal_question_multilingual(self):
         q = create_survey_element_from_dict(simple_decimal_question)
         self.s.add_child(q)
         self.assertEqual(ctw(q.xml_control(survey=self.s)), expected_decimal_control_xml)
-
-        if TESTING_BINDINGS:
-            self.assertEqual(
-                ctw(q.xml_bindings(survey=self.s)), expected_decimal_binding_xml
-            )
+        self.assertEqual(ctw(q.xml_bindings(survey=self.s)), expected_decimal_binding_xml)
diff --git a/tests/test_j2x_xform_build_preparation.py b/tests/test_j2x_xform_build_preparation.py
deleted file mode 100644
index c83d7440..00000000
--- a/tests/test_j2x_xform_build_preparation.py
+++ /dev/null
@@ -1,42 +0,0 @@
-"""
-Testing preparation of values for XForm exporting
-"""
-
-from unittest import TestCase
-
-from pyxform import MultipleChoiceQuestion, Survey
-
-
-class Json2XformExportingPrepTests(TestCase):
-    def test_dictionary_consolidates_duplicate_entries(self):
-        yes_or_no_dict_array = [
-            {"label": {"French": "Oui", "English": "Yes"}, "name": "yes"},
-            {"label": {"French": "Non", "English": "No"}, "name": "no"},
-        ]
-
-        first_yesno_question = MultipleChoiceQuestion(
-            name="yn_q1", choices=yes_or_no_dict_array, type="select one"
-        )
-        second_yesno_question = MultipleChoiceQuestion(
-            name="yn_q2", choices=yes_or_no_dict_array, type="select one"
-        )
-
-        s = Survey(name="yes_or_no_tests")
-        s.add_child(first_yesno_question)
-        s.add_child(second_yesno_question)
-
-        # begin the processes in survey.to_xml()
-        # 1. validate()
-        s.validate()
-
-        # 2. survey._build_options_list_from_descendants()
-        # options_list = s._build_options_list_from_descendants()
-        # Is this method called somewhere else now?
-
-        # desired_options_list = [first_yesno_question.children]
-
-        # todo: we need to think about whether we care about
-        # consolidating these options lists.
-        # self.assertEqual(options_list, desired_options_list)
-        # self.assertEqual(first_yesno_question._option_list_index_number, 0)
-        # self.assertEqual(second_yesno_question._option_list_index_number, 0)
diff --git a/tests/test_osm.py b/tests/test_osm.py
index 7740a21d..60b68cb8 100644
--- a/tests/test_osm.py
+++ b/tests/test_osm.py
@@ -3,6 +3,8 @@
 """
 
 from tests.pyxform_test_case import PyxformTestCase
+from tests.xpath_helpers.choices import xpc
+from tests.xpath_helpers.questions import xpq
 
 expected_xml_output = """
     <upload mediatype="osm/*" ref="/osm/osm_building">
@@ -54,3 +56,181 @@ def test_osm_type_with_list_underscore_name(self):
             """,
             xml__contains=[expected_xml_output],
         )
+
+    def test_osm_type_with_select(self):
+        """Should find that the OSM tags are output in the body and selects are as normal."""
+        md = """
+        | survey  |
+        |         | type              | name      | label    |
+        |         | osm               | osm_road  | Road     |
+        |         | osm building_tags | osm_build | Building |
+        |         | select_one c1     | q1        | Q1       |
+        | osm     |
+        |         | list_name     | name      | label |
+        |         | building_tags | name      | Name  |
+        |         | building_tags | addr:city | City  |
+        | choices |
+        |         | list_name | name | label |
+        |         | c1        | n1   | l1    |
+        |         | c1        | n2   | l2    |
+        """
+        self.assertPyxformXform(
+            md=md,
+            xml__xpath_match=[
+                # q1 has data binding, control, and itemset reference.
+                xpq.model_instance_item("q1"),
+                xpq.model_instance_bind("q1", "string"),
+                xpq.body_label_inline("select1", "q1", "Q1"),
+                # q1 has secondary choices instance with inline labels.
+                xpc.model_instance_choices_label("c1", (("n1", "l1"), ("n2", "l2"))),
+                # osm_road has data binding and control
+                """
+                /h:html/h:head/x:model/x:instance/x:test_name[@id='data']/x:osm_road
+                """,
+                """
+                /h:html/h:head/x:model/x:bind[@nodeset='/test_name/osm_road' and @type='binary']
+                """,
+                """
+                /h:html/h:body/x:upload[
+                  @mediatype='osm/*' and @ref='/test_name/osm_road' and ./x:label[text()='Road']
+                ]
+                """,
+                # osm_build has data binding and control with tags from building_tags
+                """
+                /h:html/h:head/x:model/x:instance/x:test_name[@id='data']/x:osm_build
+                """,
+                """
+                /h:html/h:head/x:model/x:bind[@nodeset='/test_name/osm_build' and @type='binary']
+                """,
+                """
+                /h:html/h:body/x:upload[
+                  @mediatype='osm/*'
+                  and @ref='/test_name/osm_build'
+                  and ./x:label[text()='Building']
+                  and ./x:tag[@key='name' and ./x:label/text()='Name']
+                  and ./x:tag[@key='addr:city' and ./x:label/text()='City']
+                ]
+                """,
+            ],
+            # The OSM list names aren't output anywhere (tags copied inline).
+            xml__excludes=["room_tags"],
+        )
+
+    def test_osm_type_with_multiple_lists__separate(self):
+        """Should find that the OSM tags are output for the corresponding question."""
+        md = """
+        | survey  |
+        |         | type              | name      | label    |
+        |         | osm               | osm_road  | Road     |
+        |         | osm building_tags | osm_build | Building |
+        |         | osm room_tags     | osm_room  | Room     |
+        | osm     |
+        |         | list_name     | name       | label     |
+        |         | building_tags | name       | Name      |
+        |         | building_tags | addr:city  | City      |
+        |         | room_tags     | room:type  | Type      |
+        |         | room_tags     | habitable  | Habitable |
+        """
+        self.assertPyxformXform(
+            md=md,
+            xml__xpath_match=[
+                # osm_road has data binding and control
+                """
+                /h:html/h:head/x:model/x:instance/x:test_name[@id='data']/x:osm_road
+                """,
+                """
+                /h:html/h:head/x:model/x:bind[@nodeset='/test_name/osm_road' and @type='binary']
+                """,
+                """
+                /h:html/h:body/x:upload[
+                  @mediatype='osm/*' and @ref='/test_name/osm_road' and ./x:label[text()='Road']
+                ]
+                """,
+                # osm_build has data binding and control with tags from building_tags
+                """
+                /h:html/h:head/x:model/x:instance/x:test_name[@id='data']/x:osm_build
+                """,
+                """
+                /h:html/h:head/x:model/x:bind[@nodeset='/test_name/osm_build' and @type='binary']
+                """,
+                """
+                /h:html/h:body/x:upload[
+                  @mediatype='osm/*'
+                  and @ref='/test_name/osm_build'
+                  and ./x:label[text()='Building']
+                  and ./x:tag[@key='name' and ./x:label/text()='Name']
+                  and ./x:tag[@key='addr:city' and ./x:label/text()='City']
+                ]
+                """,
+                # osm_room has data binding and control with tags from room_tags
+                """
+                /h:html/h:head/x:model/x:instance/x:test_name[@id='data']/x:osm_room
+                """,
+                """
+                /h:html/h:head/x:model/x:bind[@nodeset='/test_name/osm_room' and @type='binary']
+                """,
+                """
+                /h:html/h:body/x:upload[
+                  @mediatype='osm/*'
+                  and @ref='/test_name/osm_room'
+                  and ./x:label[text()='Room']
+                  and ./x:tag[@key='room:type' and ./x:label/text()='Type']
+                  and ./x:tag[@key='habitable' and ./x:label/text()='Habitable']
+                ]
+                """,
+            ],
+            # The OSM list names aren't output anywhere (tags copied inline).
+            xml__excludes=["building_tags", "room_tags"],
+        )
+
+    def test_osm_type_with_multiple_lists__shared(self):
+        """Should find that the OSM tags are output for the corresponding question."""
+        md = """
+        | survey  |
+        |         | type          | name       | label       |
+        |         | osm room_tags | osm_resi   | Residential |
+        |         | osm room_tags | osm_office | Office      |
+        | osm     |
+        |         | list_name     | name       | label     |
+        |         | room_tags     | room:type  | Type      |
+        |         | room_tags     | habitable  | Habitable |
+        """
+        self.assertPyxformXform(
+            md=md,
+            xml__xpath_match=[
+                # osm_resi has data binding and control with tags from room_tags
+                """
+                /h:html/h:head/x:model/x:instance/x:test_name[@id='data']/x:osm_resi
+                """,
+                """
+                /h:html/h:head/x:model/x:bind[@nodeset='/test_name/osm_resi' and @type='binary']
+                """,
+                """
+                /h:html/h:body/x:upload[
+                  @mediatype='osm/*'
+                  and @ref='/test_name/osm_resi'
+                  and ./x:label[text()='Residential']
+                  and ./x:tag[@key='room:type' and ./x:label/text()='Type']
+                  and ./x:tag[@key='habitable' and ./x:label/text()='Habitable']
+                ]
+                """,
+                # osm_office has data binding and control with tags from room_tags
+                """
+                /h:html/h:head/x:model/x:instance/x:test_name[@id='data']/x:osm_office
+                """,
+                """
+                /h:html/h:head/x:model/x:bind[@nodeset='/test_name/osm_office' and @type='binary']
+                """,
+                """
+                /h:html/h:body/x:upload[
+                  @mediatype='osm/*'
+                  and @ref='/test_name/osm_office'
+                  and ./x:label[text()='Office']
+                  and ./x:tag[@key='room:type' and ./x:label/text()='Type']
+                  and ./x:tag[@key='habitable' and ./x:label/text()='Habitable']
+                ]
+                """,
+            ],
+            # The OSM list names aren't output anywhere (tags copied inline).
+            xml__excludes=["room_tags"],
+        )
diff --git a/tests/test_translations.py b/tests/test_translations.py
index d9533a69..e148dd39 100644
--- a/tests/test_translations.py
+++ b/tests/test_translations.py
@@ -404,11 +404,11 @@ def test_missing_translations_check_performance(self):
         with 2 choices each, average of 10 runs (seconds), with and without the check,
         per question:
         | num   | with   | without | peak RSS MB |
-        |   500 | 0.7427 |  0.8133 |          77 |
-        |  1000 | 1.7908 |  1.7777 |          94 |
-        |  2000 | 5.6719 |  4.8387 |         141 |
-        |  5000 | 20.452 |  19.502 |         239 |
-        | 10000 | 70.871 |  62.106 |         416 |
+        |   500 | 0.6467 |  0.5648 |          77 |
+        |  1000 | 1.1448 |  1.2868 |          94 |
+        |  2000 | 2.3626 |  2.1485 |         129 |
+        |  5000 | 5.9631 |  5.7911 |         247 |
+        | 10000 | 11.404 |  11.399 |         423 |
         """
         survey_header = """
         | survey |                 |        |                    |                   |
@@ -426,7 +426,7 @@ def test_missing_translations_check_performance(self):
         |         | c{i}        | nb   | lb-d  | lb-e       |
         """
         process = Process(getpid())
-        for count in (500, 1000, 2000):
+        for count in (500, 1000, 2000, 5000, 10000):
             questions = "\n".join(question.format(i=i) for i in range(count))
             choice_lists = "\n".join(choice_list.format(i=i) for i in range(count))
             md = "".join((survey_header, questions, choices_header, choice_lists))