From d536789c00ca11b60bed940377918c2ba933a62c Mon Sep 17 00:00:00 2001 From: Noa Aviel Dove Date: Fri, 11 Oct 2024 17:03:51 -0700 Subject: [PATCH 1/6] Refactor table-column lookup in AnVIL plugin --- src/azul/plugins/repository/tdr_anvil/__init__.py | 15 ++++++++------- 1 file changed, 8 insertions(+), 7 deletions(-) diff --git a/src/azul/plugins/repository/tdr_anvil/__init__.py b/src/azul/plugins/repository/tdr_anvil/__init__.py index e276fe6b8..7e867dfa0 100644 --- a/src/azul/plugins/repository/tdr_anvil/__init__.py +++ b/src/azul/plugins/repository/tdr_anvil/__init__.py @@ -687,11 +687,12 @@ def convert_column(value): else: return [] + _schema_columns = { + table['name']: [column['name'] for column in table['columns']] + for table in anvil_schema['tables'] + } + def _columns(self, entity_type: EntityType) -> set[str]: - table = one( - table for table in anvil_schema['tables'] - if table['name'] == f'anvil_{entity_type}' - ) - entity_columns = {column['name'] for column in table['columns']} - entity_columns.add('datarepo_row_id') - return entity_columns + columns = set(self._schema_columns[f'anvil_{entity_type}']) + columns.add('datarepo_row_id') + return columns From 989d07fc7c03fa5a943cc20369c16868e9734553 Mon Sep 17 00:00:00 2001 From: Noa Aviel Dove Date: Fri, 11 Oct 2024 16:05:51 -0700 Subject: [PATCH 2/6] Fix incorrect canned table names --- .../plugins/repository/tdr_anvil/__init__.py | 5 +++-- ...09d-46a4-b845-7584df39263b.tables.tdr.json | 20 +++++++++---------- 2 files changed, 13 insertions(+), 12 deletions(-) diff --git a/src/azul/plugins/repository/tdr_anvil/__init__.py b/src/azul/plugins/repository/tdr_anvil/__init__.py index 7e867dfa0..c7811d3d4 100644 --- a/src/azul/plugins/repository/tdr_anvil/__init__.py +++ b/src/azul/plugins/repository/tdr_anvil/__init__.py @@ -379,8 +379,9 @@ def _bundle_entity(self, bundle_fqid: TDRAnvilBundleFQID) -> KeyReference: return bundle_entity def _full_table_name(self, source: TDRSourceSpec, table_name: str) -> str: - if not table_name.startswith('INFORMATION_SCHEMA'): - table_name = 'anvil_' + table_name + prefixed = f'anvil_{table_name}' + if prefixed in self._schema_columns: + table_name = prefixed return super()._full_table_name(source, table_name) def _consolidate_by_type(self, entities: Keys) -> MutableKeysByType: diff --git a/test/indexer/data/6c87f0e1-509d-46a4-b845-7584df39263b.tables.tdr.json b/test/indexer/data/6c87f0e1-509d-46a4-b845-7584df39263b.tables.tdr.json index 3c02b7242..422b461de 100644 --- a/test/indexer/data/6c87f0e1-509d-46a4-b845-7584df39263b.tables.tdr.json +++ b/test/indexer/data/6c87f0e1-509d-46a4-b845-7584df39263b.tables.tdr.json @@ -1,6 +1,6 @@ { "tables": { - "activity": { + "anvil_activity": { "rows": [ { "activity_id": "", @@ -12,7 +12,7 @@ } ] }, - "alignmentactivity": { + "anvil_alignmentactivity": { "rows": [ { "activity_type": "", @@ -25,7 +25,7 @@ } ] }, - "assayactivity": { + "anvil_assayactivity": { "rows": [ { "activity_type": "", @@ -39,7 +39,7 @@ } ] }, - "biosample": { + "anvil_biosample": { "rows": [ { "anatomical_site": null, @@ -63,7 +63,7 @@ } ] }, - "dataset": { + "anvil_dataset": { "rows": [ { "consent_group": [ @@ -89,7 +89,7 @@ } ] }, - "diagnosis": { + "anvil_diagnosis": { "rows": [ { "datarepo_row_id": "15d85d30-ad4a-4f50-87a8-a27f59dd1b5f", @@ -135,7 +135,7 @@ } ] }, - "donor": { + "anvil_donor": { "rows": [ { "datarepo_row_id": "bfd991f2-2797-4083-972a-da7c6d7f1b2e", @@ -153,7 +153,7 @@ } ] }, - "file": { + "anvil_file": { "rows": [ { "data_modality": [], @@ -202,7 +202,7 @@ } ] }, - 
"sequencingactivity": { + "anvil_sequencingactivity": { "rows": [ { "activity_type": "Sequencing", @@ -238,7 +238,7 @@ } ] }, - "variantcallingactivity": { + "anvil_variantcallingactivity": { "rows": [ { "activity_type": "", From 1b40ff4b3d6e4b74f81f9fe4feee1722ae9edc1a Mon Sep 17 00:00:00 2001 From: Noa Aviel Dove Date: Wed, 16 Oct 2024 15:54:55 -0700 Subject: [PATCH 3/6] Use variable for fixed AnVIL version in test --- test/service/test_response_anvil.py | 8 ++++---- 1 file changed, 4 insertions(+), 4 deletions(-) diff --git a/test/service/test_response_anvil.py b/test/service/test_response_anvil.py index 2d8e2a0ff..9a176000c 100644 --- a/test/service/test_response_anvil.py +++ b/test/service/test_response_anvil.py @@ -1954,7 +1954,7 @@ def test_entity_indices(self): ], 'file_name': '307500.merged.matefixed.sorted.markeddups.recal.g.vcf.gz', 'is_supplementary': False, - 'version': '2022-06-01T00:00:00.000000Z', + 'version': self.version, 'uuid': '15b76f9c-6b46-433f-851d-34e89f1b9ba6', 'size': 213021639, 'name': '307500.merged.matefixed.sorted.markeddups.recal.g.vcf.gz', @@ -1965,7 +1965,7 @@ def test_entity_indices(self): f'1e269f04-4347-4188-b060-1dcc69e71d67', 'url': str(self.base_url.set( path='/repository/files/15b76f9c-6b46-433f-851d-34e89f1b9ba6', - args=dict(catalog='test', version='2022-06-01T00:00:00.000000Z') + args=dict(catalog='test', version=self.version) )) } ] @@ -2083,7 +2083,7 @@ def test_entity_indices(self): ], 'file_name': '307500.merged.matefixed.sorted.markeddups.recal.bam', 'is_supplementary': False, - 'version': '2022-06-01T00:00:00.000000Z', + 'version': self.version, 'uuid': '3b17377b-16b1-431c-9967-e5d01fc5923f', 'size': 3306845592, 'name': '307500.merged.matefixed.sorted.markeddups.recal.bam', @@ -2094,7 +2094,7 @@ def test_entity_indices(self): f'8b722e88-8103-49c1-b351-e64fa7c6ab37', 'url': str(self.base_url.set( path='/repository/files/3b17377b-16b1-431c-9967-e5d01fc5923f', - args=dict(catalog='test', version='2022-06-01T00:00:00.000000Z') + args=dict(catalog='test', version=self.version) )) } ] From 41562774572cc6f2617ca0dbaa5e2ca492b8c5d6 Mon Sep 17 00:00:00 2001 From: Noa Aviel Dove Date: Thu, 17 Oct 2024 01:41:13 -0700 Subject: [PATCH 4/6] Refactor enumeration of AnVIL activity types --- .../plugins/metadata/anvil/indexer/transform.py | 17 ++++++++++------- 1 file changed, 10 insertions(+), 7 deletions(-) diff --git a/src/azul/plugins/metadata/anvil/indexer/transform.py b/src/azul/plugins/metadata/anvil/indexer/transform.py index 20107fb91..b30886839 100644 --- a/src/azul/plugins/metadata/anvil/indexer/transform.py +++ b/src/azul/plugins/metadata/anvil/indexer/transform.py @@ -402,13 +402,16 @@ def _file(self, file: EntityReference) -> MutableJSON: def _only_dataset(self) -> EntityReference: return one(self._entities_by_type['dataset']) - _activity_polymorphic_types = { - 'activity', - 'alignmentactivity', - 'assayactivity', - 'sequencingactivity', - 'variantcallingactivity' - } + @cached_property + def _activity_polymorphic_types(self) -> AbstractSet[str]: + from azul.plugins.metadata.anvil import ( + anvil_schema, + ) + return { + table['name'].removeprefix('anvil_') + for table in anvil_schema['tables'] + if table['name'].endswith('activity') + } @classmethod def inner_entity_id(cls, entity_type: EntityType, entity: JSON) -> EntityID: From 55d2e2afb9b76624853c91c6861c73862c3c33fd Mon Sep 17 00:00:00 2001 From: Noa Aviel Dove Date: Thu, 17 Oct 2024 14:42:20 -0700 Subject: [PATCH 5/6] [r] Track table names in AnVIL bundles (#6639) --- 
 src/azul/indexer/document.py                  |   9 +-
 src/azul/plugins/metadata/anvil/bundle.py     |   4 +
 .../metadata/anvil/indexer/transform.py       |  58 +++---
 .../plugins/repository/tdr_anvil/__init__.py  | 192 +++++++++---------
 ...2783-aeb6-afea-e022897f4dcf.tdr.anvil.json |   2 +-
 ...5d80-a242-accb-840921351cd5.tdr.anvil.json |   8 +-
 ...2-e274-affe-aabc-eb3db63ad068.results.json |  58 +++---
 ...e274-affe-aabc-eb3db63ad068.tdr.anvil.json |  46 ++---
 test/indexer/test_anvil.py                    |  12 +-
 test/integration_test.py                      |  18 +-
 test/service/test_manifest.py                 |   2 +-
 test/service/test_response_anvil.py           |   4 +-
 12 files changed, 205 insertions(+), 208 deletions(-)

diff --git a/src/azul/indexer/document.py b/src/azul/indexer/document.py
index db57cad72..3f2860f09 100644
--- a/src/azul/indexer/document.py
+++ b/src/azul/indexer/document.py
@@ -1528,13 +1528,10 @@ class Replica(Document[ReplicaCoordinates[E]]):

     """
     #: The type of replica, i.e., what sort of metadata document from the
-    #: underlying data repository we are storing a copy of. Conceptually related
-    #: to the entity type, but its value may be different from the entity type.
-    #: For example, AnVIL replicas use the name of the data table that contains
-    #: the entity, e.g. 'anvil_file', instead of just 'file'.
+    #: underlying data repository we are storing a copy of. In practice, this is
+    #: the same as `self.coordinates.entity.entity_type`, but this isn't
+    #: necessarily the case.
     #:
-    #: We can't model replica types as entity types because we want to hold all
-    #: replicas in a single index per catalog to facilitate quick retrieval.
     #: Typically, all replicas of the same type have similar shapes, just like
     #: contributions for entities of the same type. However, mixing replicas of
     #: different types results in an index containing documents of heterogeneous
diff --git a/src/azul/plugins/metadata/anvil/bundle.py b/src/azul/plugins/metadata/anvil/bundle.py
index e28801857..8df46cb8c 100644
--- a/src/azul/plugins/metadata/anvil/bundle.py
+++ b/src/azul/plugins/metadata/anvil/bundle.py
@@ -116,6 +116,10 @@ def to_entity_link(self,


 @attrs.define(kw_only=True)
 class AnvilBundle(Bundle[BUNDLE_FQID], ABC):
+    # The `entity_type` attribute of these keys contains the entities' BigQuery
+    # table name (e.g. `anvil_sequencingactivity`), not the entity type used for
+    # the contributions (e.g. `activities`). The metadata plugin converts from
+    # the former to the latter during transformation.
     entities: dict[EntityReference, MutableJSON] = attrs.field(factory=dict)
     links: set[EntityLink] = attrs.field(factory=set)
diff --git a/src/azul/plugins/metadata/anvil/indexer/transform.py b/src/azul/plugins/metadata/anvil/indexer/transform.py
index 20107fb91..858899bd5 100644
--- a/src/azul/plugins/metadata/anvil/indexer/transform.py
+++ b/src/azul/plugins/metadata/anvil/indexer/transform.py
@@ -188,20 +188,26 @@ def _transform(self,
         raise NotImplementedError

     def _replicate(self, entity: EntityReference) -> tuple[str, JSON]:
-        return f'anvil_{entity.entity_type}', self.bundle.entities[entity]
+        return entity.entity_type, self.bundle.entities[entity]

-    def _pluralize(self, entity_type: str) -> str:
-        if entity_type == 'diagnosis':
+    def _convert_entity_type(self, entity_type: str) -> str:
+        assert entity_type == 'bundle' or entity_type.startswith('anvil_'), entity_type
+        if entity_type == 'anvil_diagnosis':
+            # Irregular plural form
             return 'diagnoses'
+        elif entity_type.endswith('activity'):
+            # Polymorphic.
Could be `anvil_sequencingactivity`, + # `anvil_assayactivity`, `anvil_activity`, etc + return 'activities' else: - return pluralize(entity_type) + return pluralize(entity_type.removeprefix('anvil_')) def _contains(self, partition: BundlePartition, entity: EntityReference ) -> bool: return ( - self._pluralize(entity.entity_type).endswith(self.entity_type()) + self._convert_entity_type(entity.entity_type) == self.entity_type() and partition.contains(UUID(entity.entity_id)) ) @@ -358,7 +364,7 @@ def _activity(self, activity: EntityReference) -> MutableJSON: field_types = self._activity_types() common_fields = { 'activity_table': activity.entity_type, - 'activity_id': metadata[f'{activity.entity_type}_id'] + 'activity_id': metadata[f'{activity.entity_type.removeprefix("anvil_")}_id'] } # Activities are unique in that they may not contain every field defined # in their field types due to polymorphism, so we need to pad the field @@ -400,7 +406,7 @@ def _file(self, file: EntityReference) -> MutableJSON: uuid=file.entity_id) def _only_dataset(self) -> EntityReference: - return one(self._entities_by_type['dataset']) + return one(self._entities_by_type['anvil_dataset']) @cached_property def _activity_polymorphic_types(self) -> AbstractSet[str]: @@ -408,7 +414,7 @@ def _activity_polymorphic_types(self) -> AbstractSet[str]: anvil_schema, ) return { - table['name'].removeprefix('anvil_') + table['name'] for table in anvil_schema['tables'] if table['name'].endswith('activity') } @@ -465,11 +471,11 @@ def _transform(self, entity: EntityReference) -> Iterable[Contribution]: self._entities_by_type[activity_type] for activity_type in self._activity_polymorphic_types )), - biosamples=self._entities(self._biosample, self._entities_by_type['biosample']), + biosamples=self._entities(self._biosample, self._entities_by_type['anvil_biosample']), datasets=[self._dataset(self._only_dataset())], - diagnoses=self._entities(self._diagnosis, self._entities_by_type['diagnosis']), - donors=self._entities(self._donor, self._entities_by_type['donor']), - files=self._entities(self._file, self._entities_by_type['file']) + diagnoses=self._entities(self._diagnosis, self._entities_by_type['anvil_diagnosis']), + donors=self._entities(self._donor, self._entities_by_type['anvil_donor']), + files=self._entities(self._file, self._entities_by_type['anvil_file']) ) yield self._contribution(contents, entity.entity_id) @@ -517,11 +523,11 @@ def _transform(self, entity: EntityReference) -> Iterable[Contribution]: linked = self._linked_entities(entity) contents = dict( activities=[self._activity(entity)], - biosamples=self._entities(self._biosample, linked['biosample']), + biosamples=self._entities(self._biosample, linked['anvil_biosample']), datasets=[self._dataset(self._only_dataset())], - diagnoses=self._entities(self._diagnosis, linked['diagnosis']), - donors=self._entities(self._donor, linked['donor']), - files=self._entities(self._file, linked['file']) + diagnoses=self._entities(self._diagnosis, linked['anvil_diagnosis']), + donors=self._entities(self._donor, linked['anvil_donor']), + files=self._entities(self._file, linked['anvil_file']) ) yield self._contribution(contents, entity.entity_id) @@ -541,9 +547,9 @@ def _transform(self, entity: EntityReference) -> Iterable[Contribution]: )), biosamples=[self._biosample(entity)], datasets=[self._dataset(self._only_dataset())], - diagnoses=self._entities(self._diagnosis, linked['diagnosis']), - donors=self._entities(self._donor, linked['donor']), - files=self._entities(self._file, 
linked['file']), + diagnoses=self._entities(self._diagnosis, linked['anvil_diagnosis']), + donors=self._entities(self._donor, linked['anvil_donor']), + files=self._entities(self._file, linked['anvil_file']), ) yield self._contribution(contents, entity.entity_id) @@ -589,11 +595,11 @@ def _transform(self, entity: EntityReference) -> Iterable[Contribution]: linked[activity_type] for activity_type in self._activity_polymorphic_types )), - biosamples=self._entities(self._biosample, linked['biosample']), + biosamples=self._entities(self._biosample, linked['anvil_biosample']), datasets=[self._dataset(self._only_dataset())], - diagnoses=self._entities(self._diagnosis, linked['diagnosis']), + diagnoses=self._entities(self._diagnosis, linked['anvil_diagnosis']), donors=[self._donor(entity)], - files=self._entities(self._file, linked['file']), + files=self._entities(self._file, linked['anvil_file']), ) yield self._contribution(contents, entity.entity_id) @@ -613,10 +619,10 @@ def _transform(self, linked[activity_type] for activity_type in self._activity_polymorphic_types )), - biosamples=self._entities(self._biosample, linked['biosample']), + biosamples=self._entities(self._biosample, linked['anvil_biosample']), datasets=[self._dataset(self._only_dataset())], - diagnoses=self._entities(self._diagnosis, linked['diagnosis']), - donors=self._entities(self._donor, linked['donor']), + diagnoses=self._entities(self._diagnosis, linked['anvil_diagnosis']), + donors=self._entities(self._donor, linked['anvil_donor']), files=[self._file(entity)], ) yield self._contribution(contents, entity.entity_id) @@ -630,5 +636,5 @@ def _transform(self, # redundant and impractically large. Therefore, we leave the # hub IDs field empty for datasets and rely on the tenet # that every file is an implicit hub of its parent dataset. - file_hub=None if linked_entity.entity_type == 'dataset' else entity.entity_id, + file_hub=None if linked_entity.entity_type == 'anvil_dataset' else entity.entity_id, ) diff --git a/src/azul/plugins/repository/tdr_anvil/__init__.py b/src/azul/plugins/repository/tdr_anvil/__init__.py index c7811d3d4..530850366 100644 --- a/src/azul/plugins/repository/tdr_anvil/__init__.py +++ b/src/azul/plugins/repository/tdr_anvil/__init__.py @@ -72,7 +72,7 @@ KeyLinks = set[KeyLink] -class BundleEntityType(Enum): +class BundleType(Enum): """ AnVIL snapshots have no inherent notion of a "bundle". When indexing these snapshots, we dynamically construct bundles by selecting individual entities @@ -110,22 +110,22 @@ class BundleEntityType(Enum): dataset fields during aggregation. This bundle contains only a single dataset entity with only the `description` field populated. 
""" - primary: EntityType = 'biosample' - supplementary: EntityType = 'file' - duos: EntityType = 'dataset' + primary = 'anvil_biosample' + supplementary = 'anvil_file' + duos = 'anvil_dataset' class TDRAnvilBundleFQIDJSON(SourcedBundleFQIDJSON): - entity_type: str + table_name: str @attrs.frozen(kw_only=True) class TDRAnvilBundleFQID(TDRBundleFQID): - entity_type: BundleEntityType = attrs.field(converter=BundleEntityType) + table_name: BundleType = attrs.field(converter=BundleType) def to_json(self) -> TDRAnvilBundleFQIDJSON: return dict(super().to_json(), - entity_type=self.entity_type.value) + table_name=self.table_name.value) class TDRAnvilBundle(AnvilBundle[TDRAnvilBundleFQID], TDRBundle): @@ -142,7 +142,7 @@ def add_entity(self, assert entity not in self.entities, entity metadata = dict(row, version=version) - if entity.entity_type == 'file': + if entity.entity_type == 'anvil_file': drs_uri = row['file_ref'] # Validate URI syntax DRSURI.parse(drs_uri) @@ -172,10 +172,10 @@ def _version(self): def _count_subgraphs(self, source: TDRSourceSpec) -> int: rows = self._run_sql(f''' SELECT COUNT(*) AS count - FROM {backtick(self._full_table_name(source, BundleEntityType.primary.value))} + FROM {backtick(self._full_table_name(source, BundleType.primary.value))} UNION ALL SELECT COUNT(*) AS count - FROM {backtick(self._full_table_name(source, BundleEntityType.supplementary.value))} + FROM {backtick(self._full_table_name(source, BundleType.supplementary.value))} WHERE is_supplementary ''') return sum(row['count'] for row in rows) @@ -185,15 +185,15 @@ def _list_bundles(self, prefix: str ) -> list[TDRAnvilBundleFQID]: spec = source.spec - primary = BundleEntityType.primary.value - supplementary = BundleEntityType.supplementary.value - duos = BundleEntityType.duos.value + primary = BundleType.primary.value + supplementary = BundleType.supplementary.value + duos = BundleType.duos.value rows = list(self._run_sql(f''' - SELECT datarepo_row_id, {primary!r} AS entity_type + SELECT datarepo_row_id, {primary!r} AS table_name FROM {backtick(self._full_table_name(spec, primary))} WHERE STARTS_WITH(datarepo_row_id, '{prefix}') UNION ALL - SELECT datarepo_row_id, {supplementary!r} AS entity_type + SELECT datarepo_row_id, {supplementary!r} AS table_name FROM {backtick(self._full_table_name(spec, supplementary))} AS supp WHERE supp.is_supplementary AND STARTS_WITH(datarepo_row_id, '{prefix}') ''' + ( @@ -201,7 +201,7 @@ def _list_bundles(self, if config.duos_service_url is None else f''' UNION ALL - SELECT datarepo_row_id, {duos!r} AS entity_type + SELECT datarepo_row_id, {duos!r} AS table_name FROM {backtick(self._full_table_name(spec, duos))} ''' ))) @@ -218,7 +218,7 @@ def _list_bundles(self, # single dataset. This verification is performed independently and # concurrently for every partition, but only one partition actually # emits the bundle. - if row['entity_type'] == duos: + if row['table_name'] == duos: require(0 == duos_count) duos_count += 1 # Ensure that one partition will always contain the DUOS bundle @@ -229,43 +229,43 @@ def _list_bundles(self, source=source, uuid=bundle_uuid, version=self._version, - entity_type=BundleEntityType(row['entity_type']) + table_name=BundleType(row['table_name']) )) return bundles def resolve_bundle(self, fqid: SourcedBundleFQIDJSON) -> TDRAnvilBundleFQID: - if 'entity_type' not in fqid: - # Resolution of bundles without entity type is expensive, so we only - # support it during canning. 
- assert not config.is_in_lambda, ('Bundle FQID lacks entity type', fqid) + if 'table_name' not in fqid: + # Resolution of bundles without the table name is expensive, so we + # only support it during canning. + assert not config.is_in_lambda, ('Bundle FQID lacks table name', fqid) source = self.source_from_json(fqid['source']) entity_id = uuids.change_version(fqid['uuid'], self.bundle_uuid_version, self.datarepo_row_uuid_version) rows = self._run_sql(' UNION ALL '.join(( f''' - SELECT {entity_type.value!r} AS entity_type - FROM {backtick(self._full_table_name(source.spec, entity_type.value))} + SELECT {bundle_type.value!r} AS table_name + FROM {backtick(self._full_table_name(source.spec, bundle_type.value))} WHERE datarepo_row_id = {entity_id!r} ''' - for entity_type in BundleEntityType + for bundle_type in BundleType ))) fqid = {**fqid, **one(rows)} return super().resolve_bundle(fqid) def _emulate_bundle(self, bundle_fqid: TDRAnvilBundleFQID) -> TDRAnvilBundle: - if bundle_fqid.entity_type is BundleEntityType.primary: + if bundle_fqid.table_name is BundleType.primary: log.info('Bundle %r is a primary bundle', bundle_fqid.uuid) return self._primary_bundle(bundle_fqid) - elif bundle_fqid.entity_type is BundleEntityType.supplementary: + elif bundle_fqid.table_name is BundleType.supplementary: log.info('Bundle %r is a supplementary bundle', bundle_fqid.uuid) return self._supplementary_bundle(bundle_fqid) - elif bundle_fqid.entity_type is BundleEntityType.duos: + elif bundle_fqid.table_name is BundleType.duos: assert config.duos_service_url is not None, bundle_fqid log.info('Bundle %r is a DUOS bundle', bundle_fqid.uuid) return self._duos_bundle(bundle_fqid) else: - assert False, bundle_fqid.entity_type + assert False, bundle_fqid.table_name def _primary_bundle(self, bundle_fqid: TDRAnvilBundleFQID) -> TDRAnvilBundle: source = bundle_fqid.source @@ -296,15 +296,15 @@ def _primary_bundle(self, bundle_fqid: TDRAnvilBundleFQID) -> TDRAnvilBundle: result = TDRAnvilBundle(fqid=bundle_fqid) entities_by_key: dict[KeyReference, EntityReference] = {} for entity_type, typed_keys in sorted(keys_by_type.items()): - pk_column = entity_type + '_id' + pk_column = entity_type.removeprefix('anvil_') + '_id' rows = self._retrieve_entities(source.spec, entity_type, typed_keys) - if entity_type == 'donor': + if entity_type == 'anvil_donor': # We expect that the foreign key `part_of_dataset_id` is # redundant for biosamples and donors. To simplify our queries, # we do not follow the latter during the graph traversal. # Here, we validate our expectation. Note that the key is an # array for biosamples, but not for donors. 
- dataset_id: Key = one(keys_by_type['dataset']) + dataset_id: Key = one(keys_by_type['anvil_dataset']) for row in rows: donor_dataset_id = row['part_of_dataset_id'] require(donor_dataset_id == dataset_id, donor_dataset_id, dataset_id) @@ -321,24 +321,24 @@ def _supplementary_bundle(self, bundle_fqid: TDRAnvilBundleFQID) -> TDRAnvilBund self.bundle_uuid_version, self.datarepo_row_uuid_version) source = bundle_fqid.source.spec - bundle_entity_type = bundle_fqid.entity_type.value + table_name = bundle_fqid.table_name.value result = TDRAnvilBundle(fqid=bundle_fqid) - columns = self._columns(bundle_entity_type) + columns = self._columns(table_name) bundle_entity = dict(one(self._run_sql(f''' SELECT {', '.join(sorted(columns))} - FROM {backtick(self._full_table_name(source, bundle_entity_type))} + FROM {backtick(self._full_table_name(source, table_name))} WHERE datarepo_row_id = '{entity_id}' '''))) - linked_entity_type = 'dataset' - columns = self._columns(linked_entity_type) + dataset_table = 'anvil_dataset' + columns = self._columns(dataset_table) linked_entity = dict(one(self._run_sql(f''' SELECT {', '.join(sorted(columns))} - FROM {backtick(self._full_table_name(source, linked_entity_type))} + FROM {backtick(self._full_table_name(source, dataset_table))} '''))) link_args = {} for entity_type, row, arg in [ - (bundle_entity_type, bundle_entity, 'outputs'), - (linked_entity_type, linked_entity, 'inputs') + ('anvil_file', bundle_entity, 'outputs'), + (dataset_table, linked_entity, 'inputs') ]: entity_ref = EntityReference(entity_type=entity_type, entity_id=row['datarepo_row_id']) result.add_entity(entity_ref, self._version, row) @@ -352,7 +352,7 @@ def _duos_bundle(self, bundle_fqid: TDRAnvilBundleFQID) -> TDRAnvilBundle: entity_id = change_version(bundle_fqid.uuid, self.bundle_uuid_version, self.datarepo_row_uuid_version) - entity = EntityReference(entity_type=bundle_fqid.entity_type.value, + entity = EntityReference(entity_type='anvil_dataset', entity_id=entity_id) bundle = TDRAnvilBundle(fqid=bundle_fqid) bundle.add_entity(entity=entity, @@ -366,27 +366,21 @@ def _bundle_entity(self, bundle_fqid: TDRAnvilBundleFQID) -> KeyReference: entity_id = uuids.change_version(bundle_uuid, self.bundle_uuid_version, self.datarepo_row_uuid_version) - entity_type = bundle_fqid.entity_type.value - pk_column = entity_type + '_id' + table_name = bundle_fqid.table_name.value + pk_column = table_name.removeprefix('anvil_') + '_id' bundle_entity = one(self._run_sql(f''' SELECT {pk_column} - FROM {backtick(self._full_table_name(source.spec, entity_type))} + FROM {backtick(self._full_table_name(source.spec, table_name))} WHERE datarepo_row_id = '{entity_id}' '''))[pk_column] - bundle_entity = KeyReference(key=bundle_entity, entity_type=entity_type) + bundle_entity = KeyReference(key=bundle_entity, entity_type=table_name) log.info('Bundle UUID %r resolved to primary key %r in table %r', - bundle_uuid, bundle_entity.key, entity_type) + bundle_uuid, bundle_entity.key, table_name) return bundle_entity - def _full_table_name(self, source: TDRSourceSpec, table_name: str) -> str: - prefixed = f'anvil_{table_name}' - if prefixed in self._schema_columns: - table_name = prefixed - return super()._full_table_name(source, table_name) - def _consolidate_by_type(self, entities: Keys) -> MutableKeysByType: result = { - table['name'].removeprefix('anvil_'): set() + table['name']: set() for table in anvil_schema['tables'] } for e in entities: @@ -398,8 +392,8 @@ def _follow_upstream(self, entities: KeysByType ) -> KeyLinks: 
return set.union( - self._upstream_from_files(source, entities['file']), - self._upstream_from_biosamples(source, entities['biosample']), + self._upstream_from_files(source, entities['anvil_file']), + self._upstream_from_biosamples(source, entities['anvil_biosample']), # The direction of the edges linking donors to diagnoses is # contentious. Currently, we model diagnoses as being upstream from # donors. This is counterintuitive, but has two important practical @@ -426,7 +420,7 @@ def _follow_upstream(self, # entities that are upstream from donors are datasets, which do not # perform a traversal and are treated as being linked to every # entity in the bundle regardless of the edges in the graph. - self._diagnoses_from_donors(source, entities['donor']) + self._diagnoses_from_donors(source, entities['anvil_donor']) ) def _follow_downstream(self, @@ -434,8 +428,8 @@ def _follow_downstream(self, entities: KeysByType ) -> KeyLinks: return set.union( - self._downstream_from_biosamples(source, entities['biosample']), - self._downstream_from_files(source, entities['file']) + self._downstream_from_biosamples(source, entities['anvil_biosample']), + self._downstream_from_files(source, entities['anvil_file']) ) def _upstream_from_biosamples(self, @@ -445,19 +439,19 @@ def _upstream_from_biosamples(self, if biosample_ids: rows = self._run_sql(f''' SELECT b.biosample_id, b.donor_id, b.part_of_dataset_id - FROM {backtick(self._full_table_name(source, 'biosample'))} AS b + FROM {backtick(self._full_table_name(source, 'anvil_biosample'))} AS b WHERE b.biosample_id IN ({', '.join(map(repr, biosample_ids))}) ''') result: KeyLinks = set() for row in rows: - downstream_ref = KeyReference(entity_type='biosample', + downstream_ref = KeyReference(entity_type='anvil_biosample', key=row['biosample_id']) result.add(KeyLink(outputs={downstream_ref}, - inputs={KeyReference(entity_type='dataset', + inputs={KeyReference(entity_type='anvil_dataset', key=one(row['part_of_dataset_id']))})) for donor_id in row['donor_id']: result.add(KeyLink(outputs={downstream_ref}, - inputs={KeyReference(entity_type='donor', + inputs={KeyReference(entity_type='anvil_donor', key=donor_id)})) return result else: @@ -470,53 +464,53 @@ def _upstream_from_files(self, if file_ids: rows = self._run_sql(f''' WITH file AS ( - SELECT f.file_id FROM {backtick(self._full_table_name(source, 'file'))} AS f + SELECT f.file_id FROM {backtick(self._full_table_name(source, 'anvil_file'))} AS f WHERE f.file_id IN ({', '.join(map(repr, file_ids))}) ) SELECT f.file_id AS generated_file_id, - 'alignmentactivity' AS activity_table, + 'anvil_alignmentactivity' AS activity_table, ama.alignmentactivity_id AS activity_id, ama.used_file_id AS uses_file_id, [] AS uses_biosample_id, FROM file AS f - JOIN {backtick(self._full_table_name(source, 'alignmentactivity'))} AS ama + JOIN {backtick(self._full_table_name(source, 'anvil_alignmentactivity'))} AS ama ON f.file_id IN UNNEST(ama.generated_file_id) UNION ALL SELECT f.file_id, - 'assayactivity', + 'anvil_assayactivity', aya.assayactivity_id, [], aya.used_biosample_id, FROM file AS f - JOIN {backtick(self._full_table_name(source, 'assayactivity'))} AS aya + JOIN {backtick(self._full_table_name(source, 'anvil_assayactivity'))} AS aya ON f.file_id IN UNNEST(aya.generated_file_id) UNION ALL SELECT f.file_id, - 'sequencingactivity', + 'anvil_sequencingactivity', sqa.sequencingactivity_id, [], sqa.used_biosample_id, FROM file AS f - JOIN {backtick(self._full_table_name(source, 'sequencingactivity'))} AS sqa + JOIN 
{backtick(self._full_table_name(source, 'anvil_sequencingactivity'))} AS sqa ON f.file_id IN UNNEST(sqa.generated_file_id) UNION ALL SELECT f.file_id, - 'variantcallingactivity', + 'anvil_variantcallingactivity', vca.variantcallingactivity_id, vca.used_file_id, [] FROM file AS f - JOIN {backtick(self._full_table_name(source, 'variantcallingactivity'))} AS vca + JOIN {backtick(self._full_table_name(source, 'anvil_variantcallingactivity'))} AS vca ON f.file_id IN UNNEST(vca.generated_file_id) UNION ALL SELECT f.file_id, - 'activity', + 'anvil_activity', a.activity_id, a.used_file_id, a.used_biosample_id, FROM file AS f - JOIN {backtick(self._full_table_name(source, 'activity'))} AS a + JOIN {backtick(self._full_table_name(source, 'anvil_activity'))} AS a ON f.file_id IN UNNEST(a.generated_file_id) ''') return { @@ -525,11 +519,11 @@ def _upstream_from_files(self, # The generated link is not a complete representation of the # upstream activity because it does not include generated files # that are not ancestors of the downstream file - outputs={KeyReference(entity_type='file', key=row['generated_file_id'])}, + outputs={KeyReference(entity_type='anvil_file', key=row['generated_file_id'])}, inputs={ KeyReference(entity_type=entity_type, key=key) - for entity_type, column in [('file', 'uses_file_id'), - ('biosample', 'uses_biosample_id')] + for entity_type, column in [('anvil_file', 'uses_file_id'), + ('anvil_biosample', 'uses_biosample_id')] for key in row[column] } ) @@ -545,12 +539,12 @@ def _diagnoses_from_donors(self, if donor_ids: rows = self._run_sql(f''' SELECT dgn.donor_id, dgn.diagnosis_id - FROM {backtick(self._full_table_name(source, 'diagnosis'))} as dgn + FROM {backtick(self._full_table_name(source, 'anvil_diagnosis'))} as dgn WHERE dgn.donor_id IN ({', '.join(map(repr, donor_ids))}) ''') return { - KeyLink(inputs={KeyReference(key=row['diagnosis_id'], entity_type='diagnosis')}, - outputs={KeyReference(key=row['donor_id'], entity_type='donor')}, + KeyLink(inputs={KeyReference(key=row['diagnosis_id'], entity_type='anvil_diagnosis')}, + outputs={KeyReference(key=row['donor_id'], entity_type='anvil_donor')}, activity=None) for row in rows } @@ -566,24 +560,24 @@ def _downstream_from_biosamples(self, WITH activities AS ( SELECT sqa.sequencingactivity_id as activity_id, - 'sequencingactivity' as activity_table, + 'anvil_sequencingactivity' as activity_table, sqa.used_biosample_id, sqa.generated_file_id - FROM {backtick(self._full_table_name(source, 'sequencingactivity'))} AS sqa + FROM {backtick(self._full_table_name(source, 'anvil_sequencingactivity'))} AS sqa UNION ALL SELECT aya.assayactivity_id, - 'assayactivity', + 'anvil_assayactivity', aya.used_biosample_id, aya.generated_file_id, - FROM {backtick(self._full_table_name(source, 'assayactivity'))} AS aya + FROM {backtick(self._full_table_name(source, 'anvil_assayactivity'))} AS aya UNION ALL SELECT a.activity_id, - 'activity', + 'anvil_activity', a.used_biosample_id, a.generated_file_id, - FROM {backtick(self._full_table_name(source, 'activity'))} AS a + FROM {backtick(self._full_table_name(source, 'anvil_activity'))} AS a ) SELECT biosample_id, @@ -594,9 +588,9 @@ def _downstream_from_biosamples(self, WHERE biosample_id IN ({', '.join(map(repr, biosample_ids))}) ''') return { - KeyLink(inputs={KeyReference(key=row['biosample_id'], entity_type='biosample')}, + KeyLink(inputs={KeyReference(key=row['biosample_id'], entity_type='anvil_biosample')}, outputs={ - KeyReference(key=output_id, entity_type='file') + 
KeyReference(key=output_id, entity_type='anvil_file') for output_id in row['generated_file_id'] }, activity=KeyReference(key=row['activity_id'], entity_type=row['activity_table'])) @@ -614,22 +608,22 @@ def _downstream_from_files(self, WITH activities AS ( SELECT ala.alignmentactivity_id AS activity_id, - 'alignmentactivity' AS activity_table, + 'anvil_alignmentactivity' AS activity_table, ala.used_file_id, ala.generated_file_id - FROM {backtick(self._full_table_name(source, 'alignmentactivity'))} AS ala + FROM {backtick(self._full_table_name(source, 'anvil_alignmentactivity'))} AS ala UNION ALL SELECT vca.variantcallingactivity_id, - 'variantcallingactivity', + 'anvil_variantcallingactivity', vca.used_file_id, vca.generated_file_id - FROM {backtick(self._full_table_name(source, 'variantcallingactivity'))} AS vca + FROM {backtick(self._full_table_name(source, 'anvil_variantcallingactivity'))} AS vca UNION ALL SELECT a.activity_id, - 'activity', + 'anvil_activity', a.used_file_id, a.generated_file_id - FROM {backtick(self._full_table_name(source, 'activity'))} AS a + FROM {backtick(self._full_table_name(source, 'anvil_activity'))} AS a ) SELECT used_file_id, @@ -640,9 +634,9 @@ def _downstream_from_files(self, WHERE used_file_id IN ({', '.join(map(repr, file_ids))}) ''') return { - KeyLink(inputs={KeyReference(key=row['used_file_id'], entity_type='file')}, + KeyLink(inputs={KeyReference(key=row['used_file_id'], entity_type='anvil_file')}, outputs={ - KeyReference(key=file_id, entity_type='file') + KeyReference(key=file_id, entity_type='anvil_file') for file_id in row['generated_file_id'] }, activity=KeyReference(key=row['activity_id'], entity_type=row['activity_table'])) @@ -657,9 +651,9 @@ def _retrieve_entities(self, keys: AbstractSet[Key], ) -> MutableJSONs: if keys: - table_name = self._full_table_name(source, entity_type) columns = self._columns(entity_type) - pk_column = entity_type + '_id' + table_name = self._full_table_name(source, entity_type) + pk_column = entity_type.removeprefix('anvil_') + '_id' assert pk_column in columns, entity_type log.debug('Retrieving %i entities of type %r ...', len(keys), entity_type) rows = self._run_sql(f''' @@ -693,7 +687,7 @@ def convert_column(value): for table in anvil_schema['tables'] } - def _columns(self, entity_type: EntityType) -> set[str]: - columns = set(self._schema_columns[f'anvil_{entity_type}']) + def _columns(self, table_name: str) -> set[str]: + columns = set(self._schema_columns[table_name]) columns.add('datarepo_row_id') return columns diff --git a/test/indexer/data/2370f948-2783-aeb6-afea-e022897f4dcf.tdr.anvil.json b/test/indexer/data/2370f948-2783-aeb6-afea-e022897f4dcf.tdr.anvil.json index 89a1b7b8e..7da9d5f3c 100644 --- a/test/indexer/data/2370f948-2783-aeb6-afea-e022897f4dcf.tdr.anvil.json +++ b/test/indexer/data/2370f948-2783-aeb6-afea-e022897f4dcf.tdr.anvil.json @@ -1,6 +1,6 @@ { "entities": { - "dataset/2370f948-2783-4eb6-afea-e022897f4dcf": { + "anvil_dataset/2370f948-2783-4eb6-afea-e022897f4dcf": { "description": "Study description from DUOS", "version": "2022-06-01T00:00:00.000000Z" } diff --git a/test/indexer/data/6b0f6c0f-5d80-a242-accb-840921351cd5.tdr.anvil.json b/test/indexer/data/6b0f6c0f-5d80-a242-accb-840921351cd5.tdr.anvil.json index df83b4bbd..e689fbe3f 100644 --- a/test/indexer/data/6b0f6c0f-5d80-a242-accb-840921351cd5.tdr.anvil.json +++ b/test/indexer/data/6b0f6c0f-5d80-a242-accb-840921351cd5.tdr.anvil.json @@ -1,6 +1,6 @@ { "entities": { - "dataset/2370f948-2783-4eb6-afea-e022897f4dcf": { + 
"anvil_dataset/2370f948-2783-4eb6-afea-e022897f4dcf": { "consent_group": [ "DS-BDIS" ], @@ -23,7 +23,7 @@ "title": "ANVIL_CMG_UWASH_DS_BDIS", "version": "2022-06-01T00:00:00.000000Z" }, - "file/6b0f6c0f-5d80-4242-accb-840921351cd5": { + "anvil_file/6b0f6c0f-5d80-4242-accb-840921351cd5": { "data_modality": [], "datarepo_row_id": "6b0f6c0f-5d80-4242-accb-840921351cd5", "file_format": ".txt", @@ -46,11 +46,11 @@ "links": [ { "inputs": [ - "dataset/2370f948-2783-4eb6-afea-e022897f4dcf" + "anvil_dataset/2370f948-2783-4eb6-afea-e022897f4dcf" ], "activity": null, "outputs": [ - "file/6b0f6c0f-5d80-4242-accb-840921351cd5" + "anvil_file/6b0f6c0f-5d80-4242-accb-840921351cd5" ] } ] diff --git a/test/indexer/data/826dea02-e274-affe-aabc-eb3db63ad068.results.json b/test/indexer/data/826dea02-e274-affe-aabc-eb3db63ad068.results.json index 015c0978b..5d3fd8243 100644 --- a/test/indexer/data/826dea02-e274-affe-aabc-eb3db63ad068.results.json +++ b/test/indexer/data/826dea02-e274-affe-aabc-eb3db63ad068.results.json @@ -14,7 +14,7 @@ "sequencing:d4f6c0c4-1e11-438e-8218-cfea63b8b051" ], "activity_id": "18b3be87-e26b-4376-0d8d-c1e370e90e07", - "activity_table": "sequencingactivity", + "activity_table": "anvil_sequencingactivity", "activity_type": "Sequencing", "assay_type": [ "~null" @@ -256,7 +256,7 @@ "sequencing:d4f6c0c4-1e11-438e-8218-cfea63b8b051" ], "activity_id": "18b3be87-e26b-4376-0d8d-c1e370e90e07", - "activity_table": "sequencingactivity", + "activity_table": "anvil_sequencingactivity", "activity_type": "Sequencing", "assay_type": [ "~null" @@ -431,7 +431,7 @@ { "_index": "azul_v2_nadove4_test_replica", "_type": "_doc", - "_id": "sequencingactivity_1509ef40-d1ba-440d-b298-16b7c173dcd4_deaef50719859a3e58b9696a0ca061cf9750e7bb", + "_id": "anvil_sequencingactivity_1509ef40-d1ba-440d-b298-16b7c173dcd4_deaef50719859a3e58b9696a0ca061cf9750e7bb", "_score": 1.0, "_source": { "entity_id": "1509ef40-d1ba-440d-b298-16b7c173dcd4", @@ -478,7 +478,7 @@ "18b3be87-e26b-4376-0d8d-c1e370e90e07" ], "activity_table": [ - "sequencingactivity" + "anvil_sequencingactivity" ], "activity_type": [ "Sequencing" @@ -690,7 +690,7 @@ "sequencing:d4f6c0c4-1e11-438e-8218-cfea63b8b051" ], "activity_id": "18b3be87-e26b-4376-0d8d-c1e370e90e07", - "activity_table": "sequencingactivity", + "activity_table": "anvil_sequencingactivity", "activity_type": "Sequencing", "assay_type": [ "~null" @@ -865,7 +865,7 @@ { "_index": "azul_v2_nadove4_test_replica", "_type": "_doc", - "_id": "file_15b76f9c-6b46-433f-851d-34e89f1b9ba6_aec7ac65991cc6ea974a7a86fc30ec96c081cfcf", + "_id": "anvil_file_15b76f9c-6b46-433f-851d-34e89f1b9ba6_aec7ac65991cc6ea974a7a86fc30ec96c081cfcf", "_score": 1.0, "_source": { "entity_id": "15b76f9c-6b46-433f-851d-34e89f1b9ba6", @@ -897,7 +897,7 @@ { "_index": "azul_v2_nadove4_test_replica", "_type": "_doc", - "_id": "diagnosis_15d85d30-ad4a-4f50-87a8-a27f59dd1b5f_fc4d805825bb627c93588467d22caac1b76f4a17", + "_id": "anvil_diagnosis_15d85d30-ad4a-4f50-87a8-a27f59dd1b5f_fc4d805825bb627c93588467d22caac1b76f4a17", "_score": 1.0, "_source": { "entity_id": "15d85d30-ad4a-4f50-87a8-a27f59dd1b5f", @@ -953,7 +953,7 @@ "a60c5138-3749-f7cb-8714-52d389ad5231" ], "activity_table": [ - "sequencingactivity" + "anvil_sequencingactivity" ], "activity_type": [ "Sequencing" @@ -1252,7 +1252,7 @@ "sequencing:d4f6c0c4-1e11-438e-8218-cfea63b8b051" ], "activity_id": "18b3be87-e26b-4376-0d8d-c1e370e90e07", - "activity_table": "sequencingactivity", + "activity_table": "anvil_sequencingactivity", "activity_type": "Sequencing", "assay_type": [ "~null" 
@@ -1270,7 +1270,7 @@ "sequencing:a6c663c7-6f26-4ed2-af9d-48e9c709a22b" ], "activity_id": "a60c5138-3749-f7cb-8714-52d389ad5231", - "activity_table": "sequencingactivity", + "activity_table": "anvil_sequencingactivity", "activity_type": "Sequencing", "assay_type": [ "~null" @@ -1472,7 +1472,7 @@ { "_index": "azul_v2_nadove4_test_replica", "_type": "_doc", - "_id": "dataset_2370f948-2783-4eb6-afea-e022897f4dcf_f10ae38d485330bf8fc7a95f7eb06626b984ad6a", + "_id": "anvil_dataset_2370f948-2783-4eb6-afea-e022897f4dcf_f10ae38d485330bf8fc7a95f7eb06626b984ad6a", "_score": 1.0, "_source": { "entity_id": "2370f948-2783-4eb6-afea-e022897f4dcf", @@ -1523,7 +1523,7 @@ "a60c5138-3749-f7cb-8714-52d389ad5231" ], "activity_table": [ - "sequencingactivity" + "anvil_sequencingactivity" ], "activity_type": [ "Sequencing" @@ -1723,7 +1723,7 @@ { "_index": "azul_v2_nadove4_test_replica", "_type": "_doc", - "_id": "file_3b17377b-16b1-431c-9967-e5d01fc5923f_669b7664f4b6865a0604285f923995acaf84b4ab", + "_id": "anvil_file_3b17377b-16b1-431c-9967-e5d01fc5923f_669b7664f4b6865a0604285f923995acaf84b4ab", "_score": 1.0, "_source": { "entity_id": "3b17377b-16b1-431c-9967-e5d01fc5923f", @@ -1767,7 +1767,7 @@ "sequencing:a6c663c7-6f26-4ed2-af9d-48e9c709a22b" ], "activity_id": "a60c5138-3749-f7cb-8714-52d389ad5231", - "activity_table": "sequencingactivity", + "activity_table": "anvil_sequencingactivity", "activity_type": "Sequencing", "assay_type": [ "~null" @@ -1954,7 +1954,7 @@ "sequencing:a6c663c7-6f26-4ed2-af9d-48e9c709a22b" ], "activity_id": "a60c5138-3749-f7cb-8714-52d389ad5231", - "activity_table": "sequencingactivity", + "activity_table": "anvil_sequencingactivity", "activity_type": "Sequencing", "assay_type": [ "~null" @@ -2184,7 +2184,7 @@ { "_index": "azul_v2_nadove4_test_replica", "_type": "_doc", - "_id": "sequencingactivity_816e364e-1193-4e5b-a91a-14e4b009157c_a9ad806d7904ac133ff713446e13e525dc37e263", + "_id": "anvil_sequencingactivity_816e364e-1193-4e5b-a91a-14e4b009157c_a9ad806d7904ac133ff713446e13e525dc37e263", "_score": 1.0, "_source": { "entity_id": "816e364e-1193-4e5b-a91a-14e4b009157c", @@ -2226,7 +2226,7 @@ "sequencing:a6c663c7-6f26-4ed2-af9d-48e9c709a22b" ], "activity_id": "a60c5138-3749-f7cb-8714-52d389ad5231", - "activity_table": "sequencingactivity", + "activity_table": "anvil_sequencingactivity", "activity_type": "Sequencing", "assay_type": [ "~null" @@ -2421,7 +2421,7 @@ "a60c5138-3749-f7cb-8714-52d389ad5231" ], "activity_table": [ - "sequencingactivity" + "anvil_sequencingactivity" ], "activity_type": [ "Sequencing" @@ -2712,7 +2712,7 @@ "sequencing:d4f6c0c4-1e11-438e-8218-cfea63b8b051" ], "activity_id": "18b3be87-e26b-4376-0d8d-c1e370e90e07", - "activity_table": "sequencingactivity", + "activity_table": "anvil_sequencingactivity", "activity_type": "Sequencing", "assay_type": [ "~null" @@ -2730,7 +2730,7 @@ "sequencing:a6c663c7-6f26-4ed2-af9d-48e9c709a22b" ], "activity_id": "a60c5138-3749-f7cb-8714-52d389ad5231", - "activity_table": "sequencingactivity", + "activity_table": "anvil_sequencingactivity", "activity_type": "Sequencing", "assay_type": [ "~null" @@ -2932,7 +2932,7 @@ { "_index": "azul_v2_nadove4_test_replica", "_type": "_doc", - "_id": "biosample_826dea02-e274-4ffe-aabc-eb3db63ad068_52f78e49c9a6e761dfd87e0d5e719d4e0099b80c", + "_id": "anvil_biosample_826dea02-e274-4ffe-aabc-eb3db63ad068_52f78e49c9a6e761dfd87e0d5e719d4e0099b80c", "_score": 1.0, "_source": { "entity_id": "826dea02-e274-4ffe-aabc-eb3db63ad068", @@ -2987,7 +2987,7 @@ "a60c5138-3749-f7cb-8714-52d389ad5231" ], 
"activity_table": [ - "sequencingactivity" + "anvil_sequencingactivity" ], "activity_type": [ "Sequencing" @@ -3292,7 +3292,7 @@ "sequencing:d4f6c0c4-1e11-438e-8218-cfea63b8b051" ], "activity_id": "18b3be87-e26b-4376-0d8d-c1e370e90e07", - "activity_table": "sequencingactivity", + "activity_table": "anvil_sequencingactivity", "activity_type": "Sequencing", "assay_type": [ "~null" @@ -3310,7 +3310,7 @@ "sequencing:a6c663c7-6f26-4ed2-af9d-48e9c709a22b" ], "activity_id": "a60c5138-3749-f7cb-8714-52d389ad5231", - "activity_table": "sequencingactivity", + "activity_table": "anvil_sequencingactivity", "activity_type": "Sequencing", "assay_type": [ "~null" @@ -3512,7 +3512,7 @@ { "_index": "azul_v2_nadove4_test_replica", "_type": "_doc", - "_id": "diagnosis_939a4bd3-86ed-4a8a-81f4-fbe0ee673461_f0733af0a2ffd8001c329588ac51db1d72ad33b7", + "_id": "anvil_diagnosis_939a4bd3-86ed-4a8a-81f4-fbe0ee673461_f0733af0a2ffd8001c329588ac51db1d72ad33b7", "_score": 1.0, "_source": { "entity_id": "939a4bd3-86ed-4a8a-81f4-fbe0ee673461", @@ -3568,7 +3568,7 @@ "a60c5138-3749-f7cb-8714-52d389ad5231" ], "activity_table": [ - "sequencingactivity" + "anvil_sequencingactivity" ], "activity_type": [ "Sequencing" @@ -3865,7 +3865,7 @@ "sequencing:d4f6c0c4-1e11-438e-8218-cfea63b8b051" ], "activity_id": "18b3be87-e26b-4376-0d8d-c1e370e90e07", - "activity_table": "sequencingactivity", + "activity_table": "anvil_sequencingactivity", "activity_type": "Sequencing", "assay_type": [ "~null" @@ -3883,7 +3883,7 @@ "sequencing:a6c663c7-6f26-4ed2-af9d-48e9c709a22b" ], "activity_id": "a60c5138-3749-f7cb-8714-52d389ad5231", - "activity_table": "sequencingactivity", + "activity_table": "anvil_sequencingactivity", "activity_type": "Sequencing", "assay_type": [ "~null" @@ -4085,7 +4085,7 @@ { "_index": "azul_v2_nadove4_test_replica", "_type": "_doc", - "_id": "donor_bfd991f2-2797-4083-972a-da7c6d7f1b2e_4ac20193963c27e42be696a6006a071f6cee4472", + "_id": "anvil_donor_bfd991f2-2797-4083-972a-da7c6d7f1b2e_4ac20193963c27e42be696a6006a071f6cee4472", "_score": 1.0, "_source": { "entity_id": "bfd991f2-2797-4083-972a-da7c6d7f1b2e", diff --git a/test/indexer/data/826dea02-e274-affe-aabc-eb3db63ad068.tdr.anvil.json b/test/indexer/data/826dea02-e274-affe-aabc-eb3db63ad068.tdr.anvil.json index 96d468b36..9c461ee99 100644 --- a/test/indexer/data/826dea02-e274-affe-aabc-eb3db63ad068.tdr.anvil.json +++ b/test/indexer/data/826dea02-e274-affe-aabc-eb3db63ad068.tdr.anvil.json @@ -1,6 +1,6 @@ { "entities": { - "biosample/826dea02-e274-4ffe-aabc-eb3db63ad068": { + "anvil_biosample/826dea02-e274-4ffe-aabc-eb3db63ad068": { "anatomical_site": null, "apriori_cell_type": [], "biosample_id": "f9d40cf6-37b8-22f3-ce35-0dc614d2452b", @@ -21,7 +21,7 @@ ], "version": "2022-06-01T00:00:00.000000Z" }, - "dataset/2370f948-2783-4eb6-afea-e022897f4dcf": { + "anvil_dataset/2370f948-2783-4eb6-afea-e022897f4dcf": { "consent_group": [ "DS-BDIS" ], @@ -44,7 +44,7 @@ "title": "ANVIL_CMG_UWASH_DS_BDIS", "version": "2022-06-01T00:00:00.000000Z" }, - "diagnosis/15d85d30-ad4a-4f50-87a8-a27f59dd1b5f": { + "anvil_diagnosis/15d85d30-ad4a-4f50-87a8-a27f59dd1b5f": { "datarepo_row_id": "15d85d30-ad4a-4f50-87a8-a27f59dd1b5f", "diagnosis_age_lower_bound": null, "diagnosis_age_unit": null, @@ -66,7 +66,7 @@ ], "version": "2022-06-01T00:00:00.000000Z" }, - "diagnosis/939a4bd3-86ed-4a8a-81f4-fbe0ee673461": { + "anvil_diagnosis/939a4bd3-86ed-4a8a-81f4-fbe0ee673461": { "datarepo_row_id": "939a4bd3-86ed-4a8a-81f4-fbe0ee673461", "diagnosis_age_lower_bound": null, "diagnosis_age_unit": null, @@ -88,7 
+88,7 @@ ], "version": "2022-06-01T00:00:00.000000Z" }, - "donor/bfd991f2-2797-4083-972a-da7c6d7f1b2e": { + "anvil_donor/bfd991f2-2797-4083-972a-da7c6d7f1b2e": { "datarepo_row_id": "bfd991f2-2797-4083-972a-da7c6d7f1b2e", "donor_id": "1e2bd7e5-f45e-a391-daea-7c060be76acd", "genetic_ancestry": [], @@ -103,7 +103,7 @@ ], "version": "2022-06-01T00:00:00.000000Z" }, - "file/15b76f9c-6b46-433f-851d-34e89f1b9ba6": { + "anvil_file/15b76f9c-6b46-433f-851d-34e89f1b9ba6": { "data_modality": [], "datarepo_row_id": "15b76f9c-6b46-433f-851d-34e89f1b9ba6", "file_format": ".vcf.gz", @@ -122,7 +122,7 @@ "sha256": "", "crc32": "" }, - "file/3b17377b-16b1-431c-9967-e5d01fc5923f": { + "anvil_file/3b17377b-16b1-431c-9967-e5d01fc5923f": { "data_modality": [], "datarepo_row_id": "3b17377b-16b1-431c-9967-e5d01fc5923f", "file_format": ".bam", @@ -141,7 +141,7 @@ "sha256": "", "crc32": "" }, - "sequencingactivity/1509ef40-d1ba-440d-b298-16b7c173dcd4": { + "anvil_sequencingactivity/1509ef40-d1ba-440d-b298-16b7c173dcd4": { "activity_type": "Sequencing", "assay_type": [], "data_modality": [], @@ -158,7 +158,7 @@ ], "version": "2022-06-01T00:00:00.000000Z" }, - "sequencingactivity/816e364e-1193-4e5b-a91a-14e4b009157c": { + "anvil_sequencingactivity/816e364e-1193-4e5b-a91a-14e4b009157c": { "activity_type": "Sequencing", "assay_type": [], "data_modality": [], @@ -179,56 +179,56 @@ "links": [ { "inputs": [ - "biosample/826dea02-e274-4ffe-aabc-eb3db63ad068" + "anvil_biosample/826dea02-e274-4ffe-aabc-eb3db63ad068" ], - "activity": "sequencingactivity/816e364e-1193-4e5b-a91a-14e4b009157c", + "activity": "anvil_sequencingactivity/816e364e-1193-4e5b-a91a-14e4b009157c", "outputs": [ - "file/3b17377b-16b1-431c-9967-e5d01fc5923f" + "anvil_file/3b17377b-16b1-431c-9967-e5d01fc5923f" ] }, { "inputs": [ - "biosample/826dea02-e274-4ffe-aabc-eb3db63ad068" + "anvil_biosample/826dea02-e274-4ffe-aabc-eb3db63ad068" ], - "activity": "sequencingactivity/1509ef40-d1ba-440d-b298-16b7c173dcd4", + "activity": "anvil_sequencingactivity/1509ef40-d1ba-440d-b298-16b7c173dcd4", "outputs": [ - "file/15b76f9c-6b46-433f-851d-34e89f1b9ba6" + "anvil_file/15b76f9c-6b46-433f-851d-34e89f1b9ba6" ] }, { "inputs": [ - "dataset/2370f948-2783-4eb6-afea-e022897f4dcf" + "anvil_dataset/2370f948-2783-4eb6-afea-e022897f4dcf" ], "activity": null, "outputs": [ - "biosample/826dea02-e274-4ffe-aabc-eb3db63ad068" + "anvil_biosample/826dea02-e274-4ffe-aabc-eb3db63ad068" ] }, { "inputs": [ - "diagnosis/15d85d30-ad4a-4f50-87a8-a27f59dd1b5f" + "anvil_diagnosis/15d85d30-ad4a-4f50-87a8-a27f59dd1b5f" ], "activity": null, "outputs": [ - "donor/bfd991f2-2797-4083-972a-da7c6d7f1b2e" + "anvil_donor/bfd991f2-2797-4083-972a-da7c6d7f1b2e" ] }, { "inputs": [ - "diagnosis/939a4bd3-86ed-4a8a-81f4-fbe0ee673461" + "anvil_diagnosis/939a4bd3-86ed-4a8a-81f4-fbe0ee673461" ], "activity": null, "outputs": [ - "donor/bfd991f2-2797-4083-972a-da7c6d7f1b2e" + "anvil_donor/bfd991f2-2797-4083-972a-da7c6d7f1b2e" ] }, { "inputs": [ - "donor/bfd991f2-2797-4083-972a-da7c6d7f1b2e" + "anvil_donor/bfd991f2-2797-4083-972a-da7c6d7f1b2e" ], "activity": null, "outputs": [ - "biosample/826dea02-e274-4ffe-aabc-eb3db63ad068" + "anvil_biosample/826dea02-e274-4ffe-aabc-eb3db63ad068" ] } ] diff --git a/test/indexer/test_anvil.py b/test/indexer/test_anvil.py index 3ad0d3b71..263f833d1 100644 --- a/test/indexer/test_anvil.py +++ b/test/indexer/test_anvil.py @@ -42,7 +42,7 @@ tdr_anvil, ) from azul.plugins.repository.tdr_anvil import ( - BundleEntityType, + BundleType, TDRAnvilBundle, TDRAnvilBundleFQID, ) @@ -105,13 
+105,13 @@ def bundle_fqid(cls, *, uuid, version=None, - entity_type=BundleEntityType.primary + table_name=BundleType.primary ) -> TDRAnvilBundleFQID: assert version is None, 'All AnVIL bundles should use the same version' return TDRAnvilBundleFQID(source=cls.source, uuid=uuid, version=cls.version, - entity_type=entity_type) + table_name=table_name) @classmethod def primary_bundle(cls) -> TDRAnvilBundleFQID: @@ -120,12 +120,12 @@ def primary_bundle(cls) -> TDRAnvilBundleFQID: @classmethod def supplementary_bundle(cls) -> TDRAnvilBundleFQID: return cls.bundle_fqid(uuid='6b0f6c0f-5d80-a242-accb-840921351cd5', - entity_type=BundleEntityType.supplementary) + table_name=BundleType.supplementary) @classmethod def duos_bundle(cls) -> TDRAnvilBundleFQID: return cls.bundle_fqid(uuid='2370f948-2783-aeb6-afea-e022897f4dcf', - entity_type=BundleEntityType.duos) + table_name=BundleType.duos) class TestAnvilIndexer(AnvilIndexerTestCase, @@ -199,7 +199,7 @@ def tearDown(self): self.index_service.delete_indices(self.catalog) def test_dataset_description(self): - dataset_ref = EntityReference(entity_type='dataset', + dataset_ref = EntityReference(entity_type='anvil_dataset', entity_id='2370f948-2783-4eb6-afea-e022897f4dcf') bundles = [self.primary_bundle(), self.duos_bundle()] for bundle_fqid in bundles: diff --git a/test/integration_test.py b/test/integration_test.py index a1c6b9941..641173336 100644 --- a/test/integration_test.py +++ b/test/integration_test.py @@ -152,7 +152,7 @@ Link, ) from azul.plugins.repository.tdr_anvil import ( - BundleEntityType, + BundleType, TDRAnvilBundleFQID, TDRAnvilBundleFQIDJSON, ) @@ -166,9 +166,6 @@ ManifestFormat, ManifestGenerator, ) -from azul.strings import ( - pluralize, -) from azul.terra import ( ServiceAccountCredentialsProvider, TDRClient, @@ -350,7 +347,7 @@ def _list_managed_access_bundles(self, if not ( # DUOS bundles are too sparse to fulfill the managed access tests config.is_anvil_enabled(catalog) - and cast(TDRAnvilBundleFQID, bundle_fqid).entity_type is BundleEntityType.duos + and cast(TDRAnvilBundleFQID, bundle_fqid).table_name is BundleType.duos ) ) bundle_fqid = self.random.choice(bundle_fqids) @@ -503,8 +500,7 @@ def _test_other_endpoints(self): if config.is_hca_enabled(catalog): bundle_index, project_index = 'bundles', 'projects' elif config.is_anvil_enabled(catalog): - bundle_index = pluralize(BundleEntityType.primary.value) - project_index = 'datasets' + bundle_index, project_index = 'biosamples', 'datasets' else: assert False, catalog service_paths = { @@ -1284,17 +1280,17 @@ def _get_indexed_bundles(self, # and 0 or more other entities. Biosamples only occur in primary # bundles. if len(hit['biosamples']) > 0: - entity_type = BundleEntityType.primary + table_name = BundleType.primary # Supplementary bundles contain only 1 file and 1 dataset. elif len(hit['files']) > 0: - entity_type = BundleEntityType.supplementary + table_name = BundleType.supplementary # DUOS bundles contain only 1 dataset. 
elif len(hit['datasets']) > 0: - entity_type = BundleEntityType.duos + table_name = BundleType.duos else: assert False, hit bundle_fqid = cast(TDRAnvilBundleFQIDJSON, bundle_fqid) - bundle_fqid['entity_type'] = entity_type.value + bundle_fqid['table_name'] = table_name.value bundle_fqid = self.repository_plugin(catalog).resolve_bundle(bundle_fqid) indexed_fqids.add(bundle_fqid) return indexed_fqids diff --git a/test/service/test_manifest.py b/test/service/test_manifest.py index da3bf4b3a..f57b88d01 100644 --- a/test/service/test_manifest.py +++ b/test/service/test_manifest.py @@ -2090,7 +2090,7 @@ def test_verbatim_jsonl_manifest(self): expected = { # Consolidate entities with the same replica (i.e. datasets) json_hash(entity).digest(): { - 'type': 'anvil_' + entity_ref.entity_type, + 'type': entity_ref.entity_type, 'value': entity, } for bundle in self.bundles() diff --git a/test/service/test_response_anvil.py b/test/service/test_response_anvil.py index 9a176000c..2d6fb44ab 100644 --- a/test/service/test_response_anvil.py +++ b/test/service/test_response_anvil.py @@ -65,7 +65,7 @@ def test_entity_indices(self): 'sequencing:d4f6c0c4-1e11-438e-8218-cfea63b8b051' ], 'activity_id': '18b3be87-e26b-4376-0d8d-c1e370e90e07', - 'activity_table': 'sequencingactivity', + 'activity_table': 'anvil_sequencingactivity', 'activity_type': 'Sequencing', 'assay_type': [ None @@ -186,7 +186,7 @@ def test_entity_indices(self): 'sequencing:a6c663c7-6f26-4ed2-af9d-48e9c709a22b' ], 'activity_id': 'a60c5138-3749-f7cb-8714-52d389ad5231', - 'activity_table': 'sequencingactivity', + 'activity_table': 'anvil_sequencingactivity', 'activity_type': 'Sequencing', 'assay_type': [ None From ef0637e5ea323d4e558d370cdd6bab8eeb6712a6 Mon Sep 17 00:00:00 2001 From: Noa Aviel Dove Date: Thu, 17 Oct 2024 15:33:54 -0700 Subject: [PATCH 6/6] Do not filter AnVIL bundles for obsolete versions --- src/azul/azulclient.py | 10 ++++++---- test/integration_test.py | 10 ++++++---- 2 files changed, 12 insertions(+), 8 deletions(-) diff --git a/src/azul/azulclient.py b/src/azul/azulclient.py index 2c1f7d877..67a90a1f6 100644 --- a/src/azul/azulclient.py +++ b/src/azul/azulclient.py @@ -269,10 +269,12 @@ def remote_reindex_partition(self, message: JSON) -> None: validate_uuid_prefix(prefix) source = self.repository_plugin(catalog).source_from_json(source) bundle_fqids = self.list_bundles(catalog, source, prefix) - bundle_fqids = self.filter_obsolete_bundle_versions(bundle_fqids) - log.info('After filtering obsolete versions, ' - '%i bundles remain in prefix %r of source %r in catalog %r', - len(bundle_fqids), prefix, str(source.spec), catalog) + # All AnVIL bundles and entities use the same version + if not config.is_anvil_enabled(catalog): + bundle_fqids = self.filter_obsolete_bundle_versions(bundle_fqids) + log.info('After filtering obsolete versions, ' + '%i bundles remain in prefix %r of source %r in catalog %r', + len(bundle_fqids), prefix, str(source.spec), catalog) messages = ( self.bundle_message(catalog, bundle_fqid) for bundle_fqid in bundle_fqids diff --git a/test/integration_test.py b/test/integration_test.py index 641173336..3d3b635ab 100644 --- a/test/integration_test.py +++ b/test/integration_test.py @@ -1300,10 +1300,12 @@ def _assert_catalog_complete(self, bundle_fqids: Set[SourcedBundleFQID] ) -> None: with self.subTest('catalog_complete', catalog=catalog): - expected_fqids = set(self.azul_client.filter_obsolete_bundle_versions(bundle_fqids)) - obsolete_fqids = bundle_fqids - expected_fqids - if obsolete_fqids: - 
                log.debug('Ignoring obsolete bundle versions %r', obsolete_fqids)
+            expected_fqids = bundle_fqids
+            if not config.is_anvil_enabled(catalog):
+                expected_fqids = set(self.azul_client.filter_obsolete_bundle_versions(expected_fqids))
+                obsolete_fqids = bundle_fqids - expected_fqids
+                if obsolete_fqids:
+                    log.debug('Ignoring obsolete bundle versions %r', obsolete_fqids)
             num_bundles = len(expected_fqids)
             timeout = 600
             log.debug('Expecting bundles %s ', sorted(expected_fqids))
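
The last patch relies on the fact that every AnVIL bundle and entity in a snapshot carries the same fixed version (the canned test data above uses 2022-06-01T00:00:00.000000Z throughout), so de-duplicating a bundle listing by version can never discard anything for an AnVIL catalog. The sketch below only illustrates that idea; it is not the actual AzulClient.filter_obsolete_bundle_versions implementation, whose body is not shown in this patch series, and the BundleFQID tuple is a stand-in for Azul's real FQID classes.

    from typing import Iterable, NamedTuple


    class BundleFQID(NamedTuple):
        uuid: str
        version: str  # ISO-8601 timestamp, so lexical order matches chronological order


    def latest_versions(fqids: Iterable[BundleFQID]) -> set[BundleFQID]:
        # Keep only the most recent version of each bundle UUID
        latest: dict[str, BundleFQID] = {}
        for fqid in fqids:
            current = latest.get(fqid.uuid)
            if current is None or fqid.version > current.version:
                latest[fqid.uuid] = fqid
        return set(latest.values())


    # HCA-style listing: two versions of one bundle, the older one is dropped
    hca = [
        BundleFQID('aaaa', '2022-06-01T00:00:00.000000Z'),
        BundleFQID('aaaa', '2023-01-01T00:00:00.000000Z'),
    ]
    assert len(latest_versions(hca)) == 1

    # AnVIL-style listing: all bundles share one fixed version, nothing is dropped
    anvil = [
        BundleFQID('bbbb', '2022-06-01T00:00:00.000000Z'),
        BundleFQID('cccc', '2022-06-01T00:00:00.000000Z'),
    ]
    assert latest_versions(anvil) == set(anvil)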