Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

feat: skip record when transformation is noop #52

Merged
merged 5 commits into from
Sep 19, 2024
Merged
Changes from 1 commit
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
Next Next commit
feat: skip record when transformation is noop
abulte committed Sep 18, 2024
commit b39068848ad6dc23cf7967f0e91836826d6d7850
13 changes: 13 additions & 0 deletions ecospheres_migrator/batch.py
Original file line number Diff line number Diff line change
@@ -24,6 +24,16 @@ class FailureTransformBatchRecord(TransformBatchRecord):
error: str


class SkipReason(Enum):
NO_CHANGES = "no changes"


@dataclass(kw_only=True)
class SkippedTransformBatchRecord(TransformBatchRecord):
reason: SkipReason
info: str


class TransformBatch:
def __init__(self):
self.records: list[TransformBatchRecord] = []
@@ -37,6 +47,9 @@ def successes(self) -> list[SuccessTransformBatchRecord]:
def failures(self) -> list[FailureTransformBatchRecord]:
return [r for r in self.records if isinstance(r, FailureTransformBatchRecord)]

def skipped(self) -> list[SkippedTransformBatchRecord]:
return [r for r in self.records if isinstance(r, SkippedTransformBatchRecord)]

# FIXME: needed?
def to_mef(self):
mef = MefArchive()
54 changes: 31 additions & 23 deletions ecospheres_migrator/migrator.py
Original file line number Diff line number Diff line change
@@ -9,9 +9,12 @@
FailureTransformBatchRecord,
MigrateBatch,
MigrateMode,
SkippedTransformBatchRecord,
SkipReason,
SuccessMigrateBatchRecord,
SuccessTransformBatchRecord,
TransformBatch,
TransformBatchRecord,
)
from ecospheres_migrator.geonetwork import (
GeonetworkClient,
@@ -68,41 +71,46 @@ def transform(self, transformation: Path, selection: list[Record]) -> TransformB
for r in selection:
log.debug(f"Processing {r.uuid} (template={r.template} state={r.state})")
original = self.gn.get_record(r.uuid)
batch_record = TransformBatchRecord(
url=self.gn.url,
uuid=r.uuid,
template=r.template,
state=r.state,
original=xml_to_string(original),
)
if r.state and r.state.stage == WorkflowStage.WORKING_COPY:
batch.add(
FailureTransformBatchRecord(
url=self.gn.url,
uuid=r.uuid,
template=r.template,
state=r.state,
original=xml_to_string(original),
**batch_record.__dict__,
error="Record has a working copy",
)
)
continue
try:
info = extract_record_info(original, sources)
result = transform(original, CoupledResourceLookUp="'disabled'")
# TODO: check if result != original
batch.add(
SuccessTransformBatchRecord(
url=self.gn.url,
uuid=r.uuid,
template=r.template,
state=r.state,
original=xml_to_string(original),
result=xml_to_string(result),
info=xml_to_string(info),
result = transform(original)
result_str = xml_to_string(result)
original_str = xml_to_string(original)
if result_str != original_str:
batch.add(
SuccessTransformBatchRecord(
**batch_record.__dict__,
result=result_str,
info=xml_to_string(info),
)
)
else:
batch.add(
SkippedTransformBatchRecord(
**batch_record.__dict__,
info=xml_to_string(info),
reason=SkipReason.NO_CHANGES,
)
)
)
except Exception as e:
batch.add(
FailureTransformBatchRecord(
url=self.gn.url,
uuid=r.uuid,
template=r.template,
state=r.state,
original=xml_to_string(original),
**batch_record.__dict__,
error=str(e),
)
)
@@ -139,7 +147,7 @@ def migrate(
)
)
else:
assert group is not None
assert group is not None, "Group must be set when not overwriting"
# TODO: publish flag
new_record = self.gn.put_record(
r.uuid, r.result, template=r.template, group=group
Original file line number Diff line number Diff line change
@@ -8,6 +8,7 @@
<p>Job terminé.</p>
{% set successes = job.result.successes() %}
{% set failures = job.result.failures() %}
{% set skipped = job.result.skipped() %}
<div class="fr-accordions-group fr-mb-4w">
{% if failures %}
<section class="fr-accordion">
@@ -57,6 +58,60 @@
</div>
</section>
{% endif %}
{% if skipped %}
<section class="fr-accordion">
<h3 class="fr-accordion__title">
<button class="fr-accordion__btn"
aria-expanded="true"
aria-controls="accordion-skipped">Transformations ignorées ({{ skipped|length }})</button>
</h3>
<div class="fr-collapse" id="accordion-skipped">
<div class="fr-table fr-mb-1w" id="table-md-component">
<div class="fr-table__wrapper">
<div class="fr-table__container">
<div class="fr-table__content">
<table id="table-md">
<thead>
<tr>
<th scope="col">Statut</th>
<th scope="col">Fiche originale</th>
<th scope="col">XML original</th>
<th scope="col">Raison</th>
</tr>
</thead>
<tbody>
{% for record in skipped %}
<tr id="table-md-row-key-{{ loop.index }}"
data-row-key="{{ loop.index }}">
<td>⏭️</td>
<td>
<a href="{{ url }}/fre/catalog.search#/metadata/{{ record.uuid }}"
target="_blank"
rel="noopener">{{ record.uuid }}</a>
</td>
<td>
<a href="{{ url_for('transform_original', job_id=job.id, uuid=record.uuid) }}"
target="_blank"
rel="noopener">Voir le XML</a>
</td>
<td>
{% if record.reason.name == "NO_CHANGES" %}
Pas de modification lors de la transformation.
{% else %}
{{ record.reason.name }}
{% endif %}
</td>
</tr>
{% endfor %}
</tbody>
</table>
</div>
</div>
</div>
</div>
</div>
</section>
{% endif %}
{% if successes %}
<section class="fr-accordion">
<h3 class="fr-accordion__title">
2 changes: 1 addition & 1 deletion ecospheres_migrator/transformations/change-language.xsl
Original file line number Diff line number Diff line change
@@ -7,7 +7,7 @@
<xsl:strip-space elements="*"/>

<xsl:template match="/gmd:MD_Metadata/gmd:language/gco:CharacterString/text()">
<xsl:text>xxx</xsl:text>
<xsl:text>eng</xsl:text>
</xsl:template>

<xsl:template match="@*|node()">
40 changes: 24 additions & 16 deletions tests/test_migrate.py
Original file line number Diff line number Diff line change
@@ -16,11 +16,11 @@ def get_records(migrator: Migrator, md_fixtures: list[Fixture]) -> dict[str, str
return records


def test_migrate_noop_overwrite(migrator: Migrator, md_fixtures: list[Fixture]):
"""`noop` migration in overwrite mode should update content"""
def test_migrate_change_language_overwrite(migrator: Migrator, md_fixtures: list[Fixture]):
"""`change-language` migration in overwrite mode should update content"""
records_before = get_records(migrator, md_fixtures)

batch, _ = get_transform_results("noop", migrator)
batch, _ = get_transform_results("change-language", migrator)
migrate_batch = migrator.migrate(batch, overwrite=True, group=None)
assert len(migrate_batch.successes()) == len(batch.successes())
assert len(migrate_batch.failures()) == 0
@@ -32,16 +32,16 @@ def test_migrate_noop_overwrite(migrator: Migrator, md_fixtures: list[Fixture]):
assert records_after[uuid] != records_before[uuid]


def test_migrate_noop_duplicate(
def test_migrate_change_language_duplicate(
migrator: Migrator, clean_md_fixtures: list[Fixture], group_fixture: int
):
"""
`noop` migration in duplicate mode should create new records in specific group.
`change-language` migration in duplicate mode should create new records in specific group.
Use `clean_md_fixtures` to remove all records from the group before running the test.
"""
records_before = get_records(migrator, clean_md_fixtures)

batch, _ = get_transform_results("noop", migrator)
batch, _ = get_transform_results("change-language", migrator)
migrate_batch = migrator.migrate(batch, overwrite=False, group=group_fixture)
assert len(migrate_batch.successes()) == len(batch.successes())
assert len(migrate_batch.failures()) == 0
@@ -104,49 +104,57 @@ def test_migrate_transform_job_id(migrator: Migrator):
assert migrate_batch.transform_job_id == "xxx"


def test_migrate_batch_records_success(migrator: Migrator, md_fixtures: list[Fixture]):
batch, _ = get_transform_results("noop", migrator)
migrate_batch = migrator.migrate(batch, overwrite=False, group=None)
def test_migrate_batch_records_success(
migrator: Migrator, md_fixtures: list[Fixture], group_fixture: int
):
batch, _ = get_transform_results("change-language", migrator)
migrate_batch = migrator.migrate(batch, overwrite=False, group=group_fixture)
assert len(migrate_batch.successes())
for record in migrate_batch.successes():
assert record.source_uuid in [f.uuid for f in md_fixtures]
assert record.target_uuid not in [f.uuid for f in md_fixtures]
assert record.url == GN_TEST_URL
assert record.source_content is not None # TODO: check content when formatting is done
assert record.target_content is not None # TODO: check content when formatting is done
assert record.source_content is not None
assert record.target_content is not None
assert record.source_content != record.target_content
assert record.template is False


def test_migrate_batch_records_failure(migrator: Migrator, md_fixtures: list[Fixture]):
batch, _ = get_transform_results("noop", migrator)
batch, _ = get_transform_results("change-language", migrator)
with patch("ecospheres_migrator.geonetwork.GeonetworkClient.update_record") as mocked_method:
mocked_method.side_effect = Exception("Mocked update_record error")
migrate_batch = migrator.migrate(batch, overwrite=False, group=None)
assert len(migrate_batch.failures())
for record in migrate_batch.failures():
assert record.source_uuid in [f.uuid for f in md_fixtures]
assert record.url == GN_TEST_URL
assert record.source_content is not None # TODO: check content when formatting is done
assert record.target_content is not None # TODO: check content when formatting is done
assert record.source_content is not None
assert record.target_content is not None
assert record.source_content != record.target_content
assert record.template is False
assert record.error is not None # actual error is tested below


def test_migrate_overwrite_gn_error(migrator: Migrator, md_fixtures: list[Fixture]):
batch, _ = get_transform_results("noop", migrator)
batch, _ = get_transform_results("change-language", migrator)
with patch("ecospheres_migrator.geonetwork.GeonetworkClient.update_record") as mocked_method:
mocked_method.side_effect = Exception("Mocked update_record error")
migrate_batch = migrator.migrate(batch, overwrite=True, group=None)
assert len(migrate_batch.failures()) == len(md_fixtures)
assert migrate_batch.mode == MigrateMode.OVERWRITE
assert len(migrate_batch.failures())
for record in migrate_batch.failures():
assert record.error == "Mocked update_record error"


def test_migrate_duplicate_gn_error(migrator: Migrator, md_fixtures: list[Fixture]):
batch, _ = get_transform_results("noop", migrator)
batch, _ = get_transform_results("change-language", migrator)
with patch("ecospheres_migrator.geonetwork.GeonetworkClient.put_record") as mocked_method:
mocked_method.side_effect = Exception("Mocked put_record error")
migrate_batch = migrator.migrate(batch, overwrite=False, group=1)
assert len(migrate_batch.failures()) == len(md_fixtures)
assert migrate_batch.mode == MigrateMode.CREATE
assert len(migrate_batch.failures())
for record in migrate_batch.failures():
assert record.error == "Mocked put_record error"
16 changes: 14 additions & 2 deletions tests/test_transform.py
Original file line number Diff line number Diff line change
@@ -1,5 +1,7 @@
from pathlib import Path

from conftest import Fixture

from ecospheres_migrator.batch import TransformBatch
from ecospheres_migrator.geonetwork import Record
from ecospheres_migrator.migrator import Migrator
@@ -22,14 +24,24 @@ def get_transform_results(


def test_transform_noop(migrator: Migrator):
"""`noop` transform is always successful"""
"""`noop` transform is always skipped"""
results, selection = get_transform_results("noop", migrator)
assert len(results.successes()) == len(selection)
assert len(results.skipped()) == len(selection)
assert len(results.successes()) == 0
assert len(results.failures()) == 0


def test_transform_error(migrator: Migrator):
"""`error` transform is never successful"""
results, selection = get_transform_results("error", migrator)
assert len(results.skipped()) == 0
assert len(results.successes()) == 0
assert len(results.failures()) == len(selection)


def test_transform_change_language(migrator: Migrator, clean_md_fixtures: list[Fixture]):
"""`change-language` transform is always successful"""
results, selection = get_transform_results("change-language", migrator)
assert len(results.skipped()) == 0
assert len(results.successes()) == len(selection)
assert len(results.failures()) == 0