Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

feat: skip record when transformation is noop #52

Merged
merged 5 commits into from
Sep 19, 2024
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
8 changes: 7 additions & 1 deletion ecospheres_migrator/app.py
Original file line number Diff line number Diff line change
Expand Up @@ -18,7 +18,12 @@
)

from ecospheres_migrator.auth import authenticated, connection_infos
from ecospheres_migrator.batch import MigrateMode, SuccessTransformBatchRecord, TransformBatchRecord
from ecospheres_migrator.batch import (
MigrateMode,
SkipReasonMessage,
SuccessTransformBatchRecord,
TransformBatchRecord,
)
from ecospheres_migrator.migrator import Migrator
from ecospheres_migrator.queue import get_job, get_queue

Expand Down Expand Up @@ -151,6 +156,7 @@ def transform_job_status(job_id: str):
now=datetime.now().isoformat(timespec="seconds"),
url=url,
modes=MigrateMode,
reasons=SkipReasonMessage,
)


Expand Down
28 changes: 28 additions & 0 deletions ecospheres_migrator/batch.py
Original file line number Diff line number Diff line change
Expand Up @@ -24,6 +24,25 @@ class FailureTransformBatchRecord(TransformBatchRecord):
error: str


class SkipReasonMessage(Enum):
"""
We don't use `SkipReason.value` for this because we pickle the reason
and want to be able to change the associated message inbetween jobs.
"""

NO_CHANGES = "Pas de modification lors de la transformation."


class SkipReason(Enum):
NO_CHANGES = "no changes"
abulte marked this conversation as resolved.
Show resolved Hide resolved


@dataclass(kw_only=True)
class SkippedTransformBatchRecord(TransformBatchRecord):
reason: SkipReason
info: str


class TransformBatch:
def __init__(self):
self.records: list[TransformBatchRecord] = []
Expand All @@ -37,6 +56,12 @@ def successes(self) -> list[SuccessTransformBatchRecord]:
def failures(self) -> list[FailureTransformBatchRecord]:
return [r for r in self.records if isinstance(r, FailureTransformBatchRecord)]

def skipped(self) -> list[SkippedTransformBatchRecord]:
return [r for r in self.records if isinstance(r, SkippedTransformBatchRecord)]

def __repr__(self):
return f"TransformBatch({len(self.records)} records, {len(self.failures())} failures, {len(self.successes())} successes, {len(self.skipped())} skipped)"

# FIXME: needed?
def to_mef(self):
mef = MefArchive()
Expand Down Expand Up @@ -84,3 +109,6 @@ def successes(self) -> list[SuccessMigrateBatchRecord]:

def failures(self) -> list[FailureMigrateBatchRecord]:
return [r for r in self.records if isinstance(r, FailureMigrateBatchRecord)]

def __repr__(self):
return f"MigrateBatch({len(self.records)} records, {len(self.failures())} failures, {len(self.successes())} successes)"
59 changes: 33 additions & 26 deletions ecospheres_migrator/migrator.py
abulte marked this conversation as resolved.
Show resolved Hide resolved
Original file line number Diff line number Diff line change
Expand Up @@ -9,9 +9,12 @@
FailureTransformBatchRecord,
MigrateBatch,
MigrateMode,
SkippedTransformBatchRecord,
SkipReason,
SuccessMigrateBatchRecord,
SuccessTransformBatchRecord,
TransformBatch,
TransformBatchRecord,
)
from ecospheres_migrator.geonetwork import (
GeonetworkClient,
Expand Down Expand Up @@ -68,41 +71,47 @@ def transform(self, transformation: Path, selection: list[Record]) -> TransformB
for r in selection:
log.debug(f"Processing {r.uuid} (template={r.template} state={r.state})")
original = self.gn.get_record(r.uuid)
batch_record = TransformBatchRecord(
url=self.gn.url,
uuid=r.uuid,
template=r.template,
state=r.state,
original=xml_to_string(original),
)
if r.state and r.state.stage == WorkflowStage.WORKING_COPY:
batch.add(
FailureTransformBatchRecord(
abulte marked this conversation as resolved.
Show resolved Hide resolved
url=self.gn.url,
uuid=r.uuid,
template=r.template,
state=r.state,
original=xml_to_string(original),
**batch_record.__dict__,
error="Record has a working copy",
)
)
continue
try:
# FIXME: extract_record_info() mutates original
info = extract_record_info(original, sources)
result = transform(original, CoupledResourceLookUp="'disabled'")
# TODO: check if result != original
batch.add(
SuccessTransformBatchRecord(
url=self.gn.url,
uuid=r.uuid,
template=r.template,
state=r.state,
original=xml_to_string(original),
result=xml_to_string(result),
info=xml_to_string(info),
result = transform(original)
result_str = xml_to_string(result)
original_str = xml_to_string(original)
if result_str != original_str:
batch.add(
SuccessTransformBatchRecord(
**batch_record.__dict__,
result=result_str,
info=xml_to_string(info),
)
)
else:
batch.add(
SkippedTransformBatchRecord(
**batch_record.__dict__,
info=xml_to_string(info),
reason=SkipReason.NO_CHANGES,
)
)
)
except Exception as e:
batch.add(
FailureTransformBatchRecord(
url=self.gn.url,
uuid=r.uuid,
template=r.template,
state=r.state,
original=xml_to_string(original),
**batch_record.__dict__,
error=str(e),
)
)
Expand All @@ -117,9 +126,7 @@ def migrate(
group: int | None = None,
transform_job_id: str | None = None,
) -> MigrateBatch:
log.debug(
f"Migrating batch ({len(batch.successes())}/{len(batch.failures())}) for {self.url} (overwrite={overwrite})"
)
log.debug(f"Migrating batch {batch} for {self.url} (overwrite={overwrite})")
migrate_batch = MigrateBatch(
mode=MigrateMode.OVERWRITE if overwrite else MigrateMode.CREATE,
transform_job_id=transform_job_id,
Expand All @@ -139,7 +146,7 @@ def migrate(
)
)
else:
assert group is not None
assert group is not None, "Group must be set when not overwriting"
# TODO: publish flag
new_record = self.gn.put_record(
r.uuid, r.result, template=r.template, group=group
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -8,6 +8,7 @@
<p>Job terminé.</p>
{% set successes = job.result.successes() %}
{% set failures = job.result.failures() %}
{% set skipped = job.result.skipped() %}
<div class="fr-accordions-group fr-mb-4w">
{% if failures %}
<section class="fr-accordion">
Expand Down Expand Up @@ -57,6 +58,56 @@
</div>
</section>
{% endif %}
{% if skipped %}
<section class="fr-accordion">
<h3 class="fr-accordion__title">
<button class="fr-accordion__btn"
aria-expanded="true"
aria-controls="accordion-skipped">Transformations ignorées ({{ skipped|length }})</button>
</h3>
<div class="fr-collapse" id="accordion-skipped">
<div class="fr-table fr-mb-1w" id="table-md-component">
<div class="fr-table__wrapper">
<div class="fr-table__container">
<div class="fr-table__content">
<table id="table-md">
<thead>
<tr>
<th scope="col">Statut</th>
<th scope="col">Fiche originale</th>
<th scope="col">XML original</th>
<th scope="col">Raison</th>
</tr>
</thead>
<tbody>
{% for record in skipped %}
<tr id="table-md-row-key-{{ loop.index }}"
data-row-key="{{ loop.index }}">
<td>⏭️</td>
<td>
<a href="{{ url }}/fre/catalog.search#/metadata/{{ record.uuid }}"
target="_blank"
rel="noopener">{{ record.uuid }}</a>
</td>
<td>
<a href="{{ url_for('transform_original', job_id=job.id, uuid=record.uuid) }}"
target="_blank"
rel="noopener">Voir le XML</a>
</td>
<td>
{{ reasons[record.reason.name].value }}
</td>
</tr>
{% endfor %}
</tbody>
</table>
</div>
</div>
</div>
</div>
</div>
</section>
{% endif %}
{% if successes %}
<section class="fr-accordion">
<h3 class="fr-accordion__title">
Expand Down
2 changes: 1 addition & 1 deletion ecospheres_migrator/transformations/change-language.xsl
Original file line number Diff line number Diff line change
Expand Up @@ -7,7 +7,7 @@
<xsl:strip-space elements="*"/>

<xsl:template match="/gmd:MD_Metadata/gmd:language/gco:CharacterString/text()">
<xsl:text>xxx</xsl:text>
<xsl:text>eng</xsl:text>
abulte marked this conversation as resolved.
Show resolved Hide resolved
</xsl:template>

<xsl:template match="@*|node()">
Expand Down
42 changes: 24 additions & 18 deletions tests/test_migrate.py
abulte marked this conversation as resolved.
Show resolved Hide resolved
Original file line number Diff line number Diff line change
Expand Up @@ -16,11 +16,11 @@ def get_records(migrator: Migrator, md_fixtures: list[Fixture]) -> dict[str, str
return records


def test_migrate_noop_overwrite(migrator: Migrator, md_fixtures: list[Fixture]):
"""`noop` migration in overwrite mode should update content"""
def test_migrate_change_language_overwrite(migrator: Migrator, md_fixtures: list[Fixture]):
"""`change-language` migration in overwrite mode should update content"""
records_before = get_records(migrator, md_fixtures)

batch, _ = get_transform_results("noop", migrator)
batch, _ = get_transform_results("change-language", migrator)
migrate_batch = migrator.migrate(batch, overwrite=True, group=None)
assert len(migrate_batch.successes()) == len(batch.successes())
assert len(migrate_batch.failures()) == 0
Expand All @@ -32,16 +32,16 @@ def test_migrate_noop_overwrite(migrator: Migrator, md_fixtures: list[Fixture]):
assert records_after[uuid] != records_before[uuid]


def test_migrate_noop_duplicate(
def test_migrate_change_language_duplicate(
migrator: Migrator, clean_md_fixtures: list[Fixture], group_fixture: int
):
"""
`noop` migration in duplicate mode should create new records in specific group.
`change-language` migration in duplicate mode should create new records in specific group.
Use `clean_md_fixtures` to remove all records from the group before running the test.
"""
records_before = get_records(migrator, clean_md_fixtures)

batch, _ = get_transform_results("noop", migrator)
batch, _ = get_transform_results("change-language", migrator)
migrate_batch = migrator.migrate(batch, overwrite=False, group=group_fixture)
assert len(migrate_batch.successes()) == len(batch.successes())
assert len(migrate_batch.failures()) == 0
Expand Down Expand Up @@ -104,49 +104,55 @@ def test_migrate_transform_job_id(migrator: Migrator):
assert migrate_batch.transform_job_id == "xxx"


def test_migrate_batch_records_success(migrator: Migrator, md_fixtures: list[Fixture]):
batch, _ = get_transform_results("noop", migrator)
migrate_batch = migrator.migrate(batch, overwrite=False, group=None)
def test_migrate_batch_records_success(
migrator: Migrator, md_fixtures: list[Fixture], group_fixture: int
):
batch, _ = get_transform_results("change-language", migrator)
migrate_batch = migrator.migrate(batch, overwrite=False, group=group_fixture)
assert len(migrate_batch.successes()) == len(batch.records)
for record in migrate_batch.successes():
assert record.source_uuid in [f.uuid for f in md_fixtures]
assert record.target_uuid not in [f.uuid for f in md_fixtures]
assert record.url == GN_TEST_URL
assert record.source_content is not None # TODO: check content when formatting is done
assert record.target_content is not None # TODO: check content when formatting is done
assert record.source_content is not None
assert record.target_content is not None
assert record.source_content != record.target_content
assert record.template is False


def test_migrate_batch_records_failure(migrator: Migrator, md_fixtures: list[Fixture]):
batch, _ = get_transform_results("noop", migrator)
batch, _ = get_transform_results("change-language", migrator)
with patch("ecospheres_migrator.geonetwork.GeonetworkClient.update_record") as mocked_method:
mocked_method.side_effect = Exception("Mocked update_record error")
migrate_batch = migrator.migrate(batch, overwrite=False, group=None)
assert len(migrate_batch.failures()) == len(migrate_batch.records)
for record in migrate_batch.failures():
assert record.source_uuid in [f.uuid for f in md_fixtures]
assert record.url == GN_TEST_URL
assert record.source_content is not None # TODO: check content when formatting is done
assert record.target_content is not None # TODO: check content when formatting is done
assert record.source_content is not None
assert record.target_content is not None
assert record.source_content != record.target_content
assert record.template is False
assert record.error is not None # actual error is tested below


def test_migrate_overwrite_gn_error(migrator: Migrator, md_fixtures: list[Fixture]):
batch, _ = get_transform_results("noop", migrator)
batch, _ = get_transform_results("change-language", migrator)
with patch("ecospheres_migrator.geonetwork.GeonetworkClient.update_record") as mocked_method:
mocked_method.side_effect = Exception("Mocked update_record error")
migrate_batch = migrator.migrate(batch, overwrite=True, group=None)
assert len(migrate_batch.failures()) == len(md_fixtures)
assert migrate_batch.mode == MigrateMode.OVERWRITE
assert len(migrate_batch.failures()) == len(md_fixtures)
for record in migrate_batch.failures():
assert record.error == "Mocked update_record error"


def test_migrate_duplicate_gn_error(migrator: Migrator, md_fixtures: list[Fixture]):
batch, _ = get_transform_results("noop", migrator)
batch, _ = get_transform_results("change-language", migrator)
with patch("ecospheres_migrator.geonetwork.GeonetworkClient.put_record") as mocked_method:
mocked_method.side_effect = Exception("Mocked put_record error")
migrate_batch = migrator.migrate(batch, overwrite=False, group=1)
assert len(migrate_batch.failures()) == len(md_fixtures)
assert migrate_batch.mode == MigrateMode.CREATE
assert len(migrate_batch.failures()) == len(md_fixtures)
for record in migrate_batch.failures():
assert record.error == "Mocked put_record error"
16 changes: 14 additions & 2 deletions tests/test_transform.py
Original file line number Diff line number Diff line change
@@ -1,5 +1,7 @@
from pathlib import Path

from conftest import Fixture

from ecospheres_migrator.batch import TransformBatch
from ecospheres_migrator.geonetwork import Record
from ecospheres_migrator.migrator import Migrator
Expand All @@ -22,14 +24,24 @@ def get_transform_results(


def test_transform_noop(migrator: Migrator):
"""`noop` transform is always successful"""
"""`noop` transform is always skipped"""
results, selection = get_transform_results("noop", migrator)
assert len(results.successes()) == len(selection)
assert len(results.skipped()) == len(selection)
assert len(results.successes()) == 0
assert len(results.failures()) == 0


def test_transform_error(migrator: Migrator):
"""`error` transform is never successful"""
results, selection = get_transform_results("error", migrator)
assert len(results.skipped()) == 0
assert len(results.successes()) == 0
assert len(results.failures()) == len(selection)


def test_transform_change_language(migrator: Migrator, clean_md_fixtures: list[Fixture]):
"""`change-language` transform is always successful"""
results, selection = get_transform_results("change-language", migrator)
assert len(results.skipped()) == 0
assert len(results.successes()) == len(selection)
assert len(results.failures()) == 0
Loading