From 47f9395836de4048bcb725c94070bb43d1c995d3 Mon Sep 17 00:00:00 2001 From: Enrique Noriega Date: Tue, 19 Mar 2024 11:56:10 -0700 Subject: [PATCH 1/2] Added support for optional annotation of AMR linking to `integrated-text-extractions` --- skema/rest/integrated_text_reading_proxy.py | 33 +++++++++++++++++++-- skema/rest/schema.py | 5 ++++ 2 files changed, 35 insertions(+), 3 deletions(-) diff --git a/skema/rest/integrated_text_reading_proxy.py b/skema/rest/integrated_text_reading_proxy.py index 78b1076d7da..8ed11de8d15 100644 --- a/skema/rest/integrated_text_reading_proxy.py +++ b/skema/rest/integrated_text_reading_proxy.py @@ -408,7 +408,7 @@ def integrated_extractions( ) async def integrated_text_extractions( response: Response, - texts: TextReadingInputDocuments, + inputs: TextReadingInputDocuments, annotate_skema: bool = True, annotate_mit: bool = True, ) -> TextReadingAnnotationsOutput: @@ -428,10 +428,12 @@ async def integrated_text_extractions( ``` """ # Get the input plain texts - texts = texts.texts + texts = inputs.texts + + amrs = inputs.amrs # Run the text extractors - return integrated_extractions( + extractions = integrated_extractions( response, annotate_text_with_skema, texts, @@ -440,6 +442,31 @@ async def integrated_text_extractions( annotate_mit ) + # Do the alignment + aligned_amrs = list() + if len(amrs) > 0: + # Build an UploadFile instance from the extractions + json_extractions = extractions.model_dump_json() + extractions_ufile = UploadFile(file=io.BytesIO(json_extractions.encode('utf-8'))) + for amr in amrs: + # amr = json.loads(amr) + amr_ufile = UploadFile(file=io.BytesIO(amr.encode('utf-8'))) + try: + aligned_amr = metal_proxy.link_amr( + amr_file=amr_ufile, + text_extractions_file=extractions_ufile) + aligned_amrs.append(aligned_amr) + except Exception as e: + error = TextReadingError(pipeline="AMR Linker", message=f"Error annotating {amr.filename}: {e}") + if extractions.generalized_errors is None: + extractions.generalized_errors = [error] + else: + extractions.generalized_errors.append(error) + + extractions.aligned_amrs = aligned_amrs + + return extractions + @router.post( "/integrated-pdf-extractions", diff --git a/skema/rest/schema.py b/skema/rest/schema.py index 56ffdbcf8ab..71e9b92d749 100644 --- a/skema/rest/schema.py +++ b/skema/rest/schema.py @@ -5,6 +5,7 @@ from typing import List, Optional, Dict, Any from askem_extractions.data_model import AttributeCollection +from fastapi import UploadFile from pydantic import BaseModel, Field # see https://github.com/pydantic/pydantic/issues/5821#issuecomment-1559196859 @@ -168,6 +169,10 @@ class TextReadingInputDocuments(BaseModel): description="List of input plain texts to be annotated by the text reading pipelines", examples=[["x = 0", "y = 1", "I: Infected population"]], ) + amrs: List[str] = Field( + description="List of optional AMR files to align with the extractions", + examples=[[]] + ) class TextReadingError(BaseModel): From 8bed04672eb79c48e3403f602de2a5ffe3c51feb Mon Sep 17 00:00:00 2001 From: Enrique Noriega Date: Tue, 19 Mar 2024 12:43:13 -0700 Subject: [PATCH 2/2] Fixed unit test --- skema/rest/tests/test_integrated_text_reading_proxy.py | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/skema/rest/tests/test_integrated_text_reading_proxy.py b/skema/rest/tests/test_integrated_text_reading_proxy.py index d4e726e2440..900954beabc 100644 --- a/skema/rest/tests/test_integrated_text_reading_proxy.py +++ b/skema/rest/tests/test_integrated_text_reading_proxy.py @@ -23,7 +23,8 @@ def test_text_integrated_extractions(): "x = 0", "y = 1", "I: Infected population" - ] + ], + "amrs": [] } response = client.post(f"/integrated-text-extractions", params=params, json=payload)