ml4ai · enoriega · Mar 19, 2024 · Mar 19, 2024 · Mar 19, 2024
diff --git a/skema/rest/integrated_text_reading_proxy.py b/skema/rest/integrated_text_reading_proxy.py
@@ -408,7 +408,7 @@ def integrated_extractions(
 )
 async def integrated_text_extractions(
         response: Response,
-        texts: TextReadingInputDocuments,
+        inputs: TextReadingInputDocuments,
         annotate_skema: bool = True,
         annotate_mit: bool = True,
 ) -> TextReadingAnnotationsOutput:
@@ -428,10 +428,12 @@ async def integrated_text_extractions(
     ```
     """
     # Get the input plain texts
-    texts = texts.texts
+    texts = inputs.texts
+
+    amrs = inputs.amrs
 
     # Run the text extractors
-    return integrated_extractions(
+    extractions = integrated_extractions(
         response,
         annotate_text_with_skema,
         texts,
@@ -440,6 +442,37 @@ async def integrated_text_extractions(
         annotate_mit
     )
 
+    # Align each provided AMR with the text extractions. A failure on one AMR is
+    # recorded in `generalized_errors` and does not abort the remaining ones.
+    aligned_amrs = []
+    if amrs:
+        # Serialize the extractions once; the bytes are reused for every AMR
+        extractions_bytes = extractions.model_dump_json().encode('utf-8')
+        for idx, amr in enumerate(amrs):
+            # Wrap the payloads in fresh UploadFile instances on every iteration:
+            # a shared stream would presumably be left at EOF once the linker
+            # has read it, so later AMRs would see empty extractions.
+            extractions_ufile = UploadFile(file=io.BytesIO(extractions_bytes))
+            amr_ufile = UploadFile(file=io.BytesIO(amr.encode('utf-8')))
+            try:
+                # NOTE(review): if metal_proxy.link_amr is a coroutine function
+                # this call needs an `await` -- confirm against its definition.
+                aligned_amr = metal_proxy.link_amr(
+                    amr_file=amr_ufile,
+                    text_extractions_file=extractions_ufile)
+                aligned_amrs.append(aligned_amr)
+            except Exception as e:
+                # `amr` is a plain string (no `.filename`), so report its position
+                error = TextReadingError(pipeline="AMR Linker", message=f"Error annotating AMR #{idx}: {e}")
+                if extractions.generalized_errors is None:
+                    extractions.generalized_errors = [error]
+                else:
+                    extractions.generalized_errors.append(error)
+
+    extractions.aligned_amrs = aligned_amrs
+
+    return extractions
+
 
 @router.post(
     "/integrated-pdf-extractions",

diff --git a/skema/rest/schema.py b/skema/rest/schema.py
@@ -5,6 +5,7 @@
 from typing import List, Optional, Dict, Any
 
 from askem_extractions.data_model import AttributeCollection
+from fastapi import UploadFile
 from pydantic import BaseModel, Field
 
 # see https://github.com/pydantic/pydantic/issues/5821#issuecomment-1559196859
@@ -168,6 +169,13 @@ class TextReadingInputDocuments(BaseModel):
         description="List of input plain texts to be annotated by the text reading pipelines",
         examples=[["x = 0", "y = 1", "I: Infected population"]],
     )
+    # Defaults to an empty list so the field is truly optional: requests that
+    # only send `texts` keep validating and simply skip the AMR alignment step.
+    amrs: List[str] = Field(
+        default_factory=list,
+        description="List of optional AMR files to align with the extractions",
+        examples=[[]],
+    )
 
 
 class TextReadingError(BaseModel):

diff --git a/skema/rest/tests/test_integrated_text_reading_proxy.py b/skema/rest/tests/test_integrated_text_reading_proxy.py
@@ -23,7 +23,8 @@ def test_text_integrated_extractions():
             "x = 0",
             "y = 1",
             "I: Infected population"
-        ]
+        ],
+        "amrs": []
     }
 
     response = client.post(f"/integrated-text-extractions", params=params, json=payload)