From 173e7d6e9a196a831c5f4fd496d91ba3d89a77c8 Mon Sep 17 00:00:00 2001
From: Spyros
Date: Sun, 21 Jul 2024 13:21:12 +0100
Subject: [PATCH] fix the api part of #531 by providing an api call to export
 all CREs in an instance in a visually pleasing format

---
 application/defs/cre_defs.py                  |  2 +-
 .../frontend/src/pages/Explorer/explorer.tsx  |  2 +-
 application/tests/spreadsheet_test.py         | 59 ++++++++++++++++-
 application/tests/web_main_test.py            | 65 +++++++++++++++++--
 application/utils/spreadsheet.py              | 62 +++++++++++++++++-
 application/web/web_main.py                   | 26 ++++++++
 6 files changed, 208 insertions(+), 8 deletions(-)

diff --git a/application/defs/cre_defs.py b/application/defs/cre_defs.py
index ac9b720b..4380d82d 100644
--- a/application/defs/cre_defs.py
+++ b/application/defs/cre_defs.py
@@ -11,7 +11,7 @@ class ExportFormat(
     Enum
 ):  # TODO: this can likely be replaced with a method that iterates over an object's vars and formats headers to
     # ::
-    separator = ":"
+    separator = "|"
     section = "section"
     subsection = "subsection"
     hyperlink = "hyperlink"
diff --git a/application/frontend/src/pages/Explorer/explorer.tsx b/application/frontend/src/pages/Explorer/explorer.tsx
index 4836e2df..08a29d22 100644
--- a/application/frontend/src/pages/Explorer/explorer.tsx
+++ b/application/frontend/src/pages/Explorer/explorer.tsx
@@ -8,9 +8,9 @@ import { LoadingAndErrorIndicator } from '../../components/LoadingAndErrorIndica
 import { TYPE_CONTAINS, TYPE_LINKED_TO } from '../../const';
 import { useDataStore } from '../../providers/DataProvider';
 import { LinkedTreeDocument, TreeDocument } from '../../types';
-import { LinkedStandards } from './LinkedStandards';
 import { getDocumentDisplayName } from '../../utils';
 import { getInternalUrl } from '../../utils/document';
+import { LinkedStandards } from './LinkedStandards';
 
 export const Explorer = () => {
   const { dataLoading, dataTree } = useDataStore();
diff --git a/application/tests/spreadsheet_test.py b/application/tests/spreadsheet_test.py
index 4a647b91..35cc9786 100644
--- a/application/tests/spreadsheet_test.py
+++ b/application/tests/spreadsheet_test.py
@@ -4,7 +4,10 @@
 from application import create_app, sqla  # type: ignore
 from application.database import db
 from application.defs import cre_defs as defs
-from application.utils.spreadsheet import prepare_spreadsheet
+from application.utils.spreadsheet import (
+    prepare_spreadsheet,
+    generate_mapping_template_file,
+)
 
 
 class TestDB(unittest.TestCase):
@@ -530,6 +533,60 @@ def test_prepare_spreadsheet_simple(self) -> None:
 
         self.assertCountEqual(result, expected)
 
+    def test_generate_mapping_template_file(self) -> None:
+        """
+        Given: a CRE structure with 4 depth levels and 2 root cres
+        prepare a staggered csv accordingly
+        """
+        # empty string means temporary db
+        collection = db.Node_collection().with_graph()
+        roots = []
+        for j in range(2):
+            root = defs.CRE(description=f"root{j}", name=f"root{j}", id=f"123-30{j}")
+            db_root = collection.add_cre(root)
+            roots.append(root)
+            previous_db = db_root
+            previous_cre = root
+
+            for i in range(4):
+                c = defs.CRE(
+                    description=f"CREdesc{j}-{i}",
+                    name=f"CREname{j}-{i}",
+                    id=f"123-4{j}{i}",
+                )
+                dbcre = collection.add_cre(c)
+                collection.add_internal_link(
+                    higher=previous_db, lower=dbcre, type=defs.LinkTypes.Contains
+                )
+                previous_cre.add_link(
+                    defs.Link(document=c, ltype=defs.LinkTypes.Contains)
+                )
+                previous_cre = c
+                previous_db = dbcre
+        csv = generate_mapping_template_file(database=collection, docs=roots)
+        self.assertEqual(
+            csv,
+            [
+                {
+                    "CRE 0": "",
+                    "CRE 1": "",
+                    "CRE 2": "",
+                    "CRE 3": "",
+                    "CRE 4": "",
+                },
+                {"CRE 0": "123-300|root0"},
+                {"CRE 1": "123-400|CREname0-0"},
+                {"CRE 2": "123-401|CREname0-1"},
+                {"CRE 3": "123-402|CREname0-2"},
+                {"CRE 4": "123-403|CREname0-3"},
+                {"CRE 0": "123-301|root1"},
+                {"CRE 1": "123-410|CREname1-0"},
+                {"CRE 2": "123-411|CREname1-1"},
+                {"CRE 3": "123-412|CREname1-2"},
+                {"CRE 4": "123-413|CREname1-3"},
+            ],
+        )
+
 
 if __name__ == "__main__":
     unittest.main()
diff --git a/application/tests/web_main_test.py b/application/tests/web_main_test.py
index 402527a8..2b54dfe2 100644
--- a/application/tests/web_main_test.py
+++ b/application/tests/web_main_test.py
@@ -1,3 +1,4 @@
+import io
 import csv
 import random
 import string
@@ -5,18 +6,16 @@
 import json
 import unittest
 from unittest.mock import patch
-
 import redis
 import rq
+import os
 
 from application import create_app, sqla  # type: ignore
 from application.database import db
 from application.defs import cre_defs as defs
-from application.defs import osib_defs
 from application.web import web_main
 from application.utils.gap_analysis import GAP_ANALYSIS_TIMEOUT
-
-import os
+from application.utils import spreadsheet
 
 
 class MockJob:
@@ -886,3 +885,61 @@ def test_all_cres(self, db_mock) -> None:
             {"data": expected, "page": 1, "total_pages": 1},
             json.loads(response.data),
         )
+
+    def test_get_cre_csv(self) -> None:
+        # empty string means temporary db
+        collection = db.Node_collection().with_graph()
+        roots = []
+        for j in range(2):
+            root = defs.CRE(description=f"root{j}", name=f"root{j}", id=f"123-30{j}")
+            db_root = collection.add_cre(root)
+            roots.append(root)
+            previous_db = db_root
+            previous_cre = root
+
+            for i in range(4):
+                c = defs.CRE(
+                    description=f"CREdesc{j}-{i}",
+                    name=f"CREname{j}-{i}",
+                    id=f"123-4{j}{i}",
+                )
+                dbcre = collection.add_cre(c)
+                collection.add_internal_link(
+                    higher=previous_db, lower=dbcre, type=defs.LinkTypes.Contains
+                )
+                previous_cre.add_link(
+                    defs.Link(document=c, ltype=defs.LinkTypes.Contains)
+                )
+                previous_cre = c
+                previous_db = dbcre
+
+        with self.app.test_client() as client:
+            response = client.get(
+                "/rest/v1/cre_csv",
+                headers={"Content-Type": "application/json"},
+            )
+            self.assertEqual(200, response.status_code)
+            expected_out = [
+                {
+                    "CRE 0": "",
+                    "CRE 1": "",
+                    "CRE 2": "",
+                    "CRE 3": "",
+                    "CRE 4": "",
+                },
+                {"CRE 0": "123-300|root0"},
+                {"CRE 1": "123-400|CREname0-0"},
+                {"CRE 2": "123-401|CREname0-1"},
+                {"CRE 3": "123-402|CREname0-2"},
+                {"CRE 4": "123-403|CREname0-3"},
+                {"CRE 0": "123-301|root1"},
+                {"CRE 1": "123-410|CREname1-0"},
+                {"CRE 2": "123-411|CREname1-1"},
+                {"CRE 3": "123-412|CREname1-2"},
+                {"CRE 4": "123-413|CREname1-3"},
+            ]
+            data = spreadsheet.write_csv(expected_out)
+            self.assertEqual(
+                data.getvalue(),
+                response.data.decode(),
+            )
diff --git a/application/utils/spreadsheet.py b/application/utils/spreadsheet.py
index ae4b5c99..5a858625 100644
--- a/application/utils/spreadsheet.py
+++ b/application/utils/spreadsheet.py
@@ -2,7 +2,7 @@
 import io
 import logging
 from copy import deepcopy
-from typing import Any, Dict, List, Optional
+from typing import Any, Dict, List, Optional, Set
 import os
 import gspread
 import yaml
@@ -237,3 +237,63 @@ def write_spreadsheet(title: str, docs: List[Dict[str, Any]], emails: List[str])
     for email in emails:
         sh.share(email, perm_type="user", role="writer")
     return "https://docs.google.com/spreadsheets/d/%s" % sh.id
+
+
+def generate_mapping_template_file(
+    database: db.Node_collection, docs: List[defs.CRE]
+) -> List[Dict[str, str]]:
+    maxOffset = 0
+    related = set()
+
+    def add_offset_cre(
+        cre: defs.CRE, database: db.Node_collection, offset: int, visited_cres: Set
+    ) -> List[Dict[str, str]]:
+        nonlocal maxOffset, related
+        maxOffset = max(maxOffset, offset)
+        rows = []
+
+        rows.append(
+            {f"CRE {offset}": f"{cre.id}{defs.ExportFormat.separator.value}{cre.name}"}
+        )
+        visited_cres.add(cre.id)
+        dbcre = database.get_CREs(external_id=cre.id)
+        if not dbcre:
+            raise ValueError(f"CRE with id {cre.id} not found in the database")
+        cre = dbcre[0]
+        for link in cre.links:
+            if (
+                link.document.doctype == defs.Credoctypes.CRE
+                and link.document.id not in visited_cres
+            ):
+                if link.ltype == defs.LinkTypes.Contains:
+                    rows.extend(
+                        add_offset_cre(
+                            cre=link.document,
+                            database=database,
+                            offset=offset + 1,
+                            visited_cres=visited_cres,
+                        )
+                    )
+                elif link.ltype == defs.LinkTypes.Related:
+                    related.add(link.document.id)
+        return rows
+
+    visited_cres = set()
+    csv: List[Dict[str, str]] = []
+
+    for cre in docs:
+        csv.extend(
+            add_offset_cre(
+                cre=cre, database=database, offset=0, visited_cres=visited_cres
+            )
+        )
+    result = [{f"CRE {offset}": "" for offset in range(0, maxOffset + 1)}]
+    result.extend(csv)
+
+    orphaned_documents = [doc for doc in related if doc not in visited_cres]
+    if len(orphaned_documents):
+        raise ValueError(
+            "found CREs with only related links not provided in the root_cre list, unless you are really sure for this use case, this is a bug"
+        )
+
+    return result
diff --git a/application/web/web_main.py b/application/web/web_main.py
index 077a2ed0..bf703f53 100644
--- a/application/web/web_main.py
+++ b/application/web/web_main.py
@@ -4,6 +4,7 @@
 import json
 import logging
 import os
+import io
 import pathlib
 import urllib.parse
 from typing import Any
@@ -29,6 +30,7 @@
     send_from_directory,
     url_for,
     session,
+    send_file,
 )
 from google.oauth2 import id_token
 from google_auth_oauthlib.flow import Flow
@@ -684,6 +686,30 @@ def all_cres() -> Any:
         abort(404)
 
 
+@app.route("/rest/v1/cre_csv", methods=["GET"])
+def get_cre_csv() -> Any:
+    database = db.Node_collection()
+    root_cres = database.get_root_cres()
+    if root_cres:
+        docs = sheet_utils.generate_mapping_template_file(
+            database=database, docs=root_cres
+        )
+        csvVal = write_csv(docs=docs).getvalue().encode("utf-8")
+
+        # Creating the byteIO object from the StringIO Object
+        mem = io.BytesIO()
+        mem.write(csvVal)
+        mem.seek(0)
+
+        return send_file(
+            mem,
+            as_attachment=True,
+            download_name="CRE-Catalogue.csv",
+            mimetype="text/csv",
+        )
+    abort(404)
+
+
 # @app.route("/rest/v1/all_nodes", methods=["GET"])
 # def all_nodes() -> Any:
 #     database = db.Node_collection()
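
Usage sketch (not part of the patch): once the change is deployed, the new export can be fetched with any HTTP client. The snippet below is a hypothetical client-side example; the base URL and the use of the requests library are assumptions, and only the /rest/v1/cre_csv path, the CSV attachment name, and the staggered "CRE N" column layout come from the patch above.

    # hypothetical client for the /rest/v1/cre_csv endpoint added by this patch
    import csv
    import io

    import requests

    BASE_URL = "http://localhost:5000"  # assumed local dev instance, adjust as needed

    response = requests.get(f"{BASE_URL}/rest/v1/cre_csv", timeout=30)
    response.raise_for_status()

    # the endpoint returns a text/csv attachment (CRE-Catalogue.csv); each data row
    # fills exactly one "CRE N" column with an "id|name" value
    reader = csv.DictReader(io.StringIO(response.text))
    for row in reader:
        for column, value in row.items():
            if value:
                print(f"{column}: {value}")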