From 482b768f1e89be3c48bdf010b3376703073846a0 Mon Sep 17 00:00:00 2001 From: Vincent Raymond Date: Fri, 19 Apr 2024 14:20:50 -0400 Subject: [PATCH] [code2fn] Various bug fixes and documentation updates (#878) ## Summary of Changes - Adds unit testing for Fortran compound-conditional - Adds missing files for consumer mode model coverage reports - Fixes circular dependency issue between model coverage reports and img2mml - Pins code2fn related libraries in pyproject.toml - Pins all tree-sitter grammar versions to prevent a breaking change in the future. ### Related issues Resolves #663 Resolves #876 --- .github/workflows/tests-and-docs.yml | 2 +- pyproject.toml | 8 +- .../test_compound_conditional_cast_fortran.py | 101 ++++++++++++++ .../tests/test_conditional_cast_fortran.py | 11 +- .../tests/test_expression_cast_fortran.py | 9 +- .../fortran/tests/test_for_cast_fortran.py | 10 +- .../tests/test_identifier_cast_fortran.py | 5 +- .../tests/test_operation_cast_fortran.py | 19 +-- .../fortran/tests/test_while_cast_fortran.py | 13 +- .../model_coverage_report/base.html | 56 ++++++-- .../model_coverage_report/html_builder.py | 119 ++++++++++++----- .../model_coverage_report.py | 123 ++++++++++-------- .../tree_sitter_parsers/languages.yaml | 3 + skema/utils/__init__.py | 0 skema/utils/change_dir_back.py | 12 ++ 15 files changed, 363 insertions(+), 128 deletions(-) create mode 100644 skema/program_analysis/CAST/fortran/tests/test_compound_conditional_cast_fortran.py create mode 100644 skema/utils/__init__.py create mode 100644 skema/utils/change_dir_back.py diff --git a/.github/workflows/tests-and-docs.yml b/.github/workflows/tests-and-docs.yml index a338b47ddd8..0b9b895fd01 100644 --- a/.github/workflows/tests-and-docs.yml +++ b/.github/workflows/tests-and-docs.yml @@ -83,12 +83,12 @@ jobs: # retrieve latest model for img2mml component curl -L https://artifacts.askem.lum.ai/skema/img2mml/models/cnn_xfmer_arxiv_im2mml_with_fonts_boldface_best.pt > 
skema/img2mml/trained_models/cnn_xfmer_arxiv_im2mml_with_fonts_boldface_best.pt pip install ".[all]" + # Install tree-sitter parser (for Python component unit tests) - name: Install tree-sitter parsers working-directory: . run: python skema/program_analysis/tree_sitter_parsers/build_parsers.py --ci --all - # docs (API) # generate python docs using pdoc - name: "Create documentation for Python components (API docs)" diff --git a/pyproject.toml b/pyproject.toml index d55b46bae25..09c7a3b55de 100644 --- a/pyproject.toml +++ b/pyproject.toml @@ -15,11 +15,11 @@ dependencies=[ "numpy", "dill==0.3.7", "networkx==2.8.8", - "PyYAML", + "PyYAML==6.*", "tree-sitter==0.20.4", "neo4j==5.14.1", "requests", - "beautifulsoup4", # used to remove comments etc from pMML before sending to MORAE + "beautifulsoup4==4.12.*", # used to remove comments etc from pMML before sending to MORAE "typing_extensions", # see https://github.com/pydantic/pydantic/issues/5821#issuecomment-1559196859 "fastapi~=0.100.0", "starlette", @@ -27,7 +27,7 @@ dependencies=[ "pydantic>=2.0.0", "uvicorn", "python-multipart", - "func_timeout" + "func_timeout==4.3.5" ] # The Python program analysis pipeline does not currently work with Python 3.9 # or 3.10. This may change in the future. 
@@ -110,7 +110,7 @@ all = ["skema[core]", "skema[dev]", "skema[doc]", "skema[demo]", "skema[annotati [tool.setuptools.package-data] # needed to ensure models are included in package/discoverable -"*" = ["*.json", "vocab.txt", "*.pt", "*.png", "*.html", "*.yml", "*.yaml"] +"*" = ["*.json", "*vocab.txt", "*.pt", "*.png", "*.html", "*.yml", "*.yaml"] [tool.setuptools.dynamic] readme = {file = ["README.md"], content-type = "text/markdown"} diff --git a/skema/program_analysis/CAST/fortran/tests/test_compound_conditional_cast_fortran.py b/skema/program_analysis/CAST/fortran/tests/test_compound_conditional_cast_fortran.py new file mode 100644 index 00000000000..48137e6b37d --- /dev/null +++ b/skema/program_analysis/CAST/fortran/tests/test_compound_conditional_cast_fortran.py @@ -0,0 +1,101 @@ +import pytest +from tempfile import TemporaryDirectory +from pathlib import Path + +from skema.program_analysis.CAST.fortran.ts2cast import TS2CAST +from skema.program_analysis.CAST2FN.model.cast import ( + Assignment, + Var, + Name, + CASTLiteralValue, + ModelIf, + Operator, + ScalarType +) + +def cond_compound1(): + return """ +program cond_compound1 +integer :: a = 3 +if (a .gt. 1 .and. a .lt. 
10) then + a = 40 +end if +end program cond_compound1 + """ + + +def generate_cast(test_file_string): + with TemporaryDirectory() as temp: + source_path = Path(temp) / "source.f95" + source_path.write_text(test_file_string) + out_cast = TS2CAST(str(source_path)).out_cast + + return out_cast[0] + +def test_cond_compound1(): + exp_cast = generate_cast(cond_compound1()) + + asg_node = exp_cast.nodes[0].body[0] + + assert isinstance(asg_node, Assignment) + assert isinstance(asg_node.left, Var) + assert isinstance(asg_node.left.val, Name) + assert asg_node.left.val.name == "a" + assert asg_node.left.val.id == 0 + + assert isinstance(asg_node.right, CASTLiteralValue) + assert asg_node.right.value_type == ScalarType.INTEGER + assert asg_node.right.value == '3' + + cond_node = exp_cast.nodes[0].body[1] + cond_expr = cond_node.expr + assert isinstance(cond_node, ModelIf) + assert isinstance(cond_expr, ModelIf) + + if_node = cond_expr + assert isinstance(if_node, ModelIf) + + expr = if_node.expr + assert isinstance(expr, Operator) + assert expr.op == ".gt." + assert len(expr.operands) == 2 + assert isinstance(expr.operands[1], CASTLiteralValue) + assert expr.operands[1].value_type == ScalarType.INTEGER + assert expr.operands[1].value == "1" + + assert isinstance(expr.operands[0], Name) + assert expr.operands[0].name == "a" + assert expr.operands[0].id == 0 + + assert len(if_node.body) == 1 + body = if_node.body[0] + assert isinstance(body, Operator) + assert body.op == ".lt." 
+ assert len(body.operands) == 2 + assert isinstance(body.operands[0], Name) + assert body.operands[0].name == "a" + assert body.operands[0].id == 0 + + assert isinstance(body.operands[1], CASTLiteralValue) + assert body.operands[1].value_type == ScalarType.INTEGER + assert body.operands[1].value == "10" + + assert len(if_node.orelse) == 1 + orelse = if_node.orelse[0] + assert isinstance(orelse, CASTLiteralValue) + assert orelse.value_type == ScalarType.BOOLEAN + assert orelse.value == False + + cond_body = cond_node.body + assert len(cond_body) == 1 + assert isinstance(cond_body[0], Assignment) + assert isinstance(cond_body[0].left, Var) + assert cond_body[0].left.val.name == "a" + assert cond_body[0].left.val.id == 0 + + assert isinstance(cond_body[0].right, CASTLiteralValue) + assert cond_body[0].right.value_type == ScalarType.INTEGER + assert cond_body[0].right.value == '40' + + cond_else = cond_node.orelse + assert len(cond_else) == 0 diff --git a/skema/program_analysis/CAST/fortran/tests/test_conditional_cast_fortran.py b/skema/program_analysis/CAST/fortran/tests/test_conditional_cast_fortran.py index 2efd7784bab..c94a30b87d2 100644 --- a/skema/program_analysis/CAST/fortran/tests/test_conditional_cast_fortran.py +++ b/skema/program_analysis/CAST/fortran/tests/test_conditional_cast_fortran.py @@ -9,7 +9,8 @@ Name, CASTLiteralValue, ModelIf, - Operator + Operator, + ScalarType ) def cond1(): @@ -63,7 +64,7 @@ def test_cond1(): assert asg_node.left.val.name == "x" assert isinstance(asg_node.right, CASTLiteralValue) - assert asg_node.right.value_type == "Integer" + assert asg_node.right.value_type == ScalarType.INTEGER assert asg_node.right.value == '2' assert isinstance(cond_node, ModelIf) @@ -103,7 +104,7 @@ def test_cond2(): assert asg_node.left.val.id == 0 assert isinstance(asg_node.right, CASTLiteralValue) - assert asg_node.right.value_type == "Integer" + assert asg_node.right.value_type == ScalarType.INTEGER assert asg_node.right.value == '2' asg_node = 
exp_cast.nodes[0].body[1] @@ -114,7 +115,7 @@ def test_cond2(): assert asg_node.left.val.id == 1 assert isinstance(asg_node.right, CASTLiteralValue) - assert asg_node.right.value_type == "Integer" + assert asg_node.right.value_type == ScalarType.INTEGER assert asg_node.right.value == '3' assert isinstance(cond_node, ModelIf) @@ -127,7 +128,7 @@ def test_cond2(): assert isinstance(cond_expr.operands[0], Name) assert cond_expr.operands[0].name == "x" assert isinstance(cond_expr.operands[1], CASTLiteralValue) - assert cond_expr.operands[1].value_type == "Integer" + assert cond_expr.operands[1].value_type == ScalarType.INTEGER assert cond_expr.operands[1].value == "5" assert len(cond_body) == 3 diff --git a/skema/program_analysis/CAST/fortran/tests/test_expression_cast_fortran.py b/skema/program_analysis/CAST/fortran/tests/test_expression_cast_fortran.py index 6713a169640..3e59647654e 100644 --- a/skema/program_analysis/CAST/fortran/tests/test_expression_cast_fortran.py +++ b/skema/program_analysis/CAST/fortran/tests/test_expression_cast_fortran.py @@ -7,7 +7,8 @@ Assignment, Var, Name, - CASTLiteralValue + CASTLiteralValue, + ScalarType ) def exp0(): @@ -44,7 +45,7 @@ def test_exp0(): assert asg_node.left.val.name == "x" assert isinstance(asg_node.right, CASTLiteralValue) - assert asg_node.right.value_type == "Integer" + assert asg_node.right.value_type == ScalarType.INTEGER assert asg_node.right.value == '2' @@ -60,7 +61,7 @@ def test_exp1(): assert asg_node.left.val.id == 0 assert isinstance(asg_node.right, CASTLiteralValue) - assert asg_node.right.value_type == "Integer" + assert asg_node.right.value_type == ScalarType.INTEGER assert asg_node.right.value == '2' # ------ @@ -72,7 +73,7 @@ def test_exp1(): assert asg_node.left.val.id == 1 assert isinstance(asg_node.right, CASTLiteralValue) - assert asg_node.right.value_type == "Integer" + assert asg_node.right.value_type == ScalarType.INTEGER assert asg_node.right.value == '3' if __name__ == "__main__": diff --git 
a/skema/program_analysis/CAST/fortran/tests/test_for_cast_fortran.py b/skema/program_analysis/CAST/fortran/tests/test_for_cast_fortran.py index 1b4e96f2b1b..5b202981017 100644 --- a/skema/program_analysis/CAST/fortran/tests/test_for_cast_fortran.py +++ b/skema/program_analysis/CAST/fortran/tests/test_for_cast_fortran.py @@ -11,7 +11,9 @@ CASTLiteralValue, ModelIf, Loop, - Operator + Operator, + ScalarType, + StructureType ) def for1(): @@ -48,7 +50,7 @@ def test_for1(): assert asg_node.left.val.name == "x" assert isinstance(asg_node.right, CASTLiteralValue) - assert asg_node.right.value_type == "Integer" + assert asg_node.right.value_type == ScalarType.INTEGER assert asg_node.right.value == '7' assert isinstance(loop_node, Loop) @@ -79,7 +81,7 @@ def test_for1(): assert isinstance(loop_pre[1], Assignment) assert isinstance(loop_pre[1].left, CASTLiteralValue) - assert loop_pre[1].left.value_type == "Tuple" + assert loop_pre[1].left.value_type == StructureType.TUPLE assert isinstance(loop_pre[1].left.value[0], Var) assert loop_pre[1].left.value[0].val.name == "i" @@ -101,7 +103,7 @@ def test_for1(): assert loop_test.operands[0].name == "sc_0" assert isinstance(loop_test.operands[1], CASTLiteralValue) - assert loop_test.operands[1].value_type == "Boolean" + assert loop_test.operands[1].value_type == ScalarType.BOOLEAN # Loop Body loop_body = loop_node.body diff --git a/skema/program_analysis/CAST/fortran/tests/test_identifier_cast_fortran.py b/skema/program_analysis/CAST/fortran/tests/test_identifier_cast_fortran.py index b6326a736a6..c532e4ac529 100644 --- a/skema/program_analysis/CAST/fortran/tests/test_identifier_cast_fortran.py +++ b/skema/program_analysis/CAST/fortran/tests/test_identifier_cast_fortran.py @@ -10,7 +10,8 @@ CASTLiteralValue, ModelIf, Loop, - Operator + Operator, + ScalarType ) def identifier1(): @@ -41,6 +42,6 @@ def test_identifier1(): assert asg_node.left.val.name == "x" assert isinstance(asg_node.right, CASTLiteralValue) - assert 
asg_node.right.value_type == "Integer" + assert asg_node.right.value_type == ScalarType.INTEGER assert asg_node.right.value == '2' diff --git a/skema/program_analysis/CAST/fortran/tests/test_operation_cast_fortran.py b/skema/program_analysis/CAST/fortran/tests/test_operation_cast_fortran.py index ca95e64d78a..79291519e67 100644 --- a/skema/program_analysis/CAST/fortran/tests/test_operation_cast_fortran.py +++ b/skema/program_analysis/CAST/fortran/tests/test_operation_cast_fortran.py @@ -8,7 +8,8 @@ Var, Name, Operator, - CASTLiteralValue + CASTLiteralValue, + ScalarType ) def binop1(): @@ -74,11 +75,11 @@ def test_binop1(): assert isinstance(binop_node.right.operands[0], CASTLiteralValue) assert binop_node.right.operands[0].value == '2' - assert binop_node.right.operands[0].value_type == 'Integer' + assert binop_node.right.operands[0].value_type == ScalarType.INTEGER assert isinstance(binop_node.right.operands[1], CASTLiteralValue) assert binop_node.right.operands[1].value == '3' - assert binop_node.right.operands[1].value_type == 'Integer' + assert binop_node.right.operands[1].value_type == ScalarType.INTEGER def test_binop2(): exp_cast = generate_cast(binop2()) @@ -92,7 +93,7 @@ def test_binop2(): assert binop_node.left.val.id == 0 assert isinstance(binop_node.right, CASTLiteralValue) - assert binop_node.right.value_type == "Integer" + assert binop_node.right.value_type == ScalarType.INTEGER assert binop_node.right.value == '2' # ------ @@ -112,7 +113,7 @@ def test_binop2(): assert isinstance(binop_node.right.operands[1], CASTLiteralValue) assert binop_node.right.operands[1].value == '3' - assert binop_node.right.operands[1].value_type == 'Integer' + assert binop_node.right.operands[1].value_type == ScalarType.INTEGER def test_binop3(): exp_cast = generate_cast(binop3()) @@ -126,7 +127,7 @@ def test_binop3(): assert binop_node.left.val.id == 0 assert isinstance(binop_node.right, CASTLiteralValue) - assert binop_node.right.value_type == "Integer" + assert 
binop_node.right.value_type == ScalarType.INTEGER assert binop_node.right.value == '1' # ------ @@ -138,7 +139,7 @@ def test_binop3(): assert binop_node.left.val.id == 1 assert isinstance(binop_node.right, CASTLiteralValue) - assert binop_node.right.value_type == "Integer" + assert binop_node.right.value_type == ScalarType.INTEGER assert binop_node.right.value == '2' # ------ @@ -198,7 +199,7 @@ def test_unary1(): assert isinstance(unary_node.right.operands[0], CASTLiteralValue) assert unary_node.right.operands[0].value == '1' - assert unary_node.right.operands[0].value_type == 'Integer' + assert unary_node.right.operands[0].value_type == ScalarType.INTEGER def test_unary2(): exp_cast = generate_cast(unary2()) @@ -213,7 +214,7 @@ def test_unary2(): assert isinstance(unary_node.right, CASTLiteralValue) assert unary_node.right.value == '1' - assert unary_node.right.value_type == 'Integer' + assert unary_node.right.value_type == ScalarType.INTEGER unary_node = exp_cast.nodes[0].body[1] diff --git a/skema/program_analysis/CAST/fortran/tests/test_while_cast_fortran.py b/skema/program_analysis/CAST/fortran/tests/test_while_cast_fortran.py index 7cafe0caf58..cdfc8364e95 100644 --- a/skema/program_analysis/CAST/fortran/tests/test_while_cast_fortran.py +++ b/skema/program_analysis/CAST/fortran/tests/test_while_cast_fortran.py @@ -11,7 +11,8 @@ CASTLiteralValue, ModelIf, Loop, - Operator + Operator, + ScalarType ) def while1(): @@ -56,7 +57,7 @@ def test_while1(): assert asg_node.left.val.name == "x" assert isinstance(asg_node.right, CASTLiteralValue) - assert asg_node.right.value_type == "Integer" + assert asg_node.right.value_type == ScalarType.INTEGER assert asg_node.right.value == '2' assert isinstance(loop_node, Loop) @@ -70,7 +71,7 @@ def test_while1(): assert loop_test.operands[0].name == "x" assert isinstance(loop_test.operands[1], CASTLiteralValue) - assert loop_test.operands[1].value_type == "Integer" + assert loop_test.operands[1].value_type == ScalarType.INTEGER 
assert loop_test.operands[1].value == "5" # Loop Body @@ -99,7 +100,7 @@ def test_while2(): assert asg_node.left.val.name == "x" assert isinstance(asg_node.right, CASTLiteralValue) - assert asg_node.right.value_type == "Integer" + assert asg_node.right.value_type == ScalarType.INTEGER assert asg_node.right.value == '2' assert isinstance(asg_node_2, Assignment) @@ -108,7 +109,7 @@ def test_while2(): assert asg_node_2.left.val.name == "y" assert isinstance(asg_node_2.right, CASTLiteralValue) - assert asg_node_2.right.value_type == "Integer" + assert asg_node_2.right.value_type == ScalarType.INTEGER assert asg_node_2.right.value == '3' assert isinstance(loop_node, Loop) @@ -122,7 +123,7 @@ def test_while2(): assert loop_test.operands[0].name == "x" assert isinstance(loop_test.operands[1], CASTLiteralValue) - assert loop_test.operands[1].value_type == "Integer" + assert loop_test.operands[1].value_type == ScalarType.INTEGER assert loop_test.operands[1].value == "5" # Loop Body diff --git a/skema/program_analysis/model_coverage_report/base.html b/skema/program_analysis/model_coverage_report/base.html index ad0189d2391..a997aa466ad 100644 --- a/skema/program_analysis/model_coverage_report/base.html +++ b/skema/program_analysis/model_coverage_report/base.html @@ -1,8 +1,8 @@ - - + + Code Model Coverage Tracker - + -

Code Model Coverage Tracker

- - - +

Code Model Coverage Tracker

+ +
+ +
+ + + + + +
+

Note: AMR in this report is generated without the assistance of an LLM. Results may be less accurate.

+
+ + + diff --git a/skema/program_analysis/model_coverage_report/html_builder.py b/skema/program_analysis/model_coverage_report/html_builder.py index 0db5220f02c..1fa5fcf144c 100644 --- a/skema/program_analysis/model_coverage_report/html_builder.py +++ b/skema/program_analysis/model_coverage_report/html_builder.py @@ -1,3 +1,4 @@ +from copy import deepcopy from pathlib import Path from bs4 import BeautifulSoup @@ -33,39 +34,74 @@ def add_table_data_field(self, table_row_tag, field_data: str, anchored=False, a def add_model(self, model_name: str): """Adds a new model to the HTML source""" - # Create the new model HTML structure - new_model_container = self.soup.new_tag( - "div", id=model_name, class_="model-container" - ) - new_model_heading = self.soup.new_tag("h2") - new_model_heading.string = model_name - # Create basic model table - new_model_table_container_basic = self.soup.new_tag( - "div", id=f"{model_name}-basic", class_="table-container" - ) - - new_model_table_basic = self.soup.new_tag("table", id=f"table-{model_name}", **{"class":"searchable sortable data-table"}) - new_model_thead = self.soup.new_tag("thead") - - new_model_table_header_basic = self.soup.new_tag("tr") - self.add_table_header_field(new_model_table_header_basic, "File Name") - self.add_table_header_field(new_model_table_header_basic, "Num Lines") - self.add_table_header_field(new_model_table_header_basic, "Can Ingest") - self.add_table_header_field(new_model_table_header_basic, "Tree-Sitter Parse Tree") - self.add_table_header_field(new_model_table_header_basic, "CAST") - self.add_table_header_field(new_model_table_header_basic, "Gromet") - self.add_table_header_field(new_model_table_header_basic, "Gromet Errors") - self.add_table_header_field(new_model_table_header_basic, "Gromet Report") - self.add_table_header_field(new_model_table_header_basic, "Preprocessed Gromet") - # Append the elements to each other - new_model_container.extend([new_model_heading, new_model_table_container_basic]) 
- new_model_table_container_basic.append(new_model_table_basic) - new_model_table_basic.append(new_model_thead) - new_model_thead.append(new_model_table_header_basic) - - # Append to outer body - self.soup.body.append(new_model_container) + def add_model_developer(): + # Create the new model HTML structure + new_model_container = self.soup.new_tag( + "div", id=f"top-{model_name}-basic", class_="model-container" + ) + new_model_heading = self.soup.new_tag("h2") + new_model_heading.string = model_name + # Create basic model table + new_model_table_container_basic = self.soup.new_tag( + "div", id=f"{model_name}-basic", class_="table-container" + ) + + new_model_table_basic = self.soup.new_tag("table", id=f"table-{model_name}", **{"class":"searchable sortable data-table"}) + new_model_thead = self.soup.new_tag("thead") + + new_model_table_header_basic = self.soup.new_tag("tr") + self.add_table_header_field(new_model_table_header_basic, "File Name") + self.add_table_header_field(new_model_table_header_basic, "Num Lines") + self.add_table_header_field(new_model_table_header_basic, "Can Ingest") + self.add_table_header_field(new_model_table_header_basic, "Tree-Sitter Parse Tree") + self.add_table_header_field(new_model_table_header_basic, "CAST") + self.add_table_header_field(new_model_table_header_basic, "Gromet") + self.add_table_header_field(new_model_table_header_basic, "Gromet Errors") + self.add_table_header_field(new_model_table_header_basic, "Gromet Report") + self.add_table_header_field(new_model_table_header_basic, "Preprocessed Gromet") + + # Append the elements to each other + new_model_container.extend([new_model_heading, new_model_table_container_basic]) + new_model_table_container_basic.append(new_model_table_basic) + new_model_table_basic.append(new_model_thead) + new_model_thead.append(new_model_table_header_basic) + + # Append to developer + target_div = self.soup.find('div', id='developer') + target_div.append(new_model_container) + + def 
add_model_consumer(): + # Create the new model HTML structure + new_model_container = self.soup.new_tag( + "div", id=f"top-{model_name}-consumer", class_="model-container" + ) + new_model_heading = self.soup.new_tag("h2") + new_model_heading.string = model_name + + # Create basic model table + new_model_table_container_basic = self.soup.new_tag( + "div", id=f"{model_name}-consumer", class_="table-container" + ) + + new_model_table_basic = self.soup.new_tag("table", id=f"table-{model_name}", **{"class":"searchable sortable data-table"}) + new_model_thead = self.soup.new_tag("thead") + + new_model_table_header_basic = self.soup.new_tag("tr") + self.add_table_header_field(new_model_table_header_basic, "File Name") + self.add_table_header_field(new_model_table_header_basic, "Can Ingest") + self.add_table_header_field(new_model_table_header_basic, "AMR") + + # Append the elements to each other + new_model_container.extend([new_model_heading, new_model_table_container_basic]) + new_model_table_container_basic.append(new_model_table_basic) + new_model_table_basic.append(new_model_thead) + new_model_thead.append(new_model_table_header_basic) + target_div = self.soup.find('div', id='consumer') + target_div.append(deepcopy(new_model_container)) + + add_model_consumer() + add_model_developer() def add_model_header_data( self, @@ -139,6 +175,25 @@ def add_file_basic( model_table.append(new_row) + def add_file_consumer( + self, + model: str, + file_name: str, + can_ingest: bool, + amr_path: Path, + + ): + """Add a file entry to a model table""" + model_table = self.soup.select_one(f"#{model}-consumer table") + new_row = self.soup.new_tag("tr") + + # Add row data fields + self.add_table_data_field(new_row, file_name) + self.add_table_data_field(new_row, "✓" if can_ingest else "✗") + self.add_table_data_field(new_row, str(amr_path), anchored=True, anchor_text="Open AMR") + + model_table.append(new_row) + def write_html(self): """Output html to a file.""" output_path = 
Path(__file__).resolve().parent / "output.html" diff --git a/skema/program_analysis/model_coverage_report/model_coverage_report.py b/skema/program_analysis/model_coverage_report/model_coverage_report.py index e76059f755c..3320eba3031 100644 --- a/skema/program_analysis/model_coverage_report/model_coverage_report.py +++ b/skema/program_analysis/model_coverage_report/model_coverage_report.py @@ -1,92 +1,83 @@ import argparse +import json +import httpx import os -import traceback # Debugs import requests +import subprocess +import asyncio +import traceback import yaml -import json + from enum import Enum -from typing import List, Dict, Tuple, Callable, Any -from zipfile import ZipFile from io import BytesIO -from tempfile import TemporaryDirectory from pathlib import Path +from tempfile import TemporaryDirectory +from typing import Any, Callable, Dict, List, Tuple +from zipfile import ZipFile from func_timeout import func_timeout, FunctionTimedOut -from tree_sitter import Language, Parser, Tree +from tree_sitter import Language, Parser -from skema.program_analysis.CAST2FN.cast import CAST -from skema.program_analysis.run_ann_cast_pipeline import ann_cast_pipeline +from skema.program_analysis.fortran2cast import fortran_to_cast +from skema.program_analysis.matlab2cast import matlab_to_cast from skema.program_analysis.model_coverage_report.html_builder import HTML_Instance from skema.program_analysis.multi_file_ingester import process_file_system -from skema.program_analysis.single_file_ingester import process_file -from skema.program_analysis.snippet_ingester import process_snippet -from skema.program_analysis.tree_sitter_parsers.build_parsers import ( - INSTALLED_LANGUAGES_FILEPATH, - LANGUAGES_YAML_FILEPATH, -) from skema.program_analysis.python2cast import python_to_cast -from skema.program_analysis.fortran2cast import fortran_to_cast -from skema.program_analysis.matlab2cast import matlab_to_cast +from skema.program_analysis.single_file_ingester import process_file 
+from skema.program_analysis.tree_sitter_parsers.build_parsers import INSTALLED_LANGUAGES_FILEPATH from skema.program_analysis.tree_sitter_parsers.util import extension_to_language from skema.rest.utils import fn_preprocessor +from skema.rest.workflows import code_snippets_to_pn_amr from skema.utils.fold import del_nulls, dictionary_to_gromet_json +from skema.utils.change_dir_back import change_dir_back +from skema.skema_py.server import System +# Constants for file paths THIS_PATH = Path(__file__).parent.resolve() -MODEL_YAML_PATH = Path(__file__).parent / "models.yaml" +MODEL_YAML_PATH = THIS_PATH / "models.yaml" MODEL_YAML = yaml.safe_load(MODEL_YAML_PATH.read_text()) class Status(Enum): - """Status enum for the status of executing a step in the code2fn pipeline""" - VALID = "Valid" TIMEOUT = "Timeout" EXCEPTION = "Exception" @staticmethod - def all_valid(status_list: List) -> bool: - """Check if all status in a List are Status.VALID""" - return all([status == Status.VALID for status in status_list]) + def all_valid(status_list: List[Enum]) -> bool: + return all(status == Status.VALID for status in status_list) @staticmethod - def get_overall_status(status_list: List) -> str: - """Return the final pipeline status given a List of status for each step in the pipeline""" - return ( - Status.TIMEOUT - if Status.TIMEOUT in status_list - else Status.EXCEPTION - if Status.EXCEPTION in status_list - else Status.VALID - ) - - -def generate_data_product( - output_dir: str, model_name: str, file_name: str, data_product_function: Callable, args=(), kwargs=None -) -> Tuple[str, Any]: - """Wrapper function for generating data products, returns the status of processing.""" - (output, output_path, status) = (None, None, None) - - output_path = Path(output_dir) / "data"/ data_product_function.__name__ / model_name / file_name + def get_overall_status(status_list: List[Enum]) -> Enum: + if Status.TIMEOUT in status_list: + return Status.TIMEOUT + elif Status.EXCEPTION in 
status_list: + return Status.EXCEPTION + return Status.VALID + +def valid_path(path: str) -> bool: + return "include_" not in path + + +@change_dir_back(THIS_PATH) +def generate_data_product(output_dir: str, model_name: str, file_name: str, data_product_function: Callable, args=(), kwargs=None) -> Tuple[str, Any, Status]: + output_path = Path(output_dir) / "data" / data_product_function.__name__ / model_name / file_name output_path.parent.mkdir(parents=True, exist_ok=True) relative_output_path = output_path.relative_to(output_dir) - # There is a possibility that the processing function fails after changing the working directory. - # So we should change it back before and after each itteraton. - os.chdir(THIS_PATH) + output = None try: - output = func_timeout(10, data_product_function, args=args, kwargs=kwargs) - if output == "": - raise Exception("Data product is empty") + output = func_timeout(10, data_product_function, args=args, kwargs=(kwargs or {})) + if not output: + raise ValueError("Data product is empty") output_path.write_text(output) status = Status.VALID except FunctionTimedOut: - os.chdir(THIS_PATH) output_path.write_text("Processing exceeded timeout (10s)") status = Status.TIMEOUT - except (Exception, SystemExit) as e: - os.chdir(THIS_PATH) + except Exception as e: output_path.write_text(traceback.format_exc()) status = Status.EXCEPTION - + return output, relative_output_path, status @@ -137,6 +128,23 @@ def generate_gromet_preprocess_logs(gromet_collection: Dict) -> str: logs = fn_preprocessor(gromet_collection)[1] return "\n".join(logs) +def ingest_with_morae(filename: str, source: str): + """Generator function for ingesting source file with MORAE + NOTE: This function uses the non-llm amr pipeline due to being run in CI. 
+ """ + + async def morae_async(): + async with httpx.AsyncClient() as client: + return await code_snippets_to_pn_amr( + system=System( + files=[filename], + blobs=[source] + ), + client=client # Pass the instantiated client + ) + + return json.dumps(asyncio.run(morae_async())) + def process_single_model(html: HTML_Instance, output_dir: str, model_name: str): """Generate an HTML report for a single model""" html.add_model(model_name) @@ -211,6 +219,11 @@ def process_single_model(html: HTML_Instance, output_dir: str, model_name: str): except: gromet_error_count = 0 + + amr_output, amr_report_relative_path, amr_report_status = generate_data_product( + output_dir, model_name, f"{filename}.json", ingest_with_morae, args=(file.filename, source), kwargs=None + ) + # Check the status of each pipeline step final_status = Status.get_overall_status( [cast_status, gromet_status] @@ -224,6 +237,12 @@ def process_single_model(html: HTML_Instance, output_dir: str, model_name: str): can_ingest = False total_lines += len(source.splitlines()) + html.add_file_consumer( + model_name, + file.filename, + amr_report_status is Status.VALID, + amr_report_relative_path + ) html.add_file_basic( model_name, file.filename, @@ -237,6 +256,7 @@ def process_single_model(html: HTML_Instance, output_dir: str, model_name: str): gromet_preprocess_relative_path ) + # If all files are valid in a system, attempt to ingest full system into single GrometFNModuleCollection if not Status.all_valid(file_status_list): html.add_model_header_data( @@ -311,9 +331,6 @@ def main(): elif args.mode == "single": supported, total = process_single_model(html, output_dir, args.model_name) model_line_coverage[args.model_name] = (supported, total) - - # DataTables have to be initialized after all models are generated - html.add_data_table_script() output_path = Path(output_dir) / "report.html" output_path.write_text(html.soup.prettify()) diff --git a/skema/program_analysis/tree_sitter_parsers/languages.yaml 
b/skema/program_analysis/tree_sitter_parsers/languages.yaml index 022e73c7445..e484d7e6038 100644 --- a/skema/program_analysis/tree_sitter_parsers/languages.yaml +++ b/skema/program_analysis/tree_sitter_parsers/languages.yaml @@ -2,6 +2,7 @@ c: tree_sitter_name: tree-sitter-c clone_url: https://github.com/tree-sitter/tree-sitter-c.git + commit_sha: 1aafaff supports_comment_extraction: True supports_fn_extraction: False extensions: @@ -39,6 +40,7 @@ matlab: python: tree_sitter_name: tree-sitter-python clone_url: https://github.com/tree-sitter/tree-sitter-python.git + commit_sha: a227610 supports_comment_extraction: True supports_fn_extraction: True extensions: @@ -46,6 +48,7 @@ python: r: tree_sitter_name: tree-sitter-r clone_url: https://github.com/r-lib/tree-sitter-r.git + commit_sha: c55f8b4 supports_comment_extraction: True supports_fn_extraction: False extensions: diff --git a/skema/utils/__init__.py b/skema/utils/__init__.py new file mode 100644 index 00000000000..e69de29bb2d diff --git a/skema/utils/change_dir_back.py b/skema/utils/change_dir_back.py new file mode 100644 index 00000000000..f48f3fa41f2 --- /dev/null +++ b/skema/utils/change_dir_back.py @@ -0,0 +1,12 @@ +import os +def change_dir_back(this_path): + """Decorator to ensure the working directory is changed back after function call.""" + def outer_decorator(func): + def inner_decorator(*args, **kwargs): + try: + return func(*args, **kwargs) + finally: + os.chdir(this_path) + + return inner_decorator + return outer_decorator \ No newline at end of file