diff --git a/poetry.lock b/poetry.lock index 5cbe474..4ec31c3 100644 --- a/poetry.lock +++ b/poetry.lock @@ -1,4 +1,4 @@ -# This file is automatically @generated by Poetry 1.7.1 and should not be changed by hand. +# This file is automatically @generated by Poetry 1.8.3 and should not be changed by hand. [[package]] name = "certifi" @@ -277,13 +277,13 @@ tests = ["pytest (>=4.6)"] [[package]] name = "nexusformat" -version = "1.0.2" +version = "1.0.6" description = "Python API to access NeXus data" optional = false python-versions = ">=3.6" files = [ - {file = "nexusformat-1.0.2-py3-none-any.whl", hash = "sha256:2cc0ea1db12304120080c92f67cb578b9f8214844f49ab6f8c809e3bbf795605"}, - {file = "nexusformat-1.0.2.tar.gz", hash = "sha256:c55920137904f4b7b4e4e8cb64d3e65883a5ac0c5182c2ccb685aee9d42de3e6"}, + {file = "nexusformat-1.0.6-py3-none-any.whl", hash = "sha256:89947215dfa8843584df25432b93ede009651382c5521eb24ee972d38752b23e"}, + {file = "nexusformat-1.0.6.tar.gz", hash = "sha256:5315373c0ff6affb9a99dcac6df0b42f62629c781f91786cb07228da17f7ce50"}, ] [package.dependencies] @@ -367,71 +367,76 @@ files = [ [[package]] name = "pandas" -version = "2.1.3" +version = "2.2.2" description = "Powerful data structures for data analysis, time series, and statistics" optional = false python-versions = ">=3.9" files = [ - {file = "pandas-2.1.3-cp310-cp310-macosx_10_9_x86_64.whl", hash = "sha256:acf08a73b5022b479c1be155d4988b72f3020f308f7a87c527702c5f8966d34f"}, - {file = "pandas-2.1.3-cp310-cp310-macosx_11_0_arm64.whl", hash = "sha256:3cc4469ff0cf9aa3a005870cb49ab8969942b7156e0a46cc3f5abd6b11051dfb"}, - {file = "pandas-2.1.3-cp310-cp310-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:35172bff95f598cc5866c047f43c7f4df2c893acd8e10e6653a4b792ed7f19bb"}, - {file = "pandas-2.1.3-cp310-cp310-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:59dfe0e65a2f3988e940224e2a70932edc964df79f3356e5f2997c7d63e758b4"}, - {file = "pandas-2.1.3-cp310-cp310-musllinux_1_1_x86_64.whl", hash = "sha256:0296a66200dee556850d99b24c54c7dfa53a3264b1ca6f440e42bad424caea03"}, - {file = "pandas-2.1.3-cp310-cp310-win_amd64.whl", hash = "sha256:465571472267a2d6e00657900afadbe6097c8e1dc43746917db4dfc862e8863e"}, - {file = "pandas-2.1.3-cp311-cp311-macosx_10_9_x86_64.whl", hash = "sha256:04d4c58e1f112a74689da707be31cf689db086949c71828ef5da86727cfe3f82"}, - {file = "pandas-2.1.3-cp311-cp311-macosx_11_0_arm64.whl", hash = "sha256:7fa2ad4ff196768ae63a33f8062e6838efed3a319cf938fdf8b95e956c813042"}, - {file = "pandas-2.1.3-cp311-cp311-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:4441ac94a2a2613e3982e502ccec3bdedefe871e8cea54b8775992485c5660ef"}, - {file = "pandas-2.1.3-cp311-cp311-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:d5ded6ff28abbf0ea7689f251754d3789e1edb0c4d0d91028f0b980598418a58"}, - {file = "pandas-2.1.3-cp311-cp311-musllinux_1_1_x86_64.whl", hash = "sha256:fca5680368a5139d4920ae3dc993eb5106d49f814ff24018b64d8850a52c6ed2"}, - {file = "pandas-2.1.3-cp311-cp311-win_amd64.whl", hash = "sha256:de21e12bf1511190fc1e9ebc067f14ca09fccfb189a813b38d63211d54832f5f"}, - {file = "pandas-2.1.3-cp312-cp312-macosx_10_9_x86_64.whl", hash = "sha256:a5d53c725832e5f1645e7674989f4c106e4b7249c1d57549023ed5462d73b140"}, - {file = "pandas-2.1.3-cp312-cp312-macosx_11_0_arm64.whl", hash = "sha256:7cf4cf26042476e39394f1f86868d25b265ff787c9b2f0d367280f11afbdee6d"}, - {file = "pandas-2.1.3-cp312-cp312-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = 
"sha256:72c84ec1b1d8e5efcbff5312abe92bfb9d5b558f11e0cf077f5496c4f4a3c99e"}, - {file = "pandas-2.1.3-cp312-cp312-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:1f539e113739a3e0cc15176bf1231a553db0239bfa47a2c870283fd93ba4f683"}, - {file = "pandas-2.1.3-cp312-cp312-musllinux_1_1_x86_64.whl", hash = "sha256:fc77309da3b55732059e484a1efc0897f6149183c522390772d3561f9bf96c00"}, - {file = "pandas-2.1.3-cp312-cp312-win_amd64.whl", hash = "sha256:08637041279b8981a062899da0ef47828df52a1838204d2b3761fbd3e9fcb549"}, - {file = "pandas-2.1.3-cp39-cp39-macosx_10_9_x86_64.whl", hash = "sha256:b99c4e51ef2ed98f69099c72c75ec904dd610eb41a32847c4fcbc1a975f2d2b8"}, - {file = "pandas-2.1.3-cp39-cp39-macosx_11_0_arm64.whl", hash = "sha256:f7ea8ae8004de0381a2376662c0505bb0a4f679f4c61fbfd122aa3d1b0e5f09d"}, - {file = "pandas-2.1.3-cp39-cp39-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:fcd76d67ca2d48f56e2db45833cf9d58f548f97f61eecd3fdc74268417632b8a"}, - {file = "pandas-2.1.3-cp39-cp39-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:1329dbe93a880a3d7893149979caa82d6ba64a25e471682637f846d9dbc10dd2"}, - {file = "pandas-2.1.3-cp39-cp39-musllinux_1_1_x86_64.whl", hash = "sha256:321ecdb117bf0f16c339cc6d5c9a06063854f12d4d9bc422a84bb2ed3207380a"}, - {file = "pandas-2.1.3-cp39-cp39-win_amd64.whl", hash = "sha256:11a771450f36cebf2a4c9dbd3a19dfa8c46c4b905a3ea09dc8e556626060fe71"}, - {file = "pandas-2.1.3.tar.gz", hash = "sha256:22929f84bca106921917eb73c1521317ddd0a4c71b395bcf767a106e3494209f"}, + {file = "pandas-2.2.2-cp310-cp310-macosx_10_9_x86_64.whl", hash = "sha256:90c6fca2acf139569e74e8781709dccb6fe25940488755716d1d354d6bc58bce"}, + {file = "pandas-2.2.2-cp310-cp310-macosx_11_0_arm64.whl", hash = "sha256:c7adfc142dac335d8c1e0dcbd37eb8617eac386596eb9e1a1b77791cf2498238"}, + {file = "pandas-2.2.2-cp310-cp310-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:4abfe0be0d7221be4f12552995e58723c7422c80a659da13ca382697de830c08"}, + {file = "pandas-2.2.2-cp310-cp310-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:8635c16bf3d99040fdf3ca3db669a7250ddf49c55dc4aa8fe0ae0fa8d6dcc1f0"}, + {file = "pandas-2.2.2-cp310-cp310-musllinux_1_1_aarch64.whl", hash = "sha256:40ae1dffb3967a52203105a077415a86044a2bea011b5f321c6aa64b379a3f51"}, + {file = "pandas-2.2.2-cp310-cp310-musllinux_1_1_x86_64.whl", hash = "sha256:8e5a0b00e1e56a842f922e7fae8ae4077aee4af0acb5ae3622bd4b4c30aedf99"}, + {file = "pandas-2.2.2-cp310-cp310-win_amd64.whl", hash = "sha256:ddf818e4e6c7c6f4f7c8a12709696d193976b591cc7dc50588d3d1a6b5dc8772"}, + {file = "pandas-2.2.2-cp311-cp311-macosx_10_9_x86_64.whl", hash = "sha256:696039430f7a562b74fa45f540aca068ea85fa34c244d0deee539cb6d70aa288"}, + {file = "pandas-2.2.2-cp311-cp311-macosx_11_0_arm64.whl", hash = "sha256:8e90497254aacacbc4ea6ae5e7a8cd75629d6ad2b30025a4a8b09aa4faf55151"}, + {file = "pandas-2.2.2-cp311-cp311-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:58b84b91b0b9f4bafac2a0ac55002280c094dfc6402402332c0913a59654ab2b"}, + {file = "pandas-2.2.2-cp311-cp311-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:6d2123dc9ad6a814bcdea0f099885276b31b24f7edf40f6cdbc0912672e22eee"}, + {file = "pandas-2.2.2-cp311-cp311-musllinux_1_1_aarch64.whl", hash = "sha256:2925720037f06e89af896c70bca73459d7e6a4be96f9de79e2d440bd499fe0db"}, + {file = "pandas-2.2.2-cp311-cp311-musllinux_1_1_x86_64.whl", hash = "sha256:0cace394b6ea70c01ca1595f839cf193df35d1575986e484ad35c4aeae7266c1"}, + {file = 
"pandas-2.2.2-cp311-cp311-win_amd64.whl", hash = "sha256:873d13d177501a28b2756375d59816c365e42ed8417b41665f346289adc68d24"}, + {file = "pandas-2.2.2-cp312-cp312-macosx_10_9_x86_64.whl", hash = "sha256:9dfde2a0ddef507a631dc9dc4af6a9489d5e2e740e226ad426a05cabfbd7c8ef"}, + {file = "pandas-2.2.2-cp312-cp312-macosx_11_0_arm64.whl", hash = "sha256:e9b79011ff7a0f4b1d6da6a61aa1aa604fb312d6647de5bad20013682d1429ce"}, + {file = "pandas-2.2.2-cp312-cp312-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:1cb51fe389360f3b5a4d57dbd2848a5f033350336ca3b340d1c53a1fad33bcad"}, + {file = "pandas-2.2.2-cp312-cp312-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:eee3a87076c0756de40b05c5e9a6069c035ba43e8dd71c379e68cab2c20f16ad"}, + {file = "pandas-2.2.2-cp312-cp312-musllinux_1_1_aarch64.whl", hash = "sha256:3e374f59e440d4ab45ca2fffde54b81ac3834cf5ae2cdfa69c90bc03bde04d76"}, + {file = "pandas-2.2.2-cp312-cp312-musllinux_1_1_x86_64.whl", hash = "sha256:43498c0bdb43d55cb162cdc8c06fac328ccb5d2eabe3cadeb3529ae6f0517c32"}, + {file = "pandas-2.2.2-cp312-cp312-win_amd64.whl", hash = "sha256:d187d355ecec3629624fccb01d104da7d7f391db0311145817525281e2804d23"}, + {file = "pandas-2.2.2-cp39-cp39-macosx_10_9_x86_64.whl", hash = "sha256:0ca6377b8fca51815f382bd0b697a0814c8bda55115678cbc94c30aacbb6eff2"}, + {file = "pandas-2.2.2-cp39-cp39-macosx_11_0_arm64.whl", hash = "sha256:9057e6aa78a584bc93a13f0a9bf7e753a5e9770a30b4d758b8d5f2a62a9433cd"}, + {file = "pandas-2.2.2-cp39-cp39-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:001910ad31abc7bf06f49dcc903755d2f7f3a9186c0c040b827e522e9cef0863"}, + {file = "pandas-2.2.2-cp39-cp39-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:66b479b0bd07204e37583c191535505410daa8df638fd8e75ae1b383851fe921"}, + {file = "pandas-2.2.2-cp39-cp39-musllinux_1_1_aarch64.whl", hash = "sha256:a77e9d1c386196879aa5eb712e77461aaee433e54c68cf253053a73b7e49c33a"}, + {file = "pandas-2.2.2-cp39-cp39-musllinux_1_1_x86_64.whl", hash = "sha256:92fd6b027924a7e178ac202cfbe25e53368db90d56872d20ffae94b96c7acc57"}, + {file = "pandas-2.2.2-cp39-cp39-win_amd64.whl", hash = "sha256:640cef9aa381b60e296db324337a554aeeb883ead99dc8f6c18e81a93942f5f4"}, + {file = "pandas-2.2.2.tar.gz", hash = "sha256:9e79019aba43cb4fda9e4d983f8e88ca0373adbb697ae9c6c43093218de28b54"}, ] [package.dependencies] numpy = [ - {version = ">=1.22.4,<2", markers = "python_version < \"3.11\""}, - {version = ">=1.23.2,<2", markers = "python_version == \"3.11\""}, - {version = ">=1.26.0,<2", markers = "python_version >= \"3.12\""}, + {version = ">=1.22.4", markers = "python_version < \"3.11\""}, + {version = ">=1.23.2", markers = "python_version == \"3.11\""}, + {version = ">=1.26.0", markers = "python_version >= \"3.12\""}, ] python-dateutil = ">=2.8.2" pytz = ">=2020.1" -tzdata = ">=2022.1" +tzdata = ">=2022.7" [package.extras] -all = ["PyQt5 (>=5.15.6)", "SQLAlchemy (>=1.4.36)", "beautifulsoup4 (>=4.11.1)", "bottleneck (>=1.3.4)", "dataframe-api-compat (>=0.1.7)", "fastparquet (>=0.8.1)", "fsspec (>=2022.05.0)", "gcsfs (>=2022.05.0)", "html5lib (>=1.1)", "hypothesis (>=6.46.1)", "jinja2 (>=3.1.2)", "lxml (>=4.8.0)", "matplotlib (>=3.6.1)", "numba (>=0.55.2)", "numexpr (>=2.8.0)", "odfpy (>=1.4.1)", "openpyxl (>=3.0.10)", "pandas-gbq (>=0.17.5)", "psycopg2 (>=2.9.3)", "pyarrow (>=7.0.0)", "pymysql (>=1.0.2)", "pyreadstat (>=1.1.5)", "pytest (>=7.3.2)", "pytest-xdist (>=2.2.0)", "pyxlsb (>=1.0.9)", "qtpy (>=2.2.0)", "s3fs (>=2022.05.0)", "scipy (>=1.8.1)", "tables (>=3.7.0)", "tabulate 
(>=0.8.10)", "xarray (>=2022.03.0)", "xlrd (>=2.0.1)", "xlsxwriter (>=3.0.3)", "zstandard (>=0.17.0)"] -aws = ["s3fs (>=2022.05.0)"] -clipboard = ["PyQt5 (>=5.15.6)", "qtpy (>=2.2.0)"] -compression = ["zstandard (>=0.17.0)"] -computation = ["scipy (>=1.8.1)", "xarray (>=2022.03.0)"] +all = ["PyQt5 (>=5.15.9)", "SQLAlchemy (>=2.0.0)", "adbc-driver-postgresql (>=0.8.0)", "adbc-driver-sqlite (>=0.8.0)", "beautifulsoup4 (>=4.11.2)", "bottleneck (>=1.3.6)", "dataframe-api-compat (>=0.1.7)", "fastparquet (>=2022.12.0)", "fsspec (>=2022.11.0)", "gcsfs (>=2022.11.0)", "html5lib (>=1.1)", "hypothesis (>=6.46.1)", "jinja2 (>=3.1.2)", "lxml (>=4.9.2)", "matplotlib (>=3.6.3)", "numba (>=0.56.4)", "numexpr (>=2.8.4)", "odfpy (>=1.4.1)", "openpyxl (>=3.1.0)", "pandas-gbq (>=0.19.0)", "psycopg2 (>=2.9.6)", "pyarrow (>=10.0.1)", "pymysql (>=1.0.2)", "pyreadstat (>=1.2.0)", "pytest (>=7.3.2)", "pytest-xdist (>=2.2.0)", "python-calamine (>=0.1.7)", "pyxlsb (>=1.0.10)", "qtpy (>=2.3.0)", "s3fs (>=2022.11.0)", "scipy (>=1.10.0)", "tables (>=3.8.0)", "tabulate (>=0.9.0)", "xarray (>=2022.12.0)", "xlrd (>=2.0.1)", "xlsxwriter (>=3.0.5)", "zstandard (>=0.19.0)"] +aws = ["s3fs (>=2022.11.0)"] +clipboard = ["PyQt5 (>=5.15.9)", "qtpy (>=2.3.0)"] +compression = ["zstandard (>=0.19.0)"] +computation = ["scipy (>=1.10.0)", "xarray (>=2022.12.0)"] consortium-standard = ["dataframe-api-compat (>=0.1.7)"] -excel = ["odfpy (>=1.4.1)", "openpyxl (>=3.0.10)", "pyxlsb (>=1.0.9)", "xlrd (>=2.0.1)", "xlsxwriter (>=3.0.3)"] -feather = ["pyarrow (>=7.0.0)"] -fss = ["fsspec (>=2022.05.0)"] -gcp = ["gcsfs (>=2022.05.0)", "pandas-gbq (>=0.17.5)"] -hdf5 = ["tables (>=3.7.0)"] -html = ["beautifulsoup4 (>=4.11.1)", "html5lib (>=1.1)", "lxml (>=4.8.0)"] -mysql = ["SQLAlchemy (>=1.4.36)", "pymysql (>=1.0.2)"] -output-formatting = ["jinja2 (>=3.1.2)", "tabulate (>=0.8.10)"] -parquet = ["pyarrow (>=7.0.0)"] -performance = ["bottleneck (>=1.3.4)", "numba (>=0.55.2)", "numexpr (>=2.8.0)"] -plot = ["matplotlib (>=3.6.1)"] -postgresql = ["SQLAlchemy (>=1.4.36)", "psycopg2 (>=2.9.3)"] -spss = ["pyreadstat (>=1.1.5)"] -sql-other = ["SQLAlchemy (>=1.4.36)"] +excel = ["odfpy (>=1.4.1)", "openpyxl (>=3.1.0)", "python-calamine (>=0.1.7)", "pyxlsb (>=1.0.10)", "xlrd (>=2.0.1)", "xlsxwriter (>=3.0.5)"] +feather = ["pyarrow (>=10.0.1)"] +fss = ["fsspec (>=2022.11.0)"] +gcp = ["gcsfs (>=2022.11.0)", "pandas-gbq (>=0.19.0)"] +hdf5 = ["tables (>=3.8.0)"] +html = ["beautifulsoup4 (>=4.11.2)", "html5lib (>=1.1)", "lxml (>=4.9.2)"] +mysql = ["SQLAlchemy (>=2.0.0)", "pymysql (>=1.0.2)"] +output-formatting = ["jinja2 (>=3.1.2)", "tabulate (>=0.9.0)"] +parquet = ["pyarrow (>=10.0.1)"] +performance = ["bottleneck (>=1.3.6)", "numba (>=0.56.4)", "numexpr (>=2.8.4)"] +plot = ["matplotlib (>=3.6.3)"] +postgresql = ["SQLAlchemy (>=2.0.0)", "adbc-driver-postgresql (>=0.8.0)", "psycopg2 (>=2.9.6)"] +pyarrow = ["pyarrow (>=10.0.1)"] +spss = ["pyreadstat (>=1.2.0)"] +sql-other = ["SQLAlchemy (>=2.0.0)", "adbc-driver-postgresql (>=0.8.0)", "adbc-driver-sqlite (>=0.8.0)"] test = ["hypothesis (>=6.46.1)", "pytest (>=7.3.2)", "pytest-xdist (>=2.2.0)"] -xml = ["lxml (>=4.8.0)"] +xml = ["lxml (>=4.9.2)"] [[package]] name = "pluggy" @@ -448,6 +453,26 @@ files = [ dev = ["pre-commit", "tox"] testing = ["pytest", "pytest-benchmark"] +[[package]] +name = "pyambit" +version = "0.0.1" +description = "Python implementation of AMBIT data model" +optional = false +python-versions = ">=3.9,<4.0" +files = [] +develop = false + +[package.dependencies] +nexusformat = "^1.0.6" +pandas = 
"^2.2.2" +pydantic = "^1" + +[package.source] +type = "git" +url = "https://github.com/ideaconsult/pyambit.git" +reference = "HEAD" +resolved_reference = "2e50efb949bf30ed4f24551726694df5e156dc71" + [[package]] name = "pydantic" version = "1.10.13" @@ -559,6 +584,7 @@ files = [ {file = "PyYAML-6.0.1-cp310-cp310-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:69b023b2b4daa7548bcfbd4aa3da05b3a74b772db9e23b982788168117739938"}, {file = "PyYAML-6.0.1-cp310-cp310-manylinux_2_17_s390x.manylinux2014_s390x.whl", hash = "sha256:81e0b275a9ecc9c0c0c07b4b90ba548307583c125f54d5b6946cfee6360c733d"}, {file = "PyYAML-6.0.1-cp310-cp310-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:ba336e390cd8e4d1739f42dfe9bb83a3cc2e80f567d8805e11b46f4a943f5515"}, + {file = "PyYAML-6.0.1-cp310-cp310-musllinux_1_1_x86_64.whl", hash = "sha256:326c013efe8048858a6d312ddd31d56e468118ad4cdeda36c719bf5bb6192290"}, {file = "PyYAML-6.0.1-cp310-cp310-win32.whl", hash = "sha256:bd4af7373a854424dabd882decdc5579653d7868b8fb26dc7d0e99f823aa5924"}, {file = "PyYAML-6.0.1-cp310-cp310-win_amd64.whl", hash = "sha256:fd1592b3fdf65fff2ad0004b5e363300ef59ced41c2e6b3a99d4089fa8c5435d"}, {file = "PyYAML-6.0.1-cp311-cp311-macosx_10_9_x86_64.whl", hash = "sha256:6965a7bc3cf88e5a1c3bd2e0b5c22f8d677dc88a455344035f03399034eb3007"}, @@ -566,8 +592,16 @@ files = [ {file = "PyYAML-6.0.1-cp311-cp311-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:42f8152b8dbc4fe7d96729ec2b99c7097d656dc1213a3229ca5383f973a5ed6d"}, {file = "PyYAML-6.0.1-cp311-cp311-manylinux_2_17_s390x.manylinux2014_s390x.whl", hash = "sha256:062582fca9fabdd2c8b54a3ef1c978d786e0f6b3a1510e0ac93ef59e0ddae2bc"}, {file = "PyYAML-6.0.1-cp311-cp311-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:d2b04aac4d386b172d5b9692e2d2da8de7bfb6c387fa4f801fbf6fb2e6ba4673"}, + {file = "PyYAML-6.0.1-cp311-cp311-musllinux_1_1_x86_64.whl", hash = "sha256:e7d73685e87afe9f3b36c799222440d6cf362062f78be1013661b00c5c6f678b"}, {file = "PyYAML-6.0.1-cp311-cp311-win32.whl", hash = "sha256:1635fd110e8d85d55237ab316b5b011de701ea0f29d07611174a1b42f1444741"}, {file = "PyYAML-6.0.1-cp311-cp311-win_amd64.whl", hash = "sha256:bf07ee2fef7014951eeb99f56f39c9bb4af143d8aa3c21b1677805985307da34"}, + {file = "PyYAML-6.0.1-cp312-cp312-macosx_10_9_x86_64.whl", hash = "sha256:855fb52b0dc35af121542a76b9a84f8d1cd886ea97c84703eaa6d88e37a2ad28"}, + {file = "PyYAML-6.0.1-cp312-cp312-macosx_11_0_arm64.whl", hash = "sha256:40df9b996c2b73138957fe23a16a4f0ba614f4c0efce1e9406a184b6d07fa3a9"}, + {file = "PyYAML-6.0.1-cp312-cp312-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:a08c6f0fe150303c1c6b71ebcd7213c2858041a7e01975da3a99aed1e7a378ef"}, + {file = "PyYAML-6.0.1-cp312-cp312-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:6c22bec3fbe2524cde73d7ada88f6566758a8f7227bfbf93a408a9d86bcc12a0"}, + {file = "PyYAML-6.0.1-cp312-cp312-musllinux_1_1_x86_64.whl", hash = "sha256:8d4e9c88387b0f5c7d5f281e55304de64cf7f9c0021a3525bd3b1c542da3b0e4"}, + {file = "PyYAML-6.0.1-cp312-cp312-win32.whl", hash = "sha256:d483d2cdf104e7c9fa60c544d92981f12ad66a457afae824d146093b8c294c54"}, + {file = "PyYAML-6.0.1-cp312-cp312-win_amd64.whl", hash = "sha256:0d3304d8c0adc42be59c5f8a4d9e3d7379e6955ad754aa9d6ab7a398b59dd1df"}, {file = "PyYAML-6.0.1-cp36-cp36m-macosx_10_9_x86_64.whl", hash = "sha256:50550eb667afee136e9a77d6dc71ae76a44df8b3e51e41b77f6de2932bfe0f47"}, {file = "PyYAML-6.0.1-cp36-cp36m-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = 
"sha256:1fe35611261b29bd1de0070f0b2f47cb6ff71fa6595c077e42bd0c419fa27b98"}, {file = "PyYAML-6.0.1-cp36-cp36m-manylinux_2_17_s390x.manylinux2014_s390x.whl", hash = "sha256:704219a11b772aea0d8ecd7058d0082713c3562b4e271b849ad7dc4a5c90c13c"}, @@ -584,6 +618,7 @@ files = [ {file = "PyYAML-6.0.1-cp38-cp38-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:a0cd17c15d3bb3fa06978b4e8958dcdc6e0174ccea823003a106c7d4d7899ac5"}, {file = "PyYAML-6.0.1-cp38-cp38-manylinux_2_17_s390x.manylinux2014_s390x.whl", hash = "sha256:28c119d996beec18c05208a8bd78cbe4007878c6dd15091efb73a30e90539696"}, {file = "PyYAML-6.0.1-cp38-cp38-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:7e07cbde391ba96ab58e532ff4803f79c4129397514e1413a7dc761ccd755735"}, + {file = "PyYAML-6.0.1-cp38-cp38-musllinux_1_1_x86_64.whl", hash = "sha256:49a183be227561de579b4a36efbb21b3eab9651dd81b1858589f796549873dd6"}, {file = "PyYAML-6.0.1-cp38-cp38-win32.whl", hash = "sha256:184c5108a2aca3c5b3d3bf9395d50893a7ab82a38004c8f61c258d4428e80206"}, {file = "PyYAML-6.0.1-cp38-cp38-win_amd64.whl", hash = "sha256:1e2722cc9fbb45d9b87631ac70924c11d3a401b2d7f410cc0e3bbf249f2dca62"}, {file = "PyYAML-6.0.1-cp39-cp39-macosx_10_9_x86_64.whl", hash = "sha256:9eb6caa9a297fc2c2fb8862bc5370d0303ddba53ba97e71f08023b6cd73d16a8"}, @@ -591,6 +626,7 @@ files = [ {file = "PyYAML-6.0.1-cp39-cp39-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:5773183b6446b2c99bb77e77595dd486303b4faab2b086e7b17bc6bef28865f6"}, {file = "PyYAML-6.0.1-cp39-cp39-manylinux_2_17_s390x.manylinux2014_s390x.whl", hash = "sha256:b786eecbdf8499b9ca1d697215862083bd6d2a99965554781d0d8d1ad31e13a0"}, {file = "PyYAML-6.0.1-cp39-cp39-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:bc1bf2925a1ecd43da378f4db9e4f799775d6367bdb94671027b73b393a7c42c"}, + {file = "PyYAML-6.0.1-cp39-cp39-musllinux_1_1_x86_64.whl", hash = "sha256:04ac92ad1925b2cff1db0cfebffb6ffc43457495c9b3c39d3fcae417d7125dc5"}, {file = "PyYAML-6.0.1-cp39-cp39-win32.whl", hash = "sha256:faca3bdcf85b2fc05d06ff3fbc1f83e1391b3e724afa3feba7d13eeab355484c"}, {file = "PyYAML-6.0.1-cp39-cp39-win_amd64.whl", hash = "sha256:510c9deebc5c0225e8c96813043e62b680ba2f9c50a08d3724c7f28a747d1486"}, {file = "PyYAML-6.0.1.tar.gz", hash = "sha256:bfdf460b1736c775f2ba9f6a92bca30bc2095067b8a9d77876d1fad6cc3b4a43"}, @@ -747,4 +783,4 @@ files = [ [metadata] lock-version = "2.0" python-versions = "^3.9" -content-hash = "33d2e592b10abc748ffef8dc78b732484ac377074f6842ec02f9bfb97d1a49ba" +content-hash = "0c8fef54f64ec047e9c05ac1a17eca0f31038869eb0004bf867e43548ddd191e" diff --git a/pyproject.toml b/pyproject.toml index 2d7f796..d0815ac 100644 --- a/pyproject.toml +++ b/pyproject.toml @@ -47,6 +47,7 @@ requests = "^2.31.0" xlsxwriter = "^3.1.9" measurement = "^3.2.2" openpyxl = "^3.1.2" +pyambit = {git = "https://github.com/ideaconsult/pyambit.git"} [tool.poetry.urls] "Bug Tracker" = "https://github.com/ideaconsult/pynanomapper/issues" diff --git a/src/pynanomapper/aa.py b/src/pynanomapper/aa.py index 5a7c4e4..5074801 100644 --- a/src/pynanomapper/aa.py +++ b/src/pynanomapper/aa.py @@ -40,7 +40,8 @@ def __init__(self, token=None): self.token = token def __call__(self, r): - r.headers['Authorization'] = "Bearer {}".format(self.token) + if self.token != None: + r.headers['Authorization'] = "Bearer {}".format(self.token) return r def setKey(self, token): diff --git a/src/pynanomapper/clients/authservice.py b/src/pynanomapper/clients/authservice.py index a314224..5989c80 100644 --- 
a/src/pynanomapper/clients/authservice.py +++ b/src/pynanomapper/clients/authservice.py @@ -43,7 +43,9 @@ def logout(self): def getHeaders(self): headers = {} headers["Accept"] = "application/json" - headers["Authorization"] = "Bearer {}".format(self.token['access_token']) + _token = self.token['access_token'] + if _token != None: + headers["Authorization"] = "Bearer {}".format(_token) return headers diff --git a/src/pynanomapper/clients/datamodel_simple.py b/src/pynanomapper/clients/datamodel_simple.py index 45bc41f..9bb7514 100644 --- a/src/pynanomapper/clients/datamodel_simple.py +++ b/src/pynanomapper/clients/datamodel_simple.py @@ -34,8 +34,8 @@ def __init__(self, investigation, provider, parameters, filename, spectrum_embed self.spectrum_embedding = spectrum_embedding @staticmethod - def x4search(): - return np.linspace(140,3*1024+140,num=1024) + def x4search(dim=1024): + return np.linspace(140,3*1024+140,num=dim) #return np.linspace(140,140+2048,num=1024) @staticmethod @@ -79,16 +79,16 @@ def xy2embedding(x,y,xlinspace = None,remove_baseline=True,window=16): if xlinspace is None: xlinspace = StudyRaman.x4search() spe = rc2.spectrum.Spectrum(x=x, y=y, metadata={}) - (spe,hist_dist,index) = StudyRaman.spectra2dist(spe,xcrop = [xlinspace[0],xlinspace[-1]],remove_baseline=True,window=window) + (spe,hist_dist,index) = StudyRaman.spectra2dist(spe,xcrop = [xlinspace[0],xlinspace[-1]],remove_baseline=remove_baseline,window=window) return (hist_dist.cdf(xlinspace),hist_dist.pdf(xlinspace)) @staticmethod - def h52embedding(h5,dataset="raw",xlinspace = None): + def h52embedding(h5,dataset="raw",xlinspace = None,remove_baseline=True,window=16): if xlinspace is None: xlinspace = StudyRaman.x4search() x = h5[dataset][0] y = h5[dataset][1] - return StudyRaman.xy2embedding(x,y,xlinspace) + return StudyRaman.xy2embedding(x,y,xlinspace,remove_baseline=remove_baseline,window=window) def to_solr_json(self): _solr = {} diff --git a/src/pynanomapper/clients/service_charisma.py b/src/pynanomapper/clients/service_charisma.py index e8cef10..26dafdb 100644 --- a/src/pynanomapper/clients/service_charisma.py +++ b/src/pynanomapper/clients/service_charisma.py @@ -135,12 +135,16 @@ def dict2figure(pm,figsize): def solrquery_get(self,solr_url, params): headers = {} - headers["Authorization"] = "Bearer {}".format(self.tokenservice.api_key()); + _token = self.tokenservice.api_key() + if _token != None: + headers["Authorization"] = "Bearer {}".format(_token); return requests.get(solr_url, params = params, headers= headers) def solrquery_post(self,solr_url, json): headers = {} - headers["Authorization"] = "Bearer {}".format(self.tokenservice.api_key()); + _token = self.tokenservice.api_key() + if _token != None: + headers["Authorization"] = "Bearer {}".format(_token); return requests.get(solr_url, json = json, headers= headers) def thumbnail(self,solr_url,domain,figsize=(6,4),extraprm=""): @@ -197,14 +201,15 @@ def image(self,domain,dataset="raw",figsize=(6,4),extraprm=""): except Exception as err: return self.empty_figure(figsize,"Error","{}".format(domain.split("/")[-1])) - def knnquery(self,domain,dataset="raw"): + def knnquery(self,domain,dataset="raw",dim=1024): try: with self.File(domain,mode="r") as h5: x = h5[dataset][0] y = h5[dataset][1] - (cdf,pdf) = StudyRaman.h52embedding(h5,dataset="raw",xlinspace = StudyRaman.x4search()) + (cdf,pdf) = StudyRaman.h52embedding(h5,dataset="raw",xlinspace = StudyRaman.x4search(dim=dim)) result_json = {} - result_json["cdf"] = compress(cdf.tolist(),precision=6) + 
result_json["cdf"] = compress(cdf.tolist(),precision=4) + result_json["pdf"] = compress(pdf.tolist(),precision=4) #return ','.join(map(str, cdf)) try: px = 1/plt.rcParams['figure.dpi'] # pixel in inches diff --git a/src/pynanomapper/datamodel/ambit.py b/src/pynanomapper/datamodel/ambit.py deleted file mode 100644 index f933ea2..0000000 --- a/src/pynanomapper/datamodel/ambit.py +++ /dev/null @@ -1,553 +0,0 @@ -from typing import List, TypeVar, Generic, Any -from pydantic import BaseModel,Field,AnyUrl, create_model, validator, root_validator -from enum import Enum - -import typing -from typing import Dict, Optional, Union -import json -from json import JSONEncoder -import numpy as np -from numpy.typing import NDArray -from .ambit_deco import (add_ambitmodel_method) -import re -import math - #The Optional type is used to indicate that a field can have a value of either the specified type or None. -class AmbitModel(BaseModel): - pass - -class Value(AmbitModel): - unit: Optional[str] = None - loValue: Optional[float] = None - upValue: Optional[float] = None - loQualifier: Optional[str] = None - upQualifier: Optional[str] = None - annotation: Optional[str] = None - errQualifier: Optional[str] = None - errorValue: Optional[float] = None - - @classmethod - def create(cls, loValue: float = None, unit: str = None, **kwargs): - return cls(loValue = loValue, unit = unit, **kwargs) - -class EndpointCategory(AmbitModel): - code: str - term: Optional[str] - title: Optional[str] - -class Protocol(AmbitModel): - topcategory: Optional[str] = None - category: Optional[EndpointCategory] = None - endpoint: Optional[str] = None - guideline: List[str] = None - - def to_json(self): - def protocol_encoder(obj): - if isinstance(obj, EndpointCategory): - return obj.__dict__ - return obj - protocol_dict = self.dict() - return json.dumps(protocol_dict, default=protocol_encoder) - -class EffectResult(AmbitModel): - loQualifier: Optional[str] = None - loValue: Optional[float] = None - upQualifier: Optional[str] = None - upValue: Optional[float] = None - textValue: Optional[str] = None - errQualifier: Optional[str] = None - errorValue: Optional[float] = None - unit: Optional[str] = None - - @classmethod - def create(cls, loValue: float = None, unit: str = None, **kwargs): - return cls(loValue = loValue, unit = unit, **kwargs) - -EffectResult = create_model('EffectResult', __base__=EffectResult) - - -class ValueArray(AmbitModel): - unit: Optional[str] = None - #the arrays can in fact contain strings, we don't need textValue! 
- values: Union[NDArray, None] = None - errQualifier: Optional[str] = None - errorValue: Optional[Union[NDArray, None]] = None - - class Config: - arbitrary_types_allowed = True - - @classmethod - def create(cls, values : NDArray = None, unit : str = None, errorValue : NDArray = None, errQualifier : str = None ): - return cls(values = values, unit=unit,errorValue = errorValue, errQualifier = errQualifier) - - def to_json(self): - def value_array_encoder(obj): - if isinstance(obj, np.ndarray): - return obj.tolist() - return obj.__dict__ - - return json.dumps(self, default=value_array_encoder) - -class EffectRecord(AmbitModel): - endpoint: str - endpointtype: Optional[str] = None - result: EffectResult = None - conditions: Optional[Dict[str, Union[str, int, float, Value, None]]] = None - idresult: Optional[int] = None - endpointGroup: Optional[int] = None - endpointSynonyms: List[str] = None - sampleID: Optional[str] = None - - @validator('endpoint', pre=True) - def clean_endpoint(cls, v): - if v is None: - return None - else: - return v.replace("/","_") - - @validator('endpointtype', pre=True) - def clean_endpointtype(cls, v): - if v is None: - return None - else: - return v.replace("/","_") - - def addEndpointSynonym(self, endpointSynonym: str): - if self.endpointSynonyms is None: - self.endpointSynonyms = [] - self.endpointSynonyms.append(endpointSynonym) - - def formatSynonyms(self, striplinks: bool) -> str: - if self.endpointSynonyms: - return ", ".join(self.endpointSynonyms) - return "" - - def to_json(self): - def effect_record_encoder(obj): - if isinstance(obj, List): - return [item.__dict__ for item in obj] - return obj - - return json.dumps(self.__dict__, default=effect_record_encoder) - - def to_dict(self): - data = self.dict(exclude_none=True) - if self.result: - data['result'] = self.result.dict() - return data - - class Config: - allow_population_by_field_name = True - - @classmethod - def create(cls, endpoint: str = None, conditions: Dict[str, Union[str, Value, None]] = None, result : EffectResult = None): - if conditions is None: - conditions = {} - return cls(endpoint=endpoint, conditions=conditions, result = result) - - def to_json(self): - def custom_encoder(obj): - if isinstance(obj, BaseModel): - return obj.__dict__ - return obj - - return json.dumps(self, default=custom_encoder) - - - @validator('conditions', pre=True) - def clean_parameters(cls, v): - if v is None: - return {} - conditions = {} - for key, value in v.items(): - if value is None: - continue - new_key = key.replace("/", "_") if "/" in key else key - if value is None: - pass - elif key in ["REPLICATE","EXPERIMENT","BIOLOGICAL_REPLICATE","TECHNICAL_REPLICATE"]: - if isinstance(value, dict): - conditions[new_key] = str(value["loValue"]) - #print(key, type(value),value,conditions[new_key]) - elif isinstance(value, int): - conditions[new_key] = value - elif isinstance(value, float): - print("warning> Float value {}:{}".format(key, value)) - conditions[new_key] = int(value) - raise Exception("warning> Float value {}:{}".format(key, value)) - else: - #this is to extract nuber from e.g. 
'Replicate 1' - match = re.search(r'[+-]?\d+(?:\.\d+)?', value) - if match: - conditions[new_key] = match.group() - - else: - conditions[new_key] = value - - return conditions - - @classmethod - def from_dict(cls, data: dict): - if 'conditions' in data: - parameters = data['conditions'] - for key, value in parameters.items(): - if isinstance(value, dict): - parameters[key] = Value(**value) - return cls(**data) - - - -EffectRecord = create_model('EffectRecord', __base__=EffectRecord) - - -class EffectArray(EffectRecord): - signal: ValueArray = None - axes: Optional[Dict[str, ValueArray]] = None - - @classmethod - def create(cls, signal : ValueArray = None, axes : Dict[str, ValueArray] = None ): - return cls(signal = signal, axes = axes) - - class EffectArrayEncoder(JSONEncoder): - def default(self, obj): - if isinstance(obj, ValueArray): - return obj.__dict__ - if isinstance(obj, np.ndarray): - return obj.tolist() - return super().default(obj) - - def to_json(self): - data = self.dict(exclude={'axes', 'signal'}) - data['signal'] = self.signal.__dict__ if self.signal else None - data['axes'] = {key: value.__dict__ for key, value in self.axes.items()} if self.axes else None - return json.dumps(data, cls=self.EffectArrayEncoder) - - def to_dict(self): - data = self.dict(exclude_none=True) - if self.signal: - data['signal'] = self.signal.dict() - if self.axes: - data['axes'] = {key: value.dict() for key, value in self.axes.items()} - return data - -EffectArray = create_model('EffectArray', __base__=EffectArray) - -class ProtocolEffectRecord(EffectRecord): - protocol: Protocol - documentUUID: str - studyResultType: Optional[str] = None - interpretationResult: Optional[str] = None - - -class STRUC_TYPE(str, Enum): - NA = 'NA' - MARKUSH = 'MARKUSH' - D1 = 'SMILES' - D2noH = '2D no H' - D2withH = '2D with H' - D3noH = '3D no H' - D3withH = '3D with H' - optimized = 'optimized' - experimental = 'experimental' - NANO = 'NANO' - PDB = 'PDB' - - - -class ReliabilityParams(AmbitModel): - r_isRobustStudy: Optional[str] = None - r_isUsedforClassification: Optional[str] = None - r_isUsedforMSDS: Optional[str] = None - r_purposeFlag: Optional[str] = None - r_studyResultType: Optional[str] = None - r_value: Optional[str] = None - -class Citation(AmbitModel): - year: Optional[str] = None - title: str - owner: str - @classmethod - def create(cls, owner: str, citation_title: str, year: str = None): - return cls(owner=owner, title=citation_title, year=year) - -Citation = create_model('Citation', __base__=Citation) - -class Company(AmbitModel): - uuid: Optional[str] = None - name: Optional[str] = None - -class Sample(AmbitModel): - uuid: str - -class SampleLink(AmbitModel): - substance: Sample - company: Company = Company(name="Default company") - - @classmethod - def create(cls, sample_uuid: str, sample_provider: str): - return cls(substance=Sample(sample_uuid), company=Company(name=sample_provider)) - - class Config: - allow_population_by_field_name = True - - def to_json(self): - def custom_encoder(obj): - if isinstance(obj, BaseModel): - return obj.__dict__ - return obj - return json.dumps(self, default=custom_encoder) - -SampleLink = create_model('SampleLink', __base__=SampleLink) - -""" - ProtocolApplication : store results for single assay and a single sample - - Args: - papp (ProtocolApplication): The object to be written into nexus format. 
- - Returns: - protocol: Protocol - effects: List[EffectRecord] - - Examples: - from typing import List - from pynanomapper.datamodel.ambit import EffectRecord, Protocol, EndpointCategory, ProtocolApplication - effect_list: List[EffectRecord] = [] - effect_list.append(EffectRecord(endpoint="Endpoint 1", unit="Unit 1", loValue=5.0)) - effect_list.append(EffectRecord(endpoint="Endpoint 2", unit="Unit 2", loValue=10.0)) - papp = ProtocolApplication(protocol=Protocol(topcategory="P-CHEM",category=EndpointCategory(code="XYZ")),effects=effect_list) - papp -""" -class ProtocolApplication(AmbitModel): - uuid: Optional[str] = None - #reliability: Optional[ReliabilityParams] - interpretationResult: Optional[str] = None - interpretationCriteria: Optional[str] = None - parameters: Optional[Dict[str, Union[str, Value, None]]] = None - citation: Optional[Citation] - effects: List[Union[EffectRecord,EffectArray]] - owner : Optional[SampleLink] = None - protocol: Optional[Protocol] = None - investigation_uuid: Optional[str] = None - assay_uuid: Optional[str] = None - updated: Optional[str] - - class Config: - allow_population_by_field_name = True - - @classmethod - def create(cls, protocol: Protocol = None , effects: List[Union[EffectRecord,EffectArray]] = None,**kwargs): - if protocol is None: - protocol = Protocol() - if effects is None: - effects = [] - return cls(protocol = protocol,effects=effects, **kwargs) - - @validator('parameters', pre=True) - def clean_parameters(cls, v): - if v is None: - return {} - - cleaned_params = {} - for key, value in v.items(): - new_key = key.replace("/", "_") if "/" in key else key - if isinstance(value, dict): - cleaned_params[new_key] = Value(**value) - else: - cleaned_params[new_key] = value - - return cleaned_params - - def to_json(self): - def encode_numpy(obj): - if isinstance(obj, np.ndarray): - return obj.tolist() - raise TypeError(f"Object of type {type(obj)} is not JSON serializable") - - data = self.dict(exclude={'effects'}) - data['effects'] = [effect.dict() for effect in self.effects] - if self.citation: - data['citation'] = self.citation.dict() - if self.parameters: - data['parameters'] = {key: value.dict() for key, value in self.parameters.items()} - if self.owner: - data['owner'] = self.owner.dict() - if self.protocol: - data['protocol'] = self.protocol.dict() - return json.dumps(data, default=encode_numpy, indent=2) - -ProtocolApplication = create_model('ProtocolApplication', __base__=ProtocolApplication) - -# parsed_json["substance"][0] -# s = Study(**sjson) -class Study(AmbitModel): - """ - Example: - # Creating an instance of Substances, with studies - # Parse json retrieved from AMBIT services - from pynanomapper.datamodel.measurements import Study - import requests - url = https://apps.ideaconsult.net/gracious/substance/GRCS-7bd6de68-a312-3254-8b3f-9f46d6976ce6/study?media=application/json - response = requests.get(url) - parsed_json = response.json() - papps = Study(**parsed_json) - for papp in papps: - print(papp) - """ - study: List[ProtocolApplication] - def to_json(self) -> str: - data = {'study': [pa.dict() for pa in self.study]} - return json.dumps(data) - -class ReferenceSubstance(AmbitModel): - i5uuid : Optional[str] = None - uri: Optional[str] = None - -class TypicalProportion(AmbitModel): - precision: Optional[str] = Field(None, regex=r'^\S+$') - value: Optional[float] = None - unit: Optional[str] = Field(None, regex=r'^\S+$') - -class RealProportion(AmbitModel): - lowerPrecision: Optional[str] = None - lowerValue: Optional[float] = 
None - upperPrecision: Optional[str] = None - upperValue: Optional[float] = None - unit: Optional[str] = Field(None, regex=r'^\S+$') - -class ComponentProportion(AmbitModel): - typical: TypicalProportion - real: RealProportion - function_as_additive: Optional[float] = None - - class Config: - use_enum_values = True - - -class Compound(AmbitModel): - URI: Optional[AnyUrl] = None - structype: Optional[str] = None - metric: Optional[float] = None - name: Optional[str] = None - cas: Optional[str] = None #Field(None, regex=r'^\d{1,7}-\d{2}-\d$') - einecs: Optional[str] = None #Field(None, regex= r'^[A-Za-z0-9/@+=(),:;\[\]{}\-.]+$') - inchikey: Optional[str] = None #Field(None, regex=r'^[A-Z\-]{27}$') - inchi: Optional[str] = None - formula: Optional[str] = None - -class Component(BaseModel): - compound: Compound - values: Dict[str, Any] = None - #facets: list - #bundles: dict - -class CompositionEntry(AmbitModel): - component: Component - compositionUUID: Optional[str] = None - compositionName: Optional[str] = None - relation: Optional[str] = "HAS_COMPONENT" - proportion: Optional[ComponentProportion] = None - hidden: bool = False - -def update_compound_features(composition : List[CompositionEntry], feature): - # Modify the composition based on the feature - for entry in composition: - for key,value in entry.component.values.items(): - if feature[key]["sameAs"] == "http://www.opentox.org/api/1.1#CASRN": - entry.component.compound.cas = value - elif feature[key]["sameAs"] == "http://www.opentox.org/api/1.1#EINECS": - entry.component.compound.einecs = value - elif feature[key]["sameAs"] == "http://www.opentox.org/api/1.1#ChemicalName": - entry.component.compound.name = value - - return composition - -class Composition(AmbitModel): - composition : List[CompositionEntry] = None - feature : dict - - - - @root_validator - def update_composition(cls, values): - composition = values.get('composition') - feature = values.get('feature') - if composition and feature: - values['composition'] = update_compound_features(composition,feature) - return values - -class SubstanceRecord(AmbitModel): - URI : Optional[str] = None - ownerUUID : Optional[str] = None - ownerName : Optional[str] = None - i5uuid : Optional[str] = None - name : str - publicname : Optional[str] = None - format: Optional[str] = None - substanceType: Optional[str] = None - referenceSubstance: Optional[ReferenceSubstance] = None - # composition : List[] - # externalIdentifiers : List[] - study: Optional[List[ProtocolApplication]] = None - composition: Optional[List[CompositionEntry]] = None - def to_json(self): - def substance_record_encoder(obj): - if isinstance(obj, List): - return [item.__dict__ for item in obj] - return obj.__dict__ - - return json.dumps(self, default=substance_record_encoder) - -# s = Substances(**parsed_json) - -class Substances(AmbitModel): - """ - Example: - # Creating an instance of Substances, with studies - # Parse json retrieved from AMBIT services - from pynanomapper.datamodel.measurements import Substances - _p = Substances(**parsed_json) - for substance in _p.substance: - papps = substance.study - for papp in papps: - print(papp.protocol) - print(papp.parameters) - for e in papp.effects: - print(e) - - """ - substance: List[SubstanceRecord] - - def to_json(self): - def substances_encoder(obj): - if isinstance(obj, Substances): - return obj.substance - return obj.__dict__ - - return json.dumps(self, default=substances_encoder) - - -import uuid - -def configure_papp(papp: ProtocolApplication, - provider="My 
organisation", - sample = "My sample", - sample_provider = "PROJECT", - investigation="My experiment", - year=2024, - prefix="XLSX", - meta =None): - papp.citation = Citation(owner=provider,title=investigation,year=year) - papp.investigation_uuid = str(uuid.uuid5(uuid.NAMESPACE_OID,investigation)) - papp.assay_uuid = str(uuid.uuid5(uuid.NAMESPACE_OID,"{} {}".format(investigation,provider))) - papp.parameters = meta - - papp.uuid = "{}-{}".format(prefix,uuid.uuid5(uuid.NAMESPACE_OID,"{} {} {} {} {} {}".format( - papp.protocol.category, - "" if investigation is None else investigation, - "" if sample_provider is None else sample_provider, - "" if sample is None else sample, - "" if provider is None else provider, - "" if meta is None else str(meta)))) - company=Company(name = sample_provider) - substance = Sample(uuid = "{}-{}".format(prefix,uuid.uuid5(uuid.NAMESPACE_OID,sample))) - papp.owner = SampleLink(substance = substance,company=company) diff --git a/src/pynanomapper/datamodel/ambit_deco.py b/src/pynanomapper/datamodel/ambit_deco.py deleted file mode 100644 index 86c7bd1..0000000 --- a/src/pynanomapper/datamodel/ambit_deco.py +++ /dev/null @@ -1,14 +0,0 @@ -#!/usr/bin/env python - -from functools import wraps -from . import ambit - -def add_ambitmodel_method(cls): - def decorator(fun): - @wraps(fun) - def retf(obj, *args, **kwargs): - ret = fun(obj, *args, **kwargs) - return ret - setattr(cls, fun.__name__, retf) - return retf - return decorator diff --git a/src/pynanomapper/datamodel/nexus_parser.py b/src/pynanomapper/datamodel/nexus_parser.py deleted file mode 100644 index e8369f5..0000000 --- a/src/pynanomapper/datamodel/nexus_parser.py +++ /dev/null @@ -1,104 +0,0 @@ -import h5py -import ramanchada2 as rc2 -class NexusParser: - def __init__(self): - self.parsed_objects = {} - - def parse_data(self,entry,default=False,nxprocess=False): - for attr in entry.attrs: - print(attr,entry.attrs.get(attr)) - for name, item in entry.items(): - nx_class = item.attrs.get('NX_class', None) - print("PROCESSED " if nxprocess else "","DATA ",item.name, ' ', nx_class) - - def parse_entry(self,entry,nxprocess=False,dataparser=None): - print(dataparser) - nx_class = entry.attrs.get('NX_class', None) - default = entry.attrs.get('default', None) - #print(entry.name, ' ', nx_class, default) - for name, item in entry.items(): - nx_class = item.attrs.get('NX_class', None) - if nx_class == "NXdata": - if dataparser is None: - self.parse_data(item,entry.name==default,nxprocess) - else: - print("dataparsre",dataparser) - dataparser(item,entry.name==default,nxprocess) - - elif nx_class == "NXenvironment": - pass - elif nx_class == "NXinstrument": - pass - elif nx_class == "NXcite": - pass - elif nx_class == "NXcollection": - pass - elif nx_class == "NXnote": - pass - elif nx_class == "NXsample": - self.parse_sample(item) - else: - print("ENTRY ",item.name, ' ', nx_class) - - def parse_sample(self,group): - nx_class = group.attrs.get('NX_class', None) - if nx_class == "NXsample_component": - pass - else: - print(group.name, ' ', nx_class) - - def parse(self,file_path :str,dataparser=None): - with h5py.File(file_path, 'r') as file: - self.parse_h5(file,dataparser) - - def parse_h5(self,h5_file,dataparser=None): - try: - def iterate_groups(group, indent='',nxprocess = False): - nx_class = group.attrs.get('NX_class', None) - if nx_class == "NXentry" or nx_class == "NXsubentry": - self.parse_entry(group,nxprocess,dataparser) - elif nx_class == "NXsample": - self.parse_sample(group) - - else: - for name, item in 
group.items(): - nx_class = item.attrs.get('NX_class', None) - if isinstance(item, h5py.Group): - #print(indent + 'Group:', name, ' ', nx_class) - # Recursively call the function for nested groups - iterate_groups(item, indent + ' ',nxprocess or nx_class=="NX_process") - else: - print(indent + 'Dataset:', name, ' ', nx_class) - - # Start the iteration from the root of the file - iterate_groups(h5_file) - except Exception as err: - print(err) - -class SpectrumParser(NexusParser): - def __init__(self): - super().__init__() - # Replace the parent class field with the spectrum-specific field - self.parsed_objects = {} - - def parse_data(self,entry,default=False,nxprocess=False): - - signal = entry.attrs.get('signal', None) - interpretation = entry.attrs.get('interpretation', None) - axes = entry.attrs.get('axes', None) - #print(default,signal,interpretation,axes,isinstance(entry[signal], h5py.Dataset)) - y = entry[signal][:] - for axis in axes: - x = entry[axis][:] - break - spe = rc2.spectrum.Spectrum(x=x,y=y) - self.parsed_objects[str(entry)] = spe - -#spectrum_parser = SpectrumParser() -#spectrum_parser.parse(file_path) - -# Access the spectrum data -#for key in spectrum_parser.parsed_objects: -# spe = spectrum_parser.parsed_objects[key] -# print("Spectrum data", key, spe) -# spe.plot() diff --git a/src/pynanomapper/datamodel/nexus_spectra.py b/src/pynanomapper/datamodel/nexus_spectra.py deleted file mode 100644 index 3136976..0000000 --- a/src/pynanomapper/datamodel/nexus_spectra.py +++ /dev/null @@ -1,90 +0,0 @@ -import ramanchada2 as rc2 -from ramanchada2.misc.types.fit_peaks_result import FitPeaksResult -import matplotlib.pyplot as plt -import pynanomapper.datamodel.ambit as mx -import numpy as np -from typing import Dict, Optional, Union, List -from pynanomapper.datamodel.nexus_writer import to_nexus -import numpy.typing as npt -import json -import nexusformat.nexus.tree as nx -import pprint -import uuid - - -def spe2effect(x: npt.NDArray, y: npt.NDArray, unit="cm-1",endpointtype="RAW_DATA"): - data_dict: Dict[str, mx.ValueArray] = { - 'x': mx.ValueArray(values = x, unit=unit) - } - return mx.EffectArray(endpoint="Raman spectrum",endpointtype=endpointtype, - signal = mx.ValueArray(values = y,unit="count"), - axes = data_dict) - -def configure_papp(papp: mx.ProtocolApplication, - instrument=None, wavelength=None, provider="FNMT", - sample = "PST", - sample_provider = "CHARISMA", - investigation="Round Robin 1", - prefix="CRMA",meta =None): - papp.citation = mx.Citation(owner=provider,title=investigation,year=2022) - papp.investigation_uuid = str(uuid.uuid5(uuid.NAMESPACE_OID,investigation)) - papp.assay_uuid = str(uuid.uuid5(uuid.NAMESPACE_OID,"{} {}".format(investigation,provider))) - papp.parameters = {"E.method" : "Raman spectrometry" , - "wavelength" : wavelength, - "T.instrument_model" : instrument - } - - papp.uuid = "{}-{}".format(prefix,uuid.uuid5(uuid.NAMESPACE_OID,"RAMAN {} {} {} {} {} {}".format( - "" if investigation is None else investigation, - "" if sample_provider is None else sample_provider, - "" if sample is None else sample, - "" if provider is None else provider, - "" if instrument is None else instrument, - "" if wavelength is None else wavelength))) - company=mx.Company(name = sample_provider) - substance = mx.Sample(uuid = "{}-{}".format(prefix,uuid.uuid5(uuid.NAMESPACE_OID,sample))) - papp.owner = mx.SampleLink(substance = substance,company=company) - -def spe2ambit(x: npt.NDArray, y: npt.NDArray, meta: Dict, - instrument=None, wavelength=None, - 
provider="FNMT", - investigation="Round Robin 1", - sample = "PST", - sample_provider = "CHARISMA", - prefix="CRMA",endpointtype="RAW_DATA", unit="cm-1",papp=None): - - if papp is None: - effect_list: List[Union[mx.EffectRecord,mx.EffectArray]] = [] - effect_list.append(spe2effect(x,y,unit,endpointtype)) - papp = mx.ProtocolApplication(protocol=mx.Protocol(topcategory="P-CHEM", - category=mx.EndpointCategory(code="ANALYTICAL_METHODS_SECTION")), - effects=effect_list) - configure_papp(papp, - instrument=instrument, wavelength=wavelength, provider=provider, - sample = sample, - sample_provider = sample_provider, - investigation=investigation, - prefix=prefix, - meta = meta) - else: - papp.effects.append(spe2effect(x,y,unit,endpointtype)) - return papp - - -def peaks2nxdata(fitres:FitPeaksResult): - df = fitres.to_dataframe_peaks() - nxdata = nx.NXdata() - axes = ["height","center","sigma","beta","fwhm","height"] - for a in axes: - nxdata[a] = nx.NXfield(df[a].values, name=a) - a_err = f"{a}_errors" - nxdata[a_err] = nx.NXfield(df[f"{a}_stderr"].values, name=a_err) - str_array = np.array(['='.encode('ascii', errors='ignore') if (x is None) else x.encode('ascii', errors='ignore') for x in df.index.values]) - nxdata["group_peak"] = nx.NXfield(str_array, name="group_peak") - #nxdata.signal = 'amplitude' - nxdata.attrs['signal'] = "height" - nxdata.attrs["auxiliary_signals"] = ["amplitude","beta","sigma","fwhm"] - nxdata.attrs['axes'] = ["center"] - nxdata.attrs["interpretation"] = "spectrum" - nxdata.attrs["{}_indices".format("center")] = 0 - return nxdata diff --git a/src/pynanomapper/datamodel/nexus_writer.py b/src/pynanomapper/datamodel/nexus_writer.py deleted file mode 100644 index 06070d5..0000000 --- a/src/pynanomapper/datamodel/nexus_writer.py +++ /dev/null @@ -1,664 +0,0 @@ -import numpy as np -from pydantic import validate_arguments - -from . import ambit as mx -from .ambit_deco import add_ambitmodel_method -import nexusformat.nexus as nx -import pandas as pd -import re -import traceback -import numbers -import math -from typing import List - -""" - ProtocolApplication to nexus entry (NXentry) - Tries to follow https://manual.nexusformat.org/rules.html - - Args: - papp (ProtocolApplication): The object to be written into nexus format. - nx_root (nx.NXroot()): Nexus root (or None). 
- - Returns: - nx_root: Nexus root - - Raises: - Exception: on parse - - Examples: - from pynanomapper.datamodel.nexus_writer import to_nexus - from pynanomapper.datamodel.measurements import ProtocolApplication - pa = ProtocolApplication(**json_data) - import nexusformat.nexus.tree as nx - ne = pa.to_nexus(nx.NXroot()) - print(ne.tree) -""" -@add_ambitmodel_method(mx.ProtocolApplication) -def to_nexus(papp : mx.ProtocolApplication, nx_root: nx.NXroot() = None ) : - - if nx_root == None: - nx_root = nx.NXroot() - - #https://manual.nexusformat.org/classes/base_classes/NXentry.html - try: - if not papp.protocol.topcategory in nx_root: - nx_root[papp.protocol.topcategory] = nx.NXgroup() - if not papp.protocol.category.code in nx_root[papp.protocol.topcategory]: - nx_root[papp.protocol.topcategory][papp.protocol.category.code] = nx.NXgroup() - try: - provider = "" if papp.citation.owner is None else papp.citation.owner.replace("/","_").upper() - except: - provider = "@" - entry_id = "{}/{}/entry_{}_{}".format(papp.protocol.topcategory,papp.protocol.category.code,provider,papp.uuid) - except Exception as err: - #print(err,papp.citation.owner) - entry_id = "entry_{}".format(papp.uuid) - - - if not (entry_id in nx_root): - nx_root[entry_id] = nx.tree.NXentry() - - nx_root['{}/entry_identifier_uuid'.format(entry_id)] = papp.uuid - - nx_root['{}/definition'.format(entry_id)] = papp.__class__.__name__ - nxmap = nx_root['{}/definition'.format(entry_id)] - nxmap.attrs["PROTOCOL_APPLICATION_UUID"]="@entry_identifier_uuid" - nxmap.attrs["INVESTIGATION_UUID"]="@collection_identifier" - nxmap.attrs["ASSAY_UUID"]="@experiment_identifier" - nxmap.attrs["Protocol"]= "experiment_documentation" - nxmap.attrs["Citation"]= "reference" - nxmap.attrs["Substance"]= "sample" - nxmap.attrs["Parameters"]= ["instrument","environment","parameters"] - nxmap.attrs["EffectRecords"] = "datasets" - #experiment_identifier - #experiment_description - #collection_identifier collection of related measurements or experiments. - nx_root['{}/collection_identifier'.format(entry_id)] = papp.investigation_uuid - nx_root['{}/experiment_identifier'.format(entry_id)] = papp.assay_uuid - #collection_description - - #duration - #program_name - #revision - #experiment_documentation (SOP) - #notes - #USER: (optional) NXuser - #SAMPLE: (optional) NXsample - #INSTRUMENT: (optional) NXinstrument - #COLLECTION: (optional) NXcollection - #MONITOR: (optional) NXmonitor - #PARAMETERS: (optional) NXparameters Container for parameters, usually used in processing or analysis. - #PROCESS: (optional) NXprocess - #SUBENTRY: (optional) NXsubentry Group of multiple application definitions for “multi-modal” (e.g. SAXS/WAXS) measurements. 
- - try: - if not (papp.protocol is None): - docid = '{}/experiment_documentation'.format(entry_id) - if not (docid in nx_root): - nx_root[docid] = nx.NXnote() - experiment_documentation = nx_root[docid] - experiment_documentation["date"] = papp.updated - #category = nx.NXgroup() - #experiment_documentation["category"] = category - experiment_documentation.attrs["topcategory"] = papp.protocol.topcategory - experiment_documentation.attrs["code"] = papp.protocol.category.code - experiment_documentation.attrs["term"] = papp.protocol.category.term - experiment_documentation.attrs["title"] = papp.protocol.category.title - experiment_documentation.attrs["endpoint"] = papp.protocol.endpoint - experiment_documentation.attrs["guideline"] = papp.protocol.guideline - if not papp.parameters is None: - for tag in ["E.method","ASSAY"]: - if tag in papp.parameters: - experiment_documentation.attrs["method"] = papp.parameters[tag] - - - except Exception as err: - raise Exception("ProtocolApplication: protocol parsing error " + str(err)) from err - - try: - citation_id = '{}/reference'.format(entry_id) - if not (citation_id in nx_root): - nx_root[citation_id] = nx.NXcite() - if papp.citation != None: - nx_root[citation_id]["title"] = papp.citation.title - nx_root[citation_id]["year"] = papp.citation.year - nx_root[citation_id]["owner"] = papp.citation.owner - doi = extract_doi(papp.citation.title) - if not doi is None: - nx_root[citation_id]["doi"] = doi - if papp.citation.title.startswith("http"): - nx_root[citation_id]["url"] = papp.citation.title - - #url, doi, description - except Exception as err: - raise Exception("ProtocolApplication: citation data parsing error " + str(err)) from err - - if not "substance" in nx_root: - nx_root["substance"] = nx.NXgroup() - - #now the actual sample - sample_id = '{}/sample'.format(entry_id) - if not sample_id in nx_root: - nx_root['{}/sample'.format(entry_id)] = nx.NXsample() - sample = nx_root['{}/sample'.format(entry_id)] - - if papp.owner != None: - substance_id = 'substance/{}'.format(papp.owner.substance.uuid) - if not substance_id in nx_root: - nx_root[substance_id] = nx.NXsample() - nx_root['{}/sample/substance'.format(entry_id)] = nx.NXlink(substance_id) - - #parameters - if not ('{}/instrument'.format(entry_id) in nx_root): - nx_root['{}/instrument'.format(entry_id)] = nx.NXinstrument() - instrument = nx_root['{}/instrument'.format(entry_id)] - - if not ('{}/parameters'.format(entry_id) in nx_root): - nx_root['{}/parameters'.format(entry_id)] = nx.NXcollection() - parameters = nx_root['{}/parameters'.format(entry_id)] - - if not ('{}/environment'.format(entry_id) in nx_root): - nx_root['{}/environment'.format(entry_id)] = nx.NXenvironment() - environment = nx_root['{}/environment'.format(entry_id)] - - if not (papp.parameters is None): - for prm in papp.parameters: - try: - value = papp.parameters[prm] - target = environment - if "instrument" in prm.lower(): - target = instrument - if "technique" in prm.lower(): - target = instrument - if "wavelength" in prm.lower(): - target = instrument - elif "sample" in prm.lower(): - target = sample - elif "material" in prm.lower(): - target = sample - elif ("ASSAY" == prm.upper()) or ("E.METHOD" == prm.upper()): - print(prm.upper) - target = nx_root[entry_id]["experiment_documentation"] - #continue - elif ("E.SOP_REFERENCE" == prm): - #target = instrument - target = nx_root[entry_id]["experiment_documentation"] - elif ("OPERATOR" == prm): - #target = instrument - target = nx_root[entry_id]["experiment_documentation"] 
- elif (prm.startswith("T.")): - target = instrument - - if "EXPERIMENT_END_DATE" == prm: - nx_root[entry_id]["end_time"] = value - elif "EXPERIMENT_START_DATE" == prm: - nx_root[entry_id]["start_time"] = value - elif "__input_file" == prm: - nx_root[entry_id]["experiment_documentation"][prm] = value - - elif isinstance(value,str): - target[prm] = nx.NXfield(str(value)) - elif isinstance(value,mx.Value): - #tbd ranges? - target[prm] = nx.NXfield(value.loValue,unit=value.unit) - except Exception as err: - raise Exception("ProtocolApplication: parameters parsing error " + str(err)) from err - - if not (papp.owner is None): - try: - sample["uuid"] = papp.owner.substance.uuid - sample["provider"] = papp.owner.company.name - except Exception as err: - raise Exception("ProtocolApplication owner (sample) parsing error " + str(err)) from err - - try: - process_pa(papp,nx_root[entry_id],nx_root) - except Exception as err: - print("Exception traceback:\n%s", traceback.format_exc()) - raise Exception("ProtocolApplication: effectrecords parsing error " + str(err)) from err - - return nx_root - - -@add_ambitmodel_method(mx.Study) -def to_nexus(study : mx.Study, nx_root: nx.NXroot() = None ): - if nx_root == None: - nx_root = nx.NXroot() - x = 1 - for papp in study.study: - - papp.to_nexus(nx_root); - #x = x+1 - #if x>22: - # print(papp.uuid) - # papp.to_nexus(nx_root) - # break - return nx_root - -""" - SubstanceRecord to nexus entry (NXentry) - - Args: - substance record (SubstanceRecord): The object to be written. - nx_root (nx.NXroot()): Nexus root (or None). - - Returns: - nx_root: Nexus root - - Raises: - Exception: on parse - - Examples: - import pynanomapper.datamodel.measurements as m2n - from pynanomapper.datamodel.nexus_writer import to_nexus - import nexusformat.nexus.tree as nx - substance="GRCS-18f0f0e8-b5f4-39bc-b8f8-9c869c8bd82f" - url = "https://apps.ideaconsult.net/gracious/substance/{}?media=application/json".format(substance) - response = requests.get(url) - sjson = response.json() - nxroot = nx.NXroot() - substances = m2n.Substances(**sjson) - for substance in substances.substance: - url = "{}/composition?media=application/json".format(substance.URI) - response = requests.get(url) - pjson = response.json() - cmp = m2n.Composition(**pjson) - substance.composition = cmp.composition # note the assignment - url = "{}/study?media=application/json".format(substance.URI) - response = requests.get(url) - sjson = response.json() - substance.study = m2n.Study(**sjson).study - try: - ne = substance.to_nexus(nxroot) - except Exception as err: - print(substance.URI) - print(err) - nxroot.save("example.nxs",mode="w") -""" -@add_ambitmodel_method(mx.SubstanceRecord) -def to_nexus(substance : mx.SubstanceRecord, nx_root: nx.NXroot() = None ): - if nx_root == None: - nx_root = nx.NXroot() - - if not "substance" in nx_root: - nx_root["substance"] = nx.NXgroup() - substance_id = 'substance/{}'.format(substance.i5uuid) - if not substance_id in nx_root: - nx_root[substance_id] = nx.NXsample() - nx_root[substance_id].attrs["uuid"] = substance.i5uuid - nx_root[substance_id].name = substance.name - nx_root[substance_id].attrs["publicname"] = substance.publicname - nx_root[substance_id].attrs["substanceType"] = substance.substanceType - nx_root[substance_id].attrs["ownerName"] = substance.ownerName - nx_root[substance_id].attrs["ownerUUID"] = substance.ownerUUID - - if substance.composition != None: - for index,ce in enumerate(substance.composition): - component = nx.NXsample_component() - #name='' cas='' 
-            component.name = ce.component.compound.name
-            component.einecs = ce.component.compound.einecs
-            component.cas = ce.component.compound.cas
-            component.formula = ce.component.compound.formula
-            component.inchi = ce.component.compound.inchi
-            component.inchikey = ce.component.compound.inchikey
-            component.description = ce.relation
-            nx_root["{}/{}_{}".format(substance_id, ce.relation.replace("HAS_", ""), index)] = component
-
-    if substance.study is not None:
-        for papp in substance.study:
-            papp.to_nexus(nx_root)
-
-    return nx_root
-
-
-@add_ambitmodel_method(mx.Substances)
-def to_nexus(substances: mx.Substances, nx_root: nx.NXroot = None):
-    if nx_root is None:
-        nx_root = nx.NXroot()
-    for substance in substances.substance:
-        substance.to_nexus(nx_root)
-    return nx_root
-
-
-@add_ambitmodel_method(mx.Composition)
-def to_nexus(composition: mx.Composition, nx_root: nx.NXroot = None):
-    if nx_root is None:
-        nx_root = nx.NXroot()
-    return nx_root
-
-
-def format_name(meta_dict, key, default=""):
-    name = meta_dict[key] if key in meta_dict else default
-    return name if isinstance(name, str) else default if math.isnan(name) else name
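Because format_name silently maps NaN metadata to the default, a tiny standalone sketch of its behaviour may help (illustrative values; assumes format_name above is in scope):

    meta = {"endpoint": "TEER", "endpointtype": float("nan")}
    print(format_name(meta, "endpoint"))                 # -> TEER (plain strings pass through)
    print(format_name(meta, "endpointtype", "DEFAULT"))  # -> DEFAULT (NaN falls back to the default)
    print(format_name(meta, "missing", "DEFAULT"))       # -> DEFAULT (absent keys fall back too)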
-def nexus_data(selected_columns, group, group_df, condcols, debug=False):
-    try:
-        meta_dict = dict(zip(selected_columns, group))
-        tmp = group_df.dropna(axis=1, how="all")
-        if debug:
-            display(tmp)
-        _interpretation = "scalar"
-        ds_conc = []
-        ds_conditions = []
-        ds_response = None
-        ds_aux = []
-        ds_aux_tags = []
-        ds_errors = None
-        _attributes = {}
-
-        if "loValue" in tmp:
-            unit = meta_dict["unit"] if "unit" in meta_dict else ""
-            ds_response = nx.tree.NXfield(tmp["loValue"].values, name=meta_dict["endpoint"], units=unit)
-
-        if "upValue" in tmp:
-            unit = meta_dict["unit"] if "unit" in meta_dict else ""
-            name = "{}_upValue".format(meta_dict["endpoint"])
-            ds_aux.append(nx.tree.NXfield(tmp["upValue"].values, name=name, units=unit))
-            ds_aux_tags.append(name)
-
-        if "errorValue" in tmp:
-            unit = meta_dict["unit"] if "unit" in meta_dict else ""
-            ds_errors = nx.tree.NXfield(tmp["errorValue"].values, name="{}_errors".format(meta_dict["endpoint"]), units=unit)
-
-        for tag in ["loQualifier", "upQualifier", "textValue", "errQualifier"]:
-            if tag in tmp:
-                vals = tmp[tag].unique()
-                if len(vals) == 1 and (vals[0] == "" or vals[0] == "="):
-                    # skip if all qualifiers are empty or '='; tbd also for nans
-                    continue
-                if len(vals) == 1 and tag != "textValue":
-                    # a single qualifier value becomes an attribute instead of an axis
-                    _attributes[tag] = vals
-                    continue
-                str_array = np.array(['='.encode('ascii', errors='ignore') if (x is None) else x.encode('ascii', errors='ignore') for x in tmp[tag].values])
-                if ds_response is None and tag == "textValue":
-                    ds_response = nx.tree.NXfield(str_array, name=tag)
-                else:
-                    ds_aux.append(nx.tree.NXfield(str_array, name=tag))
-                    ds_aux_tags.append(tag)
-
-        primary_axis = None
-        for tag in condcols:
-            if tag in tmp.columns:
-                if tag in ["REPLICATE", "BIOLOGICAL_REPLICATE", "TECHNICAL_REPLICATE", "EXPERIMENT"]:
-                    unit = None
-                    try:
-                        int_array = np.array([int(x) if isinstance(x, str) and x.isdigit() else np.nan if (x is None) or math.isnan(x) or (not isinstance(x, numbers.Number)) else int(x) for x in tmp[tag].values])
-                        ds_conditions.append(nx.tree.NXfield(int_array, name=tag))
-                    except Exception as err:
-                        print(err, tag, tmp[tag].values)
-                elif tag in ["MATERIAL", "TREATMENT"]:
-                    vals = tmp[tag].unique()
-                    if len(vals) == 1:
-                        _attributes[tag] = vals
-                    else:
-                        try:
-                            str_array = np.array(['' if (x is None) else x.encode('ascii', errors='ignore') for x in tmp[tag].values])
-                            # add as axis
-                            ds_conditions.append(nx.tree.NXfield(str_array, name=tag))
-                        except Exception as err_condition:
-                            print(err_condition, tag, tmp[tag].values)
-                else:
-                    tag_value = "{}_loValue".format(tag)
-                    tag_unit = "{}_unit".format(tag)
-                    if tag_value in tmp.columns:
-                        unit = tmp[tag_unit].unique()[0] if tag_unit in tmp.columns else None
-                        axis = nx.tree.NXfield(tmp[tag_value].values, name=tag, units=unit)
-                        ds_conc.append(axis)
-                        if tag in ("CONCENTRATION", "DOSE", "AMOUNT_OF_MATERIAL", "TREATMENT_CONDITION"):
-                            primary_axis = tag
-                            _interpretation = "spectrum"
-
-        ds_conc.extend(ds_conditions)
-
-        if ds_response is not None and len(ds_response) > 0:
-            _interpretation = "spectrum"  # means vector
-
-        if len(ds_conc) > 0:
-            nxdata = nx.tree.NXdata(ds_response, ds_conc, errors=ds_errors)
-        else:
-            nxdata = nx.tree.NXdata(ds_response, errors=ds_errors)
-        nxdata.attrs["interpretation"] = _interpretation
-
-        nxdata.name = meta_dict["endpoint"]
-        _attributes["endpoint"] = meta_dict["endpoint"]
-        if primary_axis is not None:
-            nxdata.attrs["{}_indices".format(primary_axis)] = 0
-        if "endpointtype" in meta_dict:
-            _attributes["endpointtype"] = meta_dict["endpointtype"]
-
-        # unit is per axis/signal
-
-        if len(_attributes) > 0:
-            nxdata["META"] = nx.tree.NXnote()
-            for tag in _attributes:
-                nxdata["META"].attrs[tag] = _attributes[tag]
-
-        if len(ds_aux) > 0:
-            for index, a in enumerate(ds_aux_tags):
-                nxdata[a] = ds_aux[index]
-            nxdata.attrs["auxiliary_signals"] = ds_aux_tags
-        if debug:
-            print(nxdata.tree)
-        return nxdata, meta_dict
-    except Exception as err:
-        print("Exception traceback:\n%s" % traceback.format_exc())
-        raise Exception("EffectRecords: grouping error {} {} {}".format(selected_columns, group, err)) from err
-
-
-def effectarray2data(effect: mx.EffectArray):
-    signal = nx.tree.NXfield(effect.signal.values, name=effect.endpoint, units=effect.signal.unit)
-    axes = []
-    for key in effect.axes:
-        axes.append(nx.tree.NXfield(effect.axes[key].values, name=key, units=effect.axes[key].unit))
-    return nx.tree.NXdata(signal, axes)
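effectarray2data above is a thin wrapper over the nexusformat API; a minimal self-contained sketch of the same signal-plus-axes construction (toy numbers, assuming the nexusformat version pinned in this lock file):

    import numpy as np
    import nexusformat.nexus.tree as nx

    signal = nx.NXfield(np.array([95.0, 80.5, 41.2]), name="viability", units="%")
    axis = nx.NXfield(np.array([10.0, 20.0, 40.0]), name="CONCENTRATION", units="ug/mL")
    nxdata = nx.NXdata(signal, [axis])  # same shape as effectarray2data's return value
    print(nxdata.tree)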
-def process_pa(pa: mx.ProtocolApplication, entry=nx.tree.NXentry(), nx_root: nx.NXroot = None):
-    effectarrays_only: List[mx.EffectArray] = list(filter(lambda item: isinstance(item, mx.EffectArray), pa.effects))
-    _default = None
-    try:
-        _path = "/substance/{}".format(pa.owner.substance.uuid)
-        substance_name = nx_root[_path].name
-    except Exception:
-        substance_name = ''
-
-    if effectarrays_only:  # if we have EffectArray in the pa list
-        index = 0
-        for effect in effectarrays_only:
-            index = index + 1
-            _group_key = "DEFAULT" if effect.endpointtype is None else effect.endpointtype
-            if _group_key not in entry:
-                if effect.endpointtype == "RAW_DATA":
-                    entry[_group_key] = nx.tree.NXgroup()
-                else:
-                    entry[_group_key] = nx.tree.NXprocess()
-                    entry[_group_key]["NOTE"] = nx.tree.NXnote()
-                    entry[_group_key]["NOTE"].attrs["description"] = effect.endpointtype
-
-            entryid = "{}_{}".format(effect.endpoint, index)
-            if entryid in entry[_group_key]:
-                del entry[_group_key][entryid]
-                print("replacing {}/{}".format(_group_key, entryid))
-
-            nxdata = effectarray2data(effect)
-            nxdata.attrs["interpretation"] = "spectrum"
-            entry[_group_key][entryid] = nxdata
-            if _default is None:
-                entry.attrs["default"] = _group_key
-            nxdata.title = "{} (by {}) {}".format(effect.endpoint, pa.citation.owner, substance_name)
-
-    df_samples, df_controls, resultcols, condcols, df_aggregated = papp2df(pa, _cols=["CONCENTRATION", "DOSE", "AMOUNT_OF_MATERIAL", "TREATMENT_CONDITION"], drop_parsed_cols=True)
-
-    index = 1
-    df_titles = ["data", "controls", "derived"]
-    for num, df in enumerate([df_samples, df_controls, df_aggregated]):
-        if df is None:
-            continue
-        grouped_dataframes, selected_columns = group_samplesdf(df, cols_unique=None)
-        try:
-            for group, group_df in grouped_dataframes:
-                try:
-                    nxdata, meta_dict = nexus_data(selected_columns, group, group_df, condcols)
-                    try:
-                        method = entry["experiment_documentation"].attrs["method"]
-                    except Exception:
-                        method = ""
-                    nxdata.title = "{} ({} by {}) {}".format(meta_dict["endpoint"], method, pa.citation.owner, substance_name)
-
-                    entryid = "{}_{}_{}".format(df_titles[num], index, meta_dict["endpoint"])
-
-                    endpointtype = format_name(meta_dict, "endpointtype", "DEFAULT")
-                    nxdata.name = meta_dict["endpoint"]
-                    endpointtype_group = getattr(entry, endpointtype, None)
-                    if endpointtype_group is None:
-                        if endpointtype == "DEFAULT" or endpointtype == "RAW_DATA":
-                            endpointtype_group = nx.tree.NXgroup()
-                        else:
-                            endpointtype_group = nx.tree.NXprocess()
-                            endpointtype_group["NOTE"] = nx.tree.NXnote()
-                            endpointtype_group["NOTE"].attrs["description"] = endpointtype
-
-                        endpointtype_group.name = endpointtype
-                        entry[endpointtype] = endpointtype_group
-                        endpointtype_group.attrs["default"] = entryid
-
-                    endpointtype_group[entryid] = nxdata
-                    index = index + 1
-
-                except Exception:
-                    print(traceback.format_exc())
-        except Exception as err:
-            raise Exception("ProtocolApplication: data parsing error {} {}".format(selected_columns, err)) from err
-
-    return entry
-
-
-def effects2df(effects, drop_parsed_cols=True):
-    # Convert the list of EffectRecord objects to a list of dictionaries
-    effectrecord_only = list(filter(lambda item: not isinstance(item, mx.EffectArray), effects))
-    if not effectrecord_only:  # empty
-        return (None, None, None, None)
-    effect_records_dicts = [er.dict() for er in effectrecord_only]
-    # Convert the list of dictionaries to a DataFrame
-    df = pd.DataFrame(effect_records_dicts)
-    _tag = "conditions"
-    conditions_df = pd.DataFrame(df[_tag].tolist())
-    # Drop the original 'conditions' column from the main DataFrame
-    if drop_parsed_cols:
-        df.drop(columns=[_tag], inplace=True)
-    _tag = "result"
-    result_df = pd.DataFrame(df[_tag].tolist())
-    if drop_parsed_cols:
-        df.drop(columns=[_tag], inplace=True)
-    # Concatenate the main DataFrame with the result and conditions DataFrames
-    return (pd.concat([df, result_df, conditions_df], axis=1), df.columns, result_df.columns, conditions_df.columns)
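The core reshaping idea in effects2df is that each record's nested `conditions` and `result` dicts become their own DataFrames, concatenated column-wise with the flat fields. A self-contained sketch of that pattern (toy dicts standing in for real EffectRecord objects):

    import pandas as pd

    records = [
        {"endpoint": "TEER", "result": {"loValue": 1.2, "unit": "ohm*cm2"}, "conditions": {"CONCENTRATION": "10 mg/L"}},
        {"endpoint": "TEER", "result": {"loValue": 0.8, "unit": "ohm*cm2"}, "conditions": {"CONCENTRATION": "20 mg/L"}},
    ]
    df = pd.DataFrame(records)
    result_df = pd.DataFrame(df["result"].tolist())
    conditions_df = pd.DataFrame(df["conditions"].tolist())
    flat = pd.concat([df.drop(columns=["result", "conditions"]), result_df, conditions_df], axis=1)
    print(flat.columns.tolist())  # ['endpoint', 'loValue', 'unit', 'CONCENTRATION']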
-def papp_mash(df, dfcols, condcols, drop_parsed_cols=True):
-    for _col in condcols:
-        df_normalized = pd.json_normalize(df[_col])
-        df_normalized = df_normalized.add_prefix(df[_col].name + '_')
-        for col in df_normalized.columns:
-            df.loc[:, col] = df_normalized[col]
-        # if there are non-dict values, leave the column; otherwise drop it, since the values are parsed
-        if drop_parsed_cols and df[_col].apply(lambda x: isinstance(x, dict)).all():
-            df.drop(columns=[_col], inplace=True)
-    df.dropna(axis=1, how="all", inplace=True)
-    return df
-
-
-# from pynanomapper.datamodel.measurements import ProtocolApplication
-# pa = ProtocolApplication(**json_data)
-# from pynanomapper.datamodel import measurements2nexus as m2n
-# df_samples, df_controls = m2n.papp2df(pa, _col="CONCENTRATION")
-def papp2df(pa: mx.ProtocolApplication, _cols=["CONCENTRATION"], drop_parsed_cols=True):
-    df, dfcols, resultcols, condcols = effects2df(pa.effects, drop_parsed_cols)
-    if df is None:
-        return None, None, None, None, None
-
-    df_samples = None
-    df_controls = None
-    df_aggregated = None
-    for _col in _cols:
-        if _col in condcols:
-            df_samples = df.loc[df[_col].apply(lambda x: isinstance(x, dict))]
-            df_controls = df.loc[df[_col].apply(lambda x: isinstance(x, str))]
-            # we can have aggregated values with NaN in the concentration columns
-            df_aggregated = df.loc[df[_col].isna()]
-            break
-    if df_samples is None:
-        df_samples = df
-        df_controls = None
-    df_samples = papp_mash(df_samples.reset_index(drop=True), dfcols, condcols, drop_parsed_cols)
-    if df_controls is not None:
-        cols_to_process = [col for col in condcols if col != _col]
-        df_controls = papp_mash(df_controls.reset_index(drop=True), dfcols, cols_to_process, drop_parsed_cols)
-
-    if df_aggregated is not None:
-        cols_to_process = [col for col in condcols if col != _col]
-        df_aggregated = papp_mash(df_aggregated.reset_index(drop=True), dfcols, cols_to_process, drop_parsed_cols)
-
-    return df_samples, df_controls, resultcols, condcols, df_aggregated
-
-
-# Example callback:
-# def cb(selected_columns, group, group_df):
-#     display(group_df)
-# grouped_dataframes = m2n.group_samplesdf(df_samples, callback=cb)
-def group_samplesdf(df_samples, cols_unique=None, callback=None, _pattern=r'CONCENTRATION_.*loValue$'):
-    if cols_unique is None:
-        _pattern_c_unit = r'^CONCENTRATION.*_unit$'
-        selected_columns = [col for col in df_samples.columns if col in ["endpoint", "endpointtype", "unit"] or bool(re.match(_pattern_c_unit, col))]
-    else:
-        selected_columns = [col for col in cols_unique if col in df_samples.columns]
-    # dropna=False is to include missing values in the grouping
-    try:
-        grouped_dataframes = df_samples.groupby(selected_columns, dropna=False)
-    except Exception as err:
-        raise Exception("group_samplesdf: {} {}".format(selected_columns, err)) from err
-    if callback is not None:
-        for group, group_df in grouped_dataframes:
-            callback(selected_columns, group, group_df)
-    return grouped_dataframes, selected_columns
-
-
-def extract_doi(input_str):
-    # Regular expression pattern to match a DOI
-    doi_pattern = r"(10\.\d{4,}(?:\.\d+)*\/\S+)"
-    # Search for the DOI pattern in the input string
-    match = re.search(doi_pattern, input_str)
-    if match:
-        return match.group(1)  # Return the matched DOI
-    else:
-        return None  # Return None if no DOI is found
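For reference, the regular expression above behaves roughly as follows (illustrative strings, not taken from the test suite):

    print(extract_doi("https://doi.org/10.1234/example.5678"))  # -> 10.1234/example.5678
    print(extract_doi("An internal report without identifier"))  # -> None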
diff --git a/src/pynanomapper/datamodel/templates/blueprint.py b/src/pynanomapper/datamodel/templates/blueprint.py
index dba478b..738b999 100644
--- a/src/pynanomapper/datamodel/templates/blueprint.py
+++ b/src/pynanomapper/datamodel/templates/blueprint.py
@@ -40,8 +40,8 @@ def json2frame(json_data,sortby=None):
 def get_method_metadata(json_blueprint):
     _header = {
-        "Project Work Package" : json_blueprint.get("provenance_project",""),
-        "Partner conducting test/assay" : json_blueprint.get("provenance_workpackage",""),
+        "Project Work Package" : json_blueprint.get("provenance_workpackage",""),
+        "Partner conducting test/assay" : json_blueprint.get("provenance_provider",""),
         "Test facility - Laboratory name" : json_blueprint.get("provenance_provider",""),
         "Lead Scientist & contact for test" : json_blueprint.get("provenance_contact",""),
         "Assay/Test work conducted by" : json_blueprint.get("provenance_operator",""),
@@ -458,6 +458,8 @@ def iom_format_2excel(file_path, df_info,df_result,df_raw=None,df_conditions=Non
         workbook = writer.book
         worksheet = workbook.add_worksheet(_sheet)
         worksheet.set_column(1, 1, 20)
+
+        #writer.sheets[_sheet]
 
         cell_format_def = {
             "group" : {'bg_color': _colors['grey'], 'font_color' : 'blue', 'text_wrap': True, 'bold': True},
diff --git a/tests/resources/teer.json b/tests/resources/templates/teer.json
similarity index 100%
rename from tests/resources/teer.json
rename to tests/resources/templates/teer.json
diff --git a/tests/test_basic_imports.py b/tests/test_basic_imports.py
index 601d048..361ae86 100644
--- a/tests/test_basic_imports.py
+++ b/tests/test_basic_imports.py
@@ -5,4 +5,4 @@ def test_import():
     from pynanomapper import client_ambit
     from pynanomapper import client_solr
     from pynanomapper import units
-    from pynanomapper.datamodel import ambit
+    from pyambit import datamodel
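The final hunk suggests the data model now lives in the separate pyambit package rather than under pynanomapper.datamodel; a minimal sketch of the updated import (package layout inferred only from the test change above):

    # before: from pynanomapper.datamodel import ambit
    from pyambit import datamodel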