diff --git a/src/.gitignore b/src/.gitignore new file mode 100644 index 0000000..11f38e7 --- /dev/null +++ b/src/.gitignore @@ -0,0 +1,6 @@ +bazel-bin +bazel-out +bazel-cpp +bazel-python +bazel-testlogs + diff --git a/src/BUILD b/src/BUILD deleted file mode 100644 index b28b04f..0000000 --- a/src/BUILD +++ /dev/null @@ -1,3 +0,0 @@ - - - diff --git a/src/ab.sh b/src/ab.sh index a49ab89..4da5b7b 100755 --- a/src/ab.sh +++ b/src/ab.sh @@ -1,3 +1,3 @@ #!/bin/bash -rm -f ../bin/* && ../scripts/bazel_build.sh && ../bin/attention_broker 37007 +rm -f ./bin/* && ./scripts/build.sh && ./bin/attention_broker_service 37007 diff --git a/src/.bazelversion b/src/cpp/.bazelversion similarity index 100% rename from src/.bazelversion rename to src/cpp/.bazelversion diff --git a/src/cpp/BUILD b/src/cpp/BUILD index f5b0d85..1c7feec 100644 --- a/src/cpp/BUILD +++ b/src/cpp/BUILD @@ -2,61 +2,61 @@ cc_binary( name = "attention_broker_service", srcs = [], defines = ["BAZEL_BUILD"], + linkstatic = 1, deps = [ - "//cpp/main:attention_broker_main_lib", - "//cpp/utils:utils_lib", - "@com_github_singnet_das_proto//:attention_broker_cc_grpc", + "//main:attention_broker_main_lib", + "//utils:utils_lib", "@com_github_grpc_grpc//:grpc++", "@com_github_grpc_grpc//:grpc++_reflection", + "@com_github_singnet_das_proto//:attention_broker_cc_grpc", "@mbedcrypto//:lib", ], - linkstatic = 1 ) cc_binary( name = "query_broker", srcs = [], defines = ["BAZEL_BUILD"], + linkstatic = 1, deps = [ - "//cpp/main:query_engine_main_lib", - "//cpp/utils:utils_lib", + "//main:query_engine_main_lib", + "//utils:utils_lib", "@mbedcrypto//:lib", ], - linkstatic = 1 ) cc_binary( name = "query", srcs = [], defines = ["BAZEL_BUILD"], + linkstatic = 1, deps = [ - "//cpp/main:query_client_main_lib", - "//cpp/utils:utils_lib", + "//main:query_client_main_lib", + "//utils:utils_lib", "@mbedcrypto//:lib", ], - linkstatic = 1 ) cc_binary( name = "link_creation_engine", srcs = [], defines = ["BAZEL_BUILD"], + linkstatic = 1, deps = [ - "//cpp/main:link_creation_engine_main_lib", - "//cpp/utils:utils_lib", + "//main:link_creation_engine_main_lib", + "//utils:utils_lib", "@mbedcrypto//:lib", ], - linkstatic = 1 ) cc_binary( name = "word_query", srcs = [], defines = ["BAZEL_BUILD"], + linkstatic = 1, deps = [ - "//cpp/main:word_query_main_lib", - "//cpp/utils:utils_lib", + "//main:word_query_main_lib", + "//utils:utils_lib", "@mbedcrypto//:lib", ], - linkstatic = 1 ) diff --git a/src/MODULE.bazel b/src/cpp/MODULE.bazel similarity index 100% rename from src/MODULE.bazel rename to src/cpp/MODULE.bazel diff --git a/src/MODULE.bazel.lock b/src/cpp/MODULE.bazel.lock similarity index 100% rename from src/MODULE.bazel.lock rename to src/cpp/MODULE.bazel.lock diff --git a/src/WORKSPACE b/src/cpp/WORKSPACE similarity index 94% rename from src/WORKSPACE rename to src/cpp/WORKSPACE index 3e79a40..864218c 100644 --- a/src/WORKSPACE +++ b/src/cpp/WORKSPACE @@ -1,3 +1,5 @@ +workspace(name = "rules_cpp_das") + load("@bazel_tools//tools/build_defs/repo:http.bzl", "http_archive") http_archive( @@ -19,24 +21,27 @@ http_archive( ) load("@com_github_grpc_grpc//bazel:grpc_deps.bzl", "grpc_deps") + grpc_deps() + load("@com_github_grpc_grpc//bazel:grpc_extra_deps.bzl", "grpc_extra_deps") + grpc_extra_deps() http_archive( name = "com_github_google_googletest", strip_prefix = "googletest-1.14.0", - urls = ["https://github.com/google/googletest/archive/refs/tags/v1.14.0.tar.gz"] + urls = ["https://github.com/google/googletest/archive/refs/tags/v1.14.0.tar.gz"], ) 
new_local_repository( name = "mbedcrypto", - path = "/opt/3rd-party/mbedcrypto", build_file_content = '\ cc_library(\ name = "lib",\ srcs = ["libmbedcrypto.a"],\ visibility = ["//visibility:public"],\ )\ -' +', + path = "/opt/3rd-party/mbedcrypto", ) diff --git a/src/cpp/attention_broker/BUILD b/src/cpp/attention_broker/BUILD index a7aee45..4d40640 100644 --- a/src/cpp/attention_broker/BUILD +++ b/src/cpp/attention_broker/BUILD @@ -6,8 +6,8 @@ cc_library( hdrs = glob(["*.h"]), includes = ["."], deps = [ - "//cpp/utils:utils_lib", - "//cpp/hasher:hasher_lib", + "//utils:utils_lib", + "//hasher:hasher_lib", "@com_github_singnet_das_proto//:attention_broker_cc_grpc", "@com_github_grpc_grpc//:grpc++", "@com_github_grpc_grpc//:grpc++_reflection", diff --git a/src/cpp/main/BUILD b/src/cpp/main/BUILD index 20ba8e0..a798561 100644 --- a/src/cpp/main/BUILD +++ b/src/cpp/main/BUILD @@ -5,7 +5,7 @@ cc_library( srcs = ["attention_broker_main.cc"], hdrs = glob(["*.h"]), deps = [ - "//cpp/attention_broker:attention_broker_server_lib", + "//attention_broker:attention_broker_server_lib", "@com_github_singnet_das_proto//:attention_broker_cc_grpc", "@com_github_grpc_grpc//:grpc++", "@com_github_grpc_grpc//:grpc++_reflection", @@ -17,7 +17,7 @@ cc_library( srcs = ["query_engine_main.cc"], hdrs = glob(["*.h"]), deps = [ - "//cpp/query_engine:query_engine_lib", + "//query_engine:query_engine_lib", ], ) @@ -26,7 +26,7 @@ cc_library( srcs = ["query_client_main.cc"], hdrs = glob(["*.h"]), deps = [ - "//cpp/query_engine:query_engine_lib", + "//query_engine:query_engine_lib", ], ) @@ -35,7 +35,7 @@ cc_library( srcs = ["link_creation_engine_main.cc"], hdrs = glob(["*.h"]), deps = [ - "//cpp/query_engine:query_engine_lib", + "//query_engine:query_engine_lib", ], ) @@ -44,6 +44,6 @@ cc_library( srcs = ["word_query_main.cc"], hdrs = glob(["*.h"]), deps = [ - "//cpp/query_engine:query_engine_lib", + "//query_engine:query_engine_lib", ], ) diff --git a/src/cpp/query_engine/BUILD b/src/cpp/query_engine/BUILD index 107238a..f0e2871 100644 --- a/src/cpp/query_engine/BUILD +++ b/src/cpp/query_engine/BUILD @@ -6,9 +6,9 @@ cc_library( hdrs = glob(["*.h", "query_element/*.h"]), includes = [".", "query_element"], deps = [ - "//cpp/utils:utils_lib", - "//cpp/hasher:hasher_lib", - "//cpp/attention_broker:attention_broker_server_lib", + "//utils:utils_lib", + "//hasher:hasher_lib", + "//attention_broker:attention_broker_server_lib", "@com_github_singnet_das_node//:atomspacenode", ], linkopts = [ diff --git a/src/cpp/tests/BUILD b/src/cpp/tests/BUILD index 03b76ca..396b534 100644 --- a/src/cpp/tests/BUILD +++ b/src/cpp/tests/BUILD @@ -11,7 +11,7 @@ cc_test( "@com_github_singnet_das_proto//:attention_broker_cc_grpc", "@com_github_grpc_grpc//:grpc++", "@com_github_grpc_grpc//:grpc++_reflection", - "//cpp/attention_broker:attention_broker_server_lib", + "//attention_broker:attention_broker_server_lib", "@mbedcrypto//:lib", ], linkstatic = 1 @@ -30,7 +30,7 @@ cc_test( "@com_github_singnet_das_proto//:attention_broker_cc_grpc", "@com_github_grpc_grpc//:grpc++", "@com_github_grpc_grpc//:grpc++_reflection", - "//cpp/attention_broker:attention_broker_server_lib", + "//attention_broker:attention_broker_server_lib", "@mbedcrypto//:lib", ], linkstatic = 1 @@ -49,8 +49,8 @@ cc_test( "@com_github_singnet_das_proto//:attention_broker_cc_grpc", "@com_github_grpc_grpc//:grpc++", "@com_github_grpc_grpc//:grpc++_reflection", - "//cpp/attention_broker:attention_broker_server_lib", - "//cpp/utils:utils_lib", + 
"//attention_broker:attention_broker_server_lib", + "//utils:utils_lib", "@mbedcrypto//:lib", ], linkstatic = 1 @@ -69,7 +69,7 @@ cc_test( "@com_github_singnet_das_proto//:attention_broker_cc_grpc", "@com_github_grpc_grpc//:grpc++", "@com_github_grpc_grpc//:grpc++_reflection", - "//cpp/attention_broker:attention_broker_server_lib", + "//attention_broker:attention_broker_server_lib", "@mbedcrypto//:lib", ], linkstatic = 1 @@ -88,7 +88,7 @@ cc_test( "@com_github_singnet_das_proto//:attention_broker_cc_grpc", "@com_github_grpc_grpc//:grpc++", "@com_github_grpc_grpc//:grpc++_reflection", - "//cpp/attention_broker:attention_broker_server_lib", + "//attention_broker:attention_broker_server_lib", "@mbedcrypto//:lib", ], linkstatic = 1 @@ -107,8 +107,8 @@ cc_test( "@com_github_singnet_das_proto//:attention_broker_cc_grpc", "@com_github_grpc_grpc//:grpc++", "@com_github_grpc_grpc//:grpc++_reflection", - "//cpp/attention_broker:attention_broker_server_lib", - "//cpp/utils:utils_lib", + "//attention_broker:attention_broker_server_lib", + "//utils:utils_lib", "@mbedcrypto//:lib", ], linkstatic = 1 @@ -127,7 +127,7 @@ cc_test( "@com_github_singnet_das_proto//:attention_broker_cc_grpc", "@com_github_grpc_grpc//:grpc++", "@com_github_grpc_grpc//:grpc++_reflection", - "//cpp/attention_broker:attention_broker_server_lib", + "//attention_broker:attention_broker_server_lib", "@mbedcrypto//:lib", ], linkstatic = 1 @@ -146,7 +146,7 @@ cc_test( "@com_github_singnet_das_proto//:attention_broker_cc_grpc", "@com_github_grpc_grpc//:grpc++", "@com_github_grpc_grpc//:grpc++_reflection", - "//cpp/attention_broker:attention_broker_server_lib", + "//attention_broker:attention_broker_server_lib", "@mbedcrypto//:lib", ], linkstatic = 1 @@ -162,7 +162,7 @@ cc_test( ], deps = [ "@com_github_google_googletest//:gtest_main", - "//cpp/query_engine:query_engine_lib", + "//query_engine:query_engine_lib", ], linkopts = [ #"-L/opt/3rd-party/mbedcrypto", @@ -186,7 +186,7 @@ cc_test( ], deps = [ "@com_github_google_googletest//:gtest_main", - "//cpp/query_engine:query_engine_lib", + "//query_engine:query_engine_lib", ], linkopts = [ "-lmbedcrypto", @@ -209,7 +209,7 @@ cc_test( ], deps = [ "@com_github_google_googletest//:gtest_main", - "//cpp/query_engine:query_engine_lib", + "//query_engine:query_engine_lib", ], linkopts = [ ], @@ -226,7 +226,7 @@ cc_test( ], deps = [ "@com_github_google_googletest//:gtest_main", - "//cpp/query_engine:query_engine_lib", + "//query_engine:query_engine_lib", ], linkopts = [ #"-L/opt/3rd-party/mbedcrypto", @@ -250,7 +250,7 @@ cc_test( ], deps = [ "@com_github_google_googletest//:gtest_main", - "//cpp/query_engine:query_engine_lib", + "//query_engine:query_engine_lib", ], linkopts = [ #"-L/opt/3rd-party/mbedcrypto", @@ -274,7 +274,7 @@ cc_test( ], deps = [ "@com_github_google_googletest//:gtest_main", - "//cpp/query_engine:query_engine_lib", + "//query_engine:query_engine_lib", ], linkopts = [ "-lmbedcrypto", @@ -297,7 +297,7 @@ cc_test( ], deps = [ "@com_github_google_googletest//:gtest_main", - "//cpp/query_engine:query_engine_lib", + "//query_engine:query_engine_lib", ], linkopts = [ "-lmbedcrypto", @@ -320,7 +320,7 @@ cc_test( ], deps = [ "@com_github_google_googletest//:gtest_main", - "//cpp/query_engine:query_engine_lib", + "//query_engine:query_engine_lib", ], linkstatic = 1 ) diff --git a/src/python/BUILD b/src/python/BUILD new file mode 100644 index 0000000..ffd0fb0 --- /dev/null +++ b/src/python/BUILD @@ -0,0 +1 @@ +package(default_visibility = ["//visibility:public"]) diff --git 
a/src/python/MODULE.bazel b/src/python/MODULE.bazel new file mode 100644 index 0000000..00bb183 --- /dev/null +++ b/src/python/MODULE.bazel @@ -0,0 +1,6 @@ +############################################################################### +# Bazel now uses Bzlmod by default to manage external dependencies. +# Please consider migrating your external dependencies from WORKSPACE to MODULE.bazel. +# +# For more details, please check https://github.com/bazelbuild/bazel/issues/18958 +############################################################################### diff --git a/src/python/MODULE.bazel.lock b/src/python/MODULE.bazel.lock new file mode 100644 index 0000000..edb9181 --- /dev/null +++ b/src/python/MODULE.bazel.lock @@ -0,0 +1,160 @@ +{ + "lockFileVersion": 16, + "registryFileHashes": { + "https://bcr.bazel.build/bazel_registry.json": "8a28e4aff06ee60aed2a8c281907fb8bcbf3b753c91fb5a5c57da3215d5b3497", + "https://bcr.bazel.build/modules/abseil-cpp/20210324.2/MODULE.bazel": "7cd0312e064fde87c8d1cd79ba06c876bd23630c83466e9500321be55c96ace2", + "https://bcr.bazel.build/modules/abseil-cpp/20211102.0/MODULE.bazel": "70390338f7a5106231d20620712f7cccb659cd0e9d073d1991c038eb9fc57589", + "https://bcr.bazel.build/modules/abseil-cpp/20230125.1/MODULE.bazel": "89047429cb0207707b2dface14ba7f8df85273d484c2572755be4bab7ce9c3a0", + "https://bcr.bazel.build/modules/abseil-cpp/20230802.0.bcr.1/MODULE.bazel": "1c8cec495288dccd14fdae6e3f95f772c1c91857047a098fad772034264cc8cb", + "https://bcr.bazel.build/modules/abseil-cpp/20230802.0/MODULE.bazel": "d253ae36a8bd9ee3c5955384096ccb6baf16a1b1e93e858370da0a3b94f77c16", + "https://bcr.bazel.build/modules/abseil-cpp/20230802.1/MODULE.bazel": "fa92e2eb41a04df73cdabeec37107316f7e5272650f81d6cc096418fe647b915", + "https://bcr.bazel.build/modules/abseil-cpp/20240116.1/MODULE.bazel": "37bcdb4440fbb61df6a1c296ae01b327f19e9bb521f9b8e26ec854b6f97309ed", + "https://bcr.bazel.build/modules/abseil-cpp/20240116.1/source.json": "9be551b8d4e3ef76875c0d744b5d6a504a27e3ae67bc6b28f46415fd2d2957da", + "https://bcr.bazel.build/modules/bazel_features/1.1.1/MODULE.bazel": "27b8c79ef57efe08efccbd9dd6ef70d61b4798320b8d3c134fd571f78963dbcd", + "https://bcr.bazel.build/modules/bazel_features/1.11.0/MODULE.bazel": "f9382337dd5a474c3b7d334c2f83e50b6eaedc284253334cf823044a26de03e8", + "https://bcr.bazel.build/modules/bazel_features/1.15.0/MODULE.bazel": "d38ff6e517149dc509406aca0db3ad1efdd890a85e049585b7234d04238e2a4d", + "https://bcr.bazel.build/modules/bazel_features/1.17.0/MODULE.bazel": "039de32d21b816b47bd42c778e0454217e9c9caac4a3cf8e15c7231ee3ddee4d", + "https://bcr.bazel.build/modules/bazel_features/1.18.0/MODULE.bazel": "1be0ae2557ab3a72a57aeb31b29be347bcdc5d2b1eb1e70f39e3851a7e97041a", + "https://bcr.bazel.build/modules/bazel_features/1.19.0/MODULE.bazel": "59adcdf28230d220f0067b1f435b8537dd033bfff8db21335ef9217919c7fb58", + "https://bcr.bazel.build/modules/bazel_features/1.21.0/MODULE.bazel": "675642261665d8eea09989aa3b8afb5c37627f1be178382c320d1b46afba5e3b", + "https://bcr.bazel.build/modules/bazel_features/1.21.0/source.json": "3e8379efaaef53ce35b7b8ba419df829315a880cb0a030e5bb45c96d6d5ecb5f", + "https://bcr.bazel.build/modules/bazel_features/1.4.1/MODULE.bazel": "e45b6bb2350aff3e442ae1111c555e27eac1d915e77775f6fdc4b351b758b5d7", + "https://bcr.bazel.build/modules/bazel_features/1.9.1/MODULE.bazel": "8f679097876a9b609ad1f60249c49d68bfab783dd9be012faf9d82547b14815a", + "https://bcr.bazel.build/modules/bazel_skylib/1.0.3/MODULE.bazel": 
"bcb0fd896384802d1ad283b4e4eb4d718eebd8cb820b0a2c3a347fb971afd9d8", + "https://bcr.bazel.build/modules/bazel_skylib/1.1.1/MODULE.bazel": "1add3e7d93ff2e6998f9e118022c84d163917d912f5afafb3058e3d2f1545b5e", + "https://bcr.bazel.build/modules/bazel_skylib/1.2.0/MODULE.bazel": "44fe84260e454ed94ad326352a698422dbe372b21a1ac9f3eab76eb531223686", + "https://bcr.bazel.build/modules/bazel_skylib/1.2.1/MODULE.bazel": "f35baf9da0efe45fa3da1696ae906eea3d615ad41e2e3def4aeb4e8bc0ef9a7a", + "https://bcr.bazel.build/modules/bazel_skylib/1.3.0/MODULE.bazel": "20228b92868bf5cfc41bda7afc8a8ba2a543201851de39d990ec957b513579c5", + "https://bcr.bazel.build/modules/bazel_skylib/1.4.1/MODULE.bazel": "a0dcb779424be33100dcae821e9e27e4f2901d9dfd5333efe5ac6a8d7ab75e1d", + "https://bcr.bazel.build/modules/bazel_skylib/1.4.2/MODULE.bazel": "3bd40978e7a1fac911d5989e6b09d8f64921865a45822d8b09e815eaa726a651", + "https://bcr.bazel.build/modules/bazel_skylib/1.5.0/MODULE.bazel": "32880f5e2945ce6a03d1fbd588e9198c0a959bb42297b2cfaf1685b7bc32e138", + "https://bcr.bazel.build/modules/bazel_skylib/1.6.1/MODULE.bazel": "8fdee2dbaace6c252131c00e1de4b165dc65af02ea278476187765e1a617b917", + "https://bcr.bazel.build/modules/bazel_skylib/1.7.0/MODULE.bazel": "0db596f4563de7938de764cc8deeabec291f55e8ec15299718b93c4423e9796d", + "https://bcr.bazel.build/modules/bazel_skylib/1.7.1/MODULE.bazel": "3120d80c5861aa616222ec015332e5f8d3171e062e3e804a2a0253e1be26e59b", + "https://bcr.bazel.build/modules/bazel_skylib/1.7.1/source.json": "f121b43eeefc7c29efbd51b83d08631e2347297c95aac9764a701f2a6a2bb953", + "https://bcr.bazel.build/modules/buildozer/7.1.2/MODULE.bazel": "2e8dd40ede9c454042645fd8d8d0cd1527966aa5c919de86661e62953cd73d84", + "https://bcr.bazel.build/modules/buildozer/7.1.2/source.json": "c9028a501d2db85793a6996205c8de120944f50a0d570438fcae0457a5f9d1f8", + "https://bcr.bazel.build/modules/google_benchmark/1.8.2/MODULE.bazel": "a70cf1bba851000ba93b58ae2f6d76490a9feb74192e57ab8e8ff13c34ec50cb", + "https://bcr.bazel.build/modules/googletest/1.11.0/MODULE.bazel": "3a83f095183f66345ca86aa13c58b59f9f94a2f81999c093d4eeaa2d262d12f4", + "https://bcr.bazel.build/modules/googletest/1.14.0.bcr.1/MODULE.bazel": "22c31a561553727960057361aa33bf20fb2e98584bc4fec007906e27053f80c6", + "https://bcr.bazel.build/modules/googletest/1.14.0.bcr.1/source.json": "41e9e129f80d8c8bf103a7acc337b76e54fad1214ac0a7084bf24f4cd924b8b4", + "https://bcr.bazel.build/modules/googletest/1.14.0/MODULE.bazel": "cfbcbf3e6eac06ef9d85900f64424708cc08687d1b527f0ef65aa7517af8118f", + "https://bcr.bazel.build/modules/jsoncpp/1.9.5/MODULE.bazel": "31271aedc59e815656f5736f282bb7509a97c7ecb43e927ac1a37966e0578075", + "https://bcr.bazel.build/modules/jsoncpp/1.9.5/source.json": "4108ee5085dd2885a341c7fab149429db457b3169b86eb081fa245eadf69169d", + "https://bcr.bazel.build/modules/libpfm/4.11.0/MODULE.bazel": "45061ff025b301940f1e30d2c16bea596c25b176c8b6b3087e92615adbd52902", + "https://bcr.bazel.build/modules/platforms/0.0.10/MODULE.bazel": "8cb8efaf200bdeb2150d93e162c40f388529a25852b332cec879373771e48ed5", + "https://bcr.bazel.build/modules/platforms/0.0.10/source.json": "f22828ff4cf021a6b577f1bf6341cb9dcd7965092a439f64fc1bb3b7a5ae4bd5", + "https://bcr.bazel.build/modules/platforms/0.0.4/MODULE.bazel": "9b328e31ee156f53f3c416a64f8491f7eb731742655a47c9eec4703a71644aee", + "https://bcr.bazel.build/modules/platforms/0.0.5/MODULE.bazel": "5733b54ea419d5eaf7997054bb55f6a1d0b5ff8aedf0176fef9eea44f3acda37", + "https://bcr.bazel.build/modules/platforms/0.0.6/MODULE.bazel": 
"ad6eeef431dc52aefd2d77ed20a4b353f8ebf0f4ecdd26a807d2da5aa8cd0615", + "https://bcr.bazel.build/modules/platforms/0.0.7/MODULE.bazel": "72fd4a0ede9ee5c021f6a8dd92b503e089f46c227ba2813ff183b71616034814", + "https://bcr.bazel.build/modules/platforms/0.0.8/MODULE.bazel": "9f142c03e348f6d263719f5074b21ef3adf0b139ee4c5133e2aa35664da9eb2d", + "https://bcr.bazel.build/modules/protobuf/21.7/MODULE.bazel": "a5a29bb89544f9b97edce05642fac225a808b5b7be74038ea3640fae2f8e66a7", + "https://bcr.bazel.build/modules/protobuf/27.0/MODULE.bazel": "7873b60be88844a0a1d8f80b9d5d20cfbd8495a689b8763e76c6372998d3f64c", + "https://bcr.bazel.build/modules/protobuf/27.1/MODULE.bazel": "703a7b614728bb06647f965264967a8ef1c39e09e8f167b3ca0bb1fd80449c0d", + "https://bcr.bazel.build/modules/protobuf/29.0-rc2/MODULE.bazel": "6241d35983510143049943fc0d57937937122baf1b287862f9dc8590fc4c37df", + "https://bcr.bazel.build/modules/protobuf/29.0/MODULE.bazel": "319dc8bf4c679ff87e71b1ccfb5a6e90a6dbc4693501d471f48662ac46d04e4e", + "https://bcr.bazel.build/modules/protobuf/29.0/source.json": "b857f93c796750eef95f0d61ee378f3420d00ee1dd38627b27193aa482f4f981", + "https://bcr.bazel.build/modules/protobuf/3.19.0/MODULE.bazel": "6b5fbb433f760a99a22b18b6850ed5784ef0e9928a72668b66e4d7ccd47db9b0", + "https://bcr.bazel.build/modules/pybind11_bazel/2.11.1/MODULE.bazel": "88af1c246226d87e65be78ed49ecd1e6f5e98648558c14ce99176da041dc378e", + "https://bcr.bazel.build/modules/pybind11_bazel/2.11.1/source.json": "be4789e951dd5301282729fe3d4938995dc4c1a81c2ff150afc9f1b0504c6022", + "https://bcr.bazel.build/modules/re2/2023-09-01/MODULE.bazel": "cb3d511531b16cfc78a225a9e2136007a48cf8a677e4264baeab57fe78a80206", + "https://bcr.bazel.build/modules/re2/2023-09-01/source.json": "e044ce89c2883cd957a2969a43e79f7752f9656f6b20050b62f90ede21ec6eb4", + "https://bcr.bazel.build/modules/rules_android/0.1.1/MODULE.bazel": "48809ab0091b07ad0182defb787c4c5328bd3a278938415c00a7b69b50c4d3a8", + "https://bcr.bazel.build/modules/rules_android/0.1.1/source.json": "e6986b41626ee10bdc864937ffb6d6bf275bb5b9c65120e6137d56e6331f089e", + "https://bcr.bazel.build/modules/rules_cc/0.0.1/MODULE.bazel": "cb2aa0747f84c6c3a78dad4e2049c154f08ab9d166b1273835a8174940365647", + "https://bcr.bazel.build/modules/rules_cc/0.0.10/MODULE.bazel": "ec1705118f7eaedd6e118508d3d26deba2a4e76476ada7e0e3965211be012002", + "https://bcr.bazel.build/modules/rules_cc/0.0.13/MODULE.bazel": "0e8529ed7b323dad0775ff924d2ae5af7640b23553dfcd4d34344c7e7a867191", + "https://bcr.bazel.build/modules/rules_cc/0.0.14/MODULE.bazel": "5e343a3aac88b8d7af3b1b6d2093b55c347b8eefc2e7d1442f7a02dc8fea48ac", + "https://bcr.bazel.build/modules/rules_cc/0.0.15/MODULE.bazel": "6704c35f7b4a72502ee81f61bf88706b54f06b3cbe5558ac17e2e14666cd5dcc", + "https://bcr.bazel.build/modules/rules_cc/0.0.16/MODULE.bazel": "7661303b8fc1b4d7f532e54e9d6565771fea666fbdf839e0a86affcd02defe87", + "https://bcr.bazel.build/modules/rules_cc/0.0.16/source.json": "227e83737046aa4f50015da48e98e0d8ab42fd0ec74d8d653b6cc9f9a357f200", + "https://bcr.bazel.build/modules/rules_cc/0.0.2/MODULE.bazel": "6915987c90970493ab97393024c156ea8fb9f3bea953b2f3ec05c34f19b5695c", + "https://bcr.bazel.build/modules/rules_cc/0.0.6/MODULE.bazel": "abf360251023dfe3efcef65ab9d56beefa8394d4176dd29529750e1c57eaa33f", + "https://bcr.bazel.build/modules/rules_cc/0.0.8/MODULE.bazel": "964c85c82cfeb6f3855e6a07054fdb159aced38e99a5eecf7bce9d53990afa3e", + "https://bcr.bazel.build/modules/rules_cc/0.0.9/MODULE.bazel": "836e76439f354b89afe6a911a7adf59a6b2518fafb174483ad78a2a2fde7b1c5", + 
"https://bcr.bazel.build/modules/rules_foreign_cc/0.9.0/MODULE.bazel": "c9e8c682bf75b0e7c704166d79b599f93b72cfca5ad7477df596947891feeef6", + "https://bcr.bazel.build/modules/rules_fuzzing/0.5.2/MODULE.bazel": "40c97d1144356f52905566c55811f13b299453a14ac7769dfba2ac38192337a8", + "https://bcr.bazel.build/modules/rules_fuzzing/0.5.2/source.json": "c8b1e2c717646f1702290959a3302a178fb639d987ab61d548105019f11e527e", + "https://bcr.bazel.build/modules/rules_java/4.0.0/MODULE.bazel": "5a78a7ae82cd1a33cef56dc578c7d2a46ed0dca12643ee45edbb8417899e6f74", + "https://bcr.bazel.build/modules/rules_java/5.3.5/MODULE.bazel": "a4ec4f2db570171e3e5eb753276ee4b389bae16b96207e9d3230895c99644b86", + "https://bcr.bazel.build/modules/rules_java/6.0.0/MODULE.bazel": "8a43b7df601a7ec1af61d79345c17b31ea1fedc6711fd4abfd013ea612978e39", + "https://bcr.bazel.build/modules/rules_java/6.4.0/MODULE.bazel": "e986a9fe25aeaa84ac17ca093ef13a4637f6107375f64667a15999f77db6c8f6", + "https://bcr.bazel.build/modules/rules_java/6.5.2/MODULE.bazel": "1d440d262d0e08453fa0c4d8f699ba81609ed0e9a9a0f02cd10b3e7942e61e31", + "https://bcr.bazel.build/modules/rules_java/7.10.0/MODULE.bazel": "530c3beb3067e870561739f1144329a21c851ff771cd752a49e06e3dc9c2e71a", + "https://bcr.bazel.build/modules/rules_java/7.12.2/MODULE.bazel": "579c505165ee757a4280ef83cda0150eea193eed3bef50b1004ba88b99da6de6", + "https://bcr.bazel.build/modules/rules_java/7.2.0/MODULE.bazel": "06c0334c9be61e6cef2c8c84a7800cef502063269a5af25ceb100b192453d4ab", + "https://bcr.bazel.build/modules/rules_java/7.3.2/MODULE.bazel": "50dece891cfdf1741ea230d001aa9c14398062f2b7c066470accace78e412bc2", + "https://bcr.bazel.build/modules/rules_java/7.6.1/MODULE.bazel": "2f14b7e8a1aa2f67ae92bc69d1ec0fa8d9f827c4e17ff5e5f02e91caa3b2d0fe", + "https://bcr.bazel.build/modules/rules_java/8.6.1/MODULE.bazel": "f4808e2ab5b0197f094cabce9f4b006a27766beb6a9975931da07099560ca9c2", + "https://bcr.bazel.build/modules/rules_java/8.6.1/source.json": "f18d9ad3c4c54945bf422ad584fa6c5ca5b3116ff55a5b1bc77e5c1210be5960", + "https://bcr.bazel.build/modules/rules_jvm_external/4.4.2/MODULE.bazel": "a56b85e418c83eb1839819f0b515c431010160383306d13ec21959ac412d2fe7", + "https://bcr.bazel.build/modules/rules_jvm_external/5.1/MODULE.bazel": "33f6f999e03183f7d088c9be518a63467dfd0be94a11d0055fe2d210f89aa909", + "https://bcr.bazel.build/modules/rules_jvm_external/5.2/MODULE.bazel": "d9351ba35217ad0de03816ef3ed63f89d411349353077348a45348b096615036", + "https://bcr.bazel.build/modules/rules_jvm_external/5.3/MODULE.bazel": "bf93870767689637164657731849fb887ad086739bd5d360d90007a581d5527d", + "https://bcr.bazel.build/modules/rules_jvm_external/6.1/MODULE.bazel": "75b5fec090dbd46cf9b7d8ea08cf84a0472d92ba3585b476f44c326eda8059c4", + "https://bcr.bazel.build/modules/rules_jvm_external/6.3/MODULE.bazel": "c998e060b85f71e00de5ec552019347c8bca255062c990ac02d051bb80a38df0", + "https://bcr.bazel.build/modules/rules_jvm_external/6.3/source.json": "6f5f5a5a4419ae4e37c35a5bb0a6ae657ed40b7abc5a5189111b47fcebe43197", + "https://bcr.bazel.build/modules/rules_kotlin/1.9.0/MODULE.bazel": "ef85697305025e5a61f395d4eaede272a5393cee479ace6686dba707de804d59", + "https://bcr.bazel.build/modules/rules_kotlin/1.9.6/MODULE.bazel": "d269a01a18ee74d0335450b10f62c9ed81f2321d7958a2934e44272fe82dcef3", + "https://bcr.bazel.build/modules/rules_kotlin/1.9.6/source.json": "2faa4794364282db7c06600b7e5e34867a564ae91bda7cae7c29c64e9466b7d5", + "https://bcr.bazel.build/modules/rules_license/0.0.3/MODULE.bazel": 
"627e9ab0247f7d1e05736b59dbb1b6871373de5ad31c3011880b4133cafd4bd0", + "https://bcr.bazel.build/modules/rules_license/0.0.7/MODULE.bazel": "088fbeb0b6a419005b89cf93fe62d9517c0a2b8bb56af3244af65ecfe37e7d5d", + "https://bcr.bazel.build/modules/rules_license/1.0.0/MODULE.bazel": "a7fda60eefdf3d8c827262ba499957e4df06f659330bbe6cdbdb975b768bb65c", + "https://bcr.bazel.build/modules/rules_license/1.0.0/source.json": "a52c89e54cc311196e478f8382df91c15f7a2bfdf4c6cd0e2675cc2ff0b56efb", + "https://bcr.bazel.build/modules/rules_pkg/0.7.0/MODULE.bazel": "df99f03fc7934a4737122518bb87e667e62d780b610910f0447665a7e2be62dc", + "https://bcr.bazel.build/modules/rules_pkg/1.0.1/MODULE.bazel": "5b1df97dbc29623bccdf2b0dcd0f5cb08e2f2c9050aab1092fd39a41e82686ff", + "https://bcr.bazel.build/modules/rules_pkg/1.0.1/source.json": "bd82e5d7b9ce2d31e380dd9f50c111d678c3bdaca190cb76b0e1c71b05e1ba8a", + "https://bcr.bazel.build/modules/rules_proto/4.0.0/MODULE.bazel": "a7a7b6ce9bee418c1a760b3d84f83a299ad6952f9903c67f19e4edd964894e06", + "https://bcr.bazel.build/modules/rules_proto/5.3.0-21.7/MODULE.bazel": "e8dff86b0971688790ae75528fe1813f71809b5afd57facb44dad9e8eca631b7", + "https://bcr.bazel.build/modules/rules_proto/6.0.2/MODULE.bazel": "ce916b775a62b90b61888052a416ccdda405212b6aaeb39522f7dc53431a5e73", + "https://bcr.bazel.build/modules/rules_proto/7.0.2/MODULE.bazel": "bf81793bd6d2ad89a37a40693e56c61b0ee30f7a7fdbaf3eabbf5f39de47dea2", + "https://bcr.bazel.build/modules/rules_proto/7.0.2/source.json": "1e5e7260ae32ef4f2b52fd1d0de8d03b606a44c91b694d2f1afb1d3b28a48ce1", + "https://bcr.bazel.build/modules/rules_python/0.10.2/MODULE.bazel": "cc82bc96f2997baa545ab3ce73f196d040ffb8756fd2d66125a530031cd90e5f", + "https://bcr.bazel.build/modules/rules_python/0.23.1/MODULE.bazel": "49ffccf0511cb8414de28321f5fcf2a31312b47c40cc21577144b7447f2bf300", + "https://bcr.bazel.build/modules/rules_python/0.25.0/MODULE.bazel": "72f1506841c920a1afec76975b35312410eea3aa7b63267436bfb1dd91d2d382", + "https://bcr.bazel.build/modules/rules_python/0.28.0/MODULE.bazel": "cba2573d870babc976664a912539b320cbaa7114cd3e8f053c720171cde331ed", + "https://bcr.bazel.build/modules/rules_python/0.31.0/MODULE.bazel": "93a43dc47ee570e6ec9f5779b2e64c1476a6ce921c48cc9a1678a91dd5f8fd58", + "https://bcr.bazel.build/modules/rules_python/0.4.0/MODULE.bazel": "9208ee05fd48bf09ac60ed269791cf17fb343db56c8226a720fbb1cdf467166c", + "https://bcr.bazel.build/modules/rules_python/0.40.0/MODULE.bazel": "9d1a3cd88ed7d8e39583d9ffe56ae8a244f67783ae89b60caafc9f5cf318ada7", + "https://bcr.bazel.build/modules/rules_python/0.40.0/source.json": "939d4bd2e3110f27bfb360292986bb79fd8dcefb874358ccd6cdaa7bda029320", + "https://bcr.bazel.build/modules/rules_shell/0.2.0/MODULE.bazel": "fda8a652ab3c7d8fee214de05e7a9916d8b28082234e8d2c0094505c5268ed3c", + "https://bcr.bazel.build/modules/rules_shell/0.2.0/source.json": "7f27af3c28037d9701487c4744b5448d26537cc66cdef0d8df7ae85411f8de95", + "https://bcr.bazel.build/modules/stardoc/0.5.1/MODULE.bazel": "1a05d92974d0c122f5ccf09291442580317cdd859f07a8655f1db9a60374f9f8", + "https://bcr.bazel.build/modules/stardoc/0.5.3/MODULE.bazel": "c7f6948dae6999bf0db32c1858ae345f112cacf98f174c7a8bb707e41b974f1c", + "https://bcr.bazel.build/modules/stardoc/0.5.6/MODULE.bazel": "c43dabc564990eeab55e25ed61c07a1aadafe9ece96a4efabb3f8bf9063b71ef", + "https://bcr.bazel.build/modules/stardoc/0.7.0/MODULE.bazel": "05e3d6d30c099b6770e97da986c53bd31844d7f13d41412480ea265ac9e8079c", + "https://bcr.bazel.build/modules/stardoc/0.7.1/MODULE.bazel": 
"3548faea4ee5dda5580f9af150e79d0f6aea934fc60c1cc50f4efdd9420759e7", + "https://bcr.bazel.build/modules/stardoc/0.7.1/source.json": "b6500ffcd7b48cd72c29bb67bcac781e12701cc0d6d55d266a652583cfcdab01", + "https://bcr.bazel.build/modules/upb/0.0.0-20220923-a547704/MODULE.bazel": "7298990c00040a0e2f121f6c32544bab27d4452f80d9ce51349b1a28f3005c43", + "https://bcr.bazel.build/modules/zlib/1.2.11/MODULE.bazel": "07b389abc85fdbca459b69e2ec656ae5622873af3f845e1c9d80fe179f3effa0", + "https://bcr.bazel.build/modules/zlib/1.3.1.bcr.3/MODULE.bazel": "af322bc08976524477c79d1e45e241b6efbeb918c497e8840b8ab116802dda79", + "https://bcr.bazel.build/modules/zlib/1.3.1.bcr.3/source.json": "2be409ac3c7601245958cd4fcdff4288be79ed23bd690b4b951f500d54ee6e7d", + "https://bcr.bazel.build/modules/zlib/1.3.1/MODULE.bazel": "751c9940dcfe869f5f7274e1295422a34623555916eb98c174c1e945594bf198" + }, + "selectedYankedVersions": {}, + "moduleExtensions": { + "@@rules_java+//java:rules_java_deps.bzl%compatibility_proxy": { + "general": { + "bzlTransitiveDigest": "84xJEZ1jnXXwo8BXMprvBm++rRt4jsTu9liBxz0ivps=", + "usagesDigest": "jTQDdLDxsS43zuRmg1faAjIEPWdLAbDAowI1pInQSoo=", + "recordedFileInputs": {}, + "recordedDirentsInputs": {}, + "envVariables": {}, + "generatedRepoSpecs": { + "compatibility_proxy": { + "repoRuleId": "@@rules_java+//java:rules_java_deps.bzl%_compatibility_proxy_repo_rule", + "attributes": {} + } + }, + "recordedRepoMappingEntries": [ + [ + "rules_java+", + "bazel_tools", + "bazel_tools" + ] + ] + } + } + } +} diff --git a/src/python/WORKSPACE b/src/python/WORKSPACE new file mode 100644 index 0000000..36f4ee6 --- /dev/null +++ b/src/python/WORKSPACE @@ -0,0 +1,34 @@ +workspace(name = "rules_python_das") + +load("@bazel_tools//tools/build_defs/repo:http.bzl", "http_archive") + +http_archive( + name = "rules_python", + sha256 = "4f7e2aa1eb9aa722d96498f5ef514f426c1f55161c3c9ae628c857a7128ceb07", + strip_prefix = "rules_python-1.0.0", + url = "https://github.com/bazelbuild/rules_python/releases/download/1.0.0/rules_python-1.0.0.tar.gz", +) + +load("@rules_python//python:repositories.bzl", "py_repositories", "python_register_toolchains") + +py_repositories() + +python_register_toolchains( + name = "python_3_10", + python_version = "3.10", +) + +load("@rules_python//python:pip.bzl", "pip_parse") + +# Create a central repo that knows about the dependencies needed from +# requirements_lock.txt. +pip_parse( + name = "pypi", + requirements_lock = "//deps:requirements_lock.txt", +) + +# Load the starlark macro, which will define your dependencies. +load("@pypi//:requirements.bzl", "install_deps") + +# Call it to define repos for your requirements. 
+install_deps() diff --git a/src/python/deps/BUILD b/src/python/deps/BUILD new file mode 100644 index 0000000..28fe011 --- /dev/null +++ b/src/python/deps/BUILD @@ -0,0 +1,11 @@ +load("@rules_python//python:pip.bzl", "compile_pip_requirements") + +package(default_visibility = ["//visibility:public"]) + +compile_pip_requirements( + name = "requirements", + src = "requirements.in", + generate_hashes = False, + requirements_txt = "requirements_lock.txt", + visibility = ["//visibility:public"], +) diff --git a/src/python/deps/requirements.in b/src/python/deps/requirements.in new file mode 100644 index 0000000..839ea75 --- /dev/null +++ b/src/python/deps/requirements.in @@ -0,0 +1,6 @@ +redis>=5.0.0,<6.0.0 +pymongo>=4.5.0,<4.6.0 +python-dotenv>=1.0.0,<2.0.0 +mongomock>=4.1.2,<5.0.0 +hyperon-das-atomdb-cpp==0.0.2 + diff --git a/src/python/deps/requirements_lock.txt b/src/python/deps/requirements_lock.txt new file mode 100644 index 0000000..45b2f23 --- /dev/null +++ b/src/python/deps/requirements_lock.txt @@ -0,0 +1,26 @@ +# +# This file is autogenerated by pip-compile with Python 3.10 +# by the following command: +# +# bazel run //deps:requirements.update +# +async-timeout==5.0.1 + # via redis +dnspython==2.7.0 + # via pymongo +hyperon-das-atomdb-cpp==0.0.2 + # via -r deps/requirements.in +mongomock==4.3.0 + # via -r deps/requirements.in +packaging==24.2 + # via mongomock +pymongo==4.5.0 + # via -r deps/requirements.in +python-dotenv==1.0.1 + # via -r deps/requirements.in +pytz==2024.2 + # via mongomock +redis==5.2.1 + # via -r deps/requirements.in +sentinels==1.0.0 + # via mongomock diff --git a/src/python/hyperon_das_atomdb/BUILD b/src/python/hyperon_das_atomdb/BUILD new file mode 100644 index 0000000..d60aca1 --- /dev/null +++ b/src/python/hyperon_das_atomdb/BUILD @@ -0,0 +1,84 @@ +load("@pypi//:requirements.bzl", "requirement") +load("@rules_python//python:defs.bzl", "py_library") +load("@rules_python//python:packaging.bzl", "py_wheel") + +package(default_visibility = ["//visibility:public"]) + +filegroup( + name = "py_files", + srcs = glob(["*.py"]), +) + +py_library( + name = "hyperon_das_atomdb", + srcs = [ + ":py_files", + "//hyperon_das_atomdb/adapters:py_files", + "//hyperon_das_atomdb/utils:py_files", + ], + deps = [ + ":database", + ":exceptions", + ":index", + ":logger", + "//hyperon_das_atomdb/adapters:all_adapters", + "//hyperon_das_atomdb/utils:expression_hasher", + "//hyperon_das_atomdb/utils:settings", + requirement("hyperon_das_atomdb_cpp"), + ], +) + +py_wheel( + name = "hyperon_das_atomdb_wheel", + abi = "none", + author = "Andre Senna", + author_email = "andre@singularitynet.io", + classifiers = [ + "Programming Language :: Python :: 3", + "Programming Language :: Python :: 3.10", + "Programming Language :: Python :: 3.11", + "Programming Language :: Python :: 3.12", + "Programming Language :: Python :: 3.13", + ], + description_content_type = "text/markdown", + description_file = "README.md", + distribution = "hyperon_das_atomdb", + platform = "any", + python_requires = ">=3.10", + python_tag = "py3", + requires_file = "//deps:requirements_lock.txt", + stamp = 1, + summary = "Persistence layer for Distributed AtomSpace", + version = "$(ATOMDB_VERSION)", # must be defined when calling `bazel build` with `--define=ATOMDB_VERSION=` + deps = [":hyperon_das_atomdb"], +) + +py_library( + name = "database", + srcs = ["database.py"], + deps = [ + requirement("hyperon_das_atomdb_cpp"), + ], +) + +py_library( + name = "exceptions", + srcs = ["exceptions.py"], + deps = [ + 
requirement("hyperon_das_atomdb_cpp"), + ], +) + +py_library( + name = "index", + srcs = ["index.py"], + deps = [ + "//hyperon_das_atomdb/utils:expression_hasher", + ], +) + +py_library( + name = "logger", + srcs = ["logger.py"], + deps = [], +) diff --git a/src/python/hyperon_das_atomdb/README.md b/src/python/hyperon_das_atomdb/README.md new file mode 100644 index 0000000..4e41ffa --- /dev/null +++ b/src/python/hyperon_das_atomdb/README.md @@ -0,0 +1,97 @@ +# Hyperon DAS AtomDB + +Persistence layer for Distributed AtomSpace + +## Installation + +This package requires: +[python](https://www.python.org/) >= 3.10 to run. + +#### This package is deployed on [Pypi](https://pypi.org/project/hyperon-das/). If you want, you can install using the pip command + +``` +pip install hyperon-das-atomdb +``` + +#### If you want to run it without installing it by pip, you can follow the following approach + +We use the [Poetry](https://python-poetry.org/) package to manage project dependencies and other things. So, if you have Poetry on your machine, you can run the commands below to prepare your environment + +**1. poetry install** + +**2. poetry shell** (activate virtual environment) + +## Environment Variables + +You must have the following variables set in your environment with their respective values: + +``` +DAS_MONGODB_HOSTNAME=172.17.0.2 +DAS_MONGODB_PORT=27017 +DAS_MONGODB_USERNAME=mongo +DAS_MONGODB_PASSWORD=mongo +DAS_MONGODB_TLS_CA_FILE=global-bundle.pem [optional] +DAS_REDIS_HOSTNAME=127.0.0.1 +DAS_REDIS_PORT=6379 +DAS_REDIS_USERNAME=admin [optional] +DAS_REDIS_PASSWORD=admin [optional] +DAS_USE_REDIS_CLUSTER=false [default: true] +DAS_USE_REDIS_SSL=false [default: true] +``` + +## Usage + +**1 - Redis and MongoDB** + +- You must have Redis and MongoDB running in your environment +- To initialize the databases you must pass the parameters with the necessary values. Otherwise, default values will be used. See below which parameters it is possible to pass and their respective default values: + +```python +from hyperon_das_atomdb.adapters import RedisMongoDB + +redis_mongo_db = RedisMongoDB( + mongo_hostname='localhost', + mongo_port=27017, + mongo_username='mongo', + mongo_password='mongo', + mongo_tls_ca_file=None, + redis_hostname='localhost', + redis_port=6379, + redis_username=None, + redis_password=None, + redis_cluster=True, + redis_ssl=True, +) +``` + +**2 - In Memory DB** + +```python +from hyperon_das_atomdb.adapters import InMemoryDB + +in_memory_db = InMemoryDB() +``` + +## Pre-Commit Setup + +Before pushing your changes, it's recommended to set up pre-commit to run automated tests locally. 
Run the following command (needs to be done once): + +```bash +pre-commit install +``` + +## Tests + +You can run the command below to execute the unit tests + +```bash +make unit-tests +``` + +## Documentation References + +[Repositories documentation](https://docs.google.com/document/d/1njmP_oXw_0FLwoXY5ttGBMFGV2n60-ugAltWIuoQO10/) + +## Release Notes + +[DAS AtomDB Releases](https://github.com/singnet/das-atom-db/releases) diff --git a/src/python/hyperon_das_atomdb/__init__.py b/src/python/hyperon_das_atomdb/__init__.py new file mode 100644 index 0000000..9fb9b3b --- /dev/null +++ b/src/python/hyperon_das_atomdb/__init__.py @@ -0,0 +1,19 @@ +"""This module initializes the AtomDB package and imports key components.""" + +# pylint: disable=wrong-import-position + +import sys + +if sys.version_info < (3, 10): + raise RuntimeError("hyperon-das-atomdb requires Python 3.10 or higher") + +from .database import WILDCARD, AtomDB +from .exceptions import AtomDoesNotExist + +__all__ = [ + "AtomDB", + "WILDCARD", + "AtomDoesNotExist", +] + +__version__ = '0.8.10' diff --git a/src/python/hyperon_das_atomdb/adapters/BUILD b/src/python/hyperon_das_atomdb/adapters/BUILD new file mode 100644 index 0000000..bbc77ab --- /dev/null +++ b/src/python/hyperon_das_atomdb/adapters/BUILD @@ -0,0 +1,40 @@ +load("@pypi//:requirements.bzl", "requirement") +load("@rules_python//python:defs.bzl", "py_library") + +package(default_visibility = ["//visibility:public"]) + +filegroup( + name = "py_files", + srcs = glob(["*.py"]), +) + +py_library( + name = "all_adapters", + srcs = [":py_files"], + deps = [ + ":ram_only", + ":redis_mongo_db", + ], +) + +py_library( + name = "ram_only", + srcs = ["ram_only.py"], + deps = [ + requirement("hyperon_das_atomdb_cpp"), + ], +) + +py_library( + name = "redis_mongo_db", + srcs = ["redis_mongo_db.py"], + deps = [ + "//hyperon_das_atomdb:database", + "//hyperon_das_atomdb:exceptions", + "//hyperon_das_atomdb:index", + "//hyperon_das_atomdb:logger", + "//hyperon_das_atomdb/utils:expression_hasher", + requirement("pymongo"), + requirement("redis"), + ], +) diff --git a/src/python/hyperon_das_atomdb/adapters/__init__.py b/src/python/hyperon_das_atomdb/adapters/__init__.py new file mode 100644 index 0000000..668157f --- /dev/null +++ b/src/python/hyperon_das_atomdb/adapters/__init__.py @@ -0,0 +1,6 @@ +"""This module imports the InMemoryDB and RedisMongoDB classes and defines the public API.""" + +from .ram_only import InMemoryDB +from .redis_mongo_db import RedisMongoDB + +__all__ = ["RedisMongoDB", "InMemoryDB"] diff --git a/src/python/hyperon_das_atomdb/adapters/ram_only.py b/src/python/hyperon_das_atomdb/adapters/ram_only.py new file mode 100644 index 0000000..2ee2520 --- /dev/null +++ b/src/python/hyperon_das_atomdb/adapters/ram_only.py @@ -0,0 +1,3 @@ +from hyperon_das_atomdb_cpp.adapters import InMemoryDB + +__all__ = ["InMemoryDB"] diff --git a/src/python/hyperon_das_atomdb/adapters/redis_mongo_db.py b/src/python/hyperon_das_atomdb/adapters/redis_mongo_db.py new file mode 100644 index 0000000..da56c10 --- /dev/null +++ b/src/python/hyperon_das_atomdb/adapters/redis_mongo_db.py @@ -0,0 +1,1549 @@ +""" +This module provides a concrete implementation of the AtomDB using Redis and MongoDB. + +It includes classes and methods for managing nodes and links, handling database connections, +and performing various database operations such as creating indexes, retrieving documents, +and updating indexes.
The module integrates with MongoDB for persistent storage and Redis +for caching and fast access to frequently used data. +""" + +import base64 +import collections +import itertools +import pickle +import re +import sys +from copy import deepcopy +from enum import Enum +from typing import Any, Iterable, Mapping, Optional, OrderedDict, TypeAlias + +from pymongo import ASCENDING, MongoClient +from pymongo import errors as pymongo_errors +from pymongo.collection import Collection +from pymongo.database import Database +from redis import Redis +from redis.cluster import RedisCluster + +from hyperon_das_atomdb.database import ( + WILDCARD, + AtomDB, + AtomT, + FieldIndexType, + FieldNames, + HandleListT, + HandleSetT, + LinkT, + NodeT, +) +from hyperon_das_atomdb.exceptions import ( + AtomDoesNotExist, + ConnectionMongoDBException, + InvalidOperationException, +) +from hyperon_das_atomdb.index import Index +from hyperon_das_atomdb.logger import logger +from hyperon_das_atomdb.utils.expression_hasher import ExpressionHasher + +# pylint: disable=invalid-name +# Type aliases +DocumentT: TypeAlias = dict[str, Any] +# pylint: enable=invalid-name + + +def _build_redis_key(prefix: str, key: str | list[Any]) -> str: + """ + Build a Redis key by concatenating the given prefix and key with a colon separator. + + Args: + prefix (str): The prefix to be used in the Redis key. + key (str | list[Any]): The key to be concatenated with the prefix. + + Returns: + str: The concatenated Redis key. + """ + return prefix + ":" + str(key) + + +class MongoCollectionNames(str, Enum): + """Enum for MongoDB collection names used in the AtomDB.""" + + ATOMS = "atoms" + ATOM_TYPES = "atom_types" + DAS_CONFIG = "das_config" + + +class KeyPrefix(str, Enum): + """Enum for key prefixes used in Redis.""" + + INCOMING_SET = "incoming_set" + OUTGOING_SET = "outgoing_set" + PATTERNS = "patterns" + TEMPLATES = "templates" + NAMED_ENTITIES = "names" + CUSTOM_INDEXES = "custom_indexes" + + +class MongoIndexType(str, Enum): + """Enum for MongoDB index types.""" + + FIELD = "field" + COMPOUND = "compound" + TEXT = "text" + + +class _HashableDocument: + """Class for making documents hashable.""" + + def __init__(self, base: DocumentT): + self.base = base + + def __hash__(self) -> int: + return hash(self.base["_id"]) + + def __str__(self) -> str: + return str(self.base) + +
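# Hedged usage sketch (illustrative; `db` is an assumed, already-connected
# RedisMongoDB instance, and "node" mirrors the atom_type used by
# _setup_indexes below): MongoDBIndex wraps index management for a pymongo
# Collection, so a single-field index on node names could be created
# roughly like this:
#
#   index_id, index_props = MongoDBIndex(db.mongo_atoms_collection).create(
#       "node", fields=["name"]
#   )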
+ """ + self.collection = collection + + def create( + self, + atom_type: str, + fields: list[str], + index_type: Optional[MongoIndexType] = None, + **kwargs, + ) -> tuple[str, Any]: + conditionals = {} + if fields is None or len(fields) == 0: + raise ValueError("Fields can not be empty or None") + + if kwargs: + key, value = next(iter(kwargs.items())) # only one key-value pair + conditionals = {key: {"$eq": value}} + + index_id = f"{atom_type}_{self.generate_index_id(','.join(fields), conditionals)}" + ( + f"_{index_type.value}" if index_type else "" + ) + idx_type: MongoIndexType = index_type or ( + MongoIndexType.COMPOUND if len(fields) > 1 else MongoIndexType.FIELD + ) + index_props = { + "index_type": idx_type, + "conditionals": conditionals, + "index_name": index_id, + "fields": fields, + } + + index_conditionals: dict[str, Any] = {"name": index_id} + + if conditionals: + index_conditionals["partialFilterExpression"] = index_props["conditionals"] + + index_list: list[tuple[str, Any]] + if idx_type == MongoIndexType.TEXT: + index_list = [(f, "text") for f in fields] + else: + index_list = [(f, ASCENDING) for f in fields] # store the index in ascending order + + if not self.index_exists(index_id): + return ( + self.collection.create_index(index_list, **index_conditionals), + index_props, + ) + else: + return index_id, index_props + + def index_exists(self, index_id: str) -> bool: + indexes = self.collection.list_indexes() + index_ids = [index.get("name") for index in indexes] + return index_id in index_ids + + +class RedisMongoDB(AtomDB): + """A concrete implementation using Redis and Mongo database""" + + mongo_db: Database + + def __repr__(self) -> str: + return "" # pragma no cover + + def __init__(self, **kwargs: Optional[dict[str, Any]]) -> None: + """Initialize an instance of a custom class with Redis and MongoDB connections.""" + super().__init__() + self.database_name = "das" + self.max_pos_size_custom_index_template = 4 + + self._setup_databases(**kwargs) + + self.mongo_atoms_collection = self.mongo_db.get_collection(MongoCollectionNames.ATOMS) + self.mongo_types_collection = self.mongo_db.get_collection(MongoCollectionNames.ATOM_TYPES) + self.all_mongo_collections = [ + (MongoCollectionNames.ATOMS, self.mongo_atoms_collection), + (MongoCollectionNames.ATOM_TYPES, self.mongo_types_collection), + ] + self.pattern_index_templates: list[dict[str, Any]] | None = None + self.pattern_templates: list | None = None + self.mongo_das_config_collection: Collection | None = None + if MongoCollectionNames.DAS_CONFIG not in self.mongo_db.list_collection_names(): + self.mongo_db.create_collection(MongoCollectionNames.DAS_CONFIG) + + self.mongo_das_config_collection = self.mongo_db.get_collection( + MongoCollectionNames.DAS_CONFIG + ) + self._setup_indexes(kwargs) + self.wildcard_hash = ExpressionHasher.compute_hash(WILDCARD) + self.typedef_mark_hash = ExpressionHasher.compute_hash(":") + self.typedef_base_type_hash = ExpressionHasher.compute_hash("Type") + + self.named_type_hash: dict[str, str] = {} + self.hash_length = len(self.typedef_base_type_hash) + self.typedef_composite_type_hash = ExpressionHasher.composite_hash( + [ + self.typedef_mark_hash, + self.typedef_base_type_hash, + self.typedef_base_type_hash, + ] + ) + self.mongo_bulk_insertion_buffer: dict[ + MongoCollectionNames, + tuple[Collection[Mapping[str, Any]], set[_HashableDocument]], + ] = { + collection_name: (collection, set()) + for collection_name, collection in self.all_mongo_collections + } + 
self.mongo_bulk_insertion_limit = 100000 + self.max_mongo_db_document_size = 16000000 + logger().info("Database setup finished") + + def _setup_databases(self, **kwargs) -> None: + """ + Set up connections to MongoDB and Redis databases with the provided parameters. + + Args: + **kwargs: Additional keyword arguments for database configuration, including: + - mongo_hostname (str) : The hostname for the MongoDB server. + Defaults to 'localhost'. + - mongo_port (int) : The port number for the MongoDB server. + Defaults to 27017. + - mongo_username (str) : The username for MongoDB authentication. + Defaults to 'mongo'. + - mongo_password (str) : The password for MongoDB authentication. + Defaults to 'mongo'. + - mongo_tls_ca_file (str): The path to the TLS CA file for MongoDB. + Defaults to None. + - redis_hostname (str) : The hostname for the Redis server. + Defaults to 'localhost'. + - redis_port (int) : The port number for the Redis server. + Defaults to 6379. + - redis_username (str) : The username for Redis authentication. + Defaults to None. + - redis_password (str) : The password for Redis authentication. + Defaults to None. + - redis_cluster (bool) : Whether to use Redis in cluster mode. + Defaults to True. + - redis_ssl (bool) : Whether to use SSL for Redis connection. + Defaults to True. + + Raises: + ConnectionMongoDBException: If there is an error connecting to the MongoDB server. + ConnectionRedisException: If there is an error connecting to the Redis server. + """ + mongo_hostname: str = kwargs.get("mongo_hostname", "localhost") + mongo_port: int = kwargs.get("mongo_port", 27017) + mongo_username: str = kwargs.get("mongo_username", "mongo") + mongo_password: str = kwargs.get("mongo_password", "mongo") + mongo_tls_ca_file: str | None = kwargs.get("mongo_tls_ca_file", None) + redis_hostname: str = kwargs.get("redis_hostname", "localhost") + redis_port: int = kwargs.get("redis_port", 6379) + redis_username: str | None = kwargs.get("redis_username", None) + redis_password: str | None = kwargs.get("redis_password", None) + redis_cluster: bool = kwargs.get("redis_cluster", True) + redis_ssl: bool = kwargs.get("redis_ssl", True) + + self.mongo_db = self._connection_mongo_db( + mongo_hostname, + mongo_port, + mongo_username, + mongo_password, + mongo_tls_ca_file, + ) + self.redis = self._connection_redis( + redis_hostname, + redis_port, + redis_username, + redis_password, + redis_cluster, + redis_ssl, + ) + + def _connection_mongo_db( + self, + mongo_hostname: str, + mongo_port: int, + mongo_username: str, + mongo_password: str, + mongo_tls_ca_file: str | None, + ) -> Database: + """ + Establish a connection to the MongoDB database using the provided parameters. + + This method constructs a MongoDB connection URL using the provided hostname, port, + username, password, and optional TLS CA file. It attempts to connect to the MongoDB + database and returns the connected database instance. If an error occurs during the + connection, it logs the error and raises a ConnectionMongoDBException. + + Args: + mongo_hostname (str): The hostname for the MongoDB server. + mongo_port (int): The port number for the MongoDB server. + mongo_username (str): The username for MongoDB authentication. + mongo_password (str): The password for MongoDB authentication. + mongo_tls_ca_file (str | None): The path to the TLS CA file for MongoDB. Defaults to None. + + Returns: + Database: The connected MongoDB database instance. 
+ + Raises: + ConnectionMongoDBException: If there is an error creating the MongoDB client. + """ + mongo_url = f"mongodb://{mongo_username}:{mongo_password}@{mongo_hostname}:{mongo_port}" + if mongo_tls_ca_file: # aws + mongo_url += ( + f"?tls=true&tlsCAFile={mongo_tls_ca_file}" + "&replicaSet=rs0&readPreference=secondaryPreferred&retryWrites=false" + ) + logger().info(f"Connecting to MongoDB at {mongo_url}") + try: + self.mongo_db = MongoClient(mongo_url)[self.database_name] + return self.mongo_db + except ValueError as e: + logger().error(f"An error occurred while creating a MongoDB client - Details: {str(e)}") + raise ConnectionMongoDBException(message="error creating a MongoClient", details=str(e)) + + @staticmethod + def _connection_redis( + redis_hostname: str, + redis_port: int, + redis_username: str | None, + redis_password: str | None, + redis_cluster: bool = False, + redis_ssl: bool = False, + ) -> Redis | RedisCluster: + """ + Establish a connection to the Redis database using the provided parameters. + + Args: + redis_hostname (str): The hostname for the Redis server. + redis_port (int): The port number for the Redis server. + redis_username (str | None): The username for Redis authentication. + redis_password (str | None): The password for Redis authentication. + redis_cluster (bool): Whether to use Redis in cluster mode. Defaults to False. + redis_ssl (bool): Whether to use SSL for Redis connection. Defaults to False. + + Returns: + Redis | RedisCluster: The connected Redis or RedisCluster instance. + """ + redis_type = "Redis cluster" if redis_cluster else "Standalone Redis" + + message = ( + f"Connecting to {redis_type} at " + + ( + f"{redis_username}:{len(redis_password)*'*'}@" + if redis_username and redis_password + else "" + ) + + f"{redis_hostname}:{redis_port}. ssl: {redis_ssl}" + ) + + logger().info(message) + + redis_connection = { + "host": redis_hostname, + "port": redis_port, + "decode_responses": True, + "ssl": redis_ssl, + } + + if redis_password and redis_username: + redis_connection["password"] = redis_password + redis_connection["username"] = redis_username + + if redis_cluster: + return RedisCluster(**redis_connection) # type: ignore + else: + return Redis(**redis_connection) # type: ignore + + def _validate_index_templates(self, templates): + validator = { + "field": r"(named_type|targets\[\d+\])", + "value": r".+", + "positions": r"\[(\d+(,\s*\d+)*)?\]", + "arity": r"[0-9]+", + } + for template in templates: + if not isinstance(template, dict): + raise ValueError("Each template must be a dict") + for k in template.keys(): + if re.search(validator[k], str(template[k])) is None: + raise ValueError(f"Value '{template[k]}' is not supported in '{k}'.") + + def _save_pattern_index(self, pattern_index): + self._validate_index_templates(pattern_index) + if self.mongo_das_config_collection is not None: + self.mongo_das_config_collection.replace_one( + {"_id": "pattern_index_templates"}, + {"_id": "pattern_index_templates", "templates": pattern_index}, + upsert=True, + ) + self.pattern_templates = pattern_index +
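# Hedged usage sketch (illustrative; assumes a reachable MongoDB/Redis, and
# only uses kwargs actually read by _setup_databases and _load_pattern_index):
# custom pattern index templates can be passed as a constructor kwarg, since
# __init__ forwards its kwargs to _setup_indexes(), which hands them to
# _load_pattern_index() below. Template keys must satisfy
# _validate_index_templates() above:
#
#   db = RedisMongoDB(
#       redis_cluster=False,
#       redis_ssl=False,
#       pattern_index_templates=[
#           {"field": "named_type", "value": "*", "positions": [0, 1], "arity": 2},
#       ],
#   )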
+ def _load_pattern_index(self, options: dict[str, Any] | None) -> None: + """ + This method initializes the default pattern index templates based on various + combinations of named type and selected positions. It retrieves the custom + pattern index templates from the DAS_CONFIG collection in the MongoDB database + if they exist. Otherwise, it sets the default pattern index templates and + saves them in the MongoDB database. + + Args: + options (dict | None): Dict containing the key 'pattern_index_templates', a list of + templates. + + """ + default_templates = [ + {"field": "named_type", "value": "*", "positions": [0, 1, 2], "arity": 3} + ] + user_templates = None + found = None + if options is not None: + user_templates = options.get("pattern_index_templates", None) + + if self.mongo_das_config_collection is not None: + found = self.mongo_das_config_collection.find_one({"_id": "pattern_index_templates"}) + + if found is not None: + self.pattern_templates = found.get("templates", None) if found else None + if self.pattern_templates is not None: + if user_templates is not None and user_templates != self.pattern_templates: + raise ValueError( + "'pattern_index_templates' value doesn't match the templates found in the database" + ) + else: + self._save_pattern_index(user_templates if user_templates else default_templates) + + def _validate_template_index_and_get_position(self, template): + if not isinstance(template["arity"], int) or template["arity"] < 0: + raise ValueError("'arity' must be an integer greater than or equal to zero.") + + if len(template["positions"]) > self.max_pos_size_custom_index_template: + raise ValueError( + f"'positions' array must have at most {self.max_pos_size_custom_index_template} elements." + ) + + if any(pos >= template["arity"] for pos in template["positions"]): + raise ValueError("'positions' values must be within the range of the arity.") + + if template["field"] != "named_type" and ( + found := re.search(r"targets\[(\d+)]", template["field"]) + ): + target_pos = int(found.group(1)) + if target_pos >= template["arity"]: + raise ValueError("'targets[]' index must be within the range of the arity.") + else: + target_pos = None + + return target_pos + + def _setup_indexes(self, options: dict[str, Any] | None) -> None: + """ + This method reads the template list and generates the index templates. + Additionally, it creates a field index for node names. + + Args: + options (dict | None): Dict containing the key 'pattern_index_templates', a list of + templates. + """ + self._load_pattern_index(options) + if not self.pattern_templates: + raise ValueError("Index not loaded") + + self.pattern_index_templates = [] + for template in self.pattern_templates: + is_named_type = template["field"] == "named_type" + p_size = len(template["positions"]) + arity = template["arity"] + i_size = p_size + 1 + is_wild_card = template["value"] == "*" + target_pos = self._validate_template_index_and_get_position(template) + + values = itertools.product([True, False], repeat=i_size) + + for v in values: + if is_wild_card and all(v) and arity == p_size: + continue + if p_size == 0 and not is_wild_card: + continue + if not is_wild_card and not v[0] and is_named_type: + continue + t = { + FieldNames.TYPE_NAME: v[0] + if is_wild_card or not is_named_type + else ExpressionHasher.named_type_hash(template["value"]), + "target_position": target_pos, + "target_value": None if is_named_type else template["value"], + "selected_positions": [ + template["positions"][i] for i, pos in enumerate(v[1:]) if pos + ], + } + self.pattern_index_templates.append(t) + # NOTE creating index for name search + self.create_field_index("node", fields=["name"]) + + def _retrieve_document(self, handle: str) -> DocumentT | None: + """ + Retrieve a document from the MongoDB collection using the given handle. + + This method searches for a document in the MongoDB collection with the specified + handle.
If the document is found and it is a link, it adds the targets to the + document before returning it. + + Args: + handle (str): The unique identifier for the document to be retrieved. + + Returns: + DocumentT | None: The retrieved document if found, otherwise None. + """ + mongo_filter = {FieldNames.ID_HASH: handle} + if document := self.mongo_atoms_collection.find_one(mongo_filter): + if self._is_document_link(document) and FieldNames.TARGETS not in document: + document[FieldNames.TARGETS] = self._get_document_keys(document) + return document + return None + + def _build_named_type_hash_template(self, template: str | list[Any]) -> str | list[Any]: + """ + Build a named type hash template from the given template. + + This method processes the provided template, which can be a string or a nested list of + strings, and converts it into a hash template. If the template is a string, it retrieves + the hash for the named type. If the template is a list, it recursively processes each + element in the list to build the hash template. + + Args: + template (str | list[Any]): The template to be processed into a hash template. It + can be a string representing a named type or a nested list of strings representing + multiple named types. + + Returns: + str | list[Any]: The processed hash template corresponding to the provided template. + + Raises: + ValueError: If the template is not a string or a list of strings. + """ + if isinstance(template, str): + return ExpressionHasher.named_type_hash(template) + if isinstance(template, list): + return ExpressionHasher.composite_hash( + [self._build_named_type_hash_template(element) for element in template] + ) + raise ValueError("Template must be a string or an iterable of anything") + + @staticmethod + def _get_document_keys(document: DocumentT) -> HandleListT: + """ + Retrieve the keys from the given document. + + This method extracts the keys from the provided document. If the keys are not + directly available, it constructs them by iterating through the document with + a specific prefix pattern. + + Args: + document (DocumentT): The document from which to retrieve the keys. + + Returns: + HandleListT: A list of keys extracted from the document. + """ + answer = document.get(FieldNames.TARGETS, document.get(FieldNames.KEYS, None)) + if isinstance(answer, list): + return answer + elif isinstance(answer, dict): + return list(answer.values()) + + answer = [] + index = 0 + while (key := document.get(f"{FieldNames.KEY_PREFIX}_{index}", None)) is not None: + answer.append(key) + index += 1 + return answer + + def _filter_non_toplevel(self, matches: HandleSetT) -> HandleSetT: + """ + Filter out non-toplevel links from the given list of matches. + + This method iterates through the provided list of matches, retrieves the corresponding + link documents, and checks if each link is marked as toplevel. Only the toplevel links + are included in the returned list. + + Args: + matches (HandleSetT): A set of link handles to be filtered. + + Returns: + HandleSetT: A set of handles corresponding to toplevel links. 
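+
+        Example (illustrative handles): if only the link behind "h1" has the
+        IS_TOPLEVEL flag set, _filter_non_toplevel({"h1", "h2"}) returns {"h1"}.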
+ """ + return { + link_handle + for link_handle in matches + if ( + (link := self._retrieve_document(link_handle)) + and link.get(FieldNames.IS_TOPLEVEL, False) + ) + } + + def get_node_handle(self, node_type: str, node_name: str) -> str: + node_handle = self.node_handle(node_type, node_name) + document = self._retrieve_document(node_handle) + if document is not None: + return document[FieldNames.ID_HASH] + else: + logger().error( + f"Failed to retrieve node handle for {node_type}:{node_name}. " + f"This node may not exist." + ) + raise AtomDoesNotExist("Nonexistent atom", f"{node_type}:{node_name}") + + def get_node_name(self, node_handle: str) -> str: + answer = self._retrieve_name(node_handle) + if not answer: + logger().error( + f"Failed to retrieve node name for handle: {node_handle}. " + "The handle may be invalid or the corresponding node does not exist." + ) + raise ValueError(f"Invalid handle: {node_handle}") + return answer + + def get_node_type(self, node_handle: str) -> str | None: + document = self.get_atom(node_handle) + return document.named_type if isinstance(document, NodeT) else None # type: ignore + + def get_node_by_name(self, node_type: str, substring: str) -> HandleListT: + node_type_hash = ExpressionHasher.named_type_hash(node_type) + mongo_filter = { + FieldNames.COMPOSITE_TYPE_HASH: node_type_hash, + FieldNames.NODE_NAME: {"$regex": substring}, + } + return [ + document[FieldNames.ID_HASH] + for document in self.mongo_atoms_collection.find(mongo_filter) + ] + + def get_atoms_by_field(self, query: list[OrderedDict[str, str]]) -> HandleListT: + mongo_filter = collections.OrderedDict([(q["field"], q["value"]) for q in query]) + return [ + document[FieldNames.ID_HASH] + for document in self.mongo_atoms_collection.find(mongo_filter) + ] + + def get_atoms_by_index( + self, + index_id: str, + query: list[dict[str, Any]], + cursor: int = 0, + chunk_size: int = 500, + ) -> tuple[int, list[AtomT]]: + mongo_filter = collections.OrderedDict([(q["field"], q["value"]) for q in query]) + return self._get_atoms_by_index( + index_id, cursor=cursor, chunk_size=chunk_size, **mongo_filter + ) + + def get_atoms_by_text_field( + self, + text_value: str, + field: Optional[str] = None, + text_index_id: Optional[str] = None, + ) -> HandleListT: + if field is not None: + mongo_filter = { + field: {"$regex": text_value}, + } + else: + mongo_filter = {"$text": {"$search": text_value}} + + if text_index_id is not None: + return [ + document[FieldNames.ID_HASH] + for document in self.mongo_atoms_collection.find(mongo_filter).hint(text_index_id) + ] + + return [ + document[FieldNames.ID_HASH] + for document in self.mongo_atoms_collection.find(mongo_filter) + ] + + def get_node_by_name_starting_with(self, node_type: str, startswith: str): + node_type_hash = ExpressionHasher.named_type_hash(node_type) + mongo_filter = { + FieldNames.COMPOSITE_TYPE_HASH: node_type_hash, + FieldNames.NODE_NAME: {"$regex": f"^{startswith}"}, + } + # NOTE check projection to return only required fields, less data, but is faster? 
+ # ex: self.mongo_atoms_collection.find(mongo_filter, projection={FieldNames.ID_HASH: 1} + return [ + document[FieldNames.ID_HASH] + for document in self.mongo_atoms_collection.find(mongo_filter) + ] + + def get_all_nodes_handles(self, node_type: str) -> list[str]: + return [ + document[FieldNames.ID_HASH] + for document in self.mongo_atoms_collection.find({FieldNames.TYPE_NAME: node_type}) + ] + + def get_all_nodes_names(self, node_type: str) -> list[str]: + return [ + document[FieldNames.NODE_NAME] + for document in self.mongo_atoms_collection.find({FieldNames.TYPE_NAME: node_type}) + ] + + def get_all_links(self, link_type: str, **kwargs) -> HandleSetT: + pymongo_cursor = self.mongo_atoms_collection.find({FieldNames.TYPE_NAME: link_type}) + return {document[FieldNames.ID_HASH] for document in pymongo_cursor} + + def get_link_handle(self, link_type: str, target_handles: HandleListT) -> str: + link_handle = self.link_handle(link_type, target_handles) + document = self._retrieve_document(link_handle) + if document is not None: + return document[FieldNames.ID_HASH] + else: + logger().error( + f"Failed to retrieve link handle for {link_type}:{target_handles}. " + "This link may not exist." + ) + raise AtomDoesNotExist("Nonexistent atom", f"{link_type}:{target_handles}") + + def get_link_targets(self, link_handle: str) -> HandleListT: + answer = self._retrieve_outgoing_set(link_handle) + if not answer: + logger().error( + f"Failed to retrieve link targets for handle: {link_handle}. " + "The handle may be invalid or the corresponding link does not exist." + ) + raise ValueError(f"Invalid handle: {link_handle}") + return answer + + def get_matched_links( + self, link_type: str, target_handles: HandleListT, **kwargs + ) -> HandleSetT: + if link_type != WILDCARD and WILDCARD not in target_handles: + try: + link_handle = self.get_link_handle(link_type, target_handles) + return {link_handle} + except AtomDoesNotExist: + return set() + + link_type_hash = ( + WILDCARD if link_type == WILDCARD else ExpressionHasher.named_type_hash(link_type) + ) + + pattern_hash = ExpressionHasher.composite_hash([link_type_hash, *target_handles]) + patterns_matched = self._retrieve_hash_targets_value(KeyPrefix.PATTERNS, pattern_hash) + if kwargs.get("toplevel_only", False): + return self._filter_non_toplevel(patterns_matched) + else: + return patterns_matched + + def get_incoming_links_handles(self, atom_handle: str, **kwargs) -> HandleListT: + links = self._retrieve_incoming_set(atom_handle, **kwargs) + return list(links) + + def get_incoming_links_atoms(self, atom_handle: str, **kwargs) -> list[AtomT]: + links = self._retrieve_incoming_set(atom_handle, **kwargs) + return [self.get_atom(handle, **kwargs) for handle in links] + + def get_matched_type_template(self, template: list[Any], **kwargs) -> HandleSetT: + try: + template_hash = self._build_named_type_hash_template(template) + templates_matched = self._retrieve_hash_targets_value( + KeyPrefix.TEMPLATES, template_hash + ) + if kwargs.get("toplevel_only", False): + return self._filter_non_toplevel(templates_matched) + else: + return templates_matched + except Exception as exception: + logger().error(f"Failed to get matched type template - Details: {str(exception)}") + raise ValueError(str(exception)) + + def get_matched_type(self, link_type: str, **kwargs) -> HandleSetT: + named_type_hash = ExpressionHasher.named_type_hash(link_type) + templates_matched = self._retrieve_hash_targets_value(KeyPrefix.TEMPLATES, named_type_hash) + if kwargs.get("toplevel_only", 
False):
+            return self._filter_non_toplevel(templates_matched)
+        else:
+            return templates_matched
+
+    def get_link_type(self, link_handle: str) -> str | None:
+        document = self.get_atom(link_handle)
+        return document.named_type if isinstance(document, LinkT) else None  # type: ignore
+
+    def _build_atom_from_dict(self, document: DocumentT) -> AtomT:
+        """
+        Builds an Atom object from a dictionary.
+
+        Args:
+            document (DocumentT): The dictionary representing the atom.
+
+        Returns:
+            AtomT: The constructed Atom object.
+        """
+        if "targets" in document:
+            link = LinkT(
+                handle=document[FieldNames.ID_HASH],
+                _id=document[FieldNames.ID_HASH],
+                named_type=document[FieldNames.TYPE_NAME],
+                targets=document[FieldNames.TARGETS],
+                composite_type=document[FieldNames.COMPOSITE_TYPE],
+                is_toplevel=document.get(FieldNames.IS_TOPLEVEL, True),
+                named_type_hash=document[FieldNames.TYPE_NAME_HASH],
+                composite_type_hash=document[FieldNames.COMPOSITE_TYPE_HASH],
+                custom_attributes=document.get(FieldNames.CUSTOM_ATTRIBUTES, dict()),
+            )
+            return link
+        elif "name" in document:
+            node = NodeT(
+                handle=document[FieldNames.ID_HASH],
+                _id=document[FieldNames.ID_HASH],
+                named_type=document[FieldNames.TYPE_NAME],
+                name=document[FieldNames.NODE_NAME],
+                composite_type_hash=document[FieldNames.COMPOSITE_TYPE_HASH],
+                custom_attributes=document.get(FieldNames.CUSTOM_ATTRIBUTES, dict()),
+            )
+            return node
+        else:
+            raise ValueError("Invalid atom type")
+
+    def _get_atom(self, handle: str) -> AtomT | None:
+        document = self._retrieve_document(handle)
+        if not document:
+            return None
+        return self._build_atom_from_dict(document)
+
+    def get_atom_type(self, handle: str) -> str | None:
+        atom = self._retrieve_document(handle)
+        if atom is None:
+            return None
+        return atom[FieldNames.TYPE_NAME]
+
+    def count_atoms(self, parameters: dict[str, Any] | None = None) -> dict[str, int]:
+        atom_count = self.mongo_atoms_collection.estimated_document_count()
+        return_count = {"atom_count": atom_count}
+        if parameters and parameters.get("precise"):
+            nodes_count = self.mongo_atoms_collection.count_documents(
+                {FieldNames.COMPOSITE_TYPE: {"$exists": False}}
+            )
+            links_count = self.mongo_atoms_collection.count_documents(
+                {FieldNames.COMPOSITE_TYPE: {"$exists": True}}
+            )
+            return_count["node_count"] = nodes_count
+            return_count["link_count"] = links_count
+            return return_count
+
+        return return_count
+
+    def clear_database(self) -> None:
+        """
+        Delete all data from the connected MongoDB and Redis databases.
+
+        This method drops all collections in the MongoDB database and flushes
+        all data from the Redis cache, effectively wiping the databases clean.
+        """
+        mongo_collections = self.mongo_db.list_collection_names()
+
+        for collection in mongo_collections:
+            self.mongo_db[collection].drop()
+
+        self.redis.flushall()
+
+    def commit(self, **kwargs) -> None:
+        id_tag = FieldNames.ID_HASH
+
+        if kwargs.get("buffer"):
+            try:
+                for document in kwargs["buffer"]:
+                    self.mongo_atoms_collection.replace_one(
+                        {id_tag: document[id_tag]}, document, upsert=True
+                    )
+                    self._update_atom_indexes([document])
+
+            except Exception as e:
+                logger().error(f"Failed to commit buffer - Details: {str(e)}")
+                raise e
+        else:
+            for key, (collection, buffer) in self.mongo_bulk_insertion_buffer.items():
+                if buffer:
+                    if key == MongoCollectionNames.ATOM_TYPES:
+                        msg = "Failed to commit Atom Types. This operation is not allowed"
+                        logger().error(msg)
+                        raise InvalidOperationException(msg)
+
+                    for hashtable in buffer:
+                        document = hashtable.base
+                        collection.replace_one({id_tag: document[id_tag]}, document, upsert=True)
+                        self._update_atom_indexes([document])
+
+                buffer.clear()
+
+    def add_node(self, node_params: NodeT) -> NodeT | None:
+        node: NodeT = self._build_node(node_params)
+        if sys.getsizeof(node_params.name) < self.max_mongo_db_document_size:
+            _, buffer = self.mongo_bulk_insertion_buffer[MongoCollectionNames.ATOMS]
+            buffer.add(_HashableDocument(node.to_dict()))
+            if len(buffer) >= self.mongo_bulk_insertion_limit:
+                self.commit()
+            return node
+        else:
+            logger().warning(f"Discarding atom whose name is too large: {node.name}")
+            return None
+
+    def _build_link(self, link_params: LinkT, toplevel: bool = True) -> LinkT | None:
+        # This is necessary because `_build_link` in the parent class (implemented in C++)
+        # calls back to `add_link`. Without this, `nanobind` is not able to find the `add_link`
+        # implementation in the child class, and raises a `RuntimeError` saying that it is
+        # trying to call an abstract (pure virtual) method.
+        return super()._build_link(link_params, toplevel)
+
+    def add_link(self, link_params: LinkT, toplevel: bool = True) -> LinkT | None:
+        link: LinkT | None = self._build_link(link_params, toplevel)
+        if link is None:
+            return None
+        _, buffer = self.mongo_bulk_insertion_buffer[MongoCollectionNames.ATOMS]
+        buffer.add(_HashableDocument(link.to_dict()))
+        if len(buffer) >= self.mongo_bulk_insertion_limit:
+            self.commit()
+        return link
+
+    def _get_and_delete_links_by_handles(self, handles: HandleListT) -> list[DocumentT]:
+        documents = []
+        for handle in handles:
+            if document := self.mongo_atoms_collection.find_one_and_delete(
+                {FieldNames.ID_HASH: handle}
+            ):
+                documents.append(document)
+        return documents
+
+    @staticmethod
+    def _apply_index_template(
+        template: dict[str, Any], named_type: str, targets: HandleListT, arity: int
+    ) -> str | None:
+        """
+        Apply the index template to generate a Redis key.
+
+        This method constructs a Redis key by applying the provided index template. The key
+        is built using the named type and the targets, with specific positions selected based
+        on the template. The key is then hashed to create a unique identifier.
+
+        Args:
+            template (dict[str, Any]): The index template containing the type name and selected
+                positions.
+            named_type (str): The named type to be included in the key.
+            targets (HandleListT): The list of target handles to be included in the key.
+            arity (int): The arity of the link, indicating the number of targets.
+
+        Returns:
+            str | None: The generated Redis key after applying the index template, or None if
+                the template does not apply to the given link.
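+
+        Example (illustrative): for a template that selects positions [0, 2] of a
+        3-ary link, the key is hashed from [named_type, WILDCARD, targets[1], WILDCARD]
+        (or starts with WILDCARD when the template's type name is the boolean True),
+        and the result is prefixed with the "patterns" key prefix.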
+ """ + key = None + if isinstance(template[FieldNames.TYPE_NAME], bool): + key = [WILDCARD] if template[FieldNames.TYPE_NAME] else [named_type] + else: + if named_type == template[FieldNames.TYPE_NAME]: + key = [template[FieldNames.TYPE_NAME]] + + if key is None: + return None + + target_selected_pos = template["selected_positions"] + if template["target_position"] is not None and len(targets) > template["target_position"]: + if targets[template["target_position"]] == template["target_value"]: + for cursor in range(arity): + key.append(WILDCARD if cursor in target_selected_pos else targets[cursor]) + else: + return None + else: + for cursor in range(arity): + key.append(WILDCARD if cursor in target_selected_pos else targets[cursor]) + return _build_redis_key(KeyPrefix.PATTERNS, ExpressionHasher.composite_hash(key)) + + def _retrieve_incoming_set(self, handle: str, **kwargs) -> HandleSetT: + """ + Retrieve the incoming set for the given handle from Redis. + + This method constructs a Redis key using the provided handle and retrieves the members + of the incoming set associated with that key. + + Args: + handle (str): The unique identifier for the atom whose incoming set is to be retrieved. + **kwargs: Additional keyword arguments. + + Returns: + HandleSetT: Set of members for the given key + """ + key = _build_redis_key(KeyPrefix.INCOMING_SET, handle) + return self._get_redis_members(key) + + def _delete_smember_incoming_set(self, handle: str, smember: str) -> None: + """ + Remove a specific member from the incoming set of the given handle in Redis. + + This method constructs a Redis key using the provided handle and removes the specified + member from the incoming set associated with that key. + + Args: + handle (str): The unique identifier for the atom whose incoming set member is to be + removed. + smember (str): The member to be removed from the incoming set. + + Returns: + None + """ + key = _build_redis_key(KeyPrefix.INCOMING_SET, handle) + self.redis.srem(key, smember) + + def _retrieve_and_delete_incoming_set(self, handle: str) -> HandleListT: + """ + Retrieve and delete the incoming set for the given handle from Redis. + + This method constructs a Redis key using the provided handle, retrieves all members + of the incoming set associated with that key, and then deletes the key from Redis. + + Args: + handle (str): The unique identifier for the atom whose incoming set is to be + retrieved and deleted. + + Returns: + HandleListT: A list of members in the incoming set before deletion. + """ + key = _build_redis_key(KeyPrefix.INCOMING_SET, handle) + data: HandleListT = list(self.redis.smembers(key)) # type: ignore + self.redis.delete(key) + return data + + def _retrieve_outgoing_set(self, handle: str, delete: bool = False) -> HandleListT: + """ + Retrieve the outgoing set for the given handle from Redis. + + This method constructs a Redis key using the provided handle and retrieves the members + of the outgoing set associated with that key. If the delete flag is set to True, the + key is deleted from Redis after retrieving the members. + + Args: + handle (str): The unique identifier for the atom whose outgoing set is to be retrieved. + delete (bool): Whether to delete the key from Redis after retrieving the members. + Defaults to False. + + Returns: + HandleListT: A list of members in the outgoing set. 
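+
+        Note on the storage format: the value is a single string of concatenated
+        fixed-length handles, so a value of length 3 * hash_length decodes into a
+        list of three handles.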
+        """
+        key = _build_redis_key(KeyPrefix.OUTGOING_SET, handle)
+        value: str | None
+        if delete:
+            value = self.redis.getdel(key)  # type: ignore
+        else:
+            value = self.redis.get(key)  # type: ignore
+        if value is None:
+            return []
+        arity = len(value) // self.hash_length
+        return [
+            value[(offset * self.hash_length) : ((offset + 1) * self.hash_length)]  # noqa: E203
+            for offset in range(arity)
+        ]
+
+    def _retrieve_name(self, handle: str) -> str | None:
+        """
+        Retrieve the name associated with the given handle from Redis.
+
+        This method constructs a Redis key using the provided handle and retrieves the name
+        associated with that key. If the name is not found, it returns None.
+
+        Args:
+            handle (str): The unique identifier for the atom whose name is to be retrieved.
+
+        Returns:
+            str | None: The name associated with the given handle if found, otherwise None.
+        """
+        key = _build_redis_key(KeyPrefix.NAMED_ENTITIES, handle)
+        name: str = self.redis.get(key)  # type: ignore
+        if name:
+            return name
+        else:
+            return None
+
+    def _retrieve_hash_targets_value(self, key_prefix: str, handle: str | list[Any]) -> HandleSetT:
+        """
+        Retrieve the hash targets value for the given handle from Redis.
+
+        This method constructs a Redis key using the provided key prefix and handle, and
+        retrieves the members associated with that key. If no members are found, it returns
+        an empty set.
+
+        Args:
+            key_prefix (str): The prefix to be used in the Redis key.
+            handle (str): The unique identifier for the atom whose hash targets value is to be
+                retrieved.
+
+        Returns:
+            HandleSetT: Set of members in the hash targets value.
+        """
+        key = _build_redis_key(key_prefix, handle)
+        return self._get_redis_members(key)
+
+    def _delete_smember_template(self, handle: str, smember: str) -> None:
+        """
+        Remove a specific member from the template set of the given handle in Redis.
+
+        This method constructs a Redis key using the provided handle and removes the specified
+        member from the template set associated with that key.
+
+        Args:
+            handle (str): The unique identifier for the atom whose template member is to be
+                removed.
+            smember (str): The member to be removed from the template set.
+        """
+        key = _build_redis_key(KeyPrefix.TEMPLATES, handle)
+        self.redis.srem(key, smember)
+
+    def _retrieve_custom_index(self, index_id: str) -> dict[str, Any] | None:
+        """
+        Retrieve a custom index from Redis using the given index ID.
+
+        This method constructs a Redis key using the provided index ID and attempts to retrieve
+        the custom index associated with that key. The custom index is expected to be stored as
+        a base64-encoded, pickled dictionary. If the custom index is not found or if there is an
+        error during retrieval, appropriate logging is performed and the method returns None.
+
+        Args:
+            index_id (str): The unique identifier for the custom index to be retrieved.
+
+        Returns:
+            dict[str, Any] | None: The retrieved custom index as a dictionary if found, otherwise
+                None.
+
+        Raises:
+            ConnectionError: If there is an error connecting to Redis.
+            Exception: If there is an unexpected error during retrieval.
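+
+        Note: the stored value is produced by create_field_index, which pickles the
+        index properties dict and base64-encodes it; this method reverses that encoding.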
+        """
+        try:
+            key = _build_redis_key(KeyPrefix.CUSTOM_INDEXES, index_id)
+            custom_index_str: str | None = self.redis.get(key)  # type: ignore
+
+            if custom_index_str is None:
+                logger().info(f"Custom index with ID {index_id} not found in Redis")
+                return None
+
+            custom_index_bytes = base64.b64decode(custom_index_str)
+            custom_index: dict[str, Any] = pickle.loads(custom_index_bytes)
+
+            if not isinstance(custom_index, dict):
+                logger().error(f"Custom index with ID {index_id} is not a dictionary")
+                raise ValueError("Custom index is not a dictionary")
+
+            return custom_index
+        except ConnectionError as e:
+            logger().error(f"Error connecting to Redis: {e}")
+            raise e
+        except Exception as e:
+            logger().error(f"Unexpected error retrieving custom index with ID {index_id}: {e}")
+            raise e
+
+    def _get_redis_members(self, key: str) -> HandleSetT:
+        """
+        Retrieve members from a Redis set.
+
+        Args:
+            key (str): The key of the set in Redis.
+
+        Returns:
+            HandleSetT: Set of members retrieved from Redis.
+        """
+        return set(self.redis.smembers(key))  # type: ignore
+
+    def _update_atom_indexes(self, documents: Iterable[DocumentT], **kwargs) -> None:
+        """
+        Update the indexes for the given documents in the database.
+
+        This method iterates through the provided documents and updates the corresponding
+        indexes. If a document is identified as a link, it updates the link index; otherwise,
+        it updates the node index.
+
+        Args:
+            documents (Iterable[DocumentT]): An iterable of documents to be indexed.
+            **kwargs: Additional keyword arguments for index updates.
+        """
+        for document in documents:
+            if self._is_document_link(document):
+                self._update_link_index(document, **kwargs)
+            else:
+                self._update_node_index(document, **kwargs)
+
+    def _update_node_index(self, document: DocumentT, **kwargs) -> None:
+        """
+        Update the index for the given node document in the database.
+
+        This method updates the Redis index for the provided node document. It constructs a Redis
+        key using the document's handle and updates the node name in the Redis cache. If the
+        `delete_atom` flag is set to True, it deletes the Redis key and any associated incoming
+        links for the node.
+
+        Args:
+            document (DocumentT): The node document to be indexed.
+            **kwargs: Additional keyword arguments for index updates. Supports `delete_atom` to
+                indicate whether the node should be deleted from the index.
+        """
+        handle = document[FieldNames.ID_HASH]
+        node_name = document[FieldNames.NODE_NAME]
+        key = _build_redis_key(KeyPrefix.NAMED_ENTITIES, handle)
+        if kwargs.get("delete_atom", False):
+            self.redis.delete(key)
+            if links_handle := self._retrieve_and_delete_incoming_set(handle):
+                documents = self._get_and_delete_links_by_handles(links_handle)
+                for _document in documents:
+                    self._update_link_index(_document, delete_atom=True)
+        else:
+            self.redis.set(key, node_name)
+
+    def _update_link_index(self, document: DocumentT, **kwargs) -> None:
+        """
+        Update the index for the given link document in the database.
+
+        This method updates the Redis index for the provided link document. It constructs a Redis
+        key using the document's handle and updates the link targets in the Redis cache. If the
+        `delete_atom` flag is set to True, it deletes the Redis key and any associated incoming
+        links for the link.
+
+        Args:
+            document (DocumentT): The link document to be indexed.
+            **kwargs: Additional keyword arguments for index updates. Supports `delete_atom` to
+                indicate whether the link should be deleted from the index.
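+
+        In the non-delete path this method writes, for the link: its "outgoing_set"
+        value (the concatenated target handles), one "templates" set entry per type
+        hash, one "patterns" set entry per matching index template, and one
+        "incoming_set" entry per target.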
+ """ + handle: str = document[FieldNames.ID_HASH] + targets: HandleListT = self._get_document_keys(document) + targets_str: str = "".join(targets) + arity: int = len(targets) + named_type_hash: str = document[FieldNames.TYPE_NAME_HASH] + + index_templates = self.pattern_index_templates or [] + + if kwargs.get("delete_atom", False): + links_handle = self._retrieve_and_delete_incoming_set(handle) + + if links_handle: + docs = self._get_and_delete_links_by_handles(links_handle) + for doc in docs: + self._update_link_index(doc, delete_atom=True) + + outgoing_atoms = self._retrieve_outgoing_set(handle, delete=True) + + for atom_handle in outgoing_atoms: + self._delete_smember_incoming_set(atom_handle, handle) + + for type_hash in [ + FieldNames.COMPOSITE_TYPE_HASH, + FieldNames.TYPE_NAME_HASH, + ]: + self._delete_smember_template(document[type_hash], handle) + + for template in index_templates: + key = self._apply_index_template(template, named_type_hash, targets, arity) + if key: + self.redis.srem(key, handle) + else: + incoming_buffer: dict[str, HandleListT] = {} + key = _build_redis_key(KeyPrefix.OUTGOING_SET, handle) + self.redis.set(key, targets_str) + + for target in targets: + buffer = incoming_buffer.get(target, None) + if buffer is None: + buffer = [] + incoming_buffer[target] = buffer + buffer.append(handle) + + for type_hash in [ + FieldNames.COMPOSITE_TYPE_HASH, + FieldNames.TYPE_NAME_HASH, + ]: + key = _build_redis_key(KeyPrefix.TEMPLATES, document[type_hash]) + self.redis.sadd(key, handle) + + for template in index_templates: + key = self._apply_index_template(template, named_type_hash, targets, arity) + if key: + self.redis.sadd(key, handle) + + for handle in incoming_buffer: + key = _build_redis_key(KeyPrefix.INCOMING_SET, handle) + self.redis.sadd(key, *incoming_buffer[handle]) + + @staticmethod + def _is_document_link(document: DocumentT) -> bool: + """ + Determine if the given document is a link. + + This method checks if the provided document contains the `COMPOSITE_TYPE` field, which + indicates that the document is a link. + + Args: + document (DocumentT): The document to be checked. + + Returns: + bool: True if the document is a link, False otherwise. + """ + return FieldNames.COMPOSITE_TYPE in document + + @staticmethod + def _calculate_composite_type_hash(composite_type: list[Any]) -> str: + """ + Calculate the composite type hash for the given composite type. + + This method computes the hash for the provided composite type by iterating through + the elements of the composite type. If an element is a list, it recursively calculates + the hash for the nested list. The final hash is generated using the ExpressionHasher. + + Args: + composite_type (list[Any]): The composite type for which the hash is to be calculated. + + Returns: + str: The calculated composite type hash. + """ + + def calculate_composite_type_hashes(_composite_type: list[Any]) -> HandleListT: + return [ + ( + ExpressionHasher.composite_hash(calculate_composite_type_hashes(t)) + if isinstance(t, list) + else ExpressionHasher.named_type_hash(t) + ) + for t in _composite_type + ] + + composite_type_hashes_list = calculate_composite_type_hashes(composite_type) + return ExpressionHasher.composite_hash(composite_type_hashes_list) + + def _retrieve_documents_by_index( + self, collection: Collection, index_id: str, **kwargs + ) -> tuple[int, list[DocumentT]]: + """ + Retrieve documents from the specified MongoDB collection using the given index. 
+
+        This method retrieves documents from the provided MongoDB collection by utilizing the
+        specified index. It supports additional keyword arguments for cursor-based pagination
+        and chunk size.
+
+        Args:
+            collection (Collection): The MongoDB collection from which documents are to be
+                retrieved.
+            index_id (str): The identifier of the index to be used for retrieval.
+            **kwargs: Additional keyword arguments for retrieval.
+                - cursor (int, optional): The cursor position for pagination.
+                - chunk_size (int, optional): The number of documents to retrieve per chunk.
+
+        Returns:
+            tuple[int, list[DocumentT]]: A tuple containing the cursor position and a list of
+                retrieved documents.
+
+        Raises:
+            ValueError: If the specified index does not exist in the collection.
+        """
+        if MongoDBIndex(collection).index_exists(index_id):
+            cursor: int | None = kwargs.pop("cursor", None)
+            chunk_size = kwargs.pop("chunk_size", 500)
+
+            try:
+                # Fallback for indexes created by previous versions, where the
+                # conditionals were stored at the top level of the index properties
+                conditionals = self._retrieve_custom_index(index_id)
+                if isinstance(conditionals, dict) and (
+                    (c := conditionals.get("conditionals")) or c == {}
+                ):
+                    conditionals = c
+                if conditionals:
+                    kwargs.update(conditionals)
+            except Exception as e:
+                raise e
+
+            # hint() forces MongoDB to use the specified index for this query
+            pymongo_cursor = collection.find(kwargs).hint(index_id)
+
+            if cursor is not None:
+                pymongo_cursor.skip(cursor).limit(chunk_size)
+
+                documents = list(pymongo_cursor)
+
+                if not documents:
+                    return 0, []
+
+                if len(documents) < chunk_size:
+                    return 0, documents
+                else:
+                    return cursor + chunk_size, documents
+
+            return 0, list(pymongo_cursor)
+        else:
+            raise ValueError(f"Index '{index_id}' does not exist in collection '{collection}'")
+
+    def reindex(self, pattern_index_templates: list[DocumentT] | None = None) -> None:
+        if isinstance(pattern_index_templates, list):
+            self._save_pattern_index(deepcopy(pattern_index_templates))
+        self._setup_indexes({"pattern_index_templates": pattern_index_templates})
+        self.redis.flushall()
+        self._update_atom_indexes(self.mongo_atoms_collection.find({}))
+
+    def delete_atom(self, handle: str, **kwargs) -> None:
+        self.commit()
+
+        mongo_filter: dict[str, str] = {FieldNames.ID_HASH: handle}
+
+        document: DocumentT | None = self.mongo_atoms_collection.find_one_and_delete(mongo_filter)
+
+        if not document:
+            logger().error(
+                f"Failed to delete atom for handle: {handle}. "
+                f"This atom may not exist. - Details: {kwargs}"
+            )
+            raise AtomDoesNotExist(
+                message="Nonexistent atom",
+                details=f"handle: {handle}",
+            )
+        self._update_atom_indexes([document], delete_atom=True)
+
+    def create_field_index(
+        self,
+        atom_type: str,
+        fields: list[str],
+        named_type: Optional[str] = None,
+        composite_type: Optional[list[Any]] = None,
+        index_type: Optional[FieldIndexType] = None,
+    ) -> str:
+        if named_type and composite_type:
+            raise ValueError("Both named_type and composite_type cannot be specified")
+
+        if fields is None or len(fields) == 0:
+            raise ValueError("Fields cannot be empty or None")
+
+        kwargs: dict[str, Any] = {}
+
+        if named_type:
+            kwargs = {FieldNames.TYPE_NAME: named_type}
+        elif composite_type:
+            kwargs = {
+                FieldNames.COMPOSITE_TYPE_HASH: self._calculate_composite_type_hash(composite_type)
+            }
+
+        collection = self.mongo_atoms_collection
+
+        index_id = ""
+
+        mongo_index_type = (
+            MongoIndexType.TEXT if index_type == FieldIndexType.TOKEN_INVERTED_LIST else None
+        )
+
+        exc: Exception | None = None
+        try:
+            index_id, index_props = MongoDBIndex(collection).create(
+                atom_type, fields, index_type=mongo_index_type, **kwargs
+            )
+            serialized_index_props = pickle.dumps(index_props)
+            serialized_index_props_str = base64.b64encode(serialized_index_props).decode("utf-8")
+            self.redis.set(
+                _build_redis_key(KeyPrefix.CUSTOM_INDEXES, index_id),
+                serialized_index_props_str,
+            )
+        except pymongo_errors.OperationFailure as e:
+            exc = e
+            logger().error(f"Error creating index in collection '{collection}': {str(e)}")
+        except Exception as e:  # pylint: disable=broad-except
+            exc = e
+            logger().error(f"Error: {str(e)}")
+        finally:
+            if not index_id:
+                return (  # pylint: disable=lost-exception
+                    f"Index creation failed, Details: {str(exc)}"
+                    if exc
+                    else "Index creation failed"
+                )
+
+        return index_id
+
+    def _get_atoms_by_index(self, index_id: str, **kwargs) -> tuple[int, list[AtomT]]:
+        """
+        Retrieve atoms from the MongoDB collection using the specified index.
+
+        This method retrieves atoms from the MongoDB collection by utilizing the specified
+        index. It supports additional keyword arguments for cursor-based pagination and
+        chunk size.
+
+        Args:
+            index_id (str): The identifier of the index to be used for retrieval.
+            **kwargs: Additional keyword arguments for retrieval.
+                - cursor (int, optional): The cursor position for pagination.
+                - chunk_size (int, optional): The number of documents to retrieve per chunk.
+
+        Returns:
+            tuple[int, list[AtomT]]: A tuple containing the cursor position and a list of
+                retrieved atoms.
+
+        Raises:
+            Exception: If there is an error retrieving atoms by index.
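+
+        Example (illustrative index id):
+
+            >>> cursor, atoms = db._get_atoms_by_index("name_index_atoms", chunk_size=100)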
+        """
+        try:
+            cursor, documents = self._retrieve_documents_by_index(
+                self.mongo_atoms_collection, index_id, **kwargs
+            )
+            return cursor, [self.get_atom(document[FieldNames.ID_HASH]) for document in documents]
+        except Exception as e:
+            logger().error(f"Error retrieving atoms by index: {str(e)}")
+            raise e
+
+    def retrieve_all_atoms(self) -> list[AtomT]:
+        try:
+            all_atoms: list[AtomT] = []
+            document: DocumentT = {}
+            for document in self.mongo_atoms_collection.find():
+                if self._is_document_link(document) and FieldNames.TARGETS not in document:
+                    document[FieldNames.TARGETS] = self._get_document_keys(document)
+                atom = self._build_atom_from_dict(document)
+                all_atoms.append(atom)
+            return all_atoms
+        except Exception as e:
+            logger().error(f"Error retrieving all atoms: {type(e)}: {str(e)}, {document=}")
+            raise e
+
+    def bulk_insert(self, documents: list[AtomT]) -> None:
+        """
+        Insert multiple documents into the MongoDB collection and update indexes.
+
+        This method performs a bulk insert of the provided documents into the MongoDB collection.
+        It replaces existing documents with the same ID and updates the corresponding indexes.
+
+        Args:
+            documents (list[AtomT]): A list of atoms to be inserted into the collection.
+
+        Note:
+            Any error raised during the bulk write is caught and logged rather than
+            re-raised, so callers will not see an exception from this method.
+        """
+        try:
+            _id = FieldNames.ID_HASH
+            docs: list[DocumentT] = [d.to_dict() for d in documents]
+            for document in docs:
+                self.mongo_atoms_collection.replace_one({_id: document[_id]}, document, upsert=True)
+            self._update_atom_indexes(docs)
+        except Exception as e:  # pylint: disable=broad-except
+            logger().error(f"Error bulk inserting documents: {str(e)}")
diff --git a/src/python/hyperon_das_atomdb/database.py b/src/python/hyperon_das_atomdb/database.py
new file mode 100644
index 0000000..4fb7620
--- /dev/null
+++ b/src/python/hyperon_das_atomdb/database.py
@@ -0,0 +1,46 @@
+from typing import TypeAlias
+
+from hyperon_das_atomdb_cpp.constants import (
+    TYPE_HASH,
+    TYPEDEF_MARK_HASH,
+    WILDCARD,
+    WILDCARD_HASH,
+    FieldIndexType,
+    FieldNames,
+)
+from hyperon_das_atomdb_cpp.database import AtomDB
+from hyperon_das_atomdb_cpp.document_types import Atom, Link, Node
+
+# pylint: disable=invalid-name
+
+AtomT: TypeAlias = Atom
+NodeT: TypeAlias = Node
+LinkT: TypeAlias = Link
+
+HandleT: TypeAlias = str
+
+HandleListT: TypeAlias = list[HandleT]
+
+HandleSetT: TypeAlias = set[HandleT]
+
+IncomingLinksT: TypeAlias = HandleListT | list[AtomT]
+
+# pylint: enable=invalid-name
+
+
+__all__ = [
+    "FieldNames",
+    "FieldIndexType",
+    "AtomDB",
+    "WILDCARD",
+    "WILDCARD_HASH",
+    "TYPE_HASH",
+    "TYPEDEF_MARK_HASH",
+    "AtomT",
+    "NodeT",
+    "LinkT",
+    "HandleT",
+    "HandleListT",
+    "HandleSetT",
+    "IncomingLinksT",
+]
diff --git a/src/python/hyperon_das_atomdb/exceptions.py b/src/python/hyperon_das_atomdb/exceptions.py
new file mode 100644
index 0000000..a136568
--- /dev/null
+++ b/src/python/hyperon_das_atomdb/exceptions.py
@@ -0,0 +1,27 @@
+"""Custom exceptions for Atom DB"""
+
+from hyperon_das_atomdb_cpp.exceptions import (
+    AddLinkException,
+    AddNodeException,
+    AtomDbBaseException,
+    AtomDoesNotExist,
+    InvalidAtomDB,
+    InvalidOperationException,
+    RetryException,
+)
+
+
+class ConnectionMongoDBException(AtomDbBaseException):
+    """Exception raised for errors in the connection to MongoDB."""
+
+
+__all__ = [
+    "ConnectionMongoDBException",
+    "AtomDbBaseException",
+    "AtomDoesNotExist",
+    "AddNodeException",
+    "AddLinkException",
+    "InvalidOperationException",
+    "RetryException",
+    "InvalidAtomDB",
+]
diff --git a/src/python/hyperon_das_atomdb/index.py b/src/python/hyperon_das_atomdb/index.py
new file mode 100644
index 0000000..b88a96c
--- /dev/null
+++ b/src/python/hyperon_das_atomdb/index.py
@@ -0,0 +1,52 @@
+"""This module contains the abstract class for index creation and management."""
+
+from abc import ABC, abstractmethod
+from typing import Any
+
+from hyperon_das_atomdb.utils.expression_hasher import ExpressionHasher
+
+
+class Index(ABC):
+    """Abstract class for index creation and management."""
+
+    @staticmethod
+    def generate_index_id(field: str, conditionals: dict[str, Any]) -> str:
+        """Generates an index ID based on the field name.
+
+        Args:
+            field (str): The field name.
+            conditionals (dict[str, Any]): The conditionals.
+
+        Returns:
+            str: The index ID.
+        """
+        return ExpressionHasher.compute_hash(f"{field}{conditionals}")
+
+    @abstractmethod
+    def create(
+        self,
+        atom_type: str,
+        fields: list[str],
+        **kwargs,
+    ) -> tuple[str, Any]:
+        """Creates an index on the given fields.
+
+        Args:
+            atom_type (str): Atom's type.
+            fields (list[str]): The fields to create the index on.
+
+
+        Returns:
+            tuple[str, Any]: Returns the index id and the index properties dict.
+        """
+
+    @abstractmethod
+    def index_exists(self, index_id: str) -> bool:
+        """Checks if an index exists.
+
+        Args:
+            index_id (str): The index ID.
+
+        Returns:
+            bool: True if the index exists, False otherwise.
+        """
diff --git a/src/python/hyperon_das_atomdb/logger.py b/src/python/hyperon_das_atomdb/logger.py
new file mode 100644
index 0000000..5edd098
--- /dev/null
+++ b/src/python/hyperon_das_atomdb/logger.py
@@ -0,0 +1,96 @@
+"""
+This module provides a singleton Logger class for logging messages to a file with a specific
+format and level.
+
+The Logger class ensures that only one instance of the logger is created and provides methods
+for logging messages at different levels (debug, info, warning, error). The log messages are
+written to a file specified by LOG_FILE_NAME with a logging level defined by LOGGING_LEVEL.
+"""
+
+import logging
+
+LOG_FILE_NAME = "/tmp/das.log"
+LOGGING_LEVEL = logging.INFO
+
+# pylint: disable=logging-not-lazy
+
+
+class Logger:
+    """Singleton Logger class for logging messages to a file."""
+
+    __instance = None
+
+    @staticmethod
+    def get_instance() -> "Logger":
+        """Get the singleton instance of the Logger class."""
+        if Logger.__instance is None:
+            return Logger()
+        return Logger.__instance
+
+    def __init__(self):
+        """Initializes the Logger instance and sets up the logging configuration.
+
+        Raises:
+            RuntimeError: If an attempt is made to re-instantiate the Logger.
+        """
+
+        if Logger.__instance is not None:
+            raise RuntimeError("Invalid re-instantiation of Logger")
+
+        logging.basicConfig(
+            filename=LOG_FILE_NAME,
+            level=LOGGING_LEVEL,
+            format="%(asctime)s %(levelname)-8s %(message)s",
+            datefmt="%Y-%m-%d %H:%M:%S",
+        )
+        Logger.__instance = self
+
+    @staticmethod
+    def _prefix() -> str:
+        """Returns a prefix for log messages.
+
+        Returns:
+            str: The prefix for log messages.
+        """
+        return ""
+
+    def debug(self, msg: str) -> None:
+        """Logs a debug message.
+
+        Args:
+            msg (str): The message to log.
+        """
+        logging.debug(self._prefix() + msg)
+
+    def info(self, msg: str) -> None:
+        """Logs an info message.
+
+        Args:
+            msg (str): The message to log.
+ """ + logging.info(self._prefix() + msg) + + def warning(self, msg: str) -> None: + """Logs a warning message. + + Args: + msg (str): The message to log. + """ + logging.warning(self._prefix() + msg) + + def error(self, msg: str) -> None: + """Logs an error message. + + Args: + msg (str): The message to log. + """ + logging.error(self._prefix() + msg) + + +def logger() -> Logger: + """Get the singleton instance of the Logger class. + + Returns: + Logger: The singleton instance of the Logger class. + """ + return Logger.get_instance() diff --git a/src/python/hyperon_das_atomdb/utils/BUILD b/src/python/hyperon_das_atomdb/utils/BUILD new file mode 100644 index 0000000..5623b0b --- /dev/null +++ b/src/python/hyperon_das_atomdb/utils/BUILD @@ -0,0 +1,23 @@ +load("@pypi//:requirements.bzl", "requirement") +load("@rules_python//python:defs.bzl", "py_library") + +package(default_visibility = ["//visibility:public"]) + +filegroup( + name = "py_files", + srcs = glob(["*.py"]), +) + +py_library( + name = "expression_hasher", + srcs = ["expression_hasher.py"], + deps = [], +) + +py_library( + name = "settings", + srcs = ["settings.py"], + deps = [ + requirement("python-dotenv"), + ], +) diff --git a/src/python/hyperon_das_atomdb/utils/__init__.py b/src/python/hyperon_das_atomdb/utils/__init__.py new file mode 100644 index 0000000..e69de29 diff --git a/src/python/hyperon_das_atomdb/utils/expression_hasher.py b/src/python/hyperon_das_atomdb/utils/expression_hasher.py new file mode 100644 index 0000000..918ed69 --- /dev/null +++ b/src/python/hyperon_das_atomdb/utils/expression_hasher.py @@ -0,0 +1,120 @@ +""" +This module provides utility functions for hashing expressions and generating unique identifiers. + +It includes classes for computing hashes of various types of expressions, such as named types, +terminals, and composite expressions. The module uses the MD5 hashing algorithm to generate +hashes and provides methods for creating composite hashes from lists of elements. +""" + +from hashlib import md5 +from typing import Any + + +class ExpressionHasher: + """Utility class for hashing various types of expressions.""" + + compound_separator = " " + + @staticmethod + def compute_hash( + text: str, + ) -> str: + """ + Compute the MD5 hash of the given text. + + This method takes a string input and returns its MD5 hash as a hexadecimal string. + It is used internally by the `ExpressionHasher` class to generate unique identifiers + for various types of expressions. + + Args: + text (str): The input text to be hashed. + + Returns: + str: The MD5 hash of the input text as a hexadecimal string. + """ + return md5(text.encode("utf-8")).digest().hex() + + @staticmethod + def named_type_hash(name: str) -> str: + """ + Compute the hash for a named type. + + This method generates a hash for the given named type using the MD5 hashing algorithm. + It is used to create unique identifiers for named types in the `ExpressionHasher` class. + + Args: + name (str): The name of the type to be hashed. + + Returns: + str: The MD5 hash of the named type as a hexadecimal string. + """ + return ExpressionHasher.compute_hash(name) + + @staticmethod + def terminal_hash(named_type: str, terminal_name: str) -> str: + """ + Compute the hash for a terminal expression. + + This method generates a hash for the given terminal expression using the MD5 hashing + algorithm. It combines the named type and terminal name to create a unique identifier + for the terminal expression. 
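+
+        For example, terminal_hash("Symbol", "foo") hashes the single string
+        "Symbol foo", since the two parts are joined with the compound separator
+        (a space) before being passed to compute_hash.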
+ + Args: + named_type (str): The name of the type to be hashed. + terminal_name (str): The name of the terminal to be hashed. + + Returns: + str: The MD5 hash of the terminal expression as a hexadecimal string. + """ + return ExpressionHasher.compute_hash( + ExpressionHasher.compound_separator.join([named_type, terminal_name]) + ) + + @staticmethod + def expression_hash(named_type_hash: str, elements: list[str]) -> str: + """ + Compute the hash for a composite expression. + + This method generates a hash for the given composite expression using the MD5 hashing + algorithm. It combines the named type hash and a list of element hashes to create a + unique identifier for the composite expression. + + Args: + named_type_hash (str): The hash of the named type. + elements (list[str]): A list of element hashes to be combined. + + Returns: + str: The MD5 hash of the composite expression as a hexadecimal string. + """ + return ExpressionHasher.composite_hash([named_type_hash, *elements]) + + @staticmethod + def composite_hash(hash_base: str | list[Any]) -> str: + """ + Compute the composite hash for the given base. + + This method generates a composite hash using the MD5 hashing algorithm. It can take + either a single string or a list of strings as the base. If a list is provided, the + elements are joined with a separator before hashing. + + Args: + hash_base (str | list[str]): The base for the composite hash, either a single string + or a list of strings. + + Returns: + str: The MD5 hash of the composite base as a hexadecimal string. + """ + if isinstance(hash_base, str): + return hash_base + elif isinstance(hash_base, list): + if len(hash_base) == 1: + return hash_base[0] + else: + return ExpressionHasher.compute_hash( + ExpressionHasher.compound_separator.join(hash_base) + ) + # TODO unreachable + else: + raise ValueError( + "Invalid base to compute composite hash: " f"{type(hash_base)}: {hash_base}" + ) diff --git a/src/python/hyperon_das_atomdb/utils/settings.py b/src/python/hyperon_das_atomdb/utils/settings.py new file mode 100644 index 0000000..17c7cca --- /dev/null +++ b/src/python/hyperon_das_atomdb/utils/settings.py @@ -0,0 +1,10 @@ +""" +This module loads configuration settings from a .env file using the dotenv library. + +It reads the key-value pairs from the .env file and stores them in a dictionary named `config`. +These settings can be used throughout the application to configure various parameters. 
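+
+Example (illustrative variable name): a `.env` file with the line
+`DAS_LOG_LEVEL=INFO` yields `config["DAS_LOG_LEVEL"] == "INFO"`.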
+""" + +from dotenv import dotenv_values + +config = dotenv_values(".env") diff --git a/src/scripts/bazel_build.sh b/src/scripts/bazel_build.sh index 2ad9965..ae12d15 100755 --- a/src/scripts/bazel_build.sh +++ b/src/scripts/bazel_build.sh @@ -2,17 +2,22 @@ (( JOBS=$(nproc)/2 )) BAZELISK_CMD=/opt/bazel/bazelisk -BIN_FOLDER=/opt/das-attention-broker/bin -mkdir -p $BIN_FOLDER +REPO_ROOT=/opt/das-attention-broker +WORKSPACE_DIR=${REPO_ROOT}/cpp +BIN_DIR=${REPO_ROOT}/bin +mkdir -p $BIN_DIR + +cd $WORKSPACE_DIR \ +&& $BAZELISK_CMD build --jobs $JOBS --noenable_bzlmod //:link_creation_engine \ +&& mv bazel-bin/link_creation_engine $BIN_DIR \ +&& $BAZELISK_CMD build --jobs $JOBS --noenable_bzlmod //:word_query \ +&& mv bazel-bin/word_query $BIN_DIR \ +&& $BAZELISK_CMD build --jobs $JOBS --noenable_bzlmod //:attention_broker_service \ +&& mv bazel-bin/attention_broker_service $BIN_DIR \ +&& $BAZELISK_CMD build --jobs $JOBS --noenable_bzlmod //:query_broker \ +&& mv bazel-bin/query_broker $BIN_DIR \ +&& $BAZELISK_CMD build --jobs $JOBS --noenable_bzlmod //:query \ +&& mv bazel-bin/query $BIN_DIR \ +&& chown -R ${_USER}:${_GROUP} ${BIN_DIR} -$BAZELISK_CMD build --jobs $JOBS --noenable_bzlmod //cpp:link_creation_engine \ -&& mv bazel-bin/cpp/link_creation_engine $BIN_FOLDER \ -&& $BAZELISK_CMD build --jobs $JOBS --noenable_bzlmod //cpp:word_query \ -&& mv bazel-bin/cpp/word_query $BIN_FOLDER \ -&& $BAZELISK_CMD build --jobs $JOBS --noenable_bzlmod //cpp:attention_broker_service \ -&& mv bazel-bin/cpp/attention_broker_service $BIN_FOLDER \ -&& $BAZELISK_CMD build --jobs $JOBS --noenable_bzlmod //cpp:query_broker \ -&& mv bazel-bin/cpp/query_broker $BIN_FOLDER \ -&& $BAZELISK_CMD build --jobs $JOBS --noenable_bzlmod //cpp:query \ -&& mv bazel-bin/cpp/query $BIN_FOLDER diff --git a/src/scripts/build.sh b/src/scripts/build.sh index 8e222d9..1c518d2 100755 --- a/src/scripts/build.sh +++ b/src/scripts/build.sh @@ -1,12 +1,14 @@ #!/bin/bash -CONTAINER_NAME="das-attention-broker-build" +IMAGE_NAME="das-attention-broker-builder" +CONTAINER_NAME=${IMAGE_NAME}-container -mkdir -p bin docker run --rm \ + -e _USER=$(id -u) \ + -e _GROUP=$(id -g) \ --name=$CONTAINER_NAME \ --volume .:/opt/das-attention-broker \ --workdir /opt/das-attention-broker \ - das-attention-broker-builder \ + ${IMAGE_NAME} \ ./scripts/bazel_build.sh