diff --git a/data_services/.terraform.lock.hcl b/data_services/.terraform.lock.hcl index 983f17b..d86dc06 100644 --- a/data_services/.terraform.lock.hcl +++ b/data_services/.terraform.lock.hcl @@ -2,25 +2,25 @@ # Manual edits may be lost in future updates. provider "registry.terraform.io/hashicorp/aws" { - version = "5.51.1" + version = "5.62.0" constraints = ">= 4.9.0, ~> 5.1, >= 5.32.0" hashes = [ - "h1:ESfxP2tCO6IZldSQnepXmIm+x+VtaQt/bKgGjYE+0BY=", - "zh:03d524b70ab300d90dc4dccad0c28b18d797b8986722b7a93e40a41500450eaa", - "zh:04dbcb7ab52181a784877c409f6c882df34bda686d8c884d511ebd4abf493f0c", - "zh:2b068f7838e0f3677829258df05d8b9d73fe6434a1a809f8710956cc1c01ea03", - "zh:41a4b1e4adbf7c90015ebff17a719fc08133b8a2c4dcefd2fa281552126e59a8", - "zh:48b1adf57f695a72c88c598f99912171ef7067638fd63fb0c6ad3fa397b3f7c3", - "zh:5c2fb26ecb83adac90d06dcf5f97edbc944824c2821816b1653e1a2b9d37b3c4", - "zh:93df05f53702df829d9b9335e559ad8b313808dbd2fad8b2ff14f176732e693d", + "h1:8tevkFG+ea/sNZYiQ2GQ02hknPcWBukxkrpjRCodQC0=", + "zh:1f366cbcda72fb123015439a42ab19f96e10ce4edb404273f4e1b7e06da20b73", + "zh:25f098454a34b483279e0382b24b4f42e51c067222c6e797eda5d3ec33b9beb1", + "zh:4b59d48b527e3cefd73f196853bfc265b3e1e57b55c1c8a2d12ff6e3534b4f07", + "zh:7bb88c1ca95e2b3f0f1fe8636925133b9813fc5b137cc467ba6a233ddf4b360e", + "zh:8a93dece40e816c92647e762839d0370e9cad2aa21dc4ca95baee9385f116459", + "zh:8dfe82c55ab8f633c1e2a39c687e9ca8c892d1c2005bf5166ac396ce868ecd05", "zh:9b12af85486a96aedd8d7984b0ff811a4b42e3d88dad1a3fb4c0b580d04fa425", - "zh:b5da39898602e44551b56e2803a42d92ea7115e35b1792efbf6649da37ef597b", - "zh:b7ab7f743f864ed8d479a7cb04fd3ce00c376f867ee5b53c4c1acaef6e286c54", - "zh:e7e7b2d8ee486415481a25ac7bdded20bd2897d5dd0790741798f31935b9528d", - "zh:e8008e3f5ef560fd9004d1ed1738f0f53e99b0ce961d967e95fc7c02e5954e4e", - "zh:f1296f648b8608ffa930b52519b00ed01eebedde9fdaf94205b365536e6c3916", - "zh:f8539960fd978a54990740ee984c6f7f743c9c32c7734e2601e92abfe54367e9", - 
"zh:fd182e6e20bb52982752a5d8c4b16887565f413a9d50d9d394d2c06eea8a195e", + "zh:a754952d69b4860480d5207390e3ab42350c964dbca9a5ac0c6912dd24b4c11d", + "zh:b2a4dbf4abee0e9ec18c5d323b99defdcd3c681f8c4306fb6e02cff7de038f85", + "zh:b57d84be258b571c04271015f03858ab215768b82e47c11ecd86e789d577030a", + "zh:be811b03289407c8d59e6b199bf16e6071165565ffe502148172d0886cf849c4", + "zh:d4144c7366c840eff1ac15ba13d96063f798f0983d24053a832362033624fe6f", + "zh:d88612856d453c4e10c49c76e4ef522b7d068b4f7c3e2e0b03dd74540986eecd", + "zh:e8bd231a5d0786cc4aab8471bb6dabd5a5df1c598afda077a9f27987ada57b67", + "zh:ffb40a66b4d000a8ee4c54227eeb998f887ad867419c3af7d3981587788de074", ] } diff --git a/data_services/opensearch_connector.tf b/data_services/opensearch_connector.tf index 32f5f95..37ad935 100644 --- a/data_services/opensearch_connector.tf +++ b/data_services/opensearch_connector.tf @@ -1,78 +1,73 @@ locals { - connector_spec = { for key, config in var.sagemaker_configurations : key => { - name = "${local.namespace}-${config.name}-embedding" - description = "Opensearch Connector for ${config.name}" - version = 1 - protocol = "aws_sigv4" + connector_spec = { + name = "${local.namespace}-embedding" + description = "Opensearch Connector for ${var.embedding_model_name} via Amazon Bedrock" + version = 1 + protocol = "aws_sigv4" credential = { roleArn = aws_iam_role.opensearch_connector.arn } parameters = { - region = data.aws_region.current.name - service_name = "sagemaker" + region = data.aws_region.current.name + service_name = "bedrock" + model_name = var.embedding_model_name } actions = [ { - action_type = "predict" - method = "POST" + action_type = "predict" + method = "POST" headers = { "content-type" = "application/json" } - url = local.embedding_invocation_url[key] - post_process_function = file("${path.module}/opensearch_connector/post-process.painless") - request_body = "{\"inputs\": $${parameters.input}}" + url = 
"https://bedrock-runtime.$${parameters.region}.amazonaws.com/model/$${parameters.model_name}/invoke" + post_process_function = file("${path.module}/opensearch_connector/post-process.painless") + request_body = "{\"texts\": $${parameters.input}, \"input_type\": \"search_document\"}" } ] - - client_config = { - max_connections = config.max_concurrency / var.opensearch_cluster_nodes - connection_timeout = 5000 - read_timeout = 60000 - } - }} + } } data "aws_iam_policy_document" "opensearch_connector_assume_role" { statement { - effect = "Allow" - actions = ["sts:AssumeRole"] + effect = "Allow" + actions = ["sts:AssumeRole"] principals { - type = "Service" - identifiers = ["opensearchservice.amazonaws.com"] + type = "Service" + identifiers = ["opensearchservice.amazonaws.com"] } } } data "aws_iam_policy_document" "opensearch_connector_role" { statement { - effect = "Allow" - actions = [ - "sagemaker:InvokeEndpoint", - "sagemaker:InvokeEndpointAsync" + effect = "Allow" + actions = [ + "bedrock:InvokeModel", + "bedrock:InvokeModelWithResultStream", ] - resources = [ for endpoint in aws_sagemaker_endpoint.serverless_inference : endpoint.arn ] + resources = ["*"] } } resource "aws_iam_policy" "opensearch_connector" { - name = "${local.namespace}-opensearch-connector" - policy = data.aws_iam_policy_document.opensearch_connector_role.json + name = "${local.namespace}-opensearch-connector" + policy = data.aws_iam_policy_document.opensearch_connector_role.json } resource "aws_iam_role" "opensearch_connector" { - name = "${local.namespace}-opensearch-connector" - assume_role_policy = data.aws_iam_policy_document.opensearch_connector_assume_role.json + name = "${local.namespace}-opensearch-connector" + assume_role_policy = data.aws_iam_policy_document.opensearch_connector_assume_role.json } resource "aws_iam_role_policy_attachment" "opensearch_connector" { - role = aws_iam_role.opensearch_connector.id - policy_arn = aws_iam_policy.opensearch_connector.arn + role = 
aws_iam_role.opensearch_connector.id + policy_arn = aws_iam_policy.opensearch_connector.arn } data "aws_iam_policy_document" "deploy_model_lambda" { @@ -93,14 +88,14 @@ module "deploy_model_lambda" { source = "terraform-aws-modules/lambda/aws" version = "~> 7.2.1" - function_name = "${local.namespace}-deploy-opensearch-ml-model" - description = "Utility lambda to deploy a SageMaker model within Opensearch" - handler = "index.handler" - runtime = "nodejs18.x" - source_path = "${path.module}/deploy_model_lambda" - timeout = 30 - attach_policy_json = true - policy_json = data.aws_iam_policy_document.deploy_model_lambda.json + function_name = "${local.namespace}-deploy-opensearch-ml-model" + description = "Utility lambda to deploy an embedding model within Opensearch" + handler = "index.handler" + runtime = "nodejs18.x" + source_path = "${path.module}/deploy_model_lambda" + timeout = 30 + attach_policy_json = true + policy_json = data.aws_iam_policy_document.deploy_model_lambda.json environment_variables = { OPENSEARCH_ENDPOINT = aws_opensearch_domain.elasticsearch.endpoint @@ -108,14 +103,13 @@ module "deploy_model_lambda" { } resource "aws_lambda_invocation" "deploy_model" { - for_each = local.connector_spec function_name = module.deploy_model_lambda.lambda_function_name lifecycle_scope = "CRUD" input = jsonencode({ - namespace = local.namespace - connector_spec = local.connector_spec[each.key] - model_name = "${each.value.name}/huggingface/${var.model_repository}" - model_version = "1.0.0" + namespace = local.namespace + connector_spec = local.connector_spec + model_name = var.embedding_model_name + model_version = "1.0.0" }) } diff --git a/data_services/opensearch_connector/post-process.painless b/data_services/opensearch_connector/post-process.painless index c7c438d..27553d3 100644 --- a/data_services/opensearch_connector/post-process.painless +++ b/data_services/opensearch_connector/post-process.painless @@ -1,8 +1,13 @@ def name = 'sentence_embedding'; def 
dataType = 'FLOAT32'; -if (params.embedding == null || params.embedding.length == 0) { - return params.message; +if (params.embeddings == null || params.embeddings.length == 0) { + return params.message; } -def shape = [params.embedding.length]; -def json = '{"name":"' + name + '","data_type":"' + dataType + '","shape":' + shape + ',"data":' + params.embedding + '}'; -return json; + +def embedding = params.embeddings[0]; +if (embedding == null || embedding.length == 0) { + return params.message; +} +def shape = [embedding.length]; +def json = '{"name":"' + name + '","data_type":"' + dataType + '","shape":' + shape + ',"data":' + embedding + '}'; +return json; \ No newline at end of file diff --git a/data_services/outputs.tf b/data_services/outputs.tf index 036a21b..8172dd0 100644 --- a/data_services/outputs.tf +++ b/data_services/outputs.tf @@ -1,6 +1,6 @@ locals { - deploy_model_result = { for key in keys(var.sagemaker_configurations) : key => jsondecode(aws_lambda_invocation.deploy_model[key].result) } - deploy_model_body = { for key in keys(var.sagemaker_configurations) : key => jsondecode(local.deploy_model_result[key].body) } + deploy_model_result = jsondecode(aws_lambda_invocation.deploy_model.result) + deploy_model_body = jsondecode(local.deploy_model_result.body) } output "elasticsearch" { @@ -14,22 +14,22 @@ output "elasticsearch" { } output "inference" { - value = { for key, value in local.deploy_model_body : key => { - endpoint_name = aws_sagemaker_endpoint.serverless_inference[key].name - invocation_url = local.embedding_invocation_url[key] - opensearch_model_id = lookup(value, "model_id", "DEPLOY ERROR") - }} + value = { + endpoint_name = var.embedding_model_name + invocation_url = "https://bedrock-runtime.${data.aws_region.current.name}.amazonaws.com/model/${var.embedding_model_name}/invoke" + opensearch_model_id = lookup(local.deploy_model_body, "model_id", "DEPLOY ERROR") + } } output "search_snapshot_configuration" { value = { - create_url = 
"https://${aws_opensearch_domain.elasticsearch.endpoint}/_snapshot/" - create_doc = jsonencode({ - type = "s3" + create_url = "https://${aws_opensearch_domain.elasticsearch.endpoint}/_snapshot/" + create_doc = jsonencode({ + type = "s3" settings = { - bucket = aws_s3_bucket.elasticsearch_snapshot_bucket.id - region = data.aws_region.current.name - role_arn = aws_iam_role.elasticsearch_snapshot_bucket_access.arn + bucket = aws_s3_bucket.elasticsearch_snapshot_bucket.id + region = data.aws_region.current.name + role_arn = aws_iam_role.elasticsearch_snapshot_bucket_access.arn } }) } diff --git a/data_services/sagemaker.tf b/data_services/sagemaker.tf deleted file mode 100644 index 1d099d5..0000000 --- a/data_services/sagemaker.tf +++ /dev/null @@ -1,146 +0,0 @@ -locals { - model_container_spec = { - framework = "huggingface" - base_framework = "pytorch" - image_scope = "inference" - framework_version = "2.1.0" - image_version = "4.37.0" - python_version = "py310" - processor = "cpu" - image_os = "ubuntu22.04" - } - - model_id = element(split("/", var.model_repository), length(split("/", var.model_repository))-1) - model_repository = join("-", [local.model_container_spec.framework, local.model_container_spec.base_framework, local.model_container_spec.image_scope]) - model_image_tag = "${local.model_container_spec.framework_version}-transformers${local.model_container_spec.image_version}-${local.model_container_spec.processor}-${local.model_container_spec.python_version}-${local.model_container_spec.image_os}" - - - embedding_invocation_url = { for key, value in var.sagemaker_configurations : key => "https://runtime.sagemaker.${data.aws_region.current.name}.amazonaws.com/endpoints/${aws_sagemaker_endpoint.serverless_inference[key].name}/invocations" } -} - -resource "aws_s3_bucket" "sagemaker_model_bucket" { - bucket = "${local.namespace}-model-artifacts" -} - -resource "terraform_data" "inference_model_artifact" { - triggers_replace = [ - var.model_repository - ] - - 
input = "${path.module}/model/.working/${local.model_id}.tar.gz" - - provisioner "local-exec" { - command = "./build_model.sh" - working_dir = "${path.module}/model" - - environment = { - model_id = local.model_id - repository = var.model_repository - requirements = join("\n", var.model_requirements) - } - } -} - -resource "aws_s3_object" "inference_model_artifact" { - bucket = aws_s3_bucket.sagemaker_model_bucket.bucket - key = "custom_inference/${local.model_id}/${local.model_id}.tar.gz" - source = terraform_data.inference_model_artifact.output - content_type = "application/gzip" -} - -data "aws_sagemaker_prebuilt_ecr_image" "inference_container" { - repository_name = local.model_repository - image_tag = local.model_image_tag -} - -data "aws_iam_policy_document" "embedding_model_execution_assume_role" { - statement { - effect = "Allow" - actions = ["sts:AssumeRole"] - - principals { - type = "Service" - identifiers = ["sagemaker.amazonaws.com"] - } - } -} - -data "aws_iam_policy_document" "embedding_model_execution_role" { - statement { - effect = "Allow" - actions = [ - "cloudwatch:PutMetricData", - "logs:CreateLogStream", - "logs:PutLogEvents", - "logs:CreateLogGroup", - "logs:DescribeLogStreams", - "ecr:GetAuthorizationToken" - ] - resources = ["*"] - } - - statement { - effect = "Allow" - actions = ["s3:GetObject"] - resources = ["arn:aws:s3:::${aws_s3_bucket.sagemaker_model_bucket.bucket}/${aws_s3_object.inference_model_artifact.key}"] - } - - statement { - effect = "Allow" - actions = [ - "ecr:BatchCheckLayerAvailability", - "ecr:GetDownloadUrlForLayer", - "ecr:BatchGetImage" - ] - resources = ["*"] - } -} - -resource "aws_iam_policy" "embedding_model_execution_role" { - name = "${local.namespace}-sagemaker-model-execution-role" - policy = data.aws_iam_policy_document.embedding_model_execution_role.json -} - -resource "aws_iam_role" "embedding_model_execution_role" { - name = "${local.namespace}-sagemaker-model-execution-role" - assume_role_policy = 
data.aws_iam_policy_document.embedding_model_execution_assume_role.json -} - -resource "aws_iam_role_policy_attachment" "embedding_model_execution_role" { - role = aws_iam_role.embedding_model_execution_role.id - policy_arn = aws_iam_policy.embedding_model_execution_role.arn -} - -resource "aws_sagemaker_model" "embedding_model" { - name = "${local.namespace}-embedding-model" - execution_role_arn = aws_iam_role.embedding_model_execution_role.arn - - primary_container { - image = data.aws_sagemaker_prebuilt_ecr_image.inference_container.registry_path - mode = "SingleModel" - model_data_url = "s3://${aws_s3_object.inference_model_artifact.bucket}/${aws_s3_object.inference_model_artifact.key}" - } -} - -resource "aws_sagemaker_endpoint_configuration" "serverless_inference" { - for_each = var.sagemaker_configurations - - name = "${local.namespace}-embedding-model-${each.value.name}" - - production_variants { - model_name = aws_sagemaker_model.embedding_model.name - variant_name = "AllTraffic" - - serverless_config { - memory_size_in_mb = each.value.memory - max_concurrency = each.value.max_concurrency - provisioned_concurrency = each.value.provisioned_concurrency > 0 ? 
each.value.provisioned_concurrency : null - } - } -} - -resource "aws_sagemaker_endpoint" "serverless_inference" { - for_each = var.sagemaker_configurations - name = "${local.namespace}-embedding-${each.value.name}" - endpoint_config_name = aws_sagemaker_endpoint_configuration.serverless_inference[each.key].name -} diff --git a/data_services/secrets.tf b/data_services/secrets.tf index b77af4d..496271e 100644 --- a/data_services/secrets.tf +++ b/data_services/secrets.tf @@ -1,22 +1,22 @@ locals { secrets = { db = { - host = aws_db_instance.db.address - port = aws_db_instance.db.port - username = "dbadmin" - password = random_string.db_master_password.result + host = aws_db_instance.db.address + port = aws_db_instance.db.port + username = "dbadmin" + password = random_string.db_master_password.result } index = { - endpoint = aws_opensearch_domain.elasticsearch.endpoint - models = { for key, value in local.deploy_model_body : key => lookup(value, "model_id", "DEPLOY ERROR") } + endpoint = aws_opensearch_domain.elasticsearch.endpoint + models = lookup(local.deploy_model_body, "model_id", "DEPLOY ERROR") } inference = { - endpoints = { for key, value in local.deploy_model_body : key => { - name = aws_sagemaker_endpoint.serverless_inference[key].name - endpoint = local.embedding_invocation_url[key] - }} + endpoints = { + name = var.embedding_model_name + endpoint = "https://bedrock-runtime.${data.aws_region.current.name}.amazonaws.com/model/${var.embedding_model_name}/invoke" + } } ldap = var.ldap_config diff --git a/data_services/variables.tf b/data_services/variables.tf index 5156772..c2cd3bf 100644 --- a/data_services/variables.tf +++ b/data_services/variables.tf @@ -9,7 +9,7 @@ variable "instance_class" { } variable "ldap_config" { - type = map(string) + type = map(string) } variable "opensearch_cluster_nodes" { @@ -22,20 +22,16 @@ variable "opensearch_volume_size" { default = 10 } -variable "model_repository" { +variable "embedding_model_name" { type = string } 
-variable "model_requirements" {
-  type = list(string)
-  default = []
-}
+variable "embedding_dimensions" {
+  type = number
+}
-
-variable "sagemaker_configurations" {
-  type = map(object({
-    name = string
-    memory = number
-    provisioned_concurrency = number
-    max_concurrency = number
-  }))
-}