template.yaml

Transform: AWS::Serverless-2016-10-31
Metadata:
  "AWS::CloudFormation::Interface":
    ParameterGroups:
      - Label:
          default: "General S3 Parameters"
        Parameters:
          - BucketName
      - Label:
          default: "SageMaker Model ARN"
        Parameters:
          - TechniqueModelARN
          - ClusterModelARN
          - FlowModelARN

      - Label:
          default: UI admin credentials
        Parameters:
          - SuperuserEmail
          - SuperuserUsername
          - SuperuserPassword
      - Label:
          default: Container images
        Parameters:
          - NginxContainerImage
          - WebContainerImage
          - VRLLambdaImage
      - Label:
          default: "Load Balancer Configuration"
        Parameters:
          - LoadBalancerName

      - Label:
          default: "SageMaker Model Parameters"
        Parameters:
          - TechniqueModelName
          - ClusterModelName
          - FlowModelName
      - Label:
          default: "Input processing Parameters"
        Parameters:
          - TechniqueLookupObject
          - ChunkSize
          - FlowInputMaxClusters
      - Label:
          default: "Technique detector Batch Job Parameters"
        Parameters:
          - EnrichTechBatchInstanceType
          - EnrichTechBatchTimeout
      - Label:
          default: "Temporal clustering Batch Job Parameters"
        Parameters:
          - ClusterBatchInstanceType
          - ClusterBatchTimeout
      - Label:
          default: "Flow detector Batch Job Parameters"
        Parameters:
          - FlowBatchInstanceType
          - FlowBatchTimeout
      - Label:
          default: "UI lambda function Parameters"
        Parameters:
          - ClusterOrFlowMapToCampaign
          - EventThreshold
          - TacticThreshold
      - Label:
          default: "Lambda Function Parameters"
        Parameters:
          - SplunkInputFunctionName
          - VRLLambdaFunctionName
          - EnrichTechFunctionName
          - ProcessTechFunctionName
          - CreateClusterFunctionName
          - ProcessClusterFunctionName
          - CreateFlowFunctionName
          - ProcessFlowFunctionName
          - UpdateLookupFunctionName
          - CreateCampaignFunctionName
          - SaveFeedbackFunctionName

      - Label:
          default: "ECS Configuration"
        Parameters:
          - ECSClusterName
          - ServiceAndTaskDefinitionName
          - Cpu
          - Memory
          - LatestECSOptimizedAMI
      - Label:
          default: "Auto Scaling Configuration"
        Parameters:
          - ClusterAutoScalingMinSize
          - ClusterAutoScalingMaxSize
      - Label:
          default: "VPC Configuration"
        Parameters:
          - VPCNamePrefix
          - VpcCidr
          - Subnet1Cidr
          - Subnet2Cidr

Parameters:
  BucketName:
    Type: String
    Description: Name of the S3 bucket to create.
    MinLength: 1
  
  TechniqueModelName:
    Type: String
    Description: Name of Sagemaker model for technique classification
    Default: technique-model
    MinLength: 1
  TechniqueModelARN:
    Type: String
    Description: Model package ARN for the technique model subscribed from marketplace
    MinLength: 1
    AllowedPattern: ^arn:aws(-[a-z]+)*:[a-zA-Z0-9\-]+:[a-z0-9\-]*:[0-9]{12}:(.+)$
    ConstraintDescription: Enter a valid AWS ARN for technique model
  
  ClusterModelName:
    Type: String
    Description: Name of Sagemaker model for cluster detection
    Default: cluster-model
    MinLength: 1
  ClusterModelARN:
    Type: String
    Description: Model package ARN for the cluster model subscribed from marketplace
    MinLength: 1
    AllowedPattern: ^arn:aws(-[a-z]+)*:[a-zA-Z0-9\-]+:[a-z0-9\-]*:[0-9]{12}:(.+)$
    ConstraintDescription: Enter a valid AWS ARN for technique model
  
  FlowModelName:
    Type: String
    Description: Name of Sagemaker model for flow detection
    Default: flow-model
    MinLength: 1
  FlowModelARN:
    Type: String
    Description: Model package ARN for the flow model subscribed from marketplace
    MinLength: 1
    AllowedPattern: ^arn:aws(-[a-z]+)*:[a-zA-Z0-9\-]+:[a-z0-9\-]*:[0-9]{12}:(.+)$
    ConstraintDescription: Enter a valid AWS ARN for technique model

  VRLLambdaImage:
    Description: VRL Lambda ECR container image URI
    Type: String
    MinLength: 8
    ConstraintDescription: Must be minimum length of 8

  TechniqueLookupObject:
    Type: String
    Description: Path to S3 object where you want to save lookup table for previously classified techniques for alerts. The object file must have an extension of '.csv'
    Default: "scratch/lambda/data.csv"
    MinLength: 5
  
  ChunkSize:
    Type: Number
    Description: Size of single chunk of input to be processed at a time for an input file
    Default: 20000

  EnrichTechBatchInstanceType:
    Type: String
    Default: ml.p2.xlarge
    Description: EC2 instance type for the enrich technology batch transform job.
    AllowedValues:
      - ml.p2.xlarge
      - ml.p2.8xlarge
      - ml.p2.16xlarge
      - ml.p3.2xlarge
      - ml.p3.8xlarge
      - ml.p3.16xlarge

  EnrichTechBatchTimeout:
    Type: Number
    Default: 3600
    Description: Timeout in seconds for the enrich technology batch transform job. Minimum allowed value '1'. Maximum allowed value '3600'
    MinValue: 1
    MaxValue: 3600

  ClusterBatchInstanceType:
    Type: String
    Default: ml.p3.2xlarge
    Description: EC2 instance type for the cluster batch transform job.
    AllowedValues:
      - ml.p3.2xlarge
      - ml.p3.8xlarge
      - ml.p3.16xlarge

  ClusterBatchTimeout:
    Type: Number
    Default: 3600
    Description: Timeout in seconds for the cluster batch transform job. Minimum allowed value '1'. Maximum allowed value '3600'
    MinValue: 1
    MaxValue: 3600

  FlowBatchInstanceType:
    Type: String
    Default: ml.c5.4xlarge
    Description: EC2 instance type for the flow batch transform job.
    AllowedValues:
      - ml.m5.xlarge
      - ml.m5.2xlarge
      - ml.m5.4xlarge
      - ml.m5.12xlarge
      - ml.m5.24xlarge
      - ml.m4.xlarge
      - ml.m4.2xlarge
      - ml.m4.4xlarge
      - ml.m4.10xlarge
      - ml.c5.xlarge
      - ml.c5.2xlarge
      - ml.c5.4xlarge
      - ml.c5.9xlarge
      - ml.c5.18xlarge
      - ml.c4.xlarge
      - ml.c4.2xlarge
      - ml.c4.4xlarge
      - ml.c4.8xlarge
      - ml.m4.16xlarge

  FlowBatchTimeout:
    Type: Number
    Default: 3600
    Description: Timeout in seconds for the flow batch transform job. Minimum allowed value '1'. Maximum allowed value '3600'
    MinValue: 1
    MaxValue: 3600

  FlowInputMaxClusters:
    Type: Number
    Description: Number of clusters as input to Flow detection model to be processed at a time for an input file
    Default: 5000

  SplunkInputFunctionName:
    Type: String
    Default: splunk_input
    Description: Function name for process splunk input.
    MinLength: 5

  VRLLambdaFunctionName:
    Type: String
    Default: vrl_lambda
    Description: Function name for VRL transform.
    MinLength: 5

  EnrichTechFunctionName:
    Type: String
    Default: enrich_with_technique
    Description: Function name for enriching with technique.
    MinLength: 5

  ProcessTechFunctionName:
    Type: String
    Default: process_enriched_with_technique
    Description: Function name for processing enriched data with technique.
    MinLength: 5

  CreateClusterFunctionName:
    Type: String
    Default: create_cluster
    Description: Function name for creating cluster.
    MinLength: 5

  ProcessClusterFunctionName:
    Type: String
    Default: process_cluster
    Description: Function name for processing cluster.
    MinLength: 5

  CreateFlowFunctionName:
    Type: String
    Default: create_flow
    Description: Function name for creating flow.
    MinLength: 5

  ProcessFlowFunctionName:
    Type: String
    Default: process_flow
    Description: Function name for processing flow.
    MinLength: 5

  UpdateLookupFunctionName:
    Type: String
    Default: update_lookup_table
    Description: Function name for updating the lookup table.
    MinLength: 5
  
  CreateCampaignFunctionName:
    Type: String
    Default: create_campaign
    Description: Function name for creating campaigns on UI.
    MinLength: 5
  
  SaveFeedbackFunctionName:
    Type: String
    Default: save_feedback
    Description: Function name for fetching copy/cut action feedback from UI.
    MinLength: 5

  ClusterOrFlowMapToCampaign:
    Type: String
    Default: cluster
    AllowedValues:
      - cluster
      - flow

  EventThreshold:
    Type: Number
    Default: 2
    Description: Number of events present in the cluster or flow to create campaign on UI. Minimum allowed value '1'.
    MinValue: 1

  TacticThreshold:
    Type: Number
    Default: 0
    Description: Number of tactics present in an event to be part of the campaign. Minimum allowed value '0'.
    MinValue: 0

  ECSClusterName:
    Type: String
    Description: Specifies the ECS Cluster Name with which the resources would be associated
    Default: cypienta-cluster
    MinLength: 3
    ConstraintDescription: Must be minimum length of 3

  ECSClusterInstanceType:
    Type: String
    Description: ECS Cluster instance type for EC2
    Default: t3a.large
    AllowedValues: 
      - t2.micro
      - t2.small
      - t2.medium
      - t2.large
      - t3.micro
      - t3.small
      - t3.medium
      - t3.large
      - t3a.micro
      - t3a.small
      - t3a.medium
      - t3a.large
      - m5.large
      - m5.xlarge
      - m5.2xlarge
      - m5.4xlarge
      - m5a.large
      - m5a.xlarge
      - m5a.2xlarge
      - m5a.4xlarge
      - m6g.medium
      - m6g.large
      - m6g.xlarge
      - m6g.2xlarge

  ServiceAndTaskDefinitionName: 
    Description: Name of the ECS task definition and ECS Service
    Type: String
    Default: cypienta-ui
    MinLength: 3
    ConstraintDescription: Must be minimum length of 3
    
  Cpu:
    Description: Number of CPU units used by the task. 1 vCPU = 1024
    Type: String
    Default: 1024
    AllowedValues:
      - 256
      - 512
      - 1024
      - 2048
      - 4096
      - 8192
      - 16384

  Memory:
    Description: Amount of memory (in MiB) used by the task. 1 GB = 1024
    Type: String
    Default: 4096
    AllowedPattern: ^[0-9]+$
    ConstraintDescription: Must be an integer value

  SuperuserEmail: 
    Description: Email of superuser
    Type: String
    AllowedPattern: "^[a-zA-Z0-9._%+-]+@[a-zA-Z0-9.-]+.[a-zA-Z]{2,}$"
    ConstraintDescription: Must be a valid email address

  SuperuserUsername: 
    Description: Superuser username. Minimum length of 3.
    Type: String
    MinLength: 3
    ConstraintDescription: Must be minimum length of 3

  SuperuserPassword: 
    Description: Superuser password. Minimum length of 8
    Type: String
    MinLength: 8
    ConstraintDescription: Must be minimum length of 8
  
  NginxContainerImage: 
    Description: Nginx container image with tag nginx-market*
    Type: String
    MinLength: 8
    ConstraintDescription: Must be minimum length of 8
  
  WebContainerImage: 
    Description: Container image for web app with tag market*
    Type: String
    MinLength: 8
    ConstraintDescription: Must be minimum length of 8

  VPCNamePrefix:
    Description: Prefix for naming VPC and corresponding resources
    Type: String
    Default: cypienta
    MinLength: 3
    ConstraintDescription: Must be minimum length of 3

  VpcCidr:
    Description: The CIDR block for the VPC
    Type: String
    Default: 10.0.0.0/16
    AllowedPattern: ^(\d{1,3}\.){3}\d{1,3}\/\d{1,2}$
    ConstraintDescription: Must be a valid CIDR block of the form x.x.x.x/x

  Subnet1Cidr:
    Description: The CIDR block for the first public subnet
    Type: String
    Default: 10.0.0.0/20
    AllowedPattern: ^(\d{1,3}\.){3}\d{1,3}\/\d{1,2}$
    ConstraintDescription: Must be a valid CIDR block of the form x.x.x.x/x

  Subnet2Cidr:
    Description: The CIDR block for the second public subnet
    Type: String
    Default: 10.0.16.0/20
    AllowedPattern: ^(\d{1,3}\.){3}\d{1,3}\/\d{1,2}$
    ConstraintDescription: Must be a valid CIDR block of the form x.x.x.x/x

  LoadBalancerName:
    Description: Name of Load balancer
    Type: String
    Default: cypienta-ui
    MinLength: 3
    ConstraintDescription: Must be minimum length of 3

  ClusterAutoScalingMinSize:
    Description: The minimum size of the auto scaling group for the ECS cluster
    Type: String
    Default: 0
    AllowedPattern: ^[0-9]+$
    ConstraintDescription: Must be an integer value
  
  ClusterAutoScalingMaxSize:
    Description: The maximum size of the auto scaling group for the ECS cluster
    Type: String
    Default: 5
    AllowedPattern: ^[0-9]+$
    ConstraintDescription: Must be an integer value

  LatestECSOptimizedAMI:
    Description: AMI ID
    Type: AWS::SSM::Parameter::Value<AWS::EC2::Image::Id>
    Default: /aws/service/ecs/optimized-ami/amazon-linux-2/recommended/image_id

Mappings:
  RegionMap:
    af-south-1:
      lambdaLayer: 'arn:aws:lambda:af-south-1:336392948345:layer:AWSSDKPandas-Python311:12'
    ap-northeast-1:
      lambdaLayer: >-
        arn:aws:lambda:ap-northeast-1:336392948345:layer:AWSSDKPandas-Python311:12
    ap-northeast-2:
      lambdaLayer: >-
        arn:aws:lambda:ap-northeast-2:336392948345:layer:AWSSDKPandas-Python311:12
    ap-northeast-3:
      lambdaLayer: >-
        arn:aws:lambda:ap-northeast-3:336392948345:layer:AWSSDKPandas-Python311:12
    ap-south-1:
      lambdaLayer: 'arn:aws:lambda:ap-south-1:336392948345:layer:AWSSDKPandas-Python311:12'
    ap-southeast-1:
      lambdaLayer: >-
        arn:aws:lambda:ap-southeast-1:336392948345:layer:AWSSDKPandas-Python311:12
    ap-southeast-2:
      lambdaLayer: >-
        arn:aws:lambda:ap-southeast-2:336392948345:layer:AWSSDKPandas-Python311:12
    ca-central-1:
      lambdaLayer: 'arn:aws:lambda:ca-central-1:336392948345:layer:AWSSDKPandas-Python311:12'
    eu-central-1:
      lambdaLayer: 'arn:aws:lambda:eu-central-1:336392948345:layer:AWSSDKPandas-Python311:12'
    eu-north-1:
      lambdaLayer: 'arn:aws:lambda:eu-north-1:336392948345:layer:AWSSDKPandas-Python311:12'
    eu-west-1:
      lambdaLayer: 'arn:aws:lambda:eu-west-1:336392948345:layer:AWSSDKPandas-Python311:12'
    eu-west-2:
      lambdaLayer: 'arn:aws:lambda:eu-west-2:336392948345:layer:AWSSDKPandas-Python311:12'
    eu-west-3:
      lambdaLayer: 'arn:aws:lambda:eu-west-3:336392948345:layer:AWSSDKPandas-Python311:12'
    sa-east-1:
      lambdaLayer: 'arn:aws:lambda:sa-east-1:336392948345:layer:AWSSDKPandas-Python311:12'
    us-east-1:
      lambdaLayer: 'arn:aws:lambda:us-east-1:336392948345:layer:AWSSDKPandas-Python311:12'
    us-east-2:
      lambdaLayer: 'arn:aws:lambda:us-east-2:336392948345:layer:AWSSDKPandas-Python311:12'
    us-west-1:
      lambdaLayer: 'arn:aws:lambda:us-west-1:336392948345:layer:AWSSDKPandas-Python311:12'
    us-west-2:
      lambdaLayer: 'arn:aws:lambda:us-west-2:336392948345:layer:AWSSDKPandas-Python311:12'
    ap-east-1:
      lambdaLayer: 'arn:aws:lambda:ap-east-1:839552336658:layer:AWSSDKPandas-Python311:14'
    ap-south-2:
      lambdaLayer: 'arn:aws:lambda:ap-south-2:246107603503:layer:AWSSDKPandas-Python311:13'
    ap-southeast-3:
      lambdaLayer: >-
        arn:aws:lambda:ap-southeast-3:258944054355:layer:AWSSDKPandas-Python311:14
    ap-southeast-4:
      lambdaLayer: >-
        arn:aws:lambda:ap-southeast-4:945386623051:layer:AWSSDKPandas-Python311:13
    eu-central-2:
      lambdaLayer: 'arn:aws:lambda:eu-central-2:956415814219:layer:AWSSDKPandas-Python311:13'
    eu-south-1:
      lambdaLayer: 'arn:aws:lambda:eu-south-1:774444163449:layer:AWSSDKPandas-Python311:14'
    eu-south-2:
      lambdaLayer: 'arn:aws:lambda:eu-south-2:982086096842:layer:AWSSDKPandas-Python311:13'
    il-central-1:
      lambdaLayer: 'arn:aws:lambda:il-central-1:263840725265:layer:AWSSDKPandas-Python311:12'
    me-central-1:
      lambdaLayer: 'arn:aws:lambda:me-central-1:593833071574:layer:AWSSDKPandas-Python311:12'
    me-south-1:
      lambdaLayer: 'arn:aws:lambda:me-south-1:938046470361:layer:AWSSDKPandas-Python311:14'
    cn-north-1:
      lambdaLayer: >-
        arn:aws-cn:lambda:cn-north-1:406640652441:layer:AWSSDKPandas-Python311:10
    cn-northwest-1:
      lambdaLayer: >-
        arn:aws-cn:lambda:cn-northwest-1:406640652441:layer:AWSSDKPandas-Python311:10
  
Resources:
  Bucket:
    Type: AWS::S3::Bucket
    Properties:
      BucketName: !Ref BucketName

  SagemakerRole:
    Type: AWS::IAM::Role
    Properties:
      AssumeRolePolicyDocument:
        Version: '2012-10-17'
        Statement:
          - Effect: Allow
            Principal:
              Service: sagemaker.amazonaws.com
            Action: sts:AssumeRole
      ManagedPolicyArns:
        - arn:aws:iam::aws:policy/AmazonSageMakerFullAccess
      Policies:
        - PolicyName: S3access
          PolicyDocument:
            Version: '2012-10-17'
            Statement:
              - Effect: Allow
                Action:
                  - s3:GetObject
                  - s3:PutObject
                  - s3:DeleteObject
                  - s3:ListBucket
                Resource:
                  - arn:aws:s3:::*
  LambdaRole:
    Type: AWS::IAM::Role
    Properties:
      AssumeRolePolicyDocument:
        Version: '2012-10-17'
        Statement:
          - Effect: Allow
            Principal:
              Service: lambda.amazonaws.com
            Action: sts:AssumeRole
      ManagedPolicyArns:
        - arn:aws:iam::aws:policy/service-role/AmazonSageMakerServiceCatalogProductsLambdaServiceRolePolicy
        - arn:aws:iam::aws:policy/AmazonS3FullAccess
  
  TechModel:
    Type: AWS::SageMaker::Model
    Properties:
      Containers:
        - ModelPackageName: !Ref TechniqueModelARN
      ExecutionRoleArn: !GetAtt SagemakerRole.Arn
      ModelName: !Ref TechniqueModelName
      EnableNetworkIsolation: true
  ClusterModel:
    Type: AWS::SageMaker::Model
    Properties:
      Containers:
        - ModelPackageName: !Ref ClusterModelARN
      ExecutionRoleArn: !GetAtt SagemakerRole.Arn
      ModelName: !Ref ClusterModelName
      EnableNetworkIsolation: true
  FlowModel:
    Type: AWS::SageMaker::Model
    Properties:
      Containers:
        - ModelPackageName: !Ref FlowModelARN
      ExecutionRoleArn: !GetAtt SagemakerRole.Arn
      ModelName: !Ref FlowModelName
      EnableNetworkIsolation: true

  splunkInput:
    Type: AWS::Serverless::Function
    Properties:
      FunctionName: !Ref SplunkInputFunctionName
      Description: !Sub
        - Stack ${AWS::StackName} Function ${ResourceName}
        - ResourceName: splunkInput
      InlineCode: |
        '''
        Chunk input from splunk to create input for VRL transform.
        Merge back chunks for single input file back to one and send as input to pipeline.

        Input: Input from splunk Add-on Amazon S3 Uploader for Splunk (json format)
        '''

        import os
        import sys
        import subprocess
        import json
        import glob
        import urllib
        import boto3
        from itertools import islice
        import gc

        # Install ijson pip library to handle large json input files
        os.makedirs("/tmp/pylib", exist_ok=True)
        subprocess.call('pip install ijson==3.3.0 -t /tmp/pylib/ --no-cache-dir'.split(), stdout=subprocess.DEVNULL, stderr=subprocess.DEVNULL)
        sys.path.insert(1, '/tmp/pylib/')

        import ijson

        CHUNK_SIZE = int(os.getenv("chunk_size"))

        missing_variables = []
        if CHUNK_SIZE is None:
            missing_variables.append("chunk_size")

        if missing_variables:
            raise ValueError(f"Please enter environment variable(s): {missing_variables}")

        S3_CLIENT = boto3.client("s3")

        SCRATCH_DIR = "scratch"

        BUF_SIZE = 1*1024*1024

        TEMP_INPUT_DIR = "/tmp/input/"
        os.makedirs(TEMP_INPUT_DIR, exist_ok=True)


        def lambda_handler(event, context):
            '''
            Read input file and split in chunks
            '''
            global S3_CLIENT
            global CHUNK_SIZE

            func = "lambda_handler"

            input_file_object = urllib.parse.unquote_plus(event["Records"][0]["s3"]["object"]["key"], encoding="utf-8")

            clear_temp_dir()

            print(f"{func}: Input file object: {input_file_object}")

            prefix = "/".join(input_file_object.split("/")[:2])
            if prefix == "splunk_input/input":
                process_splunk_input(event, context)
            else:
                merge_transformed_input(event, context)


        def merge_transformed_input(event, context):
            '''
            Read chunk transformed input file and merge it to be input to pipeline
            '''
            global S3_CLIENT
            func = "merge_transformed_input"
            bucket = event["Records"][0]["s3"]["bucket"]["name"]
            input_file_object = urllib.parse.unquote_plus(event["Records"][0]["s3"]["object"]["key"], encoding="utf-8")

            input_file = "/tmp/chunk_input.json"

            clear_temp_dir()

            try:
                print(f"{func}: Download input file: {bucket}/{input_file_object}")
                S3_CLIENT.download_file(bucket, input_file_object, input_file)
            except Exception as e:
                print(f"{func}: Failed to download file from S3: {bucket}/{input_file_object}")
                raise e

            print(f"{func}: Check if all splits are processed")
            input_file_name = input_file_object.split("/")[-2]
            input_file_splits_length_json_filename = f"splunk_input/{SCRATCH_DIR}/count/{input_file_name}/split_length.json"
            temp_input_file_splits_length_json_filename = f"/tmp/split_length.json"
            try:
                print(f"{func}: Download length file: {bucket}/{input_file_splits_length_json_filename}")
                S3_CLIENT.download_file(bucket, input_file_splits_length_json_filename, temp_input_file_splits_length_json_filename)
            except Exception as e:
                print(f"{func}: Failed to download file from S3: {bucket}/{input_file_splits_length_json_filename}")
                raise e

            length_json = json.load(open(temp_input_file_splits_length_json_filename, "r"))

            prefix = f"splunk_input/{SCRATCH_DIR}/transformed/{input_file_name}/"
            print(f"{func}: Prefix to search: {prefix}")

            paginator = S3_CLIENT.get_paginator('list_objects_v2')
            page_iterator = paginator.paginate(Bucket=bucket, Prefix=prefix)

            completed_chunk_keys = []

            print(f"{func}: Iterate through each page")
            for page in page_iterator:
                if 'Contents' in page:
                    for obj in page['Contents']:
                        completed_chunk_keys.append(obj['Key'])

            print(f"{func}: Total files in prefix: {len(completed_chunk_keys)}")

            if len(completed_chunk_keys) != length_json["count"]:
                print(f"{func}: All chunk did not complete processing. Skip merge")
                return

            print(f"{func}: All chunks completed processing. Merge chunks and upload to S3")

            input_to_pipeline_filename = "/tmp/input_pipe.json"
            with open(input_to_pipeline_filename, "w") as f_write:
                f_write.write("""{"input": [""")
                first_line_present = False

                for chunk_key in completed_chunk_keys:
                    completed_chunk_filename = f"{TEMP_INPUT_DIR}chunk.json"
                    try:
                        print(f"{func}: Download input file: {bucket}/{chunk_key}")
                        S3_CLIENT.download_file(bucket, chunk_key, completed_chunk_filename)
                    except Exception as e:
                        print(f"{func}: Failed to download file from S3: {bucket}/{chunk_key}")
                        raise e

                    print("read and add chunk contents")
                    with open(completed_chunk_filename, "r") as f_read:
                        for line in f_read:
                            if first_line_present:
                                f_write.write(",\n" + line.strip("\n"))
                            else:
                                f_write.write("\n" + line.strip("\n"))
                                first_line_present = True

                f_write.write("]}")

            print("upload output file")
            output_file_object = f"input/{input_file_name}.json"

            try:
                S3_CLIENT.upload_file(input_to_pipeline_filename, bucket, output_file_object)
            except Exception as e:
                print(f"{func}: Failed to upload file to S3: {bucket}/{output_file_object}")
                raise e


        def process_splunk_input(event, context):
            '''
            Read input file and split in chunks
            '''
            global S3_CLIENT
            global CHUNK_SIZE

            func = "process_splunk_input"

            bucket = event["Records"][0]["s3"]["bucket"]["name"]
            input_file_object = urllib.parse.unquote_plus(event["Records"][0]["s3"]["object"]["key"], encoding="utf-8")

            input_file = "/tmp/input.json"

            clear_temp_dir()

            try:
                print(f"{func}: Download input file: {bucket}/{input_file_object}")
                S3_CLIENT.download_file(bucket, input_file_object, input_file)
            except Exception as e:
                print(f"{func}: Failed to download file from S3: {bucket}/{input_file_object}")
                raise e
            
            print(f"{func}: Download completed")

            chunks_of_large_input_file = create_chunk_for_large_input_file(input_file)

            input_file_name = input_file_object.split("/")[-1].split(".")[0]

            input_file_splits_length_json = {"count": len(chunks_of_large_input_file)}

            input_file_splits_length_json_filename = f"splunk_input/{SCRATCH_DIR}/count/{input_file_name}/split_length.json"
            temp_input_file_splits_length_json_filename = f"/tmp/split_length.json"
            json.dump(input_file_splits_length_json, open(temp_input_file_splits_length_json_filename, "w"))

            try:
                S3_CLIENT.upload_file(temp_input_file_splits_length_json_filename, bucket, input_file_splits_length_json_filename)
            except Exception as e:
                print(f"{func}: Failed to save file to S3: {bucket}/{input_file_splits_length_json_filename}")
                raise e

            print(f"{func}: Upload chunks. Number of chunks: {len(chunks_of_large_input_file)}")
            for i, chunk_of_large_input_file in enumerate(chunks_of_large_input_file, start=1):
                output_file_object = f"splunk_input/{SCRATCH_DIR}/chunk/{input_file_name}/split_{i}.json"
                try:
                    S3_CLIENT.upload_file(chunk_of_large_input_file, bucket, output_file_object)
                except Exception as e:
                    print(f"{func}: Failed to save file to S3: {bucket}/{output_file_object}")
                    raise e

            print(f"{func}: Upload completed")


        def create_chunk_for_large_input_file(temp_input_file):
            '''
            Create chunks for a large input file
            Returns:
                List of split file names
            '''
            global CHUNK_SIZE
            global BUF_SIZE
            global TEMP_INPUT_DIR

            split_files = []
            with open(temp_input_file, 'rb') as f:
                split_files = []
                item = 1
                input_list_iter = ijson.items(f, 'item', use_float=True, buf_size=BUF_SIZE)
                while True:
                    input_list = list(islice(input_list_iter, CHUNK_SIZE))
                    split_filename = TEMP_INPUT_DIR + "input" + '_' + str(item) + '.json'
                    if not input_list:
                        break

                    with open(split_filename, 'w') as outfile:
                        for alert in input_list:
                            outfile.write(json.dumps(alert) + "\n")
                        split_files.append(split_filename)

                    item += 1
                    del input_list
                    gc.collect()

            del input_list_iter
            gc.collect()

            return split_files


        def clear_temp_dir():
            rm_list = glob.glob("/tmp/**", recursive=True)
            for f in rm_list:
                if os.path.isfile(f):
                    os.remove(f)

      Handler: index.lambda_handler
      Runtime: python3.11
      MemorySize: 3008
      EphemeralStorage:
        Size: 10240
      Timeout: 900
      Role: !GetAtt LambdaRole.Arn
      Tracing: Active
      Layers:
        - !FindInMap
          - RegionMap
          - !Ref AWS::Region
          - lambdaLayer
      Environment:
        Variables:
          chunk_size: "5000"
      Events:
        S3ObjectCreated:
          Type: S3
          Properties:
            Bucket: !Ref Bucket
            Events: s3:ObjectCreated:*
            Filter:
              S3Key:
                Rules:
                  - Name: prefix
                    Value: splunk_input/input/
                  - Name: suffix
                    Value: .json
        S3ObjectCreatedTransformed:
          Type: S3
          Properties:
            Bucket: !Ref Bucket
            Events: s3:ObjectCreated:*
            Filter:
              S3Key:
                Rules:
                  - Name: prefix
                    Value: splunk_input/scratch/transformed/
                  - Name: suffix
                    Value: .json
  splunkInputLogGroup:
    Type: AWS::Logs::LogGroup
    DeletionPolicy: Retain
    Properties:
      LogGroupName: !Sub /aws/lambda/${splunkInput}
  splunkInputLambdaEventInvokeConfig:
    Type: AWS::Lambda::EventInvokeConfig
    Properties:
      FunctionName: !Ref splunkInput
      MaximumRetryAttempts: 0
      Qualifier: $LATEST

  vrlLambda:
    Type: AWS::Serverless::Function
    Properties: 
      FunctionName: !Ref VRLLambdaFunctionName
      Description: !Sub
        - Stack ${AWS::StackName} Function ${ResourceName}
        - ResourceName: vrlLambda
      ImageUri: !Ref VRLLambdaImage
      PackageType: Image
      MemorySize: 3008
      EphemeralStorage:
        Size: 10240
      Timeout: 900
      Role: !GetAtt LambdaRole.Arn
      Tracing: Active
      Environment:
        Variables:
          vrl_program_bucket: !Ref BucketName
          vrl_program_s3_key: "splunk_input/program/program.vrl"
      Events:
        S3ObjectCreated:
          Type: S3
          Properties:
            Bucket: !Ref Bucket
            Events: s3:ObjectCreated:*
            Filter:
              S3Key:
                Rules:
                  - Name: prefix
                    Value: splunk_input/scratch/chunk/
                  - Name: suffix
                    Value: .json
  vrlLambdaLogGroup:
    Type: AWS::Logs::LogGroup
    DeletionPolicy: Retain
    Properties:
      LogGroupName: !Sub /aws/lambda/${vrlLambda}
  vrlLambdaLambdaEventInvokeConfig:
    Type: AWS::Lambda::EventInvokeConfig
    Properties:
      FunctionName: !Ref vrlLambda
      MaximumRetryAttempts: 0
      Qualifier: $LATEST

  enrichWithTechnique:
    Type: AWS::Serverless::Function
    Properties:
      FunctionName: !Ref EnrichTechFunctionName
      Description: !Sub
        - Stack ${AWS::StackName} Function ${ResourceName}
        - ResourceName: enrichWithTechnique
      InlineCode: |
        '''
        Chunk input file, sanititze in required format, encode attributes, encode node_features if present, generate internal id.
        Create batch transform job for technique classification.

        Input: User input file
        '''

        import os
        import sys
        import subprocess
        import gc
        from datetime import datetime, timezone
        from dateutil import parser

        import urllib
        import uuid
        import json
        import glob
        from itertools import islice
        import sqlite3
        from contextlib import closing

        import boto3
        from botocore.exceptions import ClientError
        import pandas as pd
        import numpy as np

        # Install ijson pip library to handle large json input files
        os.makedirs("/tmp/pylib", exist_ok=True)
        subprocess.call('pip install ijson==3.3.0 -t /tmp/pylib/ --no-cache-dir'.split(), stdout=subprocess.DEVNULL, stderr=subprocess.DEVNULL)
        sys.path.insert(1, '/tmp/pylib/')

        import ijson

        TECHNIQUE_MODEL_NAME = os.getenv("technique_model_name")

        BATCH_TRANSFORM_JOB_INSTANCE_TYPE = os.getenv("batch_transform_job_instance_type")

        BATCH_TRANSFORM_JOB_MAX_RETRIES = int(os.getenv("batch_transform_job_max_retries"))

        BATCH_TRANSFORM_JOB_TIMEOUT = int(os.getenv("batch_transform_job_timeout"))

        BATCH_TRANSFORM_JOB_MAX_PAYLOAD = int(os.getenv("batch_transform_job_max_payload"))

        BATCH_TRANSFORM_JOB_CONCURRENT_TRANSFORM = int(os.getenv("batch_transform_job_concurrent_transform"))

        TECHNIQUE_LOOKUP_OBJECT = os.getenv("technique_lookup_object")

        CHUNK_SIZE = int(os.getenv("chunk_size"))

        ENCODE_OTHER_ATTRS = os.getenv("encode_other_attrs").lower() == "true"

        missing_variables = []

        if TECHNIQUE_MODEL_NAME is None:
            missing_variables.append("technique_model_name")
        if BATCH_TRANSFORM_JOB_INSTANCE_TYPE is None:
            missing_variables.append("batch_transform_job_instance_type")
        if BATCH_TRANSFORM_JOB_MAX_RETRIES is None:
            missing_variables.append("batch_transform_job_max_retries")
        if BATCH_TRANSFORM_JOB_TIMEOUT is None:
            missing_variables.append("batch_transform_job_timeout")
        if BATCH_TRANSFORM_JOB_MAX_PAYLOAD is None:
            missing_variables.append("batch_transform_job_max_payload")
        if BATCH_TRANSFORM_JOB_CONCURRENT_TRANSFORM is None:
            missing_variables.append("batch_transform_job_concurrent_transform")
        if TECHNIQUE_LOOKUP_OBJECT is None:
            missing_variables.append("technique_lookup_object")
        if CHUNK_SIZE is None:
            missing_variables.append("chunk_size")
        if ENCODE_OTHER_ATTRS is None:
            missing_variables.append("encode_other_attrs")

        if missing_variables:
            raise ValueError(f"Please enter environment variable(s): {missing_variables}")

        S3_CLIENT = boto3.client("s3")
        SAGEMAKER = boto3.client(service_name='sagemaker')

        BUF_SIZE = 1*1024*1024

        TEMP_INPUT_DIR = "/tmp/input/"
        os.makedirs(TEMP_INPUT_DIR, exist_ok=True)

        SCRATCH_DIR = "scratch"

        PATH_TO_SQLITE = f"{SCRATCH_DIR}/cypienta.db"
        TEMP_SQLITE_FILENAME = "/tmp/cypienta.db"

        INTERNAL_ID_TO_USER_ID_OBJECT_KEY = f"{SCRATCH_DIR}/internal_id_to_user_id.json"
        INTERNAL_ID_TO_UNIQUE_ID_OBJECT_KEY = f"{SCRATCH_DIR}/internal_id_to_unique_id.json"

        QUEUE_LOOKUP_FILE = f"{SCRATCH_DIR}/queue_lookup.json"
        NODE_FEATURE_LOOKUP_FILE = f"{SCRATCH_DIR}/node_feature_lookup.json"

        TEMP_INPUT_FILENAME = "/tmp/input_all.json"
        TEMP_ALERTS_INPUT_FILENAME = f"{TEMP_INPUT_DIR}input.json"
        TEMP_CLUSTER_CONFIG_INPUT_FILENAME = f"{TEMP_INPUT_DIR}cluster_config.json"
        TEMP_CONFIG_INPUT_FILENAME = f"{TEMP_INPUT_DIR}config.json"
        TEMP_NODE_FEATURE_INPUT_FILENAME = f"{TEMP_INPUT_DIR}node_feature.json"
        TEMP_NODE_FEATURE_ORIGINAL_INPUT_FILENAME = f"{TEMP_INPUT_DIR}node_feature_original.json"
        TEMP_USER_FEEDBACK_WEIGHTS = f"{TEMP_INPUT_DIR}user_feedback_weights.json"

        TEMP_TECHNIQUE_CLASSIFICATION_LOOKUP_FILE = "/tmp/data.csv"

        TEMP_INTERNAL_ID_TO_USER_ID_LOOKUP_FILE = "/tmp/internal_id_to_user_id.json"
        TEMP_INTERNAL_ID_TO_UNIQUE_ID_LOOKUP_FILE = "/tmp/internal_id_to_unique_id.json"
        TEMP_QUEUE_LOOKUP_FILE = "/tmp/queue_lookup.json"

        TEMP_NODE_FEATURE_LOOKUP_FILE = "/tmp/node_feature_lookup.json"

        TEMP_SKIPPED_EVENTS_FILENAME = "/tmp/skip_events.json"

        INITIAL_WEIGHTS_VALUE = 100

        CLUSTER_CONFIG_EXISTS_IN_REQUEST = False
        CONFIG_EXISTS_IN_REQUEST = False
        NODE_FEATURE_EXISTS_IN_REQUEST = False

        SKIP_ENCODING_ATTRIBUTE = ["priority", "port", "url", "user_agent", "cert", "user_feedback"]

        ENCODE_TABLE = {}


        def lambda_handler(event, context):
            '''
            Entrypoint from the trigger setup from lambda
            Args:
                event: Event triggered
                context: Context of the lambda function
            '''
            global S3_CLIENT
            global TEMP_INPUT_FILENAME
            global TEMP_TECHNIQUE_CLASSIFICATION_LOOKUP_FILE
            global TEMP_CONFIG_INPUT_FILENAME
            global TEMP_NODE_FEATURE_INPUT_FILENAME
            global NODE_FEATURE_EXISTS_IN_REQUEST
            global CLUSTER_CONFIG_EXISTS_IN_REQUEST
            global CONFIG_EXISTS_IN_REQUEST
            func = "lambda_handler"

            CLUSTER_CONFIG_EXISTS_IN_REQUEST = False
            CONFIG_EXISTS_IN_REQUEST = False
            NODE_FEATURE_EXISTS_IN_REQUEST = False

            bucket = event["Records"][0]["s3"]["bucket"]["name"]
            try:
                input_filename = urllib.parse.unquote_plus(event["Records"][0]["s3"]["object"]["key"], encoding="utf-8")

                # delete only the error txt files from output folder if present
                delete_error_log_s3(bucket)

                clear_temp_dir()

                initialize_sqlite(bucket)

                fetch_or_create_queue_lookup_file(bucket)
                skip_input = skip_processing_input_file(bucket, input_filename)

                if skip_input:
                    print(f"{func}: Skipping current input. Update queue lookup in S3")
                    try:
                        S3_CLIENT.upload_file(TEMP_QUEUE_LOOKUP_FILE, bucket, QUEUE_LOOKUP_FILE)
                    except Exception as e:
                        print(f"{func}: Failed to save file to S3: {bucket}/{QUEUE_LOOKUP_FILE}")
                        raise e
                    return
                else:
                    print(f"{func}: Process current input")
                    queue_lookup_json = json.load(open(TEMP_QUEUE_LOOKUP_FILE, "r"))

                    if len(queue_lookup_json["input_queue"]) > 0:
                        print(f"{func}: Current input is present in input queue. Remove from input queue.")
                        queue_lookup_json["input_queue"].pop(0)
                        json.dump(queue_lookup_json, open(TEMP_QUEUE_LOOKUP_FILE, "w"))

                try:
                    S3_CLIENT.download_file(bucket, input_filename, TEMP_INPUT_FILENAME)
                    print(f"{func}: Input file download completed")
                except Exception as e:
                    print(f"{func}: Failed to download input file")
                    raise e

                fetch_or_create_technique_classification_lookup_file(bucket)
                fetch_or_create_internal_id_to_user_id_lookup_file(bucket)
                fetch_or_create_internal_id_to_unique_id_lookup_file(bucket)

                path_to_global_attribute_weights = f"{SCRATCH_DIR}/attribute_weights.json"
                fetch_or_create_global_attribute_weights(bucket, path_to_global_attribute_weights)

                # try:
                #     print(f"{func}: Extract config and node_feature if present in the input file")
                #     process_input_file_to_extract_config_and_node_feature()
                # except Exception as e:
                #     print("Failure while extracting config and node_feature from input file")
                #     raise e

                get_static_cluster_config()

                try:
                    print(f"{func}: Create chunk for large input file.")
                    chunks_of_large_input_file = create_chunk_for_large_input_file()
                    if len(chunks_of_large_input_file) < 1:
                        raise ValueError(f"Number of chunks created for input file: {len(chunks_of_large_input_file)}. There should be atleast 1 chunk created. Check your input file.")
                except Exception as e:
                    print("Failure during creating chunks for input file.")
                    raise e

                print(f"{func}: Number of chunks created for large input file: {len(chunks_of_large_input_file)}")

                cef_to_internal_mappings = get_cef_to_internal_mappings()

                print(f"{func}: Map events to internal structure")
                process_chunks = insert_event_sql(bucket, chunks_of_large_input_file, cef_to_internal_mappings)

                print(f"{func}: Chunks to process: {process_chunks}")

                if not process_chunks:
                    print(f"{func}: Input file does not have any tranformed events that could be processed. Skip input file.")
                    return

                unique_ids = [str(uuid.uuid4()) for _ in process_chunks]

                print(f"Unique id(s) generated for the chunk(s): {unique_ids}")

                if NODE_FEATURE_EXISTS_IN_REQUEST:
                    process_node_features()
                    print(f"{func}: Upload node feature lookup file to S3")
                    try:
                        S3_CLIENT.upload_file(TEMP_NODE_FEATURE_LOOKUP_FILE, bucket, NODE_FEATURE_LOOKUP_FILE)
                    except Exception as e:
                        print(f"{func}: Failed to save file to S3: {bucket}/{NODE_FEATURE_LOOKUP_FILE}")
                        raise e

                first_batch_transform_job_started = False

                queue_lookup_json = json.load(open(TEMP_QUEUE_LOOKUP_FILE, "r"))

                for i, chunk_of_large_input_file in enumerate(process_chunks):

                    unique_id = unique_ids[i]

                    next_unique_id = None
                    if (i+1) < len(unique_ids):
                        next_unique_id = unique_ids[i+1]

                    if i > 0:
                        previous_unique_id = unique_ids[i-1]
                    else:
                        previous_unique_id = queue_lookup_json["prev"]
                        # previous_unique_id = None

                    # first - unique id of first batch of current input
                    # prevous - unique id of previous batch that needs to be run (inter file handled)
                    # next - unique id of next batch that needs to be run (last batch of current input will always be null)
                    queue_json = {
                        "first": unique_ids[0],
                        "previous": previous_unique_id,
                        "next": next_unique_id
                    }

                    print(f"{func}: Processing chunk file: {chunk_of_large_input_file}; unique ID for processing chunk file: {unique_id}")

                    print(f"{func}: Starting batch job for enriching alerts with techniques")
                    response = enrich_alerts_with_techniques(bucket, chunk_of_large_input_file, unique_id, queue_json, first_batch_transform_job_started)
                    if response:
                        first_batch_transform_job_started = True
                    print(f"{func}: Batch job created for enriching alerts with techniques")

                update_queue_lookup(bucket, unique_ids)
            except Exception as ex:
                print(f"{func}: Exception occurred while running lambda function. Uploading error file to S3.")
                error_message = f"Error occurred in lambda function: {context.function_name}. More details can be found in CloudWatch Logs for this lambda function. The exception message is: {ex}"
                upload_error_to_s3(bucket, error_message)
                raise ex


        def skip_processing_input_file(bucket, input_filename):
            '''
            Check if current input file should be processed or skipped
            Args:
                bucket: Bucket name
                input_filename: S3 object key of the input file on which lambda function is triggered
            Returns:
                true/false if current input processed should be skipped
            '''
            global S3_CLIENT
            global SCRATCH_DIR
            global TEMP_QUEUE_LOOKUP_FILE
            func = "skip_processing_input_file"

            skip_input = False

            queue_lookup_json = json.load(open(TEMP_QUEUE_LOOKUP_FILE, "r"))

            previous_input_last_batch_unique_id = queue_lookup_json["prev"]

            if previous_input_last_batch_unique_id is None:
                print(f"{func}: There are no previous batches to check. Start processing current input")
                skip_input = False
                return skip_input

            input_queue_entry = f"{bucket}/{input_filename}"
            print(f"{func}: Input queue entry: {input_queue_entry}")

            flow_output_for_previous_input_filename = f"{SCRATCH_DIR}/intermediate/{previous_input_last_batch_unique_id}/flow.json"
            flow_output_for_previous_input_present = False
            try:
                print(f"{func}: Check if the flow output for previous input is available: {bucket}/{flow_output_for_previous_input_filename}")
                S3_CLIENT.head_object(Bucket=bucket, Key=flow_output_for_previous_input_filename)
                flow_output_for_previous_input_present = True
            except ClientError as e:
                if e.response['Error']['Code'] != '404':
                    raise e

                print(f"{func}: Flow output is not present for previous input. Cannot start processing current input.")
                flow_output_for_previous_input_present = False

            if not flow_output_for_previous_input_present:
                if input_queue_entry not in queue_lookup_json["input_queue"]:
                    print(f"{func}: Current input is not present in queue, push to queue.")
                    queue_lookup_json["input_queue"].append(input_queue_entry)

                skip_input = True

            else:
                print(f"{func}: Flow output for previous input is present. Start processing next input.")

                if len(queue_lookup_json["input_queue"]) > 0:
                    print(f"{func}: Next input to process is: {queue_lookup_json['input_queue'][0]}")
                    if input_queue_entry == queue_lookup_json["input_queue"][0]:
                        print(f"{func}: Current input is to be processed next.")
                        skip_input = False
                    else:
                        print(f"{func}: Current input is not to be processed next.")
                        skip_input = True
                else:
                    print(f"{func}: There are no items in queue input. Current input is to be processed next.")
                    skip_input = False

            print(f"{func}: Save any updated to queue lookup file")
            json.dump(queue_lookup_json, open(TEMP_QUEUE_LOOKUP_FILE, "w"))

            return skip_input


        def setup_batch_transform_job_for_enrich_alerts_with_techniques(bucket, unique_id):
            '''
            Configure batch transform job for Technique detection model
            Args:
                bucket: Bucket name
                unique_id: Unique id of the chunk of input
                input_filename: S3 input filename to batch tranform job
            Returns:
                batch_transform_job: Configuration to create batch transform job
            '''
            transform_job_name = f'transform-job-tech-{unique_id}'

            transform_input = {
                "DataSource": {
                    "S3DataSource": {
                        "S3DataType": "S3Prefix",
                        "S3Uri": f"s3://{bucket}/{SCRATCH_DIR}/intermediate/{unique_id}/input_classification.json"
                    }
                },
                "ContentType": "text/plain",
                "SplitType": "Line"
            }
            transform_output = {
                "S3OutputPath": f"s3://{bucket}/{SCRATCH_DIR}/response/classification_out/{unique_id}/",
                "KmsKeyId": "",
                "AssembleWith": "Line"
            }
            transform_resources = {
                "InstanceType": BATCH_TRANSFORM_JOB_INSTANCE_TYPE,
                "InstanceCount": 1
            }
            model_client_config = {
                'InvocationsTimeoutInSeconds': BATCH_TRANSFORM_JOB_TIMEOUT,
                'InvocationsMaxRetries': BATCH_TRANSFORM_JOB_MAX_RETRIES
            }

            batch_transform_job = {
                'TransformJobName': transform_job_name,
                'ModelName': TECHNIQUE_MODEL_NAME,
                'TransformInput': transform_input,
                'TransformOutput': transform_output,
                'MaxPayloadInMB': BATCH_TRANSFORM_JOB_MAX_PAYLOAD,
                'MaxConcurrentTransforms': BATCH_TRANSFORM_JOB_CONCURRENT_TRANSFORM,
                "TransformResources": transform_resources,
                "ModelClientConfig": model_client_config
            }
            return batch_transform_job


        def enrich_alerts_with_techniques(bucket, chunk_of_large_input_file, unique_id, queue_json, first_batch_transform_job_started):
            '''
            Enriches alerts with techniques. Filter out previously classified text. Sanitize input. create batch transform job for technique classification
            Args:
                bucket: Bucket name
                chunk_of_large_input_file: Chunk of input file
                unique_id: The unique identifier for the data chunk
                queue_json: JSON object containing queue information
                first_batch_transform_job_started: Boolean indicating if the first batch transform job has started.
            Returns: None if no new alerts to classify, or the response from creating a batch transform job.
            '''

            global SCRATCH_DIR
            global TEMP_TECHNIQUE_CLASSIFICATION_LOOKUP_FILE
            global ENCODE_TABLE
            global ENCODE_OTHER_ATTRS
            global CLUSTER_CONFIG_EXISTS_IN_REQUEST
            global NODE_FEATURE_EXISTS_IN_REQUEST
            global CONFIG_EXISTS_IN_REQUEST
            global S3_CLIENT
            global TEMP_CONFIG_INPUT_FILENAME
            global TEMP_NODE_FEATURE_INPUT_FILENAME
            global TEMP_CLUSTER_CONFIG_INPUT_FILENAME
            global TEMP_INTERNAL_ID_TO_USER_ID_LOOKUP_FILE
            global INTERNAL_ID_TO_USER_ID_OBJECT_KEY
            global TEMP_INTERNAL_ID_TO_UNIQUE_ID_LOOKUP_FILE
            global INTERNAL_ID_TO_UNIQUE_ID_OBJECT_KEY
            global SAGEMAKER
            global SKIP_ENCODING_ATTRIBUTE
            global TEMP_NODE_FEATURE_ORIGINAL_INPUT_FILENAME

            func = "enrich_alerts_with_techniques"

            path_to_intermediate_input_before_technique_classification = f"{SCRATCH_DIR}/intermediate/{unique_id}/input.json"
            path_to_intermediate_queue = f"{SCRATCH_DIR}/intermediate/{unique_id}/queue.json"
            path_to_input_for_batch_technique_classification = f"{SCRATCH_DIR}/intermediate/{unique_id}/input_classification.json"
            path_to_cluster_config_from_input = f"{SCRATCH_DIR}/intermediate/{unique_id}/cluster_config.json"
            path_to_config_from_input = f"{SCRATCH_DIR}/intermediate/{unique_id}/config.json"
            path_to_node_feature_from_input = f"{SCRATCH_DIR}/intermediate/{unique_id}/node_feature.json"
            path_to_node_feature_original_from_input = f"{SCRATCH_DIR}/intermediate/{unique_id}/node_feature_original.json"

            technique_classification_lookup_df = pd.read_csv(TEMP_TECHNIQUE_CLASSIFICATION_LOOKUP_FILE)

            try:
                print(f"{func}: Save original chunk")
                path_to_original_chunk_input = f"{SCRATCH_DIR}/intermediate/{unique_id}/original_input.json"
                S3_CLIENT.upload_file(chunk_of_large_input_file, bucket, path_to_original_chunk_input)
            except Exception as e:
                print(f"{func}: Failed to save file to S3: {bucket}/{path_to_config_from_input}")
                raise e

            # From the input, filter alerts that do not have recognized techniques in the technique lookup table

            # drop dulicates from chunk input
            chunk_input_df = pd.read_json(chunk_of_large_input_file)
            chunk_input_df = chunk_input_df.rename(columns={"name": "alerts"})
            chunk_input_df = chunk_input_df.drop_duplicates("alerts")

            # if tech is present in the input chunk then rename it to techniques, else initialize to None
            if "tech" in chunk_input_df.columns:
                chunk_input_df = chunk_input_df.rename(columns={"tech": "techniques"})
            else:
                chunk_input_df["techniques"] = None

            # If there are any empty techniques in the input chunk, make it None.
            chunk_input_df['techniques'] = chunk_input_df['techniques'].apply(lambda x: None if x == [] else x)
            chunk_input_df = chunk_input_df[["alerts", "techniques"]]

            # Get only those rows from chunk input which did not have tech field, or it was empty.
            filtered_chunk_input_df = chunk_input_df[chunk_input_df["techniques"].isna()]

            # merge the technique lookup table with filtered chunk input.
            # merge on right means it will get None for the alerts not present in the lookup table
            merged_technique_classification_df = pd.merge(technique_classification_lookup_df, filtered_chunk_input_df, on="alerts", how="right")

            # Get all the alerts that are not present in the lookup table by filtering techniques from lookup table as None
            unique_alerts_to_classify = merged_technique_classification_df[merged_technique_classification_df["techniques_x"].isna()]
            unique_alerts_to_classify = unique_alerts_to_classify.drop_duplicates("alerts")
            unique_alerts_to_classify = unique_alerts_to_classify[unique_alerts_to_classify["alerts"].notna() & (unique_alerts_to_classify["alerts"] != "")]
            unique_alerts_to_classify = unique_alerts_to_classify["alerts"].to_list()

            # create dictionary for alerts: recognized techniques
            merged_technique_classification_df["techniques_x"] = merged_technique_classification_df["techniques_x"].fillna("[]")
            merged_technique_classification_df["techniques_y"] = merged_technique_classification_df["techniques_x"]
            merged_technique_classification_df = merged_technique_classification_df[["alerts", "techniques_y"]]

            merged_technique_classification_df = merged_technique_classification_df[merged_technique_classification_df["alerts"].notna() & (merged_technique_classification_df["alerts"] != "")]
            merged_technique_classification_dict = merged_technique_classification_df.set_index("alerts").to_dict()["techniques_y"]

            print(f"{func}: Start enriching alerts with techniques")

            chunk_input_json = json.load(open(chunk_of_large_input_file, "r"))
            batch_request_for_technique_classification_list = []

            # sanitize input. encode other_attributes_dict and update recognized techniques
            for chunk_input_alert in chunk_input_json:
                if "other_attributes_dict" not in chunk_input_alert or len(chunk_input_alert["other_attributes_dict"].keys()) == 0:
                    chunk_input_alert["other_attributes_dict"] = {"empty": True}
                else:
                    if ENCODE_OTHER_ATTRS:
                        # it will also encode special other_Attributes liek priority
                        for key in chunk_input_alert['other_attributes_dict'].keys():
                            if key in SKIP_ENCODING_ATTRIBUTE:
                                continue
                            encoded, ENCODE_TABLE = custom_encoder(chunk_input_alert['other_attributes_dict'][key], ENCODE_TABLE)
                            chunk_input_alert["other_attributes_dict"][key] = encoded
                    chunk_input_alert["other_attributes_dict"]["empty"] = False

                if "label" not in chunk_input_alert:
                    chunk_input_alert["label"] = -1

                alert_text = chunk_input_alert["name"]

                # if alert text is empty, continue. If tech field does not exist, initialize empty list
                if alert_text.strip() == "":
                    if "tech" not in chunk_input_alert:
                        chunk_input_alert["tech"] = []
                    continue

                # if the tech is not in input, put tech recognized in lookup table if present
                if "tech" not in chunk_input_alert:
                    chunk_input_alert["tech"] = []
                    if alert_text in merged_technique_classification_dict:
                        tech = json.loads(merged_technique_classification_dict[alert_text])
                        chunk_input_alert["tech"] = tech

            # create request for technique classification model
            for request_id, alert_text in enumerate(unique_alerts_to_classify, start=1):
                individual_request_body_for_technique_classification_batch = {"request_id": str(request_id), "input": alert_text}

                batch_request_for_technique_classification_list.append(json.dumps(individual_request_body_for_technique_classification_batch))

            batch_request_for_technique_classification_jsonl = "\n".join(batch_request_for_technique_classification_list)

            temp_batch_request_for_technique_classification_jsonl_filename = f"{chunk_of_large_input_file}_classification.txt"

            print(f"{func}: Save intermediate inputs to S3.")
            if CLUSTER_CONFIG_EXISTS_IN_REQUEST:
                try:
                    S3_CLIENT.upload_file(TEMP_CLUSTER_CONFIG_INPUT_FILENAME, bucket, path_to_cluster_config_from_input)
                except Exception as e:
                    print(f"{func}: Failed to save file to S3: {bucket}/{path_to_cluster_config_from_input}")
                    raise e

            if CONFIG_EXISTS_IN_REQUEST:
                try:
                    S3_CLIENT.upload_file(TEMP_CONFIG_INPUT_FILENAME, bucket, path_to_config_from_input)
                except Exception as e:
                    print(f"{func}: Failed to save file to S3: {bucket}/{path_to_config_from_input}")
                    raise e

            if NODE_FEATURE_EXISTS_IN_REQUEST:
                try:
                    S3_CLIENT.upload_file(TEMP_NODE_FEATURE_INPUT_FILENAME, bucket, path_to_node_feature_from_input)
                except Exception as e:
                    print(f"{func}: Failed to save file to S3: {bucket}/{path_to_node_feature_from_input}")
                    raise e

                try:
                    S3_CLIENT.upload_file(TEMP_NODE_FEATURE_ORIGINAL_INPUT_FILENAME, bucket, path_to_node_feature_original_from_input)
                except Exception as e:
                    print(f"{func}: Failed to save file to S3: {bucket}/{path_to_node_feature_original_from_input}")
                    raise e

            chunk_intermediate_input_local_filename = f"{chunk_of_large_input_file}_intermediate.json"

            json.dump(chunk_input_json, open(chunk_intermediate_input_local_filename, "w"))
            preprocess_chunk_input(unique_id, chunk_intermediate_input_local_filename)
            chunk_input_json = json.load(open(chunk_intermediate_input_local_filename, "r"))
            try:
                S3_CLIENT.upload_file(chunk_intermediate_input_local_filename, bucket, path_to_intermediate_input_before_technique_classification)
            except Exception as e:
                print(f"{func}: Failed to save file to S3: {bucket}/{path_to_intermediate_input_before_technique_classification}")
                raise e

            # Save internal id to user id mapping, internal id to unique id mapping
            try:
                S3_CLIENT.upload_file(TEMP_INTERNAL_ID_TO_USER_ID_LOOKUP_FILE, bucket, INTERNAL_ID_TO_USER_ID_OBJECT_KEY)
            except Exception as e:
                print(f"{func}: Failed to save file to S3: {bucket}/{INTERNAL_ID_TO_USER_ID_OBJECT_KEY}")
                raise e

            try:
                S3_CLIENT.upload_file(TEMP_INTERNAL_ID_TO_UNIQUE_ID_LOOKUP_FILE, bucket, INTERNAL_ID_TO_UNIQUE_ID_OBJECT_KEY)
            except Exception as e:
                print(f"{func}: Failed to save file to S3: {bucket}/{INTERNAL_ID_TO_UNIQUE_ID_OBJECT_KEY}")
                raise e

            # Save queue for interbatch
            chunk_intermediate_queue_filename = f"{chunk_of_large_input_file}_queue.json"
            json.dump(queue_json, open(chunk_intermediate_queue_filename, "w"))

            try:
                S3_CLIENT.upload_file(chunk_intermediate_queue_filename, bucket, path_to_intermediate_queue)
            except Exception as e:
                print(f"{func}: Failed to save file to S3: {bucket}/{path_to_intermediate_queue}")
                raise e

            print(f"{func}: Save batch request for technique classification")
            with open(temp_batch_request_for_technique_classification_jsonl_filename, "w") as file:
                file.writelines(batch_request_for_technique_classification_jsonl)
            try:
                S3_CLIENT.upload_file(temp_batch_request_for_technique_classification_jsonl_filename,
                                      bucket, path_to_input_for_batch_technique_classification)
            except Exception as e:
                print(f"{func}: Failed to save file to S3: {bucket}/{path_to_input_for_batch_technique_classification}")
                raise e

            if not unique_alerts_to_classify:
                print(f"{func}: No new alerts to classify. Skip batch transform job for {unique_id}")
                skip_creating_batch_transform_job(bucket, unique_id, chunk_input_json)
                return None

            if first_batch_transform_job_started:
                print(f"{func}: Will start batch transform job in sequential order for {unique_id}")
                return None

            try:
                print(f"{func}: Create batch transforom job for Technique classification. Unique id: {unique_id}")
                batch_transform_job = setup_batch_transform_job_for_enrich_alerts_with_techniques(bucket, unique_id)
                response = SAGEMAKER.create_transform_job(**batch_transform_job)
                print(f"{func}: Created batch transform job")
            except Exception as e:
                print(f"{func}: Failed to create batch transform job. Unique id: {unique_id}")
                raise e
            return response


        def insert_event_sql(bucket, chunks_of_large_input_file, cef_to_internal_mappings):
            '''
            Map chunk of input from CEF to internal and insert to event table.
            save skipped events to s3.
            Args:
                bucket: Bucket name
                chunks_of_large_input_file: Chunks of input file
                cef_to_internal_mappings: cef to internal mappings list
            Returns:
                list of chunks to process
            '''
            global S3_CLIENT
            global TEMP_SKIPPED_EVENTS_FILENAME
            global NODE_FEATURE_EXISTS_IN_REQUEST
            global TEMP_NODE_FEATURE_INPUT_FILENAME
            global TEMP_NODE_FEATURE_ORIGINAL_INPUT_FILENAME
            func = "insert_event_sql"

            process_chunks = []
            node_feature = {}
            for chunk_of_large_input_file in chunks_of_large_input_file:

                tranformed_events = []
                skipped_events = []

                print(f"{func}: Read cef mapped chunk events")
                events = json.load(open(chunk_of_large_input_file, "r"))

                print(f"{func}: Transform cef mapped chunk events")
                for event in events:
                    tranformed_event, cef_mapped_keys_to_event, event_node_feature = map_cef_to_internal(event, cef_to_internal_mappings)

                    # gather skipped events
                    if tranformed_event is None:
                        skipped_events.append(event)
                        continue

                    # skip insert to table for now

                    tranformed_events.append(tranformed_event)

                    # update node feature
                    for key, value in event_node_feature.items():
                        if key in node_feature:
                            # if its existing node, update node features
                            node_feature[key].update(value)
                        else:
                            # if its new node. get node features
                            node_feature[key] = value

                if len(skipped_events) > 0:
                    print(f"{func}: Save skipped events")
                    json.dump(skipped_events, open(TEMP_SKIPPED_EVENTS_FILENAME, "w"))

                    print(f"{func}: Upload skipped events to S3")
                    current_timestamp = datetime.now(timezone.utc).strftime("%Y-%m-%d %H:%M:%S %z")
                    skipped_chunk_object_key = f"skipped_events/{current_timestamp}/input.json"
                    try:
                        S3_CLIENT.upload_file(TEMP_SKIPPED_EVENTS_FILENAME, bucket, skipped_chunk_object_key)
                    except Exception as e:
                        print(f"{func}: Failed to save file to S3: {bucket}/{skipped_chunk_object_key}")
                        print(e)

                if len(tranformed_events) > 0:
                    print(f"{func}: save transformed cef mapped chunk events")
                    json.dump(tranformed_events, open(chunk_of_large_input_file, "w"))
                    process_chunks.append(chunk_of_large_input_file)
                else:
                    print(f"{func}: No events in transformed events, skip batch")

                try:
                    del events
                    del tranformed_events
                    del skipped_events
                    gc.collect()
                except Exception as e:
                    print(f"{func}: Failed to clear local variables. Continue anyways.")

            # save node features
            if node_feature.keys():
                print(f"{func}: Save node features")
                NODE_FEATURE_EXISTS_IN_REQUEST = True
                json.dump(node_feature, open(TEMP_NODE_FEATURE_INPUT_FILENAME, "w"))
                json.dump(node_feature, open(TEMP_NODE_FEATURE_ORIGINAL_INPUT_FILENAME, "w"))

            return process_chunks


        def map_cef_to_internal(event, cef_to_internal_mappings):
            '''
            Map cef fields to internal
            Args:
                event: event from chunk of input
                cef_to_internal_mappings: cef to internal mappings list
            Returns:
                tranformed event mapped from cef to internal, cef mapped keys to internal event, event_node_feature
            '''

            self_loop = None
            self_loop_key = None

            transformed_event = {
                "id": "",
                "src": "",
                "dst": "",
                "time": "",
                "name": "",
                "other_attributes_dict": {}
            }

            other_attrs = {}
            event_node_feature = {}

            cef_mapped_keys_to_event = {
                "time": "",
                "src": "",
                "dst": "",
                "name": ""
            }

            for mapping in cef_to_internal_mappings:
                src_field = mapping["source_field"]
                dst_field = mapping["dest_field"]
                src_field_value = event[src_field]
                if src_field_value and src_field_value != "":
                    if dst_field in ["id"]:
                        transformed_event["id"] = str(src_field_value)
                    elif dst_field in ["dst"]:
                        transformed_event["dst"] = src_field_value
                        cef_mapped_keys_to_event["dst"] = src_field
                    elif dst_field in ["src"]:
                        transformed_event["src"] = src_field_value
                        cef_mapped_keys_to_event["src"] = src_field
                    elif dst_field in ["time"]:
                        transformed_event["time"] = src_field_value
                        cef_mapped_keys_to_event["time"] = src_field
                    elif dst_field in ["name"]:
                        transformed_event["name"] = str(src_field_value)
                        cef_mapped_keys_to_event["name"] = src_field

                    elif dst_field in ["self_loop_src (when both dst & src r empty) "]:
                        self_loop = src_field_value
                        self_loop_key = src_field

                    elif dst_field in ["event_ftr_IMPRTNT_priority"]:
                        try:
                            src_field_value = int(src_field_value)
                            other_attrs["priority"] = src_field_value
                        except Exception as e:
                            pass
                    elif dst_field in ["src_port", "dst_port"]:
                        try:
                            src_field_value = int(src_field_value)
                            other_attrs["port"] = src_field_value
                        except Exception as e:
                            pass
                    elif dst_field in ["event_ftr", "event_ftr_IMPRTNT"]:
                        other_attrs[src_field] = src_field_value

            transformed_event["other_attributes_dict"].update(other_attrs)

            # if none of the src, dst is present, then use self loop
            if transformed_event["src"] and not transformed_event["dst"]:
                transformed_event["dst"] = transformed_event["src"]
                cef_mapped_keys_to_event["dst"] = cef_mapped_keys_to_event["src"]

            elif not transformed_event["src"] and transformed_event["dst"]:
                transformed_event["src"] = transformed_event["dst"]
                cef_mapped_keys_to_event["src"] = cef_mapped_keys_to_event["dst"]

            elif not transformed_event["src"] and not transformed_event["dst"]:
                # self loop get the cef field for the self loop
                if self_loop is not None:
                    transformed_event["src"] = self_loop
                    transformed_event["dst"] = self_loop
                    cef_mapped_keys_to_event["src"] = self_loop_key
                    cef_mapped_keys_to_event["dst"] = self_loop_key
                else:
                    # if self loop is absent too, then required fields src, dest cannot be mapped. skip event
                    return None, cef_mapped_keys_to_event, event_node_feature

            # if required fields are empty, skip event:
            if not transformed_event["time"]:
                return None, cef_mapped_keys_to_event, event_node_feature

            # convert to unix timestamp
            original_time = transformed_event["time"]
            unix_timestamp = convert_to_unix_timestamp(transformed_event["time"])
            if unix_timestamp is None:
                return None, cef_mapped_keys_to_event, event_node_feature
            transformed_event["time"] = unix_timestamp

            self_loop_exists = (transformed_event["src"] == transformed_event["dst"])

            # create node feature

            event_node_feature[transformed_event["src"]] = {}
            event_node_feature[transformed_event["dst"]] = {}

            for mapping in cef_to_internal_mappings:
                src_field = mapping["source_field"]
                dst_field = mapping["dest_field"]

                if dst_field not in ["dst_node_ftr", "src_node_ftr", "self_loop_node_ftr"]:
                    continue

                src_field_value = event[src_field]
                if src_field_value != "":
                    if dst_field in ["dst_node_ftr"]:
                        event_node_feature[transformed_event["dst"]][src_field] = src_field_value
                    elif dst_field in ["src_node_ftr"]:
                        event_node_feature[transformed_event["src"]][src_field] = src_field_value
                    elif dst_field in ["self_loop_node_ftr"]:
                        if self_loop_exists:
                            event_node_feature[transformed_event["src"]][src_field] = src_field_value

            # get id
            source_type = str(event['_sourceType'])
            source_type = source_type.replace("'", "\"")
            source_type = source_type.replace("\\", "\\\\")

            source = str(event['_source'])
            source = source.replace("'", "\"")
            source = source.replace("\\", "\\\\")

            event_type = str(event['_eventType'])
            event_type = event_type.replace("'", "\"")

            event_cd = str(event['_cd'])
            event_cd = event_cd.replace("'", "\"")

            event_uuid = str(uuid.uuid4())

            event_time = str(event["_time"])

            transformed_event["id"] = f'''"{transformed_event['src']}" "{transformed_event['dst']}"'''

            if event_time != "":
                transformed_event["id"] = f'''{transformed_event["id"]} _time="{event_time}"'''

            if source != "":
                transformed_event["id"] = f'''{transformed_event["id"]} source="{source}"'''

            if source_type != "":
                transformed_event["id"] = f'''{transformed_event["id"]} sourcetype="{source_type}"'''

            transformed_event["id"] = f'''{transformed_event["id"]} id:{event_uuid}'''

            return transformed_event, cef_mapped_keys_to_event, event_node_feature


        def preprocess_chunk_input(unique_id, chunk_of_large_input_file):
            '''
            Generate internal id and update chunk input
            Args:
                unique_id: The unique identifier for the data chunk.
                chunk_of_large_input_file: Chunk of input file
            '''

            chunk_input_df = pd.read_json(chunk_of_large_input_file)

            start_id, end_id = create_internal_id(unique_id, chunk_input_df["id"].to_list())

            chunk_input_df["id"] = list(range(start_id, end_id + 1))

            chunk_input_df.to_json(chunk_of_large_input_file, orient="records")


        def create_internal_id(unique_id, user_id_list):
            '''
            Create mapping from internal id to user id and internal id to unique id
            Args:
                unique_id: The unique identifier for the data chunk.
                user_id_list: List of user ids to map to internal id.
            Returns:
                Tuple containing the start and end internal id.
            '''
            global TEMP_INTERNAL_ID_TO_USER_ID_LOOKUP_FILE
            global TEMP_INTERNAL_ID_TO_UNIQUE_ID_LOOKUP_FILE

            func = "create_internal_id"

            print(f"{func}: Create mapping from internal id to user id")
            temp_internal_id_to_user_id_json = json.load(open(TEMP_INTERNAL_ID_TO_USER_ID_LOOKUP_FILE, "r"))

            start_id = len(temp_internal_id_to_user_id_json)
            temp_internal_id_to_user_id_json.extend(user_id_list)
            end_id = len(temp_internal_id_to_user_id_json) - 1

            print(f"{func}: Save mapping from internal id to user id")
            json.dump(temp_internal_id_to_user_id_json, open(TEMP_INTERNAL_ID_TO_USER_ID_LOOKUP_FILE, "w"))

            print(f"{func}: Create mapping from internal id to unique id")
            temp_internal_id_to_unique_id_json = json.load(open(TEMP_INTERNAL_ID_TO_UNIQUE_ID_LOOKUP_FILE, "r"))

            temp_internal_id_to_unique_id_json = temp_internal_id_to_unique_id_json + [unique_id] * len(user_id_list)

            print(f"{func}: Save mapping from internal id to unique id")
            json.dump(temp_internal_id_to_unique_id_json, open(TEMP_INTERNAL_ID_TO_UNIQUE_ID_LOOKUP_FILE, "w"))

            return start_id, end_id


        def skip_creating_batch_transform_job(bucket, unique_id, chunk_input_json):
            '''
            Skip creating a batch transform job and upload enriched alerts with techniques as input to cluster detection.
            Args:
                bucket: The S3 bucket name.
                unique_id: The unique id for classification.
                chunk_input_json: JSON data for chunk input.
            '''
            global SCRATCH_DIR
            global CLUSTER_CONFIG_EXISTS_IN_REQUEST
            global TEMP_CLUSTER_CONFIG_INPUT_FILENAME
            global NODE_FEATURE_EXISTS_IN_REQUEST
            global TEMP_NODE_FEATURE_INPUT_FILENAME
            global TEMP_USER_FEEDBACK_WEIGHTS
            global S3_CLIENT

            func = "skip_creating_batch_transform_job"

            skip_batch_dir = "/tmp/skip_batch/"
            os.makedirs(skip_batch_dir, exist_ok=True)
            temp_enriched_alerts_with_techniques_output_filename = f"{skip_batch_dir}input.json"

            path_to_enriched_alerts_with_techniques = f"{SCRATCH_DIR}/output/classification/{unique_id}/input.json"

            user_feedback_weights_json = json.load(open(TEMP_USER_FEEDBACK_WEIGHTS, "r"))
            edge_weights = user_feedback_weights_json["event"]

            with open(temp_enriched_alerts_with_techniques_output_filename, "w") as of:
                req = {"request_id": "1", "input": chunk_input_json, "user_feedback_weights": edge_weights}

                if CLUSTER_CONFIG_EXISTS_IN_REQUEST:
                    req["cluster_config"] = json.load(open(TEMP_CLUSTER_CONFIG_INPUT_FILENAME, "r"))

                if NODE_FEATURE_EXISTS_IN_REQUEST:
                    req["node_feature"] = json.load(open(TEMP_NODE_FEATURE_INPUT_FILENAME, "r"))
                json.dump(req, of)

            try:
                print(f"{func}: Save enriched alerts with techniques as input to cluster detection: {bucket}/{path_to_enriched_alerts_with_techniques}")
                S3_CLIENT.upload_file(temp_enriched_alerts_with_techniques_output_filename, bucket, path_to_enriched_alerts_with_techniques)
            except Exception as e:
                print(f"{func}: Failed to save enriched alert with technique: {bucket}/{path_to_enriched_alerts_with_techniques}")
                raise e


        def fetch_or_create_technique_classification_lookup_file(bucket):
            '''
            Check if the technique classification lookup file for alerts exists on S3. Create if it does not exist.
            Args:
                bucket: Bucket name
            '''
            global TECHNIQUE_LOOKUP_OBJECT
            global S3_CLIENT
            global TEMP_TECHNIQUE_CLASSIFICATION_LOOKUP_FILE

            func = "fetch_or_create_technique_classification_lookup_file"
            try:
                print(
                    f"{func}: Checking if technique classification lookup file for alerts exists on S3 path {bucket}/{TECHNIQUE_LOOKUP_OBJECT}")

                S3_CLIENT.head_object(Bucket=bucket, Key=TECHNIQUE_LOOKUP_OBJECT)

            except ClientError as e:
                if e.response['Error']['Code'] != '404':
                    raise e

                print(f"{func}: Technique classification lookup file for alerts does not exist on S3. Create empty table")

                technique_classification_lookup_df = pd.DataFrame(columns=["alerts", "techniques"])
                technique_classification_lookup_df.to_csv(TEMP_TECHNIQUE_CLASSIFICATION_LOOKUP_FILE, index=False)

                print(f"{func}: Empty table created")
            else:
                try:
                    print(f"{func}: Download available technique classification lookup file for alerts from S3")

                    S3_CLIENT.download_file(bucket, TECHNIQUE_LOOKUP_OBJECT, TEMP_TECHNIQUE_CLASSIFICATION_LOOKUP_FILE)

                    print(f"{func}: Download completed")
                except Exception as e:
                    print(f"{func}: Failed to download lookup table from S3: {bucket}/{TECHNIQUE_LOOKUP_OBJECT}")
                    raise e


        def fetch_or_create_internal_id_to_user_id_lookup_file(bucket):
            '''
            Check if the internal id to user id lookup file exists on S3. Create if it does not
            Args:
                bucket: Bucket name
            '''
            global INTERNAL_ID_TO_USER_ID_OBJECT_KEY
            global S3_CLIENT
            global TEMP_INTERNAL_ID_TO_USER_ID_LOOKUP_FILE

            func = "fetch_or_create_internal_id_to_user_id_lookup_file"

            try:
                print(
                    f"{func}: Checking if internal id to user id lookup file exists on S3 path {bucket}/{INTERNAL_ID_TO_USER_ID_OBJECT_KEY}")

                S3_CLIENT.head_object(Bucket=bucket, Key=INTERNAL_ID_TO_USER_ID_OBJECT_KEY)

            except ClientError as e:
                if e.response['Error']['Code'] != '404':
                    raise e

                print(f"{func}: Internal id to user id lookup file does not exist on S3. Create empty table")

                internal_id_to_user_id_json = []
                json.dump(internal_id_to_user_id_json, open(TEMP_INTERNAL_ID_TO_USER_ID_LOOKUP_FILE, "w"))

                print(f"{func}: Empty table created")
            else:
                try:
                    print(f"{func}: Download available internal id to user id lookup file from S3")

                    S3_CLIENT.download_file(bucket, INTERNAL_ID_TO_USER_ID_OBJECT_KEY, TEMP_INTERNAL_ID_TO_USER_ID_LOOKUP_FILE)

                    print(f"{func}: Download completed")
                except Exception as e:
                    print(f"{func}: Failed to download lookup file from S3: {bucket}/{INTERNAL_ID_TO_USER_ID_OBJECT_KEY}")
                    raise e


        def fetch_or_create_internal_id_to_unique_id_lookup_file(bucket):
            '''
            Check if the internal id to unique id lookup file exists on S3. Create if it does not
            Args:
                bucket: The S3 bucket name
            '''
            global INTERNAL_ID_TO_UNIQUE_ID_OBJECT_KEY
            global S3_CLIENT
            global TEMP_INTERNAL_ID_TO_UNIQUE_ID_LOOKUP_FILE

            func = "fetch_or_create_internal_id_to_unique_id_lookup_file"

            try:
                print(
                    f"{func}: Checking if internal id to unique id lookup file exists on S3 path {bucket}/{INTERNAL_ID_TO_UNIQUE_ID_OBJECT_KEY}")

                S3_CLIENT.head_object(Bucket=bucket, Key=INTERNAL_ID_TO_UNIQUE_ID_OBJECT_KEY)

            except ClientError as e:
                if e.response['Error']['Code'] != '404':
                    raise e

                print(f"{func}: Internal id to unique id lookup file does not exist on S3. Create empty table")

                internal_id_to_unique_id_json = []
                json.dump(internal_id_to_unique_id_json, open(TEMP_INTERNAL_ID_TO_UNIQUE_ID_LOOKUP_FILE, "w"))

                print(f"{func}: Empty table created")
            else:
                try:
                    print(f"{func}: Download available uinternal id to unique id lookup file from S3")

                    S3_CLIENT.download_file(bucket, INTERNAL_ID_TO_UNIQUE_ID_OBJECT_KEY, TEMP_INTERNAL_ID_TO_UNIQUE_ID_LOOKUP_FILE)

                    print(f"{func}: Download completed")
                except Exception as e:
                    print(f"{func}: Failed to download lookup file from S3: {bucket}/{INTERNAL_ID_TO_UNIQUE_ID_OBJECT_KEY}")
                    raise e


        def fetch_or_create_queue_lookup_file(bucket):
            '''
            Check if the queue lookup file exists on S3. Create if it does not
            Args:
                bucket: The S3 bucket name
            '''
            global QUEUE_LOOKUP_FILE
            global S3_CLIENT
            global TEMP_QUEUE_LOOKUP_FILE

            func = "fetch_or_create_queue_lookup_file"

            try:
                print(
                    f"{func}: Checking if queue lookup file exists on S3 path {bucket}/{QUEUE_LOOKUP_FILE}")

                S3_CLIENT.head_object(Bucket=bucket, Key=QUEUE_LOOKUP_FILE)

            except ClientError as e:
                if e.response['Error']['Code'] != '404':
                    raise e

                print(f"{func}: Queue lookup file does not exist on S3. Create empty table")

                internal_id_to_unique_id_json = {"first": None, "prev": None, "input_queue": []}
                json.dump(internal_id_to_unique_id_json, open(TEMP_QUEUE_LOOKUP_FILE, "w"))

                print(f"{func}: Initiated queue lookup")
            else:
                try:
                    print(f"{func}: Download available queue lookup file from S3")

                    S3_CLIENT.download_file(bucket, QUEUE_LOOKUP_FILE, TEMP_QUEUE_LOOKUP_FILE)

                    print(f"{func}: Download completed")
                except Exception as e:
                    print(f"{func}: Failed to download lookup file from S3: {bucket}/{QUEUE_LOOKUP_FILE}")
                    raise e


        def fetch_or_create_global_attribute_weights(bucket, path_to_global_attribute_weights):
            '''
            Get or initialize the global attribute weights file
            Args:
                path_to_global_attribute_weights: global attribute weights object key
            '''
            global SCRATCH_DIR
            global S3_CLIENT
            global TEMP_USER_FEEDBACK_WEIGHTS
            func = "fetch_or_create_global_attribute_weights"

            try:
                print(
                    f"{func}: Checking if file exists on S3 path {bucket}/{path_to_global_attribute_weights}")

                S3_CLIENT.head_object(Bucket=bucket, Key=path_to_global_attribute_weights)

            except ClientError as e:
                if e.response['Error']['Code'] != '404':
                    raise e

                print(f"{func}: File does not exist on S3. Initialize file")

                global_attribute_weights_json = {
                    "event": {
                        "technique": INITIAL_WEIGHTS_VALUE,
                        "tactic": INITIAL_WEIGHTS_VALUE,
                        "stage": INITIAL_WEIGHTS_VALUE,
                        "count": INITIAL_WEIGHTS_VALUE,
                        "priority": INITIAL_WEIGHTS_VALUE,
                        "port": INITIAL_WEIGHTS_VALUE,
                        "url": INITIAL_WEIGHTS_VALUE,
                        "user_agent": INITIAL_WEIGHTS_VALUE,
                        "cert": INITIAL_WEIGHTS_VALUE
                    },
                    "node": {
                        "os": INITIAL_WEIGHTS_VALUE,
                        "risk": INITIAL_WEIGHTS_VALUE,
                        "user": INITIAL_WEIGHTS_VALUE,
                        "domain": INITIAL_WEIGHTS_VALUE,
                        "subnet": INITIAL_WEIGHTS_VALUE,
                        "usergroup": INITIAL_WEIGHTS_VALUE,
                        "geolocation": INITIAL_WEIGHTS_VALUE
                    }
                }
                json.dump(global_attribute_weights_json, open(TEMP_USER_FEEDBACK_WEIGHTS, "w"))
                print(f"{func}: Initiated file")
                try:
                    print(f"{func}: Upload file to S3: {bucket}/{path_to_global_attribute_weights}")
                    S3_CLIENT.upload_file(TEMP_USER_FEEDBACK_WEIGHTS, bucket, path_to_global_attribute_weights)
                except Exception as ex:
                    print(f"{func}: Failed to upload file to S3: {bucket}/{path_to_global_attribute_weights}")
                    raise ex
            else:
                try:
                    print(f"{func}: Download available file from S3")

                    S3_CLIENT.download_file(bucket, path_to_global_attribute_weights, TEMP_USER_FEEDBACK_WEIGHTS)

                    print(f"{func}: Download completed")
                except Exception as e:
                    print(f"{func}: Failed to download lookup file from S3: {bucket}/{path_to_global_attribute_weights}")
                    raise e


        def update_queue_lookup(bucket, unique_ids):
            '''
            Update queue lookup file
            Args:
                bucket: Bucket name
                unique_ids: Unique ids generated for the current input file
            '''
            global TEMP_QUEUE_LOOKUP_FILE
            global QUEUE_LOOKUP_FILE
            global S3_CLIENT
            func = "update_queue_lookup"

            queue_lookup_json = json.load(open(TEMP_QUEUE_LOOKUP_FILE, "r"))

            if queue_lookup_json["first"] is None:
                queue_lookup_json["first"] = unique_ids[0]

            # if queue_lookup_json["prev"] is not None:
            #     prev_unique_id = queue_lookup_json["prev"]
            #     next_for_prev_unique_id = unique_ids[0]
                # queue_lookup_json[prev_unique_id] = next_for_prev_unique_id

            queue_lookup_json["prev"] = unique_ids[-1]

            json.dump(queue_lookup_json, open(TEMP_QUEUE_LOOKUP_FILE, "w"))

            print("Save queue lookup file")
            try:
                S3_CLIENT.upload_file(TEMP_QUEUE_LOOKUP_FILE, bucket, QUEUE_LOOKUP_FILE)
            except Exception as e:
                print(f"{func}: Failed to save file to S3: {bucket}/{QUEUE_LOOKUP_FILE}")
                raise e


        def create_chunk_for_large_input_file():
            '''
            Create chunks for a large input file
            Returns:
                List of split file names
            '''
            global CHUNK_SIZE
            global BUF_SIZE
            global TEMP_INPUT_DIR
            global TEMP_INPUT_FILENAME

            split_files = []
            with open(TEMP_INPUT_FILENAME, 'rb') as f:
                split_files = []
                item = 0
                input_list_iter = ijson.items(f, 'input.item', use_float=True, buf_size=BUF_SIZE)
                while True:
                    input_list = list(islice(input_list_iter, CHUNK_SIZE))
                    split_filename = TEMP_INPUT_DIR + "input" + '_' + str(item) + '.json'
                    if not input_list:
                        break

                    with open(split_filename, 'w') as outfile:
                        json.dump(input_list, outfile)
                        split_files.append(split_filename)

                    item += 1
                    del input_list
                    gc.collect()

            del input_list_iter
            gc.collect()

            return split_files


        def process_input_file_to_extract_config_and_node_feature():
            '''
            Process the input file to extract config and node feature
            '''
            global CLUSTER_CONFIG_EXISTS_IN_REQUEST
            global CONFIG_EXISTS_IN_REQUEST
            global NODE_FEATURE_EXISTS_IN_REQUEST
            global BUF_SIZE
            global TEMP_INPUT_FILENAME
            global TEMP_CONFIG_INPUT_FILENAME
            global TEMP_NODE_FEATURE_INPUT_FILENAME

            func = "process_input_file_to_extract_config_and_node_feature"

            with open(TEMP_INPUT_FILENAME, 'rb') as f:
                cluster_config_json_object = next(ijson.items(f, 'cluster_config', use_float=True, buf_size=BUF_SIZE), None)
                if cluster_config_json_object:
                    print(f"{func}: Cluster config exists in request")
                    CLUSTER_CONFIG_EXISTS_IN_REQUEST = True
                    if not isinstance(cluster_config_json_object, dict):
                        raise ValueError("The 'cluster_config' object in input file must be a json object")
                    json.dump(cluster_config_json_object, open(TEMP_CLUSTER_CONFIG_INPUT_FILENAME, "w"))

            del cluster_config_json_object
            gc.collect()

            with open(TEMP_INPUT_FILENAME, 'rb') as f:
                config_json_object = next(ijson.items(f, 'config', use_float=True, buf_size=BUF_SIZE), None)
                if config_json_object:
                    print(f"{func}: Config exists in request")
                    CONFIG_EXISTS_IN_REQUEST = True
                    if not isinstance(config_json_object, dict):
                        raise ValueError("The 'config' object in input file must be a json object")
                    json.dump(config_json_object, open(TEMP_CONFIG_INPUT_FILENAME, "w"))

            del config_json_object
            gc.collect()

            with open(TEMP_INPUT_FILENAME, 'rb') as f:
                node_feature = next(ijson.items(f, 'node_feature', use_float=True, buf_size=BUF_SIZE), None)
                if node_feature:
                    print(f"{func}: node feature exists in request")
                    NODE_FEATURE_EXISTS_IN_REQUEST = True
                    if not isinstance(node_feature, dict):
                        raise ValueError("The 'node_feature' object in input file must be a json object")
                    json.dump(node_feature, open(TEMP_NODE_FEATURE_INPUT_FILENAME, "w"))

            del node_feature
            gc.collect()


        def initialize_sqlite(bucket):
            '''
            Initialize sqlite if database is not present
            Args:
                bucket: Bucket name where sqlite will be stored
            '''
            global S3_CLIENT
            global SCRATCH_DIR
            global PATH_TO_SQLITE
            global TEMP_SQLITE_FILENAME

            func = "initialize_sqlite"

            try:
                print(f"{func}: Checking if sqlite exists on S3 path {bucket}/{PATH_TO_SQLITE}")

                S3_CLIENT.head_object(Bucket=bucket, Key=PATH_TO_SQLITE)

            except ClientError as e:
                if e.response['Error']['Code'] != '404':
                    raise e

                print(f"{func}: sqlite not exist on S3. Initialize database.")

                create_cluster_output_table = """
                CREATE TABLE IF NOT EXISTS cluster_output (
                    cluster_id INTEGER PRIMARY KEY,
                    cluster_starttime REAL,
                    cluster_endtime REAL,
                    cluster_srcips TEXT,
                    cluster_dstips TEXT,
                    cluster_techs TEXT,
                    cluster_tacs TEXT,
                    cluster_stages TEXT
                );
                """

                create_cluster_ticket_output_table = """
                CREATE TABLE IF NOT EXISTS cluster_ticket_output (
                    cluster_id  INTEGER PRIMARY KEY,
                    ticket_id INTEGER UNIQUE,
                    metrics TEXT,
                    FOREIGN KEY (cluster_id) REFERENCES cluster_output(cluster_id)
                );
                """

                create_event_table = """
                CREATE TABLE IF NOT EXISTS event (
                    alert_id TEXT PRIMARY KEY,
                    cluster_id INTEGER,
                    src TEXT,
                    dst TEXT,
                    time REAL,
                    name TEXT,
                    tech TEXT,
                    tac TEXT,
                    stage TEXT,
                    other_attributes TEXT,
                    kv_src TEXT,
                    kv_dst TEXT,
                    kv_time TEXT,
                    kv_name TEXT,
                    FOREIGN KEY (cluster_id) REFERENCES cluster_output(cluster_id)
                );
                """

                create_flow_output_table = """
                CREATE TABLE IF NOT EXISTS flow_output (
                    flow_id  INTEGER PRIMARY KEY,
                    cluster_prob TEXT,
                    alert_ids TEXT
                );
                """

                create_operation_on_cluster_table = """
                CREATE TABLE IF NOT EXISTS operation_on_cluster (
                    operation_id  INTEGER PRIMARY KEY AUTOINCREMENT,
                    cluster_id INTEGER,
                    alert_ids TEXT,
                    operation_type TEXT,
                    FOREIGN KEY (cluster_id) REFERENCES cluster_output(cluster_id)
                );
                """

                create_operation_on_flow_table = """
                CREATE TABLE IF NOT EXISTS operation_on_flow (
                    operation_id  INTEGER PRIMARY KEY AUTOINCREMENT,
                    flow_id INTEGER,
                    alert_ids TEXT,
                    operation_type TEXT,
                    FOREIGN KEY (flow_id) REFERENCES flow_output(flow_id)
                );
                """

                create_global_feature_table = """
                CREATE TABLE IF NOT EXISTS global_feature (
                    feature TEXT NOT NULL,
                    feature_type TEXT NOT NULL,
                    added_to_ui INTEGER,
                    CONSTRAINT unique_feature_type UNIQUE (feature, feature_type)
                );
                """

                try:
                    # Connect to the database
                    with closing(sqlite3.connect(TEMP_SQLITE_FILENAME)) as conn:
                        with closing(conn.cursor()) as cursor:

                            # Execute a query
                            cursor.execute(create_cluster_output_table)
                            cursor.execute(create_cluster_ticket_output_table)
                            cursor.execute(create_event_table)
                            cursor.execute(create_flow_output_table)
                            cursor.execute(create_operation_on_cluster_table)
                            cursor.execute(create_operation_on_flow_table)
                            cursor.execute(create_global_feature_table)
                        conn.commit()

                    print(f"{func}: Database initialized")
                except Exception as exc:
                    print(f"{func}: Failed to create tables in database.")
                    raise exc

                try:
                    print(f"{func}: Upload sqlite to S3")

                    S3_CLIENT.upload_file(TEMP_SQLITE_FILENAME, bucket, PATH_TO_SQLITE)

                    print(f"{func}: Upload completed")
                except Exception as e:
                    print(f"{func}: Failed to upload sqlite from S3: {bucket}/{PATH_TO_SQLITE}")
                    raise e
            else:
                try:
                    print(f"{func}: Download available sqlite")

                    S3_CLIENT.download_file(bucket, PATH_TO_SQLITE, TEMP_SQLITE_FILENAME)

                    print(f"{func}: Download completed")
                except Exception as e:
                    print(f"{func}: Failed to download sqlite from S3: {bucket}/{PATH_TO_SQLITE}")
                    raise e


        def process_node_features():
            '''
            Encode node features
            '''
            global TEMP_NODE_FEATURE_INPUT_FILENAME
            global TEMP_USER_FEEDBACK_WEIGHTS
            global TEMP_NODE_FEATURE_LOOKUP_FILE
            func = "process_node_features"

            node_feature = json.load(open(TEMP_NODE_FEATURE_INPUT_FILENAME, "r"))
            user_feedback_weights = json.load(open(TEMP_USER_FEEDBACK_WEIGHTS, "r"))
            node_weights = user_feedback_weights["node"]

            node_feature_lookup = {}

            for key in node_feature.keys():
                if type(node_feature[key]) is dict:
                    for key_2 in node_feature[key]:
                        if key_2 not in node_feature_lookup:
                            node_feature_lookup[key_2] = len(node_feature_lookup)
                else:
                    print(f"{func}: Skip node feature: {key}")

            new_node_feature = {}
            encode_table = {}
            for key in node_feature.keys():
                if type(node_feature[key]) is dict:
                    new_node_feature_embedding = [0] * len(node_feature_lookup)
                    for key_2 in node_feature[key]:
                        if node_feature[key][key_2]:
                            encoded, encode_table = custom_encoder_node(node_feature[key][key_2], encode_table)
                        else:
                            encoded = 0
                        if key_2 in node_weights:
                            encoded = encoded * (node_weights[key_2] / 100.0)
                        new_node_feature_embedding[node_feature_lookup[key_2]] = encoded
                    new_node_feature[key] = new_node_feature_embedding

            json.dump(new_node_feature, open(TEMP_NODE_FEATURE_INPUT_FILENAME, "w"))
            json.dump(node_feature_lookup, open(TEMP_NODE_FEATURE_LOOKUP_FILE, "w"))


        def custom_encoder_node(data, encode_table):
            '''
            Simple encoder
            Args:
                data: data to encode
                encode_table: history
            Returns:
                (encoded data, updated history)
            '''
            if str(data) not in encode_table:
                encode_table[str(data)] = len(encode_table)
            return encode_table[str(data)], encode_table


        def custom_encoder(data, encode_table):
            '''
            Simple encoder
            Args:
                data: data to encode
                encode_table: history
            Returns:
                (encoded data, updated history)
            '''
            if str(data) not in encode_table:
                encode_table[str(data)] = len(encode_table)
            return [encode_table[str(data)]], encode_table


        def get_cef_to_internal_mappings():
            '''
            Create cef to internal mappings dataframe
            Returns:
                dataframe of cef to internal mappings
            '''
            cef_to_internal_mappings = [
                {
                    "source_field": "accessGroup",
                    "dest_field": "self_loop_node_ftr",
                    "priority": None
                },
                {
                    "source_field": "accountDomain",
                    "dest_field": "self_loop_node_ftr",
                    "priority": None
                },
                {
                    "source_field": "accountId",
                    "dest_field": "self_loop_node_ftr",
                    "priority": None
                },
                {
                    "source_field": "accountName",
                    "dest_field": "self_loop_node_ftr",
                    "priority": None
                },
                {
                    "source_field": "clientDomain",
                    "dest_field": "self_loop_node_ftr",
                    "priority": None
                },
                {
                    "source_field": "clientLogonId",
                    "dest_field": "self_loop_node_ftr",
                    "priority": None
                },
                {
                    "source_field": "clientMachineName",
                    "dest_field": "self_loop_node_ftr",
                    "priority": None
                },
                {
                    "source_field": "clientUserName",
                    "dest_field": "self_loop_node_ftr",
                    "priority": None
                },
                {
                    "source_field": "deviceAddress",
                    "dest_field": "self_loop_node_ftr",
                    "priority": None
                },
                {
                    "source_field": "deviceAddress",
                    "dest_field": "self_loop_node_ftr",
                    "priority": None
                },
                {
                    "source_field": "deviceAddressIPv6",
                    "dest_field": "self_loop_node_ftr",
                    "priority": None
                },
                {
                    "source_field": "deviceCustomString1",
                    "dest_field": "self_loop_node_ftr",
                    "priority": None
                },
                {
                    "source_field": "deviceHostName",
                    "dest_field": "self_loop_node_ftr",
                    "priority": None
                },
                {
                    "source_field": "deviceNtHost",
                    "dest_field": "self_loop_node_ftr",
                    "priority": None
                },
                {
                    "source_field": "domain",
                    "dest_field": "self_loop_node_ftr",
                    "priority": None
                },
                {
                    "source_field": "group",
                    "dest_field": "self_loop_node_ftr",
                    "priority": None
                },
                {
                    "source_field": "groupDomain",
                    "dest_field": "self_loop_node_ftr",
                    "priority": None
                },
                {
                    "source_field": "groupTypeChange",
                    "dest_field": "self_loop_node_ftr",
                    "priority": None
                },
                {
                    "source_field": "hostname",
                    "dest_field": "self_loop_node_ftr",
                    "priority": None
                },
                {
                    "source_field": "logonAccount",
                    "dest_field": "self_loop_node_ftr",
                    "priority": None
                },
                {
                    "source_field": "logonAccount",
                    "dest_field": "self_loop_node_ftr",
                    "priority": None
                },
                {
                    "source_field": "memberDn",
                    "dest_field": "self_loop_node_ftr",
                    "priority": None
                },
                {
                    "source_field": "memberId",
                    "dest_field": "self_loop_node_ftr",
                    "priority": None
                },
                {
                    "source_field": "memberId",
                    "dest_field": "self_loop_node_ftr",
                    "priority": None
                },
                {
                    "source_field": "memberName",
                    "dest_field": "self_loop_node_ftr",
                    "priority": None
                },
                {
                    "source_field": "memberNtDomain",
                    "dest_field": "self_loop_node_ftr",
                    "priority": None
                },
                {
                    "source_field": "primaryDomain",
                    "dest_field": "self_loop_node_ftr",
                    "priority": None
                },
                {
                    "source_field": "primaryUserName",
                    "dest_field": "self_loop_node_ftr",
                    "priority": None
                },
                {
                    "source_field": "subjectAccountDomain",
                    "dest_field": "self_loop_node_ftr",
                    "priority": None
                },
                {
                    "source_field": "subjectAccountName",
                    "dest_field": "self_loop_node_ftr",
                    "priority": None
                },
                {
                    "source_field": "subjectDomainName",
                    "dest_field": "self_loop_node_ftr",
                    "priority": None
                },
                {
                    "source_field": "subjectLogonId",
                    "dest_field": "self_loop_node_ftr",
                    "priority": None
                },
                {
                    "source_field": "subjectLogonId",
                    "dest_field": "self_loop_node_ftr",
                    "priority": None
                },
                {
                    "source_field": "subjectSecurityId",
                    "dest_field": "self_loop_node_ftr",
                    "priority": None
                },
                {
                    "source_field": "subjectUserName",
                    "dest_field": "self_loop_node_ftr",
                    "priority": None
                },
                {
                    "source_field": "suser",
                    "dest_field": "self_loop_node_ftr",
                    "priority": None
                },
                {
                    "source_field": "user",
                    "dest_field": "self_loop_node_ftr",
                    "priority": None
                },
                {
                    "source_field": "userGroup",
                    "dest_field": "self_loop_node_ftr",
                    "priority": None
                },
                {
                    "source_field": "userGroupId",
                    "dest_field": "self_loop_node_ftr",
                    "priority": None
                },
                {
                    "source_field": "userId",
                    "dest_field": "self_loop_node_ftr",
                    "priority": None
                },
                {
                    "source_field": "userId",
                    "dest_field": "self_loop_node_ftr",
                    "priority": None
                },
                {
                    "source_field": "userName",
                    "dest_field": "self_loop_node_ftr",
                    "priority": None
                },
                {
                    "source_field": "userName",
                    "dest_field": "self_loop_node_ftr",
                    "priority": None
                },
                {
                    "source_field": "userName",
                    "dest_field": "self_loop_node_ftr",
                    "priority": None
                },
                {
                    "source_field": "userType",
                    "dest_field": "self_loop_node_ftr",
                    "priority": None
                },
                {
                    "source_field": "workstation",
                    "dest_field": "self_loop_node_ftr",
                    "priority": None
                },
                {
                    "source_field": "workstationName",
                    "dest_field": "self_loop_node_ftr",
                    "priority": None
                },
                {
                    "source_field": "workstationName",
                    "dest_field": "self_loop_node_ftr",
                    "priority": None
                },
                {
                    "source_field": "acl",
                    "dest_field": "event_ftr",
                    "priority": None
                },
                {
                    "source_field": "act",
                    "dest_field": "event_ftr",
                    "priority": None
                },
                {
                    "source_field": "act",
                    "dest_field": "event_ftr",
                    "priority": None
                },
                {
                    "source_field": "attachDisposition",
                    "dest_field": "event_ftr",
                    "priority": None
                },
                {
                    "source_field": "attachFileName",
                    "dest_field": "event_ftr",
                    "priority": None
                },
                {
                    "source_field": "attachSize",
                    "dest_field": "event_ftr",
                    "priority": None
                },
                {
                    "source_field": "attachSizeDecoded",
                    "dest_field": "event_ftr",
                    "priority": None
                },
                {
                    "source_field": "attachTransferEncoding",
                    "dest_field": "event_ftr",
                    "priority": None
                },
                {
                    "source_field": "attachType",
                    "dest_field": "event_ftr",
                    "priority": None
                },
                {
                    "source_field": "authMethod",
                    "dest_field": "event_ftr",
                    "priority": None
                },
                {
                    "source_field": "awsAccountId",
                    "dest_field": "event_ftr",
                    "priority": None
                },
                {
                    "source_field": "body",
                    "dest_field": "event_ftr",
                    "priority": None
                },
                {
                    "source_field": "bucket",
                    "dest_field": "event_ftr",
                    "priority": None
                },
                {
                    "source_field": "bytes",
                    "dest_field": "event_ftr",
                    "priority": None
                },
                {
                    "source_field": "bytesIn",
                    "dest_field": "event_ftr",
                    "priority": None
                },
                {
                    "source_field": "bytesOut",
                    "dest_field": "event_ftr",
                    "priority": None
                },
                {
                    "source_field": "callerComputerName",
                    "dest_field": "event_ftr",
                    "priority": None
                },
                {
                    "source_field": "callerDomain",
                    "dest_field": "event_ftr",
                    "priority": None
                },
                {
                    "source_field": "callerLogonId",
                    "dest_field": "event_ftr",
                    "priority": None
                },
                {
                    "source_field": "callerMachineName",
                    "dest_field": "event_ftr",
                    "priority": None
                },
                {
                    "source_field": "callerUserName",
                    "dest_field": "event_ftr",
                    "priority": None
                },
                {
                    "source_field": "cat",
                    "dest_field": "event_ftr",
                    "priority": None
                },
                {
                    "source_field": "cat",
                    "dest_field": "event_ftr",
                    "priority": None
                },
                {
                    "source_field": "categoryString",
                    "dest_field": "event_ftr",
                    "priority": None
                },
                {
                    "source_field": "cd",
                    "dest_field": "event_ftr",
                    "priority": None
                },
                {
                    "source_field": "changeClass",
                    "dest_field": "event_ftr",
                    "priority": None
                },
                {
                    "source_field": "changeDescription",
                    "dest_field": "event_ftr",
                    "priority": None
                },
                {
                    "source_field": "changeType",
                    "dest_field": "event_ftr",
                    "priority": None
                },
                {
                    "source_field": "changeType",
                    "dest_field": "event_ftr",
                    "priority": None
                },
                {
                    "source_field": "ciscoAsaMessageId",
                    "dest_field": "event_ftr",
                    "priority": None
                },
                {
                    "source_field": "ciscoAsaUser",
                    "dest_field": "event_ftr",
                    "priority": None
                },
                {
                    "source_field": "ciscoAsaVendorAction",
                    "dest_field": "event_ftr",
                    "priority": None
                },
                {
                    "source_field": "cn1",
                    "dest_field": "event_ftr",
                    "priority": None
                },
                {
                    "source_field": "cn2",
                    "dest_field": "event_ftr",
                    "priority": None
                },
                {
                    "source_field": "cn3",
                    "dest_field": "event_ftr",
                    "priority": None
                },
                {
                    "source_field": "cn4",
                    "dest_field": "event_ftr",
                    "priority": None
                },
                {
                    "source_field": "cn5",
                    "dest_field": "event_ftr",
                    "priority": None
                },
                {
                    "source_field": "cn6",
                    "dest_field": "event_ftr",
                    "priority": None
                },
                {
                    "source_field": "command",
                    "dest_field": "event_ftr",
                    "priority": None
                },
                {
                    "source_field": "commProto",
                    "dest_field": "event_ftr",
                    "priority": None
                },
                {
                    "source_field": "content",
                    "dest_field": "event_ftr",
                    "priority": None
                },
                {
                    "source_field": "count",
                    "dest_field": "event_ftr",
                    "priority": None
                },
                {
                    "source_field": "creatorProcessName",
                    "dest_field": "event_ftr",
                    "priority": None
                },
                {
                    "source_field": "cs1",
                    "dest_field": "event_ftr",
                    "priority": None
                },
                {
                    "source_field": "cs1Label",
                    "dest_field": "event_ftr",
                    "priority": None
                },
                {
                    "source_field": "cs2",
                    "dest_field": "event_ftr",
                    "priority": None
                },
                {
                    "source_field": "cs3",
                    "dest_field": "event_ftr",
                    "priority": None
                },
                {
                    "source_field": "cs4",
                    "dest_field": "event_ftr",
                    "priority": None
                },
                {
                    "source_field": "cs5",
                    "dest_field": "event_ftr",
                    "priority": None
                },
                {
                    "source_field": "cs6",
                    "dest_field": "event_ftr",
                    "priority": None
                },
                {
                    "source_field": "deviceDirection",
                    "dest_field": "event_ftr",
                    "priority": None
                },
                {
                    "source_field": "deviceProduct",
                    "dest_field": "event_ftr",
                    "priority": None
                },
                {
                    "source_field": "deviceProduct",
                    "dest_field": "event_ftr",
                    "priority": None
                },
                {
                    "source_field": "deviceReceiptTime",
                    "dest_field": "event_ftr",
                    "priority": None
                },
                {
                    "source_field": "deviceType",
                    "dest_field": "event_ftr",
                    "priority": None
                },
                {
                    "source_field": "deviceVendor",
                    "dest_field": "event_ftr",
                    "priority": None
                },
                {
                    "source_field": "deviceVendor",
                    "dest_field": "event_ftr",
                    "priority": None
                },
                {
                    "source_field": "deviceVersion",
                    "dest_field": "event_ftr",
                    "priority": None
                },
                {
                    "source_field": "deviceVersion",
                    "dest_field": "event_ftr",
                    "priority": None
                },
                {
                    "source_field": "duration",
                    "dest_field": "event_ftr",
                    "priority": None
                },
                {
                    "source_field": "durationDay",
                    "dest_field": "event_ftr",
                    "priority": None
                },
                {
                    "source_field": "durationHour",
                    "dest_field": "event_ftr",
                    "priority": None
                },
                {
                    "source_field": "durationMinute",
                    "dest_field": "event_ftr",
                    "priority": None
                },
                {
                    "source_field": "durationSecond",
                    "dest_field": "event_ftr",
                    "priority": None
                },
                {
                    "source_field": "errorCode",
                    "dest_field": "event_ftr",
                    "priority": None
                },
                {
                    "source_field": "eventCode",
                    "dest_field": "event_ftr",
                    "priority": None
                },
                {
                    "source_field": "eventDay",
                    "dest_field": "event_ftr",
                    "priority": None
                },
                {
                    "source_field": "eventHour",
                    "dest_field": "event_ftr",
                    "priority": None
                },
                {
                    "source_field": "eventId",
                    "dest_field": "event_ftr",
                    "priority": None
                },
                {
                    "source_field": "eventMinute",
                    "dest_field": "event_ftr",
                    "priority": None
                },
                {
                    "source_field": "eventMonth",
                    "dest_field": "event_ftr",
                    "priority": None
                },
                {
                    "source_field": "eventSecond",
                    "dest_field": "event_ftr",
                    "priority": None
                },
                {
                    "source_field": "eventSubtype",
                    "dest_field": "event_ftr",
                    "priority": None
                },
                {
                    "source_field": "eventType",
                    "dest_field": "event_ftr",
                    "priority": None
                },
                {
                    "source_field": "eventType",
                    "dest_field": "event_ftr",
                    "priority": None
                },
                {
                    "source_field": "eventType",
                    "dest_field": "event_ftr",
                    "priority": None
                },
                {
                    "source_field": "eventTypeColor",
                    "dest_field": "event_ftr",
                    "priority": None
                },
                {
                    "source_field": "eventWeekDay",
                    "dest_field": "event_ftr",
                    "priority": None
                },
                {
                    "source_field": "eventYear",
                    "dest_field": "event_ftr",
                    "priority": None
                },
                {
                    "source_field": "eventZone",
                    "dest_field": "event_ftr",
                    "priority": None
                },
                {
                    "source_field": "exitStatus",
                    "dest_field": "event_ftr",
                    "priority": None
                },
                {
                    "source_field": "fileName",
                    "dest_field": "event_ftr",
                    "priority": None
                },
                {
                    "source_field": "fileName",
                    "dest_field": "event_ftr",
                    "priority": None
                },
                {
                    "source_field": "filePath",
                    "dest_field": "event_ftr",
                    "priority": None
                },
                {
                    "source_field": "filePath",
                    "dest_field": "event_ftr",
                    "priority": None
                },
                {
                    "source_field": "fileSize",
                    "dest_field": "event_ftr",
                    "priority": None
                },
                {
                    "source_field": "fileType",
                    "dest_field": "event_ftr",
                    "priority": None
                },
                {
                    "source_field": "flowId",
                    "dest_field": "event_ftr",
                    "priority": None
                },
                {
                    "source_field": "fragmentCount",
                    "dest_field": "event_ftr",
                    "priority": None
                },
                {
                    "source_field": "hashCodes",
                    "dest_field": "event_ftr",
                    "priority": None
                },
                {
                    "source_field": "icmpCode",
                    "dest_field": "event_ftr",
                    "priority": None
                },
                {
                    "source_field": "icmpType",
                    "dest_field": "event_ftr",
                    "priority": None
                },
                {
                    "source_field": "id",
                    "dest_field": "event_ftr",
                    "priority": None
                },
                {
                    "source_field": "idsType",
                    "dest_field": "event_ftr",
                    "priority": None
                },
                {
                    "source_field": "imageFileName",
                    "dest_field": "event_ftr",
                    "priority": None
                },
                {
                    "source_field": "index",
                    "dest_field": "event_ftr",
                    "priority": None
                },
                {
                    "source_field": "indexTime",
                    "dest_field": "event_ftr",
                    "priority": None
                },
                {
                    "source_field": "initialRtt",
                    "dest_field": "event_ftr",
                    "priority": None
                },
                {
                    "source_field": "interfaceId",
                    "dest_field": "event_ftr",
                    "priority": None
                },
                {
                    "source_field": "keywords",
                    "dest_field": "event_ftr",
                    "priority": None
                },
                {
                    "source_field": "kv",
                    "dest_field": "event_ftr",
                    "priority": None
                },
                {
                    "source_field": "laction",
                    "dest_field": "event_ftr",
                    "priority": None
                },
                {
                    "source_field": "lineCount",
                    "dest_field": "event_ftr",
                    "priority": None
                },
                {
                    "source_field": "logLevel",
                    "dest_field": "event_ftr",
                    "priority": None
                },
                {
                    "source_field": "logName",
                    "dest_field": "event_ftr",
                    "priority": None
                },
                {
                    "source_field": "logonId",
                    "dest_field": "event_ftr",
                    "priority": None
                },
                {
                    "source_field": "logonType",
                    "dest_field": "event_ftr",
                    "priority": None
                },
                {
                    "source_field": "logStatus",
                    "dest_field": "event_ftr",
                    "priority": None
                },
                {
                    "source_field": "messageId",
                    "dest_field": "event_ftr",
                    "priority": None
                },
                {
                    "source_field": "messageType",
                    "dest_field": "event_ftr",
                    "priority": None
                },
                {
                    "source_field": "name",
                    "dest_field": "event_ftr",
                    "priority": None
                },
                {
                    "source_field": "name",
                    "dest_field": "event_ftr",
                    "priority": None
                },
                {
                    "source_field": "newAccountName",
                    "dest_field": "event_ftr",
                    "priority": None
                },
                {
                    "source_field": "newDomain",
                    "dest_field": "event_ftr",
                    "priority": None
                },
                {
                    "source_field": "newProcessName",
                    "dest_field": "event_ftr",
                    "priority": None
                },
                {
                    "source_field": "object",
                    "dest_field": "event_ftr",
                    "priority": None
                },
                {
                    "source_field": "objectAttrs",
                    "dest_field": "event_ftr",
                    "priority": None
                },
                {
                    "source_field": "objectCategory",
                    "dest_field": "event_ftr",
                    "priority": None
                },
                {
                    "source_field": "objectId",
                    "dest_field": "event_ftr",
                    "priority": None
                },
                {
                    "source_field": "objectName",
                    "dest_field": "event_ftr",
                    "priority": None
                },
                {
                    "source_field": "objectType",
                    "dest_field": "event_ftr",
                    "priority": None
                },
                {
                    "source_field": "opCode",
                    "dest_field": "event_ftr",
                    "priority": None
                },
                {
                    "source_field": "outcome",
                    "dest_field": "event_ftr",
                    "priority": None
                },
                {
                    "source_field": "packets",
                    "dest_field": "event_ftr",
                    "priority": None
                },
                {
                    "source_field": "packetsIn",
                    "dest_field": "event_ftr",
                    "priority": None
                },
                {
                    "source_field": "packetsOut",
                    "dest_field": "event_ftr",
                    "priority": None
                },
                {
                    "source_field": "parentProcess",
                    "dest_field": "event_ftr",
                    "priority": None
                },
                {
                    "source_field": "parentProcessId",
                    "dest_field": "event_ftr",
                    "priority": None
                },
                {
                    "source_field": "parentProcessName",
                    "dest_field": "event_ftr",
                    "priority": None
                },
                {
                    "source_field": "parentProcessPath",
                    "dest_field": "event_ftr",
                    "priority": None
                },
                {
                    "source_field": "preMsg",
                    "dest_field": "event_ftr",
                    "priority": None
                },
                {
                    "source_field": "privilegeList",
                    "dest_field": "event_ftr",
                    "priority": None
                },
                {
                    "source_field": "product",
                    "dest_field": "event_ftr",
                    "priority": None
                },
                {
                    "source_field": "punct",
                    "dest_field": "event_ftr",
                    "priority": None
                },
                {
                    "source_field": "query",
                    "dest_field": "event_ftr",
                    "priority": None
                },
                {
                    "source_field": "query",
                    "dest_field": "event_ftr",
                    "priority": None
                },
                {
                    "source_field": "queryType",
                    "dest_field": "event_ftr",
                    "priority": None
                },
                {
                    "source_field": "raw",
                    "dest_field": "event_ftr",
                    "priority": None
                },
                {
                    "source_field": "recordNumber",
                    "dest_field": "event_ftr",
                    "priority": None
                },
                {
                    "source_field": "region",
                    "dest_field": "event_ftr",
                    "priority": None
                },
                {
                    "source_field": "registryPath",
                    "dest_field": "event_ftr",
                    "priority": None
                },
                {
                    "source_field": "registryValueName",
                    "dest_field": "event_ftr",
                    "priority": None
                },
                {
                    "source_field": "registryValueType",
                    "dest_field": "event_ftr",
                    "priority": None
                },
                {
                    "source_field": "replyCode",
                    "dest_field": "event_ftr",
                    "priority": None
                },
                {
                    "source_field": "replyCodeId",
                    "dest_field": "event_ftr",
                    "priority": None
                },
                {
                    "source_field": "requestClientApplication",
                    "dest_field": "event_ftr",
                    "priority": None
                },
                {
                    "source_field": "requestMethod",
                    "dest_field": "event_ftr",
                    "priority": None
                },
                {
                    "source_field": "requestURL",
                    "dest_field": "event_ftr",
                    "priority": None
                },
                {
                    "source_field": "responseTime",
                    "dest_field": "event_ftr",
                    "priority": None
                },
                {
                    "source_field": "rule",
                    "dest_field": "event_ftr",
                    "priority": None
                },
                {
                    "source_field": "ruleId",
                    "dest_field": "event_ftr",
                    "priority": None
                },
                {
                    "source_field": "securityId",
                    "dest_field": "event_ftr",
                    "priority": None
                },
                {
                    "source_field": "serial",
                    "dest_field": "event_ftr",
                    "priority": None
                },
                {
                    "source_field": "service",
                    "dest_field": "event_ftr",
                    "priority": None
                },
                {
                    "source_field": "serviceId",
                    "dest_field": "event_ftr",
                    "priority": None
                },
                {
                    "source_field": "serviceName",
                    "dest_field": "event_ftr",
                    "priority": None
                },
                {
                    "source_field": "sessionId",
                    "dest_field": "event_ftr",
                    "priority": None
                },
                {
                    "source_field": "si",
                    "dest_field": "event_ftr",
                    "priority": None
                },
                {
                    "source_field": "signatureId",
                    "dest_field": "event_ftr",
                    "priority": None
                },
                {
                    "source_field": "source",
                    "dest_field": "event_ftr",
                    "priority": None
                },
                {
                    "source_field": "sourceContent",
                    "dest_field": "event_ftr",
                    "priority": None
                },
                {
                    "source_field": "sourceName",
                    "dest_field": "event_ftr",
                    "priority": None
                },
                {
                    "source_field": "sourceNetworkAddress",
                    "dest_field": "event_ftr",
                    "priority": None
                },
                {
                    "source_field": "sourceSgInfo",
                    "dest_field": "event_ftr",
                    "priority": None
                },
                {
                    "source_field": "splunkServer",
                    "dest_field": "event_ftr",
                    "priority": None
                },
                {
                    "source_field": "splunkServerGroup",
                    "dest_field": "event_ftr",
                    "priority": None
                },
                {
                    "source_field": "sslIsValid",
                    "dest_field": "event_ftr",
                    "priority": None
                },
                {
                    "source_field": "status",
                    "dest_field": "event_ftr",
                    "priority": None
                },
                {
                    "source_field": "subject",
                    "dest_field": "event_ftr",
                    "priority": None
                },
                {
                    "source_field": "subSecond",
                    "dest_field": "event_ftr",
                    "priority": None
                },
                {
                    "source_field": "suppliedRealmName",
                    "dest_field": "event_ftr",
                    "priority": None
                },
                {
                    "source_field": "tag",
                    "dest_field": "event_ftr",
                    "priority": None
                },
                {
                    "source_field": "tagAction",
                    "dest_field": "event_ftr",
                    "priority": None
                },
                {
                    "source_field": "tagApp",
                    "dest_field": "event_ftr",
                    "priority": None
                },
                {
                    "source_field": "tagEventType",
                    "dest_field": "event_ftr",
                    "priority": None
                },
                {
                    "source_field": "tagObjectCategory",
                    "dest_field": "event_ftr",
                    "priority": None
                },
                {
                    "source_field": "taskCategory",
                    "dest_field": "event_ftr",
                    "priority": None
                },
                {
                    "source_field": "timeTaken",
                    "dest_field": "event_ftr",
                    "priority": None
                },
                {
                    "source_field": "tokenElevationType",
                    "dest_field": "event_ftr",
                    "priority": None
                },
                {
                    "source_field": "tokenElevationType",
                    "dest_field": "event_ftr",
                    "priority": None
                },
                {
                    "source_field": "tos",
                    "dest_field": "event_ftr",
                    "priority": None
                },
                {
                    "source_field": "totalBytes",
                    "dest_field": "event_ftr",
                    "priority": None
                },
                {
                    "source_field": "totalPacketsIn",
                    "dest_field": "event_ftr",
                    "priority": None
                },
                {
                    "source_field": "totalPacketsOut",
                    "dest_field": "event_ftr",
                    "priority": None
                },
                {
                    "source_field": "totalResponseTime",
                    "dest_field": "event_ftr",
                    "priority": None
                },
                {
                    "source_field": "transactionId",
                    "dest_field": "event_ftr",
                    "priority": None
                },
                {
                    "source_field": "transport",
                    "dest_field": "event_ftr",
                    "priority": None
                },
                {
                    "source_field": "ttl",
                    "dest_field": "event_ftr",
                    "priority": None
                },
                {
                    "source_field": "type",
                    "dest_field": "event_ftr",
                    "priority": None
                },
                {
                    "source_field": "type",
                    "dest_field": "event_ftr",
                    "priority": None
                },
                {
                    "source_field": "valuesFlowId",
                    "dest_field": "event_ftr",
                    "priority": None
                },
                {
                    "source_field": "version",
                    "dest_field": "event_ftr",
                    "priority": None
                },
                {
                    "source_field": "vpcFlowAction",
                    "dest_field": "event_ftr",
                    "priority": None
                },
                {
                    "source_field": "winAction",
                    "dest_field": "event_ftr",
                    "priority": None
                },
                {
                    "source_field": "winSecurityCategory",
                    "dest_field": "event_ftr",
                    "priority": None
                },
                {
                    "source_field": "winStatus",
                    "dest_field": "event_ftr",
                    "priority": None
                },
                {
                    "source_field": "destinationAddress",
                    "dest_field": "dst",
                    "priority": 6.0
                },
                {
                    "source_field": "destinationNtHost",
                    "dest_field": "dst",
                    "priority": 6.0
                },
                {
                    "source_field": "destinationAddress",
                    "dest_field": "dst",
                    "priority": 5.0
                },
                {
                    "source_field": "destinationAddressIPv6",
                    "dest_field": "dst",
                    "priority": 4.0
                },
                {
                    "source_field": "destinationHostName",
                    "dest_field": "dst",
                    "priority": 3.0
                },
                {
                    "source_field": "destinationTranslatedAddress",
                    "dest_field": "dst",
                    "priority": 1.0
                },
                {
                    "source_field": "destContent",
                    "dest_field": "dst_node_ftr",
                    "priority": None
                },
                {
                    "source_field": "destHost",
                    "dest_field": "dst_node_ftr",
                    "priority": None
                },
                {
                    "source_field": "destinationInterface",
                    "dest_field": "dst_node_ftr",
                    "priority": None
                },
                {
                    "source_field": "destinationInterfaceId",
                    "dest_field": "dst_node_ftr",
                    "priority": None
                },
                {
                    "source_field": "destinationMacAddress",
                    "dest_field": "dst_node_ftr",
                    "priority": None
                },
                {
                    "source_field": "destinationNtDomain",
                    "dest_field": "dst_node_ftr",
                    "priority": None
                },
                {
                    "source_field": "targetAccountId",
                    "dest_field": "dst_node_ftr",
                    "priority": None
                },
                {
                    "source_field": "destinationPriority",
                    "dest_field": "dst_node_ftr",
                    "priority": None
                },
                {
                    "source_field": "targetAccountName",
                    "dest_field": "dst_node_ftr",
                    "priority": None
                },
                {
                    "source_field": "destinationZone",
                    "dest_field": "dst_node_ftr",
                    "priority": None
                },
                {
                    "source_field": "destSgInfo",
                    "dest_field": "dst_node_ftr",
                    "priority": None
                },
                {
                    "source_field": "duser",
                    "dest_field": "dst_node_ftr",
                    "priority": None
                },
                {
                    "source_field": "targetDomain",
                    "dest_field": "dst_node_ftr",
                    "priority": None
                },
                {
                    "source_field": "targetDomainName",
                    "dest_field": "dst_node_ftr",
                    "priority": None
                },
                {
                    "source_field": "targetProcessName",
                    "dest_field": "dst_node_ftr",
                    "priority": None
                },
                {
                    "source_field": "targetServerName",
                    "dest_field": "dst_node_ftr",
                    "priority": None
                },
                {
                    "source_field": "targetServerName",
                    "dest_field": "dst_node_ftr",
                    "priority": None
                },
                {
                    "source_field": "targetUserName",
                    "dest_field": "dst_node_ftr",
                    "priority": None
                },
                {
                    "source_field": "targetUserName",
                    "dest_field": "dst_node_ftr",
                    "priority": None
                },
                {
                    "source_field": "description",
                    "dest_field": "name",
                    "priority": 5.0
                },
                {
                    "source_field": "name",
                    "dest_field": "name",
                    "priority": 4.0
                },
                {
                    "source_field": "ruleName",
                    "dest_field": "name",
                    "priority": 3.0
                },
                {
                    "source_field": "description",
                    "dest_field": "name",
                    "priority": 2.0
                },
                {
                    "source_field": "msg",
                    "dest_field": "name",
                    "priority": 2.0
                },
                {
                    "source_field": "message",
                    "dest_field": "name",
                    "priority": 1.0
                },
                {
                    "source_field": "destPublicPort",
                    "dest_field": "dst_port",
                    "priority": 3.0
                },
                {
                    "source_field": "destinationTranslatedPort",
                    "dest_field": "dst_port",
                    "priority": 2.0
                },
                {
                    "source_field": "destinationPort",
                    "dest_field": "dst_port",
                    "priority": 1.0
                },
                {
                    "source_field": "process",
                    "dest_field": "event_ftr_IMPRTNT",
                    "priority": None
                },
                {
                    "source_field": "processCommandLine",
                    "dest_field": "event_ftr_IMPRTNT",
                    "priority": None
                },
                {
                    "source_field": "processExec",
                    "dest_field": "event_ftr_IMPRTNT",
                    "priority": None
                },
                {
                    "source_field": "processId",
                    "dest_field": "event_ftr_IMPRTNT",
                    "priority": None
                },
                {
                    "source_field": "processId",
                    "dest_field": "event_ftr_IMPRTNT",
                    "priority": None
                },
                {
                    "source_field": "processName",
                    "dest_field": "event_ftr_IMPRTNT",
                    "priority": None
                },
                {
                    "source_field": "processName",
                    "dest_field": "event_ftr_IMPRTNT",
                    "priority": None
                },
                {
                    "source_field": "processPath",
                    "dest_field": "event_ftr_IMPRTNT",
                    "priority": None
                },
                {
                    "source_field": "proto",
                    "dest_field": "event_ftr_IMPRTNT",
                    "priority": None
                },
                {
                    "source_field": "protoCode",
                    "dest_field": "event_ftr_IMPRTNT",
                    "priority": None
                },
                {
                    "source_field": "protoFullName",
                    "dest_field": "event_ftr_IMPRTNT",
                    "priority": None
                },
                {
                    "source_field": "protoId",
                    "dest_field": "event_ftr_IMPRTNT",
                    "priority": None
                },
                {
                    "source_field": "protoStack",
                    "dest_field": "event_ftr_IMPRTNT",
                    "priority": None
                },
                {
                    "source_field": "protoVersion",
                    "dest_field": "event_ftr_IMPRTNT",
                    "priority": None
                },
                {
                    "source_field": "sourceType",
                    "dest_field": "event_ftr_IMPRTNT",
                    "priority": None
                },
                {
                    "source_field": "sourceType",
                    "dest_field": "event_ftr_IMPRTNT",
                    "priority": None
                },
                {
                    "source_field": "vendor",
                    "dest_field": "event_ftr_IMPRTNT",
                    "priority": None
                },
                {
                    "source_field": "vendorAccount",
                    "dest_field": "event_ftr_IMPRTNT",
                    "priority": None
                },
                {
                    "source_field": "vendorAction",
                    "dest_field": "event_ftr_IMPRTNT",
                    "priority": None
                },
                {
                    "source_field": "vendorClass",
                    "dest_field": "event_ftr_IMPRTNT",
                    "priority": None
                },
                {
                    "source_field": "vendorDefinition",
                    "dest_field": "event_ftr_IMPRTNT",
                    "priority": None
                },
                {
                    "source_field": "vendorPrivilege",
                    "dest_field": "event_ftr_IMPRTNT",
                    "priority": None
                },
                {
                    "source_field": "vendorSeverity",
                    "dest_field": "event_ftr_IMPRTNT",
                    "priority": None
                },
                {
                    "source_field": "severity",
                    "dest_field": "event_ftr_IMPRTNT_priority",
                    "priority": None
                },
                {
                    "source_field": "severityId",
                    "dest_field": "event_ftr_IMPRTNT_priority",
                    "priority": None
                },
                {
                    "source_field": "severityLevel",
                    "dest_field": "event_ftr_IMPRTNT_priority",
                    "priority": None
                },
                {
                    "source_field": "computerName",
                    "dest_field": "self_loop_src (when both dst & src r empty) ",
                    "priority": 6.0
                },
                {
                    "source_field": "hostAddr",
                    "dest_field": "self_loop_src (when both dst & src r empty) ",
                    "priority": 5.0
                },
                {
                    "source_field": "assignedIp",
                    "dest_field": "self_loop_src (when both dst & src r empty) ",
                    "priority": 4.0
                },
                {
                    "source_field": "clientAddress",
                    "dest_field": "self_loop_src (when both dst & src r empty) ",
                    "priority": 3.0
                },
                {
                    "source_field": "ip",
                    "dest_field": "self_loop_src (when both dst & src r empty) ",
                    "priority": 2.0
                },
                {
                    "source_field": "ipAddress",
                    "dest_field": "self_loop_src (when both dst & src r empty) ",
                    "priority": 1.0
                },
                {
                    "source_field": "sourceAddressIPv6",
                    "dest_field": "src",
                    "priority": 6.0
                },
                {
                    "source_field": "sourceTranslatedAddress",
                    "dest_field": "src",
                    "priority": 5.0
                },
                {
                    "source_field": "sourceAddress",
                    "dest_field": "src",
                    "priority": 4.0
                },
                {
                    "source_field": "sourceAddress",
                    "dest_field": "src",
                    "priority": 3.0
                },
                {
                    "source_field": "sourceNtHost",
                    "dest_field": "src",
                    "priority": 2.0
                },
                {
                    "source_field": "sourceHostName",
                    "dest_field": "src",
                    "priority": 1.0
                },
                {
                    "source_field": "sourceHost",
                    "dest_field": "src_node_ftr",
                    "priority": None
                },
                {
                    "source_field": "sourceInterface",
                    "dest_field": "src_node_ftr",
                    "priority": None
                },
                {
                    "source_field": "sourceInterfaceId",
                    "dest_field": "src_node_ftr",
                    "priority": None
                },
                {
                    "source_field": "sourceMacAddress",
                    "dest_field": "src_node_ftr",
                    "priority": None
                },
                {
                    "source_field": "sourceNtDomain",
                    "dest_field": "src_node_ftr",
                    "priority": None
                },
                {
                    "source_field": "sourcePriority",
                    "dest_field": "src_node_ftr",
                    "priority": None
                },
                {
                    "source_field": "sourceUserName",
                    "dest_field": "src_node_ftr",
                    "priority": None
                },
                {
                    "source_field": "sourceWorkstation",
                    "dest_field": "src_node_ftr",
                    "priority": None
                },
                {
                    "source_field": "sourceZone",
                    "dest_field": "src_node_ftr",
                    "priority": None
                },
                {
                    "source_field": "suser",
                    "dest_field": "src_node_ftr",
                    "priority": None
                },
                {
                    "source_field": "sourcePublicPort",
                    "dest_field": "src_port",
                    "priority": 3.0
                },
                {
                    "source_field": "sourceTranslatedPort",
                    "dest_field": "src_port",
                    "priority": 2.0
                },
                {
                    "source_field": "sourcePort",
                    "dest_field": "src_port",
                    "priority": 1.0
                },
                {
                    "source_field": "endTime",
                    "dest_field": "time",
                    "priority": 5.0
                },
                {
                    "source_field": "endTime",
                    "dest_field": "time",
                    "priority": 4.0
                },
                {
                    "source_field": "startTime",
                    "dest_field": "time",
                    "priority": 3.0
                },
                {
                    "source_field": "timestamp",
                    "dest_field": "time",
                    "priority": 2.0
                },
                {
                    "source_field": "time",
                    "dest_field": "time",
                    "priority": 1.0
                }
            ]

            # cef_to_internal_mappings_df = pd.DataFrame(cef_to_internal_mappings)

            return cef_to_internal_mappings


        def convert_to_unix_timestamp(time_str):
            """
            Convert any date/time format to a UNIX timestamp.
            Args:
                time_str: The date/time string or UNIX timestamp to convert.
            Returns
                return: The UNIX timestamp.
            """
            try:
                # Check if the input is already a UNIX timestamp
                if isinstance(time_str, (int, float)):
                    return time_str

                # Attempt to convert string to a float or int
                try:
                    timestamp = float(time_str)
                    return timestamp
                except ValueError:
                    pass

                # Parse the date/time string into a datetime object
                dt = parser.parse(time_str)
                # Convert the datetime object to a UNIX timestamp
                unix_timestamp = int(dt.timestamp())
                return unix_timestamp
            except Exception as e:
                print(f"Error: {e}")
                return None


        def get_static_cluster_config():
            '''
            Get static cluster config for clustering model
            '''
            global CLUSTER_CONFIG_EXISTS_IN_REQUEST

            CLUSTER_CONFIG_EXISTS_IN_REQUEST = True
            cluster_config = {
                "set_space": 256,
                "max_cluster_size": 64,
                "eps_ds": 45,
                "lambd": 0.05,
                "beta": 0.4,
                "mu": 7,
                "eps_ds_DBSCAN": 25,
                "min_samples_ds": 7,
                "secondary_clustering": False,
                "contrastive_loss": "js",
                "triplet_margin": 0.1,
                "max_time": None,
                "time_feat_dim": 8,
                "channel_embedding_dim": 128,
                "direct_edge_weight": 1000,
                "technique": 1000000,
                "tactic": 100,
                "stage": 1,
                "count": 10,
                "priority": 10000,
                "port": 1,
                "url": 10,
                "user_agent": 1000000,
                "cert": 10000,
                "time": 1,
                "user_feedback": 100,
                "learning_rate": 3e-05,
                "num_epochs": 30,
                "batch_size": 500,
                "embedding_type": "avg",
                "dropout": 0.05,
                "patch_size": 1,
                "max_input_sequence_length": 64,
                "num_neighbors": 30,
                "seed": 1,
                "aggalerts_flag": False
            }

            json.dump(cluster_config, open(TEMP_CLUSTER_CONFIG_INPUT_FILENAME, "w"))


        def delete_error_log_s3(bucket):
            '''
            Delete contents of output folder in root of S3 bucket
            '''
            global S3_CLIENT

            func = "delete_error_log_s3"

            error_log_key = "output/error_log.txt"

            # Delete the objects
            response = S3_CLIENT.delete_object(
                Bucket=bucket,
                Key=error_log_key
            )

            print(f"{func}: Deleted objects: {response}")


        def upload_error_to_s3(bucket, error_message):
            '''
            Upload error message to S3
            Args:
                error_message: The error message to upload
                unique_id: Unique ID for identifying the error log
            '''
            global S3_CLIENT
            error_log_key = "output/error_log.txt"
            try:
                S3_CLIENT.put_object(Bucket=bucket, Key=error_log_key, Body=error_message)
                print(f"Uploaded error log to s3://{bucket}/{error_log_key}")
            except Exception as e:
                print(f"Failed to upload error log to S3. Error: {str(e)}")


        def clear_temp_dir():
            rm_list = glob.glob("/tmp/**", recursive=True)
            for f in rm_list:
                if os.path.isfile(f):
                    os.remove(f)

      Handler: index.lambda_handler
      Runtime: python3.11
      MemorySize: 3008
      EphemeralStorage:
        Size: 10240
      ReservedConcurrentExecutions: 1
      Timeout: 900
      Role: !GetAtt LambdaRole.Arn
      Tracing: Active
      Layers:
        - !FindInMap
          - RegionMap
          - !Ref AWS::Region
          - lambdaLayer
      Environment:
        Variables:
          batch_transform_job_instance_type: !Ref EnrichTechBatchInstanceType
          batch_transform_job_max_retries: "0"
          batch_transform_job_timeout: !Ref EnrichTechBatchTimeout
          batch_transform_job_max_payload: "10"
          batch_transform_job_concurrent_transform: "1"
          technique_model_name: !Ref TechniqueModelName
          technique_lookup_object: !Ref TechniqueLookupObject
          chunk_size: !Ref ChunkSize
          encode_other_attrs: "true"
      Events:
        S3ObjectCreated:
          Type: S3
          Properties:
            Bucket: !Ref Bucket
            Events: s3:ObjectCreated:*
            Filter:
              S3Key:
                Rules:
                  - Name: prefix
                    Value: input/
                  - Name: suffix
                    Value: .json
  enrichWithTechniqueLogGroup:
    Type: AWS::Logs::LogGroup
    DeletionPolicy: Retain
    Properties:
      LogGroupName: !Sub /aws/lambda/${enrichWithTechnique}
  enrichWithTechniqueLambdaEventInvokeConfig:
    Type: AWS::Lambda::EventInvokeConfig
    Properties:
      FunctionName: !Ref enrichWithTechnique
      MaximumRetryAttempts: 0
      Qualifier: $LATEST

  processEnrichedWithTechnique:
    Type: AWS::Serverless::Function
    Properties:
      FunctionName: !Ref ProcessTechFunctionName
      Description: !Sub
        - Stack ${AWS::StackName} Function ${ResourceName}
        - ResourceName: processEnrichedWithTechnique
      InlineCode: |
        '''
        Process response from technique classification model and enrich alerts with technique. Create input for the Temporal clustering model.

        Input: Response from Technique classification model
        '''

        import os
        import urllib
        import json
        import glob
        import boto3
        from botocore.exceptions import ClientError
        import pandas as pd

        TECHNIQUE_MODEL_NAME = os.getenv("technique_model_name")

        BATCH_TRANSFORM_JOB_INSTANCE_TYPE = os.getenv("batch_transform_job_instance_type")

        BATCH_TRANSFORM_JOB_MAX_RETRIES = int(os.getenv("batch_transform_job_max_retries"))

        BATCH_TRANSFORM_JOB_TIMEOUT = int(os.getenv("batch_transform_job_timeout"))

        BATCH_TRANSFORM_JOB_MAX_PAYLOAD = int(os.getenv("batch_transform_job_max_payload"))

        BATCH_TRANSFORM_JOB_CONCURRENT_TRANSFORM = int(os.getenv("batch_transform_job_concurrent_transform"))

        missing_variables = []

        if TECHNIQUE_MODEL_NAME is None:
            missing_variables.append("technique_model_name")
        if BATCH_TRANSFORM_JOB_INSTANCE_TYPE is None:
            missing_variables.append("batch_transform_job_instance_type")
        if BATCH_TRANSFORM_JOB_MAX_RETRIES is None:
            missing_variables.append("batch_transform_job_max_retries")
        if BATCH_TRANSFORM_JOB_TIMEOUT is None:
            missing_variables.append("batch_transform_job_timeout")
        if BATCH_TRANSFORM_JOB_MAX_PAYLOAD is None:
            missing_variables.append("batch_transform_job_max_payload")
        if BATCH_TRANSFORM_JOB_CONCURRENT_TRANSFORM is None:
            missing_variables.append("batch_transform_job_concurrent_transform")

        if missing_variables:
            raise ValueError(f"Please enter environment variable(s): {missing_variables}")

        S3_CLIENT = boto3.client("s3")
        SAGEMAKER = boto3.client(service_name='sagemaker')

        TEMP_INPUT_DIR = "/tmp/input/"
        os.makedirs(TEMP_INPUT_DIR, exist_ok=True)

        SCRATCH_DIR = "scratch"

        TEMP_INPUT_FILENAME = "/tmp/classification_response.json"
        TEMP_ALERTS_INPUT_FILENAME = f"{TEMP_INPUT_DIR}input.json"
        TEMP_QUEUE_FILENAME = f"{TEMP_INPUT_DIR}queue.json"
        TEMP_CLUSTER_CONFIG_INPUT_FILENAME = f"{TEMP_INPUT_DIR}cluster_config.json"
        TEMP_NODE_FEATURE_INPUT_FILENAME = f"{TEMP_INPUT_DIR}node_feature.json"
        TEMP_USER_FEEDBACK_WEIGHTS = f"{TEMP_INPUT_DIR}user_feedback_weights.json"

        CLUSTER_CONFIG_EXISTS_IN_REQUEST = False
        NODE_FEATURE_EXISTS_IN_REQUEST = False


        def lambda_handler(event, context):
            '''
            Entrypoint from the trigger setup from lambda
            Args:
                event: Event triggered
                context: Context of the lambda function
            '''
            global S3_CLIENT
            global TEMP_INPUT_FILENAME
            global SCRATCH_DIR
            global CLUSTER_CONFIG_EXISTS_IN_REQUEST
            global NODE_FEATURE_EXISTS_IN_REQUEST

            func = "lambda_handler"

            CLUSTER_CONFIG_EXISTS_IN_REQUEST = False
            NODE_FEATURE_EXISTS_IN_REQUEST = False

            bucket = event["Records"][0]["s3"]["bucket"]["name"]
            try:
                input_filename = urllib.parse.unquote_plus(event["Records"][0]["s3"]["object"]["key"], encoding="utf-8")

                clear_temp_dir()

                print(f"{func}: Download file to process from S3 bucket: {bucket}/{input_filename}")

                try:
                    S3_CLIENT.download_file(bucket, input_filename, TEMP_INPUT_FILENAME)
                    print(f"{func}: File to process download completed")
                except Exception as e:
                    print(f"{func}: Failed to download file to process")
                    raise e

                unique_id = input_filename.split("/")[-2]

                print(f"{func}: Download intermediate files from S3")
                try:
                    download_intermediate_input_files(bucket, unique_id)

                except Exception as e:
                    print(f"{func}: Failed to download files to process")
                    raise e

                print(f"{func}: Processing chunk file and enriching with techniques. unique ID: {unique_id}")
                path_to_enriched_alerts_with_techniques = f"{SCRATCH_DIR}/output/classification/{unique_id}/input.json"
                process_classification_output(bucket, unique_id, path_to_enriched_alerts_with_techniques)

                print(f"{func}: Enriched with techniques: {unique_id}")
            except Exception as ex:
                print(f"{func}: Exception occurred while running lambda function. Uploading error file to S3.")
                error_message = f"Error occurred in lambda function: {context.function_name}. More details can be found in CloudWatch Logs for this lambda function. The exception message is: {ex}"
                upload_error_to_s3(bucket, error_message)
                raise ex


        def setup_batch_transform_job_for_enrich_alerts_with_techniques(bucket, unique_id):
            '''
            Configure batch transform job for Technique classification
            Args:
                bucket: Bucket name
                unique_id: Unique id of the chunk of input
                input_filename: S3 input filename to batch tranform job
            Returns:
                batch_transform_job: Configuration to create batch transform job
            '''
            global SCRATCH_DIR
            global BATCH_TRANSFORM_JOB_INSTANCE_TYPE
            global BATCH_TRANSFORM_JOB_TIMEOUT
            global BATCH_TRANSFORM_JOB_MAX_RETRIES
            global TECHNIQUE_MODEL_NAME
            global BATCH_TRANSFORM_JOB_MAX_PAYLOAD
            global BATCH_TRANSFORM_JOB_CONCURRENT_TRANSFORM

            transform_job_name = f'transform-job-tech-{unique_id}'

            transform_input = {
                "DataSource": {
                    "S3DataSource": {
                        "S3DataType": "S3Prefix",
                        "S3Uri": f"s3://{bucket}/{SCRATCH_DIR}/intermediate/{unique_id}/input_classification.json"
                    }
                },
                "ContentType": "text/plain",
                "SplitType": "Line"
            }
            transform_output = {
                "S3OutputPath": f"s3://{bucket}/{SCRATCH_DIR}/response/classification_out/{unique_id}/",
                "KmsKeyId": "",
                "AssembleWith": "Line"
            }
            transform_resources = {
                "InstanceType": BATCH_TRANSFORM_JOB_INSTANCE_TYPE,
                "InstanceCount": 1
            }
            model_client_config = {
                'InvocationsTimeoutInSeconds': BATCH_TRANSFORM_JOB_TIMEOUT,
                'InvocationsMaxRetries': BATCH_TRANSFORM_JOB_MAX_RETRIES
            }

            batch_transform_job = {
                'TransformJobName': transform_job_name,
                'ModelName': TECHNIQUE_MODEL_NAME,
                'TransformInput': transform_input,
                'TransformOutput': transform_output,
                'MaxPayloadInMB': BATCH_TRANSFORM_JOB_MAX_PAYLOAD,
                'MaxConcurrentTransforms': BATCH_TRANSFORM_JOB_CONCURRENT_TRANSFORM,
                "TransformResources": transform_resources,
                "ModelClientConfig": model_client_config
            }
            return batch_transform_job


        def process_classification_output(bucket, unique_id, path_to_enriched_alerts_with_techniques):
            '''
            Enrich alerts with technique. Start next technique classification batch transform job from queue.
            Args:
                bucket: Bucket name
                unique_id: Unique id of the chunk of input
                path_to_enriched_alerts_with_techniques: S3 path to save enriched alerts
            '''
            global TEMP_INPUT_FILENAME
            global TEMP_ALERTS_INPUT_FILENAME
            global TEMP_QUEUE_FILENAME
            global CLUSTER_CONFIG_EXISTS_IN_REQUEST
            global TEMP_CLUSTER_CONFIG_INPUT_FILENAME
            global NODE_FEATURE_EXISTS_IN_REQUEST
            global TEMP_NODE_FEATURE_INPUT_FILENAME
            global TEMP_USER_FEEDBACK_WEIGHTS
            global S3_CLIENT

            func = "process_classification_output"

            print(f"{func}: Load classification response file")
            classification_response_lines = open(TEMP_INPUT_FILENAME, "r").readlines()

            print(f"{func}: Load chunk input file")
            chunk_input_json = json.load(open(TEMP_ALERTS_INPUT_FILENAME, "r"))

            print(f"{func}: Gather alerts and classified techniques")

            classification_response_list = []
            for response_lines in classification_response_lines:
                response = json.loads(response_lines)
                classification_response_dict = {"alerts": response["raw_alert"], "techniques": json.dumps(response["labels"]["techniques"])}
                classification_response_list.append(classification_response_dict)

            classification_response_df = pd.DataFrame(classification_response_list, columns=["alerts", "techniques"])

            # Enrich input with recognized techniques from response from technique detection model
            if len(classification_response_df) > 0:
                print(f"{func}: Enrich alerts with techniques")

                for chunk_input_alert in chunk_input_json:

                    if len(chunk_input_alert["tech"]) > 0:
                        continue

                    alert_text = chunk_input_alert["name"]
                    alert_text_in_classification_response_df = classification_response_df["alerts"].eq(alert_text).any()

                    if alert_text_in_classification_response_df:
                        tech = classification_response_df[classification_response_df["alerts"] == alert_text].iloc[0]["techniques"]
                        tech = json.loads(tech)
                        chunk_input_alert["tech"] = tech

            temp_enriched_alerts_with_techniques_output_filename = "/tmp/input.json"

            user_feedback_weights_json = json.load(open(TEMP_USER_FEEDBACK_WEIGHTS, "r"))
            edge_weights = user_feedback_weights_json["event"]

            # Create request for temporal clustering model
            with open(temp_enriched_alerts_with_techniques_output_filename, "w") as of:
                req = {"request_id": "1", "input": chunk_input_json, "user_feedback_weights": edge_weights}

                if CLUSTER_CONFIG_EXISTS_IN_REQUEST:
                    req["cluster_config"] = json.load(open(TEMP_CLUSTER_CONFIG_INPUT_FILENAME, "r"))

                if NODE_FEATURE_EXISTS_IN_REQUEST:
                    req["node_feature"] = json.load(open(TEMP_NODE_FEATURE_INPUT_FILENAME, "r"))
                json.dump(req, of)

            try:
                print(f"{func}: Save enriched alerts with techniques as input to cluster detection: {bucket}/{path_to_enriched_alerts_with_techniques}")
                S3_CLIENT.upload_file(temp_enriched_alerts_with_techniques_output_filename, bucket, path_to_enriched_alerts_with_techniques)
            except Exception as e:
                print(f"Failed to save enriched alerts with technique to S3: {bucket}/{path_to_enriched_alerts_with_techniques}")
                raise e

            update_lookup_table(bucket, unique_id, classification_response_df)

            check_and_create_next_batch_transform_job_for_enrich_alerts_with_techniques(bucket)


        def check_and_create_next_batch_transform_job_for_enrich_alerts_with_techniques(bucket):
            '''
            Check and create technique classification batch transform job
            Args:
                bucket: Bucket name
            '''
            global TEMP_QUEUE_FILENAME
            global SCRATCH_DIR
            global SAGEMAKER
            global SCRATCH_DIR
            global S3_CLIENT

            func = "check_and_create_next_batch_transform_job_for_enrich_alerts_with_techniques"

            current_queue_json = json.load(open(TEMP_QUEUE_FILENAME, "r"))

            next_input_dir = "/tmp/next/"
            os.makedirs(next_input_dir, exist_ok=True)
            temp_next_queue_json_filename = f"{next_input_dir}queue.json"

            while current_queue_json["next"] is not None:
                next_unique_id = current_queue_json["next"]

                print(f"{func}: Processing for unique_id: {next_unique_id}")

                path_to_next_enriched_alerts_with_techniques = f"{SCRATCH_DIR}/output/classification/{next_unique_id}/input.json"

                try:
                    print(f"{func}: Checking if next batch transform job output file exists on S3 path {bucket}/{path_to_next_enriched_alerts_with_techniques}")
                    S3_CLIENT.head_object(Bucket=bucket, Key=path_to_next_enriched_alerts_with_techniques)
                    print(f"{func}: Next batch tranform job output file exists on S3. Skip creating batch transform job for: {next_unique_id}")

                except ClientError as e:
                    if e.response['Error']['Code'] != '404':
                        raise e

                    try:
                        print(f"{func}: Next batch tranform job output file does not exist on S3. Create batch tranform job for: {next_unique_id}")
                        batch_transform_job = setup_batch_transform_job_for_enrich_alerts_with_techniques(bucket, next_unique_id)
                        SAGEMAKER.create_transform_job(**batch_transform_job)
                        break
                    except Exception as ex:
                        print(f"{func}: Failed to create batch trasnform job. Unique id: {next_unique_id}")
                        raise ex

                try:
                    print(f"{func}: Download queue.json for next transform job after {next_unique_id}")
                    path_to_next_queue = f"{SCRATCH_DIR}/intermediate/{next_unique_id}/queue.json"
                    S3_CLIENT.download_file(bucket, path_to_next_queue, temp_next_queue_json_filename)
                except Exception as e:
                    print(f"{func}: Failed to download queue for next chunk: {bucket}/{path_to_next_queue}")
                    raise e

                current_queue_json = json.load(open(temp_next_queue_json_filename, "r"))


        def download_intermediate_input_files(bucket, unique_id):
            '''
            Download chunk input file, queue, config if exists, and node features if exists
            Args:
                bucket: Bucket name
                path_to_intermediate_input_before_technique_classification: S3 path to chunk input file before technique enrichment
                path_to_intermediate_queue: S3 path to queue for chunk
                path_to_config_from_input: S3 path to config
                path_to_node_feature_from_input: S3 path to node_feature
            '''
            global TEMP_ALERTS_INPUT_FILENAME
            global TEMP_QUEUE_FILENAME
            global CLUSTER_CONFIG_EXISTS_IN_REQUEST
            global TEMP_CLUSTER_CONFIG_INPUT_FILENAME
            global TEMP_NODE_FEATURE_INPUT_FILENAME
            global NODE_FEATURE_EXISTS_IN_REQUEST
            global TEMP_USER_FEEDBACK_WEIGHTS

            func = "download_intermediate_input_files"

            path_to_intermediate_input_before_technique_classification = f"{SCRATCH_DIR}/intermediate/{unique_id}/input.json"
            path_to_intermediate_queue = f"{SCRATCH_DIR}/intermediate/{unique_id}/queue.json"
            path_to_cluster_config_from_input = f"{SCRATCH_DIR}/intermediate/{unique_id}/cluster_config.json"
            path_to_node_feature_from_input = f"{SCRATCH_DIR}/intermediate/{unique_id}/node_feature.json"
            path_to_global_attribute_weights = f"{SCRATCH_DIR}/attribute_weights.json"

            S3_CLIENT.download_file(bucket, path_to_intermediate_input_before_technique_classification, TEMP_ALERTS_INPUT_FILENAME)
            S3_CLIENT.download_file(bucket, path_to_intermediate_queue, TEMP_QUEUE_FILENAME)
            S3_CLIENT.download_file(bucket, path_to_global_attribute_weights, TEMP_USER_FEEDBACK_WEIGHTS)

            try:
                print(f"{func}: Checking if cluster config file exists on S3 path {bucket}/{path_to_cluster_config_from_input}")
                S3_CLIENT.head_object(Bucket=bucket, Key=path_to_cluster_config_from_input)
                CLUSTER_CONFIG_EXISTS_IN_REQUEST = True

            except ClientError as e:
                if e.response['Error']['Code'] == '404':
                    print(f"{func}: Cluster config file does not exist")
                else:
                    raise e
            else:
                try:
                    S3_CLIENT.download_file(bucket, path_to_cluster_config_from_input, TEMP_CLUSTER_CONFIG_INPUT_FILENAME)
                    print(f"{func}: Download completed")
                except Exception as e:
                    print(f"{func}: Failed to download file: {bucket}/{path_to_cluster_config_from_input}")
                    raise e

            try:
                print(f"{func}: Checking if node feaures file exists on S3 path {bucket}/{path_to_node_feature_from_input}")
                S3_CLIENT.head_object(Bucket=bucket, Key=path_to_node_feature_from_input)
                NODE_FEATURE_EXISTS_IN_REQUEST = True

            except ClientError as e:
                if e.response['Error']['Code'] == '404':
                    print(f"{func}: Node feaures file does not exist")
                else:
                    raise e
            else:
                try:
                    S3_CLIENT.download_file(bucket, path_to_node_feature_from_input, TEMP_NODE_FEATURE_INPUT_FILENAME)
                    print(f"{func}: Download completed")
                except Exception as e:
                    print(f"{func}: Failed to download file: {bucket}/{path_to_node_feature_from_input}")
                    raise e


        def update_lookup_table(bucket, unique_id, classification_response_df):
            '''
            Create input to update technique classification lookup table. Trigger lookup table update.
            Args:
                bucket: Bucket name
                unique_id: Unique id of the chunk of input
                classification_response_df: dataframe to add to lookup table
            '''
            global SCRATCH_DIR
            global S3_CLIENT

            func = "update_lookup_table"

            classification_response_s3_key = f"{SCRATCH_DIR}/queue/{unique_id}/classification_response.csv"

            temp_message_payload_dir = "/tmp/message_payload/"
            os.makedirs(temp_message_payload_dir, exist_ok=True)
            temp_message_payload_filename = f"{temp_message_payload_dir}classification_response.csv"

            if len(classification_response_df) < 1:
                return
            classification_response_df.to_csv(temp_message_payload_filename, index=False)

            try:
                S3_CLIENT.upload_file(temp_message_payload_filename, bucket, classification_response_s3_key)
            except Exception as e:
                print(f"{func}: Failed to upload classification response message payload file to: {bucket}/{classification_response_s3_key}")
                raise e

            message_body = {
                "s3_bucket": bucket,
                "s3_key": classification_response_s3_key
            }

            print(json.dumps({"event": "update lookup from message", "message": message_body}))


        def upload_error_to_s3(bucket, error_message):
            '''
            Upload error message to S3
            Args:
                error_message: The error message to upload
                unique_id: Unique ID for identifying the error log
            '''
            global S3_CLIENT
            error_log_key = "output/error_log.txt"
            try:
                S3_CLIENT.put_object(Bucket=bucket, Key=error_log_key, Body=error_message)
                print(f"Uploaded error log to s3://{bucket}/{error_log_key}")
            except Exception as e:
                print(f"Failed to upload error log to S3. Error: {str(e)}")


        def clear_temp_dir():
            rm_list = glob.glob("/tmp/**", recursive=True)
            for f in rm_list:
                if os.path.isfile(f):
                    os.remove(f)

      Handler: index.lambda_handler
      Runtime: python3.11
      MemorySize: 3008
      EphemeralStorage:
        Size: 10240
      Timeout: 900
      Role: !GetAtt LambdaRole.Arn
      Tracing: Active
      Layers:
        - !FindInMap
          - RegionMap
          - !Ref AWS::Region
          - lambdaLayer
      Environment:
        Variables:
          batch_transform_job_instance_type: !Ref EnrichTechBatchInstanceType
          batch_transform_job_max_retries: "0"
          batch_transform_job_timeout: !Ref EnrichTechBatchTimeout
          technique_model_name: !Ref TechniqueModelName
          batch_transform_job_max_payload: "10"
          batch_transform_job_concurrent_transform: "1"
      Events:
        S3ObjectCreated:
          Type: S3
          Properties:
            Bucket: !Ref Bucket
            Events: s3:ObjectCreated:*
            Filter:
              S3Key:
                Rules:
                  - Name: prefix
                    Value: scratch/response/classification_out/
                  - Name: suffix
                    Value: .out
  processEnrichedWithTechniqueLogGroup:
    Type: AWS::Logs::LogGroup
    DeletionPolicy: Retain
    Properties:
      LogGroupName: !Sub /aws/lambda/${processEnrichedWithTechnique}
  processEnrichedWithTechniqueLambdaEventInvokeConfig:
    Type: AWS::Lambda::EventInvokeConfig
    Properties:
      FunctionName: !Ref processEnrichedWithTechnique
      MaximumRetryAttempts: 0
      Qualifier: $LATEST

  createCluster:
    Type: AWS::Serverless::Function
    Properties:
      FunctionName: !Ref CreateClusterFunctionName
      Description: !Sub
        - Stack ${AWS::StackName} Function ${ResourceName}
        - ResourceName: createCluster
      InlineCode: |
        '''
        Create batch transform job for Temporal Clustering model

        Input: input alerts enriched with techniques
        '''

        import json
        import os
        import urllib
        import zipfile
        import glob

        import boto3
        from botocore.exceptions import ClientError

        CLUSTER_MODEL_NAME = os.getenv("cluster_model_name")

        BATCH_TRANSFORM_JOB_INSTANCE_TYPE = os.getenv("batch_transform_job_instance_type")

        BATCH_TRANSFORM_JOB_MAX_RETRIES = int(os.getenv("batch_transform_job_max_retries"))

        BATCH_TRANSFORM_JOB_TIMEOUT = int(os.getenv("batch_transform_job_timeout"))

        missing_variables = []
        if CLUSTER_MODEL_NAME is None:
            missing_variables.append("cluster_model_name")
        if BATCH_TRANSFORM_JOB_INSTANCE_TYPE is None:
            missing_variables.append("batch_transform_job_instance_type")
        if BATCH_TRANSFORM_JOB_MAX_RETRIES is None:
            missing_variables.append("batch_transform_job_max_retries")
        if BATCH_TRANSFORM_JOB_TIMEOUT is None:
            missing_variables.append("batch_transform_job_timeout")

        if missing_variables:
            raise ValueError(f"Please enter environment variable(s): {missing_variables}")

        S3_CLIENT = boto3.client("s3")
        SAGEMAKER = boto3.client(service_name='sagemaker')

        SCRATCH_DIR = "scratch"

        TEMP_INPUT_FILENAME = "/tmp/input.json"
        TEMP_INTERMEDIATE_QUEUE_FILENAME = "/tmp/queue.json"


        def lambda_handler(event, context):
            '''
            Entrypoint from the trigger setup from lambda
            Args:
                event: Event triggered
                context: Context of the lambda function
            '''
            global S3_CLIENT
            global TEMP_INPUT_FILENAME
            global TEMP_INTERMEDIATE_QUEUE_FILENAME
            func = "lambda_handler"

            bucket = event["Records"][0]["s3"]["bucket"]["name"]
            try:
                input_filename = urllib.parse.unquote_plus(event["Records"][0]["s3"]["object"]["key"], encoding="utf-8")

                clear_temp_dir()

                print(f"{func}: Download input file from S3 bucket: {bucket}/{input_filename}")

                try:
                    S3_CLIENT.download_file(bucket, input_filename, TEMP_INPUT_FILENAME)
                    print(f"{func}: Input file download completed")
                except Exception as e:
                    print(f"{func}: Failed to download input file")
                    raise e

                unique_id = input_filename.split("/")[-2]

                path_to_intermediate_queue = f"{SCRATCH_DIR}/intermediate/{unique_id}/queue.json"
                try:
                    S3_CLIENT.download_file(bucket, path_to_intermediate_queue, TEMP_INTERMEDIATE_QUEUE_FILENAME)
                    print(f"{func}: Queue file download completed")
                except Exception as e:
                    print(f"{func}: Failed to download file from S3: {bucket}/{path_to_intermediate_queue}")
                    raise e

                if check_if_skip_creating_tranform_job(bucket, unique_id):
                    return

                print(f"{func}: Start batch transform job for aggregate and cluster and alerts")
                batch_transform_job_creation_response = aggregate_and_cluster_alerts(bucket, unique_id)
                print(f"{func}: Created batch transform job for aggregate and cluster alerts: {batch_transform_job_creation_response['TransformJobArn']}")
            except Exception as ex:
                print(f"{func}: Exception occurred while running lambda function. Uploading error file to S3.")
                error_message = f"Error occurred in lambda function: {context.function_name}. More details can be found in CloudWatch Logs for this lambda function. The exception message is: {ex}"
                upload_error_to_s3(bucket, error_message)
                raise ex


        def setup_batch_transform_job_for_aggregate_and_cluster_alerts(bucket, unique_id, input_filename):
            '''
            Configure batch transform job for Temporal clustering
            Args:
                bucket: Bucket name
                unique_id: Unique id of the chunk of input
                input_filename: S3 input filename to batch tranform job
            Returns:
                batch_transform_job: Configuration to create batch transform job
            '''
            transform_job_name = f'transform-job-cluster-{unique_id}'

            transform_input = {
                "DataSource": {
                    "S3DataSource": {
                        "S3DataType": "S3Prefix",
                        "S3Uri": f"s3://{bucket}/{input_filename}"
                    }
                },
                "ContentType": "application/zip",
                "SplitType": "None"
            }
            transform_output = {
                "S3OutputPath": f"s3://{bucket}/{SCRATCH_DIR}/response/cluster_out/{unique_id}/",
                "KmsKeyId": ""
            }
            transform_resources = {
                "InstanceType": BATCH_TRANSFORM_JOB_INSTANCE_TYPE,
                "InstanceCount": 1
            }
            model_client_config = {
                'InvocationsTimeoutInSeconds': BATCH_TRANSFORM_JOB_TIMEOUT,
                'InvocationsMaxRetries': BATCH_TRANSFORM_JOB_MAX_RETRIES
            }

            batch_transform_job = {
                'TransformJobName': transform_job_name,
                'ModelName': CLUSTER_MODEL_NAME,
                'TransformInput': transform_input,
                'TransformOutput': transform_output,
                'MaxPayloadInMB': 0,
                'MaxConcurrentTransforms': 1,
                "TransformResources": transform_resources,
                "ModelClientConfig": model_client_config
            }
            return batch_transform_job


        def check_if_skip_creating_tranform_job(bucket, unique_id):
            '''
            Check if batch transform job should be created for the unique id based on queue
            Args:
                bucket: Bucket name
                unique_id: Unique id of the chunk of the input
            Returns:
                boolean: True, if the transform job creation should be skipped. False, otherwise
            '''

            global TEMP_INPUT_FILENAME
            global SCRATCH_DIR
            global S3_CLIENT
            global TEMP_INTERMEDIATE_QUEUE_FILENAME

            queue_json = json.load(open(TEMP_INTERMEDIATE_QUEUE_FILENAME, "r"))
            print(f"Contents of the queue for creating transform job: {queue_json}")

            if queue_json["previous"] is None:
                print(f"No previous unique id found. Create batch transform job for {unique_id}")
                return False

            previous_unique_id = queue_json["previous"]
            print(f"Previous unique id found: {previous_unique_id}. Check if the cluster detection is completed.")

            try:
                path_to_cluster_output_of_previous = f"{SCRATCH_DIR}/response/cluster_out/{previous_unique_id}/input.zip.out"
                S3_CLIENT.head_object(Bucket=bucket, Key=path_to_cluster_output_of_previous)
                print(f"Will create batch transform job for {unique_id}")
                return False
            except ClientError as e:
                if e.response['Error']['Code'] != '404':
                    raise e
                print(f"Clustering did not complete for {previous_unique_id}. Skip create batch transform job for {unique_id}")
                return True


        def aggregate_and_cluster_alerts(bucket, unique_id):
            '''
            Args:
                bucket: Bucket name
                unique_id: Unique id of the chunk of the input
            Returns:
                response: Response of sagemaker client batch transform job creation
            '''
            global TEMP_INPUT_FILENAME
            global TEMP_INTERMEDIATE_QUEUE_FILENAME
            global SCRATCH_DIR
            global S3_CLIENT
            global SAGEMAKER

            func = "aggregate_and_cluster_alerts"

            temp_previous_cluster_output_zip = "/tmp/previous_output.zip"
            temp_enriched_alerts_with_techniques_output_filename = "input.json"

            queue_json = json.load(open(TEMP_INTERMEDIATE_QUEUE_FILENAME, "r"))

            try:
                if queue_json["previous"] is not None:
                    previous_unique_id = queue_json["previous"]
                    path_to_cluster_output_of_previous = f"{SCRATCH_DIR}/response/cluster_out/{previous_unique_id}/input.zip.out"
                    S3_CLIENT.download_file(bucket, path_to_cluster_output_of_previous, temp_previous_cluster_output_zip)
            except Exception as e:
                print(f"{func}: Unable to download file from S3: {bucket}/{path_to_cluster_output_of_previous}")
                raise e

            try:
                with zipfile.ZipFile(temp_previous_cluster_output_zip, "a") as z:
                    z.write(TEMP_INPUT_FILENAME, arcname=temp_enriched_alerts_with_techniques_output_filename)
            except Exception as e:
                print(f"{func}: Unable to write input file to zip while creating the zip file")
                raise e

            path_to_enriched_alerts_with_techniques = f"{SCRATCH_DIR}/output/classification/{unique_id}/input.zip"

            try:
                print(f"{func}: Save zip input to flow detection: {bucket}/{path_to_enriched_alerts_with_techniques}")
                S3_CLIENT.upload_file(temp_previous_cluster_output_zip, bucket, path_to_enriched_alerts_with_techniques)
            except Exception as e:
                print(f"{func}: Failed to upload zip file to S3: {bucket}/{path_to_enriched_alerts_with_techniques}")
                raise e

            try:
                batch_transform_job = setup_batch_transform_job_for_aggregate_and_cluster_alerts(bucket, unique_id, path_to_enriched_alerts_with_techniques)
                response = SAGEMAKER.create_transform_job(**batch_transform_job)
            except Exception as e:
                print(f"{func}: Failed to create batch transform job. Unique id: {unique_id}")
                raise e

            return response


        def upload_error_to_s3(bucket, error_message):
            '''
            Upload error message to S3
            Args:
                error_message: The error message to upload
                unique_id: Unique ID for identifying the error log
            '''
            global S3_CLIENT
            error_log_key = "output/error_log.txt"
            try:
                S3_CLIENT.put_object(Bucket=bucket, Key=error_log_key, Body=error_message)
                print(f"Uploaded error log to s3://{bucket}/{error_log_key}")
            except Exception as e:
                print(f"Failed to upload error log to S3. Error: {str(e)}")


        def clear_temp_dir():
            rm_list = glob.glob("/tmp/**", recursive=True)
            for f in rm_list:
                if os.path.isfile(f):
                    os.remove(f)

      Handler: index.lambda_handler
      Runtime: python3.11
      MemorySize: 3008
      EphemeralStorage:
        Size: 10240
      Timeout: 900
      Role: !GetAtt LambdaRole.Arn
      Tracing: Active
      Layers:
        - !FindInMap
          - RegionMap
          - !Ref AWS::Region
          - lambdaLayer
      Environment:
        Variables:
          batch_transform_job_instance_type: !Ref ClusterBatchInstanceType
          batch_transform_job_max_retries: "0"
          batch_transform_job_timeout: !Ref ClusterBatchTimeout
          cluster_model_name: !Ref ClusterModelName
      Events:
        S3ObjectCreated:
          Type: S3
          Properties:
            Bucket: !Ref Bucket
            Events: s3:ObjectCreated:*
            Filter:
              S3Key:
                Rules:
                  - Name: prefix
                    Value: scratch/output/classification/
                  - Name: suffix
                    Value: .json
  createClusterLogGroup:
    Type: AWS::Logs::LogGroup
    DeletionPolicy: Retain
    Properties:
      LogGroupName: !Sub /aws/lambda/${createCluster}
  createClusterLambdaEventInvokeConfig:
    Type: AWS::Lambda::EventInvokeConfig
    Properties:
      FunctionName: !Ref createCluster
      MaximumRetryAttempts: 0
      Qualifier: $LATEST

  processCluster:
    Type: AWS::Serverless::Function
    Properties:
      FunctionName: !Ref ProcessClusterFunctionName
      Description: !Sub
        - Stack ${AWS::StackName} Function ${ResourceName}
        - ResourceName: processCluster
      InlineCode: |
        '''
        Create input for next batch if in queue. Extract internal aggregated alerts, cluster json. Create input for flow model.
        Input: response from the clustering model
        '''

        import json
        import os
        import urllib
        import zipfile
        import glob
        import sqlite3
        from contextlib import closing

        import pandas as pd
        import boto3
        from botocore.exceptions import ClientError

        pd.options.mode.chained_assignment = None

        CLUSTER_MODEL_NAME = os.getenv("cluster_model_name")

        BATCH_TRANSFORM_JOB_INSTANCE_TYPE = os.getenv("batch_transform_job_instance_type")

        BATCH_TRANSFORM_JOB_MAX_RETRIES = int(os.getenv("batch_transform_job_max_retries"))

        BATCH_TRANSFORM_JOB_TIMEOUT = int(os.getenv("batch_transform_job_timeout"))

        FLOW_INPUT_WINDOW_SIZE = int(os.getenv("flow_input_window_size"))

        missing_variables = []

        if CLUSTER_MODEL_NAME is None:
            missing_variables.append("cluster_model_name")
        if BATCH_TRANSFORM_JOB_INSTANCE_TYPE is None:
            missing_variables.append("batch_transform_job_instance_type")
        if BATCH_TRANSFORM_JOB_MAX_RETRIES is None:
            missing_variables.append("batch_transform_job_max_retries")
        if BATCH_TRANSFORM_JOB_TIMEOUT is None:
            missing_variables.append("batch_transform_job_timeout")
        if FLOW_INPUT_WINDOW_SIZE is None:
            missing_variables.append("flow_input_window_size")

        if missing_variables:
            raise ValueError(f"Please enter environment variable(s): {missing_variables}")

        S3_CLIENT = boto3.client("s3")
        SAGEMAKER = boto3.client(service_name='sagemaker')

        SCRATCH_DIR = "scratch"

        PATH_TO_SQLITE = f"{SCRATCH_DIR}/cypienta.db"
        TEMP_SQLITE_FILENAME = "/tmp/cypienta.db"

        TEMP_INTERMEDIATE_QUEUE_FILENAME = "/tmp/queue.json"

        TEMP_AGGREGATE_AND_CLUSTER_ALERTS_OUTPUT_FILENAME = "/tmp/aggregate_model_output.zip"

        TEMP_AGGREGATED_ALERTS_JSON_FILENAME = "/tmp/aggregated_alerts_output.json"
        TEMP_CLUSTER_OF_AGGREGATED_ALERTS_JSON_FILENAME = "/tmp/cluster_output.json"

        TEMP_INPUT_CLUSTER_ID_LIST_JSON_FILENAME = "/tmp/input_cluster_id_list.json"

        TEMP_INPUT_TO_SEQUENCE_ALERTS_JSON_FILENAME = "/tmp/input_sequence.json"

        TEMP_CONFIG_INPUT_FILENAME = "/tmp/config.json"
        CONFIG_EXISTS_IN_REQUEST = False

        INTERNAL_ID_TO_USER_ID_OBJECT_KEY = f"{SCRATCH_DIR}/internal_id_to_user_id.json"
        TEMP_INTERNAL_ID_TO_USER_ID_LOOKUP_FILE = "/tmp/internal_id_to_user_id.json"

        TICKET_ID_OBJECT_KEY = f"{SCRATCH_DIR}/ticket_id.json"
        TEMP_TICKET_ID_LOOKUP_FILE = "/tmp/ticket_id.json"

        TEMP_INPUT_CLUSTER_TICKET_OUTPUT_FILENAME = "/tmp/input_cluster_ticket_output.json"

        TEMP_GLOBAL_METRICS_FILENAME = "/tmp/global_metrics.json"


        def lambda_handler(event, context):
            '''
            Entrypoint from the trigger setup from lambda
            Args:
                event: Event triggered
                context: Context of the lambda function
            '''
            global S3_CLIENT
            global SCRATCH_DIR
            global TEMP_AGGREGATE_AND_CLUSTER_ALERTS_OUTPUT_FILENAME
            global TEMP_SQLITE_FILENAME
            global PATH_TO_SQLITE
            global CONFIG_EXISTS_IN_REQUEST

            func = "lambda_handler"

            CONFIG_EXISTS_IN_REQUEST = False

            bucket = event["Records"][0]["s3"]["bucket"]["name"]
            try:
                aggregate_and_cluster_alerts_output = urllib.parse.unquote_plus(event["Records"][0]["s3"]["object"]["key"], encoding="utf-8")
                # lambda_handler: input file: alert-bucket-v5/scratch/response/cluster_out/b34c3c0e-bcad-4646-893b-aa097dea12ca/input.zip.out
                # lambda_handler: input file: alert-bucket-v5/scratch/response/cluster_out/665589b1-7948-477c-aabb-140f44d1f6f9/input.zip.out
                # lambda_handler: input file: alert-bucket-v5/scratch/response/cluster_out/8629ad0e-35d6-4f52-9eaa-3444118036a2/input.zip.out
                # bucket = "alert-bucket-v5"
                # aggregate_and_cluster_alerts_output = "scratch/response/cluster_out/8629ad0e-35d6-4f52-9eaa-3444118036a2/input.zip.out"

                clear_temp_dir()

                try:
                    print(f"{func}: input file: {bucket}/{aggregate_and_cluster_alerts_output}")
                    S3_CLIENT.download_file(bucket, aggregate_and_cluster_alerts_output, TEMP_AGGREGATE_AND_CLUSTER_ALERTS_OUTPUT_FILENAME)
                    print(f"{func}: Downloaded input file")
                except Exception as e:
                    print(f"{func}: Failed to download input file")
                    raise e

                unique_id = aggregate_and_cluster_alerts_output.split("/")[-2]

                print(f"{func}: Unique ID: {unique_id}")

                input_to_sequence_alerts = f"{SCRATCH_DIR}/output/cluster/{unique_id}/input_flow.json"

                print(f"{func}: Download intermediate files from S3 bucket")
                try:
                    download_intermediate_input_files(bucket, unique_id)

                except Exception as e:
                    print(f"{func}: Failed to download files to process")
                    raise e

                fetch_or_create_ticket_id_lookup_file(bucket)

                print(f"{func}: Starting processing cluster output")
                process_output_of_aggregate_and_cluster_alerts(bucket, unique_id, input_to_sequence_alerts)
                print(f"{func}: Completed processing")

                try:
                    print(f"{func}: Upload sqlite to S3")

                    S3_CLIENT.upload_file(TEMP_SQLITE_FILENAME, bucket, PATH_TO_SQLITE)

                    print(f"{func}: Upload completed")
                except Exception as e:
                    print(f"{func}: Failed to upload sqlite from S3: {bucket}/{PATH_TO_SQLITE}")
                    raise e
            except Exception as ex:
                print(f"{func}: Exception occurred while running lambda function. Uploading error file to S3.")
                error_message = f"Error occurred in lambda function: {context.function_name}. More details can be found in CloudWatch Logs for this lambda function. The exception message is: {ex}"
                upload_error_to_s3(bucket, error_message)
                raise ex


        def setup_batch_transform_job_for_aggregate_and_cluster_alerts(bucket, unique_id, input_filename):
            '''
            Configure batch transform job for Temporal clustering model
            Args:
                bucket: Bucket name
                unique_id: Unique id of the chunk of input
                input_filename: S3 input filename to batch tranform job
            Returns:
                batch_transform_job: Configuration to create batch transform job
            '''
            global BATCH_TRANSFORM_JOB_INSTANCE_TYPE
            global BATCH_TRANSFORM_JOB_TIMEOUT
            global BATCH_TRANSFORM_JOB_MAX_RETRIES
            global CLUSTER_MODEL_NAME
            global SCRATCH_DIR

            transform_job_name = f'transform-job-cluster-{unique_id}'

            transform_input = {
                "DataSource": {
                    "S3DataSource": {
                        "S3DataType": "S3Prefix",
                        "S3Uri": f"s3://{bucket}/{input_filename}"
                    }
                },
                "ContentType": "application/zip",
                "SplitType": "None"
            }
            transform_output = {
                "S3OutputPath": f"s3://{bucket}/{SCRATCH_DIR}/response/cluster_out/{unique_id}/",
                "KmsKeyId": ""
            }
            transform_resources = {
                "InstanceType": BATCH_TRANSFORM_JOB_INSTANCE_TYPE,
                "InstanceCount": 1
            }
            model_client_config = {
                'InvocationsTimeoutInSeconds': BATCH_TRANSFORM_JOB_TIMEOUT,
                'InvocationsMaxRetries': BATCH_TRANSFORM_JOB_MAX_RETRIES
            }

            batch_transform_job = {
                'TransformJobName': transform_job_name,
                'ModelName': CLUSTER_MODEL_NAME,
                'TransformInput': transform_input,
                'TransformOutput': transform_output,
                'MaxPayloadInMB': 0,
                'MaxConcurrentTransforms': 1,
                "TransformResources": transform_resources,
                "ModelClientConfig": model_client_config
            }
            return batch_transform_job


        def process_output_of_aggregate_and_cluster_alerts(bucket, unique_id, input_to_sequence_alerts):
            '''
            Process output of temporal clustering model, extract aggregated alerts, and clusters internally.
            create cluster ticket output for batch, global cluster output
            Create next batch transform job if in queue. else save input file for flow model
            Args:
                bucket: Bucket name
                unique_id: Unique id of current chunk
                input_to_sequence_alerts: S3 path to save input for flow model
            '''

            global S3_CLIENT
            global TEMP_AGGREGATE_AND_CLUSTER_ALERTS_OUTPUT_FILENAME
            global TEMP_CLUSTER_OF_AGGREGATED_ALERTS_JSON_FILENAME
            global TEMP_AGGREGATED_ALERTS_JSON_FILENAME
            global TEMP_INPUT_TO_SEQUENCE_ALERTS_JSON_FILENAME
            global TEMP_CONFIG_INPUT_FILENAME
            global TEMP_INTERMEDIATE_QUEUE_FILENAME
            global CONFIG_EXISTS_IN_REQUEST

            func = "process_output_of_aggregate_and_cluster_alerts"

            print(f"{func}: Extract cluster and alert output from zip")

            with zipfile.ZipFile(TEMP_AGGREGATE_AND_CLUSTER_ALERTS_OUTPUT_FILENAME, "r") as z:
                with z.open("cluster_output.json", "r") as f:
                    clusters_of_aggregated_alerts = json.load(f)

                with z.open("alert_output.json", "r") as f:
                    aggregated_alerts = json.load(f)

            with open(TEMP_INTERMEDIATE_QUEUE_FILENAME, "r") as f:
                queue_json = json.load(f)

            print(f"{func}: Sort output")

            aggregated_alerts.sort(key=sort_by_alert_id)
            clusters_of_aggregated_alerts.sort(key=sort_by_cluster_id)

            print(f"{func}: Save raw output")

            json.dump(clusters_of_aggregated_alerts, open(TEMP_CLUSTER_OF_AGGREGATED_ALERTS_JSON_FILENAME, "w"))
            json.dump(aggregated_alerts, open(TEMP_AGGREGATED_ALERTS_JSON_FILENAME, "w"))

            print(f"{func}: Update cluster id in input, get current batch cluster id list")

            batch_cluster_id_list = get_batch_cluster_ids_update_input_cluster_ids(bucket, unique_id)

            # save output from cluster model to S3
            print(f"{func}: save output from cluster model to S3")
            save_aggreate_cluster_json_output(bucket, unique_id)

            # update event sqlite table with current batch output
            print(f"{func}: update event table with current batch output")
            update_event_sql(bucket, unique_id)

            # update cluster output sqlite table with operations and current batch output
            print(f"{func}: update cluster output table with current batch output. get partial metrics dict")
            metrics = update_cluster_output_sql(batch_cluster_id_list)

            # update cluster ticket output sqlite for current batch output
            print(f"{func}: update cluster ticket output table with current batch output. create per per cluster metrics")
            update_cluster_ticket_output_sql(bucket, batch_cluster_id_list, metrics)

            if queue_json["next"] is None:
                print(f"{func}: Start flow detection as this is last chunk of input")

                print(f"{func}: Get global metrics for all clusters currently in sql")
                get_global_metrics_from_cluster_ticket_output_sql(bucket, unique_id)

                print(f"{func}: create cluster ticket output for input")
                create_input_cluster_ticket_output_sql(bucket, unique_id)

                print(f"{func}: Save input to sequence alerts to: {input_to_sequence_alerts}")

                print(f"{func}: Get last {FLOW_INPUT_WINDOW_SIZE} clusters as input to flow model")
                flow_input = get_last_n_clusters()
                flow_input = [dict(row) for row in flow_input]

                print(f"{func}: Process data for flow model input")
                for row in flow_input:
                    row["cluster_srcips"] = json.loads(row["cluster_srcips"])
                    row["cluster_dstips"] = json.loads(row["cluster_dstips"])
                    row["cluster_ips"] = list(set(row["cluster_srcips"] + row["cluster_dstips"]))
                    row["cluster_techs"] = json.loads(row["cluster_techs"])
                    row["cluster_tacs"] = json.loads(row["cluster_tacs"])
                    row["cluster_tacs"] = [int(t) for t in row["cluster_tacs"]]
                    row["cluster_stages"] = json.loads(row["cluster_stages"])
                    row["cluster_stages"] = [int(t) for t in row["cluster_stages"]]

                print(f"{func}: Save input for flow model")
                with open(TEMP_INPUT_TO_SEQUENCE_ALERTS_JSON_FILENAME, "w") as of:
                    req = {"request_id": "1", "input": flow_input}
                    if CONFIG_EXISTS_IN_REQUEST:
                        print(f"{func}: Flow model config added to request")
                        req["config"] = json.load(open(TEMP_CONFIG_INPUT_FILENAME, "r"))
                    json.dump(req, of)

                try:
                    S3_CLIENT.upload_file(TEMP_INPUT_TO_SEQUENCE_ALERTS_JSON_FILENAME, bucket, input_to_sequence_alerts)
                    print(f"{func}: Input to sequence alerts saved")
                except Exception as e:
                    print(f"{func}: Failed to upload file to S3: {bucket}/{input_to_sequence_alerts}")
                    raise e

            else:
                print(f"{func}: Start batch transform job for next aggregate and cluster and alerts")
                batch_transform_job_creation_response = aggregate_and_cluster_alerts(bucket, queue_json["next"])
                if batch_transform_job_creation_response is not None:
                    print(f"{func}: Created batch transform job for next aggregate and cluster alerts: {batch_transform_job_creation_response['TransformJobArn']}")
                else:
                    print(f"{func}: Skip creating next cluster batch transform job")


        def aggregate_and_cluster_alerts(bucket, unique_id):
            '''
            Create input and start next batch transform job from the queue
            Args:
                bucket: Bucket name
                unique_id: Unique id of the chunk used for next batch transform job
            Returns:
                response: Response from sagemaker client create transform job
            '''

            global SCRATCH_DIR
            global S3_CLIENT
            global TEMP_AGGREGATE_AND_CLUSTER_ALERTS_OUTPUT_FILENAME
            global SAGEMAKER

            func = "aggregate_and_cluster_alerts"

            next_input_filename = f"{SCRATCH_DIR}/output/classification/{unique_id}/input.json"

            temp_input_dir = "/tmp/input/"
            os.makedirs(temp_input_dir, exist_ok=True)

            temp_next_input_filename = f"{temp_input_dir}input.json"

            print(f"{func}: Download next input file: {bucket}/{next_input_filename}")

            try:
                S3_CLIENT.head_object(Bucket=bucket, Key=next_input_filename)
            except ClientError as e:
                if e.response['Error']['Code'] != '404':
                    raise e
                print(f"Technique classificaiton did not complete for {unique_id}. Skip create batch transform job for {unique_id}")
                return None
            else:
                try:
                    S3_CLIENT.download_file(bucket, next_input_filename, temp_next_input_filename)
                    print(f"{func}: Next input file download completed")
                except Exception as e:
                    print(f"{func}: Failed to download file from S3: {bucket}/{next_input_filename}")
                    raise e

            print(f"{func}: Create next zip for clustering")

            temp_enriched_alerts_with_techniques_output_filename = "input.json"

            with zipfile.ZipFile(TEMP_AGGREGATE_AND_CLUSTER_ALERTS_OUTPUT_FILENAME, "a") as z:
                z.write(temp_next_input_filename, arcname=temp_enriched_alerts_with_techniques_output_filename)
                print(f"{func}: Next zip input contents: {z.namelist()}")

            path_to_enriched_alerts_with_techniques = f"{SCRATCH_DIR}/output/classification/{unique_id}/input.zip"

            try:
                print(f"{func}: Save next zip input to clustering: {bucket}/{path_to_enriched_alerts_with_techniques}")
                S3_CLIENT.upload_file(TEMP_AGGREGATE_AND_CLUSTER_ALERTS_OUTPUT_FILENAME, bucket, path_to_enriched_alerts_with_techniques)
            except Exception as e:
                print(f"{func}: Failed to upload file to S3: {bucket}/{path_to_enriched_alerts_with_techniques}")
                raise e

            try:
                batch_transform_job = setup_batch_transform_job_for_aggregate_and_cluster_alerts(bucket, unique_id, path_to_enriched_alerts_with_techniques)
                response = SAGEMAKER.create_transform_job(**batch_transform_job)
            except Exception as e:
                print(f"{func}: Failed to create batch transform job for temporal clustering. Unique id: {unique_id}")
                raise e

            return response


        def get_batch_cluster_ids_update_input_cluster_ids(bucket, unique_id):
            '''
            Get cluster ids in current batch and update cluster ids in input
            Args:
                bucket: Bucket name
                unique_id: Unique id of the chunk
            Returns:
                list of cluster ids in current batch
            '''
            global S3_CLIENT
            global SCRATCH_DIR
            global TEMP_INPUT_CLUSTER_ID_LIST_JSON_FILENAME
            global TEMP_INTERMEDIATE_QUEUE_FILENAME
            global TEMP_CLUSTER_OF_AGGREGATED_ALERTS_JSON_FILENAME

            func = "get_batch_cluster_ids_update_input_cluster_ids"

            queue_json = json.load(open(TEMP_INTERMEDIATE_QUEUE_FILENAME, "r"))

            # if its first batch of current input. initialize cluster id list. else download from previous batch
            if queue_json["first"] == unique_id:
                print(f"{func}: This is first batch of current input. Initialize cluster id list for input")
                json.dump([], open(TEMP_INPUT_CLUSTER_ID_LIST_JSON_FILENAME, "w"))
            else:
                path_to_prev_cluster_id_list = f"{SCRATCH_DIR}/intermediate/{queue_json['previous']}/input_cluster_id_list.json"
                try:
                    S3_CLIENT.download_file(bucket, path_to_prev_cluster_id_list, TEMP_INPUT_CLUSTER_ID_LIST_JSON_FILENAME)
                    print(f"{func}: Download file from S3: {bucket}/{path_to_prev_cluster_id_list}")
                except Exception as e:
                    print(f"{func}: Failed to donwnload file from S3: {bucket}/{path_to_prev_cluster_id_list}")
                    raise e

            print(f"{func}: Update cluster id list for input")
            cluster_id_list_json = json.load(open(TEMP_INPUT_CLUSTER_ID_LIST_JSON_FILENAME, "r"))
            cluster_id_list_set = set(cluster_id_list_json)

            batch_cluster_df = pd.read_json(TEMP_CLUSTER_OF_AGGREGATED_ALERTS_JSON_FILENAME)
            batch_cluster_id_list = batch_cluster_df["cluster_id"].to_list()
            batch_cluster_id_list.sort()

            cluster_id_list_list = list(cluster_id_list_set.union(batch_cluster_id_list))

            json.dump(cluster_id_list_list, open(TEMP_INPUT_CLUSTER_ID_LIST_JSON_FILENAME, "w"))

            path_to_cluster_id_list = f"{SCRATCH_DIR}/intermediate/{unique_id}/input_cluster_id_list.json"
            print(f"{func}: Save file to S3: {bucket}/{path_to_cluster_id_list}")

            try:
                S3_CLIENT.upload_file(TEMP_INPUT_CLUSTER_ID_LIST_JSON_FILENAME, bucket, path_to_cluster_id_list)
                print(f"{func}: Uploaded file to S3: {bucket}/{path_to_cluster_id_list}")
            except Exception as e:
                print(f"{func}: Failed to upload file to S3: {bucket}/{path_to_cluster_id_list}")
                raise e

            return batch_cluster_id_list


        def save_aggreate_cluster_json_output(bucket, unique_id):
            '''
            Save aggregated alerts and current batch cluster output to S3
            Args:
                bucket: Bucket name
                unique_id: Unique id of the chunk
            '''

            global SCRATCH_DIR
            global S3_CLIENT
            global TEMP_CLUSTER_OF_AGGREGATED_ALERTS_JSON_FILENAME
            global TEMP_AGGREGATED_ALERTS_JSON_FILENAME

            func = "save_aggreate_cluster_json_output"
            aggregate_and_cluster_alerts_json_output_prefix = f"{SCRATCH_DIR}/intermediate/{unique_id}/"

            cluster_of_aggregated_alerts_json_filename = f"{aggregate_and_cluster_alerts_json_output_prefix}cluster_output.json"
            aggregated_alerts_json_filename = f"{aggregate_and_cluster_alerts_json_output_prefix}aggregated_alerts_output.json"

            print(f"{func}: Save aggregated alerts and clusters json output: {aggregate_and_cluster_alerts_json_output_prefix}")
            try:
                S3_CLIENT.upload_file(TEMP_CLUSTER_OF_AGGREGATED_ALERTS_JSON_FILENAME, bucket,
                                      cluster_of_aggregated_alerts_json_filename)
            except Exception as e:
                print(f"{func}: Failed to upload file to S3: {bucket}/{cluster_of_aggregated_alerts_json_filename}")
                raise e

            try:
                S3_CLIENT.upload_file(TEMP_AGGREGATED_ALERTS_JSON_FILENAME, bucket, aggregated_alerts_json_filename)
            except Exception as e:
                print(f"{func}: Failed to upload file to S3: {bucket}/{aggregated_alerts_json_filename}")
                raise e


        def update_event_sql(bucket, unique_id):
            '''
            Update event table in sqlite from current batch cluster output.
            Save current batch cluster output with current batch user given alert ids associated to it
            Args:
                bucket: Bucket name
                unique_id: Unique id of the current batch
            '''
            global S3_CLIENT
            global SCRATCH_DIR
            global TEMP_SQLITE_FILENAME
            global TEMP_AGGREGATED_ALERTS_JSON_FILENAME
            global TEMP_CLUSTER_OF_AGGREGATED_ALERTS_JSON_FILENAME
            global TEMP_INTERNAL_ID_TO_USER_ID_LOOKUP_FILE

            func = "update_event_sql"

            print(f"{func}: Read aggregate alert output, cluster output from current batch")
            agg_df = pd.read_json(TEMP_AGGREGATED_ALERTS_JSON_FILENAME)
            cluster_output_df = pd.read_json(TEMP_CLUSTER_OF_AGGREGATED_ALERTS_JSON_FILENAME)

            internal_id_to_user_id_json = json.load(open(TEMP_INTERNAL_ID_TO_USER_ID_LOOKUP_FILE, "r"))

            # get all user alert ids for cluster
            print(f"{func}: Get user alert ids in aggregate alert output")
            internal_id_exploded_agg_df = agg_df.explode("ids")
            internal_id_exploded_agg_df = internal_id_exploded_agg_df.apply(lambda row: get_user_ids(row,
                                                                                                    internal_id_to_user_id_json),
                                                                            axis=1)

            # get all original alerts
            print(f"{func}: Get original input alerts for current batch")
            chunk_input_file = f"{SCRATCH_DIR}/intermediate/{unique_id}/original_input.json"

            temp_chunk_input_file = "/tmp/original_input.json"

            try:
                S3_CLIENT.download_file(bucket, chunk_input_file, temp_chunk_input_file)
            except Exception as e:
                print(f"{func}: Failed to download file: {bucket}/{chunk_input_file}")
                raise e

            original_alerts_df = pd.read_json(temp_chunk_input_file)
            if "other_attributes_dict" not in original_alerts_df:
                original_alerts_df["other_attributes_dict"] = [{}] * len(original_alerts_df)
            else:
                original_alerts_df["other_attributes_dict"] = original_alerts_df["other_attributes_dict"].apply(lambda x: {} if pd.isna(x) else x)
            if "tech" in original_alerts_df:
                original_alerts_df.drop(columns=["tech"], inplace=True)

            original_alert_keep_cols = original_alerts_df.columns.to_list()
            original_alert_keep_cols.extend(["tech", "tac", "stage"])

            print(f"{func}: Original alert keep columns: {original_alert_keep_cols}")

            # filter aggregate alert df with alerts for current batch only
            print(f"{func}: Filter aggregated alert with alerts from current batch")
            internal_id_exploded_agg_df = internal_id_exploded_agg_df[internal_id_exploded_agg_df["user_id"].isin(original_alerts_df["id"])]

            to_merge_internal_id_exploded_agg_df = internal_id_exploded_agg_df[["tech", "tac", "stage", "user_id"]]

            print(f"{func}: Length of original alerts before merge: {len(original_alerts_df)}")
            # enrich original alerts with tech, tac, stage
            print(f"{func}: Enrich original alerts with tech, tac, stage")
            original_alerts_df = original_alerts_df.merge(to_merge_internal_id_exploded_agg_df,
                                                          how="left",
                                                          left_on="id",
                                                          right_on="user_id")
            print(f"{func}: Original alert columns after enriching: {original_alerts_df.columns.to_list()}")
            print(f"{func}: Length of original alerts after merge: {len(original_alerts_df)}")
            original_alerts_df = original_alerts_df[original_alert_keep_cols]

            cluster_aggalertids_exploded_cluster_output_df = cluster_output_df.explode("cluster_aggalertids")

            # filter aggregate ids in cluster output for only current batch aggregated alerts
            print(f"{func}: Filter cluster output with only current batch alerts")
            cluster_aggalertids_exploded_cluster_output_df = cluster_aggalertids_exploded_cluster_output_df[
                cluster_aggalertids_exploded_cluster_output_df["cluster_aggalertids"].isin(internal_id_exploded_agg_df["aggalert_id"])]

            to_merge_internal_id_exploded_agg_df = internal_id_exploded_agg_df[["aggalert_id", "ids"]]

            # get internal ids in batch cluster output
            print(f"{func}: Get internal ids in current batch cluster output")
            merge_cluster_output_agg_output = cluster_aggalertids_exploded_cluster_output_df.merge(to_merge_internal_id_exploded_agg_df,
                                                                                                  left_on="cluster_aggalertids",
                                                                                                  right_on="aggalert_id",
                                                                                                  how="left")

            internal_id_cluster_output = merge_cluster_output_agg_output.groupby("cluster_id")["ids"].apply(list).reset_index()

            add_internal_id_to_cluster_output_df = internal_id_cluster_output.merge(cluster_output_df, how="inner")

            print(f"{func}: Columns for add_internal_id_to_cluster_output_df: {add_internal_id_to_cluster_output_df.columns.to_list()}")

            # get involved event ids in cluster_output
            print(f"{func}: Get user alert ids in cluster output for current batch")
            add_internal_id_to_cluster_output_df["ids"] = \
                add_internal_id_to_cluster_output_df["ids"].apply(lambda ids: [internal_id_to_user_id_json[id] for id in ids])

            add_internal_id_to_cluster_output_df.to_json(TEMP_CLUSTER_OF_AGGREGATED_ALERTS_JSON_FILENAME, orient="records")

            insert_event = """
            INSERT INTO event(
                alert_id,
                src,
                dst,
                time,
                name,
                tech,
                tac,
                stage,
                other_attributes
            ) VALUES(
                :alert_id,
                :src,
                :dst,
                :time,
                :name,
                :tech,
                :tac,
                :stage,
                :other_attributes
            );
            """

            insert_global_feature = """
            INSERT OR IGNORE INTO global_feature(
                feature,
                feature_type,
                added_to_ui
            ) VALUES(
                :feature,
                :feature_type,
                :added_to_ui
            );
            """

            insert_or_ignore_cluster = """
            INSERT OR IGNORE INTO cluster_output(
                cluster_id
            )
            VALUES(
                :cluster_id
            )
            ;
            """

            try:
                with closing(sqlite3.connect(TEMP_SQLITE_FILENAME)) as conn:
                    conn.execute("PRAGMA foreign_keys = ON;")
                    with closing(conn.cursor()) as cursor:

                        insert_event_params = []
                        unique_other_attributes_key_set = set()
                        for _, row_original_alerts in original_alerts_df.iterrows():
                            # Execute a query
                            params = {
                                "alert_id": row_original_alerts['id'],
                                "src": row_original_alerts['src'],
                                "dst": row_original_alerts['dst'],
                                "time": row_original_alerts['time'],
                                "name": row_original_alerts['name'],
                                "tech": json.dumps(row_original_alerts['tech']),
                                "tac": json.dumps(list(map(str, row_original_alerts['tac']))),
                                "stage": json.dumps(list(map(str, row_original_alerts['stage']))),
                                "other_attributes": json.dumps(row_original_alerts['other_attributes_dict'])
                            }
                            insert_event_params.append(params)

                            unique_other_attributes_key_set = unique_other_attributes_key_set.union(list(row_original_alerts['other_attributes_dict'].keys()))

                        insert_global_feature_params = []
                        for other_attrs_key in unique_other_attributes_key_set:

                            global_feature_params = {
                                "feature": other_attrs_key,
                                "feature_type": "event",
                                "added_to_ui": 0
                            }
                            insert_global_feature_params.append(global_feature_params)

                        print(f"{func}: Add current batch alert to event table")
                        cursor.executemany(insert_event, insert_event_params)

                        print(f"{func}: Add event features to table")
                        cursor.executemany(insert_global_feature, insert_global_feature_params)

                        insert_or_ignore_cluster_params = []
                        update_cluster_id_in_event_queries = []
                        for _, row_cluster_output in add_internal_id_to_cluster_output_df.iterrows():
                            params = {
                                "cluster_id": row_cluster_output["cluster_id"]
                            }
                            insert_or_ignore_cluster_params.append(params)

                            user_alert_id_list = "','".join(map(str, row_cluster_output['ids']))
                            user_alert_id_list = f"('{user_alert_id_list}')"
                            update_event_query = f"""
                            UPDATE event
                            SET cluster_id = {row_cluster_output["cluster_id"]}
                            WHERE alert_id IN {user_alert_id_list}
                            ;
                            """
                            update_cluster_id_in_event_queries.append(update_event_query)

                        # insert or ignore cluster output
                        print(f"{func}: Add or ignore current batch cluster to cluster output table")
                        cursor.executemany(insert_or_ignore_cluster, insert_or_ignore_cluster_params)

                        # update cluster id for alerts
                        print(f"{func}: Update cluster id for event")
                        for query in update_cluster_id_in_event_queries:
                            cursor.execute(query)

                    conn.commit()
            except Exception as e:
                print(f"{func}: Failed to update event table.")
                raise e


        def update_cluster_output_sql(batch_cluster_id_list):
            '''
            Update cluster output table in sqlite
            Args:
                batch_cluster_id_list: cluster id in current batch
            Returns:
                metrics dictionary for cluster id (contains count of unique tech, tac, stage per cluster)
            '''
            global TEMP_SQLITE_FILENAME

            func = "update_cluster_output_sql"

            update_cluster_query = """
            UPDATE cluster_output
            SET cluster_starttime = :cluster_starttime,
                cluster_endtime = :cluster_endtime,
                cluster_srcips = :cluster_srcips,
                cluster_dstips = :cluster_dstips,
                cluster_techs = :cluster_techs,
                cluster_tacs = :cluster_tacs,
                cluster_stages = :cluster_stages
            WHERE cluster_id = :cluster_id
            ;
            """

            metrics = {}

            try:
                with closing(sqlite3.connect(TEMP_SQLITE_FILENAME)) as conn:
                    conn.row_factory = sqlite3.Row
                    conn.execute("PRAGMA foreign_keys = ON;")
                    with closing(conn.cursor()) as cursor:

                        print(f"{func}: Update cluster details in cluster output table")
                        cluster_id_list = ",".join(map(str, batch_cluster_id_list))
                        cluster_id_list = f"({cluster_id_list})"

                        select_events_filter_cluster_query = f"""
                        SELECT
                            cluster_id,
                            MIN(time) AS min_time,
                            MAX(time) AS max_time,
                            GROUP_CONCAT(DISTINCT src) AS unique_src,
                            GROUP_CONCAT(DISTINCT dst) AS unique_dst,
                            GROUP_CONCAT(tech,';') AS combined_tech,
                            GROUP_CONCAT(tac,';') AS combined_tac,
                            GROUP_CONCAT(stage,';') AS combined_stage
                        FROM
                            event
                        WHERE cluster_id IN {cluster_id_list}
                        GROUP BY cluster_id
                        ORDER BY cluster_id
                        ;
                        """
                        cursor.execute(select_events_filter_cluster_query)
                        result = cursor.fetchall()

                        update_cluster_output_params = []
                        for row in result:
                            row = dict(row)
                            src_ips = row['unique_src']
                            src_ips = src_ips.split(",")

                            dst_ips = row['unique_dst']
                            dst_ips = dst_ips.split(",")

                            combined_tech = row['combined_tech']
                            combined_tech = combined_tech.split(";")
                            tech_list = []
                            for techs in combined_tech:
                                tech_list += json.loads(techs)
                            tech_list = list(set(tech_list))

                            combined_tac = row['combined_tac']
                            combined_tac = combined_tac.split(";")
                            tac_list = []
                            for tacs in combined_tac:
                                tac_list += json.loads(tacs)
                            tac_list = list(set(tac_list))

                            combined_stage = row['combined_stage']
                            combined_stage = combined_stage.split(";")
                            stage_list = []
                            for stage in combined_stage:
                                stage_list += json.loads(stage)
                            stage_list = list(set(stage_list))

                            metrics[row['cluster_id']] = {
                                "tech": {"count": len(tech_list)},
                                "tac": {"count": len(tac_list)},
                                "stage": {"count": len(stage_list)}
                            }

                            # if cluster id is present in table. Update fields for existing row.
                            params = {
                                "cluster_starttime": row['min_time'],
                                "cluster_endtime": row['max_time'],
                                "cluster_srcips": json.dumps(src_ips),
                                "cluster_dstips": json.dumps(dst_ips),
                                "cluster_techs": json.dumps(tech_list),
                                "cluster_tacs": json.dumps(tac_list),
                                "cluster_stages": json.dumps(stage_list),
                                "cluster_id": row['cluster_id']
                            }

                            update_cluster_output_params.append(params)

                        cursor.executemany(update_cluster_query, update_cluster_output_params)

                    conn.commit()
                return metrics
            except Exception as e:
                print(f"{func}: Failed to update event table.")
                raise e


        def update_cluster_ticket_output_sql(bucket, batch_cluster_id_list, metrics):
            '''
            Update cluster ticket output table and add per cluster metrics
            Args:
                bucket: Bucket name
                batch_cluster_id_list: cluster id in current batch
                metrics: dict of metric for cluster id (contains count of unique tech, tac, stage per cluster)
            '''
            global TEMP_SQLITE_FILENAME
            global TEMP_TICKET_ID_LOOKUP_FILE

            func = "update_cluster_ticket_output_sql"

            metrics_cols = ["tech", "tac", "stage"]

            ticket_id_json = json.load(open(TEMP_TICKET_ID_LOOKUP_FILE, "r"))
            ticket_id_start = ticket_id_json["last_ticket_id"]

            insert_cluster_ticket_output_query = """
            INSERT INTO cluster_ticket_output (
                cluster_id,
                ticket_id,
                metrics
            )
            VALUES (
                :cluster_id,
                :ticket_id,
                :metrics
            )
            ;
            """

            update_cluster_ticket_output_query = """
            UPDATE cluster_ticket_output
            SET 
                metrics = :metrics
            WHERE cluster_id = :cluster_id
            ;
            """

            try:
                with closing(sqlite3.connect(TEMP_SQLITE_FILENAME)) as conn:
                    conn.row_factory = sqlite3.Row
                    conn.execute("PRAGMA foreign_keys = ON;")
                    with closing(conn.cursor()) as cursor:
                        print(f"{func}: Get events for clusters in current batch")
                        cluster_id_list = ",".join(map(str, batch_cluster_id_list))
                        cluster_id_list = f"({cluster_id_list})"
                        select_event_query = f"""
                        SELECT *
                        FROM event
                        WHERE cluster_id IN {cluster_id_list}
                        ORDER BY cluster_id
                        ;
                        """
                        cursor.execute(select_event_query)
                        results = cursor.fetchall()
                        results = [dict(row) for row in results]
                        events_df = pd.DataFrame(results)

                        print(f"{func}: Get existing cluster ticket output for clusters in current batch")
                        select_cluster_ticket_output_query = f"""
                        SELECT *
                        FROM cluster_ticket_output
                        WHERE cluster_id IN {cluster_id_list}
                        ORDER BY cluster_id
                        ;
                        """
                        cursor.execute(select_cluster_ticket_output_query)
                        results = cursor.fetchall()
                        results = [dict(row) for row in results]
                        cluster_ticket_output_df = pd.DataFrame(results)

                        cluster_ticket_dict = {}
                        if len(cluster_ticket_output_df):
                            cluster_ticket_dict = cluster_ticket_output_df.set_index('cluster_id')['ticket_id'].to_dict()

                        print(f"{func}: Get per cluster metrics and update cluster ticket output")
                        insert_cluster_ticket_output_params = []
                        update_cluster_ticket_output_params = []
                        for cluster_id in batch_cluster_id_list:
                            filtered_events = events_df[events_df["cluster_id"] == cluster_id]
                            cluster_metrics = per_cluster_metrics_events(filtered_events)

                            for metric_col in metrics_cols:
                                cluster_metrics[metric_col]["count"] = float(metrics[cluster_id][metric_col]["count"])

                            if cluster_id not in cluster_ticket_dict:
                                ticket_id_start += 1
                                params = {
                                    "cluster_id": cluster_id,
                                    "ticket_id": ticket_id_start,
                                    "metrics": json.dumps(cluster_metrics)
                                }
                                insert_cluster_ticket_output_params.append(params)
                            else:
                                params = {
                                    "cluster_id": cluster_id,
                                    "metrics": json.dumps(cluster_metrics)
                                }
                                update_cluster_ticket_output_params.append(params)
                        if insert_cluster_ticket_output_params:
                            cursor.executemany(insert_cluster_ticket_output_query, insert_cluster_ticket_output_params)
                        if update_cluster_ticket_output_params:
                            cursor.executemany(update_cluster_ticket_output_query, update_cluster_ticket_output_params)

                    conn.commit()

                print(f"{func}: Save ticket id lookup file")
                ticket_id_json["last_ticket_id"] = ticket_id_start
                json.dump(ticket_id_json, open(TEMP_TICKET_ID_LOOKUP_FILE, "w"))
                save_ticket_id_lookup_file(bucket)
            except Exception as e:
                print(f"{func}: Failed to update event table.")
                raise e


        def create_input_cluster_ticket_output_sql(bucket, unique_id):
            '''
            Create cluster ticket output for current input and save to S3
            Args:
                bucket: Bucket name
                unique_id: Unique id of chunk of input
            '''
            global SCRATCH_DIR
            global S3_CLIENT
            global TEMP_SQLITE_FILENAME
            global TEMP_INPUT_CLUSTER_ID_LIST_JSON_FILENAME
            global TEMP_INPUT_CLUSTER_TICKET_OUTPUT_FILENAME

            func = "create_input_cluster_ticket_output"

            input_cluster_id_list = json.load(open(TEMP_INPUT_CLUSTER_ID_LIST_JSON_FILENAME, "r"))

            try:
                with open(TEMP_INPUT_CLUSTER_TICKET_OUTPUT_FILENAME, "w") as f:
                    with closing(sqlite3.connect(TEMP_SQLITE_FILENAME)) as conn:
                        conn.row_factory = sqlite3.Row
                        conn.execute("PRAGMA foreign_keys = ON;")
                        with closing(conn.cursor()) as cursor:
                            cluster_id_list = ",".join(map(str, input_cluster_id_list))
                            cluster_id_list = f"({cluster_id_list})"
                            select_query = f"""
                            SELECT
                                co.cluster_id,
                                co.cluster_starttime,
                                co.cluster_endtime,
                                co.cluster_srcips,
                                co.cluster_dstips,
                                co.cluster_techs,
                                co.cluster_tacs,
                                co.cluster_stages,
                                cto.ticket_id,
                                cto.metrics,
                                e.alert_id,
                                e.src AS event_src,
                                e.dst AS event_dst,
                                e.time AS event_time,
                                e.name AS event_name,
                                e.tech AS event_tech,
                                e.tac AS event_tac,
                                e.stage AS event_stage,
                                e.other_attributes AS event_other_attributes
                            FROM cluster_ticket_output cto
                            INNER JOIN cluster_output co ON co.cluster_id = cto.cluster_id
                            INNER JOIN event e ON e.cluster_id = cto.cluster_id
                            WHERE cto.cluster_id IN {cluster_id_list}
                            ORDER BY cto.cluster_id
                            ;
                            """

                            cursor.execute(select_query)

                            row = cursor.fetchone()
                            prev_cluster = None

                            involved_events = []

                            while row:
                                row = dict(row)

                                curr_cluster = row
                                if prev_cluster is None:
                                    prev_cluster = curr_cluster

                                if curr_cluster["cluster_id"] != prev_cluster["cluster_id"]:
                                    row_cluster_ticket_output = {
                                        "ticket_id": prev_cluster["ticket_id"],
                                        "cluster_id": prev_cluster["cluster_id"],
                                        "involved_events": involved_events,
                                        "start_time": prev_cluster["cluster_starttime"],
                                        "end_time": prev_cluster["cluster_endtime"],
                                        "involved_entities": list(set(json.loads(prev_cluster["cluster_dstips"]) + json.loads(prev_cluster["cluster_srcips"]))),
                                        "involved_techs": json.loads(prev_cluster["cluster_techs"]),
                                        "involved_tacs": json.loads(prev_cluster["cluster_tacs"]),
                                        "involved_stages": json.loads(prev_cluster["cluster_stages"]),
                                        "metrics": json.loads(prev_cluster["metrics"]),
                                    }
                                    f.write(json.dumps(row_cluster_ticket_output) + "\n")
                                    involved_events = []

                                events = {
                                    "id": row["alert_id"],
                                    "src": row["event_src"],
                                    "dst": row["event_dst"],
                                    "time": row["event_time"],
                                    "name": row["event_name"],
                                    "tech": json.loads(row["event_tech"]),
                                    "tac": json.loads(row["event_tac"]),
                                    "stage": json.loads(row["event_stage"]),
                                    "other_attributes_dict": json.loads(row["event_other_attributes"]),
                                }
                                involved_events.append(events)

                                prev_cluster = curr_cluster
                                row = cursor.fetchone()

                            row_cluster_ticket_output = {
                                "ticket_id": prev_cluster["ticket_id"],
                                "cluster_id": prev_cluster["cluster_id"],
                                "involved_events": involved_events,
                                "start_time": prev_cluster["cluster_starttime"],
                                "end_time": prev_cluster["cluster_endtime"],
                                "involved_entities": list(set(json.loads(prev_cluster["cluster_dstips"]) + json.loads(prev_cluster["cluster_srcips"]))),
                                "involved_techs": json.loads(prev_cluster["cluster_techs"]),
                                "involved_tacs": json.loads(prev_cluster["cluster_tacs"]),
                                "involved_stages": json.loads(prev_cluster["cluster_stages"]),
                                "metrics": json.loads(prev_cluster["metrics"]),
                            }
                            f.write(json.dumps(row_cluster_ticket_output) + "\n")

            except Exception as e:
                print(f"{func}: Failed to update event table.")
                raise e

            try:
                path_to_batch_input_cluster_ticket_output = f"{SCRATCH_DIR}/intermediate/{unique_id}/cluster_ticket_output.json"
                S3_CLIENT.upload_file(TEMP_INPUT_CLUSTER_TICKET_OUTPUT_FILENAME, bucket, path_to_batch_input_cluster_ticket_output)
            except Exception as e:
                print(f"{func}: Failed to upload file to S3: {bucket}/{path_to_batch_input_cluster_ticket_output}")
                raise e


        def per_cluster_metrics_events(involved_events_df):
            '''
            Get metrics per cluster
            Args:
                involved_events_df: involved events of one cluster
            Returns:
                metrics per cluster
            '''

            metrics_cols = ["tech", "tac", "stage"]

            metrics = {}

            for metric_col in metrics_cols:
                involved_events_df[f'{metric_col}'] = involved_events_df[f'{metric_col}'].apply(json.loads)
                involved_events_df[f'{metric_col}_count'] = involved_events_df[f'{metric_col}'].apply(len)

                metrics[metric_col] = {
                    "avg": float(involved_events_df[f"{metric_col}_count"].mean()),
                    "min": float(involved_events_df[f"{metric_col}_count"].min()),
                    "max": float(involved_events_df[f"{metric_col}_count"].max()),
                    "median": float(involved_events_df[f"{metric_col}_count"].median())
                }

            return metrics


        def get_global_metrics_from_cluster_ticket_output_sql(bucket, unique_id):
            '''
            Get global metrics from an SQLite database.
            Args:
                bucket: Bucket name
                unique_id: Unique id of chunk of input
            Returns:
                metrics dictionary
            '''
            global TEMP_SQLITE_FILENAME
            global TEMP_GLOBAL_METRICS_FILENAME
            global SCRATCH_DIR
            global S3_CLIENT

            func = "get_global_metrics_from_cluster_ticket_output_sql"

            metrics = {}
            metrics_cols = ["tech", "tac", "stage", "event", "entity"]
            mapped_metric_cols = ["cluster_techs", "cluster_tacs", "cluster_stages", "cluster_events", "cluster_entities"]

            # Initialize counters for metrics
            metrics_data = {col: [] for col in metrics_cols}
            time_ranges = []

            event_count = 0

            select_query = """
            SELECT
                co.cluster_id,
                co.cluster_starttime,
                co.cluster_endtime,
                co.cluster_srcips,
                co.cluster_dstips,
                co.cluster_techs,
                co.cluster_tacs,
                co.cluster_stages,
                COUNT(e.cluster_id) AS cluster_events
            FROM
                cluster_output co
            INNER JOIN
                event e ON co.cluster_id = e.cluster_id
            GROUP BY
                co.cluster_id
            ;
            """

            try:
                with closing(sqlite3.connect(TEMP_SQLITE_FILENAME)) as conn:
                    conn.row_factory = sqlite3.Row
                    with closing(conn.cursor()) as cursor:
                        cursor.execute(select_query)

                        row = cursor.fetchone()
                        while row:
                            row = dict(row)
                            record = {
                                "cluster_id": row["cluster_id"],
                                "cluster_starttime": row["cluster_starttime"],
                                "cluster_endtime": row["cluster_endtime"],
                                "cluster_techs": json.loads(row["cluster_techs"]),
                                "cluster_tacs": json.loads(row["cluster_tacs"]),
                                "cluster_stages": json.loads(row["cluster_stages"]),
                                "cluster_events": row["cluster_events"],
                                "cluster_entities": list(set(json.loads(row["cluster_dstips"]) + json.loads(row["cluster_srcips"])))
                            }

                            for metric_col, mapped_metric_col in zip(metrics_cols, mapped_metric_cols):

                                if metric_col == "event":
                                    count = record[mapped_metric_col]
                                    event_count += count
                                    metrics_data[metric_col].append(count)
                                else:
                                    count = float(len(record[mapped_metric_col]))
                                    metrics_data[metric_col].append(count)

                            start_time = pd.to_datetime(record['cluster_starttime'], unit='s')
                            end_time = pd.to_datetime(record['cluster_endtime'], unit='s')
                            time_range = (end_time - start_time).total_seconds()
                            time_ranges.append(time_range)

                            row = cursor.fetchone()
            except Exception as e:
                print(f"{func}: Failed to update event table.")
                raise e

            # Calculate statistics for each metric
            for metric_col in metrics_cols:
                if metric_col not in metrics:
                    metrics[metric_col] = {}

                if metrics_data[metric_col]:
                    metrics[metric_col]["avg"] = float(pd.Series(metrics_data[metric_col]).mean())
                    metrics[metric_col]["min"] = float(pd.Series(metrics_data[metric_col]).min())
                    metrics[metric_col]["max"] = float(pd.Series(metrics_data[metric_col]).max())
                    metrics[metric_col]["median"] = float(pd.Series(metrics_data[metric_col]).median())
                    metrics[metric_col]["q1"] = float(pd.Series(metrics_data[metric_col]).quantile(0.25))
                    metrics[metric_col]["q3"] = float(pd.Series(metrics_data[metric_col]).quantile(0.75))
                    if metric_col == "event":
                        metrics[metric_col]["count"] = event_count

            # Calculate statistics for time range
            if time_ranges:
                metrics["time_range"] = {
                    "avg": float(pd.Series(time_ranges).mean()),
                    "min": float(pd.Series(time_ranges).min()),
                    "max": float(pd.Series(time_ranges).max()),
                    "median": float(pd.Series(time_ranges).median()),
                    "q1": float(pd.Series(time_ranges).quantile(0.25)),
                    "q3": float(pd.Series(time_ranges).quantile(0.75)),
                }

            json.dump(metrics, open(TEMP_GLOBAL_METRICS_FILENAME, "w"))

            try:
                print(f"{func}: Upload global metric output file to S3")
                global_metrics_filename = f"{SCRATCH_DIR}/intermediate/{unique_id}/global_metrics.json"
                S3_CLIENT.upload_file(TEMP_GLOBAL_METRICS_FILENAME, bucket, global_metrics_filename)
            except Exception as e:
                print(f"{func}: Failed to save file to S3: {bucket}/{global_metrics_filename}")
                raise e

            return metrics


        def download_intermediate_input_files(bucket, unique_id):
            '''
            Download intermediate files required
            Args:
                bucket: Bucket name
                unique_id: unique id of current batch
            '''
            global S3_CLIENT
            global SCRATCH_DIR
            global TEMP_CONFIG_INPUT_FILENAME
            global CONFIG_EXISTS_IN_REQUEST
            global TEMP_INTERMEDIATE_QUEUE_FILENAME
            global INTERNAL_ID_TO_USER_ID_OBJECT_KEY
            global TEMP_INTERNAL_ID_TO_USER_ID_LOOKUP_FILE
            global PATH_TO_SQLITE
            global TEMP_SQLITE_FILENAME

            func = "download_intermediate_input_files"

            path_to_intermediate_queue = f"{SCRATCH_DIR}/intermediate/{unique_id}/queue.json"
            path_to_config_from_input = f"{SCRATCH_DIR}/intermediate/{unique_id}/config.json"

            try:
                print(f"{func}: Download queue json file from S3")
                S3_CLIENT.download_file(bucket, path_to_intermediate_queue, TEMP_INTERMEDIATE_QUEUE_FILENAME)
            except Exception as e:
                print(f"{func}: Failed to download file: {bucket}/{path_to_intermediate_queue}")
                raise e

            try:
                S3_CLIENT.download_file(bucket, INTERNAL_ID_TO_USER_ID_OBJECT_KEY, TEMP_INTERNAL_ID_TO_USER_ID_LOOKUP_FILE)
            except Exception as e:
                print(f"{func}: Failed to download file: {bucket}/{INTERNAL_ID_TO_USER_ID_OBJECT_KEY}")
                raise e

            try:
                print(f"{func}: Checking if config file exists on S3 path {bucket}/{path_to_config_from_input}")
                S3_CLIENT.head_object(Bucket=bucket, Key=path_to_config_from_input)
                CONFIG_EXISTS_IN_REQUEST = True

            except ClientError as e:
                if e.response['Error']['Code'] != '404':
                    raise e
                print(f"{func}: Config file does not exist")
            else:
                try:
                    S3_CLIENT.download_file(bucket, path_to_config_from_input, TEMP_CONFIG_INPUT_FILENAME)
                    print(f"{func}: Download completed")
                except Exception as e:
                    print(f"{func}: Failed to download file: {bucket}/{path_to_config_from_input}")
                    raise e

            try:
                print(f"{func}: Download sqlite file from S3")
                S3_CLIENT.download_file(bucket, PATH_TO_SQLITE, TEMP_SQLITE_FILENAME)
            except Exception as e:
                print(f"{func}: Failed to download file: {bucket}/{PATH_TO_SQLITE}")
                raise e


        def save_ticket_id_lookup_file(bucket):
            '''
            Save ticket id lookup file to S3
            Args:
                bucket: The S3 bucket name
            '''
            global TICKET_ID_OBJECT_KEY
            global S3_CLIENT
            global TEMP_TICKET_ID_LOOKUP_FILE
            func = "save_ticket_id_lookup_file"
            try:
                print(f"{func}: Upload ticket id lookup file to S3")
                S3_CLIENT.upload_file(TEMP_TICKET_ID_LOOKUP_FILE, bucket, TICKET_ID_OBJECT_KEY)
            except Exception as e:
                print(f"{func}: Failed to save file to S3: {bucket}/{TICKET_ID_OBJECT_KEY}")
                raise e


        def fetch_or_create_ticket_id_lookup_file(bucket):
            '''
            Check if the ticket_id lookup file exists on S3. Create if it does not
            Args:
                bucket: The S3 bucket name
            '''
            global TICKET_ID_OBJECT_KEY
            global S3_CLIENT
            global TEMP_TICKET_ID_LOOKUP_FILE

            func = "fetch_or_create_ticket_id_lookup_file"

            try:
                print(
                    f"{func}: Checking if ticket id lookup file exists on S3 path {bucket}/{TICKET_ID_OBJECT_KEY}")

                S3_CLIENT.head_object(Bucket=bucket, Key=TICKET_ID_OBJECT_KEY)

            except ClientError as e:
                if e.response['Error']['Code'] != '404':
                    raise e

                print(f"{func}: Ticket id lookup file does not exist on S3. Create empty table")

                internal_id_to_unique_id_json = {"last_ticket_id": 0}
                json.dump(internal_id_to_unique_id_json, open(TEMP_TICKET_ID_LOOKUP_FILE, "w"))

                print(f"{func}: Initiated ticket id")
            else:
                try:
                    print(f"{func}: Download available internal id to unique id lookup file from S3")

                    S3_CLIENT.download_file(bucket, TICKET_ID_OBJECT_KEY, TEMP_TICKET_ID_LOOKUP_FILE)

                    print(f"{func}: Download completed")
                except Exception as e:
                    print(f"{func}: Failed to download lookup file from S3: {bucket}/{TICKET_ID_OBJECT_KEY}")
                    raise e


        def get_user_ids(row, internal_id_to_user_id_json):
            '''
            Get user alert id for a row
            Args:
                row: A row containing internal id
                internal_id_to_user_id_json: A dictionary mapping user ids to internal ids
            Returns:
                The row with the added unique_id field
            '''

            row["user_id"] = internal_id_to_user_id_json[row["ids"]]
            return row


        def sort_by_alert_id(obj):
            '''
            sort by alert id
            Args:
                obj: aggregate alert dictionary object
            Returns:
                aggregate alert id
            '''
            return obj["aggalert_id"]


        def sort_by_cluster_id(obj):
            '''
            sort by cluster id
            Args:
                obj: cluster dictionary object
            Returns:
                cluster id
            '''
            return obj["cluster_id"]


        def get_last_n_clusters():
            '''
            Get last n clusters from cluster output

            Returns:
                list of cluster output rows
            '''
            global TEMP_SQLITE_FILENAME
            global FLOW_INPUT_WINDOW_SIZE

            func = "get_last_n_clusters"

            select_query = """
            SELECT *
            FROM cluster_output
            ORDER BY cluster_id DESC
            LIMIT :limit
            ;
            """
            try:
                with closing(sqlite3.connect(TEMP_SQLITE_FILENAME)) as conn:
                    conn.row_factory = sqlite3.Row
                    with closing(conn.cursor()) as cursor:
                        params = {"limit": FLOW_INPUT_WINDOW_SIZE}
                        cursor.execute(select_query, params)

                        results = cursor.fetchall()
            except Exception as e:
                print(f"{func}: Failed to update event table.")
                raise e

            return results


        def upload_error_to_s3(bucket, error_message):
            '''
            Upload error message to S3
            Args:
                error_message: The error message to upload
                unique_id: Unique ID for identifying the error log
            '''
            global S3_CLIENT
            error_log_key = "output/error_log.txt"
            try:
                S3_CLIENT.put_object(Bucket=bucket, Key=error_log_key, Body=error_message)
                print(f"Uploaded error log to s3://{bucket}/{error_log_key}")
            except Exception as e:
                print(f"Failed to upload error log to S3. Error: {str(e)}")


        def clear_temp_dir():
            rm_list = glob.glob("/tmp/**", recursive=True)
            for f in rm_list:
                if os.path.isfile(f):
                    os.remove(f)

      Handler: index.lambda_handler
      Runtime: python3.11
      MemorySize: 3008
      EphemeralStorage:
        Size: 10240
      Timeout: 900
      Role: !GetAtt LambdaRole.Arn
      Tracing: Active
      Layers:
        - !FindInMap
          - RegionMap
          - !Ref AWS::Region
          - lambdaLayer
      Environment:
        Variables:
          batch_transform_job_instance_type: !Ref ClusterBatchInstanceType
          batch_transform_job_max_retries: "0"
          batch_transform_job_timeout: !Ref ClusterBatchTimeout
          cluster_model_name: !Ref ClusterModelName
          flow_input_window_size: !Ref FlowInputMaxClusters
      Events:
        S3ObjectCreated:
          Type: S3
          Properties:
            Bucket: !Ref Bucket
            Events: s3:ObjectCreated:*
            Filter:
              S3Key:
                Rules:
                  - Name: prefix
                    Value: scratch/response/cluster_out/
                  - Name: suffix
                    Value: .out
  processClusterLogGroup:
    Type: AWS::Logs::LogGroup
    DeletionPolicy: Retain
    Properties:
      LogGroupName: !Sub /aws/lambda/${processCluster}
  processClusterLambdaEventInvokeConfig:
    Type: AWS::Lambda::EventInvokeConfig
    Properties:
      FunctionName: !Ref processCluster
      MaximumRetryAttempts: 0
      Qualifier: $LATEST

  createFlow:
    Type: AWS::Serverless::Function
    Properties:
      FunctionName: !Ref CreateFlowFunctionName
      Description: !Sub
        - Stack ${AWS::StackName} Function ${ResourceName}
        - ResourceName: createFlow
      InlineCode: |
        '''
        Create batch transform job for Flow detection

        Input: cluster output from Temporal clustering
        '''

        import os
        import urllib
        import glob
        import boto3

        FLOW_MODEL_NAME = os.getenv("flow_model_name")

        BATCH_TRANSFORM_JOB_INSTANCE_TYPE = os.getenv("batch_transform_job_instance_type")

        BATCH_TRANSFORM_JOB_MAX_RETRIES = int(os.getenv("batch_transform_job_max_retries"))

        BATCH_TRANSFORM_JOB_TIMEOUT = int(os.getenv("batch_transform_job_timeout"))

        missing_variables = []
        if FLOW_MODEL_NAME is None:
            missing_variables.append("flow_model_name")
        if BATCH_TRANSFORM_JOB_INSTANCE_TYPE is None:
            missing_variables.append("batch_transform_job_instance_type")
        if BATCH_TRANSFORM_JOB_MAX_RETRIES is None:
            missing_variables.append("batch_transform_job_max_retries")
        if BATCH_TRANSFORM_JOB_TIMEOUT is None:
            missing_variables.append("batch_transform_job_timeout")

        if missing_variables:
            raise ValueError(f"Please enter environment variable(s): {missing_variables}")

        SCRATCH_DIR = "scratch"
        S3_CLIENT = boto3.client("s3")


        def lambda_handler(event, context):
            '''
            Entrypoint from the trigger setup from lambda
            Args:
                event: Event triggered
                context: Context of the lambda function
            '''
            func = "lambda_handler"

            sagemaker = boto3.client(service_name='sagemaker')
            bucket = event["Records"][0]["s3"]["bucket"]["name"]

            try: 
                input_filename = urllib.parse.unquote_plus(event["Records"][0]["s3"]["object"]["key"], encoding="utf-8")

                clear_temp_dir()

                print(f"{func}: Input file from S3 bucket: {bucket}/{input_filename}")

                unique_id = input_filename.split("/")[-2]

                try:

                    print(f"{func}: Start batch transform job for sequence alerts")

                    batch_transform_job = setup_batch_transform_job_for_sequence_alerts(bucket, unique_id, input_filename)
                    batch_transform_job_creation_response = sagemaker.create_transform_job(**batch_transform_job)

                    print(f"{func}: Created batch transform job for sequence alerts: {batch_transform_job_creation_response['TransformJobArn']}")

                except Exception as e:
                    print(f"{func}: Failed to created transform job for sequence alerts. Unique id: {unique_id}")
                    raise e
            except Exception as ex:
                print(f"{func}: Exception occurred while running lambda function. Uploading error file to S3.")
                error_message = f"Error occurred in lambda function: {context.function_name}. More details can be found in CloudWatch Logs for this lambda function. The exception message is: {ex}"
                upload_error_to_s3(bucket, error_message)
                raise ex


        def setup_batch_transform_job_for_sequence_alerts(bucket, unique_id, input_filename):
            '''
            Configure batch transform job for Flow model
            Args:
                bucket: Bucket name
                unique_id: Unique id of the chunk of input
                input_filename: S3 input filename to batch tranform job
            Returns:
                batch_transform_job: Configuration to create batch transform job
            '''
            global BATCH_TRANSFORM_JOB_INSTANCE_TYPE
            global BATCH_TRANSFORM_JOB_TIMEOUT
            global BATCH_TRANSFORM_JOB_MAX_RETRIES
            global FLOW_MODEL_NAME
            global SCRATCH_DIR

            transform_job_name = f'transform-job-flow-{unique_id}'

            transform_input = {
                "DataSource": {
                    "S3DataSource": {
                        "S3DataType": "S3Prefix",
                        "S3Uri": f"s3://{bucket}/{input_filename}"
                    }
                },
                "ContentType": "text/plain",
                "SplitType": "None"
            }
            transform_output = {
                "S3OutputPath": f"s3://{bucket}/{SCRATCH_DIR}/response/flow_out/{unique_id}/",
                "KmsKeyId": ""
            }
            transform_resources = {
                "InstanceType": BATCH_TRANSFORM_JOB_INSTANCE_TYPE,
                "InstanceCount": 1
            }
            model_client_config = {
                'InvocationsTimeoutInSeconds': BATCH_TRANSFORM_JOB_TIMEOUT,
                'InvocationsMaxRetries': BATCH_TRANSFORM_JOB_MAX_RETRIES
            }

            batch_transform_job = {
                'TransformJobName': transform_job_name,
                'ModelName': FLOW_MODEL_NAME,
                'TransformInput': transform_input,
                'TransformOutput': transform_output,
                'MaxPayloadInMB': 0,
                'MaxConcurrentTransforms': 1,
                "TransformResources": transform_resources,
                "ModelClientConfig": model_client_config
            }
            return batch_transform_job


        def upload_error_to_s3(bucket, error_message):
            '''
            Upload error message to S3
            Args:
                error_message: The error message to upload
                unique_id: Unique ID for identifying the error log
            '''
            global S3_CLIENT
            error_log_key = "output/error_log.txt"
            try:
                S3_CLIENT.put_object(Bucket=bucket, Key=error_log_key, Body=error_message)
                print(f"Uploaded error log to s3://{bucket}/{error_log_key}")
            except Exception as e:
                print(f"Failed to upload error log to S3. Error: {str(e)}")


        def clear_temp_dir():
            rm_list = glob.glob("/tmp/**", recursive=True)
            for f in rm_list:
                if os.path.isfile(f):
                    os.remove(f)

      Handler: index.lambda_handler
      Runtime: python3.11
      MemorySize: 3008
      EphemeralStorage:
        Size: 10240
      Timeout: 900
      Role: !GetAtt LambdaRole.Arn
      Tracing: Active
      Layers:
        - !FindInMap
          - RegionMap
          - !Ref AWS::Region
          - lambdaLayer
      Environment:
        Variables:
          batch_transform_job_instance_type: !Ref FlowBatchInstanceType
          batch_transform_job_max_retries: "0"
          batch_transform_job_timeout: !Ref FlowBatchTimeout
          flow_model_name: !Ref FlowModelName
      Events:
        S3ObjectCreated:
          Type: S3
          Properties:
            Bucket: !Ref Bucket
            Events: s3:ObjectCreated:*
            Filter:
              S3Key:
                Rules:
                  - Name: prefix
                    Value: scratch/output/cluster/
                  - Name: suffix
                    Value: .json
  createFlowLogGroup:
    Type: AWS::Logs::LogGroup
    DeletionPolicy: Retain
    Properties:
      LogGroupName: !Sub /aws/lambda/${createFlow}
  createFlowLambdaEventInvokeConfig:
    Type: AWS::Lambda::EventInvokeConfig
    Properties:
      FunctionName: !Ref createFlow
      MaximumRetryAttempts: 0
      Qualifier: $LATEST

  processFlow:
    Type: AWS::Serverless::Function
    Properties:
      FunctionName: !Ref ProcessFlowFunctionName
      Description: !Sub
        - Stack ${AWS::StackName} Function ${ResourceName}
        - ResourceName: processFlow
      InlineCode: |
        '''
        Process response from flow detection model. Extract cluster, flow. Create enriched_alerts_input

        Input: Response from Flow detection model
        '''

        import os
        import json
        import urllib
        import glob
        from datetime import datetime, timezone
        import sqlite3
        from contextlib import closing
        import copy

        import boto3
        from botocore.exceptions import ClientError
        import pandas as pd

        FLOW_INPUT_WINDOW_SIZE = int(os.getenv("flow_input_window_size"))

        CAMPAIGN_MAP = os.getenv("campaign_map")

        SKIP_SINGLE_ALERT = os.getenv("skip_single_alert")

        missing_variables = []
        if FLOW_INPUT_WINDOW_SIZE is None:
            missing_variables.append("flow_input_window_size")
        if CAMPAIGN_MAP is None:
            missing_variables.append("campaign_map")
        if SKIP_SINGLE_ALERT is None:
            SKIP_SINGLE_ALERT = "true"

        if missing_variables:
            raise ValueError(f"Please enter environment variable(s): {missing_variables}")
        if CAMPAIGN_MAP not in ["flow", "cluster"]:
            raise ValueError(f"Valid values for 'campaign_map' environemnt variable are 'flow', 'cluster'. Entered value is: '{CAMPAIGN_MAP}'")
        if SKIP_SINGLE_ALERT not in ["true", "false"]:
            raise ValueError(f"Valid values for 'skip_single_alert' environemnt variable are 'true', 'false'. Entered value is: '{SKIP_SINGLE_ALERT}'")

        S3_CLIENT = boto3.client("s3")

        SCRATCH_DIR = "scratch"

        NODE_FEATURE_EXISTS_IN_REQUEST = False

        PATH_TO_SQLITE = f"{SCRATCH_DIR}/cypienta.db"
        TEMP_SQLITE_FILENAME = "/tmp/cypienta.db"

        TEMP_INPUT_CLUSTER_ID_LIST_JSON_FILENAME = "/tmp/input_cluster_id_list.json"

        TEMP_USER_CLUSTER_OUTPUT_FILENAME = "/tmp/user_cluster_output.json"

        TEMP_SEQUENCE_ALERTS_OUTPUT_FILENAME = "/tmp/aggregate_model_output.json"

        TEMP_FLOW_OUTPUT_JSON_FILENAME = "/tmp/flow_output.json"
        TEMP_USER_FLOW_OUTPUT_JSON_FILENAME = "/tmp/user_flow_output.json"
        TEMP_FLOW_TICKET_OUTPUT_FILENAME = "/tmp/flow_ticket_output.json"

        SPLUNK_USER_CLUSTER_JSON_LINES = f"/tmp/splunk_user_cluster_output.json"
        SPLUNK_USER_FLOW_JSON_LINES = f"/tmp/splunk_user_flow_output.json"

        TICKET_ID_OBJECT_KEY = f"{SCRATCH_DIR}/ticket_id.json"
        TEMP_TICKET_ID_LOOKUP_FILE = "/tmp/ticket_id.json"

        QUEUE_LOOKUP_FILE = f"{SCRATCH_DIR}/queue_lookup.json"
        TEMP_QUEUE_LOOKUP_FILE = "/tmp/queue_lookup.json"

        TEMP_NODE_FEATURE_INPUT_FILENAME = "/tmp/node_feature.json"
        NODE_FEATURE_LOOKUP_FILE = f"{SCRATCH_DIR}/node_feature_lookup.json"
        TEMP_NODE_FEATURE_LOOKUP_FILE = "/tmp/node_feature_lookup.json"

        TEMP_GLOBAL_FEATURE_FILENAME = "/tmp/global_feature.json"


        def lambda_handler(event, context):
            '''
            Entrypoint from the trigger setup from lambda
            Args:
                event: Event triggered
                context: Context of the lambda function
            '''
            global S3_CLIENT
            global TEMP_SEQUENCE_ALERTS_OUTPUT_FILENAME
            global SCRATCH_DIR
            global PATH_TO_SQLITE
            global TEMP_SQLITE_FILENAME
            global NODE_FEATURE_EXISTS_IN_REQUEST
            func = "lambda_handler"

            NODE_FEATURE_EXISTS_IN_REQUEST = False

            bucket = event["Records"][0]["s3"]["bucket"]["name"]
            try:
                sequence_alerts_output = urllib.parse.unquote_plus(event["Records"][0]["s3"]["object"]["key"], encoding="utf-8")

                clear_temp_dir()

                print(f"{func}: Download flow output from S3 bucket: {bucket}/{sequence_alerts_output}")
                try:
                    S3_CLIENT.download_file(bucket, sequence_alerts_output, TEMP_SEQUENCE_ALERTS_OUTPUT_FILENAME)

                except Exception as e:
                    print(f"{func}: Failed to download files to process")
                    raise e

                unique_id = sequence_alerts_output.split("/")[-2]

                print(f"{func}: Unique ID: {unique_id}")

                try:
                    print(f"{func}: Download intermediate files from S3 bucket")
                    download_intermediate_input_files(bucket, unique_id)
                except Exception as e:
                    print(f"{func}: Failed to download files to process")
                    raise e

                fetch_or_create_ticket_id_lookup_file(bucket)

                print(f"{func}: Starting processing flow output")
                process_output_of_sequence_alerts(bucket, unique_id)
                print(f"{func}: Completed processing flow output")

                try:
                    print(f"{func}: Upload sqlite to S3")

                    S3_CLIENT.upload_file(TEMP_SQLITE_FILENAME, bucket, PATH_TO_SQLITE)

                    print(f"{func}: Upload completed")
                except Exception as e:
                    print(f"{func}: Failed to upload sqlite from S3: {bucket}/{PATH_TO_SQLITE}")
                    raise e
            except Exception as ex:
                print(f"{func}: Exception occurred while running lambda function. Uploading error file to S3.")
                error_message = f"Error occurred in lambda function: {context.function_name}. More details can be found in CloudWatch Logs for this lambda function. The exception message is: {ex}"
                upload_error_to_s3(bucket, error_message)
                raise ex


        def process_output_of_sequence_alerts(bucket, unique_id):
            '''
            Extract cluster, flow. Create flow ticket output.
            Upload user flow, cluster output, flow ticket output
            Args:
                bucket: Bucket name
                unique_id: Unique id of chunk of input
            '''
            global TEMP_SEQUENCE_ALERTS_OUTPUT_FILENAME
            global TEMP_FLOW_OUTPUT_JSON_FILENAME
            global SCRATCH_DIR
            global CAMPAIGN_MAP
            global S3_CLIENT
            global TEMP_QUEUE_LOOKUP_FILE
            global NODE_FEATURE_EXISTS_IN_REQUEST

            func = "process_output_of_sequence_alerts"

            sequence_alerts_output_json = json.load(open(TEMP_SEQUENCE_ALERTS_OUTPUT_FILENAME))
            sequence_alerts_output_json = sequence_alerts_output_json[0]["output"]
            flow_output = sequence_alerts_output_json["flow_output"]

            json.dump(flow_output, open(TEMP_FLOW_OUTPUT_JSON_FILENAME, "w"))

            print(f"{func}: Save flow output to scratch intermediate path")

            flow_output_json_filename = f"{SCRATCH_DIR}/intermediate/{unique_id}/flow.json"
            try:
                print(f"{func}: Save aggregated flows json output: {flow_output_json_filename}")
                S3_CLIENT.upload_file(TEMP_FLOW_OUTPUT_JSON_FILENAME, bucket, flow_output_json_filename)
            except Exception as e:
                print(f"{func}: Failed to save sequence output to S3: {bucket}/{flow_output_json_filename}")
                raise e

            if NODE_FEATURE_EXISTS_IN_REQUEST:
                print(f"{func}: Update global feature for nodes in sql for current input")
                update_global_feature_node_sql()

            create_global_feature_for_ui()

            print(f"{func}: Save global feature for UI to S3")
            global_feature_filename = f"{SCRATCH_DIR}/intermediate/{unique_id}/global_feature.json"
            try:
                S3_CLIENT.upload_file(TEMP_GLOBAL_FEATURE_FILENAME, bucket, global_feature_filename)
            except Exception as e:
                print(f"{func}: Failed to upload to S3: {bucket}/{global_feature_filename}")
                raise e

            print(f"{func}: Create cluster output for current input")
            create_input_cluster_output()

            if len(flow_output) < 1:
                print(f"{func}: No flows were detected for the input. Skip creating flow ticket output.")
            else:

                print(f"{func}: Update flow in sql. Create flow ticket output")
                update_flow_sql(bucket, unique_id)

            print(f"{func}: Process cluster output to user readable structure. Save cluster, flow output")
            upload_cluster_and_flow_output(bucket, unique_id)
            print(f"{func}: Saved cluster, flow output")

            if CAMPAIGN_MAP == "cluster":
                output_filename = f"{SCRATCH_DIR}/output/flow/{unique_id}/cluster_ticket_output.json"
                source_filename = f"{SCRATCH_DIR}/intermediate/{unique_id}/cluster_ticket_output.json"
                copy_source = {
                    'Bucket': bucket,
                    'Key': source_filename
                }
            else:
                output_filename = f"{SCRATCH_DIR}/output/flow/{unique_id}/flow_ticket_output.json"
                source_filename = f"{SCRATCH_DIR}/intermediate/{unique_id}/flow_ticket_output.json"
                copy_source = {
                    'Bucket': bucket,
                    'Key': source_filename
                }

            try:
                print(f"{func}: Upload {CAMPAIGN_MAP} ticket output file to output for flow in S3")
                S3_CLIENT.copy(copy_source, bucket, output_filename)
            except Exception as e:
                print(f"{func}: Failed to save file to S3: {bucket}/{output_filename}")
                raise e

            print(f"{func}: Save ticket output")
            log_json = {"message": "saved ticket output", "bucket": bucket, "key": output_filename}
            print(f"{json.dumps(log_json)}")

            print(f"{func}: Check if next input is present to start processing")
            queue_lookup_json = json.load(open(TEMP_QUEUE_LOOKUP_FILE, "r"))

            if len(queue_lookup_json["input_queue"]) > 0:
                print(f"{func}: Next input file to process: {queue_lookup_json['input_queue'][0]}")
                try:
                    print(f"{func}: Upload {CAMPAIGN_MAP} ticket output file to output for flow in S3")
                    next_input_queue_entry = queue_lookup_json["input_queue"][0]
                    next_input_bucket = next_input_queue_entry.split("/")[0]
                    next_input_key = "/".join(next_input_queue_entry.split("/")[1:])
                    copy_source = {
                        'Bucket': next_input_bucket,
                        'Key': next_input_key
                    }
                    S3_CLIENT.copy(copy_source, next_input_bucket, next_input_key)
                except Exception as e:
                    print(f"{func}: Failed to save file to S3: {next_input_bucket}/{next_input_key}")
                    raise e


        def update_global_feature_node_sql():
            '''
            Update global feature table for current input
            '''
            global TEMP_SQLITE_FILENAME
            global TEMP_NODE_FEATURE_LOOKUP_FILE

            func = "update_global_feature_node_sql"

            node_feature_lookup_json = json.load(open(TEMP_NODE_FEATURE_LOOKUP_FILE, "r"))

            insert_global_feature = """
            INSERT OR IGNORE INTO global_feature(
                feature,
                feature_type,
                added_to_ui
            ) VALUES(
                :feature,
                :feature_type,
                :added_to_ui
            );
            """

            try:
                with closing(sqlite3.connect(TEMP_SQLITE_FILENAME)) as conn:
                    conn.execute("PRAGMA foreign_keys = ON;")
                    with closing(conn.cursor()) as cursor:

                        insert_global_feature_params = []

                        for node_key in node_feature_lookup_json.keys():
                            global_feature_params = {
                                "feature": node_key,
                                "feature_type": "node",
                                "added_to_ui": 0
                            }
                            insert_global_feature_params.append(global_feature_params)

                        print(f"{func}: Add node features to table")
                        cursor.executemany(insert_global_feature, insert_global_feature_params)

                    conn.commit()
            except Exception as e:
                print(f"{func}: Failed to update event table.")
                raise e


        def create_global_feature_for_ui():
            '''
            Create global feature file to update on UI
            '''
            global TEMP_SQLITE_FILENAME
            global TEMP_GLOBAL_FEATURE_FILENAME

            func = "update_global_feature_node_sql"

            select_global_feature_event = """
            SELECT * FROM global_feature
            WHERE added_to_ui = 0 AND feature_type = 'event'
            ;
            """

            select_global_feature_node = """
            SELECT * FROM global_feature
            WHERE added_to_ui = 0 AND feature_type = 'node'
            ;
            """

            update_global_feature_added_to_ui = """
            UPDATE global_feature
            SET added_to_ui = 1
            ;
            """

            try:
                with closing(sqlite3.connect(TEMP_SQLITE_FILENAME)) as conn:
                    conn.row_factory = sqlite3.Row
                    conn.execute("PRAGMA foreign_keys = ON;")
                    with closing(conn.cursor()) as cursor:

                        global_feature = {
                            "event_feature": [],
                            "node_feature": []
                        }

                        print(f"{func}: Select global feature event")
                        cursor.execute(select_global_feature_event)

                        result = cursor.fetchall()
                        result = [dict(row) for row in result]
                        global_feature["event_feature"] = [feat["feature"] for feat in result]

                        print(f"{func}: Select global feature node")
                        cursor.execute(select_global_feature_node)

                        result = cursor.fetchall()
                        result = [dict(row) for row in result]
                        global_feature["node_feature"] = [feat["feature"] for feat in result]

                        print(f"{func}: Save global feature")
                        json.dump(global_feature, open(TEMP_GLOBAL_FEATURE_FILENAME, "w"))

                        print(f"{func}: Update sql for features that are used")
                        cursor.execute(update_global_feature_added_to_ui)

                    conn.commit()
            except Exception as e:
                print(f"{func}: Failed to update event table.")
                raise e


        def create_input_cluster_output():
            '''
            Create user cluster output for input
            '''
            global TEMP_USER_CLUSTER_OUTPUT_FILENAME
            global TEMP_INPUT_CLUSTER_ID_LIST_JSON_FILENAME
            global TEMP_SQLITE_FILENAME

            func = "create_input_cluster_output"

            input_cluster_id_list = json.load(open(TEMP_INPUT_CLUSTER_ID_LIST_JSON_FILENAME, "r"))

            try:
                with closing(sqlite3.connect(TEMP_SQLITE_FILENAME)) as conn:
                    conn.row_factory = sqlite3.Row
                    conn.execute("PRAGMA foreign_keys = ON;")
                    with closing(conn.cursor()) as cursor:
                        cluster_id_list = ",".join(map(str, input_cluster_id_list))
                        cluster_id_list = f"({cluster_id_list})"
                        select_cluster_output_query = f"""
                        SELECT
                            co.cluster_id,
                            co.cluster_starttime,
                            co.cluster_endtime,
                            co.cluster_srcips,
                            co.cluster_dstips,
                            co.cluster_techs,
                            co.cluster_tacs,
                            co.cluster_stages,
                            GROUP_CONCAT(e.alert_id) AS alert_ids
                        FROM
                            cluster_output co
                        INNER JOIN
                            event e ON co.cluster_id = e.cluster_id
                        WHERE co.cluster_id IN {cluster_id_list}
                        GROUP BY
                            co.cluster_id
                        ;
                        """
                        cursor.execute(select_cluster_output_query)
                        rows = cursor.fetchall()

                        user_cluster_output = []

                        for row in rows:
                            row = dict(row)
                            alert_ids_list = row["alert_ids"].split(",")
                            if SKIP_SINGLE_ALERT and len(alert_ids_list) <= 1:
                                continue

                            alert_ids_list = [alert_id[:-40] for alert_id in alert_ids_list]
                            splunk_query = ") OR (".join(alert_ids_list)
                            splunk_query = f"({splunk_query})"

                            cluster = {
                                "cluster_id": row["cluster_id"],
                                "ids": alert_ids_list,
                                "splunk_query": splunk_query,
                                "start_time": row["cluster_starttime"],
                                "end_time": row["cluster_endtime"],
                                "src": json.loads(row["cluster_srcips"]),
                                "dst": json.loads(row["cluster_dstips"]),
                                "cluster_techs": json.loads(row["cluster_techs"]),
                                "cluster_tacs": json.loads(row["cluster_tacs"]),
                                "cluster_stages": json.loads(row["cluster_stages"])
                            }

                            user_cluster_output.append(cluster)
            except Exception as e:
                print(f"{func}: Failed to update event table.")
                raise e

            with open(TEMP_USER_CLUSTER_OUTPUT_FILENAME, "w") as f:
                json.dump(user_cluster_output, f)


        def upload_cluster_and_flow_output(bucket, unique_id):
            '''
            Upload user cluster output, flow output to output folder, splunk
            Args:
                bucket: Bucket name
                unique_id: Unique id of current batch
            '''
            global S3_CLIENT
            global TEMP_USER_FLOW_OUTPUT_JSON_FILENAME
            global TEMP_USER_CLUSTER_OUTPUT_FILENAME
            global SPLUNK_USER_FLOW_JSON_LINES
            global SPLUNK_USER_CLUSTER_JSON_LINES
            func = "upload_cluster_and_flow_output"

            current_timestamp = datetime.now(timezone.utc).strftime("%Y-%m-%d %H:%M:%S %z")
            path_to_user_cluster_output = f"output/{current_timestamp}/cluster.json"
            path_to_user_flow_output = f"output/{current_timestamp}/flow.json"

            flow_exists = os.path.exists(TEMP_USER_FLOW_OUTPUT_JSON_FILENAME)

            print(f"{func}: Save user cluster and flow output to S3: {path_to_user_cluster_output}, {path_to_user_flow_output}")
            try:
                S3_CLIENT.upload_file(TEMP_USER_CLUSTER_OUTPUT_FILENAME, bucket, path_to_user_cluster_output)
                print(f"{func}: Saved user cluster output to S3")
            except Exception as e:
                print(f"{func}: Failed to save file to S3: {bucket}/{path_to_user_cluster_output}")
                raise e

            if flow_exists:
                try:
                    S3_CLIENT.upload_file(TEMP_USER_FLOW_OUTPUT_JSON_FILENAME, bucket, path_to_user_flow_output)
                    print(f"{func}: Saved user flow output to S3")
                except Exception as e:
                    print(f"{func}: Failed to save file to S3: {bucket}/{path_to_user_flow_output}")
                    raise e

            # upload to splunk
            path_to_splunk_flow_output = f"splunk/flow_{unique_id}.json"
            path_to_splunk_cluster_output = f"splunk/cluster_{unique_id}.json"

            # convert to json lines format

            if flow_exists:
                with open(TEMP_USER_FLOW_OUTPUT_JSON_FILENAME, "r") as f_read, open(SPLUNK_USER_FLOW_JSON_LINES, "w") as f_write:
                    user_cluster_output = json.load(f_read)
                    for cluster in user_cluster_output:
                        f_write.write(json.dumps(cluster) + "\n")
            
            with open(TEMP_USER_CLUSTER_OUTPUT_FILENAME, "r") as f_read, open(SPLUNK_USER_CLUSTER_JSON_LINES, "w") as f_write:
                user_cluster_output = json.load(f_read)
                for cluster in user_cluster_output:
                    f_write.write(json.dumps(cluster) + "\n")

            print(f"{func}: Save flow output to S3 for splunk: {path_to_splunk_flow_output}")

            if flow_exists:
                try:
                    S3_CLIENT.upload_file(SPLUNK_USER_FLOW_JSON_LINES, bucket, path_to_splunk_flow_output)
                    print(f"{func}: Saved flow output to S3")
                except Exception as e:
                    print(f"{func}: Failed to save file to S3: {bucket}/{path_to_splunk_flow_output}")
                    raise e

            try:
                S3_CLIENT.upload_file(SPLUNK_USER_CLUSTER_JSON_LINES, bucket, path_to_splunk_cluster_output)
                print(f"{func}: Saved cluster output to S3")
            except Exception as e:
                print(f"{func}: Failed to save file to S3: {bucket}/{path_to_splunk_cluster_output}")
                raise e


        def update_flow_sql(bucket, unique_id):
            '''
            Update flow output sqlite and create flow ticket output
            Args:
                bucket: Bucket name
                unique_id: Unique id of chunk of input
            '''
            global SCRATCH_DIR
            global S3_CLIENT
            global TEMP_SQLITE_FILENAME
            global TEMP_FLOW_OUTPUT_JSON_FILENAME
            global TEMP_USER_FLOW_OUTPUT_JSON_FILENAME
            global TEMP_FLOW_TICKET_OUTPUT_FILENAME
            global TEMP_TICKET_ID_LOOKUP_FILE

            func = "update_flow_sql"

            flow_output_json = json.load(open(TEMP_FLOW_OUTPUT_JSON_FILENAME, "r"))

            print(f"{func}: Number of flows: {len(flow_output_json)}")

            # create ticket id
            ticket_id_json = json.load(open(TEMP_TICKET_ID_LOOKUP_FILE, "r"))
            ticket_id_start = ticket_id_json["last_ticket_id"]

            insert_flow_output_query = """
            INSERT OR IGNORE INTO flow_output (
                flow_id,
                cluster_prob,
                alert_ids
            )
            VALUES (
                :flow_id,
                :cluster_prob,
                :alert_ids
            )
            ;
            """

            try:
                with open(TEMP_FLOW_TICKET_OUTPUT_FILENAME, "w") as f:
                    with closing(sqlite3.connect(TEMP_SQLITE_FILENAME)) as conn:
                        conn.row_factory = sqlite3.Row
                        conn.execute("PRAGMA foreign_keys = ON;")
                        with closing(conn.cursor()) as cursor:
                            insert_flow_output_params = []
                            for flow in flow_output_json:
                                flow["flow_id"] = flow["Flow_id"]
                                flow.pop("Flow_id")
                                flow["ids"] = []

                                ticket_id_start += 1
                                flow_ticket = copy.deepcopy(flow)
                                flow_ticket.pop("cluster_ids")
                                flow_ticket["ticket_id"] = ticket_id_start

                                cluster_id_list = ",".join(map(str, flow["cluster_ids"]))
                                cluster_id_list = f"({cluster_id_list})"
                                select_event_query = f"""
                                SELECT
                                    cluster_id,
                                    MIN(time) AS start_time,
                                    MAX(time) AS end_time,
                                    GROUP_CONCAT(DISTINCT src) AS unique_src,
                                    GROUP_CONCAT(DISTINCT dst) AS unique_dst,
                                    GROUP_CONCAT(tech,';') AS combined_tech,
                                    GROUP_CONCAT(tac,';') AS combined_tac,
                                    GROUP_CONCAT(stage,';') AS combined_stage,
                                    GROUP_CONCAT(alert_id) AS alert_ids
                                FROM
                                    event
                                WHERE cluster_id IN {cluster_id_list}
                                GROUP BY cluster_id
                                ORDER BY cluster_id
                                ;
                                """
                                cursor.execute(select_event_query)

                                clusters = cursor.fetchall()
                                clusters = [dict(cluster) for cluster in clusters]

                                cluster_df = pd.DataFrame(clusters)

                                flow_ticket["start_time"] = cluster_df["start_time"].min()
                                flow_ticket["end_time"] = cluster_df["end_time"].max()

                                flow_ticket["involved_entities"] = []
                                flow_ticket["involved_techs"] = []
                                flow_ticket["involved_tacs"] = []
                                flow_ticket["involved_stages"] = []

                                cluster_df = cluster_df.apply(process_row, axis=1)
                                for index, row_cluster in cluster_df.iterrows():

                                    flow_ticket["involved_entities"] = list(set(flow_ticket["involved_entities"] + row_cluster["involved_entities"]))
                                    flow_ticket["involved_techs"] = list(set(flow_ticket["involved_techs"] + row_cluster["involved_techs"]))
                                    flow_ticket["involved_tacs"] = list(set(flow_ticket["involved_tacs"] + row_cluster["involved_tacs"]))
                                    flow_ticket["involved_stages"] = list(set(flow_ticket["involved_stages"] + row_cluster["involved_stages"]))
                                    flow_ticket["ids"] = list(set(flow_ticket["ids"] + row_cluster["involved_events"]))

                                flow_ticket["metrics"] = get_global_metrics_from_cluster_sql(cluster_df)

                                f.write(json.dumps(flow_ticket) + "\n")

                                flow["ids"] = flow_ticket["ids"]

                                flow.pop("cluster_ids")

                                params = {
                                    "flow_id": flow["flow_id"],
                                    "cluster_prob": flow["cluster_prob"],
                                    "alert_ids": json.dumps(flow["ids"])
                                }
                                insert_flow_output_params.append(params)

                            print(f"{func}: Insert flow output to table")
                            cursor.executemany(insert_flow_output_query, insert_flow_output_params)

                            print(f"{func}: Save ticket id lookup file")
                            ticket_id_json["last_ticket_id"] = ticket_id_start
                            json.dump(ticket_id_json, open(TEMP_TICKET_ID_LOOKUP_FILE, "w"))
                            save_ticket_id_lookup_file(bucket)
            except Exception as e:
                print(f"{func}: Failed to update event table.")
                raise e

            print(f"{func}: Save flow output for user")
            json.dump(flow_output_json, open(TEMP_USER_FLOW_OUTPUT_JSON_FILENAME, "w"))

            try:
                print(f"{func}: Upload flow ticket output file to S3")
                flow_ticket_output_filename = f"{SCRATCH_DIR}/intermediate/{unique_id}/flow_ticket_output.json"
                S3_CLIENT.upload_file(TEMP_FLOW_TICKET_OUTPUT_FILENAME, bucket, flow_ticket_output_filename)
            except Exception as e:
                print(f"{func}: Failed to save file to S3: {bucket}/{flow_ticket_output_filename}")
                raise e


        def process_row(row):
            combined_tech = row['combined_tech']
            combined_tech = combined_tech.split(";")
            tech_list = []
            for techs in combined_tech:
                tech_list += json.loads(techs)
            tech_list = list(set(tech_list))

            combined_tac = row['combined_tac']
            combined_tac = combined_tac.split(";")
            tac_list = []
            for tacs in combined_tac:
                tac_list += json.loads(tacs)
            tac_list = list(set(tac_list))

            combined_stage = row['combined_stage']
            combined_stage = combined_stage.split(";")
            stage_list = []
            for stage in combined_stage:
                stage_list += json.loads(stage)
            stage_list = list(set(stage_list))

            row["involved_entities"] = list(set(row["unique_src"].split(",") + row["unique_dst"].split(",")))
            row["involved_techs"] = tech_list
            row["involved_tacs"] = tac_list
            row["involved_stages"] = stage_list
            row["involved_events"] = row["alert_ids"].split(",")
            return row


        def get_global_metrics_from_cluster_sql(cluster_df: pd.DataFrame):
            '''
            Get global metrics from cluster
            Args:
                cluster_df: dataframe of cluster output for which metrics is to be calculated
            Returns:
                metrics dictionary
            '''
            func = "get_global_metrics_from_cluster_sql"

            metrics = {}
            metrics_cols = ["tech", "tac", "stage", "event", "entity"]
            mapped_metric_cols = ["involved_techs", "involved_tacs", "involved_stages", "involved_events", "involved_entities"]

            for metric_col, mapped_metric_col in zip(metrics_cols, mapped_metric_cols):
                cluster_df[f'{metric_col}_count'] = cluster_df[mapped_metric_col].apply(lambda x: float(len(x)))

                metrics[metric_col] = {
                    "avg": cluster_df[f"{metric_col}_count"].mean(),
                    "min": cluster_df[f"{metric_col}_count"].min(),
                    "max": cluster_df[f"{metric_col}_count"].max(),
                    "median": cluster_df[f"{metric_col}_count"].median(),
                    "q1": cluster_df[f"{metric_col}_count"].quantile(0.25),
                    "q3": cluster_df[f"{metric_col}_count"].quantile(0.75)
                }

                if metric_col == "event":
                    metrics[metric_col]["count"] = float(cluster_df[f"{metric_col}_count"].sum())

            # time range metric
            cluster_df["start_time"] = pd.to_datetime(cluster_df['start_time'], unit="s")
            cluster_df["end_time"] = pd.to_datetime(cluster_df['end_time'], unit="s")
            cluster_df["time_range"] = (cluster_df["end_time"] - cluster_df["start_time"]).dt.total_seconds()
            metrics["time_range"] = {
                "avg": cluster_df["time_range"].mean(),
                "min": cluster_df["time_range"].min(),
                "max": cluster_df["time_range"].max(),
                "median": cluster_df["time_range"].median(),
                "q1": cluster_df["time_range"].quantile(0.25),
                "q3": cluster_df["time_range"].quantile(0.75)
            }

            # for _, row in cluster_ticket_output_for_flow.iterrows():
            #     unique_other_attributes = row["metrics"].keys()

            return metrics


        def save_ticket_id_lookup_file(bucket):
            '''
            Save ticket id lookup file to S3
            Args:
                bucket: The S3 bucket name
            '''
            global TICKET_ID_OBJECT_KEY
            global S3_CLIENT
            global TEMP_TICKET_ID_LOOKUP_FILE
            func = "save_ticket_id_lookup_file"
            try:
                print(f"{func}: Upload ticket id lookup file to S3")
                S3_CLIENT.upload_file(TEMP_TICKET_ID_LOOKUP_FILE, bucket, TICKET_ID_OBJECT_KEY)
            except Exception as e:
                print(f"{func}: Failed to save file to S3: {bucket}/{TICKET_ID_OBJECT_KEY}")
                raise e


        def fetch_or_create_ticket_id_lookup_file(bucket):
            '''
            Check if the ticket_id lookup file exists on S3. Create if it does not
            Args:
                bucket: The S3 bucket name
            '''
            global TICKET_ID_OBJECT_KEY
            global S3_CLIENT
            global TEMP_TICKET_ID_LOOKUP_FILE

            func = "fetch_or_create_ticket_id_lookup_file"

            try:
                print(
                    f"{func}: Checking if ticket id lookup file exists on S3 path {bucket}/{TICKET_ID_OBJECT_KEY}")

                S3_CLIENT.head_object(Bucket=bucket, Key=TICKET_ID_OBJECT_KEY)

            except ClientError as e:
                if e.response['Error']['Code'] != '404':
                    raise e

                print(f"{func}: Ticket id lookup file does not exist on S3. Create empty table")

                internal_id_to_unique_id_json = {"last_ticket_id": 0}
                json.dump(internal_id_to_unique_id_json, open(TEMP_TICKET_ID_LOOKUP_FILE, "w"))

                print(f"{func}: Initiated ticket id")
            else:
                try:
                    print(f"{func}: Download available internal id to unique id lookup file from S3")

                    S3_CLIENT.download_file(bucket, TICKET_ID_OBJECT_KEY, TEMP_TICKET_ID_LOOKUP_FILE)

                    print(f"{func}: Download completed")
                except Exception as e:
                    print(f"{func}: Failed to download lookup file from S3: {bucket}/{TICKET_ID_OBJECT_KEY}")
                    raise e


        def download_intermediate_input_files(bucket, unique_id):
            '''
            Download intermediate files for processing.
            Args:
                bucket: Bucket name
                unique_id: unique id for current batch
            '''
            global S3_CLIENT
            global SCRATCH_DIR
            global PATH_TO_SQLITE
            global TEMP_SQLITE_FILENAME
            global TEMP_INPUT_CLUSTER_ID_LIST_JSON_FILENAME
            global QUEUE_LOOKUP_FILE
            global TEMP_QUEUE_LOOKUP_FILE
            global NODE_FEATURE_EXISTS_IN_REQUEST
            global TEMP_NODE_FEATURE_INPUT_FILENAME
            global NODE_FEATURE_LOOKUP_FILE
            global TEMP_NODE_FEATURE_LOOKUP_FILE

            func = "download_intermediate_input_files"

            try:
                print(f"{func}: Download sqlite file from S3")
                S3_CLIENT.download_file(bucket, PATH_TO_SQLITE, TEMP_SQLITE_FILENAME)
            except Exception as e:
                print(f"{func}: Failed to download file: {bucket}/{PATH_TO_SQLITE}")
                raise e

            try:
                print(f"{func}: Download queue lookup file from S3")
                S3_CLIENT.download_file(bucket, QUEUE_LOOKUP_FILE, TEMP_QUEUE_LOOKUP_FILE)
            except Exception as e:
                print(f"{func}: Failed to download file: {bucket}/{QUEUE_LOOKUP_FILE}")
                raise e

            path_to_cluster_id_list = f"{SCRATCH_DIR}/intermediate/{unique_id}/input_cluster_id_list.json"
            print(f"{func}: Save file to S3: {bucket}/{path_to_cluster_id_list}")

            try:
                S3_CLIENT.download_file(bucket, path_to_cluster_id_list, TEMP_INPUT_CLUSTER_ID_LIST_JSON_FILENAME)
                print(f"{func}: Uploaded file to S3: {bucket}/{path_to_cluster_id_list}")
            except Exception as e:
                print(f"{func}: Failed to upload file to S3: {bucket}/{path_to_cluster_id_list}")
                raise e

            path_to_node_feature_from_input = f"{SCRATCH_DIR}/intermediate/{unique_id}/node_feature.json"
            try:
                print(f"{func}: Checking if node feaures file exists on S3 path {bucket}/{path_to_node_feature_from_input}")
                S3_CLIENT.head_object(Bucket=bucket, Key=path_to_node_feature_from_input)
                NODE_FEATURE_EXISTS_IN_REQUEST = True

            except ClientError as e:
                if e.response['Error']['Code'] == '404':
                    print(f"{func}: Node feaures file does not exist")
                else:
                    raise e
            else:
                try:
                    S3_CLIENT.download_file(bucket, NODE_FEATURE_LOOKUP_FILE, TEMP_NODE_FEATURE_LOOKUP_FILE)
                    print(f"{func}: Download completed")
                except Exception as e:
                    print(f"{func}: Failed to download file: {bucket}/{NODE_FEATURE_LOOKUP_FILE}")
                    raise e


        def upload_error_to_s3(bucket, error_message):
            '''
            Upload error message to S3
            Args:
                error_message: The error message to upload
                unique_id: Unique ID for identifying the error log
            '''
            global S3_CLIENT
            error_log_key = "output/error_log.txt"
            try:
                S3_CLIENT.put_object(Bucket=bucket, Key=error_log_key, Body=error_message)
                print(f"Uploaded error log to s3://{bucket}/{error_log_key}")
            except Exception as e:
                print(f"Failed to upload error log to S3. Error: {str(e)}")


        def clear_temp_dir():
            rm_list = glob.glob("/tmp/**", recursive=True)
            for f in rm_list:
                if os.path.isfile(f):
                    os.remove(f)

      Handler: index.lambda_handler
      Runtime: python3.11
      MemorySize: 3008
      EphemeralStorage:
        Size: 10240
      Timeout: 900
      Role: !GetAtt LambdaRole.Arn
      Tracing: Active
      Layers:
        - !FindInMap
          - RegionMap
          - !Ref AWS::Region
          - lambdaLayer
      Environment:
        Variables:
          campaign_map: !Ref ClusterOrFlowMapToCampaign
          flow_input_window_size: !Ref FlowInputMaxClusters
      Events:
        S3ObjectCreated:
          Type: S3
          Properties:
            Bucket: !Ref Bucket
            Events: s3:ObjectCreated:*
            Filter:
              S3Key:
                Rules:
                  - Name: prefix
                    Value: scratch/response/flow_out/
                  - Name: suffix
                    Value: .out
  processFlowLogGroup:
    Type: AWS::Logs::LogGroup
    DeletionPolicy: Retain
    Properties:
      LogGroupName: !Sub /aws/lambda/${processFlow}
  processFlowLambdaEventInvokeConfig:
    Type: AWS::Lambda::EventInvokeConfig
    Properties:
      FunctionName: !Ref processFlow
      MaximumRetryAttempts: 0
      Qualifier: $LATEST

  updateLookupTable:
    Type: AWS::Serverless::Function
    Properties:
      FunctionName: !Ref UpdateLookupFunctionName
      Description: !Sub
        - Stack ${AWS::StackName} Function ${ResourceName}
        - ResourceName: updateLookupTable
      InlineCode: |
        '''
        Update technique lookup table
        Input: Row to be added to lookup table
        '''

        import os
        import json
        import glob
        import boto3
        from botocore.exceptions import ClientError
        import pandas as pd

        TECHNIQUE_LOOKUP_OBJECT = os.getenv("technique_lookup_object")

        if not TECHNIQUE_LOOKUP_OBJECT:
            raise ValueError("Please enter environment variable: 'technique_lookup_object'")

        S3_CLIENT = boto3.client("s3")

        TEMP_INPUT_FILENAME = "/tmp/classification_response.csv"

        TEMP_NEW_LOOKUP_TABLE_FILENAME = "/tmp/new_lookup_table.csv"

        TEMP_TECHNIQUE_CLASSIFICATION_LOOKUP_FILE = "/tmp/technique_lookup.csv"


        def lambda_handler(event, context):
            '''
            Entrypoint from the trigger setup from lambda
            Args:
                event: Event triggered
                context: Context of the lambda function
            '''
            global S3_CLIENT
            global TEMP_INPUT_FILENAME
            global TEMP_TECHNIQUE_CLASSIFICATION_LOOKUP_FILE

            func = "lambda_handler"

            print(f"{func}: Received event: {json.dumps(event)}")

            bucket = event['Records'][0]['s3']['bucket']['name']
            input_filename = event['Records'][0]['s3']['object']['key']

            clear_temp_dir()

            print(f"{func}: Download file to process from S3 bucket: {bucket}/{input_filename}")

            try:
                S3_CLIENT.download_file(bucket, input_filename, TEMP_INPUT_FILENAME)
                print(f"{func}: File to process download completed")
            except Exception as e:
                print(f"{func}: Failed to download file to process")
                raise e

            fetch_or_create_technique_classification_lookup_file(bucket)

            print(f"{func}: Update lookup table")

            update_lookup_table(bucket)
            clean_up(bucket, input_filename)

            print(f"{func}: Lookup table updated.")


        def update_lookup_table(bucket):
            '''
            Update lookup table
            Args:
                bucket: Bucket name
            '''

            global TEMP_INPUT_FILENAME
            global TEMP_NEW_LOOKUP_TABLE_FILENAME
            global S3_CLIENT
            global TECHNIQUE_LOOKUP_OBJECT
            global TEMP_TECHNIQUE_CLASSIFICATION_LOOKUP_FILE

            func = "update_lookup_table"

            technique_classification_lookup_df = pd.read_csv(TEMP_TECHNIQUE_CLASSIFICATION_LOOKUP_FILE)
            before_len_technique_classification_lookup_df = len(technique_classification_lookup_df)

            classification_response_df = pd.read_csv(TEMP_INPUT_FILENAME)

            new_technique_classification_lookup_df = pd.concat([technique_classification_lookup_df, classification_response_df])
            new_technique_classification_lookup_df_no_dupe = new_technique_classification_lookup_df.drop_duplicates(subset=["alerts"], keep="first")
            after_len_technique_classification_lookup_df = len(new_technique_classification_lookup_df_no_dupe)

            new_technique_classification_lookup_df_no_dupe.to_csv(TEMP_NEW_LOOKUP_TABLE_FILENAME, index=False)

            try:
                S3_CLIENT.upload_file(TEMP_NEW_LOOKUP_TABLE_FILENAME, bucket, TECHNIQUE_LOOKUP_OBJECT)
            except Exception as e:
                print(f"{func}: Failed to save lookup table to S3: {bucket}/{TECHNIQUE_LOOKUP_OBJECT}")
                raise e

            print(f"{func}: Added number of rows: {after_len_technique_classification_lookup_df - before_len_technique_classification_lookup_df}")


        def fetch_or_create_technique_classification_lookup_file(bucket):
            '''
            Fetch or create lookup file
            Args:
                bucket: Bucket name
            '''

            global TECHNIQUE_LOOKUP_OBJECT
            global S3_CLIENT
            global TEMP_TECHNIQUE_CLASSIFICATION_LOOKUP_FILE

            func = "fetch_or_create_technique_classification_lookup_file"
            try:
                print(
                    f"{func}: Checking if technique classification lookup file for alerts exists on S3 path {bucket}/{TECHNIQUE_LOOKUP_OBJECT}")

                S3_CLIENT.head_object(Bucket=bucket, Key=TECHNIQUE_LOOKUP_OBJECT)

            except ClientError as e:
                if e.response['Error']['Code'] != '404':
                    raise e

                print(f"{func}: Technique classification lookup file for alerts does not exist on S3. Create empty table")

                technique_classification_lookup_df = pd.DataFrame(columns=["alerts", "techniques"])
                technique_classification_lookup_df.to_csv(TEMP_TECHNIQUE_CLASSIFICATION_LOOKUP_FILE, index=False)

                print(f"{func}: Empty table created")
            else:
                try:
                    print(f"{func}: Download available technique classification lookup file for alerts from S3")

                    S3_CLIENT.download_file(bucket, TECHNIQUE_LOOKUP_OBJECT, TEMP_TECHNIQUE_CLASSIFICATION_LOOKUP_FILE)

                    print(f"{func}: Download completed")
                except Exception as e:
                    print(f"{func}: Failed to download lookup table from S3: {bucket}/{TECHNIQUE_LOOKUP_OBJECT}")
                    raise e


        def clean_up(bucket, input_filename):
            '''
            Delete input file
            Args:
                bucket: Bucket name
                input_filename: S3 object to delete
            '''
            global S3_CLIENT

            func = "clean_up"

            try:
                print(f"{func}: Delete message payload from S3")
                S3_CLIENT.delete_object(Bucket=bucket, Key=input_filename)
                print(f"{func}: Message payload deleted")
            except Exception as e:
                print(f"{func}: Failed to delete message payload from S3. You can manually delete the object: {bucket}/{input_filename}")


        def clear_temp_dir():
            rm_list = glob.glob("/tmp/**", recursive=True)
            for f in rm_list:
                if os.path.isfile(f):
                    os.remove(f)

      Handler: index.lambda_handler
      Runtime: python3.11
      MemorySize: 3008
      EphemeralStorage:
        Size: 10240
      ReservedConcurrentExecutions: 1
      Timeout: 900
      Role: !GetAtt LambdaRole.Arn
      Tracing: Active
      Layers:
        - !FindInMap
          - RegionMap
          - !Ref AWS::Region
          - lambdaLayer
      Environment:
        Variables:
          technique_lookup_object: !Ref TechniqueLookupObject
      Events:
        S3ObjectCreated:
          Type: S3
          Properties:
            Bucket: !Ref Bucket
            Events: s3:ObjectCreated:*
            Filter:
              S3Key:
                Rules:
                  - Name: prefix
                    Value: scratch/queue/
                  - Name: suffix
                    Value: .csv
  updateLookupTableLogGroup:
    Type: AWS::Logs::LogGroup
    DeletionPolicy: Retain
    Properties:
      LogGroupName: !Sub /aws/lambda/${updateLookupTable}
  updateLookupTableLambdaEventInvokeConfig:
    Type: AWS::Lambda::EventInvokeConfig
    Properties:
      FunctionName: !Ref updateLookupTable
      MaximumRetryAttempts: 0
      Qualifier: $LATEST

  createCampaign:
    Type: AWS::Serverless::Function
    DependsOn: LoadBalancer
    Properties:
      FunctionName: !Ref CreateCampaignFunctionName
      Description: !Sub
        - Stack ${AWS::StackName} Function ${ResourceName}
        - ResourceName: createCampaign
      InlineCode: |
        '''
        This lambda function will create campaigns, events on the UI.

        Input: cluster_ticket_output.json
        '''

        import gzip
        import json
        import os
        import time
        import urllib
        import glob

        import boto3
        import pandas as pd
        import requests

        UI_LB_URL = os.getenv("ui_lb_url")

        EVENT_THRESHOLD = int(os.getenv("event_threshold"))

        TAC_THRESHOLD = int(os.getenv("tac_threshold"))

        UI_USERNAME = os.getenv("ui_username")

        UI_PASSWORD = os.getenv("ui_password")

        CAMPAIGN_MAP = os.getenv("campaign_map").lower()

        missing_variables = []
        if UI_LB_URL is None:
            missing_variables.append("ui_lb_url")
        if EVENT_THRESHOLD is None:
            missing_variables.append("event_threshold")
        if TAC_THRESHOLD is None:
            missing_variables.append("tac_threshold")
        if UI_USERNAME is None:
            missing_variables.append("ui_username")
        if UI_PASSWORD is None:
            missing_variables.append("ui_password")
        if CAMPAIGN_MAP is None:
            missing_variables.append("campaign_map")

        if missing_variables:
            raise ValueError(f"Please enter environment variable(s): {missing_variables}")

        if CAMPAIGN_MAP not in ["flow", "cluster"]:
            raise ValueError(f"Valid values for 'campaign_map' environemnt variable are 'flow', 'cluster'. Entered value is: '{CAMPAIGN_MAP}'")

        CONTENT_TYPE = "application/json"

        SCRATCH_DIR = "scratch"

        TEMP_CLUSTER_TICKET_OUTPUT = "/tmp/cluster_ticket_output.json"
        TEMP_FLOW_TICKET_OUTPUT = "/tmp/flow_ticket_output.json"

        TEMP_GLOBAL_METRICS_FILENAME = "/tmp/global_metrics.json"
        TEMP_GLOBAL_FEATURE_FILENAME = "/tmp/global_feature.json"

        HOST = f"http://{UI_LB_URL}:8000/"

        S3_CLIENT = boto3.client("s3")


        def lambda_handler(event, context):
            '''
            Entrypoint from the trigger setup from lambda
            Args:
                event: Event triggered
                context: Context of the lambda function
            '''
            global TEMP_CLUSTER_TICKET_OUTPUT
            global TEMP_FLOW_TICKET_OUTPUT
            global SCRATCH_DIR
            global S3_CLIENT
            global TEMP_GLOBAL_METRICS_FILENAME
            global TEMP_GLOBAL_FEATURE_FILENAME

            func = "lambda_handler"

            bucket = event["Records"][0]["s3"]["bucket"]["name"]
            try:
                input_file = urllib.parse.unquote_plus(event["Records"][0]["s3"]["object"]["key"], encoding="utf-8")

                clear_temp_dir()
                unique_id = input_file.split("/")[-2]

                print(f"{func}: input file: {bucket}/{input_file}")

                temp_input_file = TEMP_CLUSTER_TICKET_OUTPUT
                if CAMPAIGN_MAP == "flow":
                    temp_input_file = TEMP_FLOW_TICKET_OUTPUT

                try:
                    print(f"{func}: Download the input file")
                    S3_CLIENT.download_file(bucket, input_file, temp_input_file)
                except Exception as e:
                    print(f"{func}: Failed to download file from S3: {bucket}/{input_file}")
                    raise e
                
                if CAMPAIGN_MAP == "flow":
                    try:
                        print(f"{func}: Download intermediate file")
                        cluster_ticket_output_file = f"{SCRATCH_DIR}/intermediate/{unique_id}/cluster_ticket_output.json"
                        S3_CLIENT.download_file(bucket, cluster_ticket_output_file, TEMP_CLUSTER_TICKET_OUTPUT)
                    except Exception as e:
                        print(f"{func}: Failed to download file from S3: {bucket}/{cluster_ticket_output_file}")
                        raise e
                global_metrics_filename = f"{SCRATCH_DIR}/intermediate/{unique_id}/global_metrics.json"
                try:
                    print(f"{func}: Download the global metrics file")
                    S3_CLIENT.download_file(bucket, global_metrics_filename, TEMP_GLOBAL_METRICS_FILENAME)
                except Exception as e:
                    print(f"{func}: Failed to download file from S3: {bucket}/{global_metrics_filename}")
                    raise e

                print(f"{func}: Download global feature file")
                global_feature_filename = f"{SCRATCH_DIR}/intermediate/{unique_id}/global_feature.json"
                try:
                    print(f"{func}: Download the global feature file")
                    S3_CLIENT.download_file(bucket, global_feature_filename, TEMP_GLOBAL_FEATURE_FILENAME)
                except Exception as e:
                    print(f"{func}: Failed to download file from S3: {bucket}/{global_feature_filename}")
                    raise e

                print("Starting campaign creation")
                create_campaigns()
                print(f"{func}: Complete campaign creation")
            except Exception as ex:
                print(f"{func}: Exception occurred while running lambda function. Uploading error file to S3.")
                error_message = f"Error occurred in lambda function: {context.function_name}. More details can be found in CloudWatch Logs for this lambda function. The exception message is: {ex}"
                upload_error_to_s3(bucket, error_message)
                raise ex


        def create_campaigns():
            '''
            Create events, campaigns. Where campaigns could be a flow or a cluster.
            '''
            global TEMP_CLUSTER_TICKET_OUTPUT
            global CAMPAIGN_MAP
            global EVENT_THRESHOLD
            global HOST
            func = "create_campaign"

            retry_limit = 3
            retry_delay = 5

            print(f"{func}: Get UI session")
            for attempt in range(retry_limit):
                try:
                    ui_cookies, ui_headers, ui_session = get_ui_session()
                except Exception as e:
                    print(f"Attempt {attempt + 1} failed: {e}")
                    if attempt < retry_limit - 1:
                        print(f"Retrying in {retry_delay} seconds")
                        time.sleep(retry_delay)
                    else:
                        print("Max retries reached. Failed to get session.")
                        raise

            create_global_metrics(ui_cookies, ui_headers, ui_session)

            create_global_features(ui_cookies, ui_headers, ui_session)

            if CAMPAIGN_MAP == "cluster":
                create_campaign_for_cluster(ui_cookies, ui_headers, ui_session)
            else:
                create_campaign_for_flow(ui_cookies, ui_headers, ui_session)


        def create_global_metrics(ui_cookies, ui_headers, ui_session):
            '''
            Update or upload global metrics to UI
            '''
            global S3_CLIENT
            global SCRATCH_DIR
            global TEMP_GLOBAL_METRICS_FILENAME
            func = "create_global_metrics"

            global_metrics_json = json.load(open(TEMP_GLOBAL_METRICS_FILENAME))
            payload = {"metric": global_metrics_json}

            try:
                response_from_ui = ui_session.post(f"{HOST}api/v2/globalmetric/", json=payload, cookies=ui_cookies, headers=ui_headers)
                print(f"{func}: Got response", response_from_ui)
                response_from_ui.raise_for_status()
                print(f"{func}: Global metrics added successful")
            except requests.exceptions.RequestException as req_err:
                print("Request error occurred.")
                raise req_err
            except Exception as e:
                print(f"{func}: An unexpected error occurred")
                raise e


        def create_global_features(ui_cookies, ui_headers, ui_session):
            '''
            Update or upload global metrics to UI
            '''
            global S3_CLIENT
            global SCRATCH_DIR
            global TEMP_GLOBAL_FEATURE_FILENAME
            func = "create_global_features"

            global_feature_json = json.load(open(TEMP_GLOBAL_FEATURE_FILENAME))

            if len(global_feature_json["event_feature"]) < 1 and len(global_feature_json["node_feature"]) < 1:
                print(f"{func}: No new global features to add. Skip adding global features")
                return

            try:
                response_from_ui = ui_session.post(f"{HOST}api/v2/globalfeature/bulk_create/", json=global_feature_json, cookies=ui_cookies, headers=ui_headers)
                print(f"{func}: Got response", response_from_ui)
                response_from_ui.raise_for_status()
                print(f"{func}: Global features added successful")
            except requests.exceptions.RequestException as req_err:
                print("Request error occurred.")
                raise req_err
            except Exception as e:
                print(f"{func}: An unexpected error occurred")
                raise e


        def create_campaign_for_cluster(ui_cookies, ui_headers, ui_session):
            '''
            Create campaign for cluster
            Args:
                ui_cookies: UI cookies
                ui_headers: UI header
                ui_session: UI session
            '''
            global TEMP_CLUSTER_TICKET_OUTPUT
            global CAMPAIGN_MAP
            global EVENT_THRESHOLD
            global HOST
            func = "create_campaign_for_cluster"
            try:

                with open(TEMP_CLUSTER_TICKET_OUTPUT, "r") as f:
                    for line in f:
                        row_cluster = json.loads(line)

                        print(f"{func}: Evaluating for {CAMPAIGN_MAP}: {row_cluster['cluster_id']}")

                        involved_events_df = pd.DataFrame(row_cluster["involved_events"])

                        involved_events_df['time_df'] = pd.to_datetime(involved_events_df['time'], unit="s")
                        involved_events_df['time_df'] = involved_events_df['time_df'].dt.strftime('%Y-%m-%d %H:%M:%S')

                        events = create_events_list_for_ui(involved_events_df)

                        if len(events) < EVENT_THRESHOLD:
                            print(
                                f"{func}: Length of list of events is less than threshold set. Skip creation of campaign. length: {len(events)}")
                            continue

                        files, payload = create_payload_to_create_campaign_and_events_in_ui(events, row_cluster['cluster_id'])

                        print(f"{func}: Sending campaign create request")

                        try:
                            response_from_ui = ui_session.post(f"{HOST}api/v2/campaign/", data=payload, files=files, cookies=ui_cookies, headers=ui_headers)
                            print(f"{func}: Got response", response_from_ui)
                            response_from_ui.raise_for_status()
                            print(f"{func}: Campaign creation successful")
                        except Exception as e:
                            print(f"{func}: Failed to create campaign")
                            raise e

            except requests.exceptions.RequestException as req_err:
                print("Request error occurred.")
                raise req_err
            except Exception as e:
                print(f"{func}: An unexpected error occurred")
                raise e


        def create_campaign_for_flow(ui_cookies, ui_headers, ui_session):
            '''
            Create campaign for flow
            Args:
                ui_cookies: UI cookies
                ui_headers: UI header
                ui_session: UI session
            '''
            global TEMP_FLOW_TICKET_OUTPUT
            global TEMP_CLUSTER_TICKET_OUTPUT
            global CAMPAIGN_MAP
            global EVENT_THRESHOLD
            global HOST
            func = "create_campaign_for_flow"
            try:
                flow_ticket_output_df = pd.read_json(TEMP_FLOW_TICKET_OUTPUT, lines=True)
                cluster_ticket_output_df = pd.read_json(TEMP_CLUSTER_TICKET_OUTPUT, lines=True)
                
                if len(flow_ticket_output_df) < 1:
                    print(f"{func}: There is no {CAMPAIGN_MAP} detected for input. No campaigns to push to UI.")
                    exit(0)

                for _, row_flow in flow_ticket_output_df.iterrows():
                    print(f"{func}: Evaluating for {CAMPAIGN_MAP}: {row_flow['flow_id']}")

                    involved_clusters_df = cluster_ticket_output_df[cluster_ticket_output_df["ticket_id"] == row_flow["cluster_ticket_ids"]]
                    involved_events = involved_clusters_df['involved_events'].sum()
                    involved_events_df = pd.DataFrame(involved_events)

                    involved_events_df['time_df'] = pd.to_datetime(involved_events_df['time'], unit="s")
                    involved_events_df['time_df'] = involved_events_df['time_df'].dt.strftime('%Y-%m-%d %H:%M:%S')

                    events = create_events_list_for_ui(involved_events_df)

                    if len(events) < EVENT_THRESHOLD:
                        print(
                            f"{func}: Length of list of events is less than threshold set. Skip creation of campaign. length: {len(events)}")
                        continue

                    files, payload = create_payload_to_create_campaign_and_events_in_ui(events, row_flow['flow_id'])

                    print(f"{func}: Sending campaign create request")

                    try:
                        response_from_ui = ui_session.post(f"{HOST}api/v2/campaign/", data=payload, files=files, cookies=ui_cookies, headers=ui_headers)
                        print(f"{func}: Got response", response_from_ui)
                        response_from_ui.raise_for_status()
                        print(f"{func}: Campaign creation successful")
                    except Exception as e:
                        print(f"{func}: Failed to create campaign")
                        raise e

            except requests.exceptions.RequestException as req_err:
                print("Request error occurred.")
                raise req_err
            except Exception as e:
                print(f"{func}: An unexpected error occurred")
                raise e


        def create_payload_to_create_campaign_and_events_in_ui(events, campaign):
            '''
            Create payload for creating campaign and event in UI
            Args:
                events: list of events
                campaign: campaign name
            Return:
                files: The zip file that contains events data
                payload: JSON object to create campaign
            '''
            global CAMPAIGN_MAP
            req = json.dumps(events)
            req = gzip.compress(bytes(req, "utf-8"))
            open("/tmp/req.gzip", "wb").write(req)
            payload = {"name": f"{CAMPAIGN_MAP}-{campaign}", "description": "automatically created alerts", "auto_extract_iocs": "true"}
            files = [('details', ('req.gzip', open('/tmp/req.gzip', 'rb'), 'application/octet-stream'))]
            return files, payload


        def create_events_list_for_ui(involved_events_df: pd.DataFrame):
            '''
            Create list of events for a campaign
            Args:
                involved_events_df: pandas dataframe of involved events
            Return:
                events: List of events for UI
            '''
            global TAC_THRESHOLD
            events = []

            for _, row in involved_events_df.iterrows():
                row_dict = row.to_dict()
                raw_data = row_dict
                tac = row_dict["tac"]
                if len(tac) < TAC_THRESHOLD:
                    continue
                tech = row_dict["tech"]
                tech = [int(t[1:]) for t in tech]

                mapped_data = {
                    "TTP": {"TACTICS": tac, "TECHNIQUES": tech},
                    "suggested_ttp": tac,
                    "UID": row_dict["id"],
                    "SOURCE_IP": row_dict["src"],
                    "DESTINATION_IP": row_dict["dst"],
                    "HOSTNAME": row_dict["name"],
                    "MESSAGE": row_dict["name"],
                    "TIMESTAMP": row_dict["time_df"],
                }

                raw_data.pop("time_df")
                if "label" in raw_data:
                    raw_data.pop("label")
                if "other_attributes_dict" in raw_data and "empty" in raw_data["other_attributes_dict"]:
                    raw_data["other_attributes_dict"].pop("empty")
                for key, value in raw_data.items():
                    raw_data[key] = str(value)
                events.append({"raw_data": raw_data, "mapped_data": mapped_data})

            return events


        def get_ui_session():
            '''
            Get UI session and cookies
            Returns:
                cookies: Cookies for the connection to UI
                headers: Headers for the connection to UI
                s: session for the connection to UI
            '''
            global HOST
            global UI_PASSWORD
            global UI_USERNAME

            try:
                s = requests.Session()
                response = s.get(HOST + "login")
                response.raise_for_status()

                cookies = s.cookies.get_dict()
                csrf = cookies.get("csrftoken")
                if not csrf:
                    raise ValueError("CSRF token not found in cookies")

                headers = {"X-CSRFToken": csrf, "Accept": "application/json"}

                response = s.post(f"{HOST}login/?next",
                                  data={"username": UI_USERNAME, "password": UI_PASSWORD, "csrfmiddlewaretoken": csrf})

                if response.status_code != 404 and not response.url.endswith("/accounts/profile/"):
                    response.raise_for_status()

                print("login successful")
                return cookies, headers, s

            except requests.exceptions.RequestException as e:
                print("Request error occurred.")
                raise e
            except Exception as e:
                print("An unexpected error occurred.")
                raise e


        def upload_error_to_s3(bucket, error_message):
            '''
            Upload error message to S3
            Args:
                error_message: The error message to upload
                unique_id: Unique ID for identifying the error log
            '''
            global S3_CLIENT
            error_log_key = "output/error_log.txt"
            try:
                S3_CLIENT.put_object(Bucket=bucket, Key=error_log_key, Body=error_message)
                print(f"Uploaded error log to s3://{bucket}/{error_log_key}")
            except Exception as e:
                print(f"Failed to upload error log to S3. Error: {str(e)}")


        def clear_temp_dir():
            rm_list = glob.glob("/tmp/**", recursive=True)
            for f in rm_list:
                if os.path.isfile(f):
                    os.remove(f)

      Handler: index.lambda_handler
      Runtime: python3.11
      MemorySize: 3008
      EphemeralStorage:
        Size: 10240
      Timeout: 900
      Role: !GetAtt LambdaRole.Arn
      Tracing: Active
      Layers:
        - !FindInMap
          - RegionMap
          - !Ref AWS::Region
          - lambdaLayer
      Environment:
        Variables:
          campaign_map: !Ref ClusterOrFlowMapToCampaign
          event_threshold: !Ref EventThreshold
          tac_threshold: !Ref TacticThreshold
          ui_lb_url: !GetAtt LoadBalancer.DNSName 
          ui_password: !Ref SuperuserPassword
          ui_username: !Ref SuperuserUsername
      Events:
        S3ObjectCreated:
          Type: S3
          Properties:
            Bucket: !Ref Bucket
            Events: s3:ObjectCreated:*
            Filter:
              S3Key:
                Rules:
                  - Name: prefix
                    Value: scratch/output/flow/
                  - Name: suffix
                    Value: .json
  createCampaignLogGroup:
    Type: AWS::Logs::LogGroup
    DeletionPolicy: Retain
    Properties:
      LogGroupName: !Sub /aws/lambda/${createCampaign}
  createCampaignLambdaEventInvokeConfig:
    Type: AWS::Lambda::EventInvokeConfig
    Properties:
      FunctionName: !Ref createCampaign
      MaximumRetryAttempts: 0
      Qualifier: $LATEST

  saveFeedback:
    Type: AWS::Serverless::Function
    DependsOn:
      - LoadBalancer
      - ECSLogGroup
    Properties:
      FunctionName: !Ref SaveFeedbackFunctionName
      Description: !Sub
        - Stack ${AWS::StackName} Function ${ResourceName}
        - ResourceName: saveFeedback
      InlineCode: |
        '''
        Save copy/cut action feedback to S3

        Input: log message with action id and campaign ids involved
        '''

        import ast
        import base64
        import gzip
        import json
        import os
        import time
        import uuid
        import glob
        from datetime import datetime, timezone
        import sqlite3
        from contextlib import closing

        import pandas as pd
        import boto3
        from botocore.exceptions import ClientError
        import requests
        from requests.exceptions import RequestException

        pd.options.mode.chained_assignment = None

        UI_LB_URL = os.getenv("ui_lb_url")

        BUCKET = os.getenv("bucket")

        UI_USERNAME = os.getenv("ui_username")

        UI_PASSWORD = os.getenv("ui_password")

        missing_variables = []

        if UI_LB_URL is None:
            missing_variables.append("ui_lb_url")
        if BUCKET is None:
            missing_variables.append("bucket")
        if UI_USERNAME is None:
            missing_variables.append("ui_username")
        if UI_PASSWORD is None:
            missing_variables.append("ui_password")

        if missing_variables:
            raise ValueError(f"Please enter environment variable(s): {missing_variables}")

        SCRATCH_DIR = "scratch"

        S3_CLIENT = boto3.client("s3")

        TEMP_CLUSTER_OF_ALERTS_JSON_FILENAME = "/tmp/feedback_alerts_cluster.json"

        PATH_TO_SQLITE = f"{SCRATCH_DIR}/cypienta.db"
        TEMP_SQLITE_FILENAME = "/tmp/cypienta.db"

        TEMP_GLOBAL_ATTRIBUTE_WEIGHTS_FILENAME = "/tmp/attribute_weights.json"
        TEMP_GLOBAL_CLUSTER_OPERATIONS_FILENAME = "/tmp/global_cluster_operations.json"

        TEMP_CURRENT_OPERATIONS_FILENAME = "/tmp/current_cluster_operations.json"

        TEMP_CURRENT_CLUSTER_TICKET_OUTPUT_FILENAME = "/tmp/current_cluster_ticket_output.json"

        PATH_TO_FINAL_CLUSTER_OUTPUT = f"{SCRATCH_DIR}/cluster.json"
        TEMP_FINAL_CLUSTER_OUTPUT_FILENAME = "/tmp/final_cluster_output.json"

        PATH_TO_FINAL_CLUSTER_TICKET_OUTPUT = f"{SCRATCH_DIR}/cluster_ticket_output.json"
        TEMP_FINAL_CLUSTER_TICKET_OUTPUT_FILENAME = "/tmp/final_cluster_ticket_output.json"

        INITIAL_WEIGHTS_VALUE = 100
        MIN_WEIGHTS_VALUE = 0
        MAX_WEIGHTS_VALUE = 200

        SOURCE_EMPTY = False

        HOST = f"http://{UI_LB_URL}:8000/"


        def lambda_handler(event, context):
            '''
            Entrypoint from the trigger setup from lambda
            Args:
                event: Event triggered
                context: Context of the lambda function
            '''
            global SCRATCH_DIR
            global PATH_TO_SQLITE
            global TEMP_SQLITE_FILENAME
            global SOURCE_EMPTY

            func = "lambda_handler"

            SOURCE_EMPTY = False

            print(f"{func}: Received event: {json.dumps(event)}")

            aws_log = event['awslogs']['data']
            compressed_payload = base64.b64decode(aws_log)
            uncompressed_payload = gzip.decompress(compressed_payload)
            decoded_payload = json.loads(uncompressed_payload.decode('utf-8'))

            print("Decoded CloudWatch data: ", decoded_payload)

            message = decoded_payload["logEvents"][0]["message"]

            clear_temp_dir()

            try:
                message_json = json.loads(message)

                campaign_id_list = message_json["message"]["campaign_id"]
                action_id = message_json["message"]["action_id"]
            except json.JSONDecodeError as json_error:
                print(f"Error parsing JSON from message: {message}")
                raise json_error
            except KeyError as key_error:
                print("Missing key in JSON message")
                raise key_error

            print(f"{func}: Save feedback for campaign id(s): {campaign_id_list}")
            print(f"{func}: Save feedback for action id: {action_id}")

            try:
                print(f"{func}: Download sqlite file from S3")
                S3_CLIENT.download_file(BUCKET, PATH_TO_SQLITE, TEMP_SQLITE_FILENAME)
            except Exception as e:
                print(f"{func}: Failed to download file: {BUCKET}/{PATH_TO_SQLITE}")
                raise e

            save_feedback(action_id)

            try:
                print(f"{func}: Upload sqlite to S3")

                S3_CLIENT.upload_file(TEMP_SQLITE_FILENAME, BUCKET, PATH_TO_SQLITE)

                print(f"{func}: Upload completed")
            except Exception as e:
                print(f"{func}: Failed to upload sqlite from S3: {BUCKET}/{PATH_TO_SQLITE}")
                raise e


        def save_feedback(action_id):
            '''
            Fetch action performed on campaigns and create operations list, update final cluster json
            create new cluster ticket output
            Args:
                action_id: action id in UI that has details for the action performed
            '''
            global BUCKET
            global S3_CLIENT

            func = "save_feedback"

            try:
                ui_cookies, ui_headers, ui_session = get_ui_session()
            except Exception as e:
                print(f"{func}: Failed to get session.")
                raise e

            try:
                action_attributes = get_action_attributes(ui_cookies, ui_headers, ui_session, action_id)
            except Exception as e:
                print(f"{func}: Failed to get action attributes.")
                raise e

            # get weights from the action and save to global weights
            update_global_attributes_weights(action_attributes)

            # create current action operations
            source_cluster_id, destination_cluster_id = create_current_operations(action_attributes, ui_cookies, ui_headers, ui_session)

            metrics = update_cluster_output_sql(source_cluster_id, destination_cluster_id)

            update_cluster_ticket_output_sql(source_cluster_id, destination_cluster_id, metrics)

            create_current_cluster_ticket_output_sql(source_cluster_id, destination_cluster_id)

            # upload feedback cluster ticket output to S3
            current_timestamp = datetime.now(timezone.utc).strftime("%Y-%m-%d %H:%M:%S %z")
            path_to_feedback_cluster_ticket_output = f"{SCRATCH_DIR}/feedback/cluster_ticket_output_{current_timestamp}.json"
            try:
                S3_CLIENT.upload_file(TEMP_CURRENT_CLUSTER_TICKET_OUTPUT_FILENAME, BUCKET, path_to_feedback_cluster_ticket_output)
                print(f"{func}: Uploaded file to S3: {BUCKET}/{path_to_feedback_cluster_ticket_output}")
            except Exception as e:
                print(f"{func}: Failed to upload file to S3: {BUCKET}/{path_to_feedback_cluster_ticket_output}")
                raise e


        def create_current_operations(action_attributes, ui_cookies, ui_headers, ui_session):
            '''
            Create operations of create and delete to perform on clusters
            Args:
                action_attributes: Record of current action performed on which the function is triggered
            '''
            global TEMP_CURRENT_OPERATIONS_FILENAME
            global TEMP_SQLITE_FILENAME

            func = "create_current_operations"

            print(f"{func}: Get source cluster id for campaign id: {action_attributes['src_campaign']}")
            campaign_details = get_campaign(ui_cookies, ui_headers, ui_session, action_attributes['src_campaign'])

            print(f"{func}: Source campaign name: {campaign_details['name']}")

            source_cluster_id = int(campaign_details["name"].split("-")[-1])
            print(f"{func}: Source cluster id: {source_cluster_id}")

            print(f"{func}: Get destination cluster id for campaign id: {action_attributes['dst_campaign'][0]}")
            campaign_details = get_campaign(ui_cookies, ui_headers, ui_session, action_attributes['dst_campaign'][0])

            print(f"{func}: Destination campaign name: {campaign_details['name']}")

            destination_cluster_id = int(campaign_details["name"].split("-")[-1])
            print(f"{func}: Destination cluster id: {destination_cluster_id}")

            print(f"{func}: Get involved events list")
            events_involved = action_attributes["events_involved"][str(action_attributes['dst_campaign'][0])]
            print(f"{func}: Involved events: {events_involved}")

            # get user alert ids
            print(f"{func}: Get involved user alert ids")
            involved_user_alert_ids = []
            for event_id in events_involved:
                raw_event = get_raw_event(ui_cookies, ui_headers, ui_session, action_attributes['dst_campaign'][0], event_id)
                involved_user_alert_ids.append(raw_event["raw_data"]["id"])

            print(f"{func}: Involved user alert ids: {involved_user_alert_ids}")

            # sorting the operations by cluster id to make it easier for updating global cluster, global cluster ticket
            # the cluster id is unique in this case
            with open(TEMP_CURRENT_OPERATIONS_FILENAME, "w") as f:
                operation = {
                    "action_type": "DELETE",
                    "cluster_id": source_cluster_id,
                    "alert_ids": [involved_user_alert_ids]
                }
                f.write(json.dumps(operation) + "\n")

                operation = {
                    "action_type": "INSERT",
                    "cluster_id": destination_cluster_id,
                    "alert_ids": [involved_user_alert_ids]
                }
                f.write(json.dumps(operation) + "\n")

            insert_operation_query = """
            INSERT INTO operation_on_cluster (
                cluster_id,
                alert_ids,
                operation_type
            )
            VALUES (
                :cluster_id,
                :alert_ids,
                :action_type
            )
            ;
            """

            try:
                with closing(sqlite3.connect(TEMP_SQLITE_FILENAME)) as conn:
                    conn.row_factory = sqlite3.Row
                    conn.execute("PRAGMA foreign_keys = ON;")
                    with closing(conn.cursor()) as cursor:
                        insert_operation_params = [
                            {
                                "action_type": "DELETE",
                                "cluster_id": source_cluster_id,
                                "alert_ids": json.dumps([involved_user_alert_ids])
                            },
                            {
                                "action_type": "INSERT",
                                "cluster_id": destination_cluster_id,
                                "alert_ids": json.dumps([involved_user_alert_ids])
                            }
                        ]

                        cursor.executemany(insert_operation_query, insert_operation_params)

                        update_event_params = {
                            "cluster_id": destination_cluster_id
                        }
                        alert_id_list = "','".join(map(str, involved_user_alert_ids))
                        alert_id_list = f"('{alert_id_list}')"
                        update_event_query = f"""
                        UPDATE event
                        SET cluster_id = :cluster_id
                        WHERE alert_id IN {alert_id_list}
                        ;
                        """
                        print(update_event_query)
                        cursor.execute(update_event_query, update_event_params)
                    conn.commit()

            except Exception as e:
                print(f"{func}: Failed to update event table.")
                raise e

            return source_cluster_id, destination_cluster_id


        def update_cluster_output_sql(source_cluster_id, destination_cluster_id):
            '''
            Update cluster output table in sqlite
            Args:
                source_cluster_id: source cluster id
                destination_cluster_id: destination cluster id
            Returns:
                metrics dictionary for cluster id (contains count of unique tech, tac, stage per cluster)
            '''
            global TEMP_SQLITE_FILENAME
            global SOURCE_EMPTY

            func = "update_cluster_output_sql"

            batch_cluster_id_list = [source_cluster_id, destination_cluster_id]

            update_cluster_query = """
            UPDATE cluster_output
            SET cluster_starttime = :cluster_starttime,
                cluster_endtime = :cluster_endtime,
                cluster_srcips = :cluster_srcips,
                cluster_dstips = :cluster_dstips,
                cluster_techs = :cluster_techs,
                cluster_tacs = :cluster_tacs,
                cluster_stages = :cluster_stages
            WHERE cluster_id = :cluster_id
            ;
            """

            metrics = {}

            try:
                with closing(sqlite3.connect(TEMP_SQLITE_FILENAME)) as conn:
                    conn.row_factory = sqlite3.Row
                    conn.execute("PRAGMA foreign_keys = ON;")
                    with closing(conn.cursor()) as cursor:

                        cluster_id_list = ",".join(map(str, batch_cluster_id_list))
                        cluster_id_list = f"({cluster_id_list})"

                        select_events_filter_cluster_query = f"""
                        SELECT
                            cluster_id,
                            MIN(time) AS min_time,
                            MAX(time) AS max_time,
                            GROUP_CONCAT(DISTINCT src) AS unique_src,
                            GROUP_CONCAT(DISTINCT dst) AS unique_dst,
                            GROUP_CONCAT(tech,';') AS combined_tech,
                            GROUP_CONCAT(tac,';') AS combined_tac,
                            GROUP_CONCAT(stage,';') AS combined_stage
                        FROM
                            event
                        WHERE cluster_id IN {cluster_id_list}
                        GROUP BY cluster_id
                        ORDER BY cluster_id
                        ;
                        """
                        cursor.execute(select_events_filter_cluster_query)
                        result = cursor.fetchall()

                        update_cluster_output_params = []
                        for row in result:
                            row = dict(row)
                            src_ips = row['unique_src']
                            src_ips = src_ips.split(",")

                            dst_ips = row['unique_dst']
                            dst_ips = dst_ips.split(",")

                            combined_tech = row['combined_tech']
                            combined_tech = combined_tech.split(";")
                            tech_list = []
                            for techs in combined_tech:
                                tech_list += json.loads(techs)
                            tech_list = list(set(tech_list))

                            combined_tac = row['combined_tac']
                            combined_tac = combined_tac.split(";")
                            tac_list = []
                            for tacs in combined_tac:
                                tac_list += json.loads(tacs)
                            tac_list = list(set(tac_list))

                            combined_stage = row['combined_stage']
                            combined_stage = combined_stage.split(";")
                            stage_list = []
                            for stage in combined_stage:
                                stage_list += json.loads(stage)
                            stage_list = list(set(stage_list))

                            metrics[row['cluster_id']] = {
                                "tech": {"count": len(tech_list)},
                                "tac": {"count": len(tac_list)},
                                "stage": {"count": len(stage_list)}
                            }

                            # if cluster id is present in table. Update fields for existing row.
                            params = {
                                "cluster_starttime": row['min_time'],
                                "cluster_endtime": row['max_time'],
                                "cluster_srcips": json.dumps(src_ips),
                                "cluster_dstips": json.dumps(dst_ips),
                                "cluster_techs": json.dumps(tech_list),
                                "cluster_tacs": json.dumps(tac_list),
                                "cluster_stages": json.dumps(stage_list),
                                "cluster_id": row['cluster_id']
                            }

                            update_cluster_output_params.append(params)

                        if source_cluster_id not in metrics:
                            SOURCE_EMPTY = True
                            params = {
                                "cluster_starttime": 0.0,
                                "cluster_endtime": 0.0,
                                "cluster_srcips": json.dumps([]),
                                "cluster_dstips": json.dumps([]),
                                "cluster_techs": json.dumps([]),
                                "cluster_tacs": json.dumps([]),
                                "cluster_stages": json.dumps([]),
                                "cluster_id": source_cluster_id
                            }

                            update_cluster_output_params.append(params)

                        cursor.executemany(update_cluster_query, update_cluster_output_params)

                    conn.commit()
                return metrics
            except Exception as e:
                print(f"{func}: Failed to update event table.")
                raise e


        def update_cluster_ticket_output_sql(source_cluster_id, destination_cluster_id, metrics):
            '''
            Update cluster ticket output metrics
            Args:
                source_cluster_id: source cluster id
                destination_cluster_id: destination cluster id
                metrics: list of metric for cluster id (contains count of unique tech, tac, stage per cluster)
            '''
            global TEMP_SQLITE_FILENAME

            func = "update_cluster_ticket_output_sql"

            batch_cluster_id_list = [source_cluster_id, destination_cluster_id]

            metrics_cols = ["tech", "tac", "stage"]

            update_cluster_ticket_output_query = """
            UPDATE cluster_ticket_output
            SET metrics = :metrics
            WHERE cluster_id = :cluster_id
            ;
            """

            try:
                with closing(sqlite3.connect(TEMP_SQLITE_FILENAME)) as conn:
                    conn.row_factory = sqlite3.Row
                    conn.execute("PRAGMA foreign_keys = ON;")
                    with closing(conn.cursor()) as cursor:
                        cluster_id_list = ",".join(map(str, batch_cluster_id_list))
                        cluster_id_list = f"({cluster_id_list})"
                        select_event_query = f"""
                        SELECT *
                        FROM event
                        WHERE cluster_id IN {cluster_id_list}
                        ORDER BY cluster_id
                        ;
                        """
                        cursor.execute(select_event_query)
                        results = cursor.fetchall()
                        results = [dict(row) for row in results]
                        events_df = pd.DataFrame(results)

                        update_cluster_ticket_output_params = []
                        for cluster_id in batch_cluster_id_list:
                            if cluster_id in metrics:
                                filtered_events = events_df[events_df["cluster_id"] == cluster_id]
                                cluster_metrics = per_cluster_metrics_events(filtered_events)

                                for metric_col in metrics_cols:
                                    cluster_metrics[metric_col]["count"] = float(metrics[cluster_id][metric_col]["count"])

                                params = {
                                    "cluster_id": cluster_id,
                                    "metrics": json.dumps(cluster_metrics)
                                }
                                update_cluster_ticket_output_params.append(params)
                            else:
                                params = {
                                    "cluster_id": cluster_id,
                                    "metrics": json.dumps({})
                                }
                                update_cluster_ticket_output_params.append(params)
                        cursor.executemany(update_cluster_ticket_output_query, update_cluster_ticket_output_params)

                    conn.commit()

            except Exception as e:
                print(f"{func}: Failed to update cluster ticket output table.")
                raise e


        def per_cluster_metrics_events(involved_events_df):
            '''
            Get metrics per cluster
            Args:
                row_cluster_ticket_output: one record of cluster ticket output
            Returns:
                metrics of one record of cluster ticket output
            '''

            metrics_cols = ["tech", "tac", "stage"]

            metrics = {}

            for metric_col in metrics_cols:
                involved_events_df[f'{metric_col}'] = involved_events_df[f'{metric_col}'].apply(json.loads)
                involved_events_df[f'{metric_col}_count'] = involved_events_df[f'{metric_col}'].apply(len)

                metrics[metric_col] = {
                    "avg": float(involved_events_df[f"{metric_col}_count"].mean()),
                    "min": float(involved_events_df[f"{metric_col}_count"].min()),
                    "max": float(involved_events_df[f"{metric_col}_count"].max()),
                    "median": float(involved_events_df[f"{metric_col}_count"].median())
                }

            return metrics


        def create_current_cluster_ticket_output_sql(source_cluster_id, destination_cluster_id):
            '''
            Create cluster ticket output for current operation clusters
            Args:
                source_cluster_id: source cluster id
                destination_cluster_id: destination cluster id
            '''
            global SCRATCH_DIR
            global S3_CLIENT
            global TEMP_SQLITE_FILENAME
            global TEMP_CURRENT_CLUSTER_TICKET_OUTPUT_FILENAME
            global SOURCE_EMPTY

            func = "create_current_cluster_ticket_output_sql"

            input_cluster_id_list = [source_cluster_id, destination_cluster_id]

            try:
                with open(TEMP_CURRENT_CLUSTER_TICKET_OUTPUT_FILENAME, "w") as f:
                    with closing(sqlite3.connect(TEMP_SQLITE_FILENAME)) as conn:
                        conn.row_factory = sqlite3.Row
                        conn.execute("PRAGMA foreign_keys = ON;")
                        with closing(conn.cursor()) as cursor:
                            cluster_id_list = ",".join(map(str, input_cluster_id_list))
                            cluster_id_list = f"({cluster_id_list})"
                            select_query = f"""
                            SELECT 
                                co.cluster_id,
                                co.cluster_starttime,
                                co.cluster_endtime,
                                co.cluster_srcips,
                                co.cluster_dstips,
                                co.cluster_techs,
                                co.cluster_tacs,
                                co.cluster_stages,
                                cto.ticket_id,
                                cto.metrics,
                                e.alert_id,
                                e.src AS event_src,
                                e.dst AS event_dst,
                                e.time AS event_time,
                                e.name AS event_name,
                                e.tech AS event_tech,
                                e.tac AS event_tac,
                                e.stage AS event_stage,
                                e.other_attributes AS event_other_attributes
                            FROM cluster_ticket_output cto
                            INNER JOIN cluster_output co ON co.cluster_id = cto.cluster_id
                            INNER JOIN event e ON e.cluster_id = cto.cluster_id
                            WHERE cto.cluster_id IN {cluster_id_list}
                            ORDER BY cto.cluster_id
                            ;
                            """
                            cursor.execute(select_query)

                            row = cursor.fetchone()
                            prev_cluster = None

                            involved_events = []

                            while row:
                                row = dict(row)

                                curr_cluster = row
                                if prev_cluster is None:
                                    prev_cluster = curr_cluster

                                if curr_cluster["cluster_id"] != prev_cluster["cluster_id"]:
                                    row_cluster_ticket_output = {
                                        "ticket_id": prev_cluster["ticket_id"],
                                        "cluster_id": prev_cluster["cluster_id"],
                                        "involved_events": involved_events,
                                        "start_time": prev_cluster["cluster_starttime"],
                                        "end_time": prev_cluster["cluster_endtime"],
                                        "involved_entities": list(set(json.loads(prev_cluster["cluster_dstips"]) + json.loads(prev_cluster["cluster_srcips"]))),
                                        "involved_techs": json.loads(prev_cluster["cluster_techs"]),
                                        "involved_tacs": json.loads(prev_cluster["cluster_tacs"]),
                                        "involved_stages": json.loads(prev_cluster["cluster_stages"]),
                                        "metrics": json.loads(prev_cluster["metrics"]),
                                    }
                                    f.write(json.dumps(row_cluster_ticket_output) + "\n")
                                    involved_events = []

                                events = {
                                    "id": row["alert_id"],
                                    "src": row["event_src"],
                                    "dst": row["event_dst"],
                                    "time": row["event_time"],
                                    "name": row["event_name"],
                                    "tech": json.loads(row["event_tech"]),
                                    "tac": json.loads(row["event_tac"]),
                                    "stage": json.loads(row["event_stage"]),
                                    "other_attributes_dict": json.loads(row["event_other_attributes"]),
                                }
                                involved_events.append(events)

                                prev_cluster = curr_cluster
                                row = cursor.fetchone()

                            row_cluster_ticket_output = {
                                "ticket_id": prev_cluster["ticket_id"],
                                "cluster_id": prev_cluster["cluster_id"],
                                "involved_events": involved_events,
                                "start_time": prev_cluster["cluster_starttime"],
                                "end_time": prev_cluster["cluster_endtime"],
                                "involved_entities": list(set(json.loads(prev_cluster["cluster_dstips"]) + json.loads(prev_cluster["cluster_srcips"]))),
                                "involved_techs": json.loads(prev_cluster["cluster_techs"]),
                                "involved_tacs": json.loads(prev_cluster["cluster_tacs"]),
                                "involved_stages": json.loads(prev_cluster["cluster_stages"]),
                                "metrics": json.loads(prev_cluster["metrics"]),
                            }
                            f.write(json.dumps(row_cluster_ticket_output) + "\n")

                            if SOURCE_EMPTY:
                                select_query = f"""
                                SELECT 
                                    co.cluster_id,
                                    co.cluster_starttime,
                                    co.cluster_endtime,
                                    co.cluster_srcips,
                                    co.cluster_dstips,
                                    co.cluster_techs,
                                    co.cluster_tacs,
                                    co.cluster_stages,
                                    cto.ticket_id,
                                    cto.metrics
                                FROM cluster_ticket_output cto
                                INNER JOIN cluster_output co ON co.cluster_id = cto.cluster_id
                                WHERE cto.cluster_id = {source_cluster_id}
                                ORDER BY cto.cluster_id
                                ;
                                """
                                cursor.execute(select_query)

                                row = cursor.fetchone()
                                row_cluster_ticket_output = {
                                    "ticket_id": row["ticket_id"],
                                    "cluster_id": row["cluster_id"],
                                    "involved_events": [],
                                    "start_time": row["cluster_starttime"],
                                    "end_time": row["cluster_endtime"],
                                    "involved_entities": list(set(json.loads(row["cluster_dstips"]) + json.loads(row["cluster_srcips"]))),
                                    "involved_techs": json.loads(row["cluster_techs"]),
                                    "involved_tacs": json.loads(row["cluster_tacs"]),
                                    "involved_stages": json.loads(row["cluster_stages"]),
                                    "metrics": json.loads(row["metrics"]),
                                }
                            f.write(json.dumps(row_cluster_ticket_output) + "\n")

            except Exception as e:
                print(f"{func}: Failed to update event table.")
                raise e


        def update_global_attributes_weights(action_attributes):
            '''
            Update global attributes weights file
            Args:
                action_attributes: Record of current action performed on which the function is triggered
            '''
            global S3_CLIENT
            global BUCKET
            global TEMP_GLOBAL_ATTRIBUTE_WEIGHTS_FILENAME
            global INITIAL_WEIGHTS_VALUE

            func = "update_global_attributes_weights"

            path_to_global_attribute_weights = f"{SCRATCH_DIR}/attribute_weights.json"
            fetch_or_create_global_attribute_weights(path_to_global_attribute_weights)

            global_attribute_weights_json = json.load(open(TEMP_GLOBAL_ATTRIBUTE_WEIGHTS_FILENAME))

            print(f"{func}: Current weights: {global_attribute_weights_json}")

            attributes_list = ["node", "event"]

            map_attributes = {
                "tech": "technique"
            }

            for attr in attributes_list:
                for key, value in action_attributes["attributes"][attr].items():
                    if key in map_attributes:
                        key = map_attributes[key]
                    if key not in global_attribute_weights_json[attr]:
                        global_attribute_weights_json[attr][key] = INITIAL_WEIGHTS_VALUE

                    global_attribute_weights_json[attr][key] += value

                    if global_attribute_weights_json[attr][key] < MIN_WEIGHTS_VALUE:
                        global_attribute_weights_json[attr][key] = MIN_WEIGHTS_VALUE

                    if global_attribute_weights_json[attr][key] > MAX_WEIGHTS_VALUE:
                        global_attribute_weights_json[attr][key] = MAX_WEIGHTS_VALUE
            
            print(f"{func}: After update weights: {global_attribute_weights_json}")
            json.dump(global_attribute_weights_json, open(TEMP_GLOBAL_ATTRIBUTE_WEIGHTS_FILENAME, "w"))

            try:
                print(f"{func}: Upload file to S3: {BUCKET}/{path_to_global_attribute_weights}")
                S3_CLIENT.upload_file(TEMP_GLOBAL_ATTRIBUTE_WEIGHTS_FILENAME, BUCKET, path_to_global_attribute_weights)
            except Exception as e:
                print(f"{func}: Failed to upload file to S3: {BUCKET}/{path_to_global_attribute_weights}")
                raise e


        def fetch_or_create_global_attribute_weights(path_to_global_attribute_weights):
            '''
            Get or initialize the global attribute weights file
            Args:
                path_to_global_attribute_weights: global attribute weights object key
            '''
            global SCRATCH_DIR
            global S3_CLIENT
            global BUCKET
            global TEMP_GLOBAL_ATTRIBUTE_WEIGHTS_FILENAME
            global INITIAL_WEIGHTS_VALUE
            func = "fetch_or_create_global_attribute_weights"

            try:
                print(
                    f"{func}: Checking if file exists on S3 path {BUCKET}/{path_to_global_attribute_weights}")

                S3_CLIENT.head_object(Bucket=BUCKET, Key=path_to_global_attribute_weights)

            except ClientError as e:
                if e.response['Error']['Code'] != '404':
                    raise e

                print(f"{func}: File does not exist on S3. Initialize file")

                global_attribute_weights_json = {
                    "event": {
                        "technique": INITIAL_WEIGHTS_VALUE,
                        "tactic": INITIAL_WEIGHTS_VALUE,
                        "stage": INITIAL_WEIGHTS_VALUE,
                        "count": INITIAL_WEIGHTS_VALUE,
                        "priority": INITIAL_WEIGHTS_VALUE,
                        "port": INITIAL_WEIGHTS_VALUE,
                        "url": INITIAL_WEIGHTS_VALUE,
                        "user_agent": INITIAL_WEIGHTS_VALUE,
                        "cert": INITIAL_WEIGHTS_VALUE
                    },
                    "node": {
                        "os": INITIAL_WEIGHTS_VALUE,
                        "risk": INITIAL_WEIGHTS_VALUE,
                        "user": INITIAL_WEIGHTS_VALUE,
                        "domain": INITIAL_WEIGHTS_VALUE,
                        "subnet": INITIAL_WEIGHTS_VALUE,
                        "usergroup": INITIAL_WEIGHTS_VALUE,
                        "geolocation": INITIAL_WEIGHTS_VALUE
                    }
                }
                json.dump(global_attribute_weights_json, open(TEMP_GLOBAL_ATTRIBUTE_WEIGHTS_FILENAME, "w"))

                print(f"{func}: Initiated file")
            else:
                try:
                    print(f"{func}: Download available file from S3")

                    S3_CLIENT.download_file(BUCKET, path_to_global_attribute_weights, TEMP_GLOBAL_ATTRIBUTE_WEIGHTS_FILENAME)

                    print(f"{func}: Download completed")
                except Exception as e:
                    print(f"{func}: Failed to download lookup file from S3: {BUCKET}/{path_to_global_attribute_weights}")
                    raise e


        def get_campaign(ui_cookies, ui_headers, ui_session, campaign_id):
            '''
            Get campaign details by sending a request to the UI
            Args:
                ui_cookies: Cookies for the UI session
                ui_headers: Headers for the UI session
                ui_session: Session object for the UI
                campaign_id: ID of the campaign
            Returns:
                JSON response containing the campaign
            '''
            global HOST

            func = "get_campaign"

            try:
                response_from_ui = ui_session.get(f"{HOST}api/v2/campaign/{campaign_id}", cookies=ui_cookies, headers=ui_headers)
                print(f"{func}: Got response", response_from_ui)

                response_from_ui.raise_for_status()

                return response_from_ui.json()
            except RequestException as req_err:
                print(f"{func}: Unable to get campaign details for campaign id: {campaign_id}")
                raise req_err
            except ValueError as json_err:
                print(f"{func}: Unable to parse JSON response for campaign_id id: {campaign_id}. Response: {response_from_ui.content}")
                raise json_err


        def get_raw_event(ui_cookies, ui_headers, ui_session, campaign_id, event_id):
            '''
            Get raw event details by event ID within a campaign.

            Args:
                ui_cookies: Cookies for the UI session
                ui_headers: Headers for the UI session
                ui_session: Session object for the UI
                campaign_id: ID of the campaign
                event_id: ID of the event
            Returns:
                JSON response containing the raw event details
            '''
            global HOST

            func = "get_raw_event"

            try:
                response_from_ui = ui_session.get(f"{HOST}api/v2/campaign/{campaign_id}/event/{event_id}", cookies=ui_cookies, headers=ui_headers)
                print(f"{func}: Got response", response_from_ui)

                response_from_ui.raise_for_status()

                return response_from_ui.json()
            except RequestException as req_err:
                print(f"{func}: Unable to get event details for event id: {event_id}")
                raise req_err
            except ValueError as json_err:
                print(f"{func}: Unable to parse JSON response for event id: {event_id}. Response: {response_from_ui.content}")
                raise json_err


        def get_action_attributes(ui_cookies, ui_headers, ui_session, action_id):
            '''
            Get action attributes by action ID.

            Args:
                ui_cookies: Cookies for the UI session
                ui_headers: Headers for the UI session
                ui_session: Session object for the UI
                action_id: ID of the action

            Returns:
                JSON response containing the action attributes
            '''
            global HOST

            func = "get_action_attributes"

            try:
                response_from_ui = ui_session.get(f"{HOST}api/v2/action/{action_id}", cookies=ui_cookies, headers=ui_headers)
                print(f"{func}: Got response", response_from_ui)

                response_from_ui.raise_for_status()

                return response_from_ui.json()
            except RequestException as req_err:
                print(f"{func}: Unable to get attributes from action id: {action_id}")
                raise req_err
            except ValueError as json_err:
                print(f"{func}: Unable to parse JSON response from action id: {action_id}. Response: {response_from_ui.content}")
                raise json_err


        def get_ui_session():
            '''
            Get UI session and cookies
            Returns:
                cookies: Cookies for the connection to UI
                headers: Headers for the connection to UI
                s: session for the connection to UI
            '''
            global HOST
            global UI_PASSWORD
            global UI_USERNAME

            try:
                s = requests.Session()
                response = s.get(HOST + "login")
                response.raise_for_status()

                cookies = s.cookies.get_dict()
                csrf = cookies.get("csrftoken")
                if not csrf:
                    raise ValueError("CSRF token not found in cookies")

                headers = {"X-CSRFToken": csrf, "Accept": "application/json"}

                response = s.post(f"{HOST}login/?next",
                                  data={"username": UI_USERNAME, "password": UI_PASSWORD, "csrfmiddlewaretoken": csrf})

                if response.status_code != 404 and not response.url.endswith("/accounts/profile/"):
                    response.raise_for_status()

                print("login successful")
                return cookies, headers, s

            except requests.exceptions.RequestException as e:
                print("Request error occurred.")
                raise e
            except Exception as e:
                print("An unexpected error occurred.")
                raise e


        def clear_temp_dir():
            rm_list = glob.glob("/tmp/**", recursive=True)
            for f in rm_list:
                if os.path.isfile(f):
                    os.remove(f)

      Handler: index.lambda_handler
      Runtime: python3.11
      MemorySize: 3008
      EphemeralStorage:
        Size: 10240
      Timeout: 900
      Role: !GetAtt LambdaRole.Arn
      Tracing: Active
      Layers:
        - !FindInMap
          - RegionMap
          - !Ref AWS::Region
          - lambdaLayer
      Environment:
        Variables:
          bucket: !Ref BucketName
          ui_lb_url: !GetAtt LoadBalancer.DNSName 
          ui_password: !Ref SuperuserPassword
          ui_username: !Ref SuperuserUsername
      Events:
        MyLogGroupTrigger:
          Type: CloudWatchLogs
          Properties:
            LogGroupName: !Ref ECSLogGroup
            FilterPattern: "Save campaign json"
  saveFeedbackLogGroup:
    Type: AWS::Logs::LogGroup
    DeletionPolicy: Retain
    Properties:
      LogGroupName: !Sub /aws/lambda/${saveFeedback}
  saveFeedbackLambdaEventInvokeConfig:
    Type: AWS::Lambda::EventInvokeConfig
    Properties:
      FunctionName: !Ref saveFeedback
      MaximumRetryAttempts: 0
      Qualifier: $LATEST

  ExecutionRole:
    Type: AWS::IAM::Role
    Properties:
      AssumeRolePolicyDocument:
        Version: '2012-10-17'
        Statement:
          - Effect: Allow
            Principal:
              Service: ecs-tasks.amazonaws.com
            Action: sts:AssumeRole
      ManagedPolicyArns:
        - arn:aws:iam::aws:policy/service-role/AmazonECSTaskExecutionRolePolicy
        - arn:aws:iam::aws:policy/AWSMarketplaceMeteringRegisterUsage

  EcsInstanceExecutionRole:
    Type: AWS::IAM::Role
    Properties:
      AssumeRolePolicyDocument:
        Version: '2012-10-17'
        Statement:
          - Effect: Allow
            Principal:
              Service: ec2.amazonaws.com
            Action: sts:AssumeRole
      ManagedPolicyArns:
        - arn:aws:iam::aws:policy/service-role/AmazonEC2ContainerServiceforEC2Role
  
  EcsInstanceProfile:
    Type: AWS::IAM::InstanceProfile
    Properties:
      Path: /
      Roles:
        - !Ref EcsInstanceExecutionRole

  TaskDefinition:
    Type: AWS::ECS::TaskDefinition
    DependsOn: LoadBalancer
    Properties:
      Family: !Ref ServiceAndTaskDefinitionName
      Cpu: !Ref Cpu
      Memory: !Ref Memory
      NetworkMode: host
      RequiresCompatibilities: 
        - EC2
      ExecutionRoleArn: !GetAtt ExecutionRole.Arn
      TaskRoleArn: !GetAtt ExecutionRole.Arn
      ContainerDefinitions:
        - Name: bastet_db
          Image: postgres:13-bullseye
          Cpu: 0
          PortMappings: []
          Essential: false
          Environment:
            - Name: POSTGRES_USER
              Value: bastet
            - Name: POSTGRES_PASSWORD
              Value: bastet
            - Name: POSTGRES_DB
              Value: bastet
          MountPoints:
            - SourceVolume: postgres_data
              ContainerPath: /var/lib/postgresql/data/
              ReadOnly: false
          LogConfiguration:
            LogDriver: awslogs
            Options:
              awslogs-group: !Sub /ecs/${ECSClusterName}
              awslogs-region: !Ref AWS::Region
              awslogs-stream-prefix: "ecs"
        - Name: bastet_redis
          Image: redis:6.2.6-alpine
          Cpu: 0
          PortMappings:
            - Name: bastet_redis-6379-tcp
              ContainerPort: 6379
              HostPort: 6379
              Protocol: tcp
          Essential: false
          LogConfiguration:
            LogDriver: awslogs
            Options:
              awslogs-group: !Sub /ecs/${ECSClusterName}
              awslogs-region: !Ref AWS::Region
              awslogs-stream-prefix: "ecs"
        - Name: bastet_rabbitmq
          Image: rabbitmq:alpine
          Cpu: 0
          PortMappings:
            - Name: bastet_rabbitmq-5672-tcp
              ContainerPort: 5672
              HostPort: 5672
              Protocol: tcp
          Essential: false
          LogConfiguration:
            LogDriver: awslogs
            Options:
              awslogs-group: !Sub /ecs/${ECSClusterName}
              awslogs-region: !Ref AWS::Region
              awslogs-stream-prefix: "ecs"
        - Name: bastet_web
          Image: !Ref WebContainerImage
          Cpu: 0
          PortMappings:
            - Name: bastet_web-8000-tcp
              ContainerPort: 8000
              HostPort: 8000
              Protocol: tcp
          Essential: false
          Environment:
            - Name: POSTGRES_USER
              Value: bastet
            - Name: DJANGO_SETTINGS_MODULE
              Value: bastet.settings.local
            - Name: REDIS_HOST
              Value: localhost
            - Name: DJANGO_SUPERUSER_EMAIL
              Value: !Ref SuperuserEmail
            - Name: POSTGRES_HOST
              Value: localhost
            - Name: DJANGO_SUPERUSER_USERNAME
              Value: !Ref SuperuserUsername
            - Name: POSTGRES_PASSWORD
              Value: bastet
            - Name: POSTGRES_PORT
              Value: "5432"
            - Name: RABBITMQ_HOST
              Value: localhost
            - Name: DJANGO_SUPERUSER_PASSWORD
              Value: !Ref SuperuserPassword
            - Name: POSTGRES_DB_NAME
              Value: bastet
            - Name: AWS_REGION
              Value: !Ref AWS::Region
          MountPoints:
            - SourceVolume: static_volume
              ContainerPath: /code/static
              ReadOnly: false
            - SourceVolume: media_volume
              ContainerPath: /code/media
              ReadOnly: false
          DependsOn:
            - ContainerName: bastet_db
              Condition: START
            - ContainerName: bastet_redis
              Condition: START
            - ContainerName: bastet_rabbitmq
              Condition: START
          LogConfiguration:
            LogDriver: awslogs
            Options:
              awslogs-group: !Sub /ecs/${ECSClusterName}
              awslogs-region: !Ref AWS::Region
              awslogs-stream-prefix: "ecs"
        - Name: bastet_nginx
          Image: !Ref NginxContainerImage
          Cpu: 0
          PortMappings:
            - Name: bastet_nginx-80-tcp
              ContainerPort: 80
              HostPort: 80
              Protocol: tcp
          Essential: true
          MountPoints:
            - SourceVolume: static_volume
              ContainerPath: /home/app/web/staticfiles
              ReadOnly: false
            - SourceVolume: media_volume
              ContainerPath: /home/app/web/mediafiles
              ReadOnly: false
          DependsOn:
            - ContainerName: bastet_web
              Condition: START
          LogConfiguration:
            LogDriver: awslogs
            Options:
              awslogs-group: !Sub /ecs/${ECSClusterName}
              awslogs-region: !Ref AWS::Region
              awslogs-stream-prefix: "ecs"
      Volumes:
        - Name: postgres_data
          DockerVolumeConfiguration:
            Scope: task
            Driver: local
        - Name: static_volume
          DockerVolumeConfiguration:
            Scope: task
            Driver: local
        - Name: media_volume
          DockerVolumeConfiguration:
            Scope: task
            Driver: local

  EcsService:
    Type: AWS::ECS::Service
    DependsOn: 
      - TaskDefinition
      - ClusterCPAssociation
      - ECSCluster
    Properties:
      ServiceName: !Ref ServiceAndTaskDefinitionName
      Cluster: !Ref ECSCluster
      TaskDefinition: !GetAtt TaskDefinition.TaskDefinitionArn
      DesiredCount: 1
      HealthCheckGracePeriodSeconds: '20'
      LoadBalancers:
        - ContainerName: bastet_nginx
          ContainerPort: 80
          TargetGroupArn: !GetAtt TargetGroup.TargetGroupArn
  
  LoadBalancer:
    Type: AWS::ElasticLoadBalancingV2::LoadBalancer
    DependsOn: TargetGroup
    Properties:
      Name: !Ref LoadBalancerName
      Subnets: 
        - !Ref PublicSubnet1
        - !Ref PublicSubnet2
      SecurityGroups:
        - !Ref SecurityGroup

  TargetGroup:
    Type: AWS::ElasticLoadBalancingV2::TargetGroup
    Properties:
      Name: !Ref LoadBalancerName
      VpcId: !Ref VPC
      Port: 80
      Protocol: HTTP
      HealthCheckProtocol: HTTP
      HealthCheckPort: 80
      HealthCheckPath: /
      Matcher:
        HttpCode: 200-399
      TargetType: instance

  Listener:
    Type: AWS::ElasticLoadBalancingV2::Listener
    Properties:
      LoadBalancerArn: !GetAtt LoadBalancer.LoadBalancerArn
      Port: 8000
      Protocol: HTTP
      DefaultActions:
        - Type: forward
          TargetGroupArn: !GetAtt TargetGroup.TargetGroupArn

  ECSLaunchTemplate:
    Type: AWS::EC2::LaunchTemplate
    DependsOn: ECSCluster
    Properties:
      LaunchTemplateData:
        ImageId: !Ref LatestECSOptimizedAMI
        NetworkInterfaces:
          - AssociatePublicIpAddress: true
            DeviceIndex: 0
            DeleteOnTermination: true
            Groups: 
              - !Ref SecurityGroup
        InstanceType: !Ref ECSClusterInstanceType
        IamInstanceProfile:
          Arn: !GetAtt EcsInstanceProfile.Arn
        BlockDeviceMappings:
          - DeviceName: /dev/xvda
            Ebs:
              VolumeSize: '30'
        UserData: !Base64
          Fn::Sub:
            - |-
              #!/bin/bash
              echo ECS_CLUSTER=${ClusterName} >> /etc/ecs/ecs.config;
            - ClusterName: !Ref ECSClusterName
  
  ECSAutoScalingGroup:
    Type: AWS::AutoScaling::AutoScalingGroup
    DependsOn: ECSCluster
    Properties:
      MinSize: !Ref ClusterAutoScalingMinSize
      MaxSize: !Ref ClusterAutoScalingMaxSize
      DesiredCapacity: 0
      LaunchTemplate:
        LaunchTemplateId: !Ref ECSLaunchTemplate
        Version: !GetAtt ECSLaunchTemplate.LatestVersionNumber
      VPCZoneIdentifier:
        - !Ref PublicSubnet1
        - !Ref PublicSubnet2
      Tags:
        - Key: Name
          PropagateAtLaunch: true
          Value: !Join
            - ' - '
            - - ECS Instance
              - !Ref ECSClusterName
  
  ECSCluster:
    Type: AWS::ECS::Cluster
    DependsOn: EcsInstanceProfile
    Properties:
      ClusterName: !Ref ECSClusterName
      ClusterSettings:
        - Name: containerInsights
          Value: disabled
      Configuration:
        ExecuteCommandConfiguration:
          Logging: DEFAULT
      ServiceConnectDefaults:
        Namespace: !Ref ECSClusterName
      Tags: []
  
  ECSLogGroup:
    Type: AWS::Logs::LogGroup
    Properties:
      LogGroupName: !Sub /ecs/${ECSClusterName}
  
  EC2CapacityProvider:
    Type: AWS::ECS::CapacityProvider
    Properties:
      AutoScalingGroupProvider:
        AutoScalingGroupArn: !Ref ECSAutoScalingGroup
        ManagedScaling:
          Status: ENABLED
          TargetCapacity: 100
        ManagedTerminationProtection: DISABLED
  
  ClusterCPAssociation:
    Type: AWS::ECS::ClusterCapacityProviderAssociations
    DependsOn: ECSCluster
    Properties:
      Cluster: !Ref ECSClusterName
      CapacityProviders:
        - !Ref EC2CapacityProvider
      DefaultCapacityProviderStrategy:
        - Base: 0
          Weight: 1
          CapacityProvider: !Ref EC2CapacityProvider

  VPC:
    Type: AWS::EC2::VPC
    Properties:
      CidrBlock: !Ref VpcCidr
      EnableDnsSupport: true
      EnableDnsHostnames: true
      Tags:
        - Key: Name
          Value: !Sub "${VPCNamePrefix}-VPC"

  InternetGateway:
    Type: AWS::EC2::InternetGateway
    Properties:
      Tags:
        - Key: Name
          Value: !Sub "${VPCNamePrefix}-InternetGateway"

  AttachGateway:
    Type: AWS::EC2::VPCGatewayAttachment
    Properties:
      VpcId: !Ref VPC
      InternetGatewayId: !Ref InternetGateway

  PublicSubnet1:
    Type: AWS::EC2::Subnet
    Properties:
      VpcId: !Ref VPC
      CidrBlock: !Ref Subnet1Cidr
      MapPublicIpOnLaunch: true
      AvailabilityZone: !Select [0, !GetAZs ""]
      Tags:
        - Key: Name
          Value: !Sub "${VPCNamePrefix}-PublicSubnet1"

  PublicSubnet2:
    Type: AWS::EC2::Subnet
    Properties:
      VpcId: !Ref VPC
      CidrBlock: !Ref Subnet2Cidr
      MapPublicIpOnLaunch: true
      AvailabilityZone: !Select [1, !GetAZs ""]
      Tags:
        - Key: Name
          Value: !Sub "${VPCNamePrefix}-PublicSubnet2"

  RouteTable:
    Type: AWS::EC2::RouteTable
    Properties:
      VpcId: !Ref VPC
      Tags:
        - Key: Name
          Value: !Sub "${VPCNamePrefix}-PublicRouteTable"

  Route:
    Type: AWS::EC2::Route
    Properties:
      RouteTableId: !Ref RouteTable
      DestinationCidrBlock: 0.0.0.0/0
      GatewayId: !Ref InternetGateway

  SubnetRouteTableAssociation1:
    Type: AWS::EC2::SubnetRouteTableAssociation
    Properties:
      SubnetId: !Ref PublicSubnet1
      RouteTableId: !Ref RouteTable

  SubnetRouteTableAssociation2:
    Type: AWS::EC2::SubnetRouteTableAssociation
    Properties:
      SubnetId: !Ref PublicSubnet2
      RouteTableId: !Ref RouteTable

  SecurityGroup:
    Type: AWS::EC2::SecurityGroup
    Properties:
      GroupDescription: Allow HTTP
      VpcId: !Ref VPC
      SecurityGroupIngress:
        - IpProtocol: -1
          CidrIp: 0.0.0.0/0
      SecurityGroupEgress:
        - IpProtocol: -1
          CidrIp: 0.0.0.0/0
      Tags:
        - Key: Name
          Value: !Sub "${VPCNamePrefix}-PublicSecurityGroup"