From 27f46fa42412c0845981db965f261573e6603c50 Mon Sep 17 00:00:00 2001 From: Nikki Tebaldi <17799906+nikki-t@users.noreply.github.com> Date: Thu, 30 Nov 2023 14:51:05 +0000 Subject: [PATCH] Update OBPG query to include optional creation_date --- create_generic_download_list.py | 4 ++ download_list_creator_lambda.py | 3 +- .../startup_generic_download_list_creator.csh | 47 ++++++++++++------- terraform/download_list_creator-lambda.tf | 3 ++ terraform/variables.tf | 6 +++ 5 files changed, 45 insertions(+), 18 deletions(-) diff --git a/create_generic_download_list.py b/create_generic_download_list.py index 9145a3a..56390a5 100644 --- a/create_generic_download_list.py +++ b/create_generic_download_list.py @@ -844,6 +844,10 @@ def create_generic_download_list(search_dtype, # L2 else: # The content returned from the read() function is a large string with carriage return. + + # Search by creation date if the CREATION_DATE_SEARCH environment variable is set to a non-zero value - this prevents the inclusion of files that have been modified + creation_date = int(os.getenv("CREATION_DATE_SEARCH")) + if creation_date: query_string += "&crdate=1" print(g_module_name + 'INFO:Executing query_string', query_string); #exit(0); diff --git a/download_list_creator_lambda.py b/download_list_creator_lambda.py index 26aeb30..49e2392 100644 --- a/download_list_creator_lambda.py +++ b/download_list_creator_lambda.py @@ -62,6 +62,7 @@ def event_handler(event, context): granule_start_date = event["granule_start_date"] granule_end_date = event["granule_end_date"] naming_pattern_indicator = event["naming_pattern_indicator"] if "naming_pattern_indicator" in event.keys() else "" + creation_date = event["creation_date"] if granule_start_date == "dummy" and granule_end_date == "dummy": year = f"{datetime.datetime.now().year}" else: @@ -93,7 +94,7 @@ def event_handler(event, context): try: subprocess.run([f"{lambda_task_root}/shell/startup_generic_download_list_creator.csh", \ search_pattern, output_directory, 
processing_type, processing_level, \ - state_file_name, num_days_back, txt_file_list, year, \ + state_file_name, num_days_back, txt_file_list, year, creation_date, \ granule_start_date, granule_end_date, naming_pattern_indicator], \ cwd=f"{lambda_task_root}/shell", check=True, stderr=PIPE) except subprocess.CalledProcessError as e: diff --git a/shell/startup_generic_download_list_creator.csh b/shell/startup_generic_download_list_creator.csh index 79d4925..e8f1cf4 100755 --- a/shell/startup_generic_download_list_creator.csh +++ b/shell/startup_generic_download_list_creator.csh @@ -83,11 +83,12 @@ endif # num_days_back = $6 How many days ago of file processing time do you want? If just starting out, run it with 3 or 4 manually and then reduced to 1 as part of the crontab when all files have been processed. # txt_file_list = $7 Name of text file that contains a list of text files generated from OBPG query # year = $8 The year to use in the search filter if granule start and end dates are set to 'dummy' +# creation_date = $9 Whether to use the creation date in the query to remove files with only modification timestamp changes # # For fetching specific granule start and end dates, we have provided 2 optional parameters. Which means the parameters state_file_name and num_days_back will not be used so any dummy parameters can be entered. # -# granule_start_date = $9 -# granule_end_date = $10 +# granule_start_date = $10 +# granule_end_date = $11 # # The format of the fields are 'yyyy-mm-dd' as in: # @@ -106,24 +107,25 @@ if ($debug_mode == 1) then echo "arg_6 [$6]" echo "arg_7 [$7]" echo "arg_8 [$8]" - if ($num_args >= 10) then - echo "arg_8 [$9]" - echo "arg_9 [$10]" + echo "arg_9 [$9]" + if ($num_args >= 11) then + echo "arg_8 [$10]" + echo "arg_9 [$11]" endif endif # Fetch the optional granule start and end dates. 
set granule_start_date = "" set granule_end_date = "" -if ($num_args >= 10) then - set granule_start_date = $9 - set granule_end_date = $10 +if ($num_args >= 11) then + set granule_start_date = $10 + set granule_end_date = $11 endif # Check for optional parameter to look for new names format. -if ($num_args >= 10) then -echo "11 [$11]" - if $11 == 'GHRSST_OBPG_USE_2019_NAMING_PATTERN_TRUE' then +if ($num_args >= 11) then +echo "12 [$12]" + if $12 == 'GHRSST_OBPG_USE_2019_NAMING_PATTERN_TRUE' then echo "11 is GHRSST_OBPG_USE_2019_NAMING_PATTERN_TRUE, setting GHRSST_OBPG_USE_2019_NAMING_PATTERN to true" setenv GHRSST_OBPG_USE_2019_NAMING_PATTERN true echo "GHRSST_OBPG_USE_2019_NAMING_PATTERN [$GHRSST_OBPG_USE_2019_NAMING_PATTERN]" @@ -166,19 +168,20 @@ set state_file_name = $5 set num_days_back = $6 set txt_file_list = $7 set year = $8 -set granule_start_date = $9 -set granule_end_date = $10 +set creation_date = $9 +set granule_start_date = $10 +set granule_end_date = $11 # Fetch the optional granule start and end dates. set granule_start_date = "" set granule_end_date = "" -if ($num_args >= 10) then - set granule_start_date = $9 - set granule_end_date = $10 +if ($num_args >= 11) then + set granule_start_date = $10 + set granule_end_date = $11 endif # Check for optional parameter and set to blanks if the dates parameters are dummy. -if ($num_args >= 11) then +if ($num_args >= 12) then # Set granule_start_date and granule_end_date back to empty string. 
if $granule_start_date == 'dummy' then set granule_start_date = "" @@ -203,6 +206,7 @@ if ($debug_mode == 1) then echo "search_pattern " "$search_pattern" echo "state_file_name " $state_file_name echo "year " $year + echo "creation_date " $creation_date echo "txt_file_list " $txt_file_list echo "granule_start_date " $granule_start_date echo "granule_end_date " $granule_end_date @@ -314,6 +318,15 @@ endif echo "ACTUAL SEARCH FILTER: $actual_filter" +# Set environment variable to indicate search by creation date +if ($creation_date) then + setenv CREATION_DATE_SEARCH 1 +else + setenv CREATION_DATE_SEARCH 0 +endif + +echo "startup_generic_downloader_job_index.csh - INFO: CREATION_DATE_SEARCH set to $CREATION_DATE_SEARCH" + # Reset the time zone back to GMT so we can have the correct current date when the Python script runs. setenv TZ GMT diff --git a/terraform/download_list_creator-lambda.tf b/terraform/download_list_creator-lambda.tf index e7a4a7e..bc1d58a 100644 --- a/terraform/download_list_creator-lambda.tf +++ b/terraform/download_list_creator-lambda.tf @@ -161,6 +161,7 @@ resource "aws_scheduler_schedule" "aws_schedule_dlc_aqua" { "granule_start_date" : "${var.granule_start_date}", "granule_end_date" : "${var.granule_end_date}", "naming_pattern_indicator" : "${var.naming_pattern_indicator}", + "creation_date" : "${var.creation_date}", "account" : "${local.account_id}", "region" : "${var.aws_region}", "prefix" : "${var.prefix}" @@ -187,6 +188,7 @@ resource "aws_scheduler_schedule" "aws_schedule_dlc_terra" { "granule_start_date" : "${var.granule_start_date}", "granule_end_date" : "${var.granule_end_date}", "naming_pattern_indicator" : "${var.naming_pattern_indicator}", + "creation_date" : "${var.creation_date}", "account" : "${local.account_id}", "region" : "${var.aws_region}", "prefix" : "${var.prefix}" @@ -213,6 +215,7 @@ resource "aws_scheduler_schedule" "aws_schedule_dlc_viirs" { "granule_start_date" : "${var.granule_start_date}", "granule_end_date" : 
"${var.granule_end_date}", "naming_pattern_indicator" : "${var.naming_pattern_indicator}", + "creation_date" : "${var.creation_date}", "account" : "${local.account_id}", "region" : "${var.aws_region}", "prefix" : "${var.prefix}" diff --git a/terraform/variables.tf b/terraform/variables.tf index 4e9f17e..584f11a 100644 --- a/terraform/variables.tf +++ b/terraform/variables.tf @@ -28,6 +28,12 @@ variable "aws_region" { default = "us-west-2" } +variable "creation_date" { + type = string + description = "Indicate whether OBPG query should use creation date" + default = "1" +} + variable "default_tags" { type = map(string) default = {}