From 0a4d44e342803e73bccab5a5f0acef6e9547d341 Mon Sep 17 00:00:00 2001 From: Matthew Thompson Date: Tue, 15 Oct 2024 08:30:49 -0400 Subject: [PATCH 1/9] Add options to benchmark scripts --- .../checkpoint_simulator.F90 | 114 ++++++++++++++++++ 1 file changed, 114 insertions(+) diff --git a/benchmarks/io/checkpoint_simulator/checkpoint_simulator.F90 b/benchmarks/io/checkpoint_simulator/checkpoint_simulator.F90 index 96bad4dfd6a5..0f7a6cff1048 100644 --- a/benchmarks/io/checkpoint_simulator/checkpoint_simulator.F90 +++ b/benchmarks/io/checkpoint_simulator/checkpoint_simulator.F90 @@ -58,6 +58,40 @@ module mapl_checkpoint_support_mod procedure :: reset end type + ! This will define the command line options we will use + ! The RC file currently has: + ! NX: 4 # NX and NY are the decomposition of each face of the cubed sphere + ! NY: 4 + ! IM_WORLD: 90 # the cubed-sphere resolution to write + ! LM: 137 # number of levels in each 3D variable + ! NUM_WRITERS: 1 # number of processes that will write (must be multiple of 6 + ! NUM_ARRAYS: 5 # number of 3D arrays to write + ! NTRIALS: 2 # number of trials + ! # the rest of these are optional + ! SPLIT_FILE: .false. # whether each process writes to it's own file or the same file default false + ! GATHER_3D: .false. # whether to gather a level at a time or full variables, default false + ! WRITE_BARRIER: .false. # put a barrier after the write + ! RANDOM_DATA: .true. # whether to put random data in the array to be written + ! DO_WRITES: .true. # whether to skip writing, so you can just time the MPI. default false + ! + ! We also want a new option to allow reading through an rc file + ! + type cli_options + integer :: nx + integer :: ny + integer :: im_world + integer :: lm + integer :: num_writers + integer :: num_arrays + integer :: n_trials + logical :: split_file = .false. + logical :: gather_3d = .false. + logical :: write_barrier = .false. + logical :: random_data = .true. + logical :: do_writes = .true. 
+ character(len=:), allocatable :: config_file + end type cli_options + contains subroutine set_parameters(this,config_file) @@ -679,6 +713,7 @@ program checkpoint_tester use mapl_checkpoint_support_mod use MPI use NetCDF + use fargparse use, intrinsic :: iso_fortran_env, only: REAL64, INT64 implicit NONE @@ -692,6 +727,10 @@ program checkpoint_tester real(kind=REAL64) :: mean_throughput, mean_fs_throughput real(kind=REAL64) :: std_throughput, std_fs_throughput + type(ArgParser), target :: parser + type(StringUnlimitedMap) :: options + type(cli_options) :: cli + call system_clock(count=start_app,count_rate=count_rate) call MPI_Init(status) _VERIFY(status) @@ -706,6 +745,81 @@ program checkpoint_tester call MPI_Barrier(MPI_COMM_WORLD,status) _VERIFY(status) + parser = ArgParser() + + call parser%add_argument("--nx", & + help="The number of cells in the x direction", & + action="store", & + type="integer", & + default=4) + + call parser%add_argument("--ny", & + help="The number of cells in the y direction", & + action="store", & + type="integer", & + default=4) + + call parser%add_argument("--im_world", & + help="The resolution of the cubed sphere", & + action="store", & + type="integer", & + default=90) + + call parser%add_argument("--lm", & + help="The number of levels in each 3D variable", & + action="store", & + type="integer", & + default=137) + + call parser%add_argument("--num_writers", & + help="The number of processes that will write", & + action="store", & + type="integer", & + default=1) + + call parser%add_argument("--num_arrays", & + help="The number of 3D arrays to write", & + action="store", & + type="integer", & + default=5) + + call parser%add_argument("--ntrials", & + help="The number of trials to run", & + action="store", & + type="integer", & + default=3) + + call parser%add_argument("--split_file", & + help="Split the file into multiple files", & + action="store_true", & + default=.false.) 
+ + call parser%add_argument("--gather_3d", & + help="Gather 3D data", & + action="store_true", & + default=.false.) + + call parser%add_argument("--write_barrier", & + help="Add a write barrier", & + action="store_true", & + default=.false.) + + call parser%add_argument("--random_data", & + help="Use random data", & + action="store_true", & + default=.true.) + + call parser%add_argument("--do_writes", & + help="Write data", & + action="store_true", & + default=.true.) + + call parser%add_argument("--config_file", & + help="The configuration file to use", & + action="store", & + type="string", & + default="*") + call support%set_parameters("checkpoint_benchmark.rc") call MPI_Barrier(MPI_COMM_WORLD,status) _VERIFY(status) From e1b572a3f8db56361aeb432c579f99863febf638 Mon Sep 17 00:00:00 2001 From: Matthew Thompson Date: Tue, 15 Oct 2024 09:02:54 -0400 Subject: [PATCH 2/9] More fixes --- .../checkpoint_simulator.F90 | 44 +++++++++++++++++++ 1 file changed, 44 insertions(+) diff --git a/benchmarks/io/checkpoint_simulator/checkpoint_simulator.F90 b/benchmarks/io/checkpoint_simulator/checkpoint_simulator.F90 index 0f7a6cff1048..53a6716df583 100644 --- a/benchmarks/io/checkpoint_simulator/checkpoint_simulator.F90 +++ b/benchmarks/io/checkpoint_simulator/checkpoint_simulator.F90 @@ -730,6 +730,7 @@ program checkpoint_tester type(ArgParser), target :: parser type(StringUnlimitedMap) :: options type(cli_options) :: cli + class(*), pointer :: option call system_clock(count=start_app,count_rate=count_rate) call MPI_Init(status) @@ -745,6 +746,7 @@ program checkpoint_tester call MPI_Barrier(MPI_COMM_WORLD,status) _VERIFY(status) + call parser%initialize() parser = ArgParser() call parser%add_argument("--nx", & @@ -820,6 +822,48 @@ program checkpoint_tester type="string", & default="*") + option => options%at("nx") + if (associated(option)) call cast(option, cli%nx) + + option => options%at("ny") + if (associated(option)) call cast(option, cli%ny) + + option => 
options%at("im_world") + if (associated(option)) call cast(option, cli%im_world) + + option => options%at("lm") + if (associated(option)) call cast(option, cli%lm) + + option => options%at("num_writers") + if (associated(option)) call cast(option, cli%num_writers) + + option => options%at("num_arrays") + if (associated(option)) call cast(option, cli%num_arrays) + + option => options%at("ntrials") + if (associated(option)) call cast(option, cli%n_trials) + + option => options%at("split_file") + if (associated(option)) call cast(option, cli%split_file) + + option => options%at("gather_3d") + if (associated(option)) call cast(option, cli%gather_3d) + + option => options%at("write_barrier") + if (associated(option)) call cast(option, cli%write_barrier) + + option => options%at("random_data") + if (associated(option)) call cast(option, cli%random_data) + + option => options%at("do_writes") + if (associated(option)) call cast(option, cli%do_writes) + + option => options%at("config_file") + if (associated(option)) call cast(option, cli%config_file) + + + + call support%set_parameters("checkpoint_benchmark.rc") call MPI_Barrier(MPI_COMM_WORLD,status) _VERIFY(status) From 53ef0e1c10d2a15fa91c906402f7fe5055047f63 Mon Sep 17 00:00:00 2001 From: Matthew Thompson Date: Tue, 15 Oct 2024 09:21:20 -0400 Subject: [PATCH 3/9] More fixes 2 --- .../checkpoint_simulator.F90 | 107 +++++++++++++----- 1 file changed, 77 insertions(+), 30 deletions(-) diff --git a/benchmarks/io/checkpoint_simulator/checkpoint_simulator.F90 b/benchmarks/io/checkpoint_simulator/checkpoint_simulator.F90 index 53a6716df583..fa8a97e3704b 100644 --- a/benchmarks/io/checkpoint_simulator/checkpoint_simulator.F90 +++ b/benchmarks/io/checkpoint_simulator/checkpoint_simulator.F90 @@ -45,7 +45,8 @@ module mapl_checkpoint_support_mod integer(kind=INT64) :: create_file_time integer(kind=INT64) :: close_file_time contains - procedure :: set_parameters + procedure :: set_parameters_by_config + procedure :: 
set_parameters_by_cli procedure :: compute_decomposition procedure :: allocate_n_arrays procedure :: create_arrays @@ -94,7 +95,7 @@ module mapl_checkpoint_support_mod contains - subroutine set_parameters(this,config_file) + subroutine set_parameters_by_config(this,config_file) class(test_support), intent(inout) :: this character(len=*), intent(in) :: config_file type(ESMF_Config) :: config @@ -170,7 +171,46 @@ function get_integer_key(config,label,default_val) result(val) end if end function - end subroutine + end subroutine set_parameters_by_config + + subroutine set_parameters_by_cli(this,cli) + class(test_support), intent(inout) :: this + type(cli_options), intent(in) :: cli + + logical :: is_present + integer :: comm_size, status,error_code,rc + + this%extra_info = .false. + this%write_barrier = cli%write_barrier + this%do_writes = cli%do_writes + this%do_chunking = .true. + this%gather_3d = cli%gather_3d + this%split_file = cli%split_file + this%nx = cli%nx + this%ny = cli%ny + this%im_world = cli%im_world + this%lm = cli%lm + this%num_writers = cli%num_writers + this%num_arrays = cli%num_arrays + this%n_trials = cli%n_trials + this%random = cli%random_data + + this%write_counter = 0 + this%write_3d_time = 0 + this%write_2d_time = 0 + this%create_file_time = 0 + this%close_file_time = 0 + this%data_volume = 0.d0 + this%time_writing = 0.d0 + this%mpi_time = 0.0 + call MPI_COMM_SIZE(MPI_COMM_WORLD,comm_size,status) + _VERIFY(status) + if (comm_size /= (this%nx*this%ny*6)) then + call MPI_Abort(mpi_comm_world,error_code,status) + _VERIFY(status) + endif + + end subroutine set_parameters_by_cli subroutine reset(this) class(test_support), intent(inout) :: this @@ -822,49 +862,56 @@ program checkpoint_tester type="string", & default="*") - option => options%at("nx") - if (associated(option)) call cast(option, cli%nx) + ! 
We first look for a configuration file + option => options%at("config_file") + if (associated(option)) call cast(option, cli%config_file) - option => options%at("ny") - if (associated(option)) call cast(option, cli%ny) + ! if we have it, we load the configuration file + if (cli%config_file /= "*") then + call support%set_parameters_by_config(cli%config_file) + else - option => options%at("im_world") - if (associated(option)) call cast(option, cli%im_world) + option => options%at("nx") + if (associated(option)) call cast(option, cli%nx) - option => options%at("lm") - if (associated(option)) call cast(option, cli%lm) + option => options%at("ny") + if (associated(option)) call cast(option, cli%ny) - option => options%at("num_writers") - if (associated(option)) call cast(option, cli%num_writers) + option => options%at("im_world") + if (associated(option)) call cast(option, cli%im_world) - option => options%at("num_arrays") - if (associated(option)) call cast(option, cli%num_arrays) + option => options%at("lm") + if (associated(option)) call cast(option, cli%lm) - option => options%at("ntrials") - if (associated(option)) call cast(option, cli%n_trials) + option => options%at("num_writers") + if (associated(option)) call cast(option, cli%num_writers) - option => options%at("split_file") - if (associated(option)) call cast(option, cli%split_file) + option => options%at("num_arrays") + if (associated(option)) call cast(option, cli%num_arrays) - option => options%at("gather_3d") - if (associated(option)) call cast(option, cli%gather_3d) + option => options%at("ntrials") + if (associated(option)) call cast(option, cli%n_trials) - option => options%at("write_barrier") - if (associated(option)) call cast(option, cli%write_barrier) + option => options%at("split_file") + if (associated(option)) call cast(option, cli%split_file) - option => options%at("random_data") - if (associated(option)) call cast(option, cli%random_data) + option => options%at("gather_3d") + if 
(associated(option)) call cast(option, cli%gather_3d) - option => options%at("do_writes") - if (associated(option)) call cast(option, cli%do_writes) + option => options%at("write_barrier") + if (associated(option)) call cast(option, cli%write_barrier) - option => options%at("config_file") - if (associated(option)) call cast(option, cli%config_file) + option => options%at("random_data") + if (associated(option)) call cast(option, cli%random_data) + option => options%at("do_writes") + if (associated(option)) call cast(option, cli%do_writes) + call support%set_parameters_by_cli(cli) + end if - call support%set_parameters("checkpoint_benchmark.rc") + !call support%set_parameters("checkpoint_benchmark.rc") call MPI_Barrier(MPI_COMM_WORLD,status) _VERIFY(status) From fd473ee5891d9196eac691ea929c38739e53b681 Mon Sep 17 00:00:00 2001 From: Matthew Thompson Date: Tue, 15 Oct 2024 09:48:25 -0400 Subject: [PATCH 4/9] Add verbose --- .../checkpoint_simulator.F90 | 27 ++++++++++++++----- 1 file changed, 20 insertions(+), 7 deletions(-) diff --git a/benchmarks/io/checkpoint_simulator/checkpoint_simulator.F90 b/benchmarks/io/checkpoint_simulator/checkpoint_simulator.F90 index fa8a97e3704b..b7e2cac7438b 100644 --- a/benchmarks/io/checkpoint_simulator/checkpoint_simulator.F90 +++ b/benchmarks/io/checkpoint_simulator/checkpoint_simulator.F90 @@ -771,6 +771,7 @@ program checkpoint_tester type(StringUnlimitedMap) :: options type(cli_options) :: cli class(*), pointer :: option + logical :: verbose call system_clock(count=start_app,count_rate=count_rate) call MPI_Init(status) @@ -786,9 +787,19 @@ program checkpoint_tester call MPI_Barrier(MPI_COMM_WORLD,status) _VERIFY(status) - call parser%initialize() + call parser%initialize('checkpoint_simulator.x') parser = ArgParser() + call parser%add_argument("--config_file", & + help="The configuration file to use", & + action="store", & + type="string") + + call parser%add_argument("--verbose", & + help="Be verbose", & + action="store_true", & 
+ default=.false.) + call parser%add_argument("--nx", & help="The number of cells in the x direction", & action="store", & @@ -856,23 +867,25 @@ program checkpoint_tester action="store_true", & default=.true.) - call parser%add_argument("--config_file", & - help="The configuration file to use", & - action="store", & - type="string", & - default="*") + options = parser%parse_args() + + option => options%at("verbose") + if (associated(option)) call cast(option, verbose) ! We first look for a configuration file option => options%at("config_file") + write(*,*) "config_file: associated(option) = ",associated(option) if (associated(option)) call cast(option, cli%config_file) ! if we have it, we load the configuration file - if (cli%config_file /= "*") then + if (allocated(cli%config_file)) then call support%set_parameters_by_config(cli%config_file) else option => options%at("nx") + write(*,*) "nx: associated(option) = ",associated(option) if (associated(option)) call cast(option, cli%nx) + write(*,*) "cli%nx = ",cli%nx option => options%at("ny") if (associated(option)) call cast(option, cli%ny) From c6285208a2141e0ccce5f2e400e9df82ab2ffc93 Mon Sep 17 00:00:00 2001 From: Matthew Thompson Date: Tue, 15 Oct 2024 12:04:40 -0400 Subject: [PATCH 5/9] Add netcdf_writes --- .../checkpoint_simulator.F90 | 24 ++++++++++++++++--- 1 file changed, 21 insertions(+), 3 deletions(-) diff --git a/benchmarks/io/checkpoint_simulator/checkpoint_simulator.F90 b/benchmarks/io/checkpoint_simulator/checkpoint_simulator.F90 index b7e2cac7438b..be344f4bf579 100644 --- a/benchmarks/io/checkpoint_simulator/checkpoint_simulator.F90 +++ b/benchmarks/io/checkpoint_simulator/checkpoint_simulator.F90 @@ -90,6 +90,7 @@ module mapl_checkpoint_support_mod logical :: write_barrier = .false. logical :: random_data = .true. logical :: do_writes = .true. + logical :: netcdf_writes = .true. 
character(len=:), allocatable :: config_file end type cli_options @@ -183,6 +184,7 @@ subroutine set_parameters_by_cli(this,cli) this%extra_info = .false. this%write_barrier = cli%write_barrier this%do_writes = cli%do_writes + this%netcdf_writes = cli%netcdf_writes this%do_chunking = .true. this%gather_3d = cli%gather_3d this%split_file = cli%split_file @@ -209,6 +211,7 @@ subroutine set_parameters_by_cli(this,cli) call MPI_Abort(mpi_comm_world,error_code,status) _VERIFY(status) endif + write (*,*) "comm_size: ", comm_size end subroutine set_parameters_by_cli @@ -867,6 +870,11 @@ program checkpoint_tester action="store_true", & default=.true.) + call parser%add_argument("--netcdf_writes", & + help="Write data as netcdf", & + action="store_true", & + default=.true.) + options = parser%parse_args() option => options%at("verbose") @@ -874,51 +882,61 @@ program checkpoint_tester ! We first look for a configuration file option => options%at("config_file") - write(*,*) "config_file: associated(option) = ",associated(option) if (associated(option)) call cast(option, cli%config_file) ! if we have it, we load the configuration file if (allocated(cli%config_file)) then + if (verbose .and. rank == 0) write(*,*) "Using configuration file ",cli%config_file call support%set_parameters_by_config(cli%config_file) else option => options%at("nx") - write(*,*) "nx: associated(option) = ",associated(option) if (associated(option)) call cast(option, cli%nx) - write(*,*) "cli%nx = ",cli%nx + if (verbose .and. rank == 0) write(*,*) "nx = ",cli%nx option => options%at("ny") if (associated(option)) call cast(option, cli%ny) + if (verbose .and. rank == 0) write(*,*) "ny = ",cli%ny option => options%at("im_world") if (associated(option)) call cast(option, cli%im_world) + if (verbose .and. rank == 0) write(*,*) "im_world = ",cli%im_world option => options%at("lm") if (associated(option)) call cast(option, cli%lm) + if (verbose .and. 
rank == 0) write(*,*) "lm = ",cli%lm option => options%at("num_writers") if (associated(option)) call cast(option, cli%num_writers) + if (verbose .and. rank == 0) write(*,*) "num_writers = ",cli%num_writers option => options%at("num_arrays") if (associated(option)) call cast(option, cli%num_arrays) + if (verbose .and. rank == 0) write(*,*) "num_arrays = ",cli%num_arrays option => options%at("ntrials") if (associated(option)) call cast(option, cli%n_trials) + if (verbose .and. rank == 0) write(*,*) "n_trials = ",cli%n_trials option => options%at("split_file") if (associated(option)) call cast(option, cli%split_file) + if (verbose .and. rank == 0) write(*,*) "split_file = ",cli%split_file option => options%at("gather_3d") if (associated(option)) call cast(option, cli%gather_3d) + if (verbose .and. rank == 0) write(*,*) "gather_3d = ",cli%gather_3d option => options%at("write_barrier") if (associated(option)) call cast(option, cli%write_barrier) + if (verbose .and. rank == 0) write(*,*) "write_barrier = ",cli%write_barrier option => options%at("random_data") if (associated(option)) call cast(option, cli%random_data) + if (verbose .and. rank == 0) write(*,*) "random_data = ",cli%random_data option => options%at("do_writes") if (associated(option)) call cast(option, cli%do_writes) + if (verbose .and. 
rank == 0) write(*,*) "do_writes = ",cli%do_writes call support%set_parameters_by_cli(cli) From 2f834befaf377bb97169e933e3ea6b5013b474ad Mon Sep 17 00:00:00 2001 From: Matthew Thompson Date: Tue, 15 Oct 2024 13:18:50 -0400 Subject: [PATCH 6/9] Clean up --- benchmarks/io/checkpoint_simulator/README.md | 37 +- .../checkpoint_simulator.F90 | 344 +++++++++--------- benchmarks/io/restart_simulator/README.md | 40 +- .../restart_simulator/restart_simulator.F90 | 322 +++++++++++++++- 4 files changed, 542 insertions(+), 201 deletions(-) diff --git a/benchmarks/io/checkpoint_simulator/README.md b/benchmarks/io/checkpoint_simulator/README.md index 4466e69af71f..c74a048512c2 100644 --- a/benchmarks/io/checkpoint_simulator/README.md +++ b/benchmarks/io/checkpoint_simulator/README.md @@ -1,6 +1,27 @@ This benchmark simulates writing a series of 3D variables of a given cubed-sphere resolution to a file using the same strategies as used by the real checkpoint code in MAPL -The code has the following options and needs an ESMF rc file named checkpoint\_benchmark.rc +The code has the following command line options: +``` + optional arguments: + -h, --help This message. 
+ --config_file The configuration file to use + --nx The number of cells in the x direction (default=4) + --ny The number of cells in the y direction (default=4) + --im_world The resolution of the cubed sphere (default=90) + --lm The number of levels in each 3D variable (default=137) + --num_writers The number of processes that will write (default=1) + --num_arrays The number of 3D arrays to write (default=5) + --ntrials The number of trials to run (default=3) + --split_file Split the file into multiple files (default=False) + --gather_3d Gather 3D data (default=False) + --write_barrier Add a write barrier (default=False) + --no_random_data Do not use random data (default=False) + --do_no_writes Do not write data (default=False) + --no_netcdf_writes Do not write data as netcdf (default=False) + --no_chunking Do not chunk (default=False) +``` + +NOTE 1: If you specify a `config_file`, it must be an ESMF Config file with the following options: - "NX:" the x distribution for each face - "NY:" the y distribution for each face @@ -8,12 +29,12 @@ The code has the following options and needs an ESMF rc file named checkpoint\_b - "LM:" the number of levels - "NUM\_WRITERS:" the number of writing processes either to a single or independent files - "NUM\_ARRAYS:" the number of 3D variables to write to the file -- "CHUNK:" whether to chunk, default true -- "GATHER\_3D:" gather all levels at once (default is false which means a level at a time is gathered) -- "SPLIT\_FILE:" default false, if true, each writer writes to and independent file -- "WRITE\_BARRIER:" default false, add a barrier before each write to for synchronization -- "DO\_WRITES:" default true, if false skips writing (so just an mpi test at that point) +- "CHUNK:" whether to chunk, default `.true.` +- "GATHER\_3D:" gather all levels at once (default is `.false.` which means a level at a time is gathered) +- "SPLIT\_FILE:" default `.false.`, if `.true.`, each writer writes to an independent file +- 
"WRITE\_BARRIER:" default `.false.`, add a barrier before each write for synchronization +- "DO\_WRITES:" default `.true.`, if `.false.` skips writing (so just an MPI test at that point) - "NTRIALS:" default 1, the number of trials to make writing -- "RANDOM\_DATA:" default true, if true will arrays with random data, if false sets the array to the rank of the process +- "RANDOM\_DATA:" default `.true.`, if `.true.` will fill arrays with random data, if `.false.` sets the array to the rank of the process -Note that whatever you set NX and NY to the program must be run on `6*NX*NY` processors and the number of writers must evenly divide `6*NY` +NOTE 2: Whatever you set NX and NY to, the program must be run on `6*NX*NY` processors and the number of writers must evenly divide `6*NY` diff --git a/benchmarks/io/checkpoint_simulator/checkpoint_simulator.F90 b/benchmarks/io/checkpoint_simulator/checkpoint_simulator.F90 index be344f4bf579..5b213d271b57 100644 --- a/benchmarks/io/checkpoint_simulator/checkpoint_simulator.F90 +++ b/benchmarks/io/checkpoint_simulator/checkpoint_simulator.F90 @@ -6,6 +6,7 @@ module mapl_checkpoint_support_mod use MPI use NetCDF use MAPL_ErrorHandlingMod + use fargparse use, intrinsic :: iso_fortran_env, only: INT64, REAL64, REAL32 implicit none @@ -59,24 +60,6 @@ module mapl_checkpoint_support_mod procedure :: reset end type - ! This will define the command line options we will use - ! The RC file currently has: - ! NX: 4 # NX and NY are the decomposition of each face of the cubed sphere - ! NY: 4 - ! IM_WORLD: 90 # the cubed-sphere resolution to write - ! LM: 137 # number of levels in each 3D variable - ! NUM_WRITERS: 1 # number of processes that will write (must be multiple of 6 - ! NUM_ARRAYS: 5 # number of 3D arrays to write - ! NTRIALS: 2 # number of trials - ! # the rest of these are optional - ! SPLIT_FILE: .false. # whether each process writes to it's own file or the same file default false - ! GATHER_3D: .false. 
# whether to gather a level at a time or full variables, default false - ! WRITE_BARRIER: .false. # put a barrier after the write - ! RANDOM_DATA: .true. # whether to put random data in the array to be written - ! DO_WRITES: .true. # whether to skip writing, so you can just time the MPI. default false - ! - ! We also want a new option to allow reading through an rc file - ! type cli_options integer :: nx integer :: ny @@ -91,11 +74,163 @@ module mapl_checkpoint_support_mod logical :: random_data = .true. logical :: do_writes = .true. logical :: netcdf_writes = .true. + logical :: do_chunking = .true. character(len=:), allocatable :: config_file end type cli_options contains + function parse_arguments() result(options) + + type(StringUnlimitedMap) :: options + type(ArgParser), target :: parser + + call parser%initialize('checkpoint_simulator.x') + parser = ArgParser() + + call parser%add_argument("--config_file", & + help="The configuration file to use", & + action="store", & + type="string") + + call parser%add_argument("--nx", & + help="The number of cells in the x direction (default=4)", & + action="store", & + type="integer", & + default=4) + + call parser%add_argument("--ny", & + help="The number of cells in the y direction (default=4)", & + action="store", & + type="integer", & + default=4) + + call parser%add_argument("--im_world", & + help="The resolution of the cubed sphere (default=90)", & + action="store", & + type="integer", & + default=90) + + call parser%add_argument("--lm", & + help="The number of levels in each 3D variable (default=137)", & + action="store", & + type="integer", & + default=137) + + call parser%add_argument("--num_writers", & + help="The number of processes that will write (default=1)", & + action="store", & + type="integer", & + default=1) + + call parser%add_argument("--num_arrays", & + help="The number of 3D arrays to write (default=5)", & + action="store", & + type="integer", & + default=5) + + call 
parser%add_argument("--ntrials", & + help="The number of trials to run (default=3)", & + action="store", & + type="integer", & + default=3) + + call parser%add_argument("--split_file", & + help="Split the file into multiple files (default=False)", & + action="store_true", & + default=.false.) + + call parser%add_argument("--gather_3d", & + help="Gather 3D data (default=False)", & + action="store_true", & + default=.false.) + + call parser%add_argument("--write_barrier", & + help="Add a write barrier (default=False)", & + action="store_true", & + default=.false.) + + call parser%add_argument("--no_random_data", & + help="Do not use random data (default=False)", & + action="store_true", & + default=.False.) + + call parser%add_argument("--do_no_writes", & + help="Do not write data (default=False)", & + action="store_true", & + default=.False.) + + call parser%add_argument("--no_netcdf_writes", & + help="Do not write data as netcdf (default=False)", & + action="store_true", & + default=.false.) + + call parser%add_argument("--no_chunking", & + help="Do not chunk (default=False)", & + action="store_true", & + default=.false.) 
+ + options = parser%parse_args() + + end function parse_arguments + + subroutine get_cli_options(options, cli) + type(StringUnlimitedMap), intent(in) :: options + type(cli_options), intent(out) :: cli + class(*), pointer :: option + logical :: tmp + + option => options%at("config_file") + if (associated(option)) call cast(option, cli%config_file) + + option => options%at("nx") + if (associated(option)) call cast(option, cli%nx) + + option => options%at("ny") + if (associated(option)) call cast(option, cli%ny) + + option => options%at("im_world") + if (associated(option)) call cast(option, cli%im_world) + + option => options%at("lm") + if (associated(option)) call cast(option, cli%lm) + + option => options%at("num_writers") + if (associated(option)) call cast(option, cli%num_writers) + + option => options%at("num_arrays") + if (associated(option)) call cast(option, cli%num_arrays) + + option => options%at("ntrials") + if (associated(option)) call cast(option, cli%n_trials) + + option => options%at("split_file") + if (associated(option)) call cast(option, cli%split_file) + + option => options%at("gather_3d") + if (associated(option)) call cast(option, cli%gather_3d) + + option => options%at("write_barrier") + if (associated(option)) call cast(option, cli%write_barrier) + + option => options%at("no_random_data") + if (associated(option)) call cast(option, tmp) + cli%random_data = .not. tmp + + option => options%at("do_no_writes") + if (associated(option)) call cast(option, tmp) + cli%do_writes = .not. tmp + + option => options%at("no_netcdf_writes") + if (associated(option)) call cast(option, tmp) + cli%netcdf_writes = .not. tmp + + option => options%at("no_chunking") + if (associated(option)) call cast(option, tmp) + cli%do_chunking = .not. 
tmp + + end subroutine get_cli_options + subroutine set_parameters_by_config(this,config_file) class(test_support), intent(inout) :: this character(len=*), intent(in) :: config_file @@ -185,7 +320,7 @@ subroutine set_parameters_by_cli(this,cli) this%write_barrier = cli%write_barrier this%do_writes = cli%do_writes this%netcdf_writes = cli%netcdf_writes - this%do_chunking = .true. + this%do_chunking = cli%do_chunking this%gather_3d = cli%gather_3d this%split_file = cli%split_file this%nx = cli%nx @@ -211,7 +346,6 @@ subroutine set_parameters_by_cli(this,cli) call MPI_Abort(mpi_comm_world,error_code,status) _VERIFY(status) endif - write (*,*) "comm_size: ", comm_size end subroutine set_parameters_by_cli @@ -433,20 +567,25 @@ subroutine create_file(this) write(fc,'(I0.3)')writer_rank fname = "checkpoint_"//fc//".nc4" status = nf90_create(fname,ior(NF90_NETCDF4,NF90_CLOBBER), this%ncid) + _VERIFY(status) chunk_factor = 1 else fname = "checkpoint.nc4" status = nf90_create(fname,create_mode, this%ncid, comm=this%writers_comm, info=info) + _VERIFY(status) chunk_factor = this%num_writers end if status = nf90_def_dim(this%ncid,"lon",this%im_world,xdim) + _VERIFY(status) if (this%split_file) then y_size = this%im_world*6/this%num_writers else y_size = this%im_world*6 end if status = nf90_def_dim(this%ncid,"lat",y_size,ydim) + _VERIFY(status) status = nf90_def_dim(this%ncid,"lev",this%lm,zdim) + _VERIFY(status) if (this%gather_3d) then z_chunk = this%lm else @@ -455,11 +594,15 @@ subroutine create_file(this) do i=1,this%num_arrays if (this%do_chunking) then status = nf90_def_var(this%ncid,this%bundle(i)%field_name,NF90_FLOAT,[xdim,ydim,zdim],varid,chunksizes=[this%im_world,y_size/chunk_factor,z_chunk]) + _VERIFY(status) else status = nf90_def_var(this%ncid,this%bundle(i)%field_name,NF90_FLOAT,[xdim,ydim,zdim],varid) + _VERIFY(status) end if status = nf90_def_var_fill(this%ncid,varid,NF90_NOFILL,0) + _VERIFY(status) !status = nf90_var_par_access(this%ncid,varid,NF90_COLLECTIVE) 
! you can turn this on if you really want to hork up performance + !_VERIFY(status) enddo status = nf90_enddef(this%ncid) end if @@ -558,15 +701,19 @@ subroutine write_variable(this,var_name,local_var) jsize=jsize + (this%jn(myrow+j) - this%j1(myrow+j) + 1) enddo allocate(VAR(IM_WORLD,jsize,this%lm), stat=status) + _VERIFY(status) allocate(recvbuf(IM_WORLD*jsize*this%lm), stat=status) + _VERIFY(status) end if if(myiorank/=0) then allocate(recvbuf(0), stat=status) + _VERIFY(status) endif call mpi_gatherv( local_var, size(local_var), MPI_REAL, recvbuf, recvcounts, displs, MPI_REAL, & 0, this%gather_comm, status ) + _VERIFY(status) call system_clock(count=end_mpi) this%time_mpi = this%mpi_time + (end_mpi - start_mpi) if (this%write_barrier) then @@ -609,7 +756,9 @@ subroutine write_variable(this,var_name,local_var) if (this%do_writes) then if (this%netcdf_writes) then status = nf90_inq_varid(this%ncid,name=var_name ,varid=varid) + _VERIFY(status) status = nf90_put_var(this%ncid,varid,var,start,cnt) + _VERIFY(status) else write(this%ncid)var end if @@ -726,7 +875,9 @@ subroutine write_level(this,var_name,local_var,z_index) if (this%do_writes) then if (this%netcdf_writes) then status = nf90_inq_varid(this%ncid,name=var_name ,varid=varid) + _VERIFY(status) status = nf90_put_var(this%ncid,varid,var,start,cnt) + _VERIFY(status) else write(this%ncid)var end if @@ -770,11 +921,8 @@ program checkpoint_tester real(kind=REAL64) :: mean_throughput, mean_fs_throughput real(kind=REAL64) :: std_throughput, std_fs_throughput - type(ArgParser), target :: parser type(StringUnlimitedMap) :: options type(cli_options) :: cli - class(*), pointer :: option - logical :: verbose call system_clock(count=start_app,count_rate=count_rate) call MPI_Init(status) @@ -790,159 +938,19 @@ program checkpoint_tester call MPI_Barrier(MPI_COMM_WORLD,status) _VERIFY(status) - call parser%initialize('checkpoint_simulator.x') - parser = ArgParser() - - call parser%add_argument("--config_file", & - help="The 
configuration file to use", & - action="store", & - type="string") - - call parser%add_argument("--verbose", & - help="Be verbose", & - action="store_true", & - default=.false.) - - call parser%add_argument("--nx", & - help="The number of cells in the x direction", & - action="store", & - type="integer", & - default=4) - - call parser%add_argument("--ny", & - help="The number of cells in the y direction", & - action="store", & - type="integer", & - default=4) - - call parser%add_argument("--im_world", & - help="The resolution of the cubed sphere", & - action="store", & - type="integer", & - default=90) - - call parser%add_argument("--lm", & - help="The number of levels in each 3D variable", & - action="store", & - type="integer", & - default=137) - - call parser%add_argument("--num_writers", & - help="The number of processes that will write", & - action="store", & - type="integer", & - default=1) - - call parser%add_argument("--num_arrays", & - help="The number of 3D arrays to write", & - action="store", & - type="integer", & - default=5) - - call parser%add_argument("--ntrials", & - help="The number of trials to run", & - action="store", & - type="integer", & - default=3) - - call parser%add_argument("--split_file", & - help="Split the file into multiple files", & - action="store_true", & - default=.false.) - - call parser%add_argument("--gather_3d", & - help="Gather 3D data", & - action="store_true", & - default=.false.) - - call parser%add_argument("--write_barrier", & - help="Add a write barrier", & - action="store_true", & - default=.false.) - - call parser%add_argument("--random_data", & - help="Use random data", & - action="store_true", & - default=.true.) - - call parser%add_argument("--do_writes", & - help="Write data", & - action="store_true", & - default=.true.) - - call parser%add_argument("--netcdf_writes", & - help="Write data as netcdf", & - action="store_true", & - default=.true.) 
- - options = parser%parse_args() - - option => options%at("verbose") - if (associated(option)) call cast(option, verbose) - - ! We first look for a configuration file - option => options%at("config_file") - if (associated(option)) call cast(option, cli%config_file) + options = parse_arguments() + + call get_cli_options(options,cli) ! if we have it, we load the configuration file if (allocated(cli%config_file)) then - if (verbose .and. rank == 0) write(*,*) "Using configuration file ",cli%config_file + if (rank == 0) write(*,*) "Using configuration file ",cli%config_file + if (rank == 0) write(*,*) "NOTE: This overrides any other command line options" call support%set_parameters_by_config(cli%config_file) else - - option => options%at("nx") - if (associated(option)) call cast(option, cli%nx) - if (verbose .and. rank == 0) write(*,*) "nx = ",cli%nx - - option => options%at("ny") - if (associated(option)) call cast(option, cli%ny) - if (verbose .and. rank == 0) write(*,*) "ny = ",cli%ny - - option => options%at("im_world") - if (associated(option)) call cast(option, cli%im_world) - if (verbose .and. rank == 0) write(*,*) "im_world = ",cli%im_world - - option => options%at("lm") - if (associated(option)) call cast(option, cli%lm) - if (verbose .and. rank == 0) write(*,*) "lm = ",cli%lm - - option => options%at("num_writers") - if (associated(option)) call cast(option, cli%num_writers) - if (verbose .and. rank == 0) write(*,*) "num_writers = ",cli%num_writers - - option => options%at("num_arrays") - if (associated(option)) call cast(option, cli%num_arrays) - if (verbose .and. rank == 0) write(*,*) "num_arrays = ",cli%num_arrays - - option => options%at("ntrials") - if (associated(option)) call cast(option, cli%n_trials) - if (verbose .and. rank == 0) write(*,*) "n_trials = ",cli%n_trials - - option => options%at("split_file") - if (associated(option)) call cast(option, cli%split_file) - if (verbose .and. 
rank == 0) write(*,*) "split_file = ",cli%split_file - - option => options%at("gather_3d") - if (associated(option)) call cast(option, cli%gather_3d) - if (verbose .and. rank == 0) write(*,*) "gather_3d = ",cli%gather_3d - - option => options%at("write_barrier") - if (associated(option)) call cast(option, cli%write_barrier) - if (verbose .and. rank == 0) write(*,*) "write_barrier = ",cli%write_barrier - - option => options%at("random_data") - if (associated(option)) call cast(option, cli%random_data) - if (verbose .and. rank == 0) write(*,*) "random_data = ",cli%random_data - - option => options%at("do_writes") - if (associated(option)) call cast(option, cli%do_writes) - if (verbose .and. rank == 0) write(*,*) "do_writes = ",cli%do_writes - call support%set_parameters_by_cli(cli) - end if - !call support%set_parameters("checkpoint_benchmark.rc") call MPI_Barrier(MPI_COMM_WORLD,status) _VERIFY(status) diff --git a/benchmarks/io/restart_simulator/README.md b/benchmarks/io/restart_simulator/README.md index 3152425b0575..a5e31dd72dce 100644 --- a/benchmarks/io/restart_simulator/README.md +++ b/benchmarks/io/restart_simulator/README.md @@ -1,19 +1,41 @@ This benchmark simulates writing a series of 3D variables of a given cubed-sphere resolution to a file using the same strategies as used by the real checkpoint code in MAPL -The code has the following options and needs an ESMF rc file named checkpoint\_benchmark.rc +The code has the following command line options: + +``` + -h, --help This message. 
+ --config_file The configuration file to use + --nx The number of cells in the x direction (default=4) + --ny The number of cells in the y direction (default=4) + --im_world The resolution of the cubed sphere (default=90) + --lm The number of levels in each 3D variable (default=137) + --num_readers The number of processes that will read (default=1) + --num_arrays The number of 3D arrays to read (default=5) + --ntrials The number of trials to run (default=3) + --split_file Split the file into multiple files (default=False) + --scatter_3d Scatter 3D data (default=False) + --read_barrier Add a read barrier (default=False) + --no_random_data Do not random data (default=False) + --do_no_reads Do not read data (default=False) + --no_netcdf_reads Do not read data as netcdf (default=False) +``` + +NOTE 1: This program *REQUIRES* a file called `checkpoint.nc4` that is generated by the `checkpoint_benchmark.x` code + +NOTE 2: If you specify a `config_file` it must be an ESMF Config file with the following options: - "NX:" the x distribution for each face - "NY:" the y distribution for each face - "IM\_WORLD:" the cube resolution -- "LM:" the nubmer of levels +- "LM:" the number of levels - "NUM\_WRITERS:" the number of writing processes either to a single or independent files - "NUM\_ARRAYS:" the number of 3D variables to write to the file -- "CHUNK:" whether to chunk, default true -- "SCATTER\_3D:" gather all levels at once (default is false which means a level at a time is gathered) -- "SPLIT\_FILE:" default false, if true, each writer writes to and independent file -- "WRITE\_BARRIER:" default false, add a barrier before each write to for synchronization -- "DO\_WRITES:" default true, if false skips writing (so just an mpi test at that point) +- "CHUNK:" whether to chunk, default `.true.` +- "SCATTER\_3D:" gather all levels at once (default is `.false` which means a level at a time is gathered) +- "SPLIT\_FILE:" default `.false`, if `.true.`, each writer writes to and 
independent file +- "WRITE\_BARRIER:" default `.false`, add a barrier before each write to for synchronization +- "DO\_WRITES:" default `.true.`, if `.false` skips writing (so just an mpi test at that point) - "NTRIAL:" default 1, the number of trials to make writing -- "RANDOM\_DATA:" default true, if true will arrays with random data, if false sets the array to the rank of the process +- "RANDOM\_DATA:" default `.true.`, if `.true.` will arrays with random data, if `.false` sets the array to the rank of the process -Note that whatever you set NX and NY to the program must be run on 6*NY*NY processors and the number of writers must evenly divide 6*NY +NOTE 3: whatever you set NX and NY to the program must be run on `6*NY*NY` processors and the number of writers must evenly divide `6*NY` diff --git a/benchmarks/io/restart_simulator/restart_simulator.F90 b/benchmarks/io/restart_simulator/restart_simulator.F90 index 235cba280b5b..d57facd1f464 100644 --- a/benchmarks/io/restart_simulator/restart_simulator.F90 +++ b/benchmarks/io/restart_simulator/restart_simulator.F90 @@ -6,6 +6,7 @@ module mapl_restart_support_mod use NetCDF use MAPL_ErrorHandlingMod use MAPL_MemUtilsMod + use fargparse use, intrinsic :: iso_fortran_env, only: INT64, REAL64, REAL32 implicit none @@ -44,7 +45,8 @@ module mapl_restart_support_mod integer(kind=INT64) :: open_file_time integer(kind=INT64) :: close_file_time contains - procedure :: set_parameters + procedure :: set_parameters_by_config + procedure :: set_parameters_by_cli procedure :: compute_decomposition procedure :: allocate_n_arrays procedure :: create_arrays @@ -57,14 +59,173 @@ module mapl_restart_support_mod procedure :: reset end type + type cli_options + integer :: nx + integer :: ny + integer :: im_world + integer :: lm + integer :: num_readers + integer :: num_arrays + integer :: n_trials + logical :: split_file = .false. + logical :: scatter_3d = .false. + logical :: read_barrier = .false. + logical :: random_data = .true. 
+ logical :: do_reads = .true. + logical :: netcdf_reads = .true. + character(len=:), allocatable :: config_file + end type cli_options + contains - subroutine set_parameters(this,config_file) + function parse_arguments() result(options) + + type(StringUnlimitedMap) :: options + type(ArgParser), target :: parser + + call parser%initialize('checkpoint_simulator.x') + parser = ArgParser() + + call parser%add_argument("--config_file", & + help="The configuration file to use", & + action="store", & + type="string") + + call parser%add_argument("--nx", & + help="The number of cells in the x direction (default=4)", & + action="store", & + type="integer", & + default=4) + + call parser%add_argument("--ny", & + help="The number of cells in the y direction (default=4)", & + action="store", & + type="integer", & + default=4) + + call parser%add_argument("--im_world", & + help="The resolution of the cubed sphere (default=90)", & + action="store", & + type="integer", & + default=90) + + call parser%add_argument("--lm", & + help="The number of levels in each 3D variable (default=137)", & + action="store", & + type="integer", & + default=137) + + call parser%add_argument("--num_readers", & + help="The number of processes that will read (default=1)", & + action="store", & + type="integer", & + default=1) + + call parser%add_argument("--num_arrays", & + help="The number of 3D arrays to read (default=5)", & + action="store", & + type="integer", & + default=5) + + call parser%add_argument("--ntrials", & + help="The number of trials to run (default=3)", & + action="store", & + type="integer", & + default=3) + + call parser%add_argument("--split_file", & + help="Split the file into multiple files (default=False)", & + action="store_true", & + default=.false.) + + call parser%add_argument("--scatter_3d", & + help="Scatter 3D data (default=False)", & + action="store_true", & + default=.false.) 
+ + call parser%add_argument("--read_barrier", & + help="Add a read barrier (default=False)", & + action="store_true", & + default=.false.) + + call parser%add_argument("--no_random_data", & + help="Do not random data (default=False)", & + action="store_true", & + default=.false.) + + call parser%add_argument("--do_no_reads", & + help="Do not read data (default=False)", & + action="store_true", & + default=.false.) + + call parser%add_argument("--no_netcdf_reads", & + help="Do not read data as netcdf (default=False)", & + action="store_true", & + default=.false.) + + options = parser%parse_args() + + end function parse_arguments + + subroutine get_cli_options(options, cli) + type(StringUnlimitedMap), intent(in) :: options + type(cli_options), intent(out) :: cli + class(*), pointer :: option + logical :: tmp + + option => options%at("config_file") + if (associated(option)) call cast(option, cli%config_file) + + option => options%at("nx") + if (associated(option)) call cast(option, cli%nx) + + option => options%at("ny") + if (associated(option)) call cast(option, cli%ny) + + option => options%at("im_world") + if (associated(option)) call cast(option, cli%im_world) + + option => options%at("lm") + if (associated(option)) call cast(option, cli%lm) + + option => options%at("num_readers") + if (associated(option)) call cast(option, cli%num_readers) + + option => options%at("num_arrays") + if (associated(option)) call cast(option, cli%num_arrays) + + option => options%at("ntrials") + if (associated(option)) call cast(option, cli%n_trials) + + option => options%at("split_file") + if (associated(option)) call cast(option, cli%split_file) + + option => options%at("scatter_3d") + if (associated(option)) call cast(option, cli%scatter_3d) + + option => options%at("read_barrier") + if (associated(option)) call cast(option, cli%read_barrier) + + option => options%at("no_random_data") + if (associated(option)) call cast(option, tmp) + cli%random_data = .not. 
tmp + + option => options%at("do_no_reads") + if (associated(option)) call cast(option, tmp) + cli%do_reads = .not. tmp + + option => options%at("no_netcdf_reads") + if (associated(option)) call cast(option, tmp) + cli%netcdf_reads = .not. tmp + + end subroutine get_cli_options + + subroutine set_parameters_by_config(this,config_file) class(test_support), intent(inout) :: this character(len=*), intent(in) :: config_file type(ESMF_Config) :: config - integer :: comm_size, status,error_code + integer :: comm_size, status,error_code, rc config = ESMF_ConfigCreate() this%extra_info = .false. @@ -95,7 +256,11 @@ subroutine set_parameters(this,config_file) this%time_reading = 0.d0 this%mpi_time = 0.0 call MPI_COMM_SIZE(MPI_COMM_WORLD,comm_size,status) - if (comm_size /= (this%nx*this%ny*6)) call MPI_Abort(mpi_comm_world,error_code,status) + _VERIFY(status) + if (comm_size /= (this%nx*this%ny*6)) then + call MPI_Abort(mpi_comm_world,error_code,status) + _VERIFY(status) + endif contains @@ -129,7 +294,46 @@ function get_integer_key(config,label,default_val) result(val) end if end function - end subroutine + end subroutine set_parameters_by_config + + subroutine set_parameters_by_cli(this,cli) + class(test_support), intent(inout) :: this + type(cli_options), intent(in) :: cli + + logical :: is_present + integer :: comm_size, status,error_code,rc + + this%extra_info = .false. 
+ this%read_barrier = cli%read_barrier + this%do_reads = cli%do_reads + this%netcdf_reads = cli%netcdf_reads + this%scatter_3d = cli%scatter_3d + this%split_file = cli%split_file + this%nx = cli%nx + this%ny = cli%ny + this%im_world = cli%im_world + this%lm = cli%lm + this%num_readers = cli%num_readers + this%num_arrays = cli%num_arrays + this%n_trials = cli%n_trials + this%random = cli%random_data + + this%read_counter = 0 + this%read_3d_time = 0 + this%read_2d_time = 0 + this%open_file_time = 0 + this%close_file_time = 0 + this%data_volume = 0.d0 + this%time_reading = 0.d0 + this%mpi_time = 0.0 + call MPI_COMM_SIZE(MPI_COMM_WORLD,comm_size,status) + _VERIFY(status) + if (comm_size /= (this%nx*this%ny*6)) then + call MPI_Abort(mpi_comm_world,error_code,status) + _VERIFY(status) + endif + + end subroutine set_parameters_by_cli subroutine reset(this) class(test_support), intent(inout) :: this @@ -170,12 +374,13 @@ subroutine allocate_n_arrays(this,im,jm) integer, intent(in) :: im integer, intent(in) :: jm - integer :: n,rank,status + integer :: n,rank,status,rc character(len=3) :: formatted_int integer :: seed_size integer, allocatable :: seeds(:) call MPI_COMM_RANK(MPI_COMM_WORLD,rank,status) + _VERIFY(status) call random_seed(size=seed_size) allocate(seeds(seed_size)) seeds = rank @@ -196,10 +401,12 @@ subroutine create_arrays(this) class(test_support), intent(inout) :: this integer, allocatable :: ims(:),jms(:) - integer :: rank, status,comm_size,n,i,j,rank_counter,offset,index_offset + integer :: rank, status,comm_size,n,i,j,rank_counter,offset,index_offset,rc call MPI_Comm_Rank(MPI_COMM_WORLD,rank,status) + _VERIFY(status) call MPI_Comm_Size(MPI_COMM_WORLD,comm_size,status) + _VERIFY(status) allocate(this%bundle(this%num_arrays)) ims = this%compute_decomposition(axis=1) jms = this%compute_decomposition(axis=2) @@ -248,16 +455,19 @@ subroutine create_arrays(this) subroutine create_communicators(this) class(test_support), intent(inout) :: this - integer :: 
myid,status,nx0,ny0,color,j,ny_by_readers,local_ny + integer :: myid,status,nx0,ny0,color,j,ny_by_readers,local_ny,rc local_ny = this%ny*6 call MPI_Comm_Rank(mpi_comm_world,myid,status) + _VERIFY(status) nx0 = mod(myid,this%nx) + 1 ny0 = myid/this%nx + 1 color = nx0 call MPI_Comm_Split(MPI_COMM_WORLD,color,myid,this%ycomm,status) + _VERIFY(status) color = ny0 call MPI_Comm_Split(MPI_COMM_WORLD,color,myid,this%xcomm,status) + _VERIFY(status) ny_by_readers = local_ny/this%num_readers @@ -267,15 +477,19 @@ subroutine create_communicators(this) color = MPI_UNDEFINED end if call MPI_COMM_SPLIT(MPI_COMM_WORLD,color,myid,this%readers_comm,status) + _VERIFY(status) + if (this%num_readers == local_ny) then this%scatter_comm = this%xcomm else j = ny0 - mod(ny0-1,ny_by_readers) call MPI_COMM_SPLIT(MPI_COMM_WORLD,j,myid,this%scatter_comm,status) + _VERIFY(status) end if - call MPI_BARRIER(mpi_comm_world,status) + call MPI_BARRIER(mpi_comm_world, status) + _VERIFY(status) end subroutine @@ -283,7 +497,7 @@ subroutine create_communicators(this) subroutine close_file(this) class(test_support), intent(inout) :: this - integer :: status + integer :: status, rc integer(kind=INT64) :: sub_start,sub_end @@ -292,11 +506,13 @@ subroutine close_file(this) if (this%readers_comm /= MPI_COMM_NULL) then if (this%netcdf_reads) then status = nf90_close(this%ncid) + _VERIFY(status) else close(this%ncid) end if end if call MPI_BARRIER(MPI_COMM_WORLD,status) + _VERIFY(status) call system_clock(count=sub_end) this%close_file_time = sub_end-sub_start end subroutine @@ -319,21 +535,37 @@ subroutine open_file(this) create_mode = IOR(create_mode,NF90_SHARE) create_mode = IOR(create_mode,NF90_MPIIO) call MPI_INFO_CREATE(info,status) + _VERIFY(status) call MPI_INFO_SET(info,"cb_buffer_size","16777216",status) + _VERIFY(status) call MPI_INFO_SET(info,"romio_cb_write","enable",status) + _VERIFY(status) if (this%extra_info) then call MPI_INFO_SET(info,"IBM_largeblock_io","true",status) + _VERIFY(status) 
call MPI_INFO_SET(info,"striping_unit","4194304",status) + _VERIFY(status) end if if (this%readers_comm /= MPI_COMM_NULL) then if (this%split_file) then call MPI_COMM_RANK(this%readers_comm,writer_rank,status) + _VERIFY(status) write(fc,'(I0.3)')writer_rank fname = "checkpoint_"//fc//".nc4" status = nf90_open(fname,ior(NF90_NETCDF4,NF90_CLOBBER), this%ncid) + if (status /= NF90_NOERR) then + write(*,*) "Error opening file ",fname + call MPI_Abort(MPI_COMM_WORLD,rc,status) + _VERIFY(status) + end if else fname = "checkpoint.nc4" status = nf90_open(fname,create_mode, this%ncid, comm=this%readers_comm, info=info) + if (status /= NF90_NOERR) then + write(*,*) "Error opening file ",fname + call MPI_Abort(MPI_COMM_WORLD,rc,status) + _VERIFY(status) + end if end if end if else @@ -347,6 +579,7 @@ subroutine open_file(this) end if end if call MPI_BARRIER(MPI_COMM_WORLD,status) + _VERIFY(status) call system_clock(count=sub_end) this%open_file_time = sub_end-sub_start end subroutine @@ -354,13 +587,15 @@ subroutine open_file(this) subroutine read_file(this) class(test_support), intent(inout) :: this - integer :: status,i,l + integer :: status,i,l,rc integer(kind=INT64) :: sub_start,sub_end call MPI_BARRIER(MPI_COMM_WORLD,status) + _VERIFY(status) call system_clock(count=sub_start) call MPI_BARRIER(MPI_COMM_WORLD,status) + _VERIFY(status) do i=1,this%num_arrays if (this%scatter_3d) then call this%read_variable(this%bundle(i)%field_name,this%bundle(i)%field) @@ -371,10 +606,13 @@ subroutine read_file(this) end if enddo call MPI_BARRIER(MPI_COMM_WORLD,status) + _VERIFY(status) call system_clock(count=sub_end) call MPI_BARRIER(MPI_COMM_WORLD,status) + _VERIFY(status) this%read_3d_time = sub_end-sub_start call MPI_BARRIER(MPI_COMM_WORLD,status) + _VERIFY(status) end subroutine subroutine read_variable(this,var_name,local_var) @@ -387,7 +625,7 @@ subroutine read_variable(this,var_name,local_var) integer :: start(3), cnt(3) integer :: jsize, jprev, num_io_rows integer, allocatable 
:: sendcounts(:), displs(:) - integer :: im_world,jm_world,varid + integer :: im_world,jm_world,varid,rc real, allocatable :: var(:,:,:) integer(kind=INT64) :: start_time,end_time,count_rate,lev,start_mpi,end_mpi real(kind=REAL64) :: io_time @@ -398,11 +636,15 @@ subroutine read_variable(this,var_name,local_var) ndes_x = size(this%in) call mpi_comm_rank(this%ycomm,myrow,status) + _VERIFY(status) call mpi_comm_rank(this%scatter_comm,myiorank,status) + _VERIFY(status) call mpi_comm_size(this%scatter_comm,num_io_rows,status) + _VERIFY(status) num_io_rows=num_io_rows/ndes_x allocate (sendcounts(ndes_x*num_io_rows), displs(ndes_x*num_io_rows), stat=status) + _VERIFY(status) if(myiorank==0) then do j=1,num_io_rows @@ -437,7 +679,9 @@ subroutine read_variable(this,var_name,local_var) if (this%do_reads) then if (this%netcdf_reads) then status = nf90_inq_varid(this%ncid,name=var_name ,varid=varid) + _VERIFY(status) status = nf90_get_var(this%ncid,varid,var,start,cnt) + _VERIFY(status) else write(this%ncid)var end if @@ -478,9 +722,13 @@ subroutine read_variable(this,var_name,local_var) call system_clock(count=start_mpi) call mpi_scatterv( buf, sendcounts, displs, MPI_REAL, local_var, size(local_var), MPI_REAL, & 0, this%scatter_comm, status ) + _VERIFY(status) call system_clock(count=end_mpi) this%time_mpi = this%mpi_time + (end_mpi - start_mpi) - if (this%read_barrier) call MPI_Barrier(MPI_COMM_WORLD,status) + if (this%read_barrier) then + call MPI_Barrier(MPI_COMM_WORLD,status) + _VERIFY(status) + end if deallocate(buf, stat=status) deallocate (sendcounts, displs, stat=status) @@ -498,7 +746,7 @@ subroutine read_level(this,var_name,local_var,z_index) integer :: start(3), cnt(3) integer :: jsize, jprev, num_io_rows integer, allocatable :: sendcounts(:), displs(:) - integer :: im_world,jm_world,varid + integer :: im_world,jm_world,varid,rc real, allocatable :: var(:,:) integer(kind=INT64) :: start_time,end_time,count_rate,start_mpi,end_mpi real(kind=REAL64) :: io_time @@ 
-509,11 +757,15 @@ subroutine read_level(this,var_name,local_var,z_index) ndes_x = size(this%in) call mpi_comm_rank(this%ycomm,myrow,status) + _VERIFY(status) call mpi_comm_rank(this%scatter_comm,myiorank,status) + _VERIFY(status) call mpi_comm_size(this%scatter_comm,num_io_rows,status) + _VERIFY(status) num_io_rows=num_io_rows/ndes_x allocate (sendcounts(ndes_x*num_io_rows), displs(ndes_x*num_io_rows), stat=status) + _VERIFY(status) if(myiorank==0) then do j=1,num_io_rows @@ -531,7 +783,9 @@ subroutine read_level(this,var_name,local_var,z_index) jsize=jsize + (this%jn(myrow+j) - this%j1(myrow+j) + 1) enddo allocate(VAR(IM_WORLD,jsize), stat=status) + _VERIFY(status) allocate(buf(IM_WORLD*jsize), stat=status) + _VERIFY(status) start(1) = 1 if (this%split_file) then @@ -548,7 +802,9 @@ subroutine read_level(this,var_name,local_var,z_index) if (this%do_reads) then if (this%netcdf_reads) then status = nf90_inq_varid(this%ncid,name=var_name ,varid=varid) + _VERIFY(status) status = nf90_get_var(this%ncid,varid,var,start,cnt) + _VERIFY(status) else read(this%ncid)var end if @@ -585,9 +841,10 @@ subroutine read_level(this,var_name,local_var,z_index) allocate(buf(0), stat=status) endif - call system_clock(count=start_mpi) + call system_clock(count=start_mpi) call mpi_scatterv( buf, sendcounts, displs, MPI_REAL, local_var, size(local_var), MPI_REAL, & 0, this%scatter_comm, status ) + _VERIFY(status) call system_clock(count=end_mpi) this%mpi_time = this%mpi_time + (end_mpi - start_mpi) if (this%read_barrier) call MPI_Barrier(MPI_COMM_WORLD,status) @@ -599,11 +856,13 @@ subroutine read_level(this,var_name,local_var,z_index) end module +#define I_AM_MAIN #include "MAPL_ErrLog.h" program checkpoint_tester use ESMF use MPI use NetCDF + use fargparse use mapl_restart_support_mod use, intrinsic :: iso_fortran_env, only: REAL64, INT64 implicit NONE @@ -618,24 +877,47 @@ program checkpoint_tester real(kind=REAL64) :: mean_throughput, mean_fs_throughput real(kind=REAL64) :: 
std_throughput, std_fs_throughput + type(StringUnlimitedMap) :: options + type(cli_options) :: cli + call system_clock(count=start_app,count_rate=count_rate) call MPI_Init(status) + _VERIFY(status) call MPI_Barrier(MPI_COMM_WORLD,status) + _VERIFY(status) call MPI_Comm_Rank(MPI_COMM_WORLD,rank,status) + _VERIFY(status) support%my_rank = rank call MPI_Comm_Size(MPI_COMM_WORLD,comm_size,status) + _VERIFY(status) call ESMF_Initialize(logKindFlag=ESMF_LOGKIND_NONE,mpiCommunicator=MPI_COMM_WORLD) call MPI_Barrier(MPI_COMM_WORLD,status) + _VERIFY(status) + + options = parse_arguments() + + call get_cli_options(options,cli) + + ! if we have it, we load the configuration file + if (allocated(cli%config_file)) then + if (rank == 0) write(*,*) "Using configuration file ",cli%config_file + if (rank == 0) write(*,*) "NOTE: This overrides any other command line options" + call support%set_parameters_by_config(cli%config_file) + else + call support%set_parameters_by_cli(cli) + end if - call support%set_parameters("restart_benchmark.rc") call MPI_Barrier(MPI_COMM_WORLD,status) + _VERIFY(status) call support%create_arrays() call MPI_Barrier(MPI_COMM_WORLD,status) + _VERIFY(status) call support%create_communicators() call MPI_Barrier(MPI_COMM_WORLD,status) + _VERIFY(status) allocate(total_throughput(support%n_trials)) allocate(all_proc_throughput(support%n_trials)) @@ -645,14 +927,18 @@ program checkpoint_tester call system_clock(count=start_read) call MPI_Barrier(MPI_COMM_WORLD,status) + _VERIFY(status) call support%open_file() call MPI_Barrier(MPI_COMM_WORLD,status) + _VERIFY(status) call support%read_file() call MPI_Barrier(MPI_COMM_WORLD,status) + _VERIFY(status) call support%close_file() call MPI_Barrier(MPI_COMM_WORLD,status) + _VERIFY(status) call system_clock(count=end_time) read_time = real(end_time-start_read,kind=REAL64)/real(count_rate,kind=REAL64) @@ -664,10 +950,14 @@ program checkpoint_tester if (support%readers_comm /= MPI_COMM_NULL) then call 
MPI_COMM_SIZE(support%readers_comm,reader_size,status) + _VERIFY(status) call MPI_COMM_RANK(support%readers_comm,reader_rank,status) + _VERIFY(status) call MPI_AllReduce(support%data_volume,average_volume,1,MPI_DOUBLE_PRECISION,MPI_SUM,support%readers_comm,status) + _VERIFY(status) average_volume = average_volume/real(reader_size,kind=REAL64) call MPI_AllReduce(support%time_reading,average_time,1,MPI_DOUBLE_PRECISION,MPI_SUM,support%readers_comm,status) + _VERIFY(status) average_time = average_time/real(reader_size,kind=REAL64) end if if (rank == 0) then From 2528357300d477edce4347dc8fb7045e2df15e6c Mon Sep 17 00:00:00 2001 From: Matthew Thompson Date: Wed, 16 Oct 2024 07:27:38 -0400 Subject: [PATCH 7/9] Update changelog --- CHANGELOG.md | 2 ++ 1 file changed, 2 insertions(+) diff --git a/CHANGELOG.md b/CHANGELOG.md index c5eb5be4b69d..9e91f66f39ac 100644 --- a/CHANGELOG.md +++ b/CHANGELOG.md @@ -11,6 +11,8 @@ and this project adheres to [Semantic Versioning](https://semver.org/spec/v2.0.0 ### Changed +- Added commandline options to `checkpoint_benchmark.x` and `restart_benchmark.x` to allow for easier testing of different configurations. 
Note that the old configuration file style of input is allowed via the `--config_file` option (which overrides any other command line options) + ### Fixed ### Removed From 654a45393f9ca86936997e7778e4d273560e504f Mon Sep 17 00:00:00 2001 From: Matthew Thompson Date: Fri, 18 Oct 2024 14:06:26 -0400 Subject: [PATCH 8/9] Use positive options --- benchmarks/io/checkpoint_simulator/README.md | 28 ++++++------- .../checkpoint_simulator.F90 | 40 +++++++++---------- benchmarks/io/restart_simulator/README.md | 26 ++++++------ .../restart_simulator/restart_simulator.F90 | 32 +++++++-------- 4 files changed, 63 insertions(+), 63 deletions(-) diff --git a/benchmarks/io/checkpoint_simulator/README.md b/benchmarks/io/checkpoint_simulator/README.md index c74a048512c2..a576c2d8aa83 100644 --- a/benchmarks/io/checkpoint_simulator/README.md +++ b/benchmarks/io/checkpoint_simulator/README.md @@ -5,20 +5,20 @@ The code has the following command line options: optional arguments: -h, --help This message. --config_file The configuration file to use - --nx The number of cells in the x direction (default=4) - --ny The number of cells in the y direction (default=4) - --im_world The resolution of the cubed sphere (default=90) - --lm The number of levels in each 3D variable (default=137) - --num_writers The number of processes that will write (default=1) - --num_arrays The number of 3D arrays to write (default=5) - --ntrials The number of trials to run (default=3) - --split_file Split the file into multiple files (default=False) - --gather_3d Gather 3D data (default=False) - --write_barrier Add a write barrier (default=False) - --no_random_data Do not use random data (default=False) - --do_no_writes Do not write data (default=False) - --no_netcdf_writes Do not write data as netcdf (default=False) - --no_chunking Do not chunk (default=False) + --nx The number of cells in the x direction (default: 4) + --ny The number of cells in the y direction (default: 4) + --im_world The resolution of the 
cubed sphere (default: 90) + --lm The number of levels in each 3D variable (default: 137) + --num_writers The number of processes that will write (default: 1) + --num_arrays The number of 3D arrays to write (default: 5) + --ntrials The number of trials to run (default: 3) + --split_file Split the file into multiple files (default: do not split) + --gather_3d Gather all levels at once instead of one at a time (default: gather one at a time) + --write_barrier Add a barrier after every write (default: no barrier) + --static_data Use static data (rank of process) instead of random data (default: random data) + --suppress_writes Do not write data (default: write data) + --write_binary Write binary data instead of NetCDF (default: write NetCDF) + --no_chunking Do not chunk output (default: chunk the output) ``` NOTE 1: If you specify a `config_file` it must be an ESMF Config file with the following options: diff --git a/benchmarks/io/checkpoint_simulator/checkpoint_simulator.F90 b/benchmarks/io/checkpoint_simulator/checkpoint_simulator.F90 index 5b213d271b57..7643f957e906 100644 --- a/benchmarks/io/checkpoint_simulator/checkpoint_simulator.F90 +++ b/benchmarks/io/checkpoint_simulator/checkpoint_simulator.F90 @@ -94,79 +94,79 @@ function parse_arguments() result(options) type="string") call parser%add_argument("--nx", & - help="The number of cells in the x direction (default=4)", & + help="The number of cells in the x direction (default: 4)", & action="store", & type="integer", & default=4) call parser%add_argument("--ny", & - help="The number of cells in the y direction (default=4)", & + help="The number of cells in the y direction (default: 4)", & action="store", & type="integer", & default=4) call parser%add_argument("--im_world", & - help="The resolution of the cubed sphere (default=90)", & + help="The resolution of the cubed sphere (default: 90)", & action="store", & type="integer", & default=90) call parser%add_argument("--lm", & - help="The number of levels in each 
3D variable (default=137)", & + help="The number of levels in each 3D variable (default: 137)", & action="store", & type="integer", & default=137) call parser%add_argument("--num_writers", & - help="The number of processes that will write (default=1)", & + help="The number of processes that will write (default: 1)", & action="store", & type="integer", & default=1) call parser%add_argument("--num_arrays", & - help="The number of 3D arrays to write (default=5)", & + help="The number of 3D arrays to write (default: 5)", & action="store", & type="integer", & default=5) call parser%add_argument("--ntrials", & - help="The number of trials to run (default=3)", & + help="The number of trials to run (default: 3)", & action="store", & type="integer", & default=3) call parser%add_argument("--split_file", & - help="Split the file into multiple files (default=False)", & + help="Split the file into multiple files (default: do not split)", & action="store_true", & default=.false.) call parser%add_argument("--gather_3d", & - help="Gather 3D data (default=False)", & + help="Gather all levels at once instead of one at a time (default: gather one at a time)", & action="store_true", & default=.false.) call parser%add_argument("--write_barrier", & - help="Add a write barrier (default=False)", & + help="Add a barrier after every write (default: no barrier)", & action="store_true", & default=.false.) - call parser%add_argument("--no_random_data", & - help="Do not use random data (default=False)", & + call parser%add_argument("--static_data", & + help="Use static data (rank of process) instead of random data (default: random data)", & action="store_true", & default=.False.) - call parser%add_argument("--do_no_writes", & - help="Do not write data (default=False)", & + call parser%add_argument("--suppress_writes", & + help="Do not write data (default: write data)", & action="store_true", & default=.False.) 
- call parser%add_argument("--no_netcdf_writes", & - help="Do not write data as netcdf (default=False)", & + call parser%add_argument("--write_binary", & + help="Write binary data instead of NetCDF (default: write NetCDF)", & action="store_true", & default=.false.) call parser%add_argument("--no_chunking", & - help="Do not chunk (default=False)", & + help="Do not chunk output (default: chunk the output)", & action="store_true", & default=.false.) @@ -213,15 +213,15 @@ subroutine get_cli_options(options, cli) option => options%at("write_barrier") if (associated(option)) call cast(option, cli%write_barrier) - option => options%at("no_random_data") + option => options%at("static_data") if (associated(option)) call cast(option, tmp) cli%random_data = .not. tmp - option => options%at("do_no_writes") + option => options%at("suppress_writes") if (associated(option)) call cast(option, tmp) cli%do_writes = .not. tmp - option => options%at("no_netcdf_writes") + option => options%at("write_binary") if (associated(option)) call cast(option, tmp) cli%netcdf_writes = .not. tmp diff --git a/benchmarks/io/restart_simulator/README.md b/benchmarks/io/restart_simulator/README.md index a5e31dd72dce..fe36e1567ff6 100644 --- a/benchmarks/io/restart_simulator/README.md +++ b/benchmarks/io/restart_simulator/README.md @@ -5,19 +5,19 @@ The code has the following command line options: ``` -h, --help This message. 
--config_file The configuration file to use - --nx The number of cells in the x direction (default=4) - --ny The number of cells in the y direction (default=4) - --im_world The resolution of the cubed sphere (default=90) - --lm The number of levels in each 3D variable (default=137) - --num_readers The number of processes that will read (default=1) - --num_arrays The number of 3D arrays to read (default=5) - --ntrials The number of trials to run (default=3) - --split_file Split the file into multiple files (default=False) - --scatter_3d Scatter 3D data (default=False) - --read_barrier Add a read barrier (default=False) - --no_random_data Do not random data (default=False) - --do_no_reads Do not read data (default=False) - --no_netcdf_reads Do not read data as netcdf (default=False) + --nx The number of cells in the x direction (default: 4) + --ny The number of cells in the y direction (default: 4) + --im_world The resolution of the cubed sphere (default: 90) + --lm The number of levels in each 3D variable (default: 137) + --num_readers The number of processes that will read (default: 1) + --num_arrays The number of 3D arrays to read (default: 5) + --ntrials The number of trials to run (default: 3) + --split_file Read split files instead of a single file (default: read single file) + --scatter_3d Scatter all the levels at once instead of one at a time (default: scatter one at a time) + --read_barrier Add a barrier after every read (default: no barrier) + --static_data Use static data (rank of process) instead of random data (default: random data) + --suppress_reads Do not read data (default: read data) + --read_binary Read binary data instead of netCDF (default: netCDF data) ``` NOTE 1: This program *REQUIRES* a file called `checkpoint.nc4` that is generated by the `checkpoint_benchmark.x` code diff --git a/benchmarks/io/restart_simulator/restart_simulator.F90 b/benchmarks/io/restart_simulator/restart_simulator.F90 index d57facd1f464..a5f777e984ec 100644 --- 
a/benchmarks/io/restart_simulator/restart_simulator.F90 +++ b/benchmarks/io/restart_simulator/restart_simulator.F90 @@ -92,74 +92,74 @@ function parse_arguments() result(options) type="string") call parser%add_argument("--nx", & - help="The number of cells in the x direction (default=4)", & + help="The number of cells in the x direction (default: 4)", & action="store", & type="integer", & default=4) call parser%add_argument("--ny", & - help="The number of cells in the y direction (default=4)", & + help="The number of cells in the y direction (default: 4)", & action="store", & type="integer", & default=4) call parser%add_argument("--im_world", & - help="The resolution of the cubed sphere (default=90)", & + help="The resolution of the cubed sphere (default: 90)", & action="store", & type="integer", & default=90) call parser%add_argument("--lm", & - help="The number of levels in each 3D variable (default=137)", & + help="The number of levels in each 3D variable (default: 137)", & action="store", & type="integer", & default=137) call parser%add_argument("--num_readers", & - help="The number of processes that will read (default=1)", & + help="The number of processes that will read (default: 1)", & action="store", & type="integer", & default=1) call parser%add_argument("--num_arrays", & - help="The number of 3D arrays to read (default=5)", & + help="The number of 3D arrays to read (default: 5)", & action="store", & type="integer", & default=5) call parser%add_argument("--ntrials", & - help="The number of trials to run (default=3)", & + help="The number of trials to run (default: 3)", & action="store", & type="integer", & default=3) call parser%add_argument("--split_file", & - help="Split the file into multiple files (default=False)", & + help="Read split files instead of a single file (default: read single file)", & action="store_true", & default=.false.) 
call parser%add_argument("--scatter_3d", & - help="Scatter 3D data (default=False)", & + help="Scatter all the levels at once instead of one at a time (default: scatter one at a time)", & action="store_true", & default=.false.) call parser%add_argument("--read_barrier", & - help="Add a read barrier (default=False)", & + help="Add a barrier after every read (default: no barrier)", & action="store_true", & default=.false.) - call parser%add_argument("--no_random_data", & - help="Do not random data (default=False)", & + call parser%add_argument("--static_data", & + help="Use static data (rank of process) instead of random data (default: random data)", & action="store_true", & default=.false.) - call parser%add_argument("--do_no_reads", & - help="Do not read data (default=False)", & + call parser%add_argument("--suppress_reads", & + help="Do not read data (default: read data)", & action="store_true", & default=.false.) - call parser%add_argument("--no_netcdf_reads", & - help="Do not read data as netcdf (default=False)", & + call parser%add_argument("--read_binary", & + help="Read binary data instead of netCDF (default: netCDF data)", & action="store_true", & default=.false.) 
From 7cfb7d629f7b1c748b464d5560e7b4bf81fa876f Mon Sep 17 00:00:00 2001 From: Matthew Thompson Date: Wed, 23 Oct 2024 07:38:06 -0400 Subject: [PATCH 9/9] Update readmes --- benchmarks/io/checkpoint_simulator/README.md | 2 +- .../checkpoint_simulator.F90 | 2 +- benchmarks/io/restart_simulator/README.md | 21 +++++++++---------- .../restart_simulator/restart_simulator.F90 | 6 +++--- 4 files changed, 15 insertions(+), 16 deletions(-) diff --git a/benchmarks/io/checkpoint_simulator/README.md b/benchmarks/io/checkpoint_simulator/README.md index a576c2d8aa83..bce5cf452ab0 100644 --- a/benchmarks/io/checkpoint_simulator/README.md +++ b/benchmarks/io/checkpoint_simulator/README.md @@ -34,7 +34,7 @@ NOTE 1: If you specify a `config_file` it must be an ESMF Config file with the f - "SPLIT\_FILE:" default `.false.`, if `.true.`, each writer writes to and independent file - "WRITE\_BARRIER:" default `.false.`, add a barrier before each write to for synchronization - "DO\_WRITES:" default `.true.`, if `.false.` skips writing (so just an mpi test at that point) -- "NTRIALS:" default 1, the number of trials to make writing +- "NTRIALS:" default 3, the number of trials to make writing - "RANDOM\_DATA:" default `.true.`, if `.true.` will arrays with random data, if `.false.` sets the array to the rank of the process NOTE 2: that whatever you set NX and NY to the program must be run on `6*NX*NY` processors and the number of writers must evenly divide `6*NY` diff --git a/benchmarks/io/checkpoint_simulator/checkpoint_simulator.F90 b/benchmarks/io/checkpoint_simulator/checkpoint_simulator.F90 index 7643f957e906..3627e50ffde3 100644 --- a/benchmarks/io/checkpoint_simulator/checkpoint_simulator.F90 +++ b/benchmarks/io/checkpoint_simulator/checkpoint_simulator.F90 @@ -257,7 +257,7 @@ subroutine set_parameters_by_config(this,config_file) this%write_barrier = get_logical_key(config,"WRITE_BARRIER:",.false.) this%do_writes = get_logical_key(config,"DO_WRITES:",.true.) 
this%netcdf_writes = get_logical_key(config,"NETCDF_WRITES:",.true.) - this%n_trials = get_integer_key(config,"NTRIALS:",1) + this%n_trials = get_integer_key(config,"NTRIALS:",3) this%random = get_logical_key(config,"RANDOM_DATA:",.true.) this%write_counter = 0 diff --git a/benchmarks/io/restart_simulator/README.md b/benchmarks/io/restart_simulator/README.md index fe36e1567ff6..d89c48741b53 100644 --- a/benchmarks/io/restart_simulator/README.md +++ b/benchmarks/io/restart_simulator/README.md @@ -1,4 +1,4 @@ -This benchmark simulates writing a series of 3D variables of a given cubed-sphere resolution to a file using the same strategies as used by the real checkpoint code in MAPL +This benchmark simulates reading a series of 3D variables of a given cubed-sphere resolution from a file using the same strategies as used by the real checkpoint code in MAPL The code has the following command line options: @@ -20,7 +20,7 @@ The code has the following command line options: --read_binary Read binary data instead of netCDF (default: netCDF data) ``` -NOTE 1: This program *REQUIRES* a file called `checkpoint.nc4` that is generated by the `checkpoint_benchmark.x` code +NOTE 1: This program *REQUIRES* a file called `checkpoint.nc4` that is generated by the `checkpoint_benchmark.x` code.
NOTE 2: If you specify a `config_file` it must be an ESMF Config file with the following options: @@ -28,14 +28,13 @@ NOTE 2: If you specify a `config_file` it must be an ESMF Config file with the f - "NY:" the y distribution for each face - "IM\_WORLD:" the cube resolution - "LM:" the number of levels -- "NUM\_WRITERS:" the number of writing processes either to a single or independent files -- "NUM\_ARRAYS:" the number of 3D variables to write to the file -- "CHUNK:" whether to chunk, default `.true.` -- "SCATTER\_3D:" gather all levels at once (default is `.false` which means a level at a time is gathered) -- "SPLIT\_FILE:" default `.false`, if `.true.`, each writer writes to and independent file -- "WRITE\_BARRIER:" default `.false`, add a barrier before each write to for synchronization -- "DO\_WRITES:" default `.true.`, if `.false` skips writing (so just an mpi test at that point) -- "NTRIAL:" default 1, the number of trials to make writing +- "NUM\_READERS:" the number of reading processes either from a single or independent files +- "NUM\_ARRAYS:" the number of 3D variables to read from the file +- "SCATTER\_3D:" scatter all levels at once (default is `.false.` which means a level at a time is scattered) +- "SPLIT\_FILE:" default `.false.`, if `.true.`, each reader reads from an independent file +- "READ\_BARRIER:" default `.false.`, add a barrier before each read for synchronization +- "DO\_READS:" default `.true.`, if `.false.` skips reading (so just an mpi test at that point) +- "NTRIALS:" default 3, the number of trials to make for each read - "RANDOM\_DATA:" default `.true.`, if `.true.` will arrays with random data, if `.false` sets the array to the rank of the process -NOTE 3: whatever you set NX and NY to the program must be run on `6*NY*NY` processors and the number of writers must evenly divide `6*NY` +NOTE 3: whatever you set NX and NY to the program must be run on `6*NX*NY` processors and the number of readers must evenly divide `6*NY` diff --git
a/benchmarks/io/restart_simulator/restart_simulator.F90 b/benchmarks/io/restart_simulator/restart_simulator.F90 index a5f777e984ec..710b59c2c6c3 100644 --- a/benchmarks/io/restart_simulator/restart_simulator.F90 +++ b/benchmarks/io/restart_simulator/restart_simulator.F90 @@ -241,10 +241,10 @@ subroutine set_parameters_by_config(this,config_file) this%scatter_3d = get_logical_key(config,"SCATTER_3D:",.false.) this%split_file = get_logical_key(config,"SPLIT_FILE:",.false.) this%extra_info = get_logical_key(config,"EXTRA_INFO:",.false.) - this%read_barrier = get_logical_key(config,"read_BARRIER:",.false.) + this%read_barrier = get_logical_key(config,"READ_BARRIER:",.false.) this%do_reads = get_logical_key(config,"DO_READS:",.true.) - this%netcdf_reads = get_logical_key(config,"netcdf_reads:",.true.) - this%n_trials = get_integer_key(config,"NTRIALS:",1) + this%netcdf_reads = get_logical_key(config,"NETCDF_READS:",.true.) + this%n_trials = get_integer_key(config,"NTRIALS:",3) this%random = get_logical_key(config,"RANDOM_DATA:",.true.) this%read_counter = 0