diff --git a/setup.py b/setup.py
index 874cef6..9e4494d 100644
--- a/setup.py
+++ b/setup.py
@@ -93,10 +93,14 @@ def pybind11_extension(m):
         'console_scripts': [
             'sbench=stencil_benchmarks.scripts.sbench:main',
             'sbench-analyze=stencil_benchmarks.scripts.sbench_analyze:main',
+            'sbench-h100-collection=stencil_benchmarks.scripts'
+            '.sbench_h100_collection:main',
             'sbench-a100-collection=stencil_benchmarks.scripts'
             '.sbench_a100_collection:main',
             'sbench-v100-collection=stencil_benchmarks.scripts'
             '.sbench_v100_collection:main',
+            'sbench-p100-collection=stencil_benchmarks.scripts'
+            '.sbench_p100_collection:main',
             'sbench-mi50-collection=stencil_benchmarks.scripts'
             '.sbench_mi50_collection:main',
             'sbench-mi100-collection=stencil_benchmarks.scripts'
diff --git a/stencil_benchmarks/scripts/sbench_h100_collection.py b/stencil_benchmarks/scripts/sbench_h100_collection.py
new file mode 100644
index 0000000..71b7f40
--- /dev/null
+++ b/stencil_benchmarks/scripts/sbench_h100_collection.py
@@ -0,0 +1,184 @@
+# Stencil Benchmarks
+#
+# Copyright (c) 2017-2021, ETH Zurich and MeteoSwiss
+# All rights reserved.
+#
+# Redistribution and use in source and binary forms, with or without
+# modification, are permitted provided that the following conditions are met:
+#
+# 1. Redistributions of source code must retain the above copyright notice,
+# this list of conditions and the following disclaimer.
+#
+# 2. Redistributions in binary form must reproduce the above copyright notice,
+# this list of conditions and the following disclaimer in the documentation
+# and/or other materials provided with the distribution.
+#
+# 3. Neither the name of the copyright holder nor the names of its contributors
+# may be used to endorse or promote products derived from this software without
+# specific prior written permission.
+#
+# THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
+# AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
+# IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
+# ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT HOLDER OR CONTRIBUTORS BE
+# LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
+# CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
+# SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
+# INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
+# CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
+# ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
+# POSSIBILITY OF SUCH DAMAGE.
+#
+# SPDX-License-Identifier: BSD-3-Clause
+
+import click
+
+from stencil_benchmarks.benchmarks_collection.stencils.cuda_hip import (
+    basic, horizontal_diffusion as hdiff, vertical_advection as vadv)
+from stencil_benchmarks.tools.multirun import (Configuration,
+                                               run_scaling_benchmark,
+                                               truncate_block_size_to_domain,
+                                               default_kwargs)
+
+
+@click.group()
+def main():
+    """Stencil benchmark collection for NVIDIA H100 GPUs (sm_90)."""
+
+
+# Keyword arguments shared by every benchmark command in this collection.
+common_kwargs = default_kwargs(backend='cuda',
+                               compiler='nvcc',
+                               gpu_architecture='sm_90',
+                               verify=False,
+                               dry_runs=1,
+                               alignment=128,
+                               dtype='float32')
+
+
+@main.command()
+@click.argument('output', type=click.Path())
+@click.option('--executions', '-e', type=int, default=101)
+@click.option('--option', '-o', multiple=True)
+def basic_bandwidth(output, executions, option):
+    """Run the basic stencil benchmarks and write OUTPUT as CSV."""
+    kwargs = common_kwargs(
+        option,
+        loop='3D',
+        block_size=(128, 2, 1),
+        halo=(1, 1, 1),
+    )
+
+    # The 'stream' case overrides loop, block size and halo of the defaults.
+    stream_kwargs = kwargs.copy()
+    stream_kwargs.update(loop='1D', block_size=(1024, 1, 1), halo=(0, 0, 0))
+
+    configurations = [
+        Configuration(basic.Copy, name='stream', **stream_kwargs),
+        Configuration(basic.Empty, name='empty', **kwargs),
+        Configuration(basic.Copy, name='copy', **kwargs),
+        Configuration(basic.OnesidedAverage, name='avg-i', axis=0, **kwargs),
+        Configuration(basic.OnesidedAverage, name='avg-j', axis=1, **kwargs),
+        Configuration(basic.OnesidedAverage, name='avg-k', axis=2, **kwargs),
+        Configuration(basic.SymmetricAverage,
+                      name='sym-avg-i',
+                      axis=0,
+                      **kwargs),
+        Configuration(basic.SymmetricAverage,
+                      name='sym-avg-j',
+                      axis=1,
+                      **kwargs),
+        Configuration(basic.SymmetricAverage,
+                      name='sym-avg-k',
+                      axis=2,
+                      **kwargs),
+        Configuration(basic.Laplacian,
+                      name='lap-ij',
+                      along_x=True,
+                      along_y=True,
+                      along_z=False,
+                      **kwargs)
+    ]
+
+    table = run_scaling_benchmark(configurations, executions)
+    table.to_csv(output)
+
+
+@main.command()
+@click.argument('output', type=click.Path())
+@click.option('--executions', '-e', type=int, default=101)
+@click.option('--option', '-o', multiple=True)
+def horizontal_diffusion_bandwidth(output, executions, option):
+    """Run the horizontal diffusion benchmarks and write OUTPUT as CSV."""
+    kwargs = common_kwargs(option)
+
+    configurations = [
+        Configuration(hdiff.Classic, block_size=(32, 12, 1), **kwargs),
+        Configuration(hdiff.OnTheFly,
+                      block_size=(32, 16, 1),
+                      loop='3D',
+                      **kwargs),
+        Configuration(hdiff.OnTheFlyIncache, block_size=(32, 8, 1), **kwargs),
+        Configuration(hdiff.JScanSharedMem, block_size=(256, 32, 1), **kwargs),
+        Configuration(hdiff.JScanOtfIncache, block_size=(128, 4, 1), **kwargs),
+        Configuration(hdiff.JScanOtf, block_size=(128, 4, 1), **kwargs),
+        Configuration(hdiff.JScanShuffleIncache,
+                      block_size=(28, 8, 2),
+                      **kwargs),
+        Configuration(hdiff.JScanShuffle, block_size=(28, 8, 2), **kwargs),
+        Configuration(hdiff.JScanShuffleSystolic,
+                      block_size=(28, 4, 3),
+                      **kwargs)
+    ]
+
+    def truncate_block_size_to_domain_if_possible(**kwargs):
+        """Truncate the block size unless its first extent is 28."""
+        # NOTE(review): 28-wide blocks are kept as configured, presumably
+        # because the shuffle variants need that exact width -- confirm.
+        if kwargs['block_size'][0] != 28:
+            return truncate_block_size_to_domain(**kwargs)
+        return kwargs
+
+    table = run_scaling_benchmark(
+        configurations,
+        executions,
+        preprocess_args=truncate_block_size_to_domain_if_possible)
+    table.to_csv(output)
+
+
+@main.command()
+@click.argument('output', type=click.Path())
+@click.option('--executions', '-e', type=int, default=101)
+@click.option('--option', '-o', multiple=True)
+def vertical_advection_bandwidth(output, executions, option):
+    """Run the vertical advection benchmarks and write OUTPUT as CSV."""
+    kwargs = common_kwargs(option)
+
+    configurations = [
+        Configuration(vadv.Classic,
+                      block_size=(128, 1),
+                      unroll_factor=8,
+                      **kwargs),
+        Configuration(vadv.LocalMem,
+                      block_size=(128, 1),
+                      unroll_factor=28,
+                      **kwargs),
+        Configuration(vadv.SharedMem,
+                      block_size=(64, 1),
+                      unroll_factor=0,
+                      **kwargs),
+        Configuration(vadv.LocalMemMerged,
+                      block_size=(128, 1),
+                      unroll_factor=2,
+                      **kwargs)
+    ]
+
+    table = run_scaling_benchmark(
+        configurations,
+        executions,
+        preprocess_args=truncate_block_size_to_domain)
+    table.to_csv(output)
+
+
+if __name__ == '__main__':
+    main()
diff --git a/stencil_benchmarks/scripts/sbench_p100_collection.py b/stencil_benchmarks/scripts/sbench_p100_collection.py
new file mode 100644
index 0000000..4eb8f45
--- /dev/null
+++ b/stencil_benchmarks/scripts/sbench_p100_collection.py
@@ -0,0 +1,184 @@
+# Stencil Benchmarks
+#
+# Copyright (c) 2017-2021, ETH Zurich and MeteoSwiss
+# All rights reserved.
+#
+# Redistribution and use in source and binary forms, with or without
+# modification, are permitted provided that the following conditions are met:
+#
+# 1. Redistributions of source code must retain the above copyright notice,
+# this list of conditions and the following disclaimer.
+#
+# 2. Redistributions in binary form must reproduce the above copyright notice,
+# this list of conditions and the following disclaimer in the documentation
+# and/or other materials provided with the distribution.
+#
+# 3. Neither the name of the copyright holder nor the names of its contributors
+# may be used to endorse or promote products derived from this software without
+# specific prior written permission.
+#
+# THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
+# AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
+# IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
+# ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT HOLDER OR CONTRIBUTORS BE
+# LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
+# CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
+# SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
+# INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
+# CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
+# ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
+# POSSIBILITY OF SUCH DAMAGE.
+#
+# SPDX-License-Identifier: BSD-3-Clause
+
+import click
+
+from stencil_benchmarks.benchmarks_collection.stencils.cuda_hip import (
+    basic, horizontal_diffusion as hdiff, vertical_advection as vadv)
+from stencil_benchmarks.tools.multirun import (Configuration,
+                                               run_scaling_benchmark,
+                                               truncate_block_size_to_domain,
+                                               default_kwargs)
+
+
+@click.group()
+def main():
+    """Stencil benchmark collection for NVIDIA P100 GPUs (sm_60)."""
+
+
+# Keyword arguments shared by every benchmark command in this collection.
+common_kwargs = default_kwargs(backend='cuda',
+                               compiler='nvcc',
+                               gpu_architecture='sm_60',
+                               verify=False,
+                               dry_runs=1,
+                               alignment=128,
+                               dtype='float32')
+
+
+@main.command()
+@click.argument('output', type=click.Path())
+@click.option('--executions', '-e', type=int, default=101)
+@click.option('--option', '-o', multiple=True)
+def basic_bandwidth(output, executions, option):
+    """Run the basic stencil benchmarks and write OUTPUT as CSV."""
+    kwargs = common_kwargs(
+        option,
+        loop='3D',
+        block_size=(128, 2, 1),
+        halo=(1, 1, 1),
+    )
+
+    # The 'stream' case overrides loop, block size and halo of the defaults.
+    stream_kwargs = kwargs.copy()
+    stream_kwargs.update(loop='1D', block_size=(1024, 1, 1), halo=(0, 0, 0))
+
+    configurations = [
+        Configuration(basic.Copy, name='stream', **stream_kwargs),
+        Configuration(basic.Empty, name='empty', **kwargs),
+        Configuration(basic.Copy, name='copy', **kwargs),
+        Configuration(basic.OnesidedAverage, name='avg-i', axis=0, **kwargs),
+        Configuration(basic.OnesidedAverage, name='avg-j', axis=1, **kwargs),
+        Configuration(basic.OnesidedAverage, name='avg-k', axis=2, **kwargs),
+        Configuration(basic.SymmetricAverage,
+                      name='sym-avg-i',
+                      axis=0,
+                      **kwargs),
+        Configuration(basic.SymmetricAverage,
+                      name='sym-avg-j',
+                      axis=1,
+                      **kwargs),
+        Configuration(basic.SymmetricAverage,
+                      name='sym-avg-k',
+                      axis=2,
+                      **kwargs),
+        Configuration(basic.Laplacian,
+                      name='lap-ij',
+                      along_x=True,
+                      along_y=True,
+                      along_z=False,
+                      **kwargs)
+    ]
+
+    table = run_scaling_benchmark(configurations, executions)
+    table.to_csv(output)
+
+
+@main.command()
+@click.argument('output', type=click.Path())
+@click.option('--executions', '-e', type=int, default=101)
+@click.option('--option', '-o', multiple=True)
+def horizontal_diffusion_bandwidth(output, executions, option):
+    """Run the horizontal diffusion benchmarks and write OUTPUT as CSV."""
+    kwargs = common_kwargs(option)
+
+    configurations = [
+        Configuration(hdiff.Classic, block_size=(32, 16, 1), **kwargs),
+        Configuration(hdiff.OnTheFly,
+                      block_size=(32, 16, 1),
+                      loop='3D',
+                      **kwargs),
+        Configuration(hdiff.OnTheFlyIncache, block_size=(32, 8, 1), **kwargs),
+        Configuration(hdiff.JScanSharedMem, block_size=(256, 32, 1), **kwargs),
+        Configuration(hdiff.JScanOtfIncache, block_size=(128, 4, 1), **kwargs),
+        Configuration(hdiff.JScanOtf, block_size=(128, 4, 1), **kwargs),
+        Configuration(hdiff.JScanShuffleIncache,
+                      block_size=(28, 8, 2),
+                      **kwargs),
+        Configuration(hdiff.JScanShuffle, block_size=(28, 8, 2), **kwargs),
+        Configuration(hdiff.JScanShuffleSystolic,
+                      block_size=(28, 4, 3),
+                      **kwargs)
+    ]
+
+    def truncate_block_size_to_domain_if_possible(**kwargs):
+        """Truncate the block size unless its first extent is 28."""
+        # NOTE(review): 28-wide blocks are kept as configured, presumably
+        # because the shuffle variants need that exact width -- confirm.
+        if kwargs['block_size'][0] != 28:
+            return truncate_block_size_to_domain(**kwargs)
+        return kwargs
+
+    table = run_scaling_benchmark(
+        configurations,
+        executions,
+        preprocess_args=truncate_block_size_to_domain_if_possible)
+    table.to_csv(output)
+
+
+@main.command()
+@click.argument('output', type=click.Path())
+@click.option('--executions', '-e', type=int, default=101)
+@click.option('--option', '-o', multiple=True)
+def vertical_advection_bandwidth(output, executions, option):
+    """Run the vertical advection benchmarks and write OUTPUT as CSV."""
+    kwargs = common_kwargs(option)
+
+    configurations = [
+        Configuration(vadv.Classic,
+                      block_size=(512, 1),
+                      unroll_factor=8,
+                      **kwargs),
+        Configuration(vadv.LocalMem,
+                      block_size=(128, 1),
+                      unroll_factor=28,
+                      **kwargs),
+        Configuration(vadv.SharedMem,
+                      block_size=(64, 1),
+                      unroll_factor=0,
+                      **kwargs),
+        Configuration(vadv.LocalMemMerged,
+                      block_size=(512, 1),
+                      unroll_factor=2,
+                      **kwargs)
+    ]
+
+    table = run_scaling_benchmark(
+        configurations,
+        executions,
+        preprocess_args=truncate_block_size_to_domain)
+    table.to_csv(output)
+
+
+if __name__ == '__main__':
+    main()