From dbafd363ab9da640edf7b8bf2bc8716d0d83ad7c Mon Sep 17 00:00:00 2001 From: root <26priya11@gmail.com> Date: Wed, 30 Oct 2024 17:31:26 -0700 Subject: [PATCH 01/14] add nvfuser options --- thunder/executors/nvfuserex_impl.py | 23 +++++++++++++++++++--- thunder/tests/test_nvfuser.py | 30 +++++++++++++++++++++++++++++ 2 files changed, 50 insertions(+), 3 deletions(-) diff --git a/thunder/executors/nvfuserex_impl.py b/thunder/executors/nvfuserex_impl.py index 561f838a7..a7f3a83cf 100644 --- a/thunder/executors/nvfuserex_impl.py +++ b/thunder/executors/nvfuserex_impl.py @@ -437,7 +437,10 @@ class FusionDefinitionWrapper: cache_clear: None | Callable = None last_used: None | FusionDefinition = None last_inputs: None | Sequence[tuple] = None - store_inputs: bool = False + store_inputs: bool = False, + profile: bool = False, + _enable_options: list = [], + _disable_options: list = [], def __call__(self, *args): fd = self.get_fd(self.to_descriptors(args)) @@ -446,8 +449,14 @@ def __call__(self, *args): if self.store_inputs: self.last_inputs = args + kwargs = {} # Set device if set in one of the "factory" methods like full, iota, or uniform - kwargs = {"device": fd._selected_device} if hasattr(fd, "_selected_device") else {} + if hasattr(fd, "_selected_device"): + kwargs["device"] = fd._selected_device + if not self._enable_options: + kwargs["_enable_options"] = self._enable_options + if not self._disable_options: + kwargs["_disable_options"] = self._disable_options with annotate_for_profile(self.name): return fd.execute(args, **kwargs) @@ -540,6 +549,12 @@ def create_fusion_definition_wrapper( store_inputs: None | bool = get_compile_option( "nv_store_fusion_inputs", "Allow nvFuser to store fusion inputs for repro." ) + _enable_options: None | list = get_compile_option( + "nv_enable_options", "List of NVFUSER_ENABLE options to set." + ) + _disable_options: None | list = get_compile_option( + "nv_disable_options", "List of NVFUSER_DISABLE options to set." + ) tensor_indices = [] for idx, x in enumerate(sorted_unique_inputs): @@ -553,7 +568,7 @@ def create_fusion_definition_wrapper( def get_fd(input_descriptors) -> FusionDefinition: # A closure over local trace and region return create_fd(bsyms, input_descriptors, sorted_unique_inputs, sorted_unique_outputs) - + # breakpoint() fdw = FusionDefinitionWrapper( get_fd, partial(to_descriptors, sorted_unique_inputs), @@ -561,6 +576,8 @@ def get_fd(input_descriptors) -> FusionDefinition: get_fd.cache_info, get_fd.cache_clear, store_inputs=store_inputs, + _enable_options = _enable_options if _enable_options is not None else [], + _disable_options = _disable_options if _disable_options is not None else [], ) return fdw diff --git a/thunder/tests/test_nvfuser.py b/thunder/tests/test_nvfuser.py index 32a3eee7c..02dde7e71 100644 --- a/thunder/tests/test_nvfuser.py +++ b/thunder/tests/test_nvfuser.py @@ -1077,3 +1077,33 @@ def sdpa_fn(q, k, v, dropout_p, is_causal, scale): ref_outputs = (ref_attn_out, *(inp.grad for inp in ref_tensor_inputs)) for nv_out, ref_out in zip(nv_outputs, ref_outputs): torch.testing.assert_close(nv_out, ref_out) + + +@instantiate( + dtypes=(thunder.float16, thunder.bfloat16), + devicetypes=(devices.DeviceType.CUDA,), + executors=(nvFuserExecutor,), + decorators=( + pytest.mark.skipif( + nvfuser_version() is None or nvfuser_version() < LooseVersion("0.2.2"), + reason="Requires nvFuser version 0.2.2 or later", + ), + ), +) +def test_enable_disable_options(executor, device: str, dtype: dtypes.dtype): + + def fn(a, b): + return torch.matmul(a, b) + + for sample in matmul_opinfo.sample_inputs(device, dtype): + if nvfuser_version() < LooseVersion("0.2.4") and (sample.args[0].ndim != 2 or sample.args[1].ndim != 2): + # Only 2D inputs are supported for version < 0.2.4. + continue + + compiled_func = thunder.jit(fn, executors_list=executor.executors_list(), nv_enable_matmul=True, nv_enable_options=["fuse_matmul"], nv_disable_options=["matmul_expr_eval"]) + + out = compiled_func(*sample.args) + traces = thunder.last_traces(compiled_func) + fusions = examine.get_fusions(traces[-1]) + assert len(fusions) == 1 + torch.testing.assert_close(out, torch.matmul(*sample.args)) \ No newline at end of file From 02c0f75d6aa8cfef51b37f5d95c19063a5b48e9f Mon Sep 17 00:00:00 2001 From: root <26priya11@gmail.com> Date: Wed, 30 Oct 2024 17:55:44 -0700 Subject: [PATCH 02/14] fix kwargs setup --- thunder/executors/nvfuserex_impl.py | 5 ++++- thunder/tests/test_nvfuser.py | 27 ++++++++++++++++----------- 2 files changed, 20 insertions(+), 12 deletions(-) diff --git a/thunder/executors/nvfuserex_impl.py b/thunder/executors/nvfuserex_impl.py index a7f3a83cf..64ed8c6af 100644 --- a/thunder/executors/nvfuserex_impl.py +++ b/thunder/executors/nvfuserex_impl.py @@ -552,10 +552,13 @@ def create_fusion_definition_wrapper( _enable_options: None | list = get_compile_option( "nv_enable_options", "List of NVFUSER_ENABLE options to set." ) + _enable_options = _enable_options if _enable_options is not None else [] + _disable_options: None | list = get_compile_option( "nv_disable_options", "List of NVFUSER_DISABLE options to set." ) - + _disable_options = _disable_options if _disable_options is not None else [] + print(_enable_options, _disable_options) tensor_indices = [] for idx, x in enumerate(sorted_unique_inputs): if isinstance(x, TensorProxy): diff --git a/thunder/tests/test_nvfuser.py b/thunder/tests/test_nvfuser.py index 02dde7e71..c1e0ec4af 100644 --- a/thunder/tests/test_nvfuser.py +++ b/thunder/tests/test_nvfuser.py @@ -1080,7 +1080,7 @@ def sdpa_fn(q, k, v, dropout_p, is_causal, scale): @instantiate( - dtypes=(thunder.float16, thunder.bfloat16), + dtypes=(thunder.float16,), devicetypes=(devices.DeviceType.CUDA,), executors=(nvFuserExecutor,), decorators=( @@ -1095,15 +1095,20 @@ def test_enable_disable_options(executor, device: str, dtype: dtypes.dtype): def fn(a, b): return torch.matmul(a, b) - for sample in matmul_opinfo.sample_inputs(device, dtype): - if nvfuser_version() < LooseVersion("0.2.4") and (sample.args[0].ndim != 2 or sample.args[1].ndim != 2): - # Only 2D inputs are supported for version < 0.2.4. - continue - compiled_func = thunder.jit(fn, executors_list=executor.executors_list(), nv_enable_matmul=True, nv_enable_options=["fuse_matmul"], nv_disable_options=["matmul_expr_eval"]) - out = compiled_func(*sample.args) - traces = thunder.last_traces(compiled_func) - fusions = examine.get_fusions(traces[-1]) - assert len(fusions) == 1 - torch.testing.assert_close(out, torch.matmul(*sample.args)) \ No newline at end of file + m = 24 + n = 16 + k = 16 + inps = [ + torch.randn(m, k, device="cuda", dtype=torch.float16), + torch.randn(k, n, device="cuda", dtype=torch.float16), + ] + + compiled_func = thunder.jit(fn, executors_list=executor.executors_list(), nv_enable_matmul=True, nv_disable_options=["matmul_expr_eval", "kernel_reuse"]) + + out = compiled_func(*inps) + traces = thunder.last_traces(compiled_func) + fusions = examine.get_fusions(traces[-1]) + assert len(fusions) == 1 + torch.testing.assert_close(out, torch.matmul(*inps)) \ No newline at end of file From 9590fec7cf7f3e95c942cdde6c7548b51205f91d Mon Sep 17 00:00:00 2001 From: root <26priya11@gmail.com> Date: Wed, 30 Oct 2024 18:04:00 -0700 Subject: [PATCH 03/14] change test to check for error --- thunder/tests/test_nvfuser.py | 19 +++++++++++++------ 1 file changed, 13 insertions(+), 6 deletions(-) diff --git a/thunder/tests/test_nvfuser.py b/thunder/tests/test_nvfuser.py index c1e0ec4af..c6431b5f6 100644 --- a/thunder/tests/test_nvfuser.py +++ b/thunder/tests/test_nvfuser.py @@ -1106,9 +1106,16 @@ def fn(a, b): ] compiled_func = thunder.jit(fn, executors_list=executor.executors_list(), nv_enable_matmul=True, nv_disable_options=["matmul_expr_eval", "kernel_reuse"]) - - out = compiled_func(*inps) - traces = thunder.last_traces(compiled_func) - fusions = examine.get_fusions(traces[-1]) - assert len(fusions) == 1 - torch.testing.assert_close(out, torch.matmul(*inps)) \ No newline at end of file + try: + out = compiled_func(*inps) + raise RuntimeError( + 'RuntimeError: INTERNAL ASSERT FAILED at "/opt/pytorch/nvfuser/csrc/fusion_segmenter.cpp":3718, please report a bug with repro script to NVFuser at https://github.com/NVIDIA/Fuser/issues. Can not find a scheduler to schedule fusion segment' + ) + except: + pass + + # out = compiled_func(*inps) + # traces = thunder.last_traces(compiled_func) + # fusions = examine.get_fusions(traces[-1]) + # assert len(fusions) == 1 + # torch.testing.assert_close(out, torch.matmul(*inps)) \ No newline at end of file From bcc2422ee736083a53c62c00673fd2669f9d82ed Mon Sep 17 00:00:00 2001 From: root <26priya11@gmail.com> Date: Tue, 12 Nov 2024 21:10:50 -0800 Subject: [PATCH 04/14] add version check --- thunder/executors/nvfuserex_impl.py | 12 ++++++-- thunder/tests/test_nvfuser.py | 45 ++++++++++++++--------------- 2 files changed, 30 insertions(+), 27 deletions(-) diff --git a/thunder/executors/nvfuserex_impl.py b/thunder/executors/nvfuserex_impl.py index 64ed8c6af..211e39da7 100644 --- a/thunder/executors/nvfuserex_impl.py +++ b/thunder/executors/nvfuserex_impl.py @@ -454,9 +454,15 @@ def __call__(self, *args): if hasattr(fd, "_selected_device"): kwargs["device"] = fd._selected_device if not self._enable_options: + + if nvfuser_version() >= LooseVersion("0.2.23"): kwargs["_enable_options"] = self._enable_options if not self._disable_options: - kwargs["_disable_options"] = self._disable_options + kwargs["_disable_options"] = self._disable_options + + elif len(self._enable_options) or len(self._disable_options): + warnings.warn(f"nvFuser _enable_options/_disable_options requires version 0.2.23 and above, using version {nvfuser_version()}. These options will be ignored.") + with annotate_for_profile(self.name): return fd.execute(args, **kwargs) @@ -558,7 +564,7 @@ def create_fusion_definition_wrapper( "nv_disable_options", "List of NVFUSER_DISABLE options to set." ) _disable_options = _disable_options if _disable_options is not None else [] - print(_enable_options, _disable_options) + tensor_indices = [] for idx, x in enumerate(sorted_unique_inputs): if isinstance(x, TensorProxy): @@ -571,7 +577,7 @@ def create_fusion_definition_wrapper( def get_fd(input_descriptors) -> FusionDefinition: # A closure over local trace and region return create_fd(bsyms, input_descriptors, sorted_unique_inputs, sorted_unique_outputs) - # breakpoint() + fdw = FusionDefinitionWrapper( get_fd, partial(to_descriptors, sorted_unique_inputs), diff --git a/thunder/tests/test_nvfuser.py b/thunder/tests/test_nvfuser.py index c6431b5f6..971a84bd4 100644 --- a/thunder/tests/test_nvfuser.py +++ b/thunder/tests/test_nvfuser.py @@ -1080,42 +1080,39 @@ def sdpa_fn(q, k, v, dropout_p, is_causal, scale): @instantiate( - dtypes=(thunder.float16,), + dtypes=(thunder.float32,), devicetypes=(devices.DeviceType.CUDA,), executors=(nvFuserExecutor,), decorators=( pytest.mark.skipif( - nvfuser_version() is None or nvfuser_version() < LooseVersion("0.2.2"), - reason="Requires nvFuser version 0.2.2 or later", + nvfuser_version() is None or nvfuser_version() < LooseVersion("0.2.23"), + reason="Requires nvFuser version 0.2.23 or later", ), ), ) -def test_enable_disable_options(executor, device: str, dtype: dtypes.dtype): +def test_enable_disable_options(executor, device: str, thunder_dtype: dtypes.dtype): def fn(a, b): return torch.matmul(a, b) + m, n, k = 24, 16, 16 - - m = 24 - n = 16 - k = 16 + dtype = ltorch.to_torch_dtype(thunder_dtype) inps = [ - torch.randn(m, k, device="cuda", dtype=torch.float16), - torch.randn(k, n, device="cuda", dtype=torch.float16), + torch.randn(m, k, device="cuda", dtype=dtype), + torch.randn(k, n, device="cuda", dtype=dtype), ] - compiled_func = thunder.jit(fn, executors_list=executor.executors_list(), nv_enable_matmul=True, nv_disable_options=["matmul_expr_eval", "kernel_reuse"]) - try: - out = compiled_func(*inps) - raise RuntimeError( - 'RuntimeError: INTERNAL ASSERT FAILED at "/opt/pytorch/nvfuser/csrc/fusion_segmenter.cpp":3718, please report a bug with repro script to NVFuser at https://github.com/NVIDIA/Fuser/issues. Can not find a scheduler to schedule fusion segment' - ) - except: - pass - - # out = compiled_func(*inps) - # traces = thunder.last_traces(compiled_func) - # fusions = examine.get_fusions(traces[-1]) - # assert len(fusions) == 1 - # torch.testing.assert_close(out, torch.matmul(*inps)) \ No newline at end of file + compiled_func = thunder.jit( + fn, + executors_list=executor.executors_list(), + nv_enable_matmul=True, + nv_enable_options=["fuse_matmul"], + nv_disable_options=["matmul_expr_eval", "kernel_reuse"] + ) + # The above combination of options enables matmul codegen and disables expr evaluation for matmul. + # Since matmul scheduler does not support float32 inputs, the execution should raise an error. + # By default, without using these options, the given fusion will run through expr eval scheduler correctly. + + with pytest.raises(RuntimeError, match="Can not find a scheduler to schedule fusion segment"): + out = compiled_func(*inps) \ No newline at end of file From 3aeca6a55b1ede3e9101e6a400087e690a3bb127 Mon Sep 17 00:00:00 2001 From: root <26priya11@gmail.com> Date: Tue, 12 Nov 2024 21:16:03 -0800 Subject: [PATCH 05/14] remove profile --- thunder/executors/nvfuserex_impl.py | 1 - 1 file changed, 1 deletion(-) diff --git a/thunder/executors/nvfuserex_impl.py b/thunder/executors/nvfuserex_impl.py index 211e39da7..1afcfaea7 100644 --- a/thunder/executors/nvfuserex_impl.py +++ b/thunder/executors/nvfuserex_impl.py @@ -438,7 +438,6 @@ class FusionDefinitionWrapper: last_used: None | FusionDefinition = None last_inputs: None | Sequence[tuple] = None store_inputs: bool = False, - profile: bool = False, _enable_options: list = [], _disable_options: list = [], From 74e66fa784be6a025f946dc05e20358af3b70c8a Mon Sep 17 00:00:00 2001 From: "pre-commit-ci[bot]" <66853113+pre-commit-ci[bot]@users.noreply.github.com> Date: Wed, 13 Nov 2024 05:20:12 +0000 Subject: [PATCH 06/14] [pre-commit.ci] auto fixes from pre-commit.com hooks for more information, see https://pre-commit.ci --- thunder/executors/nvfuserex_impl.py | 32 ++++++++++++----------------- thunder/tests/test_nvfuser.py | 10 ++++----- 2 files changed, 18 insertions(+), 24 deletions(-) diff --git a/thunder/executors/nvfuserex_impl.py b/thunder/executors/nvfuserex_impl.py index 1afcfaea7..92cb4dc81 100644 --- a/thunder/executors/nvfuserex_impl.py +++ b/thunder/executors/nvfuserex_impl.py @@ -437,9 +437,9 @@ class FusionDefinitionWrapper: cache_clear: None | Callable = None last_used: None | FusionDefinition = None last_inputs: None | Sequence[tuple] = None - store_inputs: bool = False, - _enable_options: list = [], - _disable_options: list = [], + store_inputs: bool = (False,) + _enable_options: list = ([],) + _disable_options: list = ([],) def __call__(self, *args): fd = self.get_fd(self.to_descriptors(args)) @@ -452,13 +452,11 @@ def __call__(self, *args): # Set device if set in one of the "factory" methods like full, iota, or uniform if hasattr(fd, "_selected_device"): kwargs["device"] = fd._selected_device - if not self._enable_options: - + if nvfuser_version() >= LooseVersion("0.2.23"): kwargs["_enable_options"] = self._enable_options - if not self._disable_options: - kwargs["_disable_options"] = self._disable_options - + kwargs["_disable_options"] = self._disable_options + elif len(self._enable_options) or len(self._disable_options): warnings.warn(f"nvFuser _enable_options/_disable_options requires version 0.2.23 and above, using version {nvfuser_version()}. These options will be ignored.") @@ -554,16 +552,12 @@ def create_fusion_definition_wrapper( store_inputs: None | bool = get_compile_option( "nv_store_fusion_inputs", "Allow nvFuser to store fusion inputs for repro." ) - _enable_options: None | list = get_compile_option( - "nv_enable_options", "List of NVFUSER_ENABLE options to set." - ) + _enable_options: None | list = get_compile_option("nv_enable_options", "List of NVFUSER_ENABLE options to set.") _enable_options = _enable_options if _enable_options is not None else [] - - _disable_options: None | list = get_compile_option( - "nv_disable_options", "List of NVFUSER_DISABLE options to set." - ) + + _disable_options: None | list = get_compile_option("nv_disable_options", "List of NVFUSER_DISABLE options to set.") _disable_options = _disable_options if _disable_options is not None else [] - + tensor_indices = [] for idx, x in enumerate(sorted_unique_inputs): if isinstance(x, TensorProxy): @@ -576,7 +570,7 @@ def create_fusion_definition_wrapper( def get_fd(input_descriptors) -> FusionDefinition: # A closure over local trace and region return create_fd(bsyms, input_descriptors, sorted_unique_inputs, sorted_unique_outputs) - + fdw = FusionDefinitionWrapper( get_fd, partial(to_descriptors, sorted_unique_inputs), @@ -584,8 +578,8 @@ def get_fd(input_descriptors) -> FusionDefinition: get_fd.cache_info, get_fd.cache_clear, store_inputs=store_inputs, - _enable_options = _enable_options if _enable_options is not None else [], - _disable_options = _disable_options if _disable_options is not None else [], + _enable_options=_enable_options if _enable_options is not None else [], + _disable_options=_disable_options if _disable_options is not None else [], ) return fdw diff --git a/thunder/tests/test_nvfuser.py b/thunder/tests/test_nvfuser.py index 971a84bd4..8a5949344 100644 --- a/thunder/tests/test_nvfuser.py +++ b/thunder/tests/test_nvfuser.py @@ -1104,15 +1104,15 @@ def fn(a, b): ] compiled_func = thunder.jit( - fn, - executors_list=executor.executors_list(), - nv_enable_matmul=True, + fn, + executors_list=executor.executors_list(), + nv_enable_matmul=True, nv_enable_options=["fuse_matmul"], - nv_disable_options=["matmul_expr_eval", "kernel_reuse"] + nv_disable_options=["matmul_expr_eval", "kernel_reuse"], ) # The above combination of options enables matmul codegen and disables expr evaluation for matmul. # Since matmul scheduler does not support float32 inputs, the execution should raise an error. # By default, without using these options, the given fusion will run through expr eval scheduler correctly. with pytest.raises(RuntimeError, match="Can not find a scheduler to schedule fusion segment"): - out = compiled_func(*inps) \ No newline at end of file + out = compiled_func(*inps) From 55b2adfd188e255199f2ea82a865ca4aa9b70a93 Mon Sep 17 00:00:00 2001 From: root <26priya11@gmail.com> Date: Tue, 12 Nov 2024 21:26:14 -0800 Subject: [PATCH 07/14] remove duplicate --- thunder/executors/nvfuserex_impl.py | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/thunder/executors/nvfuserex_impl.py b/thunder/executors/nvfuserex_impl.py index 92cb4dc81..8cb9b697f 100644 --- a/thunder/executors/nvfuserex_impl.py +++ b/thunder/executors/nvfuserex_impl.py @@ -578,8 +578,8 @@ def get_fd(input_descriptors) -> FusionDefinition: get_fd.cache_info, get_fd.cache_clear, store_inputs=store_inputs, - _enable_options=_enable_options if _enable_options is not None else [], - _disable_options=_disable_options if _disable_options is not None else [], + _enable_options=_enable_options, + _disable_options=_disable_options, ) return fdw From 2ffa83349b76fe085330a9ba2bfe730c03f40405 Mon Sep 17 00:00:00 2001 From: root <26priya11@gmail.com> Date: Wed, 13 Nov 2024 00:17:03 -0800 Subject: [PATCH 08/14] change default value to None --- thunder/executors/nvfuserex_impl.py | 19 ++++++++----------- 1 file changed, 8 insertions(+), 11 deletions(-) diff --git a/thunder/executors/nvfuserex_impl.py b/thunder/executors/nvfuserex_impl.py index 8cb9b697f..717246e86 100644 --- a/thunder/executors/nvfuserex_impl.py +++ b/thunder/executors/nvfuserex_impl.py @@ -437,9 +437,9 @@ class FusionDefinitionWrapper: cache_clear: None | Callable = None last_used: None | FusionDefinition = None last_inputs: None | Sequence[tuple] = None - store_inputs: bool = (False,) - _enable_options: list = ([],) - _disable_options: list = ([],) + store_inputs: bool = False, + _enable_options: None | list[str] = None, + _disable_options: None | list[str] = None def __call__(self, *args): fd = self.get_fd(self.to_descriptors(args)) @@ -454,10 +454,10 @@ def __call__(self, *args): kwargs["device"] = fd._selected_device if nvfuser_version() >= LooseVersion("0.2.23"): - kwargs["_enable_options"] = self._enable_options - kwargs["_disable_options"] = self._disable_options + kwargs["_enable_options"] = self._enable_options if self._enable_options is not None else [] + kwargs["_disable_options"] = self._disable_options if self._disable_options is not None else [] - elif len(self._enable_options) or len(self._disable_options): + elif self._enable_options or self._disable_options: warnings.warn(f"nvFuser _enable_options/_disable_options requires version 0.2.23 and above, using version {nvfuser_version()}. These options will be ignored.") with annotate_for_profile(self.name): @@ -552,11 +552,8 @@ def create_fusion_definition_wrapper( store_inputs: None | bool = get_compile_option( "nv_store_fusion_inputs", "Allow nvFuser to store fusion inputs for repro." ) - _enable_options: None | list = get_compile_option("nv_enable_options", "List of NVFUSER_ENABLE options to set.") - _enable_options = _enable_options if _enable_options is not None else [] - - _disable_options: None | list = get_compile_option("nv_disable_options", "List of NVFUSER_DISABLE options to set.") - _disable_options = _disable_options if _disable_options is not None else [] + _enable_options: None | list[str] = get_compile_option("nv_enable_options", "List of NVFUSER_ENABLE options to set.") + _disable_options: None | list[str] = get_compile_option("nv_disable_options", "List of NVFUSER_DISABLE options to set.") tensor_indices = [] for idx, x in enumerate(sorted_unique_inputs): From d3ccebcedd5c9931c53ab446ebc35f31cb8aa671 Mon Sep 17 00:00:00 2001 From: root <26priya11@gmail.com> Date: Wed, 13 Nov 2024 00:17:42 -0800 Subject: [PATCH 09/14] add comment --- thunder/executors/nvfuserex_impl.py | 1 + 1 file changed, 1 insertion(+) diff --git a/thunder/executors/nvfuserex_impl.py b/thunder/executors/nvfuserex_impl.py index 717246e86..832d14c33 100644 --- a/thunder/executors/nvfuserex_impl.py +++ b/thunder/executors/nvfuserex_impl.py @@ -454,6 +454,7 @@ def __call__(self, *args): kwargs["device"] = fd._selected_device if nvfuser_version() >= LooseVersion("0.2.23"): + # nvFuser expects empty list instead of None values. kwargs["_enable_options"] = self._enable_options if self._enable_options is not None else [] kwargs["_disable_options"] = self._disable_options if self._disable_options is not None else [] From ef7d744d642eca16d4a396aa34ed805f3f0e5ab8 Mon Sep 17 00:00:00 2001 From: "pre-commit-ci[bot]" <66853113+pre-commit-ci[bot]@users.noreply.github.com> Date: Wed, 13 Nov 2024 08:18:29 +0000 Subject: [PATCH 10/14] [pre-commit.ci] auto fixes from pre-commit.com hooks for more information, see https://pre-commit.ci --- thunder/executors/nvfuserex_impl.py | 12 ++++++++---- 1 file changed, 8 insertions(+), 4 deletions(-) diff --git a/thunder/executors/nvfuserex_impl.py b/thunder/executors/nvfuserex_impl.py index 832d14c33..d8f0827b2 100644 --- a/thunder/executors/nvfuserex_impl.py +++ b/thunder/executors/nvfuserex_impl.py @@ -437,8 +437,8 @@ class FusionDefinitionWrapper: cache_clear: None | Callable = None last_used: None | FusionDefinition = None last_inputs: None | Sequence[tuple] = None - store_inputs: bool = False, - _enable_options: None | list[str] = None, + store_inputs: bool = (False,) + _enable_options: None | list[str] = (None,) _disable_options: None | list[str] = None def __call__(self, *args): @@ -553,8 +553,12 @@ def create_fusion_definition_wrapper( store_inputs: None | bool = get_compile_option( "nv_store_fusion_inputs", "Allow nvFuser to store fusion inputs for repro." ) - _enable_options: None | list[str] = get_compile_option("nv_enable_options", "List of NVFUSER_ENABLE options to set.") - _disable_options: None | list[str] = get_compile_option("nv_disable_options", "List of NVFUSER_DISABLE options to set.") + _enable_options: None | list[str] = get_compile_option( + "nv_enable_options", "List of NVFUSER_ENABLE options to set." + ) + _disable_options: None | list[str] = get_compile_option( + "nv_disable_options", "List of NVFUSER_DISABLE options to set." + ) tensor_indices = [] for idx, x in enumerate(sorted_unique_inputs): From c95c051e32d2ff0f76005e776ffeaa08f11dc35a Mon Sep 17 00:00:00 2001 From: root <26priya11@gmail.com> Date: Wed, 13 Nov 2024 00:19:30 -0800 Subject: [PATCH 11/14] fix syntax --- thunder/executors/nvfuserex_impl.py | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/thunder/executors/nvfuserex_impl.py b/thunder/executors/nvfuserex_impl.py index d8f0827b2..dcdfb363f 100644 --- a/thunder/executors/nvfuserex_impl.py +++ b/thunder/executors/nvfuserex_impl.py @@ -437,8 +437,8 @@ class FusionDefinitionWrapper: cache_clear: None | Callable = None last_used: None | FusionDefinition = None last_inputs: None | Sequence[tuple] = None - store_inputs: bool = (False,) - _enable_options: None | list[str] = (None,) + store_inputs: bool = False + _enable_options: None | list[str] = None _disable_options: None | list[str] = None def __call__(self, *args): From f60ad93430f691b683fbc433043c198de2fe5ce5 Mon Sep 17 00:00:00 2001 From: root <26priya11@gmail.com> Date: Wed, 13 Nov 2024 10:07:22 -0800 Subject: [PATCH 12/14] review comments, resolve conflict --- thunder/executors/nvfuserex_impl.py | 18 +++++++++--------- thunder/tests/test_nvfuser.py | 4 +++- 2 files changed, 12 insertions(+), 10 deletions(-) diff --git a/thunder/executors/nvfuserex_impl.py b/thunder/executors/nvfuserex_impl.py index dcdfb363f..4cbb7e877 100644 --- a/thunder/executors/nvfuserex_impl.py +++ b/thunder/executors/nvfuserex_impl.py @@ -438,8 +438,8 @@ class FusionDefinitionWrapper: last_used: None | FusionDefinition = None last_inputs: None | Sequence[tuple] = None store_inputs: bool = False - _enable_options: None | list[str] = None - _disable_options: None | list[str] = None + enable_options: None | list[str] = None + disable_options: None | list[str] = None def __call__(self, *args): fd = self.get_fd(self.to_descriptors(args)) @@ -455,10 +455,10 @@ def __call__(self, *args): if nvfuser_version() >= LooseVersion("0.2.23"): # nvFuser expects empty list instead of None values. - kwargs["_enable_options"] = self._enable_options if self._enable_options is not None else [] - kwargs["_disable_options"] = self._disable_options if self._disable_options is not None else [] + kwargs["_enable_options"] = self.enable_options if self.enable_options is not None else [] + kwargs["_disable_options"] = self.disable_options if self.disable_options is not None else [] - elif self._enable_options or self._disable_options: + elif self.enable_options or self.disable_options: warnings.warn(f"nvFuser _enable_options/_disable_options requires version 0.2.23 and above, using version {nvfuser_version()}. These options will be ignored.") with annotate_for_profile(self.name): @@ -553,10 +553,10 @@ def create_fusion_definition_wrapper( store_inputs: None | bool = get_compile_option( "nv_store_fusion_inputs", "Allow nvFuser to store fusion inputs for repro." ) - _enable_options: None | list[str] = get_compile_option( + enable_options: None | list[str] = get_compile_option( "nv_enable_options", "List of NVFUSER_ENABLE options to set." ) - _disable_options: None | list[str] = get_compile_option( + disable_options: None | list[str] = get_compile_option( "nv_disable_options", "List of NVFUSER_DISABLE options to set." ) @@ -580,8 +580,8 @@ def get_fd(input_descriptors) -> FusionDefinition: get_fd.cache_info, get_fd.cache_clear, store_inputs=store_inputs, - _enable_options=_enable_options, - _disable_options=_disable_options, + enable_options=enable_options, + disable_options=disable_options, ) return fdw diff --git a/thunder/tests/test_nvfuser.py b/thunder/tests/test_nvfuser.py index 8a5949344..f190a59fb 100644 --- a/thunder/tests/test_nvfuser.py +++ b/thunder/tests/test_nvfuser.py @@ -1113,6 +1113,8 @@ def fn(a, b): # The above combination of options enables matmul codegen and disables expr evaluation for matmul. # Since matmul scheduler does not support float32 inputs, the execution should raise an error. # By default, without using these options, the given fusion will run through expr eval scheduler correctly. - + # NOTE: This test relies on `float32` being unsupported by nvFuser matmul scheduler. + # If this support is added, the test will need to be updated since it will no longer + # verify the functionality of the above flags. with pytest.raises(RuntimeError, match="Can not find a scheduler to schedule fusion segment"): out = compiled_func(*inps) From 05d4a116434b1cc64165aa250fe984e84bb1e734 Mon Sep 17 00:00:00 2001 From: "pre-commit-ci[bot]" <66853113+pre-commit-ci[bot]@users.noreply.github.com> Date: Wed, 13 Nov 2024 18:08:18 +0000 Subject: [PATCH 13/14] [pre-commit.ci] auto fixes from pre-commit.com hooks for more information, see https://pre-commit.ci --- thunder/executors/nvfuserex_impl.py | 10 +++++----- 1 file changed, 5 insertions(+), 5 deletions(-) diff --git a/thunder/executors/nvfuserex_impl.py b/thunder/executors/nvfuserex_impl.py index 4cbb7e877..1e344397f 100644 --- a/thunder/executors/nvfuserex_impl.py +++ b/thunder/executors/nvfuserex_impl.py @@ -459,8 +459,10 @@ def __call__(self, *args): kwargs["_disable_options"] = self.disable_options if self.disable_options is not None else [] elif self.enable_options or self.disable_options: - warnings.warn(f"nvFuser _enable_options/_disable_options requires version 0.2.23 and above, using version {nvfuser_version()}. These options will be ignored.") - + warnings.warn( + f"nvFuser _enable_options/_disable_options requires version 0.2.23 and above, using version {nvfuser_version()}. These options will be ignored." + ) + with annotate_for_profile(self.name): return fd.execute(args, **kwargs) @@ -553,9 +555,7 @@ def create_fusion_definition_wrapper( store_inputs: None | bool = get_compile_option( "nv_store_fusion_inputs", "Allow nvFuser to store fusion inputs for repro." ) - enable_options: None | list[str] = get_compile_option( - "nv_enable_options", "List of NVFUSER_ENABLE options to set." - ) + enable_options: None | list[str] = get_compile_option("nv_enable_options", "List of NVFUSER_ENABLE options to set.") disable_options: None | list[str] = get_compile_option( "nv_disable_options", "List of NVFUSER_DISABLE options to set." ) From 582068d04c6a6daf3b084d67b834a2ae5d7966a0 Mon Sep 17 00:00:00 2001 From: root <26priya11@gmail.com> Date: Wed, 13 Nov 2024 17:17:15 -0800 Subject: [PATCH 14/14] update error message --- thunder/executors/nvfuserex_impl.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/thunder/executors/nvfuserex_impl.py b/thunder/executors/nvfuserex_impl.py index 1e344397f..4e5a204f8 100644 --- a/thunder/executors/nvfuserex_impl.py +++ b/thunder/executors/nvfuserex_impl.py @@ -460,7 +460,7 @@ def __call__(self, *args): elif self.enable_options or self.disable_options: warnings.warn( - f"nvFuser _enable_options/_disable_options requires version 0.2.23 and above, using version {nvfuser_version()}. These options will be ignored." + f"nv_enable_options/nv_disable_options require nvFuser version 0.2.23 and above, found version {nvfuser_version()}. These options will be ignored." ) with annotate_for_profile(self.name):