flashinfer-ai · yzh119 · Jan 28, 2025 · Jan 27, 2025 · Jan 28, 2025
diff --git a/csrc/flashinfer_gemm_sm90_ops.cu b/csrc/flashinfer_gemm_sm90_ops.cu
@@ -0,0 +1,42 @@
+/*
+ * Copyright (c) 2025 by FlashInfer team.
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ *   http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+#include <cstdint>
+#include <vector>
+
+#include "pytorch_extension_utils.h"
+
+/*!
+ * \brief SM90 CUTLASS segment GEMM entry point exposed to Python by the binding below.
+ *
+ * Only the declaration lives in this file; the definition must be provided by another
+ * translation unit at link time.
+ *
+ * NOTE(review): parameter meanings are not visible from this declaration. Judging by the
+ * names, x_ptr/w_ptr/y_ptr and the *_stride tensors hold per-problem pointers and strides,
+ * and cuda_stream is a stream handle passed as an integer -- confirm against the definition.
+ */
+void CutlassSegmentGEMMSM90(at::Tensor float_workspace_buffer, at::Tensor int_workspace_buffer,
+                            at::Tensor all_problems, at::Tensor x_ptr, at::Tensor w_ptr,
+                            at::Tensor y_ptr, at::Tensor x_stride, at::Tensor weight_stride,
+                            at::Tensor y_stride, at::Tensor empty_x_data, bool weight_column_major,
+                            std::vector<int64_t> plan_info_vec, int64_t cuda_stream);
+
+// Python binding: exposes CutlassSegmentGEMMSM90 as `cutlass_segment_gemm_sm90`. No py::arg
+// names are registered, so Python callers must pass every argument positionally.
+PYBIND11_MODULE(TORCH_EXTENSION_NAME, m) {
+  m.def("cutlass_segment_gemm_sm90", &CutlassSegmentGEMMSM90,
+        "Cutlass Segment GEMM operator for SM90");
+}
diff --git a/flashinfer/gemm.py b/flashinfer/gemm.py
@@ -597,72 +597,71 @@ def run(
         else:
             backend = self.backend
 
-        match backend:
-            case "sm90":
-                (
-                    all_problems,
-                    x_data,
-                    w_data,
-                    y_data,
-                    x_stride_data,
-                    w_stride_data,
-                    y_stride_data,
-                ) = launch_compute_sm90_group_gemm_args(
-                    x,
-                    weights,
-                    y,
-                    weight_column_major,
-                    batch_size,
-                    seg_indptr,
-                    weight_indices,
-                )
-                get_gemm_sm90_module().cutlass_segment_gemm_sm90(
-                    self._float_workspace_buffer,
-                    self._int_workspace_buffer,
-                    all_problems,
-                    x_data,
-                    w_data,
-                    y_data,
-                    x_stride_data,
-                    w_stride_data,
-                    y_stride_data,
-                    y,  # for torch compile mutates_args
-                    empty_x_data,  # for kernel type dispatch
-                    weight_column_major,
-                )
-            case "sm80":
-                (
-                    all_problems,
-                    x_data,
-                    w_data,
-                    y_data,
-                    x_ld_data,
-                    w_ld_data,
-                    y_ld_data,
-                ) = launch_compute_sm80_group_gemm_args(
-                    x,
-                    weights,
-                    y,
-                    weight_column_major,
-                    batch_size,
-                    seg_indptr,
-                    weight_indices,
-                )
-                get_gemm_module().cutlass_segment_gemm(
-                    self._int_workspace_buffer,
-                    all_problems,
-                    x_data,
-                    w_data,
-                    y_data,
-                    x_ld_data,
-                    w_ld_data,
-                    y_ld_data,
-                    y,
-                    empty_x_data,
-                    weight_column_major,
-                )
-            case _:
-                raise ValueError(f"Unsupported gemm backend: {backend}")
+        if backend == "sm90":
+            (
+                all_problems,
+                x_data,
+                w_data,
+                y_data,
+                x_stride_data,
+                w_stride_data,
+                y_stride_data,
+            ) = launch_compute_sm90_group_gemm_args(
+                x,
+                weights,
+                y,
+                weight_column_major,
+                batch_size,
+                seg_indptr,
+                weight_indices,
+            )
+            get_gemm_sm90_module().cutlass_segment_gemm_sm90(
+                self._float_workspace_buffer,
+                self._int_workspace_buffer,
+                all_problems,
+                x_data,
+                w_data,
+                y_data,
+                x_stride_data,
+                w_stride_data,
+                y_stride_data,
+                y,  # for torch compile mutates_args
+                empty_x_data,  # for kernel type dispatch
+                weight_column_major,
+            )
+        elif backend == "sm80":
+            (
+                all_problems,
+                x_data,
+                w_data,
+                y_data,
+                x_ld_data,
+                w_ld_data,
+                y_ld_data,
+            ) = launch_compute_sm80_group_gemm_args(
+                x,
+                weights,
+                y,
+                weight_column_major,
+                batch_size,
+                seg_indptr,
+                weight_indices,
+            )
+            get_gemm_module().cutlass_segment_gemm(
+                self._int_workspace_buffer,
+                all_problems,
+                x_data,
+                w_data,
+                y_data,
+                x_ld_data,
+                w_ld_data,
+                y_ld_data,
+                y,
+                empty_x_data,
+                weight_column_major,
+            )
+        else:
+            raise ValueError(f"Unsupported gemm backend: {backend}")
         return y
 
     forward = run