From a6f740444df981299cf3386883c4f244de910b98 Mon Sep 17 00:00:00 2001 From: wanghaoshuang Date: Thu, 2 Nov 2023 16:28:01 +0800 Subject: [PATCH] Add block_size attribute for nf4 operator --- csrc/lc/nf4.cu | 4 +--- paddleslim/lc/quantizers/nf4.py | 3 ++- 2 files changed, 3 insertions(+), 4 deletions(-) diff --git a/csrc/lc/nf4.cu b/csrc/lc/nf4.cu index a5b6a824e..02f042ed4 100644 --- a/csrc/lc/nf4.cu +++ b/csrc/lc/nf4.cu @@ -226,10 +226,8 @@ std::vector<paddle::Tensor> QuantizeNF4(const paddle::Tensor& input, int block_s } } - - - PD_BUILD_OP(quantize_nf4) .Inputs({"input"}) .Outputs({"out", "abs_max"}) + .Attrs({"block_size: int"}) .SetKernelFn(PD_KERNEL(QuantizeNF4)); \ No newline at end of file diff --git a/paddleslim/lc/quantizers/nf4.py b/paddleslim/lc/quantizers/nf4.py index 5ed786f9a..35b9f932b 100644 --- a/paddleslim/lc/quantizers/nf4.py +++ b/paddleslim/lc/quantizers/nf4.py @@ -14,7 +14,8 @@ def __init__(self, block_size=64, double_quant=False): self.double_quant_scale = None def quantize(self, x: paddle.Tensor): - out, abs_max = paddleslim_ops.quantize_nf4(x) + out, abs_max = paddleslim_ops.quantize_nf4( + x, block_size=self.block_size) self.quant_scale = abs_max return out