From 8d617e51fcf1afe7340e5f11d594d1fbc9a17b43 Mon Sep 17 00:00:00 2001 From: Michael Goin Date: Wed, 6 Mar 2024 19:54:59 -0800 Subject: [PATCH] Lower unstructured sparsity threshold to 40% (#100) SUMMARY: Lower the minimum unstructured sparsity at which compress() will compress a tensor from 45% to 40%; tensors below the threshold are still skipped with a warning. TEST PLAN: "please outline how the changes were tested" --- vllm/model_executor/layers/parameters/lazy_compressed.py | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/vllm/model_executor/layers/parameters/lazy_compressed.py b/vllm/model_executor/layers/parameters/lazy_compressed.py index d5ee86ee8fb56..37128a6ed54b7 100644 --- a/vllm/model_executor/layers/parameters/lazy_compressed.py +++ b/vllm/model_executor/layers/parameters/lazy_compressed.py @@ -120,8 +120,8 @@ def compress(self) -> None: sparsity = 1 - (torch.count_nonzero(self.uncompressed_data).item() / numpy.prod(self.shape)) - # Only compress if we have sufficient sparsity (>=45%) - if sparsity < 0.45: + # Only compress if we have sufficient sparsity (>=40%) + if sparsity < 0.4: logger.warning( f"Called compress() on tensor of shape {self.shape} but only has " f"{sparsity:.2}% sparsity, skipping compression")