From 4e4557d81c57128a9d847b19463b1ed67dd7c8d8 Mon Sep 17 00:00:00 2001 From: Disty0 Date: Tue, 13 May 2025 18:50:23 +0300 Subject: [PATCH] NNCF set min matmul shape to 32 --- modules/model_quant_nncf.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/modules/model_quant_nncf.py b/modules/model_quant_nncf.py index 206fb9cf3..e78f49a3f 100644 --- a/modules/model_quant_nncf.py +++ b/modules/model_quant_nncf.py @@ -64,7 +64,7 @@ def nncf_compress_layer(layer, num_bits, is_asym_mode, torch_dtype=None, quant_c else: reduction_axes = -1 channel_size = layer.weight.shape[-1] - use_int8_matmul = use_int8_matmul and not is_asym_mode and channel_size >= 1024 and layer.weight.shape[0] >= 1024 + use_int8_matmul = use_int8_matmul and not is_asym_mode and channel_size >= 32 and layer.weight.shape[0] >= 32 if not use_int8_matmul and (group_size > 0 or (num_bits == 4 and group_size != -1)): if group_size == 0: