NNCF set min matmul shape to 32

2025-05-13 18:50:23 +03:00 · 2025-05-13 18:50:23 +03:00 · 4e4557d81c
parent b9ad55857d
commit 4e4557d81c
1 changed file with 1 addition and 1 deletion
--- a/modules/model_quant_nncf.py
+++ b/modules/model_quant_nncf.py
@@ -64,7 +64,7 @@ def nncf_compress_layer(layer, num_bits, is_asym_mode, torch_dtype=None, quant_c
        else:
            reduction_axes = -1
            channel_size = layer.weight.shape[-1]
-            use_int8_matmul = use_int8_matmul and not is_asym_mode and channel_size >= 1024 and layer.weight.shape[0] >= 1024
+            use_int8_matmul = use_int8_matmul and not is_asym_mode and channel_size >= 32 and layer.weight.shape[0] >= 32

            if not use_int8_matmul and (group_size > 0 or (num_bits == 4 and group_size != -1)):
                if group_size == 0: