diff --git a/CHANGELOG.md b/CHANGELOG.md index 84d6f5cf8..0bd678b72 100644 --- a/CHANGELOG.md +++ b/CHANGELOG.md @@ -46,6 +46,9 @@ - **nunchaku**: update to `nunchaku==1.0.0` *note*: nunchaku updated the repo which will trigger re-download of nunchaku models when first used nunchaku is currently available for: *Flux.1 Dev/Schnell/Kontext/Krea/Depth/Fill*, *Qwen-Image/Qwen-Lightning*, *SANA-1.6B* + - **tensorrt**: new quantization engine from nvidia + *experimental*: requires new pydantic package which *may* break other things, to enable, start sdnext with the `--new` flag + *note*: this is model quantization only, no support for TensorRT inference yet - **Other** - refactor reuse-seed and add functionality to all tabs - refactor modernui js codebase @@ -53,6 +56,7 @@ - remove samplers filtering - allow both flow-matching and discrete samplers for sdxl models - cleanup command line parameters + - add `--new` command line flag to enable testing of new packages without breaking existing installs - **Fixes** - normalize path hanlding when deleting images - unified compile upscalers diff --git a/modules/model_quant.py b/modules/model_quant.py index 45f940c59..0da8f9a56 100644 --- a/modules/model_quant.py +++ b/modules/model_quant.py @@ -111,9 +111,9 @@ def create_trt_config(kwargs = None, allow: bool = True, module: str = 'Model', if trt is None: return kwargs trt_config_data = { - "fp8": {"quant_type": "FP8", "quant_method": "modelopt", "modules_to_not_convert": []}, "int8": {"quant_type": "INT8", "quant_method": "modelopt", "modules_to_not_convert": []}, "int4": {"quant_type": "INT4", "quant_method": "modelopt", "block_quantize": 128, "channel_quantize": -1, "modules_to_not_convert": ["conv", "patch_embed"]}, + "fp8": {"quant_type": "FP8", "quant_method": "modelopt", "modules_to_not_convert": []}, "nf4": {"quant_type": "NF4", "quant_method": "modelopt", "block_quantize": 128, "channel_quantize": -1, "scale_block_quantize": 8, "scale_channel_quantize": -1, 
"modules_to_not_convert": ["conv"]}, "nvfp4": {"quant_type": "NVFP4", "quant_method": "modelopt", "block_quantize": 128, "channel_quantize": -1, "modules_to_not_convert": ["conv"]}, } diff --git a/modules/shared.py b/modules/shared.py index f5774461a..31a0803c8 100644 --- a/modules/shared.py +++ b/modules/shared.py @@ -224,8 +224,8 @@ options_templates.update(options_section(("quantization", "Model Quantization"), "layerwise_quantization_nonblocking": OptionInfo(False, "Layerwise non-blocking operations", gr.Checkbox), "trt_quantization_sep": OptionInfo("