mirror of https://github.com/vladmandic/automatic
add experimental tensorrt quantization
Signed-off-by: Vladimir Mandic <mandic00@live.com>pull/4193/head
parent
64edb0787b
commit
78c2a629b6
|
|
@ -46,6 +46,9 @@
|
|||
- **nunchaku**: update to `nunchaku==1.0.0`
|
||||
*note*: nunchaku updated the repo which will trigger re-download of nunchaku models when first used
|
||||
nunchaku is currently available for: *Flux.1 Dev/Schnell/Kontext/Krea/Depth/Fill*, *Qwen-Image/Qwen-Lightning*, *SANA-1.6B*
|
||||
- **tensorrt**: new quantization engine from nvidia
|
||||
*experimental*: requires new pydantic package which *may* break other things, to enable start sdnext with `--new` flag
|
||||
*note*: this is model quantization only, no support for TensorRT inference yet
|
||||
- **Other**
|
||||
- refactor reuse-seed and add functionality to all tabs
|
||||
- refactor modernui js codebase
|
||||
|
|
@ -53,6 +56,7 @@
|
|||
- remove samplers filtering
|
||||
- allow both flow-matching and discrete samplers for sdxl models
|
||||
- cleanup command line parameters
|
||||
- add `--new` command line flag to enable testing of new packages without breaking existing installs
|
||||
- **Fixes**
|
||||
- normalize path handling when deleting images
|
||||
- unified compile upscalers
|
||||
|
|
|
|||
|
|
@ -111,9 +111,9 @@ def create_trt_config(kwargs = None, allow: bool = True, module: str = 'Model',
|
|||
if trt is None:
|
||||
return kwargs
|
||||
trt_config_data = {
|
||||
"fp8": {"quant_type": "FP8", "quant_method": "modelopt", "modules_to_not_convert": []},
|
||||
"int8": {"quant_type": "INT8", "quant_method": "modelopt", "modules_to_not_convert": []},
|
||||
"int4": {"quant_type": "INT4", "quant_method": "modelopt", "block_quantize": 128, "channel_quantize": -1, "modules_to_not_convert": ["conv", "patch_embed"]},
|
||||
"fp8": {"quant_type": "FP8", "quant_method": "modelopt", "modules_to_not_convert": []},
|
||||
"nf4": {"quant_type": "NF4", "quant_method": "modelopt", "block_quantize": 128, "channel_quantize": -1, "scale_block_quantize": 8, "scale_channel_quantize": -1, "modules_to_not_convert": ["conv"]},
|
||||
"nvfp4": {"quant_type": "NVFP4", "quant_method": "modelopt", "block_quantize": 128, "channel_quantize": -1, "modules_to_not_convert": ["conv"]},
|
||||
}
|
||||
|
|
|
|||
|
|
@ -224,8 +224,8 @@ options_templates.update(options_section(("quantization", "Model Quantization"),
|
|||
"layerwise_quantization_nonblocking": OptionInfo(False, "Layerwise non-blocking operations", gr.Checkbox),
|
||||
|
||||
"trt_quantization_sep": OptionInfo("<h2>TensorRT</h2>", "", gr.HTML),
|
||||
"trt_quantization": OptionInfo([], "Quantization enabled", gr.CheckboxGroup, {"choices": ["Model", "TE", "LLM", "VAE"]}),
|
||||
"trt_quantization_type": OptionInfo("nvfp4", "Quantization type", gr.Dropdown, {"choices": ["int8", "int4", "fp8", "nf4", "nvfp4"]}),
|
||||
"trt_quantization": OptionInfo([], "Quantization enabled", gr.CheckboxGroup, {"choices": ["Model"]}),
|
||||
"trt_quantization_type": OptionInfo("int8", "Quantization type", gr.Dropdown, {"choices": ["int8", "int4", "fp8", "nf4", "nvfp4"]}),
|
||||
|
||||
"nncf_compress_sep": OptionInfo("<h2>NNCF: Neural Network Compression Framework</h2>", "", gr.HTML, {"visible": cmd_opts.use_openvino}),
|
||||
"nncf_compress_weights": OptionInfo([], "Quantization enabled", gr.CheckboxGroup, {"choices": ["Model", "TE", "VAE"], "visible": cmd_opts.use_openvino}),
|
||||
|
|
|
|||
Loading…
Reference in New Issue