diff --git a/CHANGELOG.md b/CHANGELOG.md index fa564a06e..0cf4abd77 100644 --- a/CHANGELOG.md +++ b/CHANGELOG.md @@ -1,8 +1,8 @@ # Change Log for SD.Next -## Update for 2024-10-17 +## Update for 2024-10-18 -### Highlights for 2024-10-17 +### Highlights for 2024-10-18 - **Reprocess**: New workflow options that allow you to generate at lower quality and then reprocess at higher quality for select images only or generate without hires/refine and then reprocess with hires/refine @@ -22,6 +22,8 @@ - Auto-detection of best available **device/dtype** settings for your platform and GPU reduces neeed for manual configuration - Full rewrite of **sampler options**, not far more streamlined with tons of new options to tweak scheduler behavior - Improved **LoRA** detection and handling for all supported models +- Tons of work on dynamic quantization that can be applied on-the-fly during model load to any model type + Supported quantization engines include TorchAO, Optimum.quanto, NNCF compression, and more... 
Oh, and we've compiled a full table with list of popular text-to-image generative models, their respective parameters and architecture overview: @@ -30,7 +32,7 @@ And there are also other goodies like multiple *XYZ grid* improvements, addition [README](https://github.com/vladmandic/automatic/blob/master/README.md) | [CHANGELOG](https://github.com/vladmandic/automatic/blob/master/CHANGELOG.md) | [WiKi](https://github.com/vladmandic/automatic/wiki) | [Discord](https://discord.com/invite/sd-next-federal-batch-inspectors-1101998836328697867) -### Details for 2024-10-17 +### Details for 2024-10-18 - **reprocess** - new top-level button: reprocess latent from your history of generated image(s) @@ -211,6 +213,11 @@ And there are also other goodies like multiple *XYZ grid* improvements, addition - setting `lora_load_gpu` to load LoRA directly to GPU *default*: true unless lovwram +- **torchao** + - reimplement torchao quantization + - configure in settings -> compute settings -> quantization + - can be applied to any model on-the-fly during load + - **huggingface**: - force logout/login on token change - unified handling of cache folder: set via `HF_HUB` or `HF_HUB_CACHE` or via settings -> system paths @@ -233,6 +240,7 @@ And there are also other goodies like multiple *XYZ grid* improvements, addition - fix update infotext on image select - fix imageviewer exif parser - selectable info view in image viewer, thanks @ZeldaMaster501 + - setting to enable browser autolaunch, thanks @brknsoul - **free-u** check if device/dtype are fft compatible and cast as necessary - **rocm** - additional gpu detection and auto-config code, thanks @lshqqytiger diff --git a/javascript/sdnext.css b/javascript/sdnext.css index 0f3765694..3c81e7d8e 100644 --- a/javascript/sdnext.css +++ b/javascript/sdnext.css @@ -55,6 +55,8 @@ td > div > span { overflow-y: auto; max-height: 3em; overflow-x: hidden; } .gradio-radio { padding: 0 !important; width: max-content !important; } .gradio-slider { 
margin-right: var(--spacing-sm) !important; width: max-content !important } .gradio-slider input[type="number"] { width: 5em; font-size: var(--text-xs); height: 16px; text-align: right; padding: 0; } +.gradio-checkboxgroup { padding: 0 !important; } +.gradio-checkbox > label { color: var(--block-title-text-color) !important; } /* custom gradio elements */ .accordion-compact { padding: 8px 0px 4px 0px !important; } diff --git a/modules/loader.py b/modules/loader.py index 18cb42893..52331672a 100644 --- a/modules/loader.py +++ b/modules/loader.py @@ -1,5 +1,6 @@ from __future__ import annotations from functools import partial +import os import re import sys import logging @@ -13,6 +14,7 @@ errors.install() logging.getLogger("DeepSpeed").disabled = True +os.environ.setdefault('TORCH_LOGS', '-all') import torch # pylint: disable=C0411 try: import intel_extension_for_pytorch as ipex # pylint: disable=import-error, unused-import @@ -96,7 +98,6 @@ def get_packages(): } try: - import os import math cores = os.cpu_count() affinity = len(os.sched_getaffinity(0)) diff --git a/modules/sd_models.py b/modules/sd_models.py index a8c92bc4e..47d0c5947 100644 --- a/modules/sd_models.py +++ b/modules/sd_models.py @@ -737,8 +737,8 @@ def set_diffuser_options(sd_model, vae = None, op: str = 'model', offload=True): model.eval() return model sd_model = sd_models_compile.apply_compile_to_model(sd_model, eval_model, ["Model", "VAE", "Text Encoder"], op="eval") - if shared.opts.diffusers_quantization: - sd_model = sd_models_compile.dynamic_quantization(sd_model) + if len(shared.opts.torchao_quantization) > 0: + sd_model = sd_models_compile.torchao_quantization(sd_model) if shared.opts.opt_channelslast and hasattr(sd_model, 'unet'): shared.log.debug(f'Setting {op}: channels-last=True') @@ -1193,7 +1193,7 @@ def load_diffuser_file(model_type, pipeline, checkpoint_info, diffusers_load_con from diffusers.utils import import_utils import_utils._accelerate_available = False # pylint: 
disable=protected-access if shared.opts.diffusers_to_gpu and model_type.startswith('Stable Diffusion'): - shared.log.debug(f'Diffusers accelerate: hijack={shared.opts.diffusers_to_gpu}') + shared.log.debug(f'Diffusers accelerate: direct={shared.opts.diffusers_to_gpu}') sd_hijack_accelerate.hijack_accelerate() else: sd_hijack_accelerate.restore_accelerate() diff --git a/modules/sd_models_compile.py b/modules/sd_models_compile.py index ff4fa6aff..91ed84ded 100644 --- a/modules/sd_models_compile.py +++ b/modules/sd_models_compile.py @@ -183,7 +183,7 @@ def nncf_compress_model(model, op=None, sd_model=None): def nncf_compress_weights(sd_model): try: t0 = time.time() - shared.log.info(f"NNCF Compress Weights: {shared.opts.nncf_compress_weights}") + shared.log.info(f"Quantization: type=NNCF modules={shared.opts.nncf_compress_weights}") global quant_last_model_name, quant_last_model_device # pylint: disable=global-statement install('nncf==2.7.0', quiet=True) @@ -199,9 +199,9 @@ def nncf_compress_weights(sd_model): quant_last_model_device = None t1 = time.time() - shared.log.info(f"NNCF Compress Weights: time={t1-t0:.2f}") + shared.log.info(f"Quantization: type=NNCF time={t1-t0:.2f}") except Exception as e: - shared.log.warning(f"NNCF Compress Weights: error: {e}") + shared.log.warning(f"Quantization: type=NNCF {e}") return sd_model @@ -249,10 +249,10 @@ def optimum_quanto_model(model, op=None, sd_model=None, weights=None, activation def optimum_quanto_weights(sd_model): try: if shared.opts.diffusers_offload_mode in {"balanced", "sequential"}: - shared.log.warning(f"Optimum Quanto Weights is incompatible with {shared.opts.diffusers_offload_mode} offload!") + shared.log.warning(f"Quantization: type=Optimum.quanto offload={shared.opts.diffusers_offload_mode} not compatible") return sd_model t0 = time.time() - shared.log.info(f"Optimum Quanto Weights: {shared.opts.optimum_quanto_weights}") + shared.log.info(f"Quantization: type=Optimum.quanto 
modules={shared.opts.optimum_quanto_weights}") global quant_last_model_name, quant_last_model_device # pylint: disable=global-statement quanto = model_quant.load_quanto() quanto.tensor.qbits.QBitsTensor.create = lambda *args, **kwargs: quanto.tensor.qbits.QBitsTensor(*args, **kwargs) @@ -299,9 +299,9 @@ def optimum_quanto_weights(sd_model): devices.torch_gc(force=True) t1 = time.time() - shared.log.info(f"Optimum Quanto Weights: time={t1-t0:.2f}") + shared.log.info(f"Quantization: type=Optimum.quanto time={t1-t0:.2f}") except Exception as e: - shared.log.warning(f"Optimum Quanto Weights: error: {e}") + shared.log.warning(f"Quantization: type=Optimum.quanto {e}") return sd_model @@ -329,7 +329,7 @@ def compile_onediff(sd_model): from onediff.infer_compiler import oneflow_compile except Exception as e: - shared.log.warning(f"Model compile using onediff/oneflow: {e}") + shared.log.warning(f"Model compile: task=onediff {e}") return sd_model try: @@ -351,9 +351,9 @@ def compile_onediff(sd_model): if shared.opts.cuda_compile_precompile: sd_model("dummy prompt") t1 = time.time() - shared.log.info(f"Model compile: task=onediff/oneflow time={t1-t0:.2f}") + shared.log.info(f"Model compile: task=onediff time={t1-t0:.2f}") except Exception as e: - shared.log.info(f"Model compile: task=onediff/oneflow error: {e}") + shared.log.info(f"Model compile: task=onediff {e}") return sd_model @@ -361,7 +361,7 @@ def compile_stablefast(sd_model): try: import sfast.compilers.stable_diffusion_pipeline_compiler as sf except Exception as e: - shared.log.warning(f'Model compile using stable-fast: {e}') + shared.log.warning(f'Model compile: task=stablefast {e}') return sd_model config = sf.CompilationConfig.Default() try: @@ -390,9 +390,9 @@ def compile_stablefast(sd_model): if shared.opts.cuda_compile_precompile: sd_model("dummy prompt") t1 = time.time() - shared.log.info(f"Model compile: task='Stable-fast' config={config.__dict__} time={t1-t0:.2f}") + shared.log.info(f"Model compile: 
task=stablefast config={config.__dict__} time={t1-t0:.2f}") except Exception as e: - shared.log.info(f"Model compile: task=Stable-fast error: {e}") + shared.log.info(f"Model compile: task=stablefast {e}") return sd_model @@ -401,7 +401,7 @@ def compile_torch(sd_model): t0 = time.time() import torch._dynamo # pylint: disable=unused-import,redefined-outer-name torch._dynamo.reset() # pylint: disable=protected-access - shared.log.debug(f"Model compile available backends: {torch._dynamo.list_backends()}") # pylint: disable=protected-access + shared.log.debug(f"Model compile: task=torch backends={torch._dynamo.list_backends()}") # pylint: disable=protected-access def torch_compile_model(model, op=None, sd_model=None): # pylint: disable=unused-argument if hasattr(model, "device") and model.device.type != "meta": @@ -442,7 +442,7 @@ def compile_torch(sd_model): torch._inductor.config.use_mixed_mm = True # pylint: disable=protected-access # torch._inductor.config.force_fuse_int_mm_with_mul = True # pylint: disable=protected-access except Exception as e: - shared.log.error(f"Torch inductor config error: {e}") + shared.log.error(f"Model compile: torch inductor config error: {e}") sd_model = apply_compile_to_model(sd_model, function=torch_compile_model, options=shared.opts.cuda_compile, op="compile") @@ -450,9 +450,9 @@ def compile_torch(sd_model): if shared.opts.cuda_compile_precompile: sd_model("dummy prompt") t1 = time.time() - shared.log.info(f"Model compile: time={t1-t0:.2f}") + shared.log.info(f"Model compile: task=torch time={t1-t0:.2f}") except Exception as e: - shared.log.warning(f"Model compile error: {e}") + shared.log.warning(f"Model compile: task=torch {e}") return sd_model @@ -467,19 +467,19 @@ def check_deepcache(enable: bool): def compile_deepcache(sd_model): global deepcache_worker # pylint: disable=global-statement if not hasattr(sd_model, 'unet'): - shared.log.warning(f'Model compile using deep-cache: {sd_model.__class__} not supported') + 
shared.log.warning(f'Model compile: task=deepcache pipeline={sd_model.__class__} not supported') return sd_model try: from DeepCache import DeepCacheSDHelper except Exception as e: - shared.log.warning(f'Model compile using deep-cache: {e}') + shared.log.warning(f'Model compile: task=deepcache {e}') return sd_model t0 = time.time() check_deepcache(False) deepcache_worker = DeepCacheSDHelper(pipe=sd_model) deepcache_worker.set_params(cache_interval=shared.opts.deep_cache_interval, cache_branch_id=0) t1 = time.time() - shared.log.info(f"Model compile: task='DeepCache' config={deepcache_worker.params} time={t1-t0:.2f}") + shared.log.info(f"Model compile: task=deepcache config={deepcache_worker.params} time={t1-t0:.2f}") # config={'cache_interval': 3, 'cache_layer_id': 0, 'cache_block_id': 0, 'skip_mode': 'uniform'} time=0.00 return sd_model @@ -503,40 +503,56 @@ def compile_diffusers(sd_model): return sd_model -def dynamic_quantization(sd_model): +def torchao_quantization(sd_model): try: install('torchao', quiet=True) - from torchao.quantization import autoquant + from torchao import quantization as q except Exception as e: - shared.log.error(f"Model dynamic quantization not supported: {e}") + shared.log.error(f"Quantization: type=TorchAO quantization not supported: {e}") return sd_model - - """ - from torchao.quantization import quant_api - def dynamic_quant_filter_fn(mod, *args): # pylint: disable=unused-argument - return (isinstance(mod, torch.nn.Linear) and mod.in_features > 16 and (mod.in_features, mod.out_features) - not in [(1280, 640), (1920, 1280), (1920, 640), (2048, 1280), (2048, 2560), (2560, 1280), (256, 128), (2816, 1280), (320, 640), (512, 1536), (512, 256), (512, 512), (640, 1280), (640, 1920), (640, 320), (640, 5120), (640, 640), (960, 320), (960, 640)]) - - def conv_filter_fn(mod, *args): # pylint: disable=unused-argument - return (isinstance(mod, torch.nn.Conv2d) and mod.kernel_size == (1, 1) and 128 in [mod.in_channels, mod.out_channels]) - - 
quant_api.swap_conv2d_1x1_to_linear(sd_model.unet, conv_filter_fn) - quant_api.swap_conv2d_1x1_to_linear(sd_model.vae, conv_filter_fn) - quant_api.apply_dynamic_quant(sd_model.unet, dynamic_quant_filter_fn) - quant_api.apply_dynamic_quant(sd_model.vae, dynamic_quant_filter_fn) - """ - - shared.log.info(f"Model dynamic quantization: pipeline={sd_model.__class__.__name__}") + if shared.opts.torchao_quantization_type == "int8+act": + fn = q.int8_dynamic_activation_int8_weight + elif shared.opts.torchao_quantization_type == "int8": + fn = q.int8_weight_only + elif shared.opts.torchao_quantization_type == "int4": + fn = q.int4_weight_only + elif shared.opts.torchao_quantization_type == "fp8+act": + fn = q.float8_dynamic_activation_float8_weight + elif shared.opts.torchao_quantization_type == "fp8": + fn = q.float8_weight_only + elif shared.opts.torchao_quantization_type == "fpx": + fn = q.fpx_weight_only + else: + shared.log.error(f"Quantization: type=TorchAO type={shared.opts.torchao_quantization_type} not supported") + return sd_model + shared.log.info(f"Quantization: type=TorchAO pipe={sd_model.__class__.__name__} quant={shared.opts.torchao_quantization_type} fn={fn} targets={shared.opts.torchao_quantization}") try: - if shared.sd_model_type == 'sd' or shared.sd_model_type == 'sdxl': - sd_model.unet = sd_model.unet.to(devices.device) - sd_model.unet = autoquant(sd_model.unet, error_on_unseen=False) - elif shared.sd_model_type == 'f1': - sd_model.transformer = autoquant(sd_model.transformer, error_on_unseen=False) - else: - shared.log.error(f"Model dynamic quantization not supported: {shared.sd_model_type}") + t0 = time.time() + modules = [] + if hasattr(sd_model, 'unet') and 'Model' in shared.opts.torchao_quantization: + modules.append('unet') + q.quantize_(sd_model.unet, fn(), device=devices.device) + if hasattr(sd_model, 'transformer') and 'Model' in shared.opts.torchao_quantization: + modules.append('transformer') + q.quantize_(sd_model.transformer, fn(), 
device=devices.device) + # sd_model.transformer = q.autoquant(sd_model.transformer, error_on_unseen=False) + if hasattr(sd_model, 'vae') and 'VAE' in shared.opts.torchao_quantization: + modules.append('vae') + q.quantize_(sd_model.vae, fn(), device=devices.device) + if hasattr(sd_model, 'text_encoder') and 'Text Encoder' in shared.opts.torchao_quantization: + modules.append('te1') + q.quantize_(sd_model.text_encoder, fn(), device=devices.device) + if hasattr(sd_model, 'text_encoder_2') and 'Text Encoder' in shared.opts.torchao_quantization: + modules.append('te2') + q.quantize_(sd_model.text_encoder_2, fn(), device=devices.device) + if hasattr(sd_model, 'text_encoder_3') and 'Text Encoder' in shared.opts.torchao_quantization: + modules.append('te3') + q.quantize_(sd_model.text_encoder_3, fn(), device=devices.device) + t1 = time.time() + shared.log.info(f"Quantization: type=TorchAO modules={modules} time={t1-t0:.2f}") except Exception as e: - shared.log.error(f"Model dynamic quantization: {e}") + shared.log.error(f"Quantization: type=TorchAO {e}") + setup_logging() # torchao uses dynamo which messes with logging so reset is needed return sd_model diff --git a/modules/shared.py b/modules/shared.py index d54fd65b3..69574ff92 100644 --- a/modules/shared.py +++ b/modules/shared.py @@ -426,7 +426,6 @@ startup_offload_mode, startup_cross_attention, startup_sdp_options = get_default options_templates.update(options_section(('sd', "Execution & Models"), { "sd_backend": OptionInfo(default_backend, "Execution backend", gr.Radio, {"choices": ["diffusers", "original"] }), - "autolaunch": OptionInfo(False, "Autolaunch browser upon startup"), "sd_model_checkpoint": OptionInfo(default_checkpoint, "Base model", gr.Dropdown, lambda: {"choices": list_checkpoint_tiles()}, refresh=refresh_checkpoints), "sd_model_refiner": OptionInfo('None', "Refiner model", gr.Dropdown, lambda: {"choices": ['None'] + list_checkpoint_tiles()}, refresh=refresh_checkpoints), "sd_vae": 
OptionInfo("Automatic", "VAE model", gr.Dropdown, lambda: {"choices": shared_items.sd_vae_items()}, refresh=shared_items.refresh_vae_list), @@ -493,11 +492,12 @@ options_templates.update(options_section(('cuda', "Compute Settings"), { "quant_sep": OptionInfo("

Model Quantization

", "", gr.HTML, {"visible": native}), "quant_shuffle_weights": OptionInfo(False, "Shuffle the weights between GPU and CPU when quantizing", gr.Checkbox, {"visible": native}), - "diffusers_quantization": OptionInfo(False, "Dynamic quantization with TorchAO", gr.Checkbox, {"visible": native}), - "nncf_compress_weights": OptionInfo([], "Compress Model weights with NNCF INT8", gr.CheckboxGroup, {"choices": ["Model", "VAE", "Text Encoder", "ControlNet"], "visible": native}), - "optimum_quanto_weights": OptionInfo([], "Quantize Model weights with Optimum Quanto", gr.CheckboxGroup, {"choices": ["Model", "VAE", "Text Encoder", "ControlNet"], "visible": native}), - "optimum_quanto_weights_type": OptionInfo("qint8", "Weights type for Optimum Quanto", gr.Radio, {"choices": ['qint8', 'qfloat8_e4m3fn', 'qfloat8_e5m2', 'qint4', 'qint2'], "visible": native}), - "optimum_quanto_activations_type": OptionInfo("none", "Activations type for Optimum Quanto", gr.Radio, {"choices": ['none', 'qint8', 'qfloat8_e4m3fn', 'qfloat8_e5m2'], "visible": native}), + "nncf_compress_weights": OptionInfo([], "NNCF int8 compression enabled", gr.CheckboxGroup, {"choices": ["Model", "VAE", "Text Encoder", "ControlNet"], "visible": native}), + "optimum_quanto_weights": OptionInfo([], "Optimum.quanto quantization enabled", gr.CheckboxGroup, {"choices": ["Model", "VAE", "Text Encoder", "ControlNet"], "visible": native}), + "optimum_quanto_weights_type": OptionInfo("qint8", "Optimum.quanto quantization type", gr.Radio, {"choices": ['qint8', 'qfloat8_e4m3fn', 'qfloat8_e5m2', 'qint4', 'qint2'], "visible": native}), + "optimum_quanto_activations_type": OptionInfo("none", "Optimum.quanto quantization activations", gr.Radio, {"choices": ['none', 'qint8', 'qfloat8_e4m3fn', 'qfloat8_e5m2'], "visible": native}), + "torchao_quantization": OptionInfo([], "TorchAO quantization enabled", gr.CheckboxGroup, {"choices": ["Model", "VAE", "Text Encoder"], "visible": native}), + "torchao_quantization_type": 
OptionInfo("int8", "TorchAO quantization type", gr.Radio, {"choices": ["int8+act", "int8", "int4", "fp8+act", "fp8", "fpx"], "visible": native}), "ipex_sep": OptionInfo("

IPEX

", "", gr.HTML, {"visible": devices.backend == "ipex"}), "ipex_optimize": OptionInfo([], "IPEX Optimize for Intel GPUs", gr.CheckboxGroup, {"choices": ["Model", "VAE", "Text Encoder", "Upscaler"], "visible": devices.backend == "ipex"}), @@ -713,6 +713,7 @@ options_templates.update(options_section(('ui', "User Interface Options"), { "theme_type": OptionInfo("Standard", "Theme type", gr.Radio, {"choices": ["Modern", "Standard", "None"]}), "theme_style": OptionInfo("Auto", "Theme mode", gr.Radio, {"choices": ["Auto", "Dark", "Light"]}), "gradio_theme": OptionInfo("black-teal", "UI theme", gr.Dropdown, lambda: {"choices": theme.list_themes()}, refresh=theme.refresh_themes), + "autolaunch": OptionInfo(False, "Autolaunch browser upon startup"), "font_size": OptionInfo(14, "Font size", gr.Slider, {"minimum": 8, "maximum": 32, "step": 1, "visible": True}), "tooltips": OptionInfo("UI Tooltips", "UI tooltips", gr.Radio, {"choices": ["None", "Browser default", "UI tooltips"], "visible": False}), "aspect_ratios": OptionInfo("1:1, 4:3, 3:2, 16:9, 16:10, 21:9, 2:3, 3:4, 9:16, 10:16, 9:21", "Allowed aspect ratios"),