diff --git a/CHANGELOG.md b/CHANGELOG.md
index 591c0afe3..bc6cd163b 100644
--- a/CHANGELOG.md
+++ b/CHANGELOG.md
@@ -1,5 +1,22 @@
 # Change Log for SD.Next
 
+## Update for 2024-11-22
+
+- Model loader improvements:  
+  - detect model components on model load fail  
+  - Flux, SD35: force unload model  
+  - Flux: apply `bnb` quant when loading *unet/transformer*  
+  - Flux: all-in-one safetensors  
+    example: <https://civitai.com/models/646328?modelVersionId=1040235>  
+  - Flux: do not recast quants  
+- Sampler improvements:  
+  - update DPM FlowMatch samplers  
+- Fixes:  
+  - update `diffusers`  
+  - fix README links  
+  - fix sdxl controlnet single-file loader  
+  - relax settings validator  
+
 ## Update for 2024-11-21
 
 ### Highlights for 2024-11-21
diff --git a/README.md b/README.md
index d099496b8..1bb5eacd0 100644
--- a/README.md
+++ b/README.md
@@ -56,7 +56,7 @@ For screenshots and informations on other available themes, see [Themes Wiki](ht
 ## Model support
 
 Additional models will be added as they become available and there is public interest in them  
-See [models overview](wiki/Models) for details on each model, including their architecture, complexity and other info  
+See [models overview](https://github.com/vladmandic/automatic/wiki/Models) for details on each model, including their architecture, complexity and other info  
 
 - [RunwayML Stable Diffusion](https://github.com/Stability-AI/stablediffusion/) 1.x and 2.x *(all variants)*
 - [StabilityAI Stable Diffusion XL](https://github.com/Stability-AI/generative-models), [StabilityAI Stable Diffusion 3.0](https://stability.ai/news/stable-diffusion-3-medium) Medium, [StabilityAI Stable Diffusion 3.5](https://huggingface.co/stabilityai/stable-diffusion-3.5-large) Medium, Large, Large Turbo
@@ -101,17 +101,17 @@ See [models overview](wiki/Models) for details on each model, including their ar
 
 ## Getting started
 
-- Get started with **SD.Next** by following the [installation instructions](wiki/Installation)  
-- For more details, check out [advanced installation](wiki/Advanced-Install) guide  
-- List and explanation of [command line arguments](wiki/CLI-Arguments)
+- Get started with **SD.Next** by following the [installation instructions](https://github.com/vladmandic/automatic/wiki/Installation)  
+- For more details, check out [advanced installation](https://github.com/vladmandic/automatic/wiki/Advanced-Install) guide  
+- List and explanation of [command line arguments](https://github.com/vladmandic/automatic/wiki/CLI-Arguments)
 - Install walkthrough [video](https://www.youtube.com/watch?v=nWTnTyFTuAs)
 
 > [!TIP]
 > And for platform specific information, check out  
-> [WSL](wiki/WSL) | [Intel Arc](wiki/Intel-ARC) | [DirectML](wiki/DirectML) | [OpenVINO](wiki/OpenVINO) | [ONNX & Olive](wiki/ONNX-Runtime) | [ZLUDA](wiki/ZLUDA) | [AMD ROCm](wiki/AMD-ROCm) | [MacOS](wiki/MacOS-Python.md) | [nVidia](wiki/nVidia)
+> [WSL](https://github.com/vladmandic/automatic/wiki/WSL) | [Intel Arc](https://github.com/vladmandic/automatic/wiki/Intel-ARC) | [DirectML](https://github.com/vladmandic/automatic/wiki/DirectML) | [OpenVINO](https://github.com/vladmandic/automatic/wiki/OpenVINO) | [ONNX & Olive](https://github.com/vladmandic/automatic/wiki/ONNX-Runtime) | [ZLUDA](https://github.com/vladmandic/automatic/wiki/ZLUDA) | [AMD ROCm](https://github.com/vladmandic/automatic/wiki/AMD-ROCm) | [MacOS](https://github.com/vladmandic/automatic/wiki/MacOS-Python.md) | [nVidia](https://github.com/vladmandic/automatic/wiki/nVidia)
 
 > [!WARNING]
-> If you run into issues, check out [troubleshooting](wiki/Troubleshooting) and [debugging](wiki/Debug) guides  
+> If you run into issues, check out [troubleshooting](https://github.com/vladmandic/automatic/wiki/Troubleshooting) and [debugging](https://github.com/vladmandic/automatic/wiki/Debug) guides  
 
 > [!TIP]
 > All command line options can also be set via env variable
diff --git a/TODO.md b/TODO.md
index 88d704457..973e062dc 100644
--- a/TODO.md
+++ b/TODO.md
@@ -8,6 +8,7 @@ Main ToDo list can be found at [GitHub projects](https://github.com/users/vladma
 - SD35 LoRA: <https://github.com/huggingface/diffusers/issues/9950>
 - Flux IPAdapter: <https://github.com/huggingface/diffusers/issues/9825>
 - Flux Fill/ControlNet/Redux: <https://github.com/huggingface/diffusers/pull/9985>
+- Flux NF4: <https://github.com/huggingface/diffusers/issues/9996>
 - SANA: <https://github.com/huggingface/diffusers/pull/9982>
 
 ## Other
diff --git a/cli/model-keys.py b/cli/model-keys.py
index bd4a91551..45b900bd7 100755
--- a/cli/model-keys.py
+++ b/cli/model-keys.py
@@ -38,6 +38,16 @@ def list_to_dict(flat_list):
     return result_dict
 
 
+def list_compact(flat_list):
+    """Collapse dotted keys to their first two segments and de-duplicate them.
+
+    'a.b.c' becomes 'a.b'; keys with fewer than two segments are kept whole.
+    Returns a list of the unique prefixes in first-seen order.
+    """
+    # dict.fromkeys de-duplicates in O(1) per key and preserves insertion order
+    return list(dict.fromkeys('.'.join(item.split('.')[:2]) for item in flat_list))
+
+
 def guess_dct(dct: dict):
     # if has(dct, 'model.diffusion_model.input_blocks') and has(dct, 'model.diffusion_model.label_emb'):
     #    return 'sdxl'
@@ -65,7 +75,9 @@ def read_keys(fn):
     except Exception as e:
         pprint(e)
     dct = list_to_dict(keys)
+    lst = list_compact(keys)
     pprint(f'file: {fn}')
+    pprint(lst)
     pprint(remove_entries_after_depth(dct, 3))
     pprint(remove_entries_after_depth(dct, 6))
     guess = guess_dct(dct)
diff --git a/installer.py b/installer.py
index fe52ce668..0b64c3616 100644
--- a/installer.py
+++ b/installer.py
@@ -459,7 +459,7 @@ def check_python(supported_minors=[9, 10, 11, 12], reason=None):
 def check_diffusers():
     if args.skip_all or args.skip_requirements:
         return
-    sha = 'cd6ca9df2987c000b28e13b19bd4eec3ef3c914b'
+    sha = 'b5fd6f13f5434d69d919cc8cedf0b11db664cf06'
     pkg = pkg_resources.working_set.by_key.get('diffusers', None)
     minor = int(pkg.version.split('.')[1] if pkg is not None else 0)
     cur = opts.get('diffusers_version', '') if minor > 0 else ''
diff --git a/modules/model_flux.py b/modules/model_flux.py
index c605702c8..17234d9a4 100644
--- a/modules/model_flux.py
+++ b/modules/model_flux.py
@@ -194,6 +194,7 @@ def load_transformer(file_path): # triggered by opts.sd_unet change
         if _transformer is not None:
             transformer = _transformer
     else:
+        diffusers_load_config = model_quant.create_bnb_config(diffusers_load_config)
         transformer = diffusers.FluxTransformer2DModel.from_single_file(file_path, **diffusers_load_config)
     if transformer is None:
         shared.log.error('Failed to load UNet model')
@@ -213,6 +214,11 @@ def load_flux(checkpoint_info, diffusers_load_config): # triggered by opts.sd_ch
     text_encoder_2 = None
     vae = None
 
+    # unload current model
+    sd_models.unload_model_weights()
+    shared.sd_model = None
+    devices.torch_gc(force=True)
+
     # load overrides if any
     if shared.opts.sd_unet != 'None':
         try:
@@ -305,8 +311,21 @@ def load_flux(checkpoint_info, diffusers_load_config): # triggered by opts.sd_ch
         repo_id = 'black-forest-labs/FLUX.1-dev' # workaround since sayakpaul model is missing model_index.json
     for c in kwargs:
         if kwargs[c].dtype == torch.float32 and devices.dtype != torch.float32:
-            shared.log.warning(f'Load model: type=FLUX component={c} dtype={kwargs[c].dtype} cast dtype={devices.dtype}')
+            shared.log.warning(f'Load model: type=FLUX component={c} dtype={kwargs[c].dtype} cast dtype={devices.dtype} recast')
             kwargs[c] = kwargs[c].to(dtype=devices.dtype)
-    kwargs = model_quant.create_bnb_config(kwargs)
-    pipe = diffusers.FluxPipeline.from_pretrained(repo_id, cache_dir=shared.opts.diffusers_dir, **kwargs, **diffusers_load_config)
+
+    allow_bnb = 'gguf' not in (sd_unet.loaded_unet or '')
+    kwargs = model_quant.create_bnb_config(kwargs, allow_bnb)
+    if checkpoint_info.path.endswith('.safetensors') and os.path.isfile(checkpoint_info.path):
+        pipe = diffusers.FluxPipeline.from_single_file(checkpoint_info.path, cache_dir=shared.opts.diffusers_dir, **kwargs, **diffusers_load_config)
+    else:
+        pipe = diffusers.FluxPipeline.from_pretrained(repo_id, cache_dir=shared.opts.diffusers_dir, **kwargs, **diffusers_load_config)
+
+    # release memory
+    transformer = None
+    text_encoder_1 = None
+    text_encoder_2 = None
+    vae = None
+    devices.torch_gc()
+
     return pipe
diff --git a/modules/model_quant.py b/modules/model_quant.py
index 68bdfa7b2..0e7bdd4b3 100644
--- a/modules/model_quant.py
+++ b/modules/model_quant.py
@@ -7,10 +7,10 @@ bnb = None
 quanto = None
 
 
-def create_bnb_config(kwargs = None):
+def create_bnb_config(kwargs = None, allow_bnb: bool = True):
     from modules import shared, devices
-    if len(shared.opts.bnb_quantization) > 0:
-        if 'Model' in shared.opts.bnb_quantization and 'transformer' not in (kwargs or {}):
+    if len(shared.opts.bnb_quantization) > 0 and allow_bnb:
+        if 'Model' in shared.opts.bnb_quantization:
             load_bnb()
             bnb_config = diffusers.BitsAndBytesConfig(
                 load_in_8bit=shared.opts.bnb_quantization_type in ['fp8'],
diff --git a/modules/model_sd3.py b/modules/model_sd3.py
index 78eee7b4d..b9d579085 100644
--- a/modules/model_sd3.py
+++ b/modules/model_sd3.py
@@ -120,6 +120,11 @@ def load_sd3(checkpoint_info, cache_dir=None, config=None):
     repo_id = sd_models.path_to_repo(checkpoint_info.name)
     fn = checkpoint_info.path
 
+    # unload current model
+    sd_models.unload_model_weights()
+    shared.sd_model = None
+    devices.torch_gc(force=True)
+
     kwargs = {}
     kwargs = load_overrides(kwargs, cache_dir)
     if fn is None or not os.path.exists(fn):
@@ -152,5 +157,5 @@ def load_sd3(checkpoint_info, cache_dir=None, config=None):
         config=config,
         **kwargs,
     )
-    devices.torch_gc(force=True)
+    devices.torch_gc()
     return pipe
diff --git a/modules/model_tools.py b/modules/model_tools.py
index 1d016a19e..07cd61b6e 100644
--- a/modules/model_tools.py
+++ b/modules/model_tools.py
@@ -13,6 +13,16 @@ def remove_entries_after_depth(d, depth, current_depth=0):
     return d
 
 
+def list_compact(flat_list):
+    """Collapse dotted keys to their first two segments and de-duplicate them.
+
+    'a.b.c' becomes 'a.b'; keys with fewer than two segments are kept whole.
+    Returns a list of the unique prefixes in first-seen order.
+    """
+    # dict.fromkeys de-duplicates in O(1) per key and preserves insertion order
+    return list(dict.fromkeys('.'.join(item.split('.')[:2]) for item in flat_list))
+
+
 def list_to_dict(flat_list):
     result_dict = {}
     try:
diff --git a/modules/schedulers/scheduler_dpm_flowmatch.py b/modules/schedulers/scheduler_dpm_flowmatch.py
index 83573105e..1afe54498 100644
--- a/modules/schedulers/scheduler_dpm_flowmatch.py
+++ b/modules/schedulers/scheduler_dpm_flowmatch.py
@@ -9,11 +9,11 @@ import torch
 import torchsde
 
 from diffusers.configuration_utils import ConfigMixin, register_to_config
-from diffusers.utils import BaseOutput, logging
+from diffusers.utils import BaseOutput
 from diffusers.utils.torch_utils import randn_tensor
 from diffusers.schedulers.scheduling_utils import SchedulerMixin
+import scipy.stats
 
-logger = logging.get_logger(__name__)  # pylint: disable=invalid-name
 
 class BatchedBrownianTree:
     """A wrapper around torchsde.BrownianTree that enables batches of entropy."""
@@ -101,39 +101,42 @@ class FlowMatchDPMSolverMultistepScheduler(SchedulerMixin, ConfigMixin):
     Args:
         num_train_timesteps (`int`, defaults to 1000):
             The number of diffusion steps to train the model.
+        beta_start (`float`, defaults to 0.00085):
+            The starting `beta` value of inference.
+        beta_end (`float`, defaults to 0.012):
+            The final `beta` value.
+        beta_schedule (`str`, defaults to `"scaled linear"`):
+            The beta schedule, a mapping from a beta range to a sequence of betas for stepping the model. Choose from `linear` or `scaled linear`.
+        trained_betas (`np.ndarray`, *optional*):
+            Pass an array of betas directly to the constructor to bypass `beta_start` and `beta_end`.
         solver_order (`int`, defaults to 2):
             The DPMSolver order which can be `2` or `3`. It is recommended to use `solver_order=2` for guided
             sampling, and `solver_order=3` for unconditional sampling.
-        thresholding (`bool`, defaults to `False`):
-            Whether to use the "dynamic thresholding" method. This is unsuitable for latent-space diffusion models such
-            as Stable Diffusion.
-        dynamic_thresholding_ratio (`float`, defaults to 0.995):
-            The ratio for the dynamic thresholding method. Valid only when `thresholding=True`.
-        sample_max_value (`float`, defaults to 1.0):
-            The threshold value for dynamic thresholding. Valid only when `thresholding=True`.
         algorithm_type (`str`, defaults to `dpmsolver++2M`):
             Algorithm type for the solver; can be `dpmsolver2`, `dpmsolver2A`, `dpmsolver++2M`, `dpmsolver++2S`, `dpmsolver++sde`, `dpmsolver++2Msde`, 
             or `dpmsolver++3Msde`.
         solver_type (`str`, defaults to `midpoint`):
             Solver type for the second-order solver; can be `midpoint` or `heun`. The solver type slightly affects the
             sample quality, especially for a small number of steps. It is recommended to use `midpoint` solvers.
-        sigma_schedule (`str`, *optional*, defaults to None): Sigma schedule to compute the `sigmas`. Optionally, we use 
+        sigma_schedule (`str`, *optional*, defaults to None (beta)): Sigma schedule to compute the `sigmas`. Optionally, we use 
             the schedule "karras" introduced in the EDM paper (https://arxiv.org/abs/2206.00364). Other acceptable values are 
             "exponential". The exponential schedule was incorporated in this model: https://huggingface.co/stabilityai/cosxl. 
             Other acceptable values are "lambdas". The uniform-logSNR for step sizes proposed by Lu's DPM-Solver in the 
             noise schedule during the sampling process. The sigmas and time steps are determined according to a sequence of `lambda(t)`.
-        use_noise_sampler for BrownianTreeNoiseSampler (only valid for `dpmsolver++2S`, `dpmsolver++sde`, `dpmsolver++2Msde`, 
-            or `dpmsolver++3Msde`): A noise sampler backed by a torchsde increasing the stability of convergence. Default strategy 
+            Other acceptable values are "betas", which spaces the step sizes of the noise schedule along a beta distribution. Refer to [Beta
+            Sampling is All You Need](https://huggingface.co/papers/2407.12173) for more information.
+        use_noise_sampler for BrownianTreeNoiseSampler (only valid for `dpmsolver++2S`, `dpmsolver++sde`, `dpmsolver++2Msde`, or `dpmsolver++3Msde`):
+            A noise sampler backed by a torchsde increasing the stability of convergence. Default strategy 
             (random noise) has it jumping all over the place, but Brownian sampling is more stable. Utilizes the model generation seed provided.
         midpoint_ratio (`float`, *optional*, range: 0.4 to 0.6, default=0.5): Only valid for (`dpmsolver++sde`, `dpmsolver++2S`).
             Higher values may result in smoothing, more vivid colors and less noise at the expense of more detail and effect.
         s_noise (`float`, *optional*, defaults to 1.0): Sigma noise strength: range 0 - 1.1 (only valid for `dpmsolver++2S`, `dpmsolver++sde`, 
             `dpmsolver++2Msde`, or `dpmsolver++3Msde`). The amount of additional noise to counteract loss of detail during sampling. A 
             reasonable range is [1.000, 1.011]. Defaults to 1.0 from the original implementation.
-        use_SD35_sigmas: (`bool` defaults to False for FLUX and True for SD3). Based on original interpretation of using beta values for determining sigmas.
+        use_beta_sigmas: (`bool` defaults to False for FLUX and True for SD3). Based on original interpretation of using beta values for determining sigmas.
         use_dynamic_shifting (`bool` defaults to False for SD3 and True for FLUX). When `True`, shift is ignored.
-        shift (`float`, defaults to 3.0): The shift value for the timestep schedule for SD3 when not using dynamic shifting
-        The remaining args are specific to Flux's dynamic shifting based on resolution
+        shift (`float`, defaults to 3.0): The shift value for the timestep schedule for SD3 when not using dynamic shifting.
+        The remaining args are specific to Flux's dynamic shifting based on resolution.
     """
 
     _compatibles = []
@@ -143,10 +146,11 @@ class FlowMatchDPMSolverMultistepScheduler(SchedulerMixin, ConfigMixin):
     def __init__(
         self,
         num_train_timesteps: int = 1000,
+        beta_start: float = 0.00085,
+        beta_end: float = 0.012,
+        beta_schedule: str = "scaled linear",
+        trained_betas: Optional[Union[np.ndarray, List[float]]] = None,
         solver_order: int = 2,
-        thresholding: Optional[bool] = False,
-        dynamic_thresholding_ratio: float = 0.995,
-        sample_max_value: Optional[float] = 1.0,
         algorithm_type: str = "dpmsolver++2M",
         solver_type: str = "midpoint",
         sigma_schedule: Optional[str] = None,
@@ -154,7 +158,7 @@ class FlowMatchDPMSolverMultistepScheduler(SchedulerMixin, ConfigMixin):
         midpoint_ratio: Optional[float] = 0.5,
         s_noise: Optional[float] = 1.0,
         use_noise_sampler: Optional[bool] = True,
-        use_SD35_sigmas: Optional[bool] = False,
+        use_beta_sigmas: Optional[bool] = False,
         use_dynamic_shifting=False,
         base_shift: Optional[float] = 0.5,
         max_shift: Optional[float] = 1.15,
@@ -168,6 +172,12 @@ class FlowMatchDPMSolverMultistepScheduler(SchedulerMixin, ConfigMixin):
         if solver_type not in ["midpoint", "heun"]:
             raise NotImplementedError(f"{solver_type} is not implemented for {self.__class__}")
 
+        if sigma_schedule not in [None, "karras", "exponential", "lambdas", "betas"]:
+            raise NotImplementedError(f"{sigma_schedule} is not implemented for {self.__class__}")
+
+        if beta_schedule not in ["linear", "scaled linear"]:
+            raise NotImplementedError(f"{beta_schedule} is not implemented for {self.__class__}")
+
         # setable values
         timesteps = np.linspace(1, num_train_timesteps, num_train_timesteps, dtype=np.float32)[::-1].copy()
         timesteps = torch.from_numpy(timesteps).to(dtype=torch.float32)
@@ -186,8 +196,6 @@ class FlowMatchDPMSolverMultistepScheduler(SchedulerMixin, ConfigMixin):
         self._begin_index = None
         self.sigmas = sigmas.to("cpu")  # to avoid too much CPU/GPU communication
         self.model_outputs = [None] * solver_order
-        self.sigma_min = self.sigmas[-1].item()
-        self.sigma_max = self.sigmas[0].item()
 
     @property
     def step_index(self):
@@ -213,7 +221,7 @@ class FlowMatchDPMSolverMultistepScheduler(SchedulerMixin, ConfigMixin):
         """
         self._begin_index = begin_index
 
-    def time_shift(self, mu: float, sigma: float, t: torch.Tensor):
+    def time_shift(self, mu: float, sigma: float, t: torch.FloatTensor):
         return math.exp(mu) / (math.exp(mu) + (1 / t - 1) ** sigma)
 
     def set_timesteps(self,
@@ -235,25 +243,39 @@ class FlowMatchDPMSolverMultistepScheduler(SchedulerMixin, ConfigMixin):
             raise ValueError(" you have a pass a value for `mu` when `use_dynamic_shifting` is set to be `True`")
 
         if sigmas is None:
-            self.use_SD35_sigmas = True
+            self.use_beta_sigmas = True
             self.num_inference_steps = num_inference_steps
-            sigmas1 = np.linspace(1.0, 1 / num_inference_steps, num_inference_steps, dtype=np.float64)
-            beta_start = 0.00085
-            beta_end = 0.012
-            betas = torch.linspace(beta_start**0.5, beta_end**0.5, self.config.num_train_timesteps, dtype=torch.float64) ** 2
+            beta_start = self.config.beta_start
+            beta_end = self.config.beta_end
+            if self.config.trained_betas is not None:
+                betas = torch.tensor(self.config.trained_betas, dtype=torch.float64)
+            elif self.config.beta_schedule == "linear":
+                betas = torch.linspace(beta_start, beta_end, self.config.num_train_timesteps, dtype=torch.float64)
+            elif self.config.beta_schedule == "scaled linear":
+                # this schedule is very specific to the latent diffusion model.
+                betas = torch.linspace(beta_start**0.5, beta_end**0.5, self.config.num_train_timesteps, dtype=torch.float64) ** 2
+            else:
+                raise NotImplementedError(f"{self.config.beta_schedule} is not implemented for {self.__class__}")
             alphas = 1.0 - betas
             alphas_cumprod = torch.cumprod(alphas, dim=0)
             sigmas = np.array(((1 - alphas_cumprod) / alphas_cumprod) ** 0.5)
             del alphas_cumprod
             del alphas
             del betas
-        elif self.use_SD35_sigmas:
+        elif self.use_beta_sigmas:
             num_inference_steps = len(sigmas)
             self.num_inference_steps = num_inference_steps
-            sigmas1 = np.linspace(1.0, 1 / num_inference_steps, num_inference_steps, dtype=np.float64)
-            beta_start = 0.00085
-            beta_end = 0.012
-            betas = torch.linspace(beta_start**0.5, beta_end**0.5, self.config.num_train_timesteps, dtype=torch.float64) ** 2
+            beta_start = self.config.beta_start
+            beta_end = self.config.beta_end
+            if self.config.trained_betas is not None:
+                betas = torch.tensor(self.config.trained_betas, dtype=torch.float64)
+            elif self.config.beta_schedule == "linear":
+                betas = torch.linspace(beta_start, beta_end, self.config.num_train_timesteps, dtype=torch.float64)
+            elif self.config.beta_schedule == "scaled linear":
+                # this schedule is very specific to the latent diffusion model.
+                betas = torch.linspace(beta_start**0.5, beta_end**0.5, self.config.num_train_timesteps, dtype=torch.float64) ** 2
+            else:
+                raise NotImplementedError(f"{self.config.beta_schedule} is not implemented for {self.__class__}")
             alphas = 1.0 - betas
             alphas_cumprod = torch.cumprod(alphas, dim=0)
             sigmas = np.array(((1 - alphas_cumprod) / alphas_cumprod) ** 0.5)
@@ -265,7 +287,7 @@ class FlowMatchDPMSolverMultistepScheduler(SchedulerMixin, ConfigMixin):
             self.num_inference_steps = num_inference_steps
 
         if self.config.sigma_schedule == "exponential":
-            if self.use_SD35_sigmas:
+            if self.use_beta_sigmas:
                 sigmas = np.flip(sigmas).copy()
                 sigma_min = sigmas[-1]
                 sigma_max = sigmas[0]
@@ -273,13 +295,12 @@ class FlowMatchDPMSolverMultistepScheduler(SchedulerMixin, ConfigMixin):
                 OldRange = sigma_max - sigma_min
                 NewRange = 1.0 - sigma_min
                 sigmas = (((sigmas - sigma_min) * NewRange) / OldRange) + sigma_min
-                del sigmas1
             else:
                 sigma_min = sigmas[-1]
                 sigma_max = sigmas[0]
                 sigmas = self._convert_to_exponential(sigma_min, sigma_max, num_inference_steps=num_inference_steps)
         elif self.config.sigma_schedule == "karras":
-            if self.use_SD35_sigmas:
+            if self.use_beta_sigmas:
                 sigmas = np.flip(sigmas).copy()
                 sigma_min = sigmas[-1]
                 sigma_max = sigmas[0]
@@ -287,14 +308,13 @@ class FlowMatchDPMSolverMultistepScheduler(SchedulerMixin, ConfigMixin):
                 OldRange = sigma_max - sigma_min
                 NewRange = 1.0 - sigma_min
                 sigmas = (((sigmas - sigma_min) * NewRange) / OldRange) + sigma_min
-                del sigmas1
             else:
                 sigma_min = sigmas[-1]
                 sigma_max = sigmas[0]
                 sigmas = self._convert_to_karras(sigma_min, sigma_max, num_inference_steps=num_inference_steps)
             sigmas = torch.from_numpy(sigmas).to(dtype=torch.float64, device=device)
         elif self.config.sigma_schedule == "lambdas":
-            if self.use_SD35_sigmas:
+            if self.use_beta_sigmas:
                 log_sigmas = np.log(sigmas)
                 lambdas = np.flip(log_sigmas.copy())
                 lambdas = self._convert_to_lu(in_lambdas=lambdas, num_inference_steps=num_inference_steps)
@@ -304,7 +324,6 @@ class FlowMatchDPMSolverMultistepScheduler(SchedulerMixin, ConfigMixin):
                 OldRange = sigma_max - sigma_min
                 NewRange = 1.0 - sigma_min
                 sigmas = (((sigmas - sigma_min) * NewRange) / OldRange) + sigma_min
-                del sigmas1
                 del lambdas
                 del log_sigmas
             else:
@@ -315,12 +334,25 @@ class FlowMatchDPMSolverMultistepScheduler(SchedulerMixin, ConfigMixin):
                 del lambdas
                 del log_sigmas
             sigmas = torch.from_numpy(sigmas).to(dtype=torch.float64, device=device)
-        else:
-            if self.use_SD35_sigmas:
+        elif self.config.sigma_schedule == "betas":
+            if self.use_beta_sigmas:
+                sigmas = np.flip(sigmas).copy()
+                sigma_min = sigmas[-1]
+                sigma_max = sigmas[0]
+                sigmas = self._convert_to_beta(sigma_min, sigma_max, num_inference_steps=num_inference_steps, device=device)
+                OldRange = sigma_max - sigma_min
+                NewRange = 1.0 - sigma_min
+                sigmas = (((sigmas - sigma_min) * NewRange) / OldRange) + sigma_min
+            else:
+                sigmas = np.flip(sigmas).copy()
+                sigma_min = sigmas[-1]
+                sigmas = np.linspace(1.0, sigma_min, num_inference_steps)
+                sigmas = torch.from_numpy(sigmas).to(dtype=torch.float64, device=device)
+        else:
+            if self.use_beta_sigmas:
                 sigmas = np.flip(sigmas).copy()
                 sigma_min = sigmas[-1]
                 sigmas = np.linspace(1.0, sigma_min, num_inference_steps)
-                del sigmas1
             sigmas = torch.from_numpy(sigmas).to(dtype=torch.float64, device=device)
         
         if self.config.use_dynamic_shifting:
@@ -339,39 +371,19 @@ class FlowMatchDPMSolverMultistepScheduler(SchedulerMixin, ConfigMixin):
         self._step_index = None
         self._begin_index = None
 
-    # Copied from diffusers.schedulers.scheduling_ddpm.DDPMScheduler._threshold_sample
-    def _threshold_sample(self, sample: torch.Tensor) -> torch.Tensor:
-        """
-        "Dynamic thresholding: At each sampling step we set s to a certain percentile absolute pixel value in xt0 (the
-        prediction of x_0 at timestep t), and if s > 1, then we threshold xt0 to the range [-s, s] and then divide by
-        s. Dynamic thresholding pushes saturated pixels (those near -1 and 1) inwards, thereby actively preventing
-        pixels from saturation at each step. We find that dynamic thresholding results in significantly better
-        photorealism as well as better image-text alignment, especially when using very large guidance weights."
-
-        https://arxiv.org/abs/2205.11487
-        """
-        dtype = sample.dtype
-        batch_size, channels, *remaining_dims = sample.shape
-
-        if dtype not in (torch.float32, torch.float64):
-            sample = sample.float()  # upcast for quantile calculation, and clamp not implemented for cpu half
-
-        # Flatten sample for doing quantile calculation along each image
-        sample = sample.reshape(batch_size, channels * np.prod(remaining_dims))
-
-        abs_sample = sample.abs()  # "a certain percentile absolute pixel value"
-
-        s = torch.quantile(abs_sample, self.config.dynamic_thresholding_ratio, dim=1)
-        s = torch.clamp(
-            s, min=1, max=self.config.sample_max_value
-        )  # When clamped to min=1, equivalent to standard clipping to [-1, 1]
-        s = s.unsqueeze(1)  # (batch_size, 1) because clamp will broadcast along dim=0
-        sample = torch.clamp(sample, -s, s) / s  # "we threshold xt0 to the range [-s, s] and then divide by s"
-
-        sample = sample.reshape(batch_size, channels, *remaining_dims)
-        sample = sample.to(dtype)
-
-        return sample
+    # Adapted from diffusers.schedulers.scheduling_euler_discrete.EulerDiscreteScheduler._convert_to_beta
+    def _convert_to_beta(self, sigma_min, sigma_max, num_inference_steps, device: Union[str, torch.device] = None, alpha: float = 0.6, beta: float = 0.6) -> torch.Tensor:
+        """Build a sigma schedule from the quantile function of a Beta(alpha, beta) distribution.
+
+        From "Beta Sampling is All You Need" [arXiv:2407.12173] (Lee et al., 2024).
+        Returns a float64 tensor of `num_inference_steps` values running from `sigma_max` to `sigma_min`.
+        """
+        # quantile positions run 1 -> 0 so the first sigma is the largest
+        quantiles = 1 - np.linspace(0, 1, num_inference_steps).astype(np.float64)
+        # ppf is vectorized; one call replaces the per-timestep Python loop
+        ppf = scipy.stats.beta.ppf(quantiles, alpha, beta)
+        # create the tensor directly in float64: torch.Tensor(...) would round the values through float32 first
+        return torch.tensor(sigma_min + ppf * (sigma_max - sigma_min), dtype=torch.float64, device=device)
 
     def _convert_to_lu(self, in_lambdas: torch.Tensor, num_inference_steps) -> torch.Tensor:
         """Constructs the noise schedule of Lu et al. (2022)."""
@@ -399,51 +411,6 @@ class FlowMatchDPMSolverMultistepScheduler(SchedulerMixin, ConfigMixin):
         sigmas = torch.linspace(math.log(sigma_max), math.log(sigma_min), num_inference_steps).exp()
         return sigmas
 
-    def convert_model_output(
-        self,
-        model_output: torch.Tensor,
-        sample: torch.Tensor = None,
-        *args,
-        **kwargs,
-    ) -> torch.Tensor:
-        """
-        Convert the model output to the corresponding type the DPMSolver/DPMSolver++ algorithm needs. DPM-Solver is
-        designed to discretize an integral of the noise prediction model, and DPM-Solver++ is designed to discretize an
-        integral of the data prediction model.
-
-        <Tip>
-
-        The algorithm and model type are decoupled. You can use either DPMSolver or DPMSolver++ for both noise
-        prediction and data prediction models.
-
-        </Tip>
-
-        Args:
-            model_output (`torch.Tensor`):
-                The direct output from the learned diffusion model.
-            sample (`torch.Tensor`):
-                A current instance of a sample created by the diffusion process.
-
-        Returns:
-            `torch.Tensor`:
-                The converted model output.
-        """
-        timestep = args[0] if len(args) > 0 else kwargs.pop("timestep", None)
-        if sample is None:
-            if len(args) > 1:
-                sample = args[1]
-            else:
-                raise ValueError("missing `sample` as a required keyward argument")
-
-        # Flow Match needs to solve an integral of the data prediction model.
-        sigma = self.sigmas[self.step_index]
-        x0_pred = sample - sigma * model_output
-
-        if self.config.thresholding:
-            x0_pred = self._threshold_sample(x0_pred)
-
-        return x0_pred
-
     def index_for_timestep(self, timestep, schedule_timesteps=None):
         if schedule_timesteps is None:
             schedule_timesteps = self.timesteps
@@ -511,7 +478,9 @@ class FlowMatchDPMSolverMultistepScheduler(SchedulerMixin, ConfigMixin):
         if self.config.algorithm_type in ["dpmsolver2", "dpmsolver2A"]:
             pass
         else:
-            model_output = self.convert_model_output(model_output, sample=sample)
+            # Flow Match needs to solve an integral of the data prediction model.
+            sigma = self.sigmas[self.step_index]
+            model_output = sample - sigma * model_output
             for i in range(self.config.solver_order - 1):
                 self.model_outputs[i] = self.model_outputs[i + 1]
             self.model_outputs[-1] = model_output
@@ -830,7 +799,7 @@ class FlowMatchDPMSolverMultistepScheduler(SchedulerMixin, ConfigMixin):
 
         return FlowMatchDPMSolverMultistepSchedulerOutput(prev_sample=prev_sample)
 
-    def scale_model_input(self, sample: torch.Tensor, *args, **kwargs) -> torch.Tensor:
+    def scale_model_input(self, sample: torch.FloatTensor, *args, **kwargs) -> torch.FloatTensor:
         """
         Ensures interchangeability with schedulers that need to scale the denoising model input depending on the
         current timestep.
diff --git a/modules/sd_detect.py b/modules/sd_detect.py
index 31f773607..062bb32e1 100644
--- a/modules/sd_detect.py
+++ b/modules/sd_detect.py
@@ -92,7 +92,7 @@ def detect_pipeline(f: str, op: str = 'model', warning=True, quiet=False):
                 guess = 'Stable Diffusion 3'
             if 'flux' in f.lower():
                 guess = 'FLUX'
-                if size > 11000 and size < 20000:
+                if size > 11000 and size < 16000:
                     warn(f'Model detected as FLUX UNET model, but attempting to load a base model: {op}={f} size={size} MB')
             # switch for specific variant
             if guess == 'Stable Diffusion' and 'inpaint' in f.lower():
@@ -112,8 +112,9 @@ def detect_pipeline(f: str, op: str = 'model', warning=True, quiet=False):
                 if keys is not None and len(keys) > 0:
                     modules = model_tools.list_to_dict(keys)
                     modules = model_tools.remove_entries_after_depth(modules, 3)
+                    lst = model_tools.list_compact(keys)
                     t1 = time.time()
-                    shared.log.debug(f'Autodetect modules: {modules} time={t1-t0:.2f}')
+                    shared.log.debug(f'Autodetect: modules={modules} list={lst} time={t1-t0:.2f}')
         except Exception as e:
             shared.log.error(f'Autodetect {op}: file="{f}" {e}')
             if debug_load:
diff --git a/modules/sd_models.py b/modules/sd_models.py
index 0336a6268..cf1921a36 100644
--- a/modules/sd_models.py
+++ b/modules/sd_models.py
@@ -504,6 +504,8 @@ def move_model(model, device=None, force=False):
                                         module.to_empty(device=device)
             elif 'enable_sequential_cpu_offload' in str(e0):
                 pass # ignore model move if sequential offload is enabled
+            elif 'Params4bit' in str(e0) or 'Params8bit' in str(e0):
+                pass # ignore model move if quantization is enabled
             else:
                 raise e0
         t1 = time.time()
@@ -819,6 +821,7 @@ def load_diffuser(checkpoint_info=None, already_loaded_state_dict=None, timer=No
         if model_type is None:
             shared.log.error(f'Load {op}: pipeline={shared.opts.diffusers_pipeline} not detected')
             return
+        vae_file = None
         if model_type.startswith('Stable Diffusion') and (op == 'model' or op == 'refiner'): # preload vae for sd models
             vae_file, vae_source = sd_vae.resolve_vae(checkpoint_info.filename)
             vae = sd_vae.load_vae_diffusers(checkpoint_info.path, vae_file, vae_source)
@@ -897,7 +900,7 @@ def load_diffuser(checkpoint_info=None, already_loaded_state_dict=None, timer=No
 
         set_diffuser_offload(sd_model, op)
         if op == 'model' and not (os.path.isdir(checkpoint_info.path) or checkpoint_info.type == 'huggingface'):
-            if getattr(shared.sd_model, 'sd_checkpoint_info', None) is not None:
+            if getattr(shared.sd_model, 'sd_checkpoint_info', None) is not None and vae_file is not None:
                 sd_vae.apply_vae_config(shared.sd_model.sd_checkpoint_info.filename, vae_file, sd_model)
         if op == 'refiner' and shared.opts.diffusers_move_refiner:
             shared.log.debug('Moving refiner model to CPU')
diff --git a/modules/sd_samplers_diffusers.py b/modules/sd_samplers_diffusers.py
index 370cb767b..60c75b64e 100644
--- a/modules/sd_samplers_diffusers.py
+++ b/modules/sd_samplers_diffusers.py
@@ -80,13 +80,13 @@ config = {
     'DPM++ Cosine': { 'solver_order': 2, 'sigma_schedule': "exponential", 'prediction_type': "v-prediction" },
     'DPM SDE': { 'use_karras_sigmas': False, 'use_exponential_sigmas': False, 'use_beta_sigmas': False, 'noise_sampler_seed': None, 'timestep_spacing': 'linspace', 'steps_offset': 0,  },
 
-    'DPM2 FlowMatch': { 'shift': 1, 'use_dynamic_shifting': False, 'solver_order': 2, 'sigma_schedule': None, 'use_SD35_sigmas': False, 'algorithm_type': 'dpmsolver2', 'use_noise_sampler': True },
-    'DPM2a FlowMatch': { 'shift': 1, 'use_dynamic_shifting': False, 'solver_order': 2, 'sigma_schedule': None, 'use_SD35_sigmas': False, 'algorithm_type': 'dpmsolver2A', 'use_noise_sampler': True },
-    'DPM2++ 2M FlowMatch': { 'shift': 1, 'use_dynamic_shifting': False, 'solver_order': 2, 'sigma_schedule': None, 'use_SD35_sigmas': False, 'algorithm_type': 'dpmsolver++2M', 'use_noise_sampler': True },
-    'DPM2++ 2S FlowMatch': { 'shift': 1, 'use_dynamic_shifting': False, 'solver_order': 2, 'sigma_schedule': None, 'use_SD35_sigmas': False, 'algorithm_type': 'dpmsolver++2S', 'use_noise_sampler': True },
-    'DPM2++ SDE FlowMatch': { 'shift': 1, 'use_dynamic_shifting': False, 'solver_order': 2, 'sigma_schedule': None, 'use_SD35_sigmas': False, 'algorithm_type': 'dpmsolver++sde', 'use_noise_sampler': True },
-    'DPM2++ 2M SDE FlowMatch': { 'shift': 1, 'use_dynamic_shifting': False, 'solver_order': 2, 'sigma_schedule': None, 'use_SD35_sigmas': False, 'algorithm_type': 'dpmsolver++2Msde', 'use_noise_sampler': True },
-    'DPM2++ 3M SDE FlowMatch': { 'shift': 1, 'use_dynamic_shifting': False, 'solver_order': 3, 'sigma_schedule': None, 'use_SD35_sigmas': False, 'algorithm_type': 'dpmsolver++3Msde', 'use_noise_sampler': True },
+    'DPM2 FlowMatch': { 'shift': 1, 'use_dynamic_shifting': False, 'solver_order': 2, 'sigma_schedule': None, 'use_beta_sigmas': False, 'algorithm_type': 'dpmsolver2', 'use_noise_sampler': True },
+    'DPM2a FlowMatch': { 'shift': 1, 'use_dynamic_shifting': False, 'solver_order': 2, 'sigma_schedule': None, 'use_beta_sigmas': False, 'algorithm_type': 'dpmsolver2A', 'use_noise_sampler': True },
+    'DPM2++ 2M FlowMatch': { 'shift': 1, 'use_dynamic_shifting': False, 'solver_order': 2, 'sigma_schedule': None, 'use_beta_sigmas': False, 'algorithm_type': 'dpmsolver++2M', 'use_noise_sampler': True },
+    'DPM2++ 2S FlowMatch': { 'shift': 1, 'use_dynamic_shifting': False, 'solver_order': 2, 'sigma_schedule': None, 'use_beta_sigmas': False, 'algorithm_type': 'dpmsolver++2S', 'use_noise_sampler': True },
+    'DPM2++ SDE FlowMatch': { 'shift': 1, 'use_dynamic_shifting': False, 'solver_order': 2, 'sigma_schedule': None, 'use_beta_sigmas': False, 'algorithm_type': 'dpmsolver++sde', 'use_noise_sampler': True },
+    'DPM2++ 2M SDE FlowMatch': { 'shift': 1, 'use_dynamic_shifting': False, 'solver_order': 2, 'sigma_schedule': None, 'use_beta_sigmas': False, 'algorithm_type': 'dpmsolver++2Msde', 'use_noise_sampler': True },
+    'DPM2++ 3M SDE FlowMatch': { 'shift': 1, 'use_dynamic_shifting': False, 'solver_order': 3, 'sigma_schedule': None, 'use_beta_sigmas': False, 'algorithm_type': 'dpmsolver++3Msde', 'use_noise_sampler': True },
 
     'Heun': { 'use_beta_sigmas': False, 'use_karras_sigmas': False, 'use_exponential_sigmas': False, 'timestep_spacing': 'linspace' },
     'Heun FlowMatch': { 'timestep_spacing': "linspace", 'shift': 1 },
@@ -236,8 +236,8 @@ class DiffusionSampler:
         if 'use_dynamic_shifting' in self.config:
             if 'Flux' in model.__class__.__name__:
                 self.config['use_dynamic_shifting'] = shared.opts.schedulers_dynamic_shift
-        if 'use_SD35_sigmas' in self.config:
-            self.config['use_SD35_sigmas'] = 'StableDiffusion3' in model.__class__.__name__
+        if 'use_beta_sigmas' in self.config:
+            self.config['use_beta_sigmas'] = 'StableDiffusion3' in model.__class__.__name__
         if 'rescale_betas_zero_snr' in self.config:
             self.config['rescale_betas_zero_snr'] = shared.opts.schedulers_rescale_betas
         if 'timestep_spacing' in self.config and shared.opts.schedulers_timestep_spacing != 'default' and shared.opts.schedulers_timestep_spacing is not None:
diff --git a/modules/shared.py b/modules/shared.py
index ceec3162c..a89cbbc95 100644
--- a/modules/shared.py
+++ b/modules/shared.py
@@ -722,7 +722,7 @@ options_templates.update(options_section(('saving-images', "Image Options"), {
     "image_watermark_image": OptionInfo('', "Image watermark file"),
 }))
 
-options_templates.update(options_section(('saving-paths', "Image Naming & Paths"), {
+options_templates.update(options_section(('saving-paths', "Image Paths"), {
     "saving_sep_images": OptionInfo("<h2>Save options</h2>", "", gr.HTML),
     "save_images_add_number": OptionInfo(True, "Numbered filenames", component_args=hide_dirs),
     "use_original_name_batch": OptionInfo(True, "Batch uses original name"),
diff --git a/modules/shared_state.py b/modules/shared_state.py
index 9947dcb70..7def42b8c 100644
--- a/modules/shared_state.py
+++ b/modules/shared_state.py
@@ -1,5 +1,4 @@
 import os
-import sys
 import time
 import datetime
 from modules.errors import log
@@ -120,8 +119,8 @@ class State:
     def end(self, api=None):
         import modules.devices
         if self.time_start is None: # someone called end before being
-            fn = f'{sys._getframe(2).f_code.co_name}:{sys._getframe(1).f_code.co_name}' # pylint: disable=protected-access
-            log.debug(f'Access state.end: {fn}') # pylint: disable=protected-access
+            # fn = f'{sys._getframe(2).f_code.co_name}:{sys._getframe(1).f_code.co_name}' # pylint: disable=protected-access
+            # log.debug(f'Access state.end: {fn}') # pylint: disable=protected-access
             self.time_start = time.time()
         if self.debug_output:
             log.debug(f'State end: {self.job} time={time.time() - self.time_start:.2f}')
diff --git a/modules/ui_sections.py b/modules/ui_sections.py
index 7951a9227..f15edb4bd 100644
--- a/modules/ui_sections.py
+++ b/modules/ui_sections.py
@@ -276,7 +276,7 @@ def create_sampler_options(tabname):
 
     else: # shared.native
         with gr.Row(elem_classes=['flex-break']):
-            sampler_sigma = gr.Dropdown(label='Sigma method', elem_id=f"{tabname}_sampler_sigma", choices=['default', 'karras', 'beta', 'exponential', 'lambdas'], value=shared.opts.schedulers_sigma, type='value')
+            sampler_sigma = gr.Dropdown(label='Sigma method', elem_id=f"{tabname}_sampler_sigma", choices=['default', 'karras', 'betas', 'exponential', 'lambdas'], value=shared.opts.schedulers_sigma, type='value')
             sampler_spacing = gr.Dropdown(label='Timestep spacing', elem_id=f"{tabname}_sampler_spacing", choices=['default', 'linspace', 'leading', 'trailing'], value=shared.opts.schedulers_timestep_spacing, type='value')
         with gr.Row(elem_classes=['flex-break']):
             sampler_beta = gr.Dropdown(label='Beta schedule', elem_id=f"{tabname}_sampler_beta", choices=['default', 'linear', 'scaled', 'cosine'], value=shared.opts.schedulers_beta_schedule, type='value')
diff --git a/scripts/xyz_grid_classes.py b/scripts/xyz_grid_classes.py
index 84a11daff..b80b9f13c 100644
--- a/scripts/xyz_grid_classes.py
+++ b/scripts/xyz_grid_classes.py
@@ -115,7 +115,7 @@ axis_options = [
     AxisOption("[Process] Server options", str, apply_options),
     AxisOptionTxt2Img("[Sampler] Name", str, apply_sampler, fmt=format_value_add_label, confirm=confirm_samplers, choices=lambda: [x.name for x in sd_samplers.samplers]),
     AxisOptionImg2Img("[Sampler] Name", str, apply_sampler, fmt=format_value_add_label, confirm=confirm_samplers, choices=lambda: [x.name for x in sd_samplers.samplers_for_img2img]),
-    AxisOption("[Sampler] Sigma method", str, apply_setting("schedulers_sigma"), choices=lambda: ['default', 'karras', 'beta', 'exponential', 'lambdas']),
+    AxisOption("[Sampler] Sigma method", str, apply_setting("schedulers_sigma"), choices=lambda: ['default', 'karras', 'betas', 'exponential', 'lambdas']),
     AxisOption("[Sampler] Timestep spacing", str, apply_setting("schedulers_timestep_spacing"), choices=lambda: ['default', 'linspace', 'leading', 'trailing']),
     AxisOption("[Sampler] Timestep range", int, apply_setting("schedulers_timesteps_range")),
     AxisOption("[Sampler] Solver order", int, apply_setting("schedulers_solver_order")),