diff --git a/CHANGELOG.md b/CHANGELOG.md index 591c0afe3..bc6cd163b 100644 --- a/CHANGELOG.md +++ b/CHANGELOG.md @@ -1,5 +1,22 @@ # Change Log for SD.Next +## Update for 2024-11-22 + +- Model loader improvements: + - detect model components on model load fail + - Flux, SD35: force unload model + - Flux: apply `bnb` quant when loading *unet/transformer* + - Flux: all-in-one safetensors + example: + - Flux: do not recast quants +- Sampler improvements + - update DPM FlowMatch samplers +- Fixes: + - update `diffusers` + - fix README links + - fix sdxl controlnet single-file loader + - relax settings validator + ## Update for 2024-11-21 ### Highlights for 2024-11-21 diff --git a/README.md b/README.md index d099496b8..1bb5eacd0 100644 --- a/README.md +++ b/README.md @@ -56,7 +56,7 @@ For screenshots and informations on other available themes, see [Themes Wiki](ht ## Model support Additional models will be added as they become available and there is public interest in them -See [models overview](wiki/Models) for details on each model, including their architecture, complexity and other info +See [models overview](https://github.com/vladmandic/automatic/wiki/Models) for details on each model, including their architecture, complexity and other info - [RunwayML Stable Diffusion](https://github.com/Stability-AI/stablediffusion/) 1.x and 2.x *(all variants)* - [StabilityAI Stable Diffusion XL](https://github.com/Stability-AI/generative-models), [StabilityAI Stable Diffusion 3.0](https://stability.ai/news/stable-diffusion-3-medium) Medium, [StabilityAI Stable Diffusion 3.5](https://huggingface.co/stabilityai/stable-diffusion-3.5-large) Medium, Large, Large Turbo @@ -101,17 +101,17 @@ See [models overview](wiki/Models) for details on each model, including their ar ## Getting started -- Get started with **SD.Next** by following the [installation instructions](wiki/Installation) -- For more details, check out [advanced installation](wiki/Advanced-Install) guide -- List and 
explanation of [command line arguments](wiki/CLI-Arguments) +- Get started with **SD.Next** by following the [installation instructions](https://github.com/vladmandic/automatic/wiki/Installation) +- For more details, check out [advanced installation](https://github.com/vladmandic/automatic/wiki/Advanced-Install) guide +- List and explanation of [command line arguments](https://github.com/vladmandic/automatic/wiki/CLI-Arguments) - Install walkthrough [video](https://www.youtube.com/watch?v=nWTnTyFTuAs) > [!TIP] > And for platform specific information, check out -> [WSL](wiki/WSL) | [Intel Arc](wiki/Intel-ARC) | [DirectML](wiki/DirectML) | [OpenVINO](wiki/OpenVINO) | [ONNX & Olive](wiki/ONNX-Runtime) | [ZLUDA](wiki/ZLUDA) | [AMD ROCm](wiki/AMD-ROCm) | [MacOS](wiki/MacOS-Python.md) | [nVidia](wiki/nVidia) +> [WSL](https://github.com/vladmandic/automatic/wiki/WSL) | [Intel Arc](https://github.com/vladmandic/automatic/wiki/Intel-ARC) | [DirectML](https://github.com/vladmandic/automatic/wiki/DirectML) | [OpenVINO](https://github.com/vladmandic/automatic/wiki/OpenVINO) | [ONNX & Olive](https://github.com/vladmandic/automatic/wiki/ONNX-Runtime) | [ZLUDA](https://github.com/vladmandic/automatic/wiki/ZLUDA) | [AMD ROCm](https://github.com/vladmandic/automatic/wiki/AMD-ROCm) | [MacOS](https://github.com/vladmandic/automatic/wiki/MacOS-Python.md) | [nVidia](https://github.com/vladmandic/automatic/wiki/nVidia) > [!WARNING] -> If you run into issues, check out [troubleshooting](wiki/Troubleshooting) and [debugging](wiki/Debug) guides +> If you run into issues, check out [troubleshooting](https://github.com/vladmandic/automatic/wiki/Troubleshooting) and [debugging](https://github.com/vladmandic/automatic/wiki/Debug) guides > [!TIP] > All command line options can also be set via env variable diff --git a/TODO.md b/TODO.md index 88d704457..973e062dc 100644 --- a/TODO.md +++ b/TODO.md @@ -8,6 +8,7 @@ Main ToDo list can be found at [GitHub projects](https://github.com/users/vladma - 
SD35 LoRA: - Flux IPAdapter: - Flux Fill/ControlNet/Redux: +- Flux NF4: - SANA: ## Other diff --git a/cli/model-keys.py b/cli/model-keys.py index bd4a91551..45b900bd7 100755 --- a/cli/model-keys.py +++ b/cli/model-keys.py @@ -38,6 +38,16 @@ def list_to_dict(flat_list): return result_dict +def list_compact(flat_list): + result_list = [] + for item in flat_list: + keys = item.split('.') + keys = '.'.join(keys[:2]) + if keys not in result_list: + result_list.append(keys) + return result_list + + def guess_dct(dct: dict): # if has(dct, 'model.diffusion_model.input_blocks') and has(dct, 'model.diffusion_model.label_emb'): # return 'sdxl' @@ -65,7 +75,9 @@ def read_keys(fn): except Exception as e: pprint(e) dct = list_to_dict(keys) + lst = list_compact(keys) pprint(f'file: {fn}') + pprint(lst) pprint(remove_entries_after_depth(dct, 3)) pprint(remove_entries_after_depth(dct, 6)) guess = guess_dct(dct) diff --git a/installer.py b/installer.py index fe52ce668..0b64c3616 100644 --- a/installer.py +++ b/installer.py @@ -459,7 +459,7 @@ def check_python(supported_minors=[9, 10, 11, 12], reason=None): def check_diffusers(): if args.skip_all or args.skip_requirements: return - sha = 'cd6ca9df2987c000b28e13b19bd4eec3ef3c914b' + sha = 'b5fd6f13f5434d69d919cc8cedf0b11db664cf06' pkg = pkg_resources.working_set.by_key.get('diffusers', None) minor = int(pkg.version.split('.')[1] if pkg is not None else 0) cur = opts.get('diffusers_version', '') if minor > 0 else '' diff --git a/modules/model_flux.py b/modules/model_flux.py index c605702c8..17234d9a4 100644 --- a/modules/model_flux.py +++ b/modules/model_flux.py @@ -194,6 +194,7 @@ def load_transformer(file_path): # triggered by opts.sd_unet change if _transformer is not None: transformer = _transformer else: + diffusers_load_config = model_quant.create_bnb_config(diffusers_load_config) transformer = diffusers.FluxTransformer2DModel.from_single_file(file_path, **diffusers_load_config) if transformer is None: shared.log.error('Failed to 
load UNet model') @@ -213,6 +214,11 @@ def load_flux(checkpoint_info, diffusers_load_config): # triggered by opts.sd_ch text_encoder_2 = None vae = None + # unload current model + sd_models.unload_model_weights() + shared.sd_model = None + devices.torch_gc(force=True) + # load overrides if any if shared.opts.sd_unet != 'None': try: @@ -305,8 +311,21 @@ def load_flux(checkpoint_info, diffusers_load_config): # triggered by opts.sd_ch repo_id = 'black-forest-labs/FLUX.1-dev' # workaround since sayakpaul model is missing model_index.json for c in kwargs: if kwargs[c].dtype == torch.float32 and devices.dtype != torch.float32: - shared.log.warning(f'Load model: type=FLUX component={c} dtype={kwargs[c].dtype} cast dtype={devices.dtype}') + shared.log.warning(f'Load model: type=FLUX component={c} dtype={kwargs[c].dtype} cast dtype={devices.dtype} recast') kwargs[c] = kwargs[c].to(dtype=devices.dtype) - kwargs = model_quant.create_bnb_config(kwargs) - pipe = diffusers.FluxPipeline.from_pretrained(repo_id, cache_dir=shared.opts.diffusers_dir, **kwargs, **diffusers_load_config) + + allow_bnb = 'gguf' not in (sd_unet.loaded_unet or '') + kwargs = model_quant.create_bnb_config(kwargs, allow_bnb) + if checkpoint_info.path.endswith('.safetensors') and os.path.isfile(checkpoint_info.path): + pipe = diffusers.FluxPipeline.from_single_file(checkpoint_info.path, cache_dir=shared.opts.diffusers_dir, **kwargs, **diffusers_load_config) + else: + pipe = diffusers.FluxPipeline.from_pretrained(repo_id, cache_dir=shared.opts.diffusers_dir, **kwargs, **diffusers_load_config) + + # release memory + transformer = None + text_encoder_1 = None + text_encoder_2 = None + vae = None + devices.torch_gc() + return pipe diff --git a/modules/model_quant.py b/modules/model_quant.py index 68bdfa7b2..0e7bdd4b3 100644 --- a/modules/model_quant.py +++ b/modules/model_quant.py @@ -7,10 +7,10 @@ bnb = None quanto = None -def create_bnb_config(kwargs = None): +def create_bnb_config(kwargs = None, allow_bnb: 
bool = True): from modules import shared, devices - if len(shared.opts.bnb_quantization) > 0: - if 'Model' in shared.opts.bnb_quantization and 'transformer' not in (kwargs or {}): + if len(shared.opts.bnb_quantization) > 0 and allow_bnb: + if 'Model' in shared.opts.bnb_quantization: load_bnb() bnb_config = diffusers.BitsAndBytesConfig( load_in_8bit=shared.opts.bnb_quantization_type in ['fp8'], diff --git a/modules/model_sd3.py b/modules/model_sd3.py index 78eee7b4d..b9d579085 100644 --- a/modules/model_sd3.py +++ b/modules/model_sd3.py @@ -120,6 +120,11 @@ def load_sd3(checkpoint_info, cache_dir=None, config=None): repo_id = sd_models.path_to_repo(checkpoint_info.name) fn = checkpoint_info.path + # unload current model + sd_models.unload_model_weights() + shared.sd_model = None + devices.torch_gc(force=True) + kwargs = {} kwargs = load_overrides(kwargs, cache_dir) if fn is None or not os.path.exists(fn): @@ -152,5 +157,5 @@ def load_sd3(checkpoint_info, cache_dir=None, config=None): config=config, **kwargs, ) - devices.torch_gc(force=True) + devices.torch_gc() return pipe diff --git a/modules/model_tools.py b/modules/model_tools.py index 1d016a19e..07cd61b6e 100644 --- a/modules/model_tools.py +++ b/modules/model_tools.py @@ -13,6 +13,16 @@ def remove_entries_after_depth(d, depth, current_depth=0): return d +def list_compact(flat_list): + result_list = [] + for item in flat_list: + keys = item.split('.') + keys = '.'.join(keys[:2]) + if keys not in result_list: + result_list.append(keys) + return result_list + + def list_to_dict(flat_list): result_dict = {} try: diff --git a/modules/schedulers/scheduler_dpm_flowmatch.py b/modules/schedulers/scheduler_dpm_flowmatch.py index 83573105e..1afe54498 100644 --- a/modules/schedulers/scheduler_dpm_flowmatch.py +++ b/modules/schedulers/scheduler_dpm_flowmatch.py @@ -9,11 +9,11 @@ import torch import torchsde from diffusers.configuration_utils import ConfigMixin, register_to_config -from diffusers.utils import BaseOutput, 
logging +from diffusers.utils import BaseOutput from diffusers.utils.torch_utils import randn_tensor from diffusers.schedulers.scheduling_utils import SchedulerMixin +import scipy.stats -logger = logging.get_logger(__name__) # pylint: disable=invalid-name class BatchedBrownianTree: """A wrapper around torchsde.BrownianTree that enables batches of entropy.""" @@ -101,39 +101,42 @@ class FlowMatchDPMSolverMultistepScheduler(SchedulerMixin, ConfigMixin): Args: num_train_timesteps (`int`, defaults to 1000): The number of diffusion steps to train the model. + beta_start (`float`, defaults to 0.00085): + The starting `beta` value of inference. + beta_end (`float`, defaults to 0.012): + The final `beta` value. + beta_schedule (`str`, defaults to `"scaled linear"`): + The beta schedule, a mapping from a beta range to a sequence of betas for stepping the model. Choose from `linear` or `scaled linear`. + trained_betas (`np.ndarray`, *optional*): + Pass an array of betas directly to the constructor to bypass `beta_start` and `beta_end`. solver_order (`int`, defaults to 2): The DPMSolver order which can be `2` or `3`. It is recommended to use `solver_order=2` for guided sampling, and `solver_order=3` for unconditional sampling. - thresholding (`bool`, defaults to `False`): - Whether to use the "dynamic thresholding" method. This is unsuitable for latent-space diffusion models such - as Stable Diffusion. - dynamic_thresholding_ratio (`float`, defaults to 0.995): - The ratio for the dynamic thresholding method. Valid only when `thresholding=True`. - sample_max_value (`float`, defaults to 1.0): - The threshold value for dynamic thresholding. Valid only when `thresholding=True`. algorithm_type (`str`, defaults to `dpmsolver++2M`): Algorithm type for the solver; can be `dpmsolver2`, `dpmsolver2A`, `dpmsolver++2M`, `dpmsolver++2S`, `dpmsolver++sde`, `dpmsolver++2Msde`, or `dpmsolver++3Msde`.
solver_type (`str`, defaults to `midpoint`): Solver type for the second-order solver; can be `midpoint` or `heun`. The solver type slightly affects the sample quality, especially for a small number of steps. It is recommended to use `midpoint` solvers. - sigma_schedule (`str`, *optional*, defaults to None): Sigma schedule to compute the `sigmas`. Optionally, we use + sigma_schedule (`str`, *optional*, defaults to None (beta)): Sigma schedule to compute the `sigmas`. Optionally, we use the schedule "karras" introduced in the EDM paper (https://arxiv.org/abs/2206.00364). Other acceptable values are "exponential". The exponential schedule was incorporated in this model: https://huggingface.co/stabilityai/cosxl. Other acceptable values are "lambdas". The uniform-logSNR for step sizes proposed by Lu's DPM-Solver in the noise schedule during the sampling process. The sigmas and time steps are determined according to a sequence of `lambda(t)`. - use_noise_sampler for BrownianTreeNoiseSampler (only valid for `dpmsolver++2S`, `dpmsolver++sde`, `dpmsolver++2Msde`, - or `dpmsolver++3Msde`): A noise sampler backed by a torchsde increasing the stability of convergence. Default strategy + "betas" for step sizes in the noise schedule during the sampling process. Refer to [Beta + Sampling is All You Need](https://huggingface.co/papers/2407.12173) for more information. + use_noise_sampler for BrownianTreeNoiseSampler (only valid for `dpmsolver++2S`, `dpmsolver++sde`, `dpmsolver++2Msde`, or `dpmsolver++3Msde`): + A noise sampler backed by a torchsde increasing the stability of convergence. Default strategy (random noise) has it jumping all over the place, but Brownian sampling is more stable. Utilizes the model generation seed provided. midpoint_ratio (`float`, *optional*, range: 0.4 to 0.6, default=0.5): Only valid for (`dpmsolver++sde`, `dpmsolver++2S`). Higher values may result in smoothing, more vivid colors and less noise at the expense of more detail and effect.
s_noise (`float`, *optional*, defaults to 1.0): Sigma noise strength: range 0 - 1.1 (only valid for `dpmsolver++2S`, `dpmsolver++sde`, `dpmsolver++2Msde`, or `dpmsolver++3Msde`). The amount of additional noise to counteract loss of detail during sampling. A reasonable range is [1.000, 1.011]. Defaults to 1.0 from the original implementation. - use_SD35_sigmas: (`bool` defaults to False for FLUX and True for SD3). Based on original interpretation of using beta values for determining sigmas. + use_beta_sigmas: (`bool` defaults to False for FLUX and True for SD3). Based on original interpretation of using beta values for determining sigmas. use_dynamic_shifting (`bool` defaults to False for SD3 and True for FLUX). When `True`, shift is ignored. - shift (`float`, defaults to 3.0): The shift value for the timestep schedule for SD3 when not using dynamic shifting - The remaining args are specific to Flux's dynamic shifting based on resolution + shift (`float`, defaults to 3.0): The shift value for the timestep schedule for SD3 when not using dynamic shifting. + The remaining args are specific to Flux's dynamic shifting based on resolution. 
""" _compatibles = [] @@ -143,10 +146,11 @@ class FlowMatchDPMSolverMultistepScheduler(SchedulerMixin, ConfigMixin): def __init__( self, num_train_timesteps: int = 1000, + beta_start: float = 0.00085, + beta_end: float = 0.012, + beta_schedule: str = "scaled linear", + trained_betas: Optional[Union[np.ndarray, List[float]]] = None, solver_order: int = 2, - thresholding: Optional[bool] = False, - dynamic_thresholding_ratio: float = 0.995, - sample_max_value: Optional[float] = 1.0, algorithm_type: str = "dpmsolver++2M", solver_type: str = "midpoint", sigma_schedule: Optional[str] = None, @@ -154,7 +158,7 @@ class FlowMatchDPMSolverMultistepScheduler(SchedulerMixin, ConfigMixin): midpoint_ratio: Optional[float] = 0.5, s_noise: Optional[float] = 1.0, use_noise_sampler: Optional[bool] = True, - use_SD35_sigmas: Optional[bool] = False, + use_beta_sigmas: Optional[bool] = False, use_dynamic_shifting=False, base_shift: Optional[float] = 0.5, max_shift: Optional[float] = 1.15, @@ -168,6 +172,12 @@ class FlowMatchDPMSolverMultistepScheduler(SchedulerMixin, ConfigMixin): if solver_type not in ["midpoint", "heun"]: raise NotImplementedError(f"{solver_type} is not implemented for {self.__class__}") + if sigma_schedule not in [None, "karras", "exponential", "lambdas", "betas"]: + raise NotImplementedError(f"{sigma_schedule} is not implemented for {self.__class__}") + + if beta_schedule not in ["linear", "scaled linear"]: + raise NotImplementedError(f"{beta_schedule} is not implemented for {self.__class__}") + # setable values timesteps = np.linspace(1, num_train_timesteps, num_train_timesteps, dtype=np.float32)[::-1].copy() timesteps = torch.from_numpy(timesteps).to(dtype=torch.float32) @@ -186,8 +196,6 @@ class FlowMatchDPMSolverMultistepScheduler(SchedulerMixin, ConfigMixin): self._begin_index = None self.sigmas = sigmas.to("cpu") # to avoid too much CPU/GPU communication self.model_outputs = [None] * solver_order - self.sigma_min = self.sigmas[-1].item() - self.sigma_max = 
self.sigmas[0].item() @property def step_index(self): @@ -213,7 +221,7 @@ class FlowMatchDPMSolverMultistepScheduler(SchedulerMixin, ConfigMixin): """ self._begin_index = begin_index - def time_shift(self, mu: float, sigma: float, t: torch.Tensor): + def time_shift(self, mu: float, sigma: float, t: torch.FloatTensor): return math.exp(mu) / (math.exp(mu) + (1 / t - 1) ** sigma) def set_timesteps(self, @@ -235,25 +243,39 @@ class FlowMatchDPMSolverMultistepScheduler(SchedulerMixin, ConfigMixin): raise ValueError(" you have a pass a value for `mu` when `use_dynamic_shifting` is set to be `True`") if sigmas is None: - self.use_SD35_sigmas = True + self.use_beta_sigmas = True self.num_inference_steps = num_inference_steps - sigmas1 = np.linspace(1.0, 1 / num_inference_steps, num_inference_steps, dtype=np.float64) - beta_start = 0.00085 - beta_end = 0.012 - betas = torch.linspace(beta_start**0.5, beta_end**0.5, self.config.num_train_timesteps, dtype=torch.float64) ** 2 + beta_start = self.config.beta_start + beta_end = self.config.beta_end + if self.config.trained_betas is not None: + betas = torch.tensor(self.config.trained_betas, dtype=torch.float64) + elif self.config.beta_schedule == "linear": + betas = torch.linspace(beta_start, beta_end, self.config.num_train_timesteps, dtype=torch.float64) + elif self.config.beta_schedule == "scaled linear": + # this schedule is very specific to the latent diffusion model. 
+ betas = torch.linspace(beta_start**0.5, beta_end**0.5, self.config.num_train_timesteps, dtype=torch.float64) ** 2 + else: + raise NotImplementedError(f"{self.config.beta_schedule} is not implemented for {self.__class__}") alphas = 1.0 - betas alphas_cumprod = torch.cumprod(alphas, dim=0) sigmas = np.array(((1 - alphas_cumprod) / alphas_cumprod) ** 0.5) del alphas_cumprod del alphas del betas - elif self.use_SD35_sigmas: + elif self.use_beta_sigmas: num_inference_steps = len(sigmas) self.num_inference_steps = num_inference_steps - sigmas1 = np.linspace(1.0, 1 / num_inference_steps, num_inference_steps, dtype=np.float64) - beta_start = 0.00085 - beta_end = 0.012 - betas = torch.linspace(beta_start**0.5, beta_end**0.5, self.config.num_train_timesteps, dtype=torch.float64) ** 2 + beta_start = self.config.beta_start + beta_end = self.config.beta_end + if self.config.trained_betas is not None: + betas = torch.tensor(self.config.trained_betas, dtype=torch.float64) + elif self.config.beta_schedule == "linear": + betas = torch.linspace(beta_start, beta_end, self.config.num_train_timesteps, dtype=torch.float64) + elif self.config.beta_schedule == "scaled linear": + # this schedule is very specific to the latent diffusion model. 
+ betas = torch.linspace(beta_start**0.5, beta_end**0.5, self.config.num_train_timesteps, dtype=torch.float64) ** 2 + else: + raise NotImplementedError(f"{self.config.beta_schedule} is not implemented for {self.__class__}") alphas = 1.0 - betas alphas_cumprod = torch.cumprod(alphas, dim=0) sigmas = np.array(((1 - alphas_cumprod) / alphas_cumprod) ** 0.5) @@ -265,7 +287,7 @@ class FlowMatchDPMSolverMultistepScheduler(SchedulerMixin, ConfigMixin): self.num_inference_steps = num_inference_steps if self.config.sigma_schedule == "exponential": - if self.use_SD35_sigmas: + if self.use_beta_sigmas: sigmas = np.flip(sigmas).copy() sigma_min = sigmas[-1] sigma_max = sigmas[0] @@ -273,13 +295,12 @@ class FlowMatchDPMSolverMultistepScheduler(SchedulerMixin, ConfigMixin): OldRange = sigma_max - sigma_min NewRange = 1.0 - sigma_min sigmas = (((sigmas - sigma_min) * NewRange) / OldRange) + sigma_min - del sigmas1 else: sigma_min = sigmas[-1] sigma_max = sigmas[0] sigmas = self._convert_to_exponential(sigma_min, sigma_max, num_inference_steps=num_inference_steps) elif self.config.sigma_schedule == "karras": - if self.use_SD35_sigmas: + if self.use_beta_sigmas: sigmas = np.flip(sigmas).copy() sigma_min = sigmas[-1] sigma_max = sigmas[0] @@ -287,14 +308,13 @@ class FlowMatchDPMSolverMultistepScheduler(SchedulerMixin, ConfigMixin): OldRange = sigma_max - sigma_min NewRange = 1.0 - sigma_min sigmas = (((sigmas - sigma_min) * NewRange) / OldRange) + sigma_min - del sigmas1 else: sigma_min = sigmas[-1] sigma_max = sigmas[0] sigmas = self._convert_to_karras(sigma_min, sigma_max, num_inference_steps=num_inference_steps) sigmas = torch.from_numpy(sigmas).to(dtype=torch.float64, device=device) elif self.config.sigma_schedule == "lambdas": - if self.use_SD35_sigmas: + if self.use_beta_sigmas: log_sigmas = np.log(sigmas) lambdas = np.flip(log_sigmas.copy()) lambdas = self._convert_to_lu(in_lambdas=lambdas, num_inference_steps=num_inference_steps) @@ -304,7 +324,6 @@ class 
FlowMatchDPMSolverMultistepScheduler(SchedulerMixin, ConfigMixin): OldRange = sigma_max - sigma_min NewRange = 1.0 - sigma_min sigmas = (((sigmas - sigma_min) * NewRange) / OldRange) + sigma_min - del sigmas1 del lambdas del log_sigmas else: @@ -315,12 +334,25 @@ class FlowMatchDPMSolverMultistepScheduler(SchedulerMixin, ConfigMixin): del lambdas del log_sigmas sigmas = torch.from_numpy(sigmas).to(dtype=torch.float64, device=device) - else: - if self.use_SD35_sigmas: + elif self.config.sigma_schedule == "betas": + if self.use_beta_sigmas: + sigmas = np.flip(sigmas).copy() + sigma_min = sigmas[-1] + sigma_max = sigmas[0] + sigmas = self._convert_to_beta(sigma_min, sigma_max, num_inference_steps=num_inference_steps, device=device) + OldRange = sigma_max - sigma_min + NewRange = 1.0 - sigma_min + sigmas = (((sigmas - sigma_min) * NewRange) / OldRange) + sigma_min + else: + sigmas = np.flip(sigmas).copy() + sigma_min = sigmas[-1] + sigmas = np.linspace(1.0, sigma_min, num_inference_steps) + sigmas = torch.from_numpy(sigmas).to(dtype=torch.float64, device=device) + else: + if self.use_beta_sigmas: sigmas = np.flip(sigmas).copy() sigma_min = sigmas[-1] sigmas = np.linspace(1.0, sigma_min, num_inference_steps) - del sigmas1 sigmas = torch.from_numpy(sigmas).to(dtype=torch.float64, device=device) if self.config.use_dynamic_shifting: @@ -339,39 +371,19 @@ class FlowMatchDPMSolverMultistepScheduler(SchedulerMixin, ConfigMixin): self._step_index = None self._begin_index = None - # Copied from diffusers.schedulers.scheduling_ddpm.DDPMScheduler._threshold_sample - def _threshold_sample(self, sample: torch.Tensor) -> torch.Tensor: - """ - "Dynamic thresholding: At each sampling step we set s to a certain percentile absolute pixel value in xt0 (the - prediction of x_0 at timestep t), and if s > 1, then we threshold xt0 to the range [-s, s] and then divide by - s. 
Dynamic thresholding pushes saturated pixels (those near -1 and 1) inwards, thereby actively preventing - pixels from saturation at each step. We find that dynamic thresholding results in significantly better - photorealism as well as better image-text alignment, especially when using very large guidance weights." - - https://arxiv.org/abs/2205.11487 - """ - dtype = sample.dtype - batch_size, channels, *remaining_dims = sample.shape - - if dtype not in (torch.float32, torch.float64): - sample = sample.float() # upcast for quantile calculation, and clamp not implemented for cpu half - - # Flatten sample for doing quantile calculation along each image - sample = sample.reshape(batch_size, channels * np.prod(remaining_dims)) - - abs_sample = sample.abs() # "a certain percentile absolute pixel value" - - s = torch.quantile(abs_sample, self.config.dynamic_thresholding_ratio, dim=1) - s = torch.clamp( - s, min=1, max=self.config.sample_max_value - ) # When clamped to min=1, equivalent to standard clipping to [-1, 1] - s = s.unsqueeze(1) # (batch_size, 1) because clamp will broadcast along dim=0 - sample = torch.clamp(sample, -s, s) / s # "we threshold xt0 to the range [-s, s] and then divide by s" - - sample = sample.reshape(batch_size, channels, *remaining_dims) - sample = sample.to(dtype) - - return sample + # Copied from diffusers.schedulers.scheduling_euler_discrete.EulerDiscreteScheduler._convert_to_beta + def _convert_to_beta(self, sigma_min, sigma_max, num_inference_steps, device: Union[str, torch.device] = None, alpha: float = 0.6, beta: float = 0.6) -> torch.Tensor: + """From "Beta Sampling is All You Need" [arXiv:2407.12173] (Lee et. 
al, 2024)""" + sigmas = torch.Tensor( + [ + sigma_min + (ppf * (sigma_max - sigma_min)) + for ppf in [ + scipy.stats.beta.ppf(timestep, alpha, beta) + for timestep in 1 - np.linspace(0, 1, num_inference_steps).astype(np.float64) + ] + ] + ).to(dtype=torch.float64, device=device) + return sigmas def _convert_to_lu(self, in_lambdas: torch.Tensor, num_inference_steps) -> torch.Tensor: """Constructs the noise schedule of Lu et al. (2022).""" @@ -399,51 +411,6 @@ class FlowMatchDPMSolverMultistepScheduler(SchedulerMixin, ConfigMixin): sigmas = torch.linspace(math.log(sigma_max), math.log(sigma_min), num_inference_steps).exp() return sigmas - def convert_model_output( - self, - model_output: torch.Tensor, - sample: torch.Tensor = None, - *args, - **kwargs, - ) -> torch.Tensor: - """ - Convert the model output to the corresponding type the DPMSolver/DPMSolver++ algorithm needs. DPM-Solver is - designed to discretize an integral of the noise prediction model, and DPM-Solver++ is designed to discretize an - integral of the data prediction model. - - - - The algorithm and model type are decoupled. You can use either DPMSolver or DPMSolver++ for both noise - prediction and data prediction models. - - - - Args: - model_output (`torch.Tensor`): - The direct output from the learned diffusion model. - sample (`torch.Tensor`): - A current instance of a sample created by the diffusion process. - - Returns: - `torch.Tensor`: - The converted model output. - """ - timestep = args[0] if len(args) > 0 else kwargs.pop("timestep", None) - if sample is None: - if len(args) > 1: - sample = args[1] - else: - raise ValueError("missing `sample` as a required keyward argument") - - # Flow Match needs to solve an integral of the data prediction model. 
- sigma = self.sigmas[self.step_index] - x0_pred = sample - sigma * model_output - - if self.config.thresholding: - x0_pred = self._threshold_sample(x0_pred) - - return x0_pred - def index_for_timestep(self, timestep, schedule_timesteps=None): if schedule_timesteps is None: schedule_timesteps = self.timesteps @@ -511,7 +478,9 @@ class FlowMatchDPMSolverMultistepScheduler(SchedulerMixin, ConfigMixin): if self.config.algorithm_type in ["dpmsolver2", "dpmsolver2A"]: pass else: - model_output = self.convert_model_output(model_output, sample=sample) + # Flow Match needs to solve an integral of the data prediction model. + sigma = self.sigmas[self.step_index] + model_output = sample - sigma * model_output for i in range(self.config.solver_order - 1): self.model_outputs[i] = self.model_outputs[i + 1] self.model_outputs[-1] = model_output @@ -830,7 +799,7 @@ class FlowMatchDPMSolverMultistepScheduler(SchedulerMixin, ConfigMixin): return FlowMatchDPMSolverMultistepSchedulerOutput(prev_sample=prev_sample) - def scale_model_input(self, sample: torch.Tensor, *args, **kwargs) -> torch.Tensor: + def scale_model_input(self, sample: torch.FloatTensor, *args, **kwargs) -> torch.FloatTensor: """ Ensures interchangeability with schedulers that need to scale the denoising model input depending on the current timestep. 
diff --git a/modules/sd_detect.py b/modules/sd_detect.py index 31f773607..062bb32e1 100644 --- a/modules/sd_detect.py +++ b/modules/sd_detect.py @@ -92,7 +92,7 @@ def detect_pipeline(f: str, op: str = 'model', warning=True, quiet=False): guess = 'Stable Diffusion 3' if 'flux' in f.lower(): guess = 'FLUX' - if size > 11000 and size < 20000: + if size > 11000 and size < 16000: warn(f'Model detected as FLUX UNET model, but attempting to load a base model: {op}={f} size={size} MB') # switch for specific variant if guess == 'Stable Diffusion' and 'inpaint' in f.lower(): @@ -112,8 +112,9 @@ def detect_pipeline(f: str, op: str = 'model', warning=True, quiet=False): if keys is not None and len(keys) > 0: modules = model_tools.list_to_dict(keys) modules = model_tools.remove_entries_after_depth(modules, 3) + lst = model_tools.list_compact(keys) t1 = time.time() - shared.log.debug(f'Autodetect modules: {modules} time={t1-t0:.2f}') + shared.log.debug(f'Autodetect: modules={modules} list={lst} time={t1-t0:.2f}') except Exception as e: shared.log.error(f'Autodetect {op}: file="{f}" {e}') if debug_load: diff --git a/modules/sd_models.py b/modules/sd_models.py index 0336a6268..cf1921a36 100644 --- a/modules/sd_models.py +++ b/modules/sd_models.py @@ -504,6 +504,8 @@ def move_model(model, device=None, force=False): module.to_empty(device=device) elif 'enable_sequential_cpu_offload' in str(e0): pass # ignore model move if sequential offload is enabled + elif 'Params4bit' in str(e0) or 'Params8bit' in str(e0): + pass # ignore model move if quantization is enabled else: raise e0 t1 = time.time() @@ -819,6 +821,7 @@ def load_diffuser(checkpoint_info=None, already_loaded_state_dict=None, timer=No if model_type is None: shared.log.error(f'Load {op}: pipeline={shared.opts.diffusers_pipeline} not detected') return + vae_file = None if model_type.startswith('Stable Diffusion') and (op == 'model' or op == 'refiner'): # preload vae for sd models vae_file, vae_source = 
sd_vae.resolve_vae(checkpoint_info.filename) vae = sd_vae.load_vae_diffusers(checkpoint_info.path, vae_file, vae_source) @@ -897,7 +900,7 @@ def load_diffuser(checkpoint_info=None, already_loaded_state_dict=None, timer=No set_diffuser_offload(sd_model, op) if op == 'model' and not (os.path.isdir(checkpoint_info.path) or checkpoint_info.type == 'huggingface'): - if getattr(shared.sd_model, 'sd_checkpoint_info', None) is not None: + if getattr(shared.sd_model, 'sd_checkpoint_info', None) is not None and vae_file is not None: sd_vae.apply_vae_config(shared.sd_model.sd_checkpoint_info.filename, vae_file, sd_model) if op == 'refiner' and shared.opts.diffusers_move_refiner: shared.log.debug('Moving refiner model to CPU') diff --git a/modules/sd_samplers_diffusers.py b/modules/sd_samplers_diffusers.py index 370cb767b..60c75b64e 100644 --- a/modules/sd_samplers_diffusers.py +++ b/modules/sd_samplers_diffusers.py @@ -80,13 +80,13 @@ config = { 'DPM++ Cosine': { 'solver_order': 2, 'sigma_schedule': "exponential", 'prediction_type': "v-prediction" }, 'DPM SDE': { 'use_karras_sigmas': False, 'use_exponential_sigmas': False, 'use_beta_sigmas': False, 'noise_sampler_seed': None, 'timestep_spacing': 'linspace', 'steps_offset': 0, }, - 'DPM2 FlowMatch': { 'shift': 1, 'use_dynamic_shifting': False, 'solver_order': 2, 'sigma_schedule': None, 'use_SD35_sigmas': False, 'algorithm_type': 'dpmsolver2', 'use_noise_sampler': True }, - 'DPM2a FlowMatch': { 'shift': 1, 'use_dynamic_shifting': False, 'solver_order': 2, 'sigma_schedule': None, 'use_SD35_sigmas': False, 'algorithm_type': 'dpmsolver2A', 'use_noise_sampler': True }, - 'DPM2++ 2M FlowMatch': { 'shift': 1, 'use_dynamic_shifting': False, 'solver_order': 2, 'sigma_schedule': None, 'use_SD35_sigmas': False, 'algorithm_type': 'dpmsolver++2M', 'use_noise_sampler': True }, - 'DPM2++ 2S FlowMatch': { 'shift': 1, 'use_dynamic_shifting': False, 'solver_order': 2, 'sigma_schedule': None, 'use_SD35_sigmas': False, 'algorithm_type': 
'dpmsolver++2S', 'use_noise_sampler': True }, - 'DPM2++ SDE FlowMatch': { 'shift': 1, 'use_dynamic_shifting': False, 'solver_order': 2, 'sigma_schedule': None, 'use_SD35_sigmas': False, 'algorithm_type': 'dpmsolver++sde', 'use_noise_sampler': True }, - 'DPM2++ 2M SDE FlowMatch': { 'shift': 1, 'use_dynamic_shifting': False, 'solver_order': 2, 'sigma_schedule': None, 'use_SD35_sigmas': False, 'algorithm_type': 'dpmsolver++2Msde', 'use_noise_sampler': True }, - 'DPM2++ 3M SDE FlowMatch': { 'shift': 1, 'use_dynamic_shifting': False, 'solver_order': 3, 'sigma_schedule': None, 'use_SD35_sigmas': False, 'algorithm_type': 'dpmsolver++3Msde', 'use_noise_sampler': True }, + 'DPM2 FlowMatch': { 'shift': 1, 'use_dynamic_shifting': False, 'solver_order': 2, 'sigma_schedule': None, 'use_beta_sigmas': False, 'algorithm_type': 'dpmsolver2', 'use_noise_sampler': True }, + 'DPM2a FlowMatch': { 'shift': 1, 'use_dynamic_shifting': False, 'solver_order': 2, 'sigma_schedule': None, 'use_beta_sigmas': False, 'algorithm_type': 'dpmsolver2A', 'use_noise_sampler': True }, + 'DPM2++ 2M FlowMatch': { 'shift': 1, 'use_dynamic_shifting': False, 'solver_order': 2, 'sigma_schedule': None, 'use_beta_sigmas': False, 'algorithm_type': 'dpmsolver++2M', 'use_noise_sampler': True }, + 'DPM2++ 2S FlowMatch': { 'shift': 1, 'use_dynamic_shifting': False, 'solver_order': 2, 'sigma_schedule': None, 'use_beta_sigmas': False, 'algorithm_type': 'dpmsolver++2S', 'use_noise_sampler': True }, + 'DPM2++ SDE FlowMatch': { 'shift': 1, 'use_dynamic_shifting': False, 'solver_order': 2, 'sigma_schedule': None, 'use_beta_sigmas': False, 'algorithm_type': 'dpmsolver++sde', 'use_noise_sampler': True }, + 'DPM2++ 2M SDE FlowMatch': { 'shift': 1, 'use_dynamic_shifting': False, 'solver_order': 2, 'sigma_schedule': None, 'use_beta_sigmas': False, 'algorithm_type': 'dpmsolver++2Msde', 'use_noise_sampler': True }, + 'DPM2++ 3M SDE FlowMatch': { 'shift': 1, 'use_dynamic_shifting': False, 'solver_order': 3, 'sigma_schedule': 
None, 'use_beta_sigmas': False, 'algorithm_type': 'dpmsolver++3Msde', 'use_noise_sampler': True }, 'Heun': { 'use_beta_sigmas': False, 'use_karras_sigmas': False, 'use_exponential_sigmas': False, 'timestep_spacing': 'linspace' }, 'Heun FlowMatch': { 'timestep_spacing': "linspace", 'shift': 1 }, @@ -236,8 +236,8 @@ class DiffusionSampler: if 'use_dynamic_shifting' in self.config: if 'Flux' in model.__class__.__name__: self.config['use_dynamic_shifting'] = shared.opts.schedulers_dynamic_shift - if 'use_SD35_sigmas' in self.config: - self.config['use_SD35_sigmas'] = 'StableDiffusion3' in model.__class__.__name__ + if 'use_beta_sigmas' in self.config: + self.config['use_beta_sigmas'] = 'StableDiffusion3' in model.__class__.__name__ if 'rescale_betas_zero_snr' in self.config: self.config['rescale_betas_zero_snr'] = shared.opts.schedulers_rescale_betas if 'timestep_spacing' in self.config and shared.opts.schedulers_timestep_spacing != 'default' and shared.opts.schedulers_timestep_spacing is not None: diff --git a/modules/shared.py b/modules/shared.py index ceec3162c..a89cbbc95 100644 --- a/modules/shared.py +++ b/modules/shared.py @@ -722,7 +722,7 @@ options_templates.update(options_section(('saving-images', "Image Options"), { "image_watermark_image": OptionInfo('', "Image watermark file"), })) -options_templates.update(options_section(('saving-paths', "Image Naming & Paths"), { +options_templates.update(options_section(('saving-paths', "Image Paths"), { "saving_sep_images": OptionInfo("

Save options

", "", gr.HTML), "save_images_add_number": OptionInfo(True, "Numbered filenames", component_args=hide_dirs), "use_original_name_batch": OptionInfo(True, "Batch uses original name"), diff --git a/modules/shared_state.py b/modules/shared_state.py index 9947dcb70..7def42b8c 100644 --- a/modules/shared_state.py +++ b/modules/shared_state.py @@ -1,5 +1,4 @@ import os -import sys import time import datetime from modules.errors import log @@ -120,8 +119,8 @@ class State: def end(self, api=None): import modules.devices if self.time_start is None: # someone called end before being - fn = f'{sys._getframe(2).f_code.co_name}:{sys._getframe(1).f_code.co_name}' # pylint: disable=protected-access - log.debug(f'Access state.end: {fn}') # pylint: disable=protected-access + # fn = f'{sys._getframe(2).f_code.co_name}:{sys._getframe(1).f_code.co_name}' # pylint: disable=protected-access + # log.debug(f'Access state.end: {fn}') # pylint: disable=protected-access self.time_start = time.time() if self.debug_output: log.debug(f'State end: {self.job} time={time.time() - self.time_start:.2f}') diff --git a/modules/ui_sections.py b/modules/ui_sections.py index 7951a9227..f15edb4bd 100644 --- a/modules/ui_sections.py +++ b/modules/ui_sections.py @@ -276,7 +276,7 @@ def create_sampler_options(tabname): else: # shared.native with gr.Row(elem_classes=['flex-break']): - sampler_sigma = gr.Dropdown(label='Sigma method', elem_id=f"{tabname}_sampler_sigma", choices=['default', 'karras', 'beta', 'exponential', 'lambdas'], value=shared.opts.schedulers_sigma, type='value') + sampler_sigma = gr.Dropdown(label='Sigma method', elem_id=f"{tabname}_sampler_sigma", choices=['default', 'karras', 'betas', 'exponential', 'lambdas'], value=shared.opts.schedulers_sigma, type='value') sampler_spacing = gr.Dropdown(label='Timestep spacing', elem_id=f"{tabname}_sampler_spacing", choices=['default', 'linspace', 'leading', 'trailing'], value=shared.opts.schedulers_timestep_spacing, type='value') with 
gr.Row(elem_classes=['flex-break']): sampler_beta = gr.Dropdown(label='Beta schedule', elem_id=f"{tabname}_sampler_beta", choices=['default', 'linear', 'scaled', 'cosine'], value=shared.opts.schedulers_beta_schedule, type='value') diff --git a/scripts/xyz_grid_classes.py b/scripts/xyz_grid_classes.py index 84a11daff..b80b9f13c 100644 --- a/scripts/xyz_grid_classes.py +++ b/scripts/xyz_grid_classes.py @@ -115,7 +115,7 @@ axis_options = [ AxisOption("[Process] Server options", str, apply_options), AxisOptionTxt2Img("[Sampler] Name", str, apply_sampler, fmt=format_value_add_label, confirm=confirm_samplers, choices=lambda: [x.name for x in sd_samplers.samplers]), AxisOptionImg2Img("[Sampler] Name", str, apply_sampler, fmt=format_value_add_label, confirm=confirm_samplers, choices=lambda: [x.name for x in sd_samplers.samplers_for_img2img]), - AxisOption("[Sampler] Sigma method", str, apply_setting("schedulers_sigma"), choices=lambda: ['default', 'karras', 'beta', 'exponential', 'lambdas']), + AxisOption("[Sampler] Sigma method", str, apply_setting("schedulers_sigma"), choices=lambda: ['default', 'karras', 'betas', 'exponential', 'lambdas']), AxisOption("[Sampler] Timestep spacing", str, apply_setting("schedulers_timestep_spacing"), choices=lambda: ['default', 'linspace', 'leading', 'trailing']), AxisOption("[Sampler] Timestep range", int, apply_setting("schedulers_timesteps_range")), AxisOption("[Sampler] Solver order", int, apply_setting("schedulers_solver_order")),