From 0ddf613b49aadafda10f0f84df653d777df3356d Mon Sep 17 00:00:00 2001 From: Vladimir Mandic Date: Wed, 14 Jun 2023 11:22:59 -0400 Subject: [PATCH] jumbo merge part two --- .gitignore | 1 + CHANGELOG.md | 18 +++--- TODO.md | 13 +++- extensions-builtin/a1111-sd-webui-lycoris | 2 +- .../multidiffusion-upscaler-for-automatic1111 | 2 +- html/locale_en.json | 25 ++++---- javascript/extraNetworks.js | 34 +++++++---- javascript/script.js | 18 +++--- launch.py | 8 +++ modules/cmd_args.py | 1 - modules/esrgan_model.py | 2 +- modules/extensions.py | 2 - modules/extras.py | 3 +- modules/generation_parameters_copypaste.py | 32 ---------- modules/modelloader.py | 7 ++- modules/processing.py | 60 +++++++++---------- modules/realesrgan_model.py | 2 +- modules/script_callbacks.py | 4 -- modules/sd_models.py | 38 ++++++------ modules/sd_samplers_common.py | 2 +- modules/sd_vae.py | 12 +--- modules/shared.py | 48 ++++++--------- modules/styles.py | 2 +- modules/sub_quadratic_attention.py | 18 +++--- modules/ui_extensions.py | 4 +- modules/ui_extra_networks.py | 2 +- requirements.txt | 2 +- scripts/xyz_grid.py | 26 +++----- 28 files changed, 175 insertions(+), 213 deletions(-) diff --git a/.gitignore b/.gitignore index 01b025cc3..9c24822f2 100644 --- a/.gitignore +++ b/.gitignore @@ -1,5 +1,6 @@ # defaults __pycache__ +.ruff_cache /cache.json /metadata.json /config.json diff --git a/CHANGELOG.md b/CHANGELOG.md index 42831e94b..8d91f059b 100644 --- a/CHANGELOG.md +++ b/CHANGELOG.md @@ -1,23 +1,27 @@ # Change Log for SD.Next +## Update for 06/14/2023 + +- simplify token merging +- reorganize some settings + ## Update for 06/13/2023 -One bigger update... +Just a day later and one *bigger update*... 
Both some **new functionality** as well as **massive merges** from upstream - new cache for models/lora/lyco metadata: `metadata.json` drastically reduces disk access on app startup -- allow saving of **ui default values** +- allow saving/resetting of **ui default values** settings -> ui defaults - ability to run server without loaded model default is to auto-load model on startup, can be changed in settings -> stable diffusion if disabled, model will be loaded on first request, e.g. when you click generate - this is useful when you want to start server to perform other tasks like upscaling which do not rely on model + useful when you want to start server to perform other tasks like upscaling which do not rely on model - updated `accelerate` and `xformers` - huge nubmer of changes ported from **A1111** upstream - this was a massive merge - hopefully this does not cause any regressions - + this was a massive merge, hopefully this does not cause any regressions + and still a bit more pending... ## Update for 06/12/2023 @@ -84,7 +88,7 @@ Another bigger one...And more to come in the next few days... - new live preview mode: taesd i really like this one, so its enabled as default for new installs - settings search feature -- new sampler: sde++ 2m sde +- new sampler: dpm++ 2m sde - fully common save/zip/delete (new) options in all tabs which (again) meant rework of process image tab - system info tab: live gpu utilization/memory graphs for nvidia gpus diff --git a/TODO.md b/TODO.md index b78230b5c..575afef0f 100644 --- a/TODO.md +++ b/TODO.md @@ -23,8 +23,17 @@ Stuff to be investigated... Pick & merge PRs from main repo... 
-- TODO: -- STATUS: up-to-date 05/13/2023 +- List: + - +- Last: 1e5afd4 +- Todo: + - +- Skipped: + - add explict hires prompt: unnecessarily complicated and spread over large number of commits due to many regressions + - allow scripts to add cross-optimization methods: dangerous + - load extension info in threads: unnecessary as other optimizations already in place perform equally good +- Broken: + - sub-quadratic optimization changes in ## Integration diff --git a/extensions-builtin/a1111-sd-webui-lycoris b/extensions-builtin/a1111-sd-webui-lycoris index 123d1da15..025dea967 160000 --- a/extensions-builtin/a1111-sd-webui-lycoris +++ b/extensions-builtin/a1111-sd-webui-lycoris @@ -1 +1 @@ -Subproject commit 123d1da15d802823480f8020312ce449523f10e2 +Subproject commit 025dea96720197dd4486a5bb8e2f4d72a95a3088 diff --git a/extensions-builtin/multidiffusion-upscaler-for-automatic1111 b/extensions-builtin/multidiffusion-upscaler-for-automatic1111 index b28c03b10..152203139 160000 --- a/extensions-builtin/multidiffusion-upscaler-for-automatic1111 +++ b/extensions-builtin/multidiffusion-upscaler-for-automatic1111 @@ -1 +1 @@ -Subproject commit b28c03b10f45d44d09b570b20fe8a4f61061b294 +Subproject commit 152203139e41029a55f2162155853ec6506272b4 diff --git a/html/locale_en.json b/html/locale_en.json index 247ec959f..fe291aaaf 100644 --- a/html/locale_en.json +++ b/html/locale_en.json @@ -80,7 +80,7 @@ {"id":"","label":"CFG Scale","localized":"","hint":"Classifier Free Guidance scale: how strongly the image should conform to prompt. Lower values produce more creative results, higher values make it follow the prompt more strictly; recommended values between 5-10"}, {"id":"","label":"CLIP skip","localized":"","hint":"Clip skip is a feature that allows users to control the level of specificity of the prompt, the higher the CLIP skip value, the less deep the prompt will be interpreted. 
CLIP Skip 1 is typical while some anime models produce better results at CLIP skip 2"}, {"id":"","label":"Seed","localized":"","hint":"A value that determines the output of random number generator - if you create an image with same parameters and seed as another image, you'll get the same result"}, - {"id":"","label":"Extra","localized":"","hint":""}, + {"id":"","label":"Extra","localized":"","hint":"Show additional options"}, {"id":"","label":"Variation seed","localized":"","hint":"Seed of a different picture to be mixed into the generation"}, {"id":"","label":"Variation strength","localized":"","hint":"How strong of a variation to produce. At 0, there will be no effect. At 1, you will get the complete picture with variation seed (except for ancestral samplers, where you will just get something)"}, {"id":"","label":"Resize seed from width","localized":"","hint":"Make an attempt to produce a picture similar to what would have been produced with same seed at specified resolution"}, @@ -98,10 +98,10 @@ {"id":"","label":"Show result images","localized":"","hint":"Enable to show the processed images in the image pane"}, {"id":"","label":"Resize","localized":"","hint":"Factor for resizing 1x mean no upscale, 4x means 4 times upscale, high values might lead to memory issues on small graphics cards"}, {"id":"","label":"Crop to fit","localized":"","hint":"If the dimensions of your source image (e.g. 512x510) deviate from your target dimensions (e.g. 1024x768) this function will fit your upscaled image into your target size image. 
Excess will be cropped"}, - {"id":"","label":"Secondary Upscaler","localized":"","hint":""}, - {"id":"","label":"Upscaler 2 visibility","localized":"","hint":""}, - {"id":"","label":"GFPGAN visibility","localized":"","hint":""}, - {"id":"","label":"CodeFormer visibility","localized":"","hint":""}, + {"id":"","label":"Secondary Upscaler","localized":"","hint":"Select secondary upscaler to run after initial upscaler"}, + {"id":"","label":"Upscaler 2 visibility","localized":"","hint":"Strength of the secondary upscaler"}, + {"id":"","label":"GFPGAN visibility","localized":"","hint":"Strength of GFPGAN face restore network, 0=disabled"}, + {"id":"","label":"CodeFormer visibility","localized":"","hint":"Strength of CodeFormer face restore network, 0=disabled"}, {"id":"","label":"CodeFormer weight (0 = max, 1 = min)","localized":"","hint":""} ], "settings menu": [ @@ -281,7 +281,7 @@ {"id":"","label":"Prompt attention mean normalization","localized":"","hint":""}, {"id":"","label":"Disable conditional batching enabled on low memory systems","localized":"","hint":""}, {"id":"","label":"Enable samplers quantization for sharper and cleaner results","localized":"","hint":""}, - {"id":"","label":"Increase coherency by padding from the last comma within n tokens when using more than 75 tokens","localized":"","hint":""}, + {"id":"","label":"Prompt padding for long prompts","localized":"","hint":"Increase coherency by padding from the last comma within n tokens when using more than 75 tokens"}, {"id":"","label":"Original","localized":"","hint":""}, {"id":"","label":"Diffusers","localized":"","hint":""}, {"id":"","label":"VRAM usage polls per second during generation","localized":"","hint":""}, @@ -425,14 +425,14 @@ {"id":"","label":"DPM++ 2M SDE Karras","localized":"","hint":""}, {"id":"","label":"DDIM","localized":"","hint":"Denoising Diffusion Implicit Models - best at inpainting"}, {"id":"","label":"UniPC","localized":"","hint":"Unified Predictor-Corrector Framework for Fast 
Sampling of Diffusion Models"}, - {"id":"","label":"Secondary sampler","localized":"","hint":""}, - {"id":"","label":"Force latent upscaler sampler","localized":"","hint":""}, + {"id":"","label":"Secondary sampler","localized":"","hint":"Use specific sampler as fallback sampler if primary is not supported for specific operation"}, + {"id":"","label":"Force latent upscaler sampler","localized":"","hint":"Force specific sampler for second pass operations"}, {"id":"","label":"Noise multiplier for ancestral samplers (eta)","localized":"","hint":""}, {"id":"","label":"Noise multiplier for DDIM (eta)","localized":"","hint":""}, {"id":"","label":"uniform","localized":"","hint":""}, {"id":"","label":"quad","localized":"","hint":""}, {"id":"","label":"sigma churn","localized":"","hint":""}, - {"id":"","label":"Negative Guidance minimum sigma","localized":"","hint":""}, + {"id":"","label":"Negative Guidance minimum sigma","localized":"","hint":"Skip negative prompt for some steps when the image is almost ready, 0=disable"}, {"id":"","label":"sigma tmin","localized":"","hint":""}, {"id":"","label":"sigma noise","localized":"","hint":""}, {"id":"","label":"Noise seed delta (eta)","localized":"","hint":""}, @@ -494,10 +494,9 @@ {"id":"","label":"Card height for Extra Networks (px)","localized":"","hint":""}, {"id":"","label":"Extra text to add before <...> when adding extra network to prompt","localized":"","hint":""}, {"id":"","label":"Add hypernetwork to prompt","localized":"","hint":""}, - {"id":"","label":"Enable redundant token merging via tomesd for speed and memory improvements","localized":"","hint":""}, - {"id":"","label":"Token merging Ratio. Higher merging ratio = faster generation, smaller VRAM usage, lower quality","localized":"","hint":""}, - {"id":"","label":"Apply only to high-res fix pass. 
Disabling can yield a ~20-35% speedup on contemporary resolutions","localized":"","hint":""}, - {"id":"","label":"Merging Ratio (high-res pass) - If 'Apply only to high-res' is enabled, this will always be the ratio used","localized":"","hint":""}, + {"id":"","label":"Token merging ratio","localized":"","hint":"Enable redundant token merging via tomesd for speed and memory improvements, 0=disabled"}, + {"id":"","label":"Token merging ratio for img2img","localized":"","hint":"Enable redundant token merging for img2img via tomesd for speed and memory improvements, 0=disabled"}, + {"id":"","label":"Token merging ratio for hires pass","localized":"","hint":"Enable redundant token merging for hires pass via tomesd for speed and memory improvements, 0=disabled"}, {"id":"","label":"Use random perturbations - Can improve outputs for certain samplers. For others, it may cause visual artifacting","localized":"","hint":""}, {"id":"","label":"Merge attention (Recommend on)","localized":"","hint":""}, {"id":"","label":"Merge cross attention (Recommend off)","localized":"","hint":""}, diff --git a/javascript/extraNetworks.js b/javascript/extraNetworks.js index d43aa2244..8dbccbc82 100644 --- a/javascript/extraNetworks.js +++ b/javascript/extraNetworks.js @@ -42,18 +42,28 @@ const re_extranet = /<([^:]+:[^:]+):[\d\.]+>/; const re_extranet_g = /\s+<([^:]+:[^:]+):[\d\.]+>/g; function tryToRemoveExtraNetworkFromPrompt(textarea, text) { - let m = text.match(re_extranet); - if (!m) return false; - const partToSearch = m[1]; - let replaced = false; - const newTextareaText = textarea.value.replaceAll(re_extranet_g, (found, index) => { - m = found.match(re_extranet); - if (m[1] === partToSearch) { - replaced = true; - return ''; - } - return found; - }); + var m = text.match(re_extranet); + var replaced = false; + var newTextareaText; + if (m) { + var partToSearch = m[1]; + newTextareaText = textarea.value.replaceAll(re_extranet_g, function(found) { + m = found.match(re_extranet); + if 
(m[1] == partToSearch) { + replaced = true; + return ""; + } + return found; + }); + } else { + newTextareaText = textarea.value.replaceAll(new RegExp(text, "g"), function(found) { + if (found == text) { + replaced = true; + return ""; + } + return found; + }); + } if (replaced) { textarea.value = newTextareaText; return true; diff --git a/javascript/script.js b/javascript/script.js index ccf5767bb..6e921322a 100644 --- a/javascript/script.js +++ b/javascript/script.js @@ -97,12 +97,16 @@ document.addEventListener('keydown', (e) => { * checks that a UI element is not in another hidden element or tab content */ function uiElementIsVisible(el) { - let isVisible = !el.closest('.\\!hidden'); + if (el === document) return true; + const computedStyle = getComputedStyle(el); + const isVisible = computedStyle.display !== 'none'; if (!isVisible) return false; - while (isVisible = el.closest('.tabitem')?.style.display !== 'none') { - if (!isVisible) return false; - if (el.parentElement) el = el.parentElement; - else break; - } - return isVisible; + return uiElementIsVisible(el.parentNode); +} + +function uiElementInSight(el) { + const clRect = el.getBoundingClientRect(); + const windowHeight = window.innerHeight; + const isOnScreen = clRect.bottom > 0 && clRect.top < windowHeight; + return isOnScreen; } diff --git a/launch.py b/launch.py index 3eb254f0d..c0ae66a00 100644 --- a/launch.py +++ b/launch.py @@ -5,6 +5,7 @@ import shlex import logging import subprocess import installer +from functools import lru_cache commandline_args = os.environ.get('COMMANDLINE_ARGS', "") @@ -32,6 +33,7 @@ def init_modules(): extensions_dir = modules.paths_internal.extensions_dir +@lru_cache() def commit_hash(): # compatbility function global stored_commit_hash # pylint: disable=global-statement if stored_commit_hash is not None: @@ -43,6 +45,7 @@ def commit_hash(): # compatbility function return stored_commit_hash +@lru_cache() def run(command, desc=None, errdesc=None, custom_env=None, 
live=False): # compatbility function if desc is not None: installer.log.info(desc) @@ -65,18 +68,22 @@ def check_run(command): # compatbility function return result.returncode == 0 +@lru_cache() def is_installed(package): # compatbility function return installer.installed(package) +@lru_cache() def repo_dir(name): # compatbility function return os.path.join(script_path, dir_repos, name) +@lru_cache() def run_python(code, desc=None, errdesc=None): # compatbility function return run(f'"{sys.executable}" -c "{code}"', desc, errdesc) +@lru_cache() def run_pip(pkg, desc=None): # compatbility function if desc is None: desc = pkg @@ -84,6 +91,7 @@ def run_pip(pkg, desc=None): # compatbility function return run(f'"{sys.executable}" -m pip {pkg} --prefer-binary{index_url_line}', desc=f"Installing {desc}", errdesc=f"Couldn't install {desc}") +@lru_cache() def check_run_python(code): # compatbility function return check_run(f'"{sys.executable}" -c "{code}"') diff --git a/modules/cmd_args.py b/modules/cmd_args.py index b385961d7..451b254e5 100644 --- a/modules/cmd_args.py +++ b/modules/cmd_args.py @@ -81,7 +81,6 @@ def compatibility_args(opts, args): group.add_argument("--opt-channelslast", help=argparse.SUPPRESS, default=opts.opt_channelslast) group.add_argument("--xformers", default = (opts.cross_attention_optimization == "xFormers"), action='store_true', help=argparse.SUPPRESS) group.add_argument("--disable-nan-check", help=argparse.SUPPRESS, default=opts.disable_nan_check) - group.add_argument("--token-merging", help=argparse.SUPPRESS, default=opts.token_merging) group.add_argument("--rollback-vae", help=argparse.SUPPRESS, default=opts.rollback_vae) group.add_argument("--no-half", help=argparse.SUPPRESS, default=opts.no_half) group.add_argument("--no-half-vae", help=argparse.SUPPRESS, default=opts.no_half_vae) diff --git a/modules/esrgan_model.py b/modules/esrgan_model.py index caff8624a..e0b79069d 100644 --- a/modules/esrgan_model.py +++ b/modules/esrgan_model.py @@ 
-154,7 +154,7 @@ class UpscalerESRGAN(Upscaler): if "http" in path: filename = load_file_from_url( url=self.model_url, - model_dir=self.model_path, + model_dir=self.model_download_path, file_name=f"{self.model_name}.pth", progress=True, ) diff --git a/modules/extensions.py b/modules/extensions.py index 2783de129..5064f1357 100644 --- a/modules/extensions.py +++ b/modules/extensions.py @@ -126,8 +126,6 @@ def list_extensions(): extension_paths = [] extension_names = [] extension_folders = [extensions_builtin_dir] if shared.cmd_opts.safe else [extensions_builtin_dir, extensions_dir] - if shared.cmd_opts.base: - extension_folders = [] for dirname in extension_folders: if not os.path.isdir(dirname): return diff --git a/modules/extras.py b/modules/extras.py index 3247c27a0..4dc50a2c7 100644 --- a/modules/extras.py +++ b/modules/extras.py @@ -188,8 +188,9 @@ def run_modelmerger(id_task, primary_model_name, secondary_model_name, tertiary_ output_modelname = os.path.join(ckpt_dir, filename) shared.state.nextjob() shared.state.textinfo = "Saving" - metadata = {"format": "pt", "sd_merge_models": {}, "sd_merge_recipe": None} + metadata = None if save_metadata: + metadata = {"format": "pt", "sd_merge_models": {}} merge_recipe = { "type": "webui", # indicate this model was merged with webui's built-in merger "primary_model_hash": primary_model_info.sha256, diff --git a/modules/generation_parameters_copypaste.py b/modules/generation_parameters_copypaste.py index 55d92f954..fcb0db36e 100644 --- a/modules/generation_parameters_copypaste.py +++ b/modules/generation_parameters_copypaste.py @@ -275,7 +275,6 @@ Steps: 20, Sampler: Euler a, CFG scale: 7, Seed: 965400086, Size: 512x512, Model else: res[k] = v - # Missing CLIP skip means it was set to 1 (the default) if "Clip skip" not in res: res["Clip skip"] = "1" @@ -285,28 +284,6 @@ Steps: 20, Sampler: Euler a, CFG scale: 7, Seed: 965400086, Size: 512x512, Model if "Hires resize-1" not in res: res["Hires resize-1"] = 0 res["Hires 
resize-2"] = 0 - # Infer additional override settings for token merging - token_merging_ratio = res.get("Token merging ratio", None) - token_merging_ratio_hr = res.get("Token merging ratio hr", None) - if token_merging_ratio is not None or token_merging_ratio_hr is not None: - res["Token merging"] = 'True' - if token_merging_ratio is None: - res["Token merging hr only"] = 'True' - else: - res["Token merging hr only"] = 'False' - if res.get("Token merging random", None) is None: - res["Token merging random"] = 'False' - if res.get("Token merging merge attention", None) is None: - res["Token merging merge attention"] = 'True' - if res.get("Token merging merge cross attention", None) is None: - res["Token merging merge cross attention"] = 'False' - if res.get("Token merging merge mlp", None) is None: - res["Token merging merge mlp"] = 'False' - if res.get("Token merging stride x", None) is None: - res["Token merging stride x"] = '2' - if res.get("Token merging stride y", None) is None: - res["Token merging stride y"] = '2' - restore_old_hires_fix_params(res) return res @@ -326,17 +303,8 @@ infotext_to_setting_name_mapping = [ ('UniPC skip type', 'uni_pc_skip_type'), ('UniPC order', 'uni_pc_order'), ('UniPC lower order final', 'uni_pc_lower_order_final'), - ('Token merging', 'token_merging'), ('Token merging ratio', 'token_merging_ratio'), - ('Token merging hr only', 'token_merging_hr_only'), ('Token merging ratio hr', 'token_merging_ratio_hr'), - ('Token merging random', 'token_merging_random'), - ('Token merging merge attention', 'token_merging_merge_attention'), - ('Token merging merge cross attention', 'token_merging_merge_cross_attention'), - ('Token merging merge mlp', 'token_merging_merge_mlp'), - ('Token merging maximum downsampling', 'token_merging_maximum_down_sampling'), - ('Token merging stride x', 'token_merging_stride_x'), - ('Token merging stride y', 'token_merging_stride_y') ] diff --git a/modules/modelloader.py b/modules/modelloader.py index 
17bb471f8..c49cdeb5d 100644 --- a/modules/modelloader.py +++ b/modules/modelloader.py @@ -86,7 +86,7 @@ def load_models(model_path: str, model_url: str = None, command_path: str = None if model_url is not None and len(output) == 0: if download_name is not None: from basicsr.utils.download_util import load_file_from_url - dl = load_file_from_url(model_url, model_path, True, download_name) + dl = load_file_from_url(model_url, places[0], True, download_name) output.append(dl) else: output.append(model_url) @@ -181,7 +181,10 @@ def load_upscalers(): for cls in reversed(used_classes.values()): name = cls.__name__ cmd_name = f"{name.lower().replace('upscaler', '')}_models_path" - scaler = cls(commandline_options.get(cmd_name, None)) + commandline_model_path = commandline_options.get(cmd_name, None) + scaler = cls(commandline_model_path) + scaler.user_path = commandline_model_path + scaler.model_download_path = commandline_model_path or scaler.model_path datas += scaler.scalers shared.sd_upscalers = sorted( diff --git a/modules/processing.py b/modules/processing.py index 5553894af..88304f64e 100644 --- a/modules/processing.py +++ b/modules/processing.py @@ -9,7 +9,6 @@ import torch import numpy as np from PIL import Image, ImageFilter, ImageOps import cv2 -import tomesd from skimage import exposure from ldm.data.util import AddMiDaS from ldm.models.diffusion.ddpm import LatentDepth2ImageDiffusion @@ -130,6 +129,8 @@ class StableDiffusionProcessing: self.override_settings_restore_afterwards = override_settings_restore_afterwards self.is_using_inpainting_conditioning = False self.disable_extra_networks = False + self.token_merging_ratio = 0 + self.token_merging_ratio_hr = 0 if not seed_enable_extras: self.subseed = -1 self.subseed_strength = 0 @@ -145,7 +146,7 @@ class StableDiffusionProcessing: self.clip_skip = clip_skip self.iteration = 0 self.is_hr_pass = False - opts.data['clip_skip'] = clip_skip + # opts.data['clip_skip'] = clip_skip # todo is this necessary? 
@property @@ -243,6 +244,11 @@ class StableDiffusionProcessing: def close(self): self.sampler = None # pylint: disable=attribute-defined-outside-init + def get_token_merging_ratio(self, for_hr=False): + if for_hr: + return self.token_merging_ratio_hr or opts.token_merging_ratio_hr or self.token_merging_ratio or opts.token_merging_ratio + return self.token_merging_ratio or opts.token_merging_ratio + class Processed: def __init__(self, p: StableDiffusionProcessing, images_list, seed=-1, info="", subseed=None, all_prompts=None, all_negative_prompts=None, all_seeds=None, all_subseeds=None, index_of_first_image=0, infotexts=None, comments=""): @@ -278,6 +284,7 @@ class Processed: self.s_tmin = p.s_tmin self.s_tmax = p.s_tmax self.s_noise = p.s_noise + self.s_min_uncond = p.s_min_uncond self.sampler_noise_scheduler_override = p.sampler_noise_scheduler_override self.prompt = self.prompt if type(self.prompt) != list else self.prompt[0] self.negative_prompt = self.negative_prompt if type(self.negative_prompt) != list else self.negative_prompt[0] @@ -288,6 +295,8 @@ class Processed: self.all_negative_prompts = all_negative_prompts or p.all_negative_prompts or [self.negative_prompt] self.all_seeds = all_seeds or p.all_seeds or [self.seed] self.all_subseeds = all_subseeds or p.all_subseeds or [self.subseed] + self.token_merging_ratio = p.token_merging_ratio + self.token_merging_ratio_hr = p.token_merging_ratio_hr self.infotexts = infotexts or [info] def js(self): @@ -326,6 +335,9 @@ class Processed: def infotext(self, p: StableDiffusionProcessing, index): return create_infotext(p, self.all_prompts, self.all_seeds, self.all_subseeds, comments=[], position_in_batch=index % self.batch_size, iteration=index // self.batch_size) + def get_token_merging_ratio(self, for_hr=False): + return self.token_merging_ratio_hr if for_hr else self.token_merging_ratio + # from https://discuss.pytorch.org/t/help-regarding-slerp-function-for-generative-model-sampling/32475/3 def slerp(val, low, 
high): @@ -426,6 +438,9 @@ def fix_seed(p): def create_infotext(p: StableDiffusionProcessing, all_prompts, all_seeds, all_subseeds, comments=None, iteration=0, position_in_batch=0): # pylint: disable=unused-argument index = position_in_batch + iteration * p.batch_size + enable_hr = getattr(p, 'enable_hr', False) + token_merging_ratio = p.get_token_merging_ratio() + token_merging_ratio_hr = p.get_token_merging_ratio(for_hr=True) uses_ensd = opts.eta_noise_seed_delta != 0 if uses_ensd: uses_ensd = sd_samplers_common.is_sampler_using_eta_noise_seed_delta(p) @@ -451,14 +466,8 @@ def create_infotext(p: StableDiffusionProcessing, all_prompts, all_seeds, all_su "ENSD": opts.eta_noise_seed_delta if uses_ensd else None, "Init image hash": getattr(p, 'init_img_hash', None), "Version": git_commit, - "Token merging ratio": None if not (opts.token_merging or cmd_opts.token_merging) or opts.token_merging_hr_only else opts.token_merging_ratio, - "Token merging ratio hr": None if not (opts.token_merging or cmd_opts.token_merging) else opts.token_merging_ratio_hr, - "Token merging random": None if opts.token_merging_random is False else opts.token_merging_random, - "Token merging merge attention": None if opts.token_merging_merge_attention is True else opts.token_merging_merge_attention, - "Token merging merge cross attention": None if opts.token_merging_merge_cross_attention is False else opts.token_merging_merge_cross_attention, - "Token merging merge mlp": None if opts.token_merging_merge_mlp is False else opts.token_merging_merge_mlp, - "Token merging stride x": None if opts.token_merging_stride_x == 2 else opts.token_merging_stride_x, - "Token merging stride y": None if opts.token_merging_stride_y == 2 else opts.token_merging_stride_y, + "Token merging ratio": None if token_merging_ratio == 0 else token_merging_ratio, + "Token merging ratio hr": None if not enable_hr or token_merging_ratio_hr == 0 else token_merging_ratio_hr, "Parser": opts.prompt_attention, } 
generation_params.update(p.extra_generation_params) @@ -511,9 +520,7 @@ def process_images(p: StableDiffusionProcessing) -> Processed: if k == 'sd_vae': sd_vae.reload_vae_weights() - if (opts.token_merging or cmd_opts.token_merging) and not opts.token_merging_hr_only: - sd_models.apply_token_merging(sd_model=p.sd_model, hr=False) - log.debug('Token merging applied') + sd_models.apply_token_merging(p.sd_model, p.get_token_merging_ratio()) if cmd_opts.profile: """ @@ -531,9 +538,7 @@ def process_images(p: StableDiffusionProcessing) -> Processed: else: res = process_images_inner(p) finally: - if opts.token_merging or cmd_opts.token_merging: - tomesd.remove_patch(p.sd_model) - log.debug('Token merging model optimizations removed') + sd_models.apply_token_merging(p.sd_model, 0) if p.override_settings_restore_afterwards: # restore opts to original state for k, v in stored_opts.items(): setattr(opts, k, v) @@ -642,11 +647,8 @@ def process_images_inner(p: StableDiffusionProcessing) -> Processed: processed = Processed(p, [], p.seed, "") file.write(processed.infotext(p, 0)) step_multiplier = 1 - if not shared.opts.dont_fix_second_order_samplers_schedule: - try: - step_multiplier = 2 if sd_samplers.all_samplers_map.get(p.sampler_name).aliases[0] in ['k_dpmpp_2s_a', 'k_dpmpp_2s_a_ka', 'k_dpmpp_sde', 'k_dpmpp_sde_ka', 'k_dpm_2', 'k_dpm_2_a', 'k_heun'] else 1 - except Exception: - pass + sampler_config = sd_samplers.find_sampler_config(p.sampler_name) + step_multiplier = 2 if sampler_config and sampler_config.options.get("second_order", False) else 1 if p.n_iter > 1: shared.state.job = f"Batch {n+1} out of {p.n_iter}" @@ -774,7 +776,7 @@ def process_images_inner(p: StableDiffusionProcessing) -> Processed: images_list=output_images, seed=p.all_seeds[0], info=infotext(), - comments="".join(f"\n\n{comment}" for comment in comments), + comments="\n".join(comments), subseed=p.all_subseeds[0], index_of_first_image=index_of_first_image, infotexts=infotexts, @@ -943,16 +945,9 @@ class 
StableDiffusionProcessingTxt2Img(StableDiffusionProcessing): noise = create_random_tensors(samples.shape[1:], seeds=seeds, subseeds=subseeds, subseed_strength=subseed_strength, p=self) x = None devices.torch_gc() # GC now before running the next img2img to prevent running out of memory - # apply token merging optimizations from tomesd for high-res pass - if (cmd_opts.token_merging or opts.token_merging) and (opts.token_merging_hr_only or opts.token_merging_ratio_hr != opts.token_merging_ratio): - # case where user wants to use separate merge ratios - if not opts.token_merging_hr_only: - # clean patch done by first pass. (clobbering the first patch might be fine? this might be excessive) - tomesd.remove_patch(self.sd_model) - log.debug('Temporarily removed token merging optimizations in preparation for next pass') - sd_models.apply_token_merging(sd_model=self.sd_model, hr=True) - log.debug('Applied token merging for high-res pass') + sd_models.apply_token_merging(self.sd_model, self.get_token_merging_ratio(for_hr=True)) samples = self.sampler.sample_img2img(self, samples, noise, conditioning, unconditional_conditioning, steps=self.hr_second_pass_steps or self.steps, image_conditioning=image_conditioning) + sd_models.apply_token_merging(self.sd_model, self.get_token_merging_ratio()) self.is_hr_pass = False return samples @@ -1081,3 +1076,6 @@ class StableDiffusionProcessingImg2Img(StableDiffusionProcessing): del x devices.torch_gc() return samples + + def get_token_merging_ratio(self, for_hr=False): + return self.token_merging_ratio or ("token_merging_ratio" in self.override_settings and opts.token_merging_ratio) or opts.token_merging_ratio_img2img or opts.token_merging_ratio diff --git a/modules/realesrgan_model.py b/modules/realesrgan_model.py index b90ef96a1..5f5b132e5 100644 --- a/modules/realesrgan_model.py +++ b/modules/realesrgan_model.py @@ -73,7 +73,7 @@ class UpscalerRealESRGAN(Upscaler): print(f"Unable to find model info: {path}") return None if 
info.local_data_path.startswith("http"): - info.local_data_path = load_file_from_url(url=info.data_path, model_dir=self.model_path, progress=True) + info.local_data_path = load_file_from_url(url=info.data_path, model_dir=self.model_download_path, progress=True) return info except Exception as e: errors.display(e, 'real-esrgan model list') diff --git a/modules/script_callbacks.py b/modules/script_callbacks.py index 4eea6d9fa..a5513ad3f 100644 --- a/modules/script_callbacks.py +++ b/modules/script_callbacks.py @@ -76,10 +76,6 @@ class AfterCFGCallbackParams: self.total_sampling_steps = total_sampling_steps """Total number of sampling steps planned""" - self.output_altered = False - """A flag for CFGDenoiser indicating whether the output has been altered by the callback""" - - class UiTrainTabParams: def __init__(self, txt2img_preview_params): diff --git a/modules/sd_models.py b/modules/sd_models.py index 9acfcbfe3..2ee730b45 100644 --- a/modules/sd_models.py +++ b/modules/sd_models.py @@ -637,7 +637,6 @@ def reload_model_weights(sd_model=None, info=None, reuse_dict=False): timer.record("config") if sd_model is None or checkpoint_config != sd_model.used_config: del sd_model - checkpoints_loaded.clear() if shared.backend == shared.Backend.ORIGINAL: load_model(checkpoint_info, already_loaded_state_dict=state_dict, timer=timer) else: @@ -676,26 +675,23 @@ def unload_model_weights(sd_model=None, _info=None): return sd_model -def apply_token_merging(sd_model, hr: bool): +def apply_token_merging(sd_model, token_merging_ratio): """ Applies speed and memory optimizations from tomesd. 
- - Args: - hr (bool): True if called in the context of a high-res pass """ - - ratio = shared.opts.token_merging_ratio - if hr: - ratio = shared.opts.token_merging_ratio_hr - - tomesd.apply_patch( - sd_model, - ratio=ratio, - max_downsample=shared.opts.token_merging_maximum_down_sampling, - sx=shared.opts.token_merging_stride_x, - sy=shared.opts.token_merging_stride_y, - use_rand=shared.opts.token_merging_random, - merge_attn=shared.opts.token_merging_merge_attention, - merge_crossattn=shared.opts.token_merging_merge_cross_attention, - merge_mlp=shared.opts.token_merging_merge_mlp - ) + current_token_merging_ratio = getattr(sd_model, 'applied_token_merged_ratio', 0) + shared.log.debug(f'Applying token merging: current={current_token_merging_ratio} target={token_merging_ratio}') + if current_token_merging_ratio == token_merging_ratio: + return + if current_token_merging_ratio > 0: + tomesd.remove_patch(sd_model) + if token_merging_ratio > 0: + tomesd.apply_patch( + sd_model, + ratio=token_merging_ratio, + use_rand=False, # can cause issues with some samplers + merge_attn=True, + merge_crossattn=False, + merge_mlp=False + ) + sd_model.applied_token_merged_ratio = token_merging_ratio diff --git a/modules/sd_samplers_common.py b/modules/sd_samplers_common.py index 07495f586..f310b858f 100644 --- a/modules/sd_samplers_common.py +++ b/modules/sd_samplers_common.py @@ -26,7 +26,7 @@ approximation_indexes = {"Full VAE": 0, "Approximate NN": 1, "Approximate simple def single_sample_to_image(sample, approximation=None): - if approximation is None: + if approximation is None or approximation not in approximation_indexes.keys(): approximation = approximation_indexes.get(opts.show_progress_type, 0) if approximation == 0: x_sample = processing.decode_first_stage(shared.sd_model, sample.unsqueeze(0))[0] * 0.5 + 0.5 diff --git a/modules/sd_vae.py b/modules/sd_vae.py index 0c55d88f0..1943ec5f9 100644 --- a/modules/sd_vae.py +++ b/modules/sd_vae.py @@ -93,17 +93,11 @@ def
resolve_vae(checkpoint_file): return shared.cmd_opts.vae, 'forced' is_automatic = shared.opts.sd_vae in {"Automatic", "auto"} # "auto" for people with old config vae_near_checkpoint = find_vae_near_checkpoint(checkpoint_file) - if vae_near_checkpoint is not None and (shared.opts.sd_vae_as_default): + if vae_near_checkpoint is not None: return vae_near_checkpoint, 'near checkpoint' if is_automatic: - for named_vae_location in [ - os.path.join(vae_path, os.path.splitext(os.path.basename(checkpoint_file))[0] + ".pt"), - os.path.join(vae_path, os.path.splitext(os.path.basename(checkpoint_file))[0] + ".ckpt"), - os.path.join(vae_path, os.path.splitext(os.path.basename(checkpoint_file))[0] + ".safetensors"), - os.path.join(vae_path, os.path.splitext(os.path.basename(checkpoint_file))[0] + ".vae.pt"), - os.path.join(vae_path, os.path.splitext(os.path.basename(checkpoint_file))[0] + ".vae.ckpt"), - os.path.join(vae_path, os.path.splitext(os.path.basename(checkpoint_file))[0] + ".vae.safetensors"), - ]: + basename = os.path.join(vae_path, os.path.splitext(os.path.basename(checkpoint_file))[0]) + for named_vae_location in [basename + ".pt", basename + ".ckpt", basename + ".safetensors", basename + ".vae.pt", basename + ".vae.ckpt", basename + ".vae.safetensors"]: if os.path.isfile(named_vae_location): return named_vae_location, 'in VAE dir' if shared.opts.sd_vae == "None": diff --git a/modules/shared.py b/modules/shared.py index 5aa760334..6511ca275 100644 --- a/modules/shared.py +++ b/modules/shared.py @@ -293,20 +293,24 @@ options_templates.update(options_section(('sd', "Stable Diffusion"), { "sd_vae_checkpoint_cache": OptionInfo(0, "Number of cached VAE checkpoints", gr.Slider, {"minimum": 0, "maximum": 10, "step": 1}), "sd_vae": OptionInfo("Automatic", "Select VAE", gr.Dropdown, lambda: {"choices": shared_items.sd_vae_items()}, refresh=shared_items.refresh_vae_list), "sd_model_dict": OptionInfo('None', "Stable Diffusion checkpoint dict", gr.Dropdown, lambda: {"choices": 
['None'] + list_checkpoint_tiles()}, refresh=refresh_checkpoints), - "sd_vae_sliced_encode": OptionInfo(False, "Enable splitting of hires batch processing"), "stream_load": OptionInfo(False, "When loading models attempt stream loading optimized for slow or network storage"), "model_reuse_dict": OptionInfo(False, "When loading models attempt to reuse previous model dictionary"), + "prompt_attention": OptionInfo("Full parser", "Prompt attention parser", gr.Radio, lambda: {"choices": ["Full parser", "Compel parser", "A1111 parser", "Fixed attention"] }), + "prompt_mean_norm": OptionInfo(True, "Prompt attention mean normalization"), + "comma_padding_backtrack": OptionInfo(20, "Prompt padding for long prompts", gr.Slider, {"minimum": 0, "maximum": 74, "step": 1 }), + "sd_backend": OptionInfo("Original", "Stable Diffusion backend (experimental)", gr.Radio, lambda: {"choices": ["Original", "Diffusers"] }), +})) + +options_templates.update(options_section(('optimizations', "Optimizations"), { "cross_attention_optimization": OptionInfo(cross_attention_optimization_default, "Cross-attention optimization method", gr.Radio, lambda: {"choices": shared_items.list_crossattention() }), "cross_attention_options": OptionInfo([], "Cross-attention advanced options", gr.CheckboxGroup, lambda: {"choices": ['xFormers enable flash Attention', 'SDP disable memory attention']}), "sub_quad_q_chunk_size": OptionInfo(512, "Sub-quadratic cross-attention query chunk size", gr.Slider, {"minimum": 16, "maximum": 8192, "step": 8}), "sub_quad_kv_chunk_size": OptionInfo(512, "Sub-quadratic cross-attention kv chunk size", gr.Slider, {"minimum": 0, "maximum": 8192, "step": 8}), "sub_quad_chunk_threshold": OptionInfo(80, "Sub-quadratic cross-attention chunking threshold", gr.Slider, {"minimum": 0, "maximum": 100, "step": 1}), - "prompt_attention": OptionInfo("Full parser", "Prompt attention parser", gr.Radio, lambda: {"choices": ["Full parser", "Compel parser", "A1111 parser", "Fixed attention"] }), - 
"prompt_mean_norm": OptionInfo(True, "Prompt attention mean normalization"), - "always_batch_cond_uncond": OptionInfo(False, "Disable conditional batching enabled on low memory systems"), - "enable_quantization": OptionInfo(True, "Enable samplers quantization for sharper and cleaner results"), - "comma_padding_backtrack": OptionInfo(20, "Increase coherency by padding from the last comma within n tokens when using more than 75 tokens", gr.Slider, {"minimum": 0, "maximum": 74, "step": 1 }), - "sd_backend": OptionInfo("Original", "Stable Diffusion backend (experimental)", gr.Radio, lambda: {"choices": ["Original", "Diffusers"] }), + "token_merging_ratio": OptionInfo(0.0, "Token merging ratio", gr.Slider, {"minimum": 0.0, "maximum": 0.9, "step": 0.1}), + "token_merging_ratio_img2img": OptionInfo(0.0, "Token merging ratio for img2img", gr.Slider, {"minimum": 0.0, "maximum": 0.9, "step": 0.1}), + "token_merging_ratio_hr": OptionInfo(0.0, "Token merging ratio for hires pass", gr.Slider, {"minimum": 0.0, "maximum": 0.9, "step": 0.1}), + "sd_vae_sliced_encode": OptionInfo(False, "Enable splitting of hires batch processing"), })) options_templates.update(options_section(('cuda', "Compute Settings"), { @@ -444,6 +448,8 @@ options_templates.update(options_section(('sampler-params', "Sampler Settings"), "show_samplers": OptionInfo(["Euler a", "UniPC", "DDIM", "DPM++ 2M SDE", "DPM++ 2M SDE Karras", "DPM2 Karras", "DPM++ 2M Karras"], "Show samplers in user interface", gr.CheckboxGroup, lambda: {"choices": [x.name for x in list_samplers() if x.name != "PLMS"]}), "fallback_sampler": OptionInfo("Euler a", "Secondary sampler", gr.Dropdown, lambda: {"choices": ["None"] + [x.name for x in list_samplers()]}), "force_latent_sampler": OptionInfo("None", "Force latent upscaler sampler", gr.Dropdown, lambda: {"choices": ["None"] + [x.name for x in list_samplers()]}), + "always_batch_cond_uncond": OptionInfo(False, "Disable conditional batching enabled on low memory systems"), + 
"enable_quantization": OptionInfo(True, "Enable samplers quantization for sharper and cleaner results"), "eta_ancestral": OptionInfo(1.0, "Noise multiplier for ancestral samplers (eta)", gr.Slider, {"minimum": 0.0, "maximum": 1.0, "step": 0.01}), "eta_ddim": OptionInfo(0.0, "Noise multiplier for DDIM (eta)", gr.Slider, {"minimum": 0.0, "maximum": 1.0, "step": 0.01}), "ddim_discretize": OptionInfo('uniform', "DDIM discretize img2img", gr.Radio, {"choices": ['uniform', 'quad']}), @@ -496,6 +502,9 @@ options_templates.update(options_section(('interrogate', "Interrogate"), { })) options_templates.update(options_section(('upscaling', "Upscaling"), { + "face_restoration_model": OptionInfo("CodeFormer", "Face restoration model", gr.Radio, lambda: {"choices": [x.name() for x in face_restorers]}), + "code_former_weight": OptionInfo(0.2, "CodeFormer weight parameter; 0 = maximum effect; 1 = minimum effect", gr.Slider, {"minimum": 0, "maximum": 1, "step": 0.01}), + "face_restoration_unload": OptionInfo(False, "Move face restoration model from VRAM into RAM after processing"), "upscaler_for_img2img": OptionInfo("None", "Default upscaler for image resize operations", gr.Dropdown, lambda: {"choices": [x.name for x in sd_upscalers]}), "realesrgan_enabled_models": OptionInfo(["R-ESRGAN 4x+", "R-ESRGAN 4x+ Anime6B"], "Real-ESRGAN available models", gr.CheckboxGroup, lambda: {"choices": shared_items.realesrgan_models_names()}), "ESRGAN_tile": OptionInfo(192, "Tile size for ESRGAN upscalers (0 = no tiling)", gr.Slider, {"minimum": 0, "maximum": 512, "step": 16}), @@ -506,19 +515,10 @@ options_templates.update(options_section(('upscaling', "Upscaling"), { "dont_fix_second_order_samplers_schedule": OptionInfo(False, "Do not fix prompt schedule for second order samplers"), })) -options_templates.update(options_section(('lora', "Lora"), { +options_templates.update(options_section(('extra_networks', "Extra Networks"), { "lyco_patch_lora": OptionInfo(False, "Use LyCoris handler for all 
Lora types", gr.Checkbox, { "visible": True }), "lora_disable": OptionInfo(False, "Disable built-in Lora handler", gr.Checkbox, { "visible": True }, onchange=lora_disable), "lora_functional": OptionInfo(False, "Use Kohya method for handling multiple Loras", gr.Checkbox, { "visible": True }), -})) - -options_templates.update(options_section(('face-restoration', "Face restoration"), { - "face_restoration_model": OptionInfo("CodeFormer", "Face restoration model", gr.Radio, lambda: {"choices": [x.name() for x in face_restorers]}), - "code_former_weight": OptionInfo(0.2, "CodeFormer weight parameter; 0 = maximum effect; 1 = minimum effect", gr.Slider, {"minimum": 0, "maximum": 1, "step": 0.01}), - "face_restoration_unload": OptionInfo(False, "Move face restoration model from VRAM into RAM after processing"), -})) - -options_templates.update(options_section(('extra_networks', "Extra Networks"), { "extra_networks_default_view": OptionInfo("cards", "Default view for Extra Networks", gr.Dropdown, {"choices": ["cards", "thumbs"]}), "extra_networks_default_multiplier": OptionInfo(1.0, "Multiplier for extra networks", gr.Slider, {"minimum": 0.0, "maximum": 1.0, "step": 0.01}), "extra_networks_card_width": OptionInfo(0, "Card width for Extra Networks (px)"), @@ -527,20 +527,6 @@ options_templates.update(options_section(('extra_networks', "Extra Networks"), { "sd_hypernetwork": OptionInfo("None", "Add hypernetwork to prompt", gr.Dropdown, lambda: {"choices": ["None"] + list(hypernetworks.keys())}, refresh=reload_hypernetworks), })) -options_templates.update(options_section(('token_merging', 'Token Merging'), { - "token_merging": OptionInfo(False, "Enable redundant token merging via tomesd for speed and memory improvements", gr.Checkbox), - "token_merging_ratio": OptionInfo(0.5, "Token merging Ratio. 
Higher merging ratio = faster generation, smaller VRAM usage, lower quality", gr.Slider, {"minimum": 0, "maximum": 0.9, "step": 0.1}), - "token_merging_hr_only": OptionInfo(True, "Apply only to high-res fix pass. Disabling can yield a ~20-35% speedup on contemporary resolutions", gr.Checkbox), - "token_merging_ratio_hr": OptionInfo(0.5, "Merging Ratio for high-res pass", gr.Slider, {"minimum": 0, "maximum": 0.9, "step": 0.1}), - "token_merging_random": OptionInfo(False, "Use random perturbations - Can improve outputs for certain samplers. For others, it may cause visual artifacting", gr.Checkbox), - "token_merging_merge_attention": OptionInfo(True, "Merge attention (Recommend on)", gr.Checkbox), - "token_merging_merge_cross_attention": OptionInfo(False, "Merge cross attention (Recommend off)", gr.Checkbox), - "token_merging_merge_mlp": OptionInfo(False, "Merge mlp (Strongly recommend off)", gr.Checkbox), - "token_merging_maximum_down_sampling": OptionInfo(1, "Maximum down sampling", gr.Radio, lambda: {"choices": [1, 2, 4, 8]}), - "token_merging_stride_x": OptionInfo(2, "Stride - X", gr.Slider, {"minimum": 2, "maximum": 8, "step": 2}), - "token_merging_stride_y": OptionInfo(2, "Stride - Y", gr.Slider, {"minimum": 2, "maximum": 8, "step": 2}) -})) - options_templates.update(options_section((None, "Hidden options"), { "disabled_extensions": OptionInfo([], "Disable these extensions"), "disable_all_extensions": OptionInfo("none", "Disable all extensions (preserves the list of disabled extensions)", gr.Radio, {"choices": ["none", "user", "all"]}), diff --git a/modules/styles.py b/modules/styles.py index 2bc0053da..2ae580355 100644 --- a/modules/styles.py +++ b/modules/styles.py @@ -50,7 +50,7 @@ class StyleDatabase: self.save_styles(self.path) with open(self.path, "r", encoding="utf-8-sig", newline='') as file: - reader = csv.DictReader(file) + reader = csv.DictReader(file, skipinitialspace=True) for row in reader: try: prompt = row["prompt"] if "prompt" in row else 
row["text"] diff --git a/modules/sub_quadratic_attention.py b/modules/sub_quadratic_attention.py index 206bea8b3..f7302ca8e 100644 --- a/modules/sub_quadratic_attention.py +++ b/modules/sub_quadratic_attention.py @@ -16,16 +16,15 @@ from typing import Optional, NamedTuple, List import torch from torch import Tensor from torch.utils.checkpoint import checkpoint -import numpy as np def narrow_trunc( - input: Tensor, # pylint: disable=redefined-builtin + tensor: Tensor, dim: int, start: int, length: int ) -> Tensor: - return torch.narrow(input, dim, start, length if input.shape[dim] >= start + length else input.shape[dim] - start) + return torch.narrow(tensor, dim, start, length if tensor.shape[dim] >= start + length else tensor.shape[dim] - start) class AttnChunk(NamedTuple): @@ -81,7 +80,7 @@ def _query_chunk_attention( kv_chunk_size: int, ) -> Tensor: _batch_x_heads, k_tokens, _k_channels_per_head = key.shape - _, _, _v_channels_per_head = value.shape + # _, _, v_channels_per_head = value.shape def chunk_scanner(chunk_idx: int) -> AttnChunk: key_chunk = narrow_trunc( @@ -202,14 +201,11 @@ def efficient_dot_product_attention( value=value, ) - res = torch.zeros_like(query) - for i in range(math.ceil(q_tokens / query_chunk_size)): - attn_scores = compute_query_chunk_attn( + res = torch.cat([ + compute_query_chunk_attn( query=get_query_chunk(i * query_chunk_size), key=key, value=value, - ) - - res[:, i * query_chunk_size:i * query_chunk_size + attn_scores.shape[1], :] = attn_scores - + ) for i in range(math.ceil(q_tokens / query_chunk_size)) + ], dim=1) return res diff --git a/modules/ui_extensions.py b/modules/ui_extensions.py index 99c25daa5..7ce3e20f5 100644 --- a/modules/ui_extensions.py +++ b/modules/ui_extensions.py @@ -122,7 +122,9 @@ def make_commit_link(commit_hash, remote, text=None): if text is None: text = commit_hash[:8] if remote.startswith("https://github.com/"): - href = os.path.join(remote, "commit", commit_hash) + if remote.endswith(".git"): + remote 
= remote[:-4] + href = remote + "/commit/" + commit_hash return f'{text}' else: return text diff --git a/modules/ui_extra_networks.py b/modules/ui_extra_networks.py index d75077222..4b3d6399a 100644 --- a/modules/ui_extra_networks.py +++ b/modules/ui_extra_networks.py @@ -175,7 +175,7 @@ class ExtraNetworksPage: """ Find a preview PNG for a given path (without extension) and call link_preview on it. """ - preview_extensions = ["jpg", "png", "webp", "tiff", "jp2"] + preview_extensions = ["jpg", "jpeg", "png", "webp", "tiff", "jp2"] potential_files = sum([[path + "." + ext, path + ".preview." + ext] for ext in preview_extensions], []) for file in potential_files: if os.path.isfile(file): diff --git a/requirements.txt b/requirements.txt index 37cd496a4..7903c0e06 100644 --- a/requirements.txt +++ b/requirements.txt @@ -58,5 +58,5 @@ protobuf==3.20.3 pytorch_lightning==1.9.4 transformers==4.26.1 timm==0.6.13 -tomesd==0.1.2 +tomesd==0.1.3 urllib3==1.26.15 diff --git a/scripts/xyz_grid.py b/scripts/xyz_grid.py index a1a2b2404..de1b26452 100644 --- a/scripts/xyz_grid.py +++ b/scripts/xyz_grid.py @@ -141,17 +141,10 @@ def apply_face_restore(p, opt, x): p.restore_faces = is_active -def apply_token_merging_ratio_hr(p, x, xs): - shared.opts.data["token_merging_ratio_hr"] = x - - -def apply_token_merging_ratio(p, x, xs): - shared.opts.data["token_merging_ratio"] = x - - -def apply_token_merging_random(p, x, xs): - is_active = x.lower() in ('true', 'yes', 'y', '1') - shared.opts.data["token_merging_random"] = is_active +def apply_override(field): + def fun(p, x, xs): + p.override_settings[field] = x + return fun def format_value_add_label(p, opt, x): @@ -233,9 +226,8 @@ axis_options = [ AxisOptionImg2Img("Image Mask Weight", float, apply_field("inpainting_mask_weight")), AxisOption("UniPC Order", int, apply_uni_pc_order, cost=0.5), AxisOption("Face restore", str, apply_face_restore, fmt=format_value), - AxisOption("ToMe ratio",float, apply_token_merging_ratio), - 
AxisOption("ToMe ratio for Hires fix",float, apply_token_merging_ratio_hr), - AxisOption("ToMe random pertubations",str, apply_token_merging_random, choices = lambda: ["Yes","No"]) + AxisOption("Token merging ratio", float, apply_override('token_merging_ratio')), + AxisOption("Token merging ratio high-res", float, apply_override('token_merging_ratio_hr')), ] @@ -349,7 +341,6 @@ class SharedSettingsStackHelper(object): self.uni_pc_order = shared.opts.uni_pc_order self.token_merging_ratio_hr = shared.opts.token_merging_ratio_hr self.token_merging_ratio = shared.opts.token_merging_ratio - self.token_merging_random = shared.opts.token_merging_random self.sd_model_checkpoint = shared.opts.sd_model_checkpoint self.sd_model_dict = shared.opts.sd_model_dict self.sd_vae_checkpoint = shared.opts.sd_vae @@ -361,11 +352,10 @@ class SharedSettingsStackHelper(object): shared.opts.data["uni_pc_order"] = self.uni_pc_order shared.opts.data["token_merging_ratio_hr"] = self.token_merging_ratio_hr shared.opts.data["token_merging_ratio"] = self.token_merging_ratio - shared.opts.data["token_merging_random"] = self.token_merging_random shared.opts.data["force_latent_sampler"] = self.force_latent_sampler - if (self.sd_model_dict != shared.opts.sd_model_dict): + if self.sd_model_dict != shared.opts.sd_model_dict: shared.opts.data["sd_model_dict"] = self.sd_model_dict - if (self.sd_model_checkpoint != shared.opts.sd_model_checkpoint): + if self.sd_model_checkpoint != shared.opts.sd_model_checkpoint: shared.opts.data["sd_model_checkpoint"] = self.sd_model_checkpoint sd_models.reload_model_weights() if self.sd_vae_checkpoint != shared.opts.sd_vae: