diff --git a/CHANGELOG.md b/CHANGELOG.md index 96d74dc25..5341d317b 100644 --- a/CHANGELOG.md +++ b/CHANGELOG.md @@ -1,12 +1,14 @@ # Change Log for SD.Next -## Update for 2026-02-09 +## Update for 2026-02-11 -- **Upscalers** +- **Image manipulation** + - use high-quality [sharpfin](https://github.com/drhead/Sharpfin) accelerated library + when available (cuda-only), thanks @CalamitousFelicitousness - add support for [spandrel](https://github.com/chaiNNer-org/spandrel) - upscaling engine with suport for new upscaling model families + **upscaling** engine with support for new upscaling model families - add two new ai upscalers: *RealPLKSR NomosWebPhoto* and *RealPLKSR AnimeSharpV2* - - add two new interpolation methods: *HQX* and *ICB* + - add two new **interpolation** methods: *HQX* and *ICB* - **Features** - pipelines: add **ZImageInpaint**, thanks @CalamitousFelicitousness - add `--remote` command line flag that reduces client/server chatter and improves link stability @@ -18,6 +20,7 @@ - ui: **themes** add *CTD-NT64Light* and *CTD-NT64Dark*, thanks @resonantsky - ui: **gallery** add option to auto-refresh gallery, thanks @awsr - **Internal** + - refactor: to/from image/tensor logic, thanks @CalamitousFelicitousness - refactor: switch to `pyproject.toml` for tool configs - refactor: reorganize `cli` scripts - refactor: move tests to dedicated `/test/` diff --git a/modules/images_resize.py b/modules/images_resize.py index 9fd8757bf..67d14ac2c 100644 --- a/modules/images_resize.py +++ b/modules/images_resize.py @@ -136,7 +136,7 @@ def resize_image(resize_mode: int, im: Union[Image.Image, torch.Tensor], width: return res im = verify_image(im) if not isinstance(im, Image.Image): - shared.log.error(f'Image resize: image={type(im)} invalid type') + shared.log.error(f'Resize image: image={type(im)} invalid type') return im if (resize_mode == 0) or ((im.width == width) and (im.height == height)) or (width == 0 and height == 0): # none res = im.copy() @@ -158,5 +158,5 @@ 
def resize_image(resize_mode: int, im: Union[Image.Image, torch.Tensor], width: t1 = time.time() fn = f'{sys._getframe(2).f_code.co_name}:{sys._getframe(1).f_code.co_name}' # pylint: disable=protected-access if im.width != width or im.height != height: - shared.log.debug(f'Image resize: source={im.width}:{im.height} target={width}:{height} mode="{shared.resize_modes[resize_mode]}" upscaler="{upscaler_name}" type={output_type} time={t1-t0:.2f} fn={fn}') # pylint: disable=protected-access + shared.log.debug(f'Resize image: source={im.width}:{im.height} target={width}:{height} mode="{shared.resize_modes[resize_mode]}" upscaler="{upscaler_name}" type={output_type} time={t1-t0:.2f} fn={fn}') # pylint: disable=protected-access return np.array(res) if output_type == 'np' else res diff --git a/modules/images_sharpfin.py b/modules/images_sharpfin.py index 11ab6504a..67256fe11 100644 --- a/modules/images_sharpfin.py +++ b/modules/images_sharpfin.py @@ -7,20 +7,21 @@ and Triton GPU acceleration when available. Non-CUDA devices fall back to PIL/torch.nn.functional automatically. """ +import sys import torch import numpy as np from PIL import Image from installer import log + _sharpfin_checked = False _sharpfin_ok = False _triton_ok = False -def _check(): +def check_sharpfin(): global _sharpfin_checked, _sharpfin_ok, _triton_ok # pylint: disable=global-statement if not _sharpfin_checked: - # DEBUG: no try/except — let import errors propagate from modules.sharpfin.functional import scale # pylint: disable=unused-import _sharpfin_ok = True try: @@ -39,7 +40,7 @@ KERNEL_MAP = { } -def _resolve_kernel(kernel=None): +def get_kernel(kernel=None): """Resolve kernel name to ResizeKernel enum. 
Returns None for PIL fallback.""" if kernel is not None: name = kernel @@ -52,7 +53,7 @@ def _resolve_kernel(kernel=None): return getattr(ResizeKernel, KERNEL_MAP[name]) -def _resolve_linearize(linearize=None, is_mask=False): +def get_linearize(linearize=None, is_mask=False): """Determine sRGB linearization setting.""" if is_mask: return False @@ -62,19 +63,17 @@ def _resolve_linearize(linearize=None, is_mask=False): return shared.opts.resize_linearize_srgb -def _should_use_sharpfin(device=None): +def allow_sharpfin(device=None): """Determine if sharpfin should be used based on device.""" if device is None: from modules import devices device = devices.device - # Sharpfin is optimized for CUDA with Triton - # For other devices (CPU, MPS, OpenVINO), use torch/PIL optimized kernels + # Sharpfin is optimized for CUDA with Triton, for other devices (CPU, MPS, OpenVINO), use torch/PIL optimized kernels return hasattr(device, 'type') and device.type == 'cuda' def resize(image, target_size, *, kernel=None, linearize=None, device=None, dtype=None): """Resize PIL.Image or torch.Tensor, returning same type. 
- Args: image: PIL.Image or torch.Tensor [B,C,H,W] / [C,H,W] target_size: (width, height) for PIL, (H, W) for tensor @@ -83,9 +82,9 @@ def resize(image, target_size, *, kernel=None, linearize=None, device=None, dtyp device: Override compute device dtype: Override compute dtype """ - _check() + check_sharpfin() if isinstance(image, Image.Image): - return _resize_pil(image, target_size, kernel=kernel, linearize=linearize, device=device, dtype=dtype) + return resize_pil(image, target_size, kernel=kernel, linearize=linearize, device=device, dtype=dtype) elif isinstance(image, torch.Tensor): return resize_tensor(image, target_size, kernel=kernel, linearize=linearize if linearize is not None else False) return image @@ -132,22 +131,31 @@ def _scale_pil(scale_fn, tensor, out_res, rk, dev, dt, do_linear, src_h, src_w, return scale_fn(intermediate, (h, w), resize_kernel=rk, device=dev, dtype=dt, do_srgb_conversion=do_linear, use_sparse=False) -def _resize_pil(image, target_size, *, kernel=None, linearize=None, device=None, dtype=None): +def resize_pil(image: Image.Image, target_size: tuple[int, int], *, kernel=None, linearize=None, device=None, dtype=None): """Resize a PIL Image via sharpfin, falling back to PIL on error.""" + fn = f'{sys._getframe(2).f_code.co_name}:{sys._getframe(1).f_code.co_name}' # pylint: disable=protected-access w, h = target_size - if image.width == w and image.height == h: + is_mask = image.mode == 'L' + + if (image.width == w) and (image.height == h): + log.debug(f'Resize image: skip={w}x{h} fn={fn}') return image + from modules import devices dev = device if device is not None else devices.device - if not _should_use_sharpfin(dev): + if not allow_sharpfin(dev): + log.debug(f'Resize image: method=PIL source={image.width}x{image.height} target={w}x{h} device={dev} fn={fn}') return image.resize((w, h), resample=Image.Resampling.LANCZOS) - is_mask = image.mode == 'L' - rk = _resolve_kernel(kernel) + + rk = get_kernel(kernel) if rk is None: + 
log.debug(f'Resize image: method=PIL source={image.width}x{image.height} target={w}x{h} kernel=None fn={fn}') return image.resize((w, h), resample=Image.Resampling.LANCZOS) + from modules.sharpfin.functional import scale - dt = dtype if dtype is not None else torch.float16 - do_linear = _resolve_linearize(linearize, is_mask=is_mask) + dt = dtype or torch.float16 + do_linear = get_linearize(linearize, is_mask=is_mask) + log.debug(f'Resize image: method=sharpfin source={image.width}x{image.height} target={w}x{h} kernel={rk} device={dev} linearize={do_linear} fn={fn}') tensor = to_tensor(image) if tensor.dim() == 3: tensor = tensor.unsqueeze(0) @@ -160,7 +168,7 @@ def _resize_pil(image, target_size, *, kernel=None, linearize=None, device=None, return to_pil(result) -def resize_tensor(tensor, target_size, *, kernel=None, linearize=False): +def resize_tensor(tensor: torch.Tensor, target_size: tuple[int, int], *, kernel=None, linearize=False): """Resize tensor [B,C,H,W] or [C,H,W] -> Tensor. For in-pipeline tensor resizes. 
Args: @@ -169,20 +177,24 @@ def resize_tensor(tensor, target_size, *, kernel=None, linearize=False): kernel: Override kernel name linearize: sRGB linearization (default False for latent/mask data) """ - _check() + fn = f'{sys._getframe(2).f_code.co_name}:{sys._getframe(1).f_code.co_name}' # pylint: disable=protected-access + check_sharpfin() from modules import devices dev = devices.device - if not _should_use_sharpfin(dev): - mode = 'bilinear' if target_size[0] * target_size[1] > tensor.shape[-2] * tensor.shape[-1] else 'area' + if not allow_sharpfin(dev): + mode = 'bilinear' if (target_size[0] * target_size[1]) > (tensor.shape[-2] * tensor.shape[-1]) else 'area' + log.debug(f'Resize tensor: method=torch mode={mode} shape={tensor.shape} target={target_size} fn={fn}') inp = tensor if tensor.dim() == 4 else tensor.unsqueeze(0) result = torch.nn.functional.interpolate(inp, size=target_size, mode=mode, antialias=True) return result.squeeze(0) if tensor.dim() == 3 else result - rk = _resolve_kernel(kernel) + rk = get_kernel(kernel) if rk is None: - mode = 'bilinear' if target_size[0] * target_size[1] > tensor.shape[-2] * tensor.shape[-1] else 'area' + mode = 'bilinear' if (target_size[0] * target_size[1]) > (tensor.shape[-2] * tensor.shape[-1]) else 'area' + log.debug(f'Resize tensor: method=torch mode={mode} shape={tensor.shape} target={target_size} kernel=None fn={fn}') inp = tensor if tensor.dim() == 4 else tensor.unsqueeze(0) result = torch.nn.functional.interpolate(inp, size=target_size, mode=mode, antialias=True) return result.squeeze(0) if tensor.dim() == 3 else result + from modules.sharpfin.functional import scale dt = torch.float16 squeezed = False @@ -195,8 +207,10 @@ def resize_tensor(tensor, target_size, *, kernel=None, linearize=False): both_up = (th >= src_h and tw >= src_w) if both_down or both_up: use_sparse = _triton_ok and dev.type == 'cuda' and rk.value == 'magic_kernel_sharp_2021' and both_down + log.debug(f'Resize tensor: method=sharpfin 
shape={tensor.shape} target={target_size} direction={both_up}:{both_down} kernel={rk} sparse={use_sparse} fn={fn}') result = scale(tensor, target_size, resize_kernel=rk, device=dev, dtype=dt, do_srgb_conversion=linearize, use_sparse=use_sparse) else: + log.debug(f'Resize tensor: method=sharpfin shape={tensor.shape} target={target_size} direction={both_up}:{both_down} kernel={rk} sparse=False fn={fn}') if th > src_h: intermediate = scale(tensor, (th, src_w), resize_kernel=rk, device=dev, dtype=dt, do_srgb_conversion=linearize, use_sparse=False) result = scale(intermediate, (th, tw), resize_kernel=rk, device=dev, dtype=dt, do_srgb_conversion=linearize, use_sparse=False) @@ -208,42 +222,56 @@ def resize_tensor(tensor, target_size, *, kernel=None, linearize=False): return result -def to_tensor(image): +def to_tensor(image: Image.Image | np.ndarray): """PIL Image -> float32 CHW tensor [0,1]. Pure torch, no torchvision.""" + # fn = f'{sys._getframe(2).f_code.co_name}:{sys._getframe(1).f_code.co_name}' # pylint: disable=protected-access - if not isinstance(image, Image.Image): - raise TypeError(f"Expected PIL Image, got {type(image)}") - pic = np.array(image, copy=True) + if isinstance(image, Image.Image): + pic = np.array(image, copy=True) + elif isinstance(image, np.ndarray): + pic = image.copy() + else: + raise TypeError(f"Expected PIL Image or np.ndarray, got {type(image)}") if pic.ndim == 2: pic = pic[:, :, np.newaxis] tensor = torch.from_numpy(pic.transpose((2, 0, 1))).contiguous() + # log.debug(f'Convert: source={type(image)} target={tensor.shape} fn={fn}') if tensor.dtype == torch.uint8: return tensor.to(torch.float32).div_(255.0) return tensor.to(torch.float32) -def to_pil(tensor): +def to_pil(tensor: torch.Tensor | np.ndarray): """Float CHW/HWC or BCHW/BHWC tensor [0,1] -> PIL Image. 
Pure torch, no torchvision.""" - if not isinstance(tensor, torch.Tensor): - raise TypeError(f"Expected torch.Tensor, got {type(tensor)}") - tensor = tensor.detach().cpu() - if tensor.dim() == 4: - if tensor.shape[-1] in (1, 3, 4) and tensor.shape[-1] < tensor.shape[-2]: # BHWC - tensor = tensor.permute(0, 3, 1, 2) - tensor = tensor[0] - elif tensor.dim() == 3: - if tensor.shape[-1] in (1, 3, 4) and tensor.shape[-1] < tensor.shape[-2] and tensor.shape[-1] < tensor.shape[-3]: # HWC - tensor = tensor.permute(2, 0, 1) - if tensor.dtype != torch.uint8: - tensor = (tensor.clamp(0, 1) * 255).round().to(torch.uint8) - ndarr = tensor.permute(1, 2, 0).numpy() - if ndarr.shape[2] == 1: - ndarr = ndarr[:, :, 0] - mode = 'L' - elif ndarr.shape[2] == 3: - mode = 'RGB' + if isinstance(tensor, torch.Tensor): + tensor = tensor.detach().cpu() + elif isinstance(tensor, np.ndarray): + tensor = torch.from_numpy(tensor) else: - mode = 'RGBA' - return Image.fromarray(ndarr, mode=mode) + raise TypeError(f"Expected torch.Tensor or np.ndarray, got {type(tensor)}") + try: + if tensor.dim() == 4: + if tensor.shape[-1] in (1, 3, 4) and tensor.shape[-1] < tensor.shape[-2]: # BHWC + tensor = tensor.permute(0, 3, 1, 2) + tensor = tensor[0] + elif tensor.dim() == 3: + if tensor.shape[-1] in (1, 3, 4) and tensor.shape[-1] < tensor.shape[-2] and tensor.shape[-1] < tensor.shape[-3]: # HWC + tensor = tensor.permute(2, 0, 1) + if tensor.dtype != torch.uint8: + tensor = (tensor.clamp(0, 1) * 255).round().to(torch.uint8) + ndarr = tensor.permute(1, 2, 0).numpy() + if ndarr.shape[2] == 1: + ndarr = ndarr[:, :, 0] + mode = 'L' + elif ndarr.shape[2] == 3: + mode = 'RGB' + else: + mode = 'RGBA' + image = Image.fromarray(ndarr, mode=mode) + except Exception as e: + image = Image.new('RGB', (tensor.shape[-1], tensor.shape[-2]), color=(152, 32, 48)) + fn = f'{sys._getframe(2).f_code.co_name}:{sys._getframe(1).f_code.co_name}' # pylint: disable=protected-access + log.error(f'Convert: source={type(tensor)} target={image} 
fn={fn} {e}') + return image def pil_to_tensor(image): diff --git a/modules/sharpfin/LICENSE b/modules/sharpfin/LICENSE deleted file mode 100644 index c845fba18..000000000 --- a/modules/sharpfin/LICENSE +++ /dev/null @@ -1,190 +0,0 @@ - Apache License - Version 2.0, January 2004 - http://www.apache.org/licenses/ - - TERMS AND CONDITIONS FOR USE, REPRODUCTION, AND DISTRIBUTION - - 1. Definitions. - - "License" shall mean the terms and conditions for use, reproduction, - and distribution as defined by Sections 1 through 9 of this document. - - "Licensor" shall mean the copyright owner or entity authorized by - the copyright owner that is granting the License. - - "Legal Entity" shall mean the union of the acting entity and all - other entities that control, are controlled by, or are under common - control with that entity. For the purposes of this definition, - "control" means (i) the power, direct or indirect, to cause the - direction or management of such entity, whether by contract or - otherwise, or (ii) ownership of fifty percent (50%) or more of the - outstanding shares, or (iii) beneficial ownership of such entity. - - "You" (or "Your") shall mean an individual or Legal Entity - exercising permissions granted by this License. - - "Source" form shall mean the preferred form for making modifications, - including but not limited to software source code, documentation - source, and configuration files. - - "Object" form shall mean any form resulting from mechanical - transformation or translation of a Source form, including but - not limited to compiled object code, generated documentation, - and conversions to other media types. - - "Work" shall mean the work of authorship, whether in Source or - Object form, made available under the License, as indicated by a - copyright notice that is included in or attached to the work - (an example is provided in the Appendix below). 
- - "Derivative Works" shall mean any work, whether in Source or Object - form, that is based on (or derived from) the Work and for which the - editorial revisions, annotations, elaborations, or other modifications - represent, as a whole, an original work of authorship. For the purposes - of this License, Derivative Works shall not include works that remain - separable from, or merely link (or bind by name) to the interfaces of, - the Work and Derivative Works thereof. - - "Contribution" shall mean any work of authorship, including - the original version of the Work and any modifications or additions - to that Work or Derivative Works thereof, that is intentionally - submitted to Licensor for inclusion in the Work by the copyright owner - or by an individual or Legal Entity authorized to submit on behalf of - the copyright owner. For the purposes of this definition, "submitted" - means any form of electronic, verbal, or written communication sent - to the Licensor or its representatives, including but not limited to - communication on electronic mailing lists, source code control systems, - and issue tracking systems that are managed by, or on behalf of, the - Licensor for the purpose of discussing and improving the Work, but - excluding communication that is conspicuously marked or otherwise - designated in writing by the copyright owner as "Not a Contribution." - - "Contributor" shall mean Licensor and any individual or Legal Entity - on behalf of whom a Contribution has been received by Licensor and - subsequently incorporated within the Work. - - 2. Grant of Copyright License. Subject to the terms and conditions of - this License, each Contributor hereby grants to You a perpetual, - worldwide, non-exclusive, no-charge, royalty-free, irrevocable - copyright license to reproduce, prepare Derivative Works of, - publicly display, publicly perform, sublicense, and distribute the - Work and such Derivative Works in Source or Object form. - - 3. 
Grant of Patent License. Subject to the terms and conditions of - this License, each Contributor hereby grants to You a perpetual, - worldwide, non-exclusive, no-charge, royalty-free, irrevocable - (except as stated in this section) patent license to make, have made, - use, offer to sell, sell, import, and otherwise transfer the Work, - where such license applies only to those patent claims licensable - by such Contributor that are necessarily infringed by their - Contribution(s) alone or by combination of their Contribution(s) - with the Work to which such Contribution(s) was submitted. If You - institute patent litigation against any entity (including a - cross-claim or counterclaim in a lawsuit) alleging that the Work - or a Contribution incorporated within the Work constitutes direct - or contributory patent infringement, then any patent licenses - granted to You under this License for that Work shall terminate - as of the date such litigation is filed. - - 4. Redistribution. You may reproduce and distribute copies of the - Work or Derivative Works thereof in any medium, with or without - modifications, and in Source or Object form, provided that You - meet the following conditions: - - (a) You must give any other recipients of the Work or - Derivative Works a copy of this License; and - - (b) You must cause any modified files to carry prominent notices - stating that You changed the files; and - - (c) You must retain, in the Source form of any Derivative Works - that You distribute, all copyright, patent, trademark, and - attribution notices from the Source form of the Work, - excluding those notices that do not pertain to any part of - the Derivative Works; and - - (d) If the Work includes a "NOTICE" text file as part of its - distribution, then any Derivative Works that You distribute must - include a readable copy of the attribution notices contained - within such NOTICE file, excluding those notices that do not - pertain to any part of the Derivative 
Works, in at least one - of the following places: within a NOTICE text file distributed - as part of the Derivative Works; within the Source form or - documentation, if provided along with the Derivative Works; or, - within a display generated by the Derivative Works, if and - wherever such third-party notices normally appear. The contents - of the NOTICE file are for informational purposes only and - do not modify the License. You may add Your own attribution - notices within Derivative Works that You distribute, alongside - or as an addendum to the NOTICE text from the Work, provided - that such additional attribution notices cannot be construed - as modifying the License. - - You may add Your own copyright statement to Your modifications and - may provide additional or different license terms and conditions - for use, reproduction, or distribution of Your modifications, or - for any such Derivative Works as a whole, provided Your use, - reproduction, and distribution of the Work otherwise complies with - the conditions stated in this License. - - 5. Submission of Contributions. Unless You explicitly state otherwise, - any Contribution intentionally submitted for inclusion in the Work - by You to the Licensor shall be under the terms and conditions of - this License, without any additional terms or conditions. - Notwithstanding the above, nothing herein shall supersede or modify - the terms of any separate license agreement you may have executed - with Licensor regarding such Contributions. - - 6. Trademarks. This License does not grant permission to use the trade - names, trademarks, service marks, or product names of the Licensor, - except as required for reasonable and customary use in describing the - origin of the Work and reproducing the content of the NOTICE file. - - 7. Disclaimer of Warranty. 
Unless required by applicable law or - agreed to in writing, Licensor provides the Work (and each - Contributor provides its Contributions) on an "AS IS" BASIS, - WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or - implied, including, without limitation, any warranties or conditions - of TITLE, NON-INFRINGEMENT, MERCHANTABILITY, or FITNESS FOR A - PARTICULAR PURPOSE. You are solely responsible for determining the - appropriateness of using or redistributing the Work and assume any - risks associated with Your exercise of permissions under this License. - - 8. Limitation of Liability. In no event and under no legal theory, - whether in tort (including negligence), contract, or otherwise, - unless required by applicable law (such as deliberate and grossly - negligent acts) or agreed to in writing, shall any Contributor be - liable to You for damages, including any direct, indirect, special, - incidental, or consequential damages of any character arising as a - result of this License or out of the use or inability to use the - Work (including but not limited to damages for loss of goodwill, - work stoppage, computer failure or malfunction, or any and all - other commercial damages or losses), even if such Contributor - has been advised of the possibility of such damages. - - 9. Accepting Warranty or Additional Liability. While redistributing - the Work or Derivative Works thereof, You may choose to offer, - and charge a fee for, acceptance of support, warranty, indemnity, - or other liability obligations and/or rights consistent with this - License. However, in accepting such obligations, You may act only - on Your own behalf and on Your sole responsibility, not on behalf - of any other Contributor, and only if You agree to indemnify, - defend, and hold each Contributor harmless for any liability - incurred by, or claims asserted against, such Contributor by reason - of your accepting any such warranty or additional liability. 
- - END OF TERMS AND CONDITIONS - - Copyright 2024 drhead - - Licensed under the Apache License, Version 2.0 (the "License"); - you may not use this file except in compliance with the License. - You may obtain a copy of the License at - - http://www.apache.org/licenses/LICENSE-2.0 - - Unless required by applicable law or agreed to in writing, software - distributed under the License is distributed on an "AS IS" BASIS, - WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - See the License for the specific language governing permissions and - limitations under the License.