diff --git a/javascript/descriptions.js b/javascript/descriptions.js index 14d36e8..9826f12 100644 --- a/javascript/descriptions.js +++ b/javascript/descriptions.js @@ -16,21 +16,24 @@ onUiUpdate(() => { const descs = { '#dumpunet-{}-features-checkbox': 'Extract U-Net features and add their maps to output images.', '#dumpunet-{}-features-layer': 'U-Net layers (IN00-IN11, M00, OUT00-OUT11) which features should be extracted. See tooltip for notations.', - '#dumpunet-{}-features-steps': 'Steps which U-Net features should be extracted. See tooltip for notations', + '#dumpunet-{}-features-steps': 'Steps which U-Net features should be extracted. See tooltip for notations.', + '#dumpunet-{}-features-average': 'Add channel-averaged map to the result.', '#dumpunet-{}-features-dumppath': 'Raw binary files are dumped to here, one image per step per layer.', '#dumpunet-{}-features-colorization-desc': 'Recommends for U-Net features: Custom / Sigmoid (gain=1.0, offset=0.0) / HSL; H=(2+v)/3, S=1.0, L=0.5', '#dumpunet-{}-features-colorization-custom': 'Set RGB/HSL value with given transformed value v. The range of v can be either [0, 1] or [-1, 1] according to the `Value transform` selection.
Input values are processed as `eval(f"lambda v: ( ({r}), ({g}), ({b}) )", { "__builtins__": numpy }, {})`.', '#dumpunet-{}-attention-checkbox': 'Extract attention layer\'s features and add their maps to output images.', '#dumpunet-{}-attention-layer': 'U-Net layers (IN00-IN11, M00, OUT00-OUT11) which features should be extracted. See tooltip for notations.', - '#dumpunet-{}-attention-steps': 'Steps which features should be extracted. See tooltip for notations', + '#dumpunet-{}-attention-steps': 'Steps which features should be extracted. See tooltip for notations.', + '#dumpunet-{}-attention-average': 'For K, add head-averaged map.
For Q*K, add head-averaged map.
For V*Q*K, add channel-averaged map.', '#dumpunet-{}-attention-dumppath': 'Raw binary files are dumped to here, one image per step per layer.', '#dumpunet-{}-attention-colorization-desc': 'Recommends for Attention features: Custom / Auto [0,1] / HSL; H=(2-2*v)/3, S=1.0, L=0.5', '#dumpunet-{}-attention-colorization-custom': 'Set RGB/HSL value with given transformed value v. The range of v can be either [0, 1] or [-1, 1] according to the `Value transform` selection.
Input values are processed as `eval(f"lambda v: ( ({r}), ({g}), ({b}) )", { "__builtins__": numpy }, {})`.', '#dumpunet-{}-layerprompt-checkbox': 'When checked, (~: ... :~) notation is enabled.', '#dumpunet-{}-layerprompt-diff-layer': 'Layers (IN00-IN11, M00, OUT00-OUT11) which features should be extracted. See tooltip for notations.', - '#dumpunet-{}-layerprompt-diff-steps': 'Steps which features should be extracted. See tooltip for notations', + '#dumpunet-{}-layerprompt-diff-steps': 'Steps which features should be extracted. See tooltip for notations.', + '#dumpunet-{}-layerprompt-diff-average': 'Add channel-averaged map to the result.', '#dumpunet-{}-layerprompt-diff-dumppath': 'Raw binary files are dumped to here, one image per step per layer.', '#dumpunet-{}-layerprompt-diff-colorization-desc': 'Recommends for layer prompt\'s diff.: Custom / Sigmoid (gain=1.0, offset=0.0) / HSL; H=(2+v)/3, S=1.0, L=0.5', '#dumpunet-{}-layerprompt-diff-colorization-custom': 'Set RGB/HSL value with given transformed value v. The range of v can be either [0, 1] or [-1, 1] according to the `Value transform` selection.
Input values are processed as `eval(f"lambda v: ( ({r}), ({g}), ({b}) )", { "__builtins__": numpy }, {})`.', diff --git a/scripts/dumpunet.py b/scripts/dumpunet.py index 53c7876..3892205 100644 --- a/scripts/dumpunet.py +++ b/scripts/dumpunet.py @@ -45,6 +45,7 @@ class Script(scripts.Script): result.unet.enabled, result.unet.settings.layers, result.unet.settings.steps, + result.unet.settings.average, result.unet.settings.colorize, result.unet.settings.colorspace, result.unet.settings.R, result.unet.settings.G, result.unet.settings.B, result.unet.settings.H, result.unet.settings.S, result.unet.settings.L, @@ -57,6 +58,7 @@ class Script(scripts.Script): result.attn.enabled, result.attn.settings.layers, result.attn.settings.steps, + result.attn.settings.average, result.attn.settings.others["vqks"], result.attn.settings.colorize, result.attn.settings.colorspace, result.attn.settings.R, result.attn.settings.G, result.attn.settings.B, @@ -71,6 +73,7 @@ class Script(scripts.Script): result.lp.diff_enabled, result.lp.diff_settings.layers, result.lp.diff_settings.steps, + result.lp.diff_settings.average, result.lp.diff_settings.colorize, result.lp.diff_settings.colorspace, result.lp.diff_settings.R, result.lp.diff_settings.G, result.lp.diff_settings.B, result.lp.diff_settings.H, result.lp.diff_settings.S, result.lp.diff_settings.L, @@ -117,6 +120,7 @@ class Script(scripts.Script): unet_features_enabled: bool, layer_input: str, step_input: str, + favg: bool, color_: str, colorspace: str, fr: str, fg: str, fb: str, fh: str, fs: str, fl: str, @@ -127,6 +131,7 @@ class Script(scripts.Script): attn_enabled: bool, attn_layers: str, attn_steps: str, + aavg: bool, attn_vqks: list[str], attn_color_: str, attn_cs: str, ar: str, ag: str, ab: str, @@ -139,6 +144,7 @@ class Script(scripts.Script): layerprompt_diff_enabled: bool, lp_diff_layers: str, lp_diff_steps: str, + lavg: bool, lp_diff_color_: str, lcs: str, lr: str, lg: str, lb: str, lh: str, ls: str, ll: str, @@ -202,14 +208,14 @@ class Script(scripts.Script): proc1, features1, diff1, attn1 = exec(p1, lp0, [ex, exlp, at]) builder1 = ProcessedBuilder() builder1.add_proc(proc1) - ex.add_images(p1, builder1, features1, color) - at.add_images(p1, builder1, attn1, attn_color) + ex.add_images(p1, builder1, features1, favg, color) + at.add_images(p1, builder1, attn1, aavg, attn_color) # layer prompt enabled proc2, features2, diff2, attn2 = exec(p2, lp, [ex, exlp, at]) builder2 = ProcessedBuilder() builder2.add_proc(proc1) - ex.add_images(p2, builder2, features2, color) - at.add_images(p2, builder2, attn2, attn_color) + ex.add_images(p2, builder2, features2, favg, color) + at.add_images(p2, builder2, attn2, aavg, attn_color) proc1 = builder1.to_proc(p1, proc1) proc2 = builder2.to_proc(p2, proc2) @@ -227,7 +233,7 @@ class Script(scripts.Script): t0 = int(time.time()) for img_idx, step, layer, tensor in feature_diff(diff1, diff2, abs=not lp_diff_color): - canvases = feature_to_grid_images(tensor, layer, p.width, p.height, lp_diff_color) + canvases = feature_to_grid_images(tensor, layer, p.width, p.height, lavg, lp_diff_color) for canvas in canvases: putils.add_ref(proc, img_idx, canvas, f"Layer Name: {layer}, Feature Steps: {step}") @@ -239,8 +245,8 @@ class Script(scripts.Script): proc, features1, attn1 = exec(p, lp, [ex, at]) builder = ProcessedBuilder() builder.add_proc(proc) - ex.add_images(p, builder, features1, color) - at.add_images(p, builder, attn1, attn_color) + ex.add_images(p, builder, features1, favg, color) + at.add_images(p, builder, attn1, aavg, attn_color) proc = builder.to_proc(p, proc) return proc diff --git a/scripts/lib/attention/extractor.py b/scripts/lib/attention/extractor.py index 0dd0b8f..a29ae0f 100644 --- a/scripts/lib/attention/extractor.py +++ b/scripts/lib/attention/extractor.py @@ -2,6 +2,7 @@ import sys import math from typing import TYPE_CHECKING +import torch from torch import nn, Tensor, einsum from einops import rearrange @@ -150,6 +151,7 @@ class AttentionExtractor(FeatureExtractorBase): p: StableDiffusionProcessing, builder, extracted_features: MultiImageFeatures[AttnFeatureInfo], + add_average: bool, color: Colorizer ): if not self.enabled: return @@ -159,9 +161,9 @@ class AttentionExtractor(FeatureExtractorBase): print(E("Attention: no images are extracted"), file=sys.stderr, end="", flush=False) print("\033[0m", file=sys.stderr) return - return super().add_images(p, builder, extracted_features, color) + return super().add_images(p, builder, extracted_features, add_average, color) - def feature_to_grid_images(self, feature: AttnFeatureInfo, layer: str, img_idx: int, step: int, width: int, height: int, color: Colorizer): + def feature_to_grid_images(self, feature: AttnFeatureInfo, layer: str, img_idx: int, step: int, width: int, height: int, add_average: bool, color: Colorizer): w, h, ch = get_shape(layer, width, height) images = [] @@ -170,7 +172,7 @@ class AttentionExtractor(FeatureExtractorBase): heads_k, ch_k, n_k = k.shape assert ch_k % 77 == 0, f"ch_k={ch_k}" k1 = rearrange(k, 'a t n -> a n t').contiguous() - k_images = tutils.tensor_to_image(k1, 1, heads_k, color) + k_images = tutils.tensor_to_image(k1, 1, heads_k, color, add_average) images.extend(k_images) del k1 @@ -180,7 +182,15 @@ class AttentionExtractor(FeatureExtractorBase): assert ch_qk % 77 == 0, f"ch_qk={ch_qk}" assert w * h == n_qk, f"w={w}, h={h}, n_qk={n_qk}" qk1 = rearrange(qk, 'a t (h w) -> (a t) h w', h=h).contiguous() - qk_images = tutils.tensor_to_image(qk1, ch_qk, heads_qk, color) + qk_images = tutils.tensor_to_image(qk1, ch_qk, heads_qk, color, False) + if add_average: + # shape = (ch, h, w) + qk_avg = torch.mean( + rearrange(qk, 'a t (h w) -> a t h w', h=h).contiguous(), + 0 + ) + qk_avg_image = tutils.tensor_to_image(qk_avg, ch_qk, 1, color, False) + qk_images = qk_avg_image + qk_images images.extend(qk_images) del qk1 @@ -190,7 +200,7 @@ class AttentionExtractor(FeatureExtractorBase): assert w * h == n_vqk, f"w={w}, h={h}, n_vqk={n_vqk}" assert ch == ch_vqk, f"ch={ch}, ch_vqk={ch_vqk}" vqk1 = rearrange(vqk, '(h w) c -> c h w', h=h).contiguous() - vqk_images = tutils.tensor_to_grid_images(vqk1, layer, width, height, color) + vqk_images = tutils.tensor_to_grid_images(vqk1, layer, width, height, color, add_average) images.extend(vqk_images) del vqk1 diff --git a/scripts/lib/build_ui.py b/scripts/lib/build_ui.py index 9d6efab..06eae33 100644 --- a/scripts/lib/build_ui.py +++ b/scripts/lib/build_ui.py @@ -12,6 +12,7 @@ from scripts.lib import layerinfo class OutputSetting: layers: Textbox steps: Textbox + average: Checkbox colorize: Radio colorspace: Radio R: Textbox @@ -42,6 +43,12 @@ class OutputSetting: elem_id=id("steps") ) + avg = Checkbox( + value=True, + label="Show averaged map", + elem_id=id("average") + ) + components: dict[str,Component] = {} if callback1 is not None: components1 = callback1(id) @@ -103,6 +110,7 @@ class OutputSetting: return OutputSetting( layers, steps, + avg, colorize, colorspace, r, g, b, diff --git a/scripts/lib/feature_extractor.py b/scripts/lib/feature_extractor.py index 4293b3c..9fd38ee 100644 --- a/scripts/lib/feature_extractor.py +++ b/scripts/lib/feature_extractor.py @@ -77,6 +77,7 @@ class FeatureExtractorBase(Generic[TInfo], ExtractorBase): p: StableDiffusionProcessing, builder: ProcessedBuilder, extracted_features: MultiImageFeatures[TInfo], + add_average: bool, color: Colorizer ): @@ -100,7 +101,7 @@ class FeatureExtractorBase(Generic[TInfo], ExtractorBase): if shared.state.interrupted: break - canvases = self.feature_to_grid_images(feature, layer, idx, step, p.width, p.height, color) + canvases = self.feature_to_grid_images(feature, layer, idx, step, p.width, p.height, add_average, color) for canvas in canvases: builder.add_ref(idx, canvas, None, {"Layer Name": layer, "Feature Steps": step}) @@ -108,9 +109,11 @@ class FeatureExtractorBase(Generic[TInfo], ExtractorBase): basename = f"{idx:03}-{layer}-{step:03}-{{ch:04}}-{t0}" self.save_features(feature, layer, idx, step, p.width, p.height, self.path, basename) + if hasattr(shared.total_tqdm, "_tqdm"): + shared.total_tqdm._tqdm.set_postfix_str(layer.ljust(5)) # type: ignore shared.total_tqdm.update() - def feature_to_grid_images(self, feature: TInfo, layer: str, img_idx: int, step: int, width: int, height: int, color: Colorizer): + def feature_to_grid_images(self, feature: TInfo, layer: str, img_idx: int, step: int, width: int, height: int, add_average: bool, color: Colorizer): raise NotImplementedError(f"{self.__class__.__name__}.feature_to_grid_images") def save_features(self, feature: TInfo, layer: str, img_idx: int, step: int, width: int, height: int, path: str, basename: str): diff --git a/scripts/lib/features/extractor.py b/scripts/lib/features/extractor.py index c607724..9e43bb2 100644 --- a/scripts/lib/features/extractor.py +++ b/scripts/lib/features/extractor.py @@ -70,8 +70,8 @@ class FeatureExtractor(FeatureExtractorBase[FeatureInfo]): target = get_unet_layer(unet, layer) self.hook_layer(target, create_hook(layer)) - def feature_to_grid_images(self, feature: FeatureInfo, layer: str, img_idx: int, step: int, width: int, height: int, color: Colorizer): - return feature_to_grid_images(feature, layer, width, height, color) + def feature_to_grid_images(self, feature: FeatureInfo, layer: str, img_idx: int, step: int, width: int, height: int, add_average: bool, color: Colorizer): + return feature_to_grid_images(feature, layer, width, height, add_average, color) def save_features(self, feature: FeatureInfo, layer: str, img_idx: int, step: int, width: int, height: int, path: str, basename: str): save_features(feature, path, basename) diff --git a/scripts/lib/features/utils.py b/scripts/lib/features/utils.py index 267c213..90b660f 100644 --- a/scripts/lib/features/utils.py +++ b/scripts/lib/features/utils.py @@ -47,6 +47,7 @@ def feature_to_grid_images( layer: str, width: int, height: int, + add_average: bool, color: Colorizer ): tensor = feature @@ -54,7 +55,7 @@ def feature_to_grid_images( tensor = feature.output assert isinstance(tensor, Tensor) - canvases = tutils.tensor_to_grid_images(tensor, layer, width, height, color) + canvases = tutils.tensor_to_grid_images(tensor, layer, width, height, color, add_average) return canvases def save_features( diff --git a/scripts/lib/tutils.py b/scripts/lib/tutils.py index 2c31d46..15a17cd 100644 --- a/scripts/lib/tutils.py +++ b/scripts/lib/tutils.py @@ -1,6 +1,7 @@ import os import math +import torch from torch import Tensor import numpy as np from PIL import Image @@ -16,10 +17,11 @@ def tensor_to_grid_images( layer: str, width: int, height: int, - color: Colorizer + color: Colorizer, + add_average: bool = False, ): grid_x, grid_y = get_grid_num(layer, width, height) - canvases = tensor_to_image(tensor, grid_x, grid_y, color) + canvases = tensor_to_image(tensor, grid_x, grid_y, color, add_average) return canvases def tensor_to_image( @@ -27,6 +29,7 @@ def tensor_to_image( grid_x: int, grid_y: int, color: Colorizer, + add_average: bool = False, ): # Regardless of wheather --opt-channelslast is enabled or not, # feature.size() seems to return (batch, ch, h, w). @@ -50,6 +53,11 @@ def tensor_to_image( canvases: list[Image.Image] = [] + if add_average: + avg = torch.mean(tensor, 0) # tensor.shape: (ch, h, w) -> (h, w) + avg_img = tensor2d_to_image(avg, color) + canvases.append(avg_img) + for chs in each_slice(range(max_ch), grid_x * grid_y): chs = list(chs) @@ -66,17 +74,24 @@ def tensor_to_image( break ch = chs.pop(0) - array = tensor[ch].cpu().numpy().astype(np.float32) + image = tensor2d_to_image(tensor[ch], color) # create image x = (iw+1) * ix y = (ih+1) * iy - image = color(array) - canvas.paste(Image.fromarray(image, color.format), (x, y)) + canvas.paste(image, (x, y)) canvases.append(canvas) return canvases +def tensor2d_to_image( + tensor: Tensor, + color: Colorizer, +): + assert len(tensor.shape) == 2, f"tensor.shape = {tensor.shape}" + array = tensor.cpu().numpy().astype(np.float32) + return Image.fromarray(color(array), color.format) + def save_tensor( tensor: Tensor, save_dir: str,