import os import time import json import re import math from collections import defaultdict import numpy as np from torch import nn, Tensor import gradio as gr from PIL import Image import modules.scripts as scripts from modules.processing import process_images, Processed, StableDiffusionProcessing from modules import shared LayerSettings = { # input shape output shape "IN00": ( ( 4, 8, 8), ( 320, 8, 8) ), "IN01": ( ( 320, 8, 8), ( 320, 8, 8) ), "IN02": ( ( 320, 8, 8), ( 320, 8, 8) ), "IN03": ( ( 320, 8, 8), ( 320, 4, 4) ), "IN04": ( ( 320, 4, 4), ( 640, 4, 4) ), "IN05": ( ( 640, 4, 4), ( 640, 4, 4) ), "IN06": ( ( 640, 4, 4), ( 640, 2, 2) ), "IN07": ( ( 640, 2, 2), (1280, 2, 2) ), "IN08": ( (1280, 2, 2), (1280, 2, 2) ), "IN09": ( (1280, 2, 2), (1280, 1, 1) ), "IN10": ( (1280, 1, 1), (1280, 1, 1) ), "IN11": ( (1280, 1, 1), (1280, 1, 1) ), "M00": ( (1280, 1, 1), (1280, 1, 1) ), "OUT00": ( (2560, 1, 1), (1280, 1, 1) ), "OUT01": ( (2560, 1, 1), (1280, 1, 1) ), "OUT02": ( (2560, 1, 1), (1280, 2, 2) ), "OUT03": ( (2560, 2, 2), (1280, 2, 2) ), "OUT04": ( (2560, 2, 2), (1280, 2, 2) ), "OUT05": ( (1920, 2, 2), (1280, 4, 4) ), "OUT06": ( (1920, 4, 4), ( 640, 4, 4) ), "OUT07": ( (1280, 4, 4), ( 640, 4, 4) ), "OUT08": ( ( 960, 4, 4), ( 640, 8, 8) ), "OUT09": ( ( 960, 8, 8), ( 320, 8, 8) ), "OUT10": ( ( 640, 8, 8), ( 320, 8, 8) ), "OUT11": ( ( 640, 8, 8), ( 320, 8, 8) ), } class Script(scripts.Script): def __init__(self) -> None: super().__init__() self.steps_on_batch = 0 def title(self): return "Dump U-net features" def show(self, is_img2img): return not is_img2img def ui(self, is_img2img): with gr.Blocks(elem_id="dumpunet"): layer = gr.Dropdown([f"IN{i:02}" for i in range(12)] + ["M00"] + [f"OUT{i:02}" for i in range(12)], label="Layer", value="M00", elem_id="dumpunet-layer") layer_setting_hidden = gr.HTML(json.dumps(LayerSettings), visible=False, elem_id="dumpunet-layer_setting") steps = gr.Textbox(label="Image saving steps", placeholder="eg. 1,5-20(+5)") color = gr.Checkbox(False, label="Use red/blue color map (red=POSITIVE, black=ZERO, blue=NEGATIVE)") with gr.Blocks(): path_on = gr.Checkbox(False, label="Dump tensor to files") path = gr.Textbox(label="Output path", placeholder="eg. /home/hnmr/unet/") layer_info = gr.HTML(elem_id="dumpunet-layerinfo") return [layer, steps, color, path_on, path] def process_batch(self, p, *args, **kwargs): self.steps_on_batch = 0 def run(self, p: StableDiffusionProcessing, layer: str, step_input: str, color: bool, path_on: bool, path: str): # Currently class scripts.Script does not support {post}process{_batch} hooks # for non-AlwaysVisible scripts. # So we have no legal method to access current batch number. # ugly hack if p.scripts is not None: p.scripts.alwayson_scripts.append(self) # now `process_batch` will be called from modules.processing.process_images try: return self.run_impl(p, layer,step_input, color, path_on, path) finally: if p.scripts is not None: p.scripts.alwayson_scripts.remove(self) def run_impl(self, p: StableDiffusionProcessing, layer: str, step_input: str, color: bool, path_on: bool, path: str): IN = [ f"IN{i:02}" for i in range(12) ] OUT = [ f"OUT{i:02}" for i in range(12) ] assert layer is not None and layer != "", "[DumpUnet] must not be empty." if path_on: assert path is not None and path != "", "[DumpUnet] must not be empty." steps = retrieve_steps(step_input) grid_x, grid_y = get_grid_num(layer, p.width, p.height) unet = p.sd_model.model.diffusion_model # type: ignore #time_embed : nn.modules.container.Sequential #input_blocks : nn.modules.container.ModuleList #middle_block : ldm.modules.diffusionmodules.openaimodel.TimestepEmbedSequential #output_blocks : nn.modules.container.ModuleList #time_embed = unet.time_embed #input_blocks = unet.input_blocks #middle_block = unet.middle_block #output_blocks = unet.output_blocks #summary(unet, (4, 512, 512)) # mkdir -p path if path_on: if os.path.exists(path): assert os.path.isdir(path), "[DumpUnet] already exists and is not a directory." else: os.makedirs(path, exist_ok=True) target : nn.modules.Module if layer in IN: idx = IN.index(layer) target = unet.input_blocks[idx] elif layer == "M00": target = unet.middle_block elif layer in OUT: idx = OUT.index(layer) target = unet.output_blocks[idx] else: assert False, "[DumpUnet] Invalid value." features = defaultdict(list) def create_hook(features, name): def forward_hook(module, inputs, outputs): #print(f"{name}\t{inputs[0].size()}\t{outputs.size()}") self.steps_on_batch += 1 if steps is None or self.steps_on_batch in steps: list = features[self.steps_on_batch] outputs = outputs.detach().clone() for idx in range(outputs.size()[0] // 2): # two same outputs per sample??? output = outputs[idx] list.append({ "name": name, "input_dims": [ x.size() for x in inputs if type(x) == Tensor ], "output_dims": output.size(), "output": output, }) return forward_hook handles = [] handles.append(target.register_forward_hook(create_hook(features, layer))) t0 = int(time.time()) try: proc = process_images(p) finally: for handle in handles: handle.remove() if shared.state.interrupted: return proc index0 = proc.index_of_first_image preview_images, rest_images = proc.images[:index0], proc.images[index0:] assert rest_images is not None and len(rest_images) != 0, f"[DumpUnet] empty output?" # Now `rest_images` is the list of the images we are interested in. images = [] seeds = [] subseeds = [] prompts = [] neg_prompts = [] infotexts = [] def add_image(image, seed, subseed, prompt, neg_prompt, infotext, feature_steps=None): images.append(image) seeds.append(seed) subseeds.append(subseed) prompts.append(prompt) neg_prompts.append(neg_prompt) info = infotext if feature_steps is not None: if info: info += "\n" info += f"Feature Steps: {feature_steps}" infotexts.append(info) for image in preview_images: preview_info = proc.infotexts.pop(0) add_image(image, proc.seed, proc.subseed, proc.prompt, proc.negative_prompt, preview_info) assert all([ len(rest_images) == len(x) for x in [proc.all_seeds, proc.all_subseeds, proc.all_prompts, proc.all_negative_prompts, proc.infotexts] ]), f"[DumpUnet] #images={len(rest_images)}, #seeds={len(proc.all_seeds)}, #subseeds={len(proc.all_subseeds)}, #pr={len(proc.all_prompts)}, #npr={len(proc.all_negative_prompts)}, #info={len(proc.infotexts)}" for idx, (image, *args) in enumerate(zip(rest_images, proc.all_seeds, proc.all_subseeds, proc.all_prompts, proc.all_negative_prompts, proc.infotexts)): add_image(image, *args) for step, fs in features.items(): assert len(rest_images) == len(fs), f"[DumpUnet] #images={len(rest_images)}, #fs={len(fs)} @ index={idx}, step={step}." feature = fs[idx] if shared.state.interrupted: break tensor = feature["output"] assert len(tensor.size()) == 3 basename = f"{idx:03}-{layer}-{step:03}-{{ch:04}}-{t0}" canvases = process(tensor, grid_x, grid_y, tensor.size(), color, path, basename, path_on) for canvas in canvases: add_image(canvas, *args, feature_steps=step) return Processed( p, images, seed=proc.seed, info=proc.info, subseed=proc.subseed, all_seeds=seeds, all_subseeds=subseeds, all_prompts=prompts, all_negative_prompts=neg_prompts, infotexts=infotexts ) def process(tensor: Tensor, grid_x: int, grid_y: int, dims: tuple[int,int,int], color: bool, save_dir: str, basename: str, save_bin: bool = False ): # Regardless of wheather --opt-channelslast is enabled or not, # feature.size() seems to return (batch, ch, h, w). # Is this intended result??? max_ch, ih, iw = dims width = (grid_x * (iw + 1) - 1) height = (grid_y * (ih + 1) - 1) def each_slice(it: range, n: int): cur = [] for x in it: cur.append(x) if n == len(cur): yield cur cur = [] if 0 < len(cur): yield cur canvases = [] color_format = "RGB" if color else "L" for chs in each_slice(range(max_ch), grid_x * grid_y): chs = list(chs) canvas = Image.new(color_format, (width, height), 0) for iy in range(grid_y): if len(chs) == 0: break for ix in range(grid_x): if shared.state.interrupted: break if len(chs) == 0: break ch = chs.pop(0) array = tensor[ch].cpu().numpy().astype(np.float32) filename = basename.format(x=ix, y=iy, ch=ch) # create image x = (iw+1) * ix y = (ih+1) * iy image = tensor_to_image(array, color) canvas.paste(Image.fromarray(image, color_format), (x, y)) # save binary if save_bin: assert save_dir is not None binpath = os.path.join(save_dir, filename + ".bin") with open(binpath, "wb") as io: io.write(bytearray(array)) canvases.append(canvas) return canvases re_num = re.compile(r"^\s*\+?\s*\d+\s*$") re_range = re.compile(r"^\s*(\+?\s*\d+)\s*-\s*(\+?\s*\d+)\s*(?:\(\s*\+?\s*(\d+)\s*\))?\s*$") def retrieve_steps(input: str): if input is None or input == "": return None steps : list[int]|None = [] tokens = input.split(",") for token in tokens: if token == "": continue m1 = re_num.fullmatch(token) m2 = re_range.fullmatch(token) if m1: steps1 = eval("[" + m1.group(0) + "]") elif m2: n1 = eval(m2.group(1)) n2 = eval(m2.group(2)) n3 = eval(m2.group(3)) if m2.group(3) else 1 steps1 = list(range(n1, n2+1, n3)) else: raise ValueError("[DumpUnet] Invalid input for .") steps.extend(steps1) steps = list(set(steps)) if len(steps) == 0: steps = None # all steps else: steps.sort() return steps def get_grid_num(layer: str, width: int, height: int): assert layer is not None and layer != "", "[DumpUnet] must not be empty." assert layer in LayerSettings, "[DumpUnet] Invalid value." _, (ch, mh, mw) = LayerSettings[layer] iw = math.ceil(width / 64) ih = math.ceil(height / 64) w = mw * iw h = mh * ih # w : width of a feature map # h : height of a feature map # ch: a number of a feature map n = [w, h] while ch % 2 == 0: n[n[0]>n[1]] *= 2 ch //= 2 n[n[0]>n[1]] *= ch if n[0] > n[1]: while n[0] > n[1] * 2 and (n[0] // w) % 2 == 0: n[0] //= 2 n[1] *= 2 else: while n[0] * 2 < n[1] and (n[1] // h) % 2 == 0: n[0] *= 2 n[1] //= 2 return [n[0] // w, n[1] // h] def tensor_to_image(array: np.ndarray, color: bool): # array := (-∞, ∞) if color: def colorize(v: float): # v = -1 .. 1 を # v < 0 のとき青 (0, 0, 1) # v > 0 のとき赤 (1 ,0, 0) # にする rgb = (v if v > 0.0 else 0.0, 0.0, -v if v < 0.0 else 0.0) return rgb colorize2 = np.vectorize(colorize, otypes=[np.float32, np.float32, np.float32]) rgb = colorize2(np.clip(array, -1.0, 1.0)) return np.clip((np.dstack(rgb) * 256), 0, 255).astype(np.uint8) else: return np.clip(np.abs(array) * 256, 0, 255).astype(np.uint8)