import time

from PIL import Image

from modules.logger import log
from modules.upscaler import Upscaler, UpscalerData


class UpscalerAsymmetricVAE(Upscaler):
    """Upscaler backed by an asymmetric autoencoder KL VAE.

    The VAE round-trip (encode implicit in the model forward, asymmetric
    decode) produces an enlarged image. The model is lazily downloaded on
    first use and cached on the instance; it is offloaded to CPU between
    calls to free device memory.
    """

    def __init__(self, dirname=None):  # pylint: disable=unused-argument
        super().__init__(False)
        self.name = "Asymmetric VAE"
        self.vae = None  # lazily-loaded diffusers.AsymmetricAutoencoderKL
        self.selected = None  # model name the cached VAE was loaded for
        self.scalers = [
            UpscalerData("Asymmetric VAE v1", None, self),
            UpscalerData("Asymmetric VAE v2", None, self),
        ]

    def do_upscale(self, img: Image.Image, selected_model=None):
        """Upscale `img` using the selected asymmetric VAE.

        Returns `img` unchanged when no model is selected. The input is
        first cropped/resized down to the nearest multiple of 8 in each
        dimension before being fed to the VAE.
        """
        if selected_model is None:
            return img
        import diffusers
        from modules import shared, devices
        from modules.image import sharpfin, convert
        if self.vae is None or (selected_model != self.selected):
            # (Re)load the VAE when first used or when the selection changed
            if 'v1' in selected_model:
                repo_id = 'Heasterian/AsymmetricAutoencoderKLUpscaler'
            else:
                repo_id = 'Heasterian/AsymmetricAutoencoderKLUpscaler_v2'
            self.vae = diffusers.AsymmetricAutoencoderKL.from_pretrained(repo_id, cache_dir=shared.opts.hfcache_dir)
            self.vae.requires_grad_(False)  # inference only
            self.vae = self.vae.to(device=devices.device, dtype=devices.dtype)
            self.vae.eval()
            self.selected = selected_model
            log.debug(f'Upscaler load: selected="{self.selected}" vae="{repo_id}"')
        t0 = time.time()
        input_size = img.size  # capture before reassignment so the log reports the true input size
        # VAE requires dimensions divisible by 8
        img = sharpfin.resize(img, (8 * (img.width // 8), 8 * (img.height // 8))).convert('RGB')
        tensor = convert.to_tensor(img).unsqueeze(0).to(device=devices.device, dtype=devices.dtype)
        self.vae = self.vae.to(device=devices.device)  # cached model may have been offloaded on a previous call
        tensor = self.vae(tensor).sample
        upscaled = convert.to_pil(tensor.squeeze().clamp(0.0, 1.0).float().cpu())
        self.vae = self.vae.to(device=devices.cpu)  # offload to free device memory between calls
        t1 = time.time()
        log.debug(f'Upscale: name="{self.selected}" input={input_size} output={upscaled.size} time={t1 - t0:.2f}')
        return upscaled


class UpscalerWanUpscale(Upscaler):
    """Upscaler pairing a WAN VAE encoder with a 2x-upscaling WAN VAE decoder.

    Encodes with the stock Qwen-Image-Edit WAN VAE, decodes with a
    fine-tuned WAN VAE that emits 4x the channels, then pixel-shuffles
    those channels into a 2x spatial upscale. Both models are lazily
    loaded, cached on the instance, and offloaded to CPU between calls.
    """

    def __init__(self, dirname=None):  # pylint: disable=unused-argument
        super().__init__(False)
        self.name = "WAN Upscale"
        self.vae_encode = None  # lazily-loaded encoder VAE (AutoencoderKLWan)
        self.vae_decode = None  # lazily-loaded 2x-upscale decoder VAE
        self.selected = None  # model name the cached VAEs were loaded for
        self.scalers = [
            UpscalerData("WAN Asymmetric Upscale", None, self),
        ]

    def do_upscale(self, img: Image.Image, selected_model=None):
        """Upscale `img` 2x via WAN VAE encode/decode.

        Returns `img` unchanged when no model is selected.
        """
        if selected_model is None:
            return img
        import torch.nn.functional as FN
        import diffusers
        from modules import shared, devices
        from modules.image import convert
        if (self.vae_encode is None) or (self.vae_decode is None) or (selected_model != self.selected):
            # (Re)load both VAEs when first used or when the selection changed
            repo_encode = 'Qwen/Qwen-Image-Edit-2509'
            subfolder_encode = 'vae'
            self.vae_encode = diffusers.AutoencoderKLWan.from_pretrained(repo_encode, subfolder=subfolder_encode, cache_dir=shared.opts.hfcache_dir)
            self.vae_encode.requires_grad_(False)  # inference only
            self.vae_encode = self.vae_encode.to(device=devices.device, dtype=devices.dtype)
            self.vae_encode.eval()
            repo_decode = 'spacepxl/Wan2.1-VAE-upscale2x'
            subfolder_decode = "diffusers/Wan2.1_VAE_upscale2x_imageonly_real_v1"
            self.vae_decode = diffusers.AutoencoderKLWan.from_pretrained(repo_decode, subfolder=subfolder_decode, cache_dir=shared.opts.hfcache_dir)
            self.vae_decode.requires_grad_(False)  # inference only
            self.vae_decode = self.vae_decode.to(device=devices.device, dtype=devices.dtype)
            self.vae_decode.eval()
            self.selected = selected_model
            log.debug(f'Upscaler load: selected="{self.selected}" encode="{repo_encode}" decode="{repo_decode}"')
        t0 = time.time()
        self.vae_encode = self.vae_encode.to(device=devices.device)  # cached model may have been offloaded on a previous call
        # video VAE expects a frame axis: unsqueeze to [batch, channels, frames=1, H, W]
        tensor = convert.to_tensor(img).unsqueeze(0).unsqueeze(2).to(device=devices.device, dtype=devices.dtype)
        tensor = self.vae_encode.encode(tensor).latent_dist.mode()  # deterministic latent (distribution mode, no sampling)
        self.vae_encode = self.vae_encode.to(device=devices.cpu)  # offload encoder before loading decoder onto device
        self.vae_decode = self.vae_decode.to(device=devices.device)
        tensor = self.vae_decode.decode(tensor).sample
        tensor = FN.pixel_shuffle(tensor.movedim(2, 1), upscale_factor=2).movedim(1, 2)  # pixel shuffle needs [..., C, H, W] format
        self.vae_decode = self.vae_decode.to(device=devices.cpu)  # offload to free device memory between calls
        upscaled = convert.to_pil(tensor.squeeze().clamp(0.0, 1.0).float().cpu())
        t1 = time.time()
        log.debug(f'Upscale: name="{self.selected}" input={img.size} output={upscaled.size} time={t1 - t0:.2f}')
        return upscaled