add remote vae

Signed-off-by: Vladimir Mandic <mandic00@live.com>
2025-02-22 12:50:18 -05:00 · 2025-02-22 12:50:18 -05:00 · 1b2d4286b5
parent f8f987fed6
commit 1b2d4286b5
21 changed files with 133 additions and 70 deletions
--- a/CHANGELOG.md
+++ b/CHANGELOG.md
@ -1,14 +1,20 @@
 # Change Log for SD.Next

-## Update for 2025-02-20
+## Update for 2025-02-22

-Quick release refresh:
- remove ui splash screen on auth fail  
- add `--extensions-dir` cli arg and `SD_EXTENSIONSDIR` env variable to specify extensions directory  
- log full path when reading/saving `config.json`  
- log full path to `sdnext.log`  
- log system hostname in `sdnext.log`  
- log extensions path in `sdnext.log`  
+- **Decode**  
+  - Final step of image generation, VAE decode, is by far the most memory-intensive operation and can easily result in out-of-memory errors  
+    What can be done? Well, *Huggingface* is now providing *free-of-charge* **remote-VAE-decode** service!  
+  - How to use? Previous *Full quality* option in UI is replaced with a VAE type selector: Full, Tiny, Remote  
+    Currently supports SD15, SDXL and FLUX.1 with more models expected in the near future  
+    Availability is limited, so if remote processing fails SD.Next will fall back to using normal VAE decode process  
+- **Other**  
+  - add `--extensions-dir` cli arg and `SD_EXTENSIONSDIR` env variable to specify extensions directory  
+- **Fixes**  
+  - remove ui splash screen on auth fail  
+  - log full config path, full log path, system name, extensions path
+  - zluda update  
+  - fix zluda with pulid  

 ## Update for 2025-02-18

--- a/cli/run-benchmark.py
+++ b/cli/run-benchmark.py
@ -134,7 +134,7 @@ if __name__ == '__main__':
            "sampler_name": args.sampler,
            "width": args.width,
            "height": args.height,
-            "full_quality": not args.taesd,
+            "vae_type": 'Tiny' if args.taesd else 'Full',
            "cfg_scale": 0,
            "batch_size": 1,
            "n_iter": 1,
--- a/modules/control/run.py
+++ b/modules/control/run.py
@ -228,7 +228,7 @@ def control_run(state: str = '',
                steps: int = 20, sampler_index: int = None,
                seed: int = -1, subseed: int = -1, subseed_strength: float = 0, seed_resize_from_h: int = -1, seed_resize_from_w: int = -1,
                cfg_scale: float = 6.0, clip_skip: float = 1.0, image_cfg_scale: float = 6.0, diffusers_guidance_rescale: float = 0.7, pag_scale: float = 0.0, pag_adaptive: float = 0.5, cfg_end: float = 1.0,
-                full_quality: bool = True, tiling: bool = False, hidiffusion: bool = False,
+                vae_type: str = 'Full', tiling: bool = False, hidiffusion: bool = False,
                detailer_enabled: bool = True, detailer_prompt: str = '', detailer_negative: str = '', detailer_steps: int = 10, detailer_strength: float = 0.3,
                hdr_mode: int = 0, hdr_brightness: float = 0, hdr_color: float = 0, hdr_sharpen: float = 0, hdr_clamp: bool = False, hdr_boundary: float = 4.0, hdr_threshold: float = 0.95,
                hdr_maximize: bool = False, hdr_max_center: float = 0.6, hdr_max_boundry: float = 1.0, hdr_color_picker: str = None, hdr_tint_ratio: float = 0,
@ -292,7 +292,7 @@ def control_run(state: str = '',
        diffusers_guidance_rescale = diffusers_guidance_rescale,
        pag_scale = pag_scale,
        pag_adaptive = pag_adaptive,
-        full_quality = full_quality,
+        vae_type = vae_type,
        tiling = tiling,
        hidiffusion = hidiffusion,
        # detailer
--- a/modules/images_resize.py
+++ b/modules/images_resize.py
@ -16,9 +16,9 @@ def resize_image(resize_mode: int, im: Union[Image.Image, torch.Tensor], width:
            return im
        else:
            from modules.processing_vae import vae_encode, vae_decode
-            latents = vae_encode(im, shared.sd_model, full_quality=False) # TODO resize image: enable full VAE mode for resize-latent
+            latents = vae_encode(im, shared.sd_model, vae_type='Tiny') # TODO resize image: enable full VAE mode for resize-latent
            latents = selected_upscaler.scaler.upscale(latents, scale, selected_upscaler.name)
-            im = vae_decode(latents, shared.sd_model, output_type='pil', full_quality=False)[0]
+            im = vae_decode(latents, shared.sd_model, output_type='pil', vae_type='Tiny')[0]
            return im

    def resize(im: Union[Image.Image, torch.Tensor], w, h):
--- a/modules/img2img.py
+++ b/modules/img2img.py
@ -139,7 +139,7 @@ def img2img(id_task: str, state: str, mode: int,
            sampler_index,
            mask_blur, mask_alpha,
            inpainting_fill,
-            full_quality, tiling, hidiffusion,
+            vae_type, tiling, hidiffusion,
            detailer_enabled, detailer_prompt, detailer_negative, detailer_steps, detailer_strength,
            n_iter, batch_size,
            cfg_scale, image_cfg_scale,
@ -241,7 +241,7 @@ def img2img(id_task: str, state: str, mode: int,
        clip_skip=clip_skip,
        width=width,
        height=height,
-        full_quality=full_quality,
+        vae_type=vae_type,
        tiling=tiling,
        hidiffusion=hidiffusion,
        detailer_enabled=detailer_enabled,
--- a/modules/infotext.py
+++ b/modules/infotext.py
@ -105,7 +105,7 @@ def parse(infotext):
        elif val == "False":
            params[key] = False
        elif key == 'VAE' and val == 'TAESD':
-            params["Full quality"] = False
+            params["VAE type"] = 'Tiny'
        elif size is not None:
            params[f"{key}-1"] = int(size.group(1))
            params[f"{key}-2"] = int(size.group(2))
--- a/modules/processing.py
+++ b/modules/processing.py
@ -151,7 +151,7 @@ def process_images(p: StableDiffusionProcessing) -> Processed:
            sd_models.reload_model_weights()
        if p.override_settings.get('sd_vae', None) is not None:
            if p.override_settings.get('sd_vae', None) == 'TAESD':
-                p.full_quality = False
+                p.vae_type = 'Tiny'
                p.override_settings.pop('sd_vae', None)
        if p.override_settings.get('Hires upscaler', None) is not None:
            p.enable_hr = True
--- a/modules/processing_args.py
+++ b/modules/processing_args.py
@ -79,7 +79,7 @@ def task_specific_kwargs(p, model):
        }
    if model.__class__.__name__ == 'LatentConsistencyModelPipeline' and hasattr(p, 'init_images') and len(p.init_images) > 0:
        p.ops.append('lcm')
-        init_latents = [processing_vae.vae_encode(image, model=shared.sd_model, full_quality=p.full_quality).squeeze(dim=0) for image in p.init_images]
+        init_latents = [processing_vae.vae_encode(image, model=shared.sd_model, vae_type=p.vae_type).squeeze(dim=0) for image in p.init_images]
        init_latent = torch.stack(init_latents, dim=0).to(shared.device)
        init_noise = p.denoising_strength * processing.create_random_tensors(init_latent.shape[1:], seeds=p.all_seeds, subseeds=p.all_subseeds, subseed_strength=p.subseed_strength, p=p)
        init_latent = (1 - p.denoising_strength) * init_latent + init_noise
--- a/modules/processing_class.py
+++ b/modules/processing_class.py
@ -48,7 +48,7 @@ class StableDiffusionProcessing:
                 styles: List[str] = [],
                 # vae
                 tiling: bool = False,
-                 full_quality: bool = True,
+                 vae_type: str = 'Full',
                 # other
                 hidiffusion: bool = False,
                 do_not_reload_embeddings: bool = False,
@ -169,7 +169,7 @@ class StableDiffusionProcessing:
        self.negative_prompt = negative_prompt
        self.styles = styles
        self.tiling = tiling
-        self.full_quality = full_quality
+        self.vae_type = vae_type
        self.hidiffusion = hidiffusion
        self.do_not_reload_embeddings = do_not_reload_embeddings
        self.detailer_enabled = detailer_enabled
--- a/modules/processing_diffusers.py
+++ b/modules/processing_diffusers.py
@ -197,10 +197,10 @@ def process_hires(p: processing.StableDiffusionProcessing, output):
        if p.hr_force:
            shared.sd_model = sd_models.set_diffuser_pipe(shared.sd_model, sd_models.DiffusersTaskType.IMAGE_2_IMAGE)
            if 'Upscale' in shared.sd_model.__class__.__name__ or 'Flux' in shared.sd_model.__class__.__name__ or 'Kandinsky' in shared.sd_model.__class__.__name__:
-                output.images = processing_vae.vae_decode(latents=output.images, model=shared.sd_model, full_quality=p.full_quality, output_type='pil', width=p.width, height=p.height)
+                output.images = processing_vae.vae_decode(latents=output.images, model=shared.sd_model, vae_type=p.vae_type, output_type='pil', width=p.width, height=p.height)
            if p.is_control and hasattr(p, 'task_args') and p.task_args.get('image', None) is not None:
                if hasattr(shared.sd_model, "vae") and output.images is not None and len(output.images) > 0:
-                    output.images = processing_vae.vae_decode(latents=output.images, model=shared.sd_model, full_quality=p.full_quality, output_type='pil', width=p.hr_upscale_to_x, height=p.hr_upscale_to_y) # controlnet cannnot deal with latent input
+                    output.images = processing_vae.vae_decode(latents=output.images, model=shared.sd_model, vae_type=p.vae_type, output_type='pil', width=p.hr_upscale_to_x, height=p.hr_upscale_to_y) # controlnet cannnot deal with latent input
            update_sampler(p, shared.sd_model, second_pass=True)
            orig_denoise = p.denoising_strength
            p.denoising_strength = strength
@ -289,7 +289,7 @@ def process_refine(p: processing.StableDiffusionProcessing, output):
            noise_level = round(350 * p.denoising_strength)
            output_type='latent'
            if 'Upscale' in shared.sd_refiner.__class__.__name__ or 'Flux' in shared.sd_refiner.__class__.__name__ or 'Kandinsky' in shared.sd_refiner.__class__.__name__:
-                image = processing_vae.vae_decode(latents=image, model=shared.sd_model, full_quality=p.full_quality, output_type='pil', width=p.width, height=p.height)
+                image = processing_vae.vae_decode(latents=image, model=shared.sd_model, vae_type=p.vae_type, output_type='pil', width=p.width, height=p.height)
                p.extra_generation_params['Noise level'] = noise_level
                output_type = 'np'
            update_sampler(p, shared.sd_refiner, second_pass=True)
@ -370,7 +370,7 @@ def process_decode(p: processing.StableDiffusionProcessing, output):
                    result_batch = processing_vae.vae_decode(
                        latents = output.images[i],
                        model = model,
-                        full_quality = p.full_quality,
+                        vae_type = p.vae_type,
                        width = width,
                        height = height,
                        frames = frames,
@ -381,7 +381,7 @@ def process_decode(p: processing.StableDiffusionProcessing, output):
                results = processing_vae.vae_decode(
                    latents = output.images,
                    model = model,
-                    full_quality = p.full_quality,
+                    vae_type = p.vae_type,
                    width = width,
                    height = height,
                    frames = frames,
--- a/modules/processing_helpers.py
+++ b/modules/processing_helpers.py
@ -201,7 +201,7 @@ def create_random_tensors(shape, seeds, subseeds=None, subseed_strength=0.0, see
    return x


-def decode_first_stage(model, x, full_quality=True):
+def decode_first_stage(model, x):
    if not shared.opts.keep_incomplete and (shared.state.skipped or shared.state.interrupted):
        shared.log.debug(f'Decode VAE: skipped={shared.state.skipped} interrupted={shared.state.interrupted}')
        x_sample = torch.zeros((len(x), 3, x.shape[2] * 8, x.shape[3] * 8), dtype=devices.dtype_vae, device=devices.device)
@ -210,20 +210,14 @@ def decode_first_stage(model, x, full_quality=True):
    shared.state.job = 'VAE'
    with devices.autocast(disable = x.dtype==devices.dtype_vae):
        try:
-            if full_quality:
-                if hasattr(model, 'decode_first_stage'):
-                    # x_sample = model.decode_first_stage(x) * 0.5 + 0.5
-                    x_sample = model.decode_first_stage(x)
-                elif hasattr(model, 'vae'):
-                    x_sample = processing_vae.vae_decode(latents=x, model=model, output_type='np', full_quality=full_quality)
-                else:
-                    x_sample = x
-                    shared.log.error('Decode VAE unknown model')
+            if hasattr(model, 'decode_first_stage'):
+                # x_sample = model.decode_first_stage(x) * 0.5 + 0.5
+                x_sample = model.decode_first_stage(x)
+            elif hasattr(model, 'vae'):
+                x_sample = processing_vae.vae_decode(latents=x, model=model, output_type='np')
            else:
-                from modules import sd_vae_taesd
-                x_sample = torch.zeros((len(x), 3, x.shape[2] * 8, x.shape[3] * 8), dtype=devices.dtype_vae, device=devices.device)
-                for i in range(len(x_sample)):
-                    x_sample[i] = sd_vae_taesd.decode(x[i]) * 0.5 + 0.5
+                x_sample = x
+                shared.log.error('Decode VAE unknown model')
        except Exception as e:
            x_sample = x
            shared.log.error(f'Decode VAE: {e}')
@ -407,7 +401,7 @@ def resize_init_images(p):
 def resize_hires(p, latents): # input=latents output=pil if not latent_upscaler else latent
    if not torch.is_tensor(latents):
        shared.log.warning('Hires: input is not tensor')
-        first_pass_images = processing_vae.vae_decode(latents=latents, model=shared.sd_model, full_quality=p.full_quality, output_type='pil', width=p.width, height=p.height)
+        first_pass_images = processing_vae.vae_decode(latents=latents, model=shared.sd_model, vae_type=p.vae_type, output_type='pil', width=p.width, height=p.height)
        return first_pass_images

    if (p.hr_upscale_to_x == 0 or p.hr_upscale_to_y == 0) and hasattr(p, 'init_hr'):
@ -418,7 +412,7 @@ def resize_hires(p, latents): # input=latents output=pil if not latent_upscaler
        resized_image = images.resize_image(p.hr_resize_mode, latents, p.hr_upscale_to_x, p.hr_upscale_to_y, upscaler_name=p.hr_upscaler, context=p.hr_resize_context)
        return resized_image

-    first_pass_images = processing_vae.vae_decode(latents=latents, model=shared.sd_model, full_quality=p.full_quality, output_type='pil', width=p.width, height=p.height)
+    first_pass_images = processing_vae.vae_decode(latents=latents, model=shared.sd_model, vae_type=p.vae_type, output_type='pil', width=p.width, height=p.height)
    resized_images = []
    for img in first_pass_images:
        resized_image = images.resize_image(p.hr_resize_mode, img, p.hr_upscale_to_x, p.hr_upscale_to_y, upscaler_name=p.hr_upscaler, context=p.hr_resize_context)
@ -561,7 +555,7 @@ def save_intermediate(p, latents, suffix):
    for i in range(len(latents)):
        from modules.processing import create_infotext
        info=create_infotext(p, p.all_prompts, p.all_seeds, p.all_subseeds, [], iteration=p.iteration, position_in_batch=i)
-        decoded = processing_vae.vae_decode(latents=latents, model=shared.sd_model, output_type='pil', full_quality=p.full_quality, width=p.width, height=p.height)
+        decoded = processing_vae.vae_decode(latents=latents, model=shared.sd_model, output_type='pil', vae_type=p.vae_type, width=p.width, height=p.height)
        for j in range(len(decoded)):
            images.save_image(decoded[j], path=p.outpath_samples, basename="", seed=p.seeds[i], prompt=p.prompts[i], extension=shared.opts.samples_format, info=info, p=p, suffix=suffix)

--- a/modules/processing_info.py
+++ b/modules/processing_info.py
@ -58,7 +58,6 @@ def create_infotext(p: StableDiffusionProcessing, all_prompts=None, all_seeds=No
        "Batch": f'{p.n_iter}x{p.batch_size}' if p.n_iter > 1 or p.batch_size > 1 else None,
        "Model": None if (not shared.opts.add_model_name_to_info) or (not shared.sd_model.sd_checkpoint_info.model_name) else shared.sd_model.sd_checkpoint_info.model_name.replace(',', '').replace(':', ''),
        "Model hash": getattr(p, 'sd_model_hash', None if (not shared.opts.add_model_hash_to_info) or (not shared.sd_model.sd_model_hash) else shared.sd_model.sd_model_hash),
-        "VAE": (None if not shared.opts.add_model_name_to_info or sd_vae.loaded_vae_file is None else os.path.splitext(os.path.basename(sd_vae.loaded_vae_file))[0]) if p.full_quality else 'TAESD',
        "Refiner prompt": p.refiner_prompt if len(p.refiner_prompt) > 0 else None,
        "Refiner negative": p.refiner_negative if len(p.refiner_negative) > 0 else None,
        "Styles": "; ".join(p.styles) if p.styles is not None and len(p.styles) > 0 else None,
@ -71,6 +70,10 @@ def create_infotext(p: StableDiffusionProcessing, all_prompts=None, all_seeds=No
        "Comment": comment,
        "Operations": '; '.join(ops).replace('"', '') if len(p.ops) > 0 else 'none',
    }
+    if p.vae_type == 'Full':
+        args["VAE"] = (None if not shared.opts.add_model_name_to_info or sd_vae.loaded_vae_file is None else os.path.splitext(os.path.basename(sd_vae.loaded_vae_file))[0])
+    elif p.vae_type == 'Tiny':
+        args["VAE"] = 'TAESD'
    if shared.opts.add_model_name_to_info and getattr(shared.sd_model, 'sd_checkpoint_info', None) is not None:
        args["Model"] = shared.sd_model.sd_checkpoint_info.model_name.replace(',', '').replace(':', '')
    if shared.opts.add_model_hash_to_info and getattr(shared.sd_model, 'sd_model_hash', None) is not None:
--- a/modules/processing_original.py
+++ b/modules/processing_original.py
@ -49,7 +49,7 @@ def process_original(p: processing.StableDiffusionProcessing):
    c = get_conds_with_caching(prompt_parser.get_multicond_learned_conditioning, p.prompts, p.steps * step_multiplier, cached_c)
    with devices.without_autocast() if devices.unet_needs_upcast else devices.autocast():
        samples_ddim = p.sample(conditioning=c, unconditional_conditioning=uc, seeds=p.seeds, subseeds=p.subseeds, subseed_strength=p.subseed_strength, prompts=p.prompts)
-    x_samples_ddim = [processing.decode_first_stage(p.sd_model, samples_ddim[i:i+1].to(dtype=devices.dtype_vae), p.full_quality)[0].cpu() for i in range(samples_ddim.size(0))]
+    x_samples_ddim = [processing.decode_first_stage(p.sd_model, samples_ddim[i:i+1].to(dtype=devices.dtype_vae))[0].cpu() for i in range(samples_ddim.size(0))]
    try:
        for x in x_samples_ddim:
            devices.test_for_nans(x, "vae")
@ -60,7 +60,7 @@ def process_original(p: processing.StableDiffusionProcessing):
            devices.dtype_vae = torch.bfloat16
            vae_file, vae_source = sd_vae.resolve_vae(p.sd_model.sd_model_checkpoint)
            sd_vae.load_vae(p.sd_model, vae_file, vae_source)
-            x_samples_ddim = [processing.decode_first_stage(p.sd_model, samples_ddim[i:i+1].to(dtype=devices.dtype_vae), p.full_quality)[0].cpu() for i in range(samples_ddim.size(0))]
+            x_samples_ddim = [processing.decode_first_stage(p.sd_model, samples_ddim[i:i+1].to(dtype=devices.dtype_vae))[0].cpu() for i in range(samples_ddim.size(0))]
            for x in x_samples_ddim:
                devices.test_for_nans(x, "vae")
        else:
@ -90,7 +90,7 @@ def sample_txt2img(p: processing.StableDiffusionProcessingTxt2Img, conditioning,
        target_height = p.hr_upscale_to_y
        decoded_samples = None
        if shared.opts.samples_save and shared.opts.save_images_before_highres_fix and not p.do_not_save_samples:
-            decoded_samples = decode_first_stage(p.sd_model, samples.to(dtype=devices.dtype_vae), p.full_quality)
+            decoded_samples = decode_first_stage(p.sd_model, samples.to(dtype=devices.dtype_vae))
            decoded_samples = torch.clamp((decoded_samples + 1.0) / 2.0, min=0.0, max=1.0)
            for i, x_sample in enumerate(decoded_samples):
                x_sample = validate_sample(x_sample)
@ -107,13 +107,13 @@ def sample_txt2img(p: processing.StableDiffusionProcessingTxt2Img, conditioning,
            shared.state.job = 'Upscale'
            samples = images.resize_image(1, samples, target_width, target_height, upscaler_name=p.hr_upscaler)
            if getattr(p, "inpainting_mask_weight", shared.opts.inpainting_mask_weight) < 1.0:
-                image_conditioning = img2img_image_conditioning(p, decode_first_stage(p.sd_model, samples.to(dtype=devices.dtype_vae), p.full_quality), samples)
+                image_conditioning = img2img_image_conditioning(p, decode_first_stage(p.sd_model, samples.to(dtype=devices.dtype_vae)), samples)
            else:
                image_conditioning = txt2img_image_conditioning(p, samples.to(dtype=devices.dtype_vae))
        else:
            shared.state.job = 'Upscale'
            if decoded_samples is None:
-                decoded_samples = decode_first_stage(p.sd_model, samples.to(dtype=devices.dtype_vae), p.full_quality)
+                decoded_samples = decode_first_stage(p.sd_model, samples.to(dtype=devices.dtype_vae))
                decoded_samples = torch.clamp((decoded_samples + 1.0) / 2.0, min=0.0, max=1.0)
            batch_images = []
            for _i, x_sample in enumerate(decoded_samples):
--- a/modules/processing_vae.py
+++ b/modules/processing_vae.py
@ -17,9 +17,9 @@ def create_latents(image, p, dtype=None, device=None):
    if image is None:
        return image
    elif isinstance(image, Image.Image):
-        latents = vae_encode(image, model=shared.sd_model, full_quality=p.full_quality)
+        latents = vae_encode(image, model=shared.sd_model, vae_type=p.vae_type)
    elif isinstance(image, list):
-        latents = [vae_encode(i, model=shared.sd_model, full_quality=p.full_quality).squeeze(dim=0) for i in image]
+        latents = [vae_encode(i, model=shared.sd_model, vae_type=p.vae_type).squeeze(dim=0) for i in image]
        latents = torch.stack(latents, dim=0).to(shared.device)
    else:
        shared.log.warning(f'Latents: input type: {type(image)} {image}')
@ -230,7 +230,7 @@ def taesd_vae_encode(image):
    return encoded


-def vae_decode(latents, model, output_type='np', full_quality=True, width=None, height=None, frames=None):
+def vae_decode(latents, model, output_type='np', vae_type='Full', width=None, height=None, frames=None):
    t0 = time.time()
    model = model or shared.sd_model
    if not hasattr(model, 'vae') and hasattr(model, 'pipe'):
@ -238,6 +238,15 @@ def vae_decode(latents, model, output_type='np', full_quality=True, width=None,
    if latents is None or not torch.is_tensor(latents): # already decoded
        return latents
    prev_job = shared.state.job
+
+    if vae_type == 'Remote':
+        shared.state.job = 'Remote VAE'
+        from modules.sd_vae_remote import remote_decode
+        images = remote_decode(latents=latents, width=width, height=height)
+        shared.state.job = prev_job
+        if images is not None and len(images) > 0:
+            return images
+
    shared.state.job = 'VAE'
    if latents.shape[0] == 0:
        shared.log.error(f'VAE nothing to decode: {latents.shape}')
@ -261,7 +270,7 @@ def vae_decode(latents, model, output_type='np', full_quality=True, width=None,

    if latents.shape[-1] <= 4: # not a latent, likely an image
        decoded = latents.float().cpu().numpy()
-    elif full_quality and hasattr(model, "vae"):
+    elif vae_type == 'Full' and hasattr(model, "vae"):
        decoded = full_vae_decode(latents=latents, model=model)
    elif hasattr(model, "vqgan"):
        decoded = full_vqgan_decode(latents=latents, model=model)
@ -296,7 +305,7 @@ def vae_decode(latents, model, output_type='np', full_quality=True, width=None,
    return imgs


-def vae_encode(image, model, full_quality=True): # pylint: disable=unused-variable
+def vae_encode(image, model, vae_type='Full'): # pylint: disable=unused-variable
    if shared.state.interrupted or shared.state.skipped:
        return []
    if not hasattr(model, 'vae') and hasattr(model, 'pipe'):
@ -305,7 +314,7 @@ def vae_encode(image, model, full_quality=True): # pylint: disable=unused-variab
        shared.log.error('VAE not found in model')
        return []
    tensor = TF.to_tensor(image.convert("RGB")).unsqueeze(0).to(devices.device, devices.dtype_vae)
-    if full_quality:
+    if vae_type == 'Full':
        tensor = tensor * 2 - 1
        latents = full_vae_encode(image=tensor, model=shared.sd_model)
    else:
@ -321,7 +330,7 @@ def reprocess(gallery):
    if latent is None or gallery is None:
        return None
    shared.log.info(f'Reprocessing: latent={latent.shape}')
-    reprocessed = vae_decode(latent, shared.sd_model, output_type='pil', full_quality=True)
+    reprocessed = vae_decode(latent, shared.sd_model, output_type='pil')
    outputs = []
    for i0, i1 in zip(gallery, reprocessed):
        if isinstance(i1, np.ndarray):
--- a/modules/sd_vae_remote.py
+++ b/modules/sd_vae_remote.py
@ -0,0 +1,71 @
+import io
+import time
+import base64
+import torch
+import requests
+from PIL import Image
+from safetensors.torch import _tobytes
+
+
+hf_endpoints = {
+    'sd': 'https://lqmfdhmzmy4dw51z.us-east-1.aws.endpoints.huggingface.cloud',
+    'sdxl': 'https://m5fxqwyk0r3uu79o.us-east-1.aws.endpoints.huggingface.cloud',
+    'f1': 'https://zy1z7fzxpgtltg06.us-east-1.aws.endpoints.huggingface.cloud',
+}
+
+
+def remote_decode(latents: torch.Tensor, width: int = 0, height: int = 0, model_type: str = None) -> list:
+    """Decode latents into PIL images using the remote VAE endpoints.
+
+    Each latent in the batch is sent in its own POST request to the endpoint
+    registered in `hf_endpoints` for the model type.
+
+    Args:
+        latents: a single latent (3 dims) or a batch of latents (4 dims).
+        width: target image width; only forwarded for 'f1' and only when > 0.
+        height: target image height; only forwarded for 'f1' and only when > 0.
+        model_type: key into `hf_endpoints`; defaults to `shared.sd_model_type`.
+
+    Returns:
+        List of decoded images. Latents whose request failed are skipped, and the
+        list is empty when the model type has no registered endpoint or when
+        every request failed.
+    """
+    from modules import devices, shared, errors
+    images = []
+    model_type = model_type or shared.sd_model_type
+    url = hf_endpoints.get(model_type, None)
+    if url is None:
+        shared.log.error(f'Decode: type="remote" type={model_type} unsuppported')
+        return images
+    # callers may pass None for unknown dimensions; normalize so the comparisons below cannot raise
+    width = width or 0
+    height = height or 0
+    params = {} # defined up-front so the summary log below works even if no request was built
+    t0 = time.time()
+    latents = latents.unsqueeze(0) if len(latents.shape) == 3 else latents
+    for i in range(latents.shape[0]):
+        try:
+            latent = latents[i].detach().clone().to(device=devices.cpu, dtype=devices.dtype).unsqueeze(0)
+            encoded = base64.b64encode(_tobytes(latent, "inputs")).decode("utf-8")
+            params = {"shape": list(latent.shape), "dtype": str(latent.dtype).split(".", maxsplit=1)[-1]}
+            if (model_type == 'f1') and (width > 0) and (height > 0):
+                params['width'] = width
+                params['height'] = height
+            response = requests.post(
+                url=url,
+                json={"inputs": encoded, "parameters": params},
+                headers={"Content-Type": "application/json", "Accept": "image/jpeg"},
+                timeout=60,
+            )
+            if not response.ok:
+                # error bodies are not guaranteed to be json, so log the raw text instead of parsing it
+                shared.log.error(f'Decode: type="remote" model={model_type} code={response.status_code} {response.text}')
+            else:
+                images.append(Image.open(io.BytesIO(response.content)))
+        except Exception as e:
+            shared.log.error(f'Decode: type="remote" model={model_type} {e}')
+            errors.display(e, 'VAE')
+    t1 = time.time()
+    shared.log.debug(f'Decode: type="remote" model={model_type} args={params} images={images} time={t1-t0:.3f}s')
+    return images
--- a/modules/txt2img.py
+++ b/modules/txt2img.py
@ -11,7 +11,7 @@ debug('Trace: PROCESS')
 def txt2img(id_task, state,
            prompt, negative_prompt, prompt_styles,
            steps, sampler_index, hr_sampler_index,
-            full_quality, tiling, hidiffusion,
+            vae_type, tiling, hidiffusion,
            detailer_enabled, detailer_prompt, detailer_negative, detailer_steps, detailer_strength,
            n_iter, batch_size,
            cfg_scale, image_cfg_scale, diffusers_guidance_rescale, pag_scale, pag_adaptive, cfg_end,
@ -64,7 +64,7 @@ def txt2img(id_task, state,
        clip_skip=clip_skip,
        width=width,
        height=height,
-        full_quality=full_quality,
+        vae_type=vae_type,
        detailer_enabled=detailer_enabled,
        detailer_prompt=detailer_prompt,
        detailer_negative=detailer_negative,
--- a/modules/ui_control.py
+++ b/modules/ui_control.py
@ -161,7 +161,7 @@ def create_ui(_blocks: gr.Blocks=None):

                mask_controls = masking.create_segment_ui()

-                full_quality, tiling, hidiffusion, cfg_scale, clip_skip, image_cfg_scale, guidance_rescale, pag_scale, pag_adaptive, cfg_end = ui_sections.create_advanced_inputs('control')
+                vae_type, tiling, hidiffusion, cfg_scale, clip_skip, image_cfg_scale, guidance_rescale, pag_scale, pag_adaptive, cfg_end = ui_sections.create_advanced_inputs('control')
                hdr_mode, hdr_brightness, hdr_color, hdr_sharpen, hdr_clamp, hdr_boundary, hdr_threshold, hdr_maximize, hdr_max_center, hdr_max_boundry, hdr_color_picker, hdr_tint_ratio = ui_sections.create_correction_inputs('control')

                with gr.Accordion(open=False, label="Video", elem_id="control_video", elem_classes=["small-accordion"]):
@ -561,7 +561,7 @@ def create_ui(_blocks: gr.Blocks=None):
                prompt, negative, styles,
                steps, sampler_index,
                seed, subseed, subseed_strength, seed_resize_from_h, seed_resize_from_w,
-                cfg_scale, clip_skip, image_cfg_scale, guidance_rescale, pag_scale, pag_adaptive, cfg_end, full_quality, tiling, hidiffusion,
+                cfg_scale, clip_skip, image_cfg_scale, guidance_rescale, pag_scale, pag_adaptive, cfg_end, vae_type, tiling, hidiffusion,
                detailer_enabled, detailer_prompt, detailer_negative, detailer_steps, detailer_strength,
                hdr_mode, hdr_brightness, hdr_color, hdr_sharpen, hdr_clamp, hdr_boundary, hdr_threshold, hdr_maximize, hdr_max_center, hdr_max_boundry, hdr_color_picker, hdr_tint_ratio,
                resize_mode_before, resize_name_before, resize_context_before, width_before, height_before, scale_by_before, selected_scale_tab_before,
@ -646,7 +646,7 @@ def create_ui(_blocks: gr.Blocks=None):
                (image_cfg_scale, "Image CFG scale"),
                (image_cfg_scale, "Hires CFG scale"),
                (guidance_rescale, "CFG rescale"),
-                (full_quality, "Full quality"),
+                (vae_type, "VAE type"),
                (tiling, "Tiling"),
                (hidiffusion, "HiDiffusion"),
                # detailer
--- a/modules/ui_img2img.py
+++ b/modules/ui_img2img.py
@ -131,7 +131,7 @@ def create_ui():
                            denoising_strength = gr.Slider(minimum=0.0, maximum=0.99, step=0.01, label='Denoising strength', value=0.30, elem_id="img2img_denoising_strength")
                            refiner_start = gr.Slider(minimum=0.0, maximum=1.0, step=0.05, label='Denoise start', value=0.0, elem_id="img2img_refiner_start")

-                    full_quality, tiling, hidiffusion, cfg_scale, clip_skip, image_cfg_scale, diffusers_guidance_rescale, pag_scale, pag_adaptive, cfg_end = ui_sections.create_advanced_inputs('img2img')
+                    vae_type, tiling, hidiffusion, cfg_scale, clip_skip, image_cfg_scale, diffusers_guidance_rescale, pag_scale, pag_adaptive, cfg_end = ui_sections.create_advanced_inputs('img2img')
                    hdr_mode, hdr_brightness, hdr_color, hdr_sharpen, hdr_clamp, hdr_boundary, hdr_threshold, hdr_maximize, hdr_max_center, hdr_max_boundry, hdr_color_picker, hdr_tint_ratio = ui_sections.create_correction_inputs('img2img')
                    enable_hr, hr_sampler_index, hr_denoising_strength, hr_resize_mode, hr_resize_context, hr_upscaler, hr_force, hr_second_pass_steps, hr_scale, hr_resize_x, hr_resize_y, refiner_steps, hr_refiner_start, refiner_prompt, refiner_negative = ui_sections.create_hires_inputs('txt2img')
                    detailer_enabled, detailer_prompt, detailer_negative, detailer_steps, detailer_strength = shared.yolo.ui('img2img')
@ -175,7 +175,7 @@ def create_ui():
                sampler_index,
                mask_blur, mask_alpha,
                inpainting_fill,
-                full_quality, tiling, hidiffusion,
+                vae_type, tiling, hidiffusion,
                detailer_enabled, detailer_prompt, detailer_negative, detailer_steps, detailer_strength,
                batch_count, batch_size,
                cfg_scale, image_cfg_scale,
@ -261,7 +261,7 @@ def create_ui():
                (image_cfg_scale, "Hires CFG scale"),
                (clip_skip, "Clip skip"),
                (diffusers_guidance_rescale, "CFG rescale"),
-                (full_quality, "Full quality"),
+                (vae_type, "VAE type"),
                (tiling, "Tiling"),
                (hidiffusion, "HiDiffusion"),
                # detailer
--- a/modules/ui_sections.py
+++ b/modules/ui_sections.py
@ -170,8 +170,9 @@ def create_advanced_inputs(tab, base=True):
    with gr.Accordion(open=False, label="Advanced", elem_id=f"{tab}_advanced", elem_classes=["small-accordion"]):
        with gr.Group():
            with gr.Row(elem_id=f"{tab}_advanced_options"):
-                full_quality = gr.Checkbox(label='Full quality', value=True, elem_id=f"{tab}_full_quality")
-                tiling = gr.Checkbox(label='Tiling', value=False, elem_id=f"{tab}_tiling")
+                vae_type = gr.Dropdown(label='VAE type', choices=['Full', 'Tiny', 'Remote'], value='Full', elem_id=f"{tab}_vae_type")
+            with gr.Row(elem_id=f"{tab}_advanced_toggles"):  # distinct id: the dropdown row above already owns {tab}_advanced_options, DOM ids must be unique
+                tiling = gr.Checkbox(label='Texture tiling', value=False, elem_id=f"{tab}_tiling")
                hidiffusion = gr.Checkbox(label='HiDiffusion', value=False, elem_id=f"{tab}_hidiffusion")
            if base:
                cfg_scale, cfg_end = create_cfg_inputs(tab)
@ -185,7 +186,7 @@ def create_advanced_inputs(tab, base=True):
                diffusers_pag_adaptive = gr.Slider(minimum=0.0, maximum=1.0, step=0.05, label='Adaptive scaling', value=0.5, elem_id=f"{tab}_pag_adaptive", visible=shared.native)
            with gr.Row():
                clip_skip = gr.Slider(label='CLIP skip', value=1, minimum=0, maximum=12, step=0.1, elem_id=f"{tab}_clip_skip", interactive=True)
-    return full_quality, tiling, hidiffusion, cfg_scale, clip_skip, image_cfg_scale, diffusers_guidance_rescale, diffusers_pag_scale, diffusers_pag_adaptive, cfg_end
+    return vae_type, tiling, hidiffusion, cfg_scale, clip_skip, image_cfg_scale, diffusers_guidance_rescale, diffusers_pag_scale, diffusers_pag_adaptive, cfg_end


 def create_correction_inputs(tab):
--- a/modules/ui_txt2img.py
+++ b/modules/ui_txt2img.py
@ -44,7 +44,7 @@ def create_ui():
                    with gr.Accordion(open=False, label="Samplers", elem_classes=["small-accordion"], elem_id="txt2img_sampler_group"):
                        ui_sections.create_sampler_options('txt2img')
                    seed, reuse_seed, subseed, reuse_subseed, subseed_strength, seed_resize_from_h, seed_resize_from_w = ui_sections.create_seed_inputs('txt2img')
-                    full_quality, tiling, hidiffusion, _cfg_scale, clip_skip, image_cfg_scale, diffusers_guidance_rescale, pag_scale, pag_adaptive, _cfg_end = ui_sections.create_advanced_inputs('txt2img', base=False)
+                    vae_type, tiling, hidiffusion, _cfg_scale, clip_skip, image_cfg_scale, diffusers_guidance_rescale, pag_scale, pag_adaptive, _cfg_end = ui_sections.create_advanced_inputs('txt2img', base=False)
                    hdr_mode, hdr_brightness, hdr_color, hdr_sharpen, hdr_clamp, hdr_boundary, hdr_threshold, hdr_maximize, hdr_max_center, hdr_max_boundry, hdr_color_picker, hdr_tint_ratio = ui_sections.create_correction_inputs('txt2img')
                    enable_hr, hr_sampler_index, denoising_strength, hr_resize_mode, hr_resize_context, hr_upscaler, hr_force, hr_second_pass_steps, hr_scale, hr_resize_x, hr_resize_y, refiner_steps, refiner_start, refiner_prompt, refiner_negative = ui_sections.create_hires_inputs('txt2img')
                    detailer_enabled, detailer_prompt, detailer_negative, detailer_steps, detailer_strength  = shared.yolo.ui('txt2img')
@ -64,7 +64,7 @@ def create_ui():
                dummy_component, state,
                txt2img_prompt, txt2img_negative_prompt, txt2img_prompt_styles,
                steps, sampler_index, hr_sampler_index,
-                full_quality, tiling, hidiffusion,
+                vae_type, tiling, hidiffusion,
                detailer_enabled, detailer_prompt, detailer_negative, detailer_steps, detailer_strength,
                batch_count, batch_size,
                cfg_scale, image_cfg_scale, diffusers_guidance_rescale, pag_scale, pag_adaptive, cfg_end,
@ -122,7 +122,7 @@ def create_ui():
                (image_cfg_scale, "Image CFG scale"),
                (image_cfg_scale, "Hires CFG scale"),
                (diffusers_guidance_rescale, "CFG rescale"),
-                (full_quality, "Full quality"),
+                (vae_type, "VAE type"),
                (tiling, "Tiling"),
                (hidiffusion, "HiDiffusion"),
                # detailer
--- a/2
+++ b/2
@ -1 +1 @@
-Subproject commit e45bfe41f0e494d6d5145443f966ba47560702f5
+Subproject commit 0ef2c8d1d85ea6fb433b7ff8f8e22d295de082c0