add remote vae

Signed-off-by: Vladimir Mandic <mandic00@live.com>
pull/3776/head
Vladimir Mandic 2025-02-22 12:50:18 -05:00
parent f8f987fed6
commit 1b2d4286b5
21 changed files with 133 additions and 70 deletions

View File

@ -1,14 +1,20 @@
# Change Log for SD.Next
## Update for 2025-02-20
## Update for 2025-02-22
Quick release refresh:
- remove ui splash screen on auth fail
- add `--extensions-dir` cli arg and `SD_EXTENSIONSDIR` env variable to specify extensions directory
- log full path when reading/saving `config.json`
- log full path to `sdnext.log`
- log system hostname in `sdnext.log`
- log extensions path in `sdnext.log`
- **Decode**
- Final step of image generate, VAE decode, is by far the most memory intensive operation and can easily result in out-of-memory errors
What can be done? Well, *Huggingface* is now providing *free-of-charge* **remote-VAE-decode** service!
- How to use? The previous *Full quality* option in UI is replaced with a VAE type selector: Full, Tiny, Remote
Currently supports SD15, SDXL and FLUX.1 with more models expected in the near future
Availability is limited, so if remote processing fails SD.Next will fall back to the normal VAE decode process
- **Other**
- add `--extensions-dir` cli arg and `SD_EXTENSIONSDIR` env variable to specify extensions directory
- **Fixes**
- remove ui splash screen on auth fail
- log full config path, full log path, system name, extensions path
- zluda update
- fix zluda with pulid
## Update for 2025-02-18

View File

@ -134,7 +134,7 @@ if __name__ == '__main__':
"sampler_name": args.sampler,
"width": args.width,
"height": args.height,
"full_quality": not args.taesd,
"vae_type": 'Tiny' if args.taesd else 'Full',
"cfg_scale": 0,
"batch_size": 1,
"n_iter": 1,

View File

@ -228,7 +228,7 @@ def control_run(state: str = '',
steps: int = 20, sampler_index: int = None,
seed: int = -1, subseed: int = -1, subseed_strength: float = 0, seed_resize_from_h: int = -1, seed_resize_from_w: int = -1,
cfg_scale: float = 6.0, clip_skip: float = 1.0, image_cfg_scale: float = 6.0, diffusers_guidance_rescale: float = 0.7, pag_scale: float = 0.0, pag_adaptive: float = 0.5, cfg_end: float = 1.0,
full_quality: bool = True, tiling: bool = False, hidiffusion: bool = False,
vae_type: str = 'Full', tiling: bool = False, hidiffusion: bool = False,
detailer_enabled: bool = True, detailer_prompt: str = '', detailer_negative: str = '', detailer_steps: int = 10, detailer_strength: float = 0.3,
hdr_mode: int = 0, hdr_brightness: float = 0, hdr_color: float = 0, hdr_sharpen: float = 0, hdr_clamp: bool = False, hdr_boundary: float = 4.0, hdr_threshold: float = 0.95,
hdr_maximize: bool = False, hdr_max_center: float = 0.6, hdr_max_boundry: float = 1.0, hdr_color_picker: str = None, hdr_tint_ratio: float = 0,
@ -292,7 +292,7 @@ def control_run(state: str = '',
diffusers_guidance_rescale = diffusers_guidance_rescale,
pag_scale = pag_scale,
pag_adaptive = pag_adaptive,
full_quality = full_quality,
vae_type = vae_type,
tiling = tiling,
hidiffusion = hidiffusion,
# detailer

View File

@ -16,9 +16,9 @@ def resize_image(resize_mode: int, im: Union[Image.Image, torch.Tensor], width:
return im
else:
from modules.processing_vae import vae_encode, vae_decode
latents = vae_encode(im, shared.sd_model, full_quality=False) # TODO resize image: enable full VAE mode for resize-latent
latents = vae_encode(im, shared.sd_model, vae_type='Tiny') # TODO resize image: enable full VAE mode for resize-latent
latents = selected_upscaler.scaler.upscale(latents, scale, selected_upscaler.name)
im = vae_decode(latents, shared.sd_model, output_type='pil', full_quality=False)[0]
im = vae_decode(latents, shared.sd_model, output_type='pil', vae_type='Tiny')[0]
return im
def resize(im: Union[Image.Image, torch.Tensor], w, h):

View File

@ -139,7 +139,7 @@ def img2img(id_task: str, state: str, mode: int,
sampler_index,
mask_blur, mask_alpha,
inpainting_fill,
full_quality, tiling, hidiffusion,
vae_type, tiling, hidiffusion,
detailer_enabled, detailer_prompt, detailer_negative, detailer_steps, detailer_strength,
n_iter, batch_size,
cfg_scale, image_cfg_scale,
@ -241,7 +241,7 @@ def img2img(id_task: str, state: str, mode: int,
clip_skip=clip_skip,
width=width,
height=height,
full_quality=full_quality,
vae_type=vae_type,
tiling=tiling,
hidiffusion=hidiffusion,
detailer_enabled=detailer_enabled,

View File

@ -105,7 +105,7 @@ def parse(infotext):
elif val == "False":
params[key] = False
elif key == 'VAE' and val == 'TAESD':
params["Full quality"] = False
params["VAE type"] = 'Tiny'
elif size is not None:
params[f"{key}-1"] = int(size.group(1))
params[f"{key}-2"] = int(size.group(2))

View File

@ -151,7 +151,7 @@ def process_images(p: StableDiffusionProcessing) -> Processed:
sd_models.reload_model_weights()
if p.override_settings.get('sd_vae', None) is not None:
if p.override_settings.get('sd_vae', None) == 'TAESD':
p.full_quality = False
p.vae_type = 'Tiny'
p.override_settings.pop('sd_vae', None)
if p.override_settings.get('Hires upscaler', None) is not None:
p.enable_hr = True

View File

@ -79,7 +79,7 @@ def task_specific_kwargs(p, model):
}
if model.__class__.__name__ == 'LatentConsistencyModelPipeline' and hasattr(p, 'init_images') and len(p.init_images) > 0:
p.ops.append('lcm')
init_latents = [processing_vae.vae_encode(image, model=shared.sd_model, full_quality=p.full_quality).squeeze(dim=0) for image in p.init_images]
init_latents = [processing_vae.vae_encode(image, model=shared.sd_model, vae_type=p.vae_type).squeeze(dim=0) for image in p.init_images]
init_latent = torch.stack(init_latents, dim=0).to(shared.device)
init_noise = p.denoising_strength * processing.create_random_tensors(init_latent.shape[1:], seeds=p.all_seeds, subseeds=p.all_subseeds, subseed_strength=p.subseed_strength, p=p)
init_latent = (1 - p.denoising_strength) * init_latent + init_noise

View File

@ -48,7 +48,7 @@ class StableDiffusionProcessing:
styles: List[str] = [],
# vae
tiling: bool = False,
full_quality: bool = True,
vae_type: str = 'Full',
# other
hidiffusion: bool = False,
do_not_reload_embeddings: bool = False,
@ -169,7 +169,7 @@ class StableDiffusionProcessing:
self.negative_prompt = negative_prompt
self.styles = styles
self.tiling = tiling
self.full_quality = full_quality
self.vae_type = vae_type
self.hidiffusion = hidiffusion
self.do_not_reload_embeddings = do_not_reload_embeddings
self.detailer_enabled = detailer_enabled

View File

@ -197,10 +197,10 @@ def process_hires(p: processing.StableDiffusionProcessing, output):
if p.hr_force:
shared.sd_model = sd_models.set_diffuser_pipe(shared.sd_model, sd_models.DiffusersTaskType.IMAGE_2_IMAGE)
if 'Upscale' in shared.sd_model.__class__.__name__ or 'Flux' in shared.sd_model.__class__.__name__ or 'Kandinsky' in shared.sd_model.__class__.__name__:
output.images = processing_vae.vae_decode(latents=output.images, model=shared.sd_model, full_quality=p.full_quality, output_type='pil', width=p.width, height=p.height)
output.images = processing_vae.vae_decode(latents=output.images, model=shared.sd_model, vae_type=p.vae_type, output_type='pil', width=p.width, height=p.height)
if p.is_control and hasattr(p, 'task_args') and p.task_args.get('image', None) is not None:
if hasattr(shared.sd_model, "vae") and output.images is not None and len(output.images) > 0:
output.images = processing_vae.vae_decode(latents=output.images, model=shared.sd_model, full_quality=p.full_quality, output_type='pil', width=p.hr_upscale_to_x, height=p.hr_upscale_to_y) # controlnet cannnot deal with latent input
output.images = processing_vae.vae_decode(latents=output.images, model=shared.sd_model, vae_type=p.vae_type, output_type='pil', width=p.hr_upscale_to_x, height=p.hr_upscale_to_y) # controlnet cannnot deal with latent input
update_sampler(p, shared.sd_model, second_pass=True)
orig_denoise = p.denoising_strength
p.denoising_strength = strength
@ -289,7 +289,7 @@ def process_refine(p: processing.StableDiffusionProcessing, output):
noise_level = round(350 * p.denoising_strength)
output_type='latent'
if 'Upscale' in shared.sd_refiner.__class__.__name__ or 'Flux' in shared.sd_refiner.__class__.__name__ or 'Kandinsky' in shared.sd_refiner.__class__.__name__:
image = processing_vae.vae_decode(latents=image, model=shared.sd_model, full_quality=p.full_quality, output_type='pil', width=p.width, height=p.height)
image = processing_vae.vae_decode(latents=image, model=shared.sd_model, vae_type=p.vae_type, output_type='pil', width=p.width, height=p.height)
p.extra_generation_params['Noise level'] = noise_level
output_type = 'np'
update_sampler(p, shared.sd_refiner, second_pass=True)
@ -370,7 +370,7 @@ def process_decode(p: processing.StableDiffusionProcessing, output):
result_batch = processing_vae.vae_decode(
latents = output.images[i],
model = model,
full_quality = p.full_quality,
vae_type = p.vae_type,
width = width,
height = height,
frames = frames,
@ -381,7 +381,7 @@ def process_decode(p: processing.StableDiffusionProcessing, output):
results = processing_vae.vae_decode(
latents = output.images,
model = model,
full_quality = p.full_quality,
vae_type = p.vae_type,
width = width,
height = height,
frames = frames,

View File

@ -201,7 +201,7 @@ def create_random_tensors(shape, seeds, subseeds=None, subseed_strength=0.0, see
return x
def decode_first_stage(model, x, full_quality=True):
def decode_first_stage(model, x):
if not shared.opts.keep_incomplete and (shared.state.skipped or shared.state.interrupted):
shared.log.debug(f'Decode VAE: skipped={shared.state.skipped} interrupted={shared.state.interrupted}')
x_sample = torch.zeros((len(x), 3, x.shape[2] * 8, x.shape[3] * 8), dtype=devices.dtype_vae, device=devices.device)
@ -210,20 +210,14 @@ def decode_first_stage(model, x, full_quality=True):
shared.state.job = 'VAE'
with devices.autocast(disable = x.dtype==devices.dtype_vae):
try:
if full_quality:
if hasattr(model, 'decode_first_stage'):
# x_sample = model.decode_first_stage(x) * 0.5 + 0.5
x_sample = model.decode_first_stage(x)
elif hasattr(model, 'vae'):
x_sample = processing_vae.vae_decode(latents=x, model=model, output_type='np', full_quality=full_quality)
else:
x_sample = x
shared.log.error('Decode VAE unknown model')
if hasattr(model, 'decode_first_stage'):
# x_sample = model.decode_first_stage(x) * 0.5 + 0.5
x_sample = model.decode_first_stage(x)
elif hasattr(model, 'vae'):
x_sample = processing_vae.vae_decode(latents=x, model=model, output_type='np')
else:
from modules import sd_vae_taesd
x_sample = torch.zeros((len(x), 3, x.shape[2] * 8, x.shape[3] * 8), dtype=devices.dtype_vae, device=devices.device)
for i in range(len(x_sample)):
x_sample[i] = sd_vae_taesd.decode(x[i]) * 0.5 + 0.5
x_sample = x
shared.log.error('Decode VAE unknown model')
except Exception as e:
x_sample = x
shared.log.error(f'Decode VAE: {e}')
@ -407,7 +401,7 @@ def resize_init_images(p):
def resize_hires(p, latents): # input=latents output=pil if not latent_upscaler else latent
if not torch.is_tensor(latents):
shared.log.warning('Hires: input is not tensor')
first_pass_images = processing_vae.vae_decode(latents=latents, model=shared.sd_model, full_quality=p.full_quality, output_type='pil', width=p.width, height=p.height)
first_pass_images = processing_vae.vae_decode(latents=latents, model=shared.sd_model, vae_type=p.vae_type, output_type='pil', width=p.width, height=p.height)
return first_pass_images
if (p.hr_upscale_to_x == 0 or p.hr_upscale_to_y == 0) and hasattr(p, 'init_hr'):
@ -418,7 +412,7 @@ def resize_hires(p, latents): # input=latents output=pil if not latent_upscaler
resized_image = images.resize_image(p.hr_resize_mode, latents, p.hr_upscale_to_x, p.hr_upscale_to_y, upscaler_name=p.hr_upscaler, context=p.hr_resize_context)
return resized_image
first_pass_images = processing_vae.vae_decode(latents=latents, model=shared.sd_model, full_quality=p.full_quality, output_type='pil', width=p.width, height=p.height)
first_pass_images = processing_vae.vae_decode(latents=latents, model=shared.sd_model, vae_type=p.vae_type, output_type='pil', width=p.width, height=p.height)
resized_images = []
for img in first_pass_images:
resized_image = images.resize_image(p.hr_resize_mode, img, p.hr_upscale_to_x, p.hr_upscale_to_y, upscaler_name=p.hr_upscaler, context=p.hr_resize_context)
@ -561,7 +555,7 @@ def save_intermediate(p, latents, suffix):
for i in range(len(latents)):
from modules.processing import create_infotext
info=create_infotext(p, p.all_prompts, p.all_seeds, p.all_subseeds, [], iteration=p.iteration, position_in_batch=i)
decoded = processing_vae.vae_decode(latents=latents, model=shared.sd_model, output_type='pil', full_quality=p.full_quality, width=p.width, height=p.height)
decoded = processing_vae.vae_decode(latents=latents, model=shared.sd_model, output_type='pil', vae_type=p.vae_type, width=p.width, height=p.height)
for j in range(len(decoded)):
images.save_image(decoded[j], path=p.outpath_samples, basename="", seed=p.seeds[i], prompt=p.prompts[i], extension=shared.opts.samples_format, info=info, p=p, suffix=suffix)

View File

@ -58,7 +58,6 @@ def create_infotext(p: StableDiffusionProcessing, all_prompts=None, all_seeds=No
"Batch": f'{p.n_iter}x{p.batch_size}' if p.n_iter > 1 or p.batch_size > 1 else None,
"Model": None if (not shared.opts.add_model_name_to_info) or (not shared.sd_model.sd_checkpoint_info.model_name) else shared.sd_model.sd_checkpoint_info.model_name.replace(',', '').replace(':', ''),
"Model hash": getattr(p, 'sd_model_hash', None if (not shared.opts.add_model_hash_to_info) or (not shared.sd_model.sd_model_hash) else shared.sd_model.sd_model_hash),
"VAE": (None if not shared.opts.add_model_name_to_info or sd_vae.loaded_vae_file is None else os.path.splitext(os.path.basename(sd_vae.loaded_vae_file))[0]) if p.full_quality else 'TAESD',
"Refiner prompt": p.refiner_prompt if len(p.refiner_prompt) > 0 else None,
"Refiner negative": p.refiner_negative if len(p.refiner_negative) > 0 else None,
"Styles": "; ".join(p.styles) if p.styles is not None and len(p.styles) > 0 else None,
@ -71,6 +70,10 @@ def create_infotext(p: StableDiffusionProcessing, all_prompts=None, all_seeds=No
"Comment": comment,
"Operations": '; '.join(ops).replace('"', '') if len(p.ops) > 0 else 'none',
}
if p.vae_type == 'Full':
args["VAE"] = (None if not shared.opts.add_model_name_to_info or sd_vae.loaded_vae_file is None else os.path.splitext(os.path.basename(sd_vae.loaded_vae_file))[0])
elif p.vae_type == 'Tiny':
args["VAE"] = 'TAESD'
if shared.opts.add_model_name_to_info and getattr(shared.sd_model, 'sd_checkpoint_info', None) is not None:
args["Model"] = shared.sd_model.sd_checkpoint_info.model_name.replace(',', '').replace(':', '')
if shared.opts.add_model_hash_to_info and getattr(shared.sd_model, 'sd_model_hash', None) is not None:

View File

@ -49,7 +49,7 @@ def process_original(p: processing.StableDiffusionProcessing):
c = get_conds_with_caching(prompt_parser.get_multicond_learned_conditioning, p.prompts, p.steps * step_multiplier, cached_c)
with devices.without_autocast() if devices.unet_needs_upcast else devices.autocast():
samples_ddim = p.sample(conditioning=c, unconditional_conditioning=uc, seeds=p.seeds, subseeds=p.subseeds, subseed_strength=p.subseed_strength, prompts=p.prompts)
x_samples_ddim = [processing.decode_first_stage(p.sd_model, samples_ddim[i:i+1].to(dtype=devices.dtype_vae), p.full_quality)[0].cpu() for i in range(samples_ddim.size(0))]
x_samples_ddim = [processing.decode_first_stage(p.sd_model, samples_ddim[i:i+1].to(dtype=devices.dtype_vae))[0].cpu() for i in range(samples_ddim.size(0))]
try:
for x in x_samples_ddim:
devices.test_for_nans(x, "vae")
@ -60,7 +60,7 @@ def process_original(p: processing.StableDiffusionProcessing):
devices.dtype_vae = torch.bfloat16
vae_file, vae_source = sd_vae.resolve_vae(p.sd_model.sd_model_checkpoint)
sd_vae.load_vae(p.sd_model, vae_file, vae_source)
x_samples_ddim = [processing.decode_first_stage(p.sd_model, samples_ddim[i:i+1].to(dtype=devices.dtype_vae), p.full_quality)[0].cpu() for i in range(samples_ddim.size(0))]
x_samples_ddim = [processing.decode_first_stage(p.sd_model, samples_ddim[i:i+1].to(dtype=devices.dtype_vae))[0].cpu() for i in range(samples_ddim.size(0))]
for x in x_samples_ddim:
devices.test_for_nans(x, "vae")
else:
@ -90,7 +90,7 @@ def sample_txt2img(p: processing.StableDiffusionProcessingTxt2Img, conditioning,
target_height = p.hr_upscale_to_y
decoded_samples = None
if shared.opts.samples_save and shared.opts.save_images_before_highres_fix and not p.do_not_save_samples:
decoded_samples = decode_first_stage(p.sd_model, samples.to(dtype=devices.dtype_vae), p.full_quality)
decoded_samples = decode_first_stage(p.sd_model, samples.to(dtype=devices.dtype_vae))
decoded_samples = torch.clamp((decoded_samples + 1.0) / 2.0, min=0.0, max=1.0)
for i, x_sample in enumerate(decoded_samples):
x_sample = validate_sample(x_sample)
@ -107,13 +107,13 @@ def sample_txt2img(p: processing.StableDiffusionProcessingTxt2Img, conditioning,
shared.state.job = 'Upscale'
samples = images.resize_image(1, samples, target_width, target_height, upscaler_name=p.hr_upscaler)
if getattr(p, "inpainting_mask_weight", shared.opts.inpainting_mask_weight) < 1.0:
image_conditioning = img2img_image_conditioning(p, decode_first_stage(p.sd_model, samples.to(dtype=devices.dtype_vae), p.full_quality), samples)
image_conditioning = img2img_image_conditioning(p, decode_first_stage(p.sd_model, samples.to(dtype=devices.dtype_vae)), samples)
else:
image_conditioning = txt2img_image_conditioning(p, samples.to(dtype=devices.dtype_vae))
else:
shared.state.job = 'Upscale'
if decoded_samples is None:
decoded_samples = decode_first_stage(p.sd_model, samples.to(dtype=devices.dtype_vae), p.full_quality)
decoded_samples = decode_first_stage(p.sd_model, samples.to(dtype=devices.dtype_vae))
decoded_samples = torch.clamp((decoded_samples + 1.0) / 2.0, min=0.0, max=1.0)
batch_images = []
for _i, x_sample in enumerate(decoded_samples):

View File

@ -17,9 +17,9 @@ def create_latents(image, p, dtype=None, device=None):
if image is None:
return image
elif isinstance(image, Image.Image):
latents = vae_encode(image, model=shared.sd_model, full_quality=p.full_quality)
latents = vae_encode(image, model=shared.sd_model, vae_type=p.vae_type)
elif isinstance(image, list):
latents = [vae_encode(i, model=shared.sd_model, full_quality=p.full_quality).squeeze(dim=0) for i in image]
latents = [vae_encode(i, model=shared.sd_model, vae_type=p.vae_type).squeeze(dim=0) for i in image]
latents = torch.stack(latents, dim=0).to(shared.device)
else:
shared.log.warning(f'Latents: input type: {type(image)} {image}')
@ -230,7 +230,7 @@ def taesd_vae_encode(image):
return encoded
def vae_decode(latents, model, output_type='np', full_quality=True, width=None, height=None, frames=None):
def vae_decode(latents, model, output_type='np', vae_type='Full', width=None, height=None, frames=None):
t0 = time.time()
model = model or shared.sd_model
if not hasattr(model, 'vae') and hasattr(model, 'pipe'):
@ -238,6 +238,15 @@ def vae_decode(latents, model, output_type='np', full_quality=True, width=None,
if latents is None or not torch.is_tensor(latents): # already decoded
return latents
prev_job = shared.state.job
if vae_type == 'Remote':
shared.state.job = 'Remote VAE'
from modules.sd_vae_remote import remote_decode
images = remote_decode(latents=latents, width=width, height=height)
shared.state.job = prev_job
if images is not None and len(images) > 0:
return images
shared.state.job = 'VAE'
if latents.shape[0] == 0:
shared.log.error(f'VAE nothing to decode: {latents.shape}')
@ -261,7 +270,7 @@ def vae_decode(latents, model, output_type='np', full_quality=True, width=None,
if latents.shape[-1] <= 4: # not a latent, likely an image
decoded = latents.float().cpu().numpy()
elif full_quality and hasattr(model, "vae"):
elif vae_type == 'Full' and hasattr(model, "vae"):
decoded = full_vae_decode(latents=latents, model=model)
elif hasattr(model, "vqgan"):
decoded = full_vqgan_decode(latents=latents, model=model)
@ -296,7 +305,7 @@ def vae_decode(latents, model, output_type='np', full_quality=True, width=None,
return imgs
def vae_encode(image, model, full_quality=True): # pylint: disable=unused-variable
def vae_encode(image, model, vae_type='Full'): # pylint: disable=unused-variable
if shared.state.interrupted or shared.state.skipped:
return []
if not hasattr(model, 'vae') and hasattr(model, 'pipe'):
@ -305,7 +314,7 @@ def vae_encode(image, model, full_quality=True): # pylint: disable=unused-variab
shared.log.error('VAE not found in model')
return []
tensor = TF.to_tensor(image.convert("RGB")).unsqueeze(0).to(devices.device, devices.dtype_vae)
if full_quality:
if vae_type == 'Full':
tensor = tensor * 2 - 1
latents = full_vae_encode(image=tensor, model=shared.sd_model)
else:
@ -321,7 +330,7 @@ def reprocess(gallery):
if latent is None or gallery is None:
return None
shared.log.info(f'Reprocessing: latent={latent.shape}')
reprocessed = vae_decode(latent, shared.sd_model, output_type='pil', full_quality=True)
reprocessed = vae_decode(latent, shared.sd_model, output_type='pil')
outputs = []
for i0, i1 in zip(gallery, reprocessed):
if isinstance(i1, np.ndarray):

50
modules/sd_vae_remote.py Normal file
View File

@ -0,0 +1,50 @@
import io
import time
import base64
import torch
import requests
from PIL import Image
from safetensors.torch import _tobytes
hf_endpoints = {
'sd': 'https://lqmfdhmzmy4dw51z.us-east-1.aws.endpoints.huggingface.cloud',
'sdxl': 'https://m5fxqwyk0r3uu79o.us-east-1.aws.endpoints.huggingface.cloud',
'f1': 'https://zy1z7fzxpgtltg06.us-east-1.aws.endpoints.huggingface.cloud',
}
def remote_decode(latents: torch.Tensor, width: int = 0, height: int = 0, model_type: str = None) -> list:
    """Decode latents into PIL images using Huggingface remote-VAE-decode endpoints.

    Args:
        latents: latent tensor; a single latent (3 dims) or a batch (4 dims).
        width: target image width; forwarded only for FLUX.1 ('f1') endpoints.
        height: target image height; forwarded only for FLUX.1 ('f1') endpoints.
        model_type: endpoint key ('sd', 'sdxl', 'f1'); defaults to the currently loaded model type.

    Returns:
        List of decoded PIL images; empty list if the model type is unsupported
        or every per-latent request failed (callers treat this as "fall back to local VAE").
    """
    from modules import devices, shared, errors  # deferred import to avoid circular module dependency
    images = []
    model_type = model_type or shared.sd_model_type
    url = hf_endpoints.get(model_type, None)
    if url is None:
        shared.log.error(f'Decode: type="remote" model={model_type} unsupported')
        return images
    t0 = time.time()
    params = {}  # initialized so the final debug log cannot hit an unbound name on empty batch or early failure
    latents = latents.unsqueeze(0) if len(latents.shape) == 3 else latents  # normalize single latent to batch of 1
    for i in range(latents.shape[0]):
        try:
            # endpoint expects a single-latent batch, raw tensor bytes base64-encoded, with shape/dtype metadata
            latent = latents[i].detach().clone().to(device=devices.cpu, dtype=devices.dtype).unsqueeze(0)
            encoded = base64.b64encode(_tobytes(latent, "inputs")).decode("utf-8")
            params = {"shape": list(latent.shape), "dtype": str(latent.dtype).split(".", maxsplit=1)[-1]}
            if (model_type == 'f1') and (width > 0) and (height > 0):
                # FLUX.1 endpoint accepts explicit output dimensions
                params['width'] = width
                params['height'] = height
            response = requests.post(
                url=url,
                json={"inputs": encoded, "parameters": params},
                headers={"Content-Type": "application/json", "Accept": "image/jpeg"},
                timeout=60,
            )
            if not response.ok:
                # use response.text: error bodies are not guaranteed to be JSON, so .json() could itself raise
                shared.log.error(f'Decode: type="remote" model={model_type} code={response.status_code} {response.text}')
            else:
                images.append(Image.open(io.BytesIO(response.content)))
        except Exception as e:
            # per-latent failure is non-fatal: remaining latents are still attempted
            shared.log.error(f'Decode: type="remote" model={model_type} {e}')
            errors.display(e, 'VAE')
    t1 = time.time()
    shared.log.debug(f'Decode: type="remote" model={model_type} args={params} images={len(images)} time={t1-t0:.3f}s')
    return images

View File

@ -11,7 +11,7 @@ debug('Trace: PROCESS')
def txt2img(id_task, state,
prompt, negative_prompt, prompt_styles,
steps, sampler_index, hr_sampler_index,
full_quality, tiling, hidiffusion,
vae_type, tiling, hidiffusion,
detailer_enabled, detailer_prompt, detailer_negative, detailer_steps, detailer_strength,
n_iter, batch_size,
cfg_scale, image_cfg_scale, diffusers_guidance_rescale, pag_scale, pag_adaptive, cfg_end,
@ -64,7 +64,7 @@ def txt2img(id_task, state,
clip_skip=clip_skip,
width=width,
height=height,
full_quality=full_quality,
vae_type=vae_type,
detailer_enabled=detailer_enabled,
detailer_prompt=detailer_prompt,
detailer_negative=detailer_negative,

View File

@ -161,7 +161,7 @@ def create_ui(_blocks: gr.Blocks=None):
mask_controls = masking.create_segment_ui()
full_quality, tiling, hidiffusion, cfg_scale, clip_skip, image_cfg_scale, guidance_rescale, pag_scale, pag_adaptive, cfg_end = ui_sections.create_advanced_inputs('control')
vae_type, tiling, hidiffusion, cfg_scale, clip_skip, image_cfg_scale, guidance_rescale, pag_scale, pag_adaptive, cfg_end = ui_sections.create_advanced_inputs('control')
hdr_mode, hdr_brightness, hdr_color, hdr_sharpen, hdr_clamp, hdr_boundary, hdr_threshold, hdr_maximize, hdr_max_center, hdr_max_boundry, hdr_color_picker, hdr_tint_ratio = ui_sections.create_correction_inputs('control')
with gr.Accordion(open=False, label="Video", elem_id="control_video", elem_classes=["small-accordion"]):
@ -561,7 +561,7 @@ def create_ui(_blocks: gr.Blocks=None):
prompt, negative, styles,
steps, sampler_index,
seed, subseed, subseed_strength, seed_resize_from_h, seed_resize_from_w,
cfg_scale, clip_skip, image_cfg_scale, guidance_rescale, pag_scale, pag_adaptive, cfg_end, full_quality, tiling, hidiffusion,
cfg_scale, clip_skip, image_cfg_scale, guidance_rescale, pag_scale, pag_adaptive, cfg_end, vae_type, tiling, hidiffusion,
detailer_enabled, detailer_prompt, detailer_negative, detailer_steps, detailer_strength,
hdr_mode, hdr_brightness, hdr_color, hdr_sharpen, hdr_clamp, hdr_boundary, hdr_threshold, hdr_maximize, hdr_max_center, hdr_max_boundry, hdr_color_picker, hdr_tint_ratio,
resize_mode_before, resize_name_before, resize_context_before, width_before, height_before, scale_by_before, selected_scale_tab_before,
@ -646,7 +646,7 @@ def create_ui(_blocks: gr.Blocks=None):
(image_cfg_scale, "Image CFG scale"),
(image_cfg_scale, "Hires CFG scale"),
(guidance_rescale, "CFG rescale"),
(full_quality, "Full quality"),
(vae_type, "VAE type"),
(tiling, "Tiling"),
(hidiffusion, "HiDiffusion"),
# detailer

View File

@ -131,7 +131,7 @@ def create_ui():
denoising_strength = gr.Slider(minimum=0.0, maximum=0.99, step=0.01, label='Denoising strength', value=0.30, elem_id="img2img_denoising_strength")
refiner_start = gr.Slider(minimum=0.0, maximum=1.0, step=0.05, label='Denoise start', value=0.0, elem_id="img2img_refiner_start")
full_quality, tiling, hidiffusion, cfg_scale, clip_skip, image_cfg_scale, diffusers_guidance_rescale, pag_scale, pag_adaptive, cfg_end = ui_sections.create_advanced_inputs('img2img')
vae_type, tiling, hidiffusion, cfg_scale, clip_skip, image_cfg_scale, diffusers_guidance_rescale, pag_scale, pag_adaptive, cfg_end = ui_sections.create_advanced_inputs('img2img')
hdr_mode, hdr_brightness, hdr_color, hdr_sharpen, hdr_clamp, hdr_boundary, hdr_threshold, hdr_maximize, hdr_max_center, hdr_max_boundry, hdr_color_picker, hdr_tint_ratio = ui_sections.create_correction_inputs('img2img')
enable_hr, hr_sampler_index, hr_denoising_strength, hr_resize_mode, hr_resize_context, hr_upscaler, hr_force, hr_second_pass_steps, hr_scale, hr_resize_x, hr_resize_y, refiner_steps, hr_refiner_start, refiner_prompt, refiner_negative = ui_sections.create_hires_inputs('txt2img')
detailer_enabled, detailer_prompt, detailer_negative, detailer_steps, detailer_strength = shared.yolo.ui('img2img')
@ -175,7 +175,7 @@ def create_ui():
sampler_index,
mask_blur, mask_alpha,
inpainting_fill,
full_quality, tiling, hidiffusion,
vae_type, tiling, hidiffusion,
detailer_enabled, detailer_prompt, detailer_negative, detailer_steps, detailer_strength,
batch_count, batch_size,
cfg_scale, image_cfg_scale,
@ -261,7 +261,7 @@ def create_ui():
(image_cfg_scale, "Hires CFG scale"),
(clip_skip, "Clip skip"),
(diffusers_guidance_rescale, "CFG rescale"),
(full_quality, "Full quality"),
(vae_type, "VAE type"),
(tiling, "Tiling"),
(hidiffusion, "HiDiffusion"),
# detailer

View File

@ -170,8 +170,9 @@ def create_advanced_inputs(tab, base=True):
with gr.Accordion(open=False, label="Advanced", elem_id=f"{tab}_advanced", elem_classes=["small-accordion"]):
with gr.Group():
with gr.Row(elem_id=f"{tab}_advanced_options"):
full_quality = gr.Checkbox(label='Full quality', value=True, elem_id=f"{tab}_full_quality")
tiling = gr.Checkbox(label='Tiling', value=False, elem_id=f"{tab}_tiling")
vae_type = gr.Dropdown(label='VAE type', choices=['Full', 'Tiny', 'Remote'], value='Full', elem_id=f"{tab}_vae_type")
with gr.Row(elem_id=f"{tab}_advanced_options"):
tiling = gr.Checkbox(label='Texture tiling', value=False, elem_id=f"{tab}_tiling")
hidiffusion = gr.Checkbox(label='HiDiffusion', value=False, elem_id=f"{tab}_hidiffusion")
if base:
cfg_scale, cfg_end = create_cfg_inputs(tab)
@ -185,7 +186,7 @@ def create_advanced_inputs(tab, base=True):
diffusers_pag_adaptive = gr.Slider(minimum=0.0, maximum=1.0, step=0.05, label='Adaptive scaling', value=0.5, elem_id=f"{tab}_pag_adaptive", visible=shared.native)
with gr.Row():
clip_skip = gr.Slider(label='CLIP skip', value=1, minimum=0, maximum=12, step=0.1, elem_id=f"{tab}_clip_skip", interactive=True)
return full_quality, tiling, hidiffusion, cfg_scale, clip_skip, image_cfg_scale, diffusers_guidance_rescale, diffusers_pag_scale, diffusers_pag_adaptive, cfg_end
return vae_type, tiling, hidiffusion, cfg_scale, clip_skip, image_cfg_scale, diffusers_guidance_rescale, diffusers_pag_scale, diffusers_pag_adaptive, cfg_end
def create_correction_inputs(tab):

View File

@ -44,7 +44,7 @@ def create_ui():
with gr.Accordion(open=False, label="Samplers", elem_classes=["small-accordion"], elem_id="txt2img_sampler_group"):
ui_sections.create_sampler_options('txt2img')
seed, reuse_seed, subseed, reuse_subseed, subseed_strength, seed_resize_from_h, seed_resize_from_w = ui_sections.create_seed_inputs('txt2img')
full_quality, tiling, hidiffusion, _cfg_scale, clip_skip, image_cfg_scale, diffusers_guidance_rescale, pag_scale, pag_adaptive, _cfg_end = ui_sections.create_advanced_inputs('txt2img', base=False)
vae_type, tiling, hidiffusion, _cfg_scale, clip_skip, image_cfg_scale, diffusers_guidance_rescale, pag_scale, pag_adaptive, _cfg_end = ui_sections.create_advanced_inputs('txt2img', base=False)
hdr_mode, hdr_brightness, hdr_color, hdr_sharpen, hdr_clamp, hdr_boundary, hdr_threshold, hdr_maximize, hdr_max_center, hdr_max_boundry, hdr_color_picker, hdr_tint_ratio = ui_sections.create_correction_inputs('txt2img')
enable_hr, hr_sampler_index, denoising_strength, hr_resize_mode, hr_resize_context, hr_upscaler, hr_force, hr_second_pass_steps, hr_scale, hr_resize_x, hr_resize_y, refiner_steps, refiner_start, refiner_prompt, refiner_negative = ui_sections.create_hires_inputs('txt2img')
detailer_enabled, detailer_prompt, detailer_negative, detailer_steps, detailer_strength = shared.yolo.ui('txt2img')
@ -64,7 +64,7 @@ def create_ui():
dummy_component, state,
txt2img_prompt, txt2img_negative_prompt, txt2img_prompt_styles,
steps, sampler_index, hr_sampler_index,
full_quality, tiling, hidiffusion,
vae_type, tiling, hidiffusion,
detailer_enabled, detailer_prompt, detailer_negative, detailer_steps, detailer_strength,
batch_count, batch_size,
cfg_scale, image_cfg_scale, diffusers_guidance_rescale, pag_scale, pag_adaptive, cfg_end,
@ -122,7 +122,7 @@ def create_ui():
(image_cfg_scale, "Image CFG scale"),
(image_cfg_scale, "Hires CFG scale"),
(diffusers_guidance_rescale, "CFG rescale"),
(full_quality, "Full quality"),
(vae_type, "VAE type"),
(tiling, "Tiling"),
(hidiffusion, "HiDiffusion"),
# detailer

2
wiki

@ -1 +1 @@
Subproject commit e45bfe41f0e494d6d5145443f966ba47560702f5
Subproject commit 0ef2c8d1d85ea6fb433b7ff8f8e22d295de082c0