mirror of https://github.com/vladmandic/automatic
parent
f8f987fed6
commit
1b2d4286b5
22
CHANGELOG.md
22
CHANGELOG.md
|
|
@ -1,14 +1,20 @@
|
|||
# Change Log for SD.Next
|
||||
|
||||
## Update for 2025-02-20
|
||||
## Update for 2025-02-22
|
||||
|
||||
Quick release refresh:
|
||||
- remove ui splash screen on auth fail
|
||||
- add `--extensions-dir` cli arg and `SD_EXTENSIONSDIR` env variable to specify extensions directory
|
||||
- log full path when reading/saving `config.json`
|
||||
- log full path to `sdnext.log`
|
||||
- log system hostname in `sdnext.log`
|
||||
- log extensions path in `sdnext.log`
|
||||
- **Decode**
|
||||
- Final step of image generation, VAE decode, is by far the most memory-intensive operation and can easily result in out-of-memory errors
|
||||
What can be done? Well, *Huggingface* is now providing *free-of-charge* **remote-VAE-decode** service!
|
||||
- How to use? The previous *Full quality* option in UI is replaced with a VAE type selector: Full, Tiny, Remote
|
||||
Currently supports SD15, SDXL and FLUX.1 with more models expected in the near future
|
||||
Availability is limited, so if remote processing fails SD.Next will fall back to using the normal VAE decode process
|
||||
- **Other**
|
||||
- add `--extensions-dir` cli arg and `SD_EXTENSIONSDIR` env variable to specify extensions directory
|
||||
- **Fixes**
|
||||
- remove ui splash screen on auth fail
|
||||
- log full config path, full log path, system name, extensions path
|
||||
- zluda update
|
||||
- fix zluda with pulid
|
||||
|
||||
## Update for 2025-02-18
|
||||
|
||||
|
|
|
|||
|
|
@ -134,7 +134,7 @@ if __name__ == '__main__':
|
|||
"sampler_name": args.sampler,
|
||||
"width": args.width,
|
||||
"height": args.height,
|
||||
"full_quality": not args.taesd,
|
||||
"vae_type": 'Tiny' if args.taesd else 'Full',
|
||||
"cfg_scale": 0,
|
||||
"batch_size": 1,
|
||||
"n_iter": 1,
|
||||
|
|
|
|||
|
|
@ -228,7 +228,7 @@ def control_run(state: str = '',
|
|||
steps: int = 20, sampler_index: int = None,
|
||||
seed: int = -1, subseed: int = -1, subseed_strength: float = 0, seed_resize_from_h: int = -1, seed_resize_from_w: int = -1,
|
||||
cfg_scale: float = 6.0, clip_skip: float = 1.0, image_cfg_scale: float = 6.0, diffusers_guidance_rescale: float = 0.7, pag_scale: float = 0.0, pag_adaptive: float = 0.5, cfg_end: float = 1.0,
|
||||
full_quality: bool = True, tiling: bool = False, hidiffusion: bool = False,
|
||||
vae_type: str = 'Full', tiling: bool = False, hidiffusion: bool = False,
|
||||
detailer_enabled: bool = True, detailer_prompt: str = '', detailer_negative: str = '', detailer_steps: int = 10, detailer_strength: float = 0.3,
|
||||
hdr_mode: int = 0, hdr_brightness: float = 0, hdr_color: float = 0, hdr_sharpen: float = 0, hdr_clamp: bool = False, hdr_boundary: float = 4.0, hdr_threshold: float = 0.95,
|
||||
hdr_maximize: bool = False, hdr_max_center: float = 0.6, hdr_max_boundry: float = 1.0, hdr_color_picker: str = None, hdr_tint_ratio: float = 0,
|
||||
|
|
@ -292,7 +292,7 @@ def control_run(state: str = '',
|
|||
diffusers_guidance_rescale = diffusers_guidance_rescale,
|
||||
pag_scale = pag_scale,
|
||||
pag_adaptive = pag_adaptive,
|
||||
full_quality = full_quality,
|
||||
vae_type = vae_type,
|
||||
tiling = tiling,
|
||||
hidiffusion = hidiffusion,
|
||||
# detailer
|
||||
|
|
|
|||
|
|
@ -16,9 +16,9 @@ def resize_image(resize_mode: int, im: Union[Image.Image, torch.Tensor], width:
|
|||
return im
|
||||
else:
|
||||
from modules.processing_vae import vae_encode, vae_decode
|
||||
latents = vae_encode(im, shared.sd_model, full_quality=False) # TODO resize image: enable full VAE mode for resize-latent
|
||||
latents = vae_encode(im, shared.sd_model, vae_type='Tiny') # TODO resize image: enable full VAE mode for resize-latent
|
||||
latents = selected_upscaler.scaler.upscale(latents, scale, selected_upscaler.name)
|
||||
im = vae_decode(latents, shared.sd_model, output_type='pil', full_quality=False)[0]
|
||||
im = vae_decode(latents, shared.sd_model, output_type='pil', vae_type='Tiny')[0]
|
||||
return im
|
||||
|
||||
def resize(im: Union[Image.Image, torch.Tensor], w, h):
|
||||
|
|
|
|||
|
|
@ -139,7 +139,7 @@ def img2img(id_task: str, state: str, mode: int,
|
|||
sampler_index,
|
||||
mask_blur, mask_alpha,
|
||||
inpainting_fill,
|
||||
full_quality, tiling, hidiffusion,
|
||||
vae_type, tiling, hidiffusion,
|
||||
detailer_enabled, detailer_prompt, detailer_negative, detailer_steps, detailer_strength,
|
||||
n_iter, batch_size,
|
||||
cfg_scale, image_cfg_scale,
|
||||
|
|
@ -241,7 +241,7 @@ def img2img(id_task: str, state: str, mode: int,
|
|||
clip_skip=clip_skip,
|
||||
width=width,
|
||||
height=height,
|
||||
full_quality=full_quality,
|
||||
vae_type=vae_type,
|
||||
tiling=tiling,
|
||||
hidiffusion=hidiffusion,
|
||||
detailer_enabled=detailer_enabled,
|
||||
|
|
|
|||
|
|
@ -105,7 +105,7 @@ def parse(infotext):
|
|||
elif val == "False":
|
||||
params[key] = False
|
||||
elif key == 'VAE' and val == 'TAESD':
|
||||
params["Full quality"] = False
|
||||
params["VAE type"] = 'Tiny'
|
||||
elif size is not None:
|
||||
params[f"{key}-1"] = int(size.group(1))
|
||||
params[f"{key}-2"] = int(size.group(2))
|
||||
|
|
|
|||
|
|
@ -151,7 +151,7 @@ def process_images(p: StableDiffusionProcessing) -> Processed:
|
|||
sd_models.reload_model_weights()
|
||||
if p.override_settings.get('sd_vae', None) is not None:
|
||||
if p.override_settings.get('sd_vae', None) == 'TAESD':
|
||||
p.full_quality = False
|
||||
p.vae_type = 'Tiny'
|
||||
p.override_settings.pop('sd_vae', None)
|
||||
if p.override_settings.get('Hires upscaler', None) is not None:
|
||||
p.enable_hr = True
|
||||
|
|
|
|||
|
|
@ -79,7 +79,7 @@ def task_specific_kwargs(p, model):
|
|||
}
|
||||
if model.__class__.__name__ == 'LatentConsistencyModelPipeline' and hasattr(p, 'init_images') and len(p.init_images) > 0:
|
||||
p.ops.append('lcm')
|
||||
init_latents = [processing_vae.vae_encode(image, model=shared.sd_model, full_quality=p.full_quality).squeeze(dim=0) for image in p.init_images]
|
||||
init_latents = [processing_vae.vae_encode(image, model=shared.sd_model, vae_type=p.vae_type).squeeze(dim=0) for image in p.init_images]
|
||||
init_latent = torch.stack(init_latents, dim=0).to(shared.device)
|
||||
init_noise = p.denoising_strength * processing.create_random_tensors(init_latent.shape[1:], seeds=p.all_seeds, subseeds=p.all_subseeds, subseed_strength=p.subseed_strength, p=p)
|
||||
init_latent = (1 - p.denoising_strength) * init_latent + init_noise
|
||||
|
|
|
|||
|
|
@ -48,7 +48,7 @@ class StableDiffusionProcessing:
|
|||
styles: List[str] = [],
|
||||
# vae
|
||||
tiling: bool = False,
|
||||
full_quality: bool = True,
|
||||
vae_type: str = 'Full',
|
||||
# other
|
||||
hidiffusion: bool = False,
|
||||
do_not_reload_embeddings: bool = False,
|
||||
|
|
@ -169,7 +169,7 @@ class StableDiffusionProcessing:
|
|||
self.negative_prompt = negative_prompt
|
||||
self.styles = styles
|
||||
self.tiling = tiling
|
||||
self.full_quality = full_quality
|
||||
self.vae_type = vae_type
|
||||
self.hidiffusion = hidiffusion
|
||||
self.do_not_reload_embeddings = do_not_reload_embeddings
|
||||
self.detailer_enabled = detailer_enabled
|
||||
|
|
|
|||
|
|
@ -197,10 +197,10 @@ def process_hires(p: processing.StableDiffusionProcessing, output):
|
|||
if p.hr_force:
|
||||
shared.sd_model = sd_models.set_diffuser_pipe(shared.sd_model, sd_models.DiffusersTaskType.IMAGE_2_IMAGE)
|
||||
if 'Upscale' in shared.sd_model.__class__.__name__ or 'Flux' in shared.sd_model.__class__.__name__ or 'Kandinsky' in shared.sd_model.__class__.__name__:
|
||||
output.images = processing_vae.vae_decode(latents=output.images, model=shared.sd_model, full_quality=p.full_quality, output_type='pil', width=p.width, height=p.height)
|
||||
output.images = processing_vae.vae_decode(latents=output.images, model=shared.sd_model, vae_type=p.vae_type, output_type='pil', width=p.width, height=p.height)
|
||||
if p.is_control and hasattr(p, 'task_args') and p.task_args.get('image', None) is not None:
|
||||
if hasattr(shared.sd_model, "vae") and output.images is not None and len(output.images) > 0:
|
||||
output.images = processing_vae.vae_decode(latents=output.images, model=shared.sd_model, full_quality=p.full_quality, output_type='pil', width=p.hr_upscale_to_x, height=p.hr_upscale_to_y) # controlnet cannnot deal with latent input
|
||||
output.images = processing_vae.vae_decode(latents=output.images, model=shared.sd_model, vae_type=p.vae_type, output_type='pil', width=p.hr_upscale_to_x, height=p.hr_upscale_to_y) # controlnet cannnot deal with latent input
|
||||
update_sampler(p, shared.sd_model, second_pass=True)
|
||||
orig_denoise = p.denoising_strength
|
||||
p.denoising_strength = strength
|
||||
|
|
@ -289,7 +289,7 @@ def process_refine(p: processing.StableDiffusionProcessing, output):
|
|||
noise_level = round(350 * p.denoising_strength)
|
||||
output_type='latent'
|
||||
if 'Upscale' in shared.sd_refiner.__class__.__name__ or 'Flux' in shared.sd_refiner.__class__.__name__ or 'Kandinsky' in shared.sd_refiner.__class__.__name__:
|
||||
image = processing_vae.vae_decode(latents=image, model=shared.sd_model, full_quality=p.full_quality, output_type='pil', width=p.width, height=p.height)
|
||||
image = processing_vae.vae_decode(latents=image, model=shared.sd_model, vae_type=p.vae_type, output_type='pil', width=p.width, height=p.height)
|
||||
p.extra_generation_params['Noise level'] = noise_level
|
||||
output_type = 'np'
|
||||
update_sampler(p, shared.sd_refiner, second_pass=True)
|
||||
|
|
@ -370,7 +370,7 @@ def process_decode(p: processing.StableDiffusionProcessing, output):
|
|||
result_batch = processing_vae.vae_decode(
|
||||
latents = output.images[i],
|
||||
model = model,
|
||||
full_quality = p.full_quality,
|
||||
vae_type = p.vae_type,
|
||||
width = width,
|
||||
height = height,
|
||||
frames = frames,
|
||||
|
|
@ -381,7 +381,7 @@ def process_decode(p: processing.StableDiffusionProcessing, output):
|
|||
results = processing_vae.vae_decode(
|
||||
latents = output.images,
|
||||
model = model,
|
||||
full_quality = p.full_quality,
|
||||
vae_type = p.vae_type,
|
||||
width = width,
|
||||
height = height,
|
||||
frames = frames,
|
||||
|
|
|
|||
|
|
@ -201,7 +201,7 @@ def create_random_tensors(shape, seeds, subseeds=None, subseed_strength=0.0, see
|
|||
return x
|
||||
|
||||
|
||||
def decode_first_stage(model, x, full_quality=True):
|
||||
def decode_first_stage(model, x):
|
||||
if not shared.opts.keep_incomplete and (shared.state.skipped or shared.state.interrupted):
|
||||
shared.log.debug(f'Decode VAE: skipped={shared.state.skipped} interrupted={shared.state.interrupted}')
|
||||
x_sample = torch.zeros((len(x), 3, x.shape[2] * 8, x.shape[3] * 8), dtype=devices.dtype_vae, device=devices.device)
|
||||
|
|
@ -210,20 +210,14 @@ def decode_first_stage(model, x, full_quality=True):
|
|||
shared.state.job = 'VAE'
|
||||
with devices.autocast(disable = x.dtype==devices.dtype_vae):
|
||||
try:
|
||||
if full_quality:
|
||||
if hasattr(model, 'decode_first_stage'):
|
||||
# x_sample = model.decode_first_stage(x) * 0.5 + 0.5
|
||||
x_sample = model.decode_first_stage(x)
|
||||
elif hasattr(model, 'vae'):
|
||||
x_sample = processing_vae.vae_decode(latents=x, model=model, output_type='np', full_quality=full_quality)
|
||||
else:
|
||||
x_sample = x
|
||||
shared.log.error('Decode VAE unknown model')
|
||||
if hasattr(model, 'decode_first_stage'):
|
||||
# x_sample = model.decode_first_stage(x) * 0.5 + 0.5
|
||||
x_sample = model.decode_first_stage(x)
|
||||
elif hasattr(model, 'vae'):
|
||||
x_sample = processing_vae.vae_decode(latents=x, model=model, output_type='np')
|
||||
else:
|
||||
from modules import sd_vae_taesd
|
||||
x_sample = torch.zeros((len(x), 3, x.shape[2] * 8, x.shape[3] * 8), dtype=devices.dtype_vae, device=devices.device)
|
||||
for i in range(len(x_sample)):
|
||||
x_sample[i] = sd_vae_taesd.decode(x[i]) * 0.5 + 0.5
|
||||
x_sample = x
|
||||
shared.log.error('Decode VAE unknown model')
|
||||
except Exception as e:
|
||||
x_sample = x
|
||||
shared.log.error(f'Decode VAE: {e}')
|
||||
|
|
@ -407,7 +401,7 @@ def resize_init_images(p):
|
|||
def resize_hires(p, latents): # input=latents output=pil if not latent_upscaler else latent
|
||||
if not torch.is_tensor(latents):
|
||||
shared.log.warning('Hires: input is not tensor')
|
||||
first_pass_images = processing_vae.vae_decode(latents=latents, model=shared.sd_model, full_quality=p.full_quality, output_type='pil', width=p.width, height=p.height)
|
||||
first_pass_images = processing_vae.vae_decode(latents=latents, model=shared.sd_model, vae_type=p.vae_type, output_type='pil', width=p.width, height=p.height)
|
||||
return first_pass_images
|
||||
|
||||
if (p.hr_upscale_to_x == 0 or p.hr_upscale_to_y == 0) and hasattr(p, 'init_hr'):
|
||||
|
|
@ -418,7 +412,7 @@ def resize_hires(p, latents): # input=latents output=pil if not latent_upscaler
|
|||
resized_image = images.resize_image(p.hr_resize_mode, latents, p.hr_upscale_to_x, p.hr_upscale_to_y, upscaler_name=p.hr_upscaler, context=p.hr_resize_context)
|
||||
return resized_image
|
||||
|
||||
first_pass_images = processing_vae.vae_decode(latents=latents, model=shared.sd_model, full_quality=p.full_quality, output_type='pil', width=p.width, height=p.height)
|
||||
first_pass_images = processing_vae.vae_decode(latents=latents, model=shared.sd_model, vae_type=p.vae_type, output_type='pil', width=p.width, height=p.height)
|
||||
resized_images = []
|
||||
for img in first_pass_images:
|
||||
resized_image = images.resize_image(p.hr_resize_mode, img, p.hr_upscale_to_x, p.hr_upscale_to_y, upscaler_name=p.hr_upscaler, context=p.hr_resize_context)
|
||||
|
|
@ -561,7 +555,7 @@ def save_intermediate(p, latents, suffix):
|
|||
for i in range(len(latents)):
|
||||
from modules.processing import create_infotext
|
||||
info=create_infotext(p, p.all_prompts, p.all_seeds, p.all_subseeds, [], iteration=p.iteration, position_in_batch=i)
|
||||
decoded = processing_vae.vae_decode(latents=latents, model=shared.sd_model, output_type='pil', full_quality=p.full_quality, width=p.width, height=p.height)
|
||||
decoded = processing_vae.vae_decode(latents=latents, model=shared.sd_model, output_type='pil', vae_type=p.vae_type, width=p.width, height=p.height)
|
||||
for j in range(len(decoded)):
|
||||
images.save_image(decoded[j], path=p.outpath_samples, basename="", seed=p.seeds[i], prompt=p.prompts[i], extension=shared.opts.samples_format, info=info, p=p, suffix=suffix)
|
||||
|
||||
|
|
|
|||
|
|
@ -58,7 +58,6 @@ def create_infotext(p: StableDiffusionProcessing, all_prompts=None, all_seeds=No
|
|||
"Batch": f'{p.n_iter}x{p.batch_size}' if p.n_iter > 1 or p.batch_size > 1 else None,
|
||||
"Model": None if (not shared.opts.add_model_name_to_info) or (not shared.sd_model.sd_checkpoint_info.model_name) else shared.sd_model.sd_checkpoint_info.model_name.replace(',', '').replace(':', ''),
|
||||
"Model hash": getattr(p, 'sd_model_hash', None if (not shared.opts.add_model_hash_to_info) or (not shared.sd_model.sd_model_hash) else shared.sd_model.sd_model_hash),
|
||||
"VAE": (None if not shared.opts.add_model_name_to_info or sd_vae.loaded_vae_file is None else os.path.splitext(os.path.basename(sd_vae.loaded_vae_file))[0]) if p.full_quality else 'TAESD',
|
||||
"Refiner prompt": p.refiner_prompt if len(p.refiner_prompt) > 0 else None,
|
||||
"Refiner negative": p.refiner_negative if len(p.refiner_negative) > 0 else None,
|
||||
"Styles": "; ".join(p.styles) if p.styles is not None and len(p.styles) > 0 else None,
|
||||
|
|
@ -71,6 +70,10 @@ def create_infotext(p: StableDiffusionProcessing, all_prompts=None, all_seeds=No
|
|||
"Comment": comment,
|
||||
"Operations": '; '.join(ops).replace('"', '') if len(p.ops) > 0 else 'none',
|
||||
}
|
||||
if p.vae_type == 'Full':
|
||||
args["VAE"] = (None if not shared.opts.add_model_name_to_info or sd_vae.loaded_vae_file is None else os.path.splitext(os.path.basename(sd_vae.loaded_vae_file))[0])
|
||||
elif p.vae_type == 'Tiny':
|
||||
args["VAE"] = 'TAESD'
|
||||
if shared.opts.add_model_name_to_info and getattr(shared.sd_model, 'sd_checkpoint_info', None) is not None:
|
||||
args["Model"] = shared.sd_model.sd_checkpoint_info.model_name.replace(',', '').replace(':', '')
|
||||
if shared.opts.add_model_hash_to_info and getattr(shared.sd_model, 'sd_model_hash', None) is not None:
|
||||
|
|
|
|||
|
|
@ -49,7 +49,7 @@ def process_original(p: processing.StableDiffusionProcessing):
|
|||
c = get_conds_with_caching(prompt_parser.get_multicond_learned_conditioning, p.prompts, p.steps * step_multiplier, cached_c)
|
||||
with devices.without_autocast() if devices.unet_needs_upcast else devices.autocast():
|
||||
samples_ddim = p.sample(conditioning=c, unconditional_conditioning=uc, seeds=p.seeds, subseeds=p.subseeds, subseed_strength=p.subseed_strength, prompts=p.prompts)
|
||||
x_samples_ddim = [processing.decode_first_stage(p.sd_model, samples_ddim[i:i+1].to(dtype=devices.dtype_vae), p.full_quality)[0].cpu() for i in range(samples_ddim.size(0))]
|
||||
x_samples_ddim = [processing.decode_first_stage(p.sd_model, samples_ddim[i:i+1].to(dtype=devices.dtype_vae))[0].cpu() for i in range(samples_ddim.size(0))]
|
||||
try:
|
||||
for x in x_samples_ddim:
|
||||
devices.test_for_nans(x, "vae")
|
||||
|
|
@ -60,7 +60,7 @@ def process_original(p: processing.StableDiffusionProcessing):
|
|||
devices.dtype_vae = torch.bfloat16
|
||||
vae_file, vae_source = sd_vae.resolve_vae(p.sd_model.sd_model_checkpoint)
|
||||
sd_vae.load_vae(p.sd_model, vae_file, vae_source)
|
||||
x_samples_ddim = [processing.decode_first_stage(p.sd_model, samples_ddim[i:i+1].to(dtype=devices.dtype_vae), p.full_quality)[0].cpu() for i in range(samples_ddim.size(0))]
|
||||
x_samples_ddim = [processing.decode_first_stage(p.sd_model, samples_ddim[i:i+1].to(dtype=devices.dtype_vae))[0].cpu() for i in range(samples_ddim.size(0))]
|
||||
for x in x_samples_ddim:
|
||||
devices.test_for_nans(x, "vae")
|
||||
else:
|
||||
|
|
@ -90,7 +90,7 @@ def sample_txt2img(p: processing.StableDiffusionProcessingTxt2Img, conditioning,
|
|||
target_height = p.hr_upscale_to_y
|
||||
decoded_samples = None
|
||||
if shared.opts.samples_save and shared.opts.save_images_before_highres_fix and not p.do_not_save_samples:
|
||||
decoded_samples = decode_first_stage(p.sd_model, samples.to(dtype=devices.dtype_vae), p.full_quality)
|
||||
decoded_samples = decode_first_stage(p.sd_model, samples.to(dtype=devices.dtype_vae))
|
||||
decoded_samples = torch.clamp((decoded_samples + 1.0) / 2.0, min=0.0, max=1.0)
|
||||
for i, x_sample in enumerate(decoded_samples):
|
||||
x_sample = validate_sample(x_sample)
|
||||
|
|
@ -107,13 +107,13 @@ def sample_txt2img(p: processing.StableDiffusionProcessingTxt2Img, conditioning,
|
|||
shared.state.job = 'Upscale'
|
||||
samples = images.resize_image(1, samples, target_width, target_height, upscaler_name=p.hr_upscaler)
|
||||
if getattr(p, "inpainting_mask_weight", shared.opts.inpainting_mask_weight) < 1.0:
|
||||
image_conditioning = img2img_image_conditioning(p, decode_first_stage(p.sd_model, samples.to(dtype=devices.dtype_vae), p.full_quality), samples)
|
||||
image_conditioning = img2img_image_conditioning(p, decode_first_stage(p.sd_model, samples.to(dtype=devices.dtype_vae)), samples)
|
||||
else:
|
||||
image_conditioning = txt2img_image_conditioning(p, samples.to(dtype=devices.dtype_vae))
|
||||
else:
|
||||
shared.state.job = 'Upscale'
|
||||
if decoded_samples is None:
|
||||
decoded_samples = decode_first_stage(p.sd_model, samples.to(dtype=devices.dtype_vae), p.full_quality)
|
||||
decoded_samples = decode_first_stage(p.sd_model, samples.to(dtype=devices.dtype_vae))
|
||||
decoded_samples = torch.clamp((decoded_samples + 1.0) / 2.0, min=0.0, max=1.0)
|
||||
batch_images = []
|
||||
for _i, x_sample in enumerate(decoded_samples):
|
||||
|
|
|
|||
|
|
@ -17,9 +17,9 @@ def create_latents(image, p, dtype=None, device=None):
|
|||
if image is None:
|
||||
return image
|
||||
elif isinstance(image, Image.Image):
|
||||
latents = vae_encode(image, model=shared.sd_model, full_quality=p.full_quality)
|
||||
latents = vae_encode(image, model=shared.sd_model, vae_type=p.vae_type)
|
||||
elif isinstance(image, list):
|
||||
latents = [vae_encode(i, model=shared.sd_model, full_quality=p.full_quality).squeeze(dim=0) for i in image]
|
||||
latents = [vae_encode(i, model=shared.sd_model, vae_type=p.vae_type).squeeze(dim=0) for i in image]
|
||||
latents = torch.stack(latents, dim=0).to(shared.device)
|
||||
else:
|
||||
shared.log.warning(f'Latents: input type: {type(image)} {image}')
|
||||
|
|
@ -230,7 +230,7 @@ def taesd_vae_encode(image):
|
|||
return encoded
|
||||
|
||||
|
||||
def vae_decode(latents, model, output_type='np', full_quality=True, width=None, height=None, frames=None):
|
||||
def vae_decode(latents, model, output_type='np', vae_type='Full', width=None, height=None, frames=None):
|
||||
t0 = time.time()
|
||||
model = model or shared.sd_model
|
||||
if not hasattr(model, 'vae') and hasattr(model, 'pipe'):
|
||||
|
|
@ -238,6 +238,15 @@ def vae_decode(latents, model, output_type='np', full_quality=True, width=None,
|
|||
if latents is None or not torch.is_tensor(latents): # already decoded
|
||||
return latents
|
||||
prev_job = shared.state.job
|
||||
|
||||
if vae_type == 'Remote':
|
||||
shared.state.job = 'Remote VAE'
|
||||
from modules.sd_vae_remote import remote_decode
|
||||
images = remote_decode(latents=latents, width=width, height=height)
|
||||
shared.state.job = prev_job
|
||||
if images is not None and len(images) > 0:
|
||||
return images
|
||||
|
||||
shared.state.job = 'VAE'
|
||||
if latents.shape[0] == 0:
|
||||
shared.log.error(f'VAE nothing to decode: {latents.shape}')
|
||||
|
|
@ -261,7 +270,7 @@ def vae_decode(latents, model, output_type='np', full_quality=True, width=None,
|
|||
|
||||
if latents.shape[-1] <= 4: # not a latent, likely an image
|
||||
decoded = latents.float().cpu().numpy()
|
||||
elif full_quality and hasattr(model, "vae"):
|
||||
elif vae_type == 'Full' and hasattr(model, "vae"):
|
||||
decoded = full_vae_decode(latents=latents, model=model)
|
||||
elif hasattr(model, "vqgan"):
|
||||
decoded = full_vqgan_decode(latents=latents, model=model)
|
||||
|
|
@ -296,7 +305,7 @@ def vae_decode(latents, model, output_type='np', full_quality=True, width=None,
|
|||
return imgs
|
||||
|
||||
|
||||
def vae_encode(image, model, full_quality=True): # pylint: disable=unused-variable
|
||||
def vae_encode(image, model, vae_type='Full'): # pylint: disable=unused-variable
|
||||
if shared.state.interrupted or shared.state.skipped:
|
||||
return []
|
||||
if not hasattr(model, 'vae') and hasattr(model, 'pipe'):
|
||||
|
|
@ -305,7 +314,7 @@ def vae_encode(image, model, full_quality=True): # pylint: disable=unused-variab
|
|||
shared.log.error('VAE not found in model')
|
||||
return []
|
||||
tensor = TF.to_tensor(image.convert("RGB")).unsqueeze(0).to(devices.device, devices.dtype_vae)
|
||||
if full_quality:
|
||||
if vae_type == 'Full':
|
||||
tensor = tensor * 2 - 1
|
||||
latents = full_vae_encode(image=tensor, model=shared.sd_model)
|
||||
else:
|
||||
|
|
@ -321,7 +330,7 @@ def reprocess(gallery):
|
|||
if latent is None or gallery is None:
|
||||
return None
|
||||
shared.log.info(f'Reprocessing: latent={latent.shape}')
|
||||
reprocessed = vae_decode(latent, shared.sd_model, output_type='pil', full_quality=True)
|
||||
reprocessed = vae_decode(latent, shared.sd_model, output_type='pil')
|
||||
outputs = []
|
||||
for i0, i1 in zip(gallery, reprocessed):
|
||||
if isinstance(i1, np.ndarray):
|
||||
|
|
|
|||
|
|
@ -0,0 +1,50 @@
|
|||
import io
|
||||
import time
|
||||
import base64
|
||||
import torch
|
||||
import requests
|
||||
from PIL import Image
|
||||
from safetensors.torch import _tobytes
|
||||
|
||||
|
||||
hf_endpoints = {
|
||||
'sd': 'https://lqmfdhmzmy4dw51z.us-east-1.aws.endpoints.huggingface.cloud',
|
||||
'sdxl': 'https://m5fxqwyk0r3uu79o.us-east-1.aws.endpoints.huggingface.cloud',
|
||||
'f1': 'https://zy1z7fzxpgtltg06.us-east-1.aws.endpoints.huggingface.cloud',
|
||||
}
|
||||
|
||||
|
||||
def remote_decode(latents: torch.Tensor, width: int = 0, height: int = 0, model_type: str = None) -> list:
    """Decode latents into images by posting them to a remote VAE endpoint.

    Each latent in the batch is sent in its own HTTP request to the endpoint
    registered in ``hf_endpoints`` for the given model type.

    Args:
        latents: Latent tensor. A 3-dimensional tensor is treated as a single
            latent and gets a leading batch dimension added.
        width: Target image width; forwarded to the endpoint only when
            ``model_type`` is ``'f1'`` and both width and height are positive.
        height: Target image height; same forwarding rule as ``width``.
        model_type: Key into ``hf_endpoints``. Defaults to
            ``shared.sd_model_type`` when not provided.

    Returns:
        List of ``PIL.Image.Image`` objects, one per successfully decoded
        latent. The list is empty when the model type has no endpoint or when
        every request failed, and may be shorter than the batch if only some
        requests failed.
    """
    from modules import devices, shared, errors
    images = []
    model_type = model_type or shared.sd_model_type
    url = hf_endpoints.get(model_type, None)
    if url is None:
        shared.log.error(f'Decode: type="remote" type={model_type} unsuppported')
        return images
    t0 = time.time()
    latents = latents.unsqueeze(0) if len(latents.shape) == 3 else latents
    # Pre-bind so the final debug log cannot raise NameError when the batch is
    # empty or the very first iteration fails before the parameters are built.
    params = {}
    for i in range(latents.shape[0]):
        try:
            # Requests are sent one latent at a time, each with a batch dimension of 1.
            latent = latents[i].detach().clone().to(device=devices.cpu, dtype=devices.dtype).unsqueeze(0)
            # NOTE(review): `_tobytes` is a private safetensors helper; confirm it stays available across upgrades.
            encoded = base64.b64encode(_tobytes(latent, "inputs")).decode("utf-8")
            params = {"shape": list(latent.shape), "dtype": str(latent.dtype).split(".", maxsplit=1)[-1]}
            if (model_type == 'f1') and (width > 0) and (height > 0):
                params['width'] = width
                params['height'] = height
            response = requests.post(
                url=url,
                json={"inputs": encoded, "parameters": params},
                headers={"Content-Type": "application/json", "Accept": "image/jpeg"},
                timeout=60,
            )
            if not response.ok:
                # Error replies are not guaranteed to be JSON (e.g. gateway HTML pages);
                # fall back to the raw text so the HTTP status is still reported.
                try:
                    detail = response.json()
                except ValueError:
                    detail = response.text
                shared.log.error(f'Decode: type="remote" model={model_type} code={response.status_code} {detail}')
            else:
                images.append(Image.open(io.BytesIO(response.content)))
        except Exception as e:
            # Best-effort: log and continue with the next latent.
            # NOTE(review): a partial failure yields fewer images than latents — confirm callers handle that.
            shared.log.error(f'Decode: type="remote" model={model_type} {e}')
            errors.display(e, 'VAE')
    t1 = time.time()
    shared.log.debug(f'Decode: type="remote" model={model_type} args={params} images={images} time={t1-t0:.3f}s')
    return images
|
||||
|
|
@ -11,7 +11,7 @@ debug('Trace: PROCESS')
|
|||
def txt2img(id_task, state,
|
||||
prompt, negative_prompt, prompt_styles,
|
||||
steps, sampler_index, hr_sampler_index,
|
||||
full_quality, tiling, hidiffusion,
|
||||
vae_type, tiling, hidiffusion,
|
||||
detailer_enabled, detailer_prompt, detailer_negative, detailer_steps, detailer_strength,
|
||||
n_iter, batch_size,
|
||||
cfg_scale, image_cfg_scale, diffusers_guidance_rescale, pag_scale, pag_adaptive, cfg_end,
|
||||
|
|
@ -64,7 +64,7 @@ def txt2img(id_task, state,
|
|||
clip_skip=clip_skip,
|
||||
width=width,
|
||||
height=height,
|
||||
full_quality=full_quality,
|
||||
vae_type=vae_type,
|
||||
detailer_enabled=detailer_enabled,
|
||||
detailer_prompt=detailer_prompt,
|
||||
detailer_negative=detailer_negative,
|
||||
|
|
|
|||
|
|
@ -161,7 +161,7 @@ def create_ui(_blocks: gr.Blocks=None):
|
|||
|
||||
mask_controls = masking.create_segment_ui()
|
||||
|
||||
full_quality, tiling, hidiffusion, cfg_scale, clip_skip, image_cfg_scale, guidance_rescale, pag_scale, pag_adaptive, cfg_end = ui_sections.create_advanced_inputs('control')
|
||||
vae_type, tiling, hidiffusion, cfg_scale, clip_skip, image_cfg_scale, guidance_rescale, pag_scale, pag_adaptive, cfg_end = ui_sections.create_advanced_inputs('control')
|
||||
hdr_mode, hdr_brightness, hdr_color, hdr_sharpen, hdr_clamp, hdr_boundary, hdr_threshold, hdr_maximize, hdr_max_center, hdr_max_boundry, hdr_color_picker, hdr_tint_ratio = ui_sections.create_correction_inputs('control')
|
||||
|
||||
with gr.Accordion(open=False, label="Video", elem_id="control_video", elem_classes=["small-accordion"]):
|
||||
|
|
@ -561,7 +561,7 @@ def create_ui(_blocks: gr.Blocks=None):
|
|||
prompt, negative, styles,
|
||||
steps, sampler_index,
|
||||
seed, subseed, subseed_strength, seed_resize_from_h, seed_resize_from_w,
|
||||
cfg_scale, clip_skip, image_cfg_scale, guidance_rescale, pag_scale, pag_adaptive, cfg_end, full_quality, tiling, hidiffusion,
|
||||
cfg_scale, clip_skip, image_cfg_scale, guidance_rescale, pag_scale, pag_adaptive, cfg_end, vae_type, tiling, hidiffusion,
|
||||
detailer_enabled, detailer_prompt, detailer_negative, detailer_steps, detailer_strength,
|
||||
hdr_mode, hdr_brightness, hdr_color, hdr_sharpen, hdr_clamp, hdr_boundary, hdr_threshold, hdr_maximize, hdr_max_center, hdr_max_boundry, hdr_color_picker, hdr_tint_ratio,
|
||||
resize_mode_before, resize_name_before, resize_context_before, width_before, height_before, scale_by_before, selected_scale_tab_before,
|
||||
|
|
@ -646,7 +646,7 @@ def create_ui(_blocks: gr.Blocks=None):
|
|||
(image_cfg_scale, "Image CFG scale"),
|
||||
(image_cfg_scale, "Hires CFG scale"),
|
||||
(guidance_rescale, "CFG rescale"),
|
||||
(full_quality, "Full quality"),
|
||||
(vae_type, "VAE type"),
|
||||
(tiling, "Tiling"),
|
||||
(hidiffusion, "HiDiffusion"),
|
||||
# detailer
|
||||
|
|
|
|||
|
|
@ -131,7 +131,7 @@ def create_ui():
|
|||
denoising_strength = gr.Slider(minimum=0.0, maximum=0.99, step=0.01, label='Denoising strength', value=0.30, elem_id="img2img_denoising_strength")
|
||||
refiner_start = gr.Slider(minimum=0.0, maximum=1.0, step=0.05, label='Denoise start', value=0.0, elem_id="img2img_refiner_start")
|
||||
|
||||
full_quality, tiling, hidiffusion, cfg_scale, clip_skip, image_cfg_scale, diffusers_guidance_rescale, pag_scale, pag_adaptive, cfg_end = ui_sections.create_advanced_inputs('img2img')
|
||||
vae_type, tiling, hidiffusion, cfg_scale, clip_skip, image_cfg_scale, diffusers_guidance_rescale, pag_scale, pag_adaptive, cfg_end = ui_sections.create_advanced_inputs('img2img')
|
||||
hdr_mode, hdr_brightness, hdr_color, hdr_sharpen, hdr_clamp, hdr_boundary, hdr_threshold, hdr_maximize, hdr_max_center, hdr_max_boundry, hdr_color_picker, hdr_tint_ratio = ui_sections.create_correction_inputs('img2img')
|
||||
enable_hr, hr_sampler_index, hr_denoising_strength, hr_resize_mode, hr_resize_context, hr_upscaler, hr_force, hr_second_pass_steps, hr_scale, hr_resize_x, hr_resize_y, refiner_steps, hr_refiner_start, refiner_prompt, refiner_negative = ui_sections.create_hires_inputs('txt2img')
|
||||
detailer_enabled, detailer_prompt, detailer_negative, detailer_steps, detailer_strength = shared.yolo.ui('img2img')
|
||||
|
|
@ -175,7 +175,7 @@ def create_ui():
|
|||
sampler_index,
|
||||
mask_blur, mask_alpha,
|
||||
inpainting_fill,
|
||||
full_quality, tiling, hidiffusion,
|
||||
vae_type, tiling, hidiffusion,
|
||||
detailer_enabled, detailer_prompt, detailer_negative, detailer_steps, detailer_strength,
|
||||
batch_count, batch_size,
|
||||
cfg_scale, image_cfg_scale,
|
||||
|
|
@ -261,7 +261,7 @@ def create_ui():
|
|||
(image_cfg_scale, "Hires CFG scale"),
|
||||
(clip_skip, "Clip skip"),
|
||||
(diffusers_guidance_rescale, "CFG rescale"),
|
||||
(full_quality, "Full quality"),
|
||||
(vae_type, "VAE type"),
|
||||
(tiling, "Tiling"),
|
||||
(hidiffusion, "HiDiffusion"),
|
||||
# detailer
|
||||
|
|
|
|||
|
|
@ -170,8 +170,9 @@ def create_advanced_inputs(tab, base=True):
|
|||
with gr.Accordion(open=False, label="Advanced", elem_id=f"{tab}_advanced", elem_classes=["small-accordion"]):
|
||||
with gr.Group():
|
||||
with gr.Row(elem_id=f"{tab}_advanced_options"):
|
||||
full_quality = gr.Checkbox(label='Full quality', value=True, elem_id=f"{tab}_full_quality")
|
||||
tiling = gr.Checkbox(label='Tiling', value=False, elem_id=f"{tab}_tiling")
|
||||
vae_type = gr.Dropdown(label='VAE type', choices=['Full', 'Tiny', 'Remote'], value='Full', elem_id=f"{tab}_vae_type")
|
||||
with gr.Row(elem_id=f"{tab}_advanced_options"):
|
||||
tiling = gr.Checkbox(label='Texture tiling', value=False, elem_id=f"{tab}_tiling")
|
||||
hidiffusion = gr.Checkbox(label='HiDiffusion', value=False, elem_id=f"{tab}_hidiffusion")
|
||||
if base:
|
||||
cfg_scale, cfg_end = create_cfg_inputs(tab)
|
||||
|
|
@ -185,7 +186,7 @@ def create_advanced_inputs(tab, base=True):
|
|||
diffusers_pag_adaptive = gr.Slider(minimum=0.0, maximum=1.0, step=0.05, label='Adaptive scaling', value=0.5, elem_id=f"{tab}_pag_adaptive", visible=shared.native)
|
||||
with gr.Row():
|
||||
clip_skip = gr.Slider(label='CLIP skip', value=1, minimum=0, maximum=12, step=0.1, elem_id=f"{tab}_clip_skip", interactive=True)
|
||||
return full_quality, tiling, hidiffusion, cfg_scale, clip_skip, image_cfg_scale, diffusers_guidance_rescale, diffusers_pag_scale, diffusers_pag_adaptive, cfg_end
|
||||
return vae_type, tiling, hidiffusion, cfg_scale, clip_skip, image_cfg_scale, diffusers_guidance_rescale, diffusers_pag_scale, diffusers_pag_adaptive, cfg_end
|
||||
|
||||
|
||||
def create_correction_inputs(tab):
|
||||
|
|
|
|||
|
|
@ -44,7 +44,7 @@ def create_ui():
|
|||
with gr.Accordion(open=False, label="Samplers", elem_classes=["small-accordion"], elem_id="txt2img_sampler_group"):
|
||||
ui_sections.create_sampler_options('txt2img')
|
||||
seed, reuse_seed, subseed, reuse_subseed, subseed_strength, seed_resize_from_h, seed_resize_from_w = ui_sections.create_seed_inputs('txt2img')
|
||||
full_quality, tiling, hidiffusion, _cfg_scale, clip_skip, image_cfg_scale, diffusers_guidance_rescale, pag_scale, pag_adaptive, _cfg_end = ui_sections.create_advanced_inputs('txt2img', base=False)
|
||||
vae_type, tiling, hidiffusion, _cfg_scale, clip_skip, image_cfg_scale, diffusers_guidance_rescale, pag_scale, pag_adaptive, _cfg_end = ui_sections.create_advanced_inputs('txt2img', base=False)
|
||||
hdr_mode, hdr_brightness, hdr_color, hdr_sharpen, hdr_clamp, hdr_boundary, hdr_threshold, hdr_maximize, hdr_max_center, hdr_max_boundry, hdr_color_picker, hdr_tint_ratio = ui_sections.create_correction_inputs('txt2img')
|
||||
enable_hr, hr_sampler_index, denoising_strength, hr_resize_mode, hr_resize_context, hr_upscaler, hr_force, hr_second_pass_steps, hr_scale, hr_resize_x, hr_resize_y, refiner_steps, refiner_start, refiner_prompt, refiner_negative = ui_sections.create_hires_inputs('txt2img')
|
||||
detailer_enabled, detailer_prompt, detailer_negative, detailer_steps, detailer_strength = shared.yolo.ui('txt2img')
|
||||
|
|
@ -64,7 +64,7 @@ def create_ui():
|
|||
dummy_component, state,
|
||||
txt2img_prompt, txt2img_negative_prompt, txt2img_prompt_styles,
|
||||
steps, sampler_index, hr_sampler_index,
|
||||
full_quality, tiling, hidiffusion,
|
||||
vae_type, tiling, hidiffusion,
|
||||
detailer_enabled, detailer_prompt, detailer_negative, detailer_steps, detailer_strength,
|
||||
batch_count, batch_size,
|
||||
cfg_scale, image_cfg_scale, diffusers_guidance_rescale, pag_scale, pag_adaptive, cfg_end,
|
||||
|
|
@ -122,7 +122,7 @@ def create_ui():
|
|||
(image_cfg_scale, "Image CFG scale"),
|
||||
(image_cfg_scale, "Hires CFG scale"),
|
||||
(diffusers_guidance_rescale, "CFG rescale"),
|
||||
(full_quality, "Full quality"),
|
||||
(vae_type, "VAE type"),
|
||||
(tiling, "Tiling"),
|
||||
(hidiffusion, "HiDiffusion"),
|
||||
# detailer
|
||||
|
|
|
|||
2
wiki
2
wiki
|
|
@ -1 +1 @@
|
|||
Subproject commit e45bfe41f0e494d6d5145443f966ba47560702f5
|
||||
Subproject commit 0ef2c8d1d85ea6fb433b7ff8f8e22d295de082c0
|
||||
Loading…
Reference in New Issue