diff --git a/html/reference.json b/html/reference.json index 1ab1291f7..6aa309e8e 100644 --- a/html/reference.json +++ b/html/reference.json @@ -99,6 +99,11 @@ "desc": "PixArt-α is a Transformer-based T2I diffusion model whose image generation quality is competitive with state-of-the-art image generators (e.g., Imagen, SDXL, and even Midjourney), and the training speed markedly surpasses existing large-scale T2I models. Extensive experiments demonstrate that PIXART-α excels in image quality, artistry, and semantic control. It can directly generate 1024px images from text prompts within a single sampling process.", "preview": "PixArt-alpha--PixArt-XL-2-1024-MS.jpg" }, + "Pixart-α XL 2 Large LCM": { + "path": "PixArt-alpha/PixArt-LCM-XL-2-1024-MS", + "desc": "PixArt-α consists of pure transformer blocks for latent diffusion: It can directly generate 1024px images from text prompts within a single sampling process. LCM is a diffusion distillation method which predicts the PF-ODE's solution directly in latent space, achieving super fast inference with few steps. Following LCM LoRA, we illustrate the generation speed we achieve on various computers. Let us stress again how liberating it is to explore image generation so easily with PixArt-LCM.", + "preview": "PixArt-alpha--PixArt-XL-2-1024-MS.jpg" + }, "Warp Wuerstchen": { "path": "warp-ai/wuerstchen", "desc": "Würstchen is a diffusion model whose text-conditional model works in a highly compressed latent space of images. Why is this important? Compressing data can reduce computational costs for both training and inference by magnitudes. Training on 1024x1024 images, is way more expensive than training at 32x32. Usually, other works make use of a relatively small compression, in the range of 4x - 8x spatial compression. Würstchen takes this to an extreme. Through its novel design, we achieve a 42x spatial compression. Würstchen employs a two-stage compression, what we call Stage A and Stage B. 
Stage A is a VQGAN, and Stage B is a Diffusion Autoencoder (more details can be found in the paper). A third model, Stage C, is learned in that highly compressed latent space. This training requires fractions of the compute used for current top-performing models, allowing also cheaper and faster inference.", diff --git a/modules/sd_models.py b/modules/sd_models.py index f8375092b..0a0df6c82 100644 --- a/modules/sd_models.py +++ b/modules/sd_models.py @@ -538,13 +538,10 @@ sd2_clip_weight = 'cond_stage_model.model.transformer.resblocks.0.attn.in_proj_w def change_backend(): - shared.log.info(f'Backend changed: {shared.backend}') + shared.log.info(f'Backend changed: from={shared.backend} to={shared.opts.sd_backend}') shared.log.warning('Full server restart required to apply all changes') - if shared.backend == shared.Backend.ORIGINAL: - change_from = shared.Backend.DIFFUSERS - else: - change_from = shared.Backend.ORIGINAL - unload_model_weights(change_from=change_from) + unload_model_weights() + shared.backend = shared.Backend.ORIGINAL if shared.opts.sd_backend == 'original' else shared.Backend.DIFFUSERS checkpoints_loaded.clear() from modules.sd_samplers import list_samplers list_samplers(shared.backend) @@ -1287,13 +1284,13 @@ def disable_offload(sd_model): remove_hook_from_module(model, recurse=True) -def unload_model_weights(op='model', change_from='none'): +def unload_model_weights(op='model'): if shared.compiled_model_state is not None: shared.compiled_model_state.compiled_cache.clear() shared.compiled_model_state.partitioned_modules.clear() if op == 'model' or op == 'dict': if model_data.sd_model: - if (shared.backend == shared.Backend.ORIGINAL and change_from != shared.Backend.DIFFUSERS) or change_from == shared.Backend.ORIGINAL: + if shared.backend == shared.Backend.ORIGINAL: from modules import sd_hijack model_data.sd_model.to(devices.cpu) sd_hijack.model_hijack.undo_hijack(model_data.sd_model) @@ -1304,7 +1301,7 @@ def unload_model_weights(op='model', 
change_from='none'): shared.log.debug(f'Unload weights {op}: {memory_stats()}') else: if model_data.sd_refiner: - if (shared.backend == shared.Backend.ORIGINAL and change_from != shared.Backend.DIFFUSERS) or change_from == shared.Backend.ORIGINAL: + if shared.backend == shared.Backend.ORIGINAL: from modules import sd_hijack model_data.sd_model.to(devices.cpu) sd_hijack.model_hijack.undo_hijack(model_data.sd_refiner) diff --git a/modules/shared.py b/modules/shared.py index 190aad792..f5f721f41 100644 --- a/modules/shared.py +++ b/modules/shared.py @@ -291,11 +291,11 @@ else: # cuda options_templates.update(options_section(('sd', "Execution & Models"), { - "sd_backend": OptionInfo('diffusers' if backend == Backend.DIFFUSERS else 'original', "Execution backend", gr.Radio, {"choices": ["original", "diffusers"] }), - "sd_checkpoint_autoload": OptionInfo(True, "Model autoload on server start"), + "sd_backend": OptionInfo(default_backend, "Execution backend", gr.Radio, {"choices": ["original", "diffusers"] }), "sd_model_checkpoint": OptionInfo(default_checkpoint, "Base model", gr.Dropdown, lambda: {"choices": list_checkpoint_tiles()}, refresh=refresh_checkpoints), "sd_model_refiner": OptionInfo('None', "Refiner model", gr.Dropdown, lambda: {"choices": ['None'] + list_checkpoint_tiles()}, refresh=refresh_checkpoints), "sd_vae": OptionInfo("Automatic", "VAE model", gr.Dropdown, lambda: {"choices": shared_items.sd_vae_items()}, refresh=shared_items.refresh_vae_list), + "sd_checkpoint_autoload": OptionInfo(True, "Model autoload on server start"), "sd_model_dict": OptionInfo('None', "Use baseline data from a different model", gr.Dropdown, lambda: {"choices": ['None'] + list_checkpoint_tiles()}, refresh=refresh_checkpoints), "stream_load": OptionInfo(False, "Load models using stream loading method", gr.Checkbox, {"visible": backend == Backend.ORIGINAL }), "model_reuse_dict": OptionInfo(False, "When loading models attempt to reuse previous model dictionary", gr.Checkbox, 
{"visible": False}),