cleanup server settings

pull/2701/head
Vladimir Mandic 2024-01-09 14:51:21 -05:00
parent 9eb56919df
commit 23a4e6ee06
3 changed files with 13 additions and 11 deletions

View File

@ -99,6 +99,11 @@
"desc": "PixArt-α is a Transformer-based T2I diffusion model whose image generation quality is competitive with state-of-the-art image generators (e.g., Imagen, SDXL, and even Midjourney), and the training speed markedly surpasses existing large-scale T2I models. Extensive experiments demonstrate that PIXART-α excels in image quality, artistry, and semantic control. It can directly generate 1024px images from text prompts within a single sampling process.",
"preview": "PixArt-alpha--PixArt-XL-2-1024-MS.jpg"
},
"Pixart-α XL 2 Large LCM": {
"path": "PixArt-alpha/PixArt-LCM-XL-2-1024-MS",
"desc": "Pixart-α consists of pure transformer blocks for latent diffusion: It can directly generate 1024px images from text prompts within a single sampling process. LCMs is a diffusion distillation method which predict PF-ODE's solution directly in latent space, achieving super fast inference with few steps. Following LCM LoRA, we illustrative of the generation speed we achieve on various computers. Let us stress again how liberating it is to explore image generation so easily with PixArt-LCM.",
"preview": "PixArt-alpha--PixArt-XL-2-1024-MS.jpg"
},
"Warp Wuerstchen": {
"path": "warp-ai/wuerstchen",
"desc": "Würstchen is a diffusion model whose text-conditional model works in a highly compressed latent space of images. Why is this important? Compressing data can reduce computational costs for both training and inference by magnitudes. Training on 1024x1024 images, is way more expensive than training at 32x32. Usually, other works make use of a relatively small compression, in the range of 4x - 8x spatial compression. Würstchen takes this to an extreme. Through its novel design, we achieve a 42x spatial compression. Würstchen employs a two-stage compression, what we call Stage A and Stage B. Stage A is a VQGAN, and Stage B is a Diffusion Autoencoder (more details can be found in the paper). A third model, Stage C, is learned in that highly compressed latent space. This training requires fractions of the compute used for current top-performing models, allowing also cheaper and faster inference.",

View File

@ -538,13 +538,10 @@ sd2_clip_weight = 'cond_stage_model.model.transformer.resblocks.0.attn.in_proj_w
def change_backend():
shared.log.info(f'Backend changed: {shared.backend}')
shared.log.info(f'Backend changed: from={shared.backend} to={shared.opts.sd_backend}')
shared.log.warning('Full server restart required to apply all changes')
if shared.backend == shared.Backend.ORIGINAL:
change_from = shared.Backend.DIFFUSERS
else:
change_from = shared.Backend.ORIGINAL
unload_model_weights(change_from=change_from)
unload_model_weights()
shared.backend = shared.Backend.ORIGINAL if shared.opts.sd_backend == 'original' else shared.Backend.DIFFUSERS
checkpoints_loaded.clear()
from modules.sd_samplers import list_samplers
list_samplers(shared.backend)
@ -1287,13 +1284,13 @@ def disable_offload(sd_model):
remove_hook_from_module(model, recurse=True)
def unload_model_weights(op='model', change_from='none'):
def unload_model_weights(op='model'):
if shared.compiled_model_state is not None:
shared.compiled_model_state.compiled_cache.clear()
shared.compiled_model_state.partitioned_modules.clear()
if op == 'model' or op == 'dict':
if model_data.sd_model:
if (shared.backend == shared.Backend.ORIGINAL and change_from != shared.Backend.DIFFUSERS) or change_from == shared.Backend.ORIGINAL:
if shared.backend == shared.Backend.ORIGINAL:
from modules import sd_hijack
model_data.sd_model.to(devices.cpu)
sd_hijack.model_hijack.undo_hijack(model_data.sd_model)
@ -1304,7 +1301,7 @@ def unload_model_weights(op='model', change_from='none'):
shared.log.debug(f'Unload weights {op}: {memory_stats()}')
else:
if model_data.sd_refiner:
if (shared.backend == shared.Backend.ORIGINAL and change_from != shared.Backend.DIFFUSERS) or change_from == shared.Backend.ORIGINAL:
if shared.backend == shared.Backend.ORIGINAL:
from modules import sd_hijack
model_data.sd_model.to(devices.cpu)
sd_hijack.model_hijack.undo_hijack(model_data.sd_refiner)

View File

@ -291,11 +291,11 @@ else: # cuda
options_templates.update(options_section(('sd', "Execution & Models"), {
"sd_backend": OptionInfo('diffusers' if backend == Backend.DIFFUSERS else 'original', "Execution backend", gr.Radio, {"choices": ["original", "diffusers"] }),
"sd_checkpoint_autoload": OptionInfo(True, "Model autoload on server start"),
"sd_backend": OptionInfo(default_backend, "Execution backend", gr.Radio, {"choices": ["original", "diffusers"] }),
"sd_model_checkpoint": OptionInfo(default_checkpoint, "Base model", gr.Dropdown, lambda: {"choices": list_checkpoint_tiles()}, refresh=refresh_checkpoints),
"sd_model_refiner": OptionInfo('None', "Refiner model", gr.Dropdown, lambda: {"choices": ['None'] + list_checkpoint_tiles()}, refresh=refresh_checkpoints),
"sd_vae": OptionInfo("Automatic", "VAE model", gr.Dropdown, lambda: {"choices": shared_items.sd_vae_items()}, refresh=shared_items.refresh_vae_list),
"sd_checkpoint_autoload": OptionInfo(True, "Model autoload on server start"),
"sd_model_dict": OptionInfo('None', "Use baseline data from a different model", gr.Dropdown, lambda: {"choices": ['None'] + list_checkpoint_tiles()}, refresh=refresh_checkpoints),
"stream_load": OptionInfo(False, "Load models using stream loading method", gr.Checkbox, {"visible": backend == Backend.ORIGINAL }),
"model_reuse_dict": OptionInfo(False, "When loading models attempt to reuse previous model dictionary", gr.Checkbox, {"visible": False}),