diffusers img2img and inpaint

pull/1793/merge
Vladimir Mandic 2023-07-22 15:39:41 -04:00
parent 511a8cbb61
commit 567faeb751
13 changed files with 103 additions and 91 deletions

View File

@ -1,20 +1,29 @@
# Change Log for SD.Next
## Update for 07/21/2023
## Update for 07/22/2023
- new loading screens and artwork
- extra networks: add add/remove tags to prompt (e.g. lora activation keywords)
- extensions: fix couple of compatibility items
- number of hires fixes
- diffusers: option to set vae upcast in settings
- sd-xl: enable fp16 vae decode when using optimized vae
- general:
- new loading screens and artwork
- extra networks: add add/remove tags to prompt (e.g. lora activation keywords)
- extensions: fix couple of compatibility items
- firefox compatibility improvements
- original
- fix hires secondary sampler
this now fully obsoletes `fallback_sampler` and `force_latent_sampler`
- diffusers:
- implement img2img and inpainting (experimental)
actual support and quality depend on the model
it works as expected for sd 1.5, but not so much for sd-xl for now
- add option to save image before refiner pass
- option to set vae upcast in settings
- enable fp16 vae decode when using optimized vae
this pretty much doubles performance of decode step (delay after generate is done)
- sd-xl: loading vae now applies to both base and refiner
- diffusers 0.19.dev
- sd-xl: loading vae now applies to both base and refiner and saves a bit of vram
- diffusers: future-proof
requires `diffusers==0.19.dev`, not yet released, but can be installed manually
- sd-xl: denoising_start/denoising_end
- sd-xl: enable dual prompts
this is used regardless if refiner is enabled/loaded
dual prompt is used if set, regardless of whether the refiner is enabled/loaded
if refiner is loaded & enabled, refiner prompt will also be used for refiner pass
- primary prompt goes to [OpenAI CLIP-ViT/L-14](https://huggingface.co/openai/clip-vit-large-patch14)
- refiner prompt goes to [OpenCLIP-ViT/bigG-14](https://huggingface.co/laion/CLIP-ViT-bigG-14-laion2B-39B-b160k)

View File

@ -13,13 +13,11 @@ Stuff to be added, in no particular order...
- Diffusers:
- Add SD-XL Lora
- Add SD-XL Sketch/Inpaint
- Fix SD-XL Img2img/Inpaint
- Add VAE direct load from safetensors
- Fix Kandinsky 2.2 model
- Fix DeepFloyd IF model
- Redo Prompt parser
- Add Explicit VAE step
- Add Save image before refiner (depends on explicit VAE)
- Redo Prompt parser for diffusers
- Add unCLIP model
- Technical debt:
- Port **A1111** stuff

@ -1 +1 @@
Subproject commit 098f6cd887ac5f6f5f0e7cc9d81460095d2be012
Subproject commit 8063252cf5c1ddac5c91135e448b9aea7cf3c871

@ -1 +1 @@
Subproject commit 66827337d7688b4255b9abb25edcc21b4f3ce913
Subproject commit 6386e0c8f5b5ca0b6f81836edc85f83d4a1d6781

View File

@ -54,6 +54,8 @@ function modalKeyHandler(event) {
}
function showModal(event) {
// console.log('showModal', event);
// const source = gradioApp().querySelectorAll('.gradio-gallery > div > img')[0];
const source = event.target || event.srcElement;
const modalImage = gradioApp().getElementById('modalImage');
const lb = gradioApp().getElementById('lightboxModal');
@ -85,14 +87,17 @@ function modalZoomSet(modalImage, enable) {
function setupImageForLightbox(e) {
if (e.dataset.modded) return;
console.log('setupImageForLightbox', e);
e.dataset.modded = true;
e.style.cursor = 'pointer';
e.style.userSelect = 'none';
e.addEventListener('click', (evt) => {
const event = (navigator.userAgent.toLowerCase().indexOf('firefox') > -1) ? 'mousedown' : 'click'; // silly firefox workaround since it triggers events in wrong order
e.addEventListener(event, (evt) => {
if (evt.button !== 0) return;
const initialZoom = (localStorage.getItem('modalZoom') || true) === 'yes';
modalZoomSet(gradioApp().getElementById('modalImage'), initialZoom);
evt.preventDefault();
evt.stopPropagation();
showModal(evt);
}, true);
}

View File

@ -1,4 +1,4 @@
const locale = {
const localeData = {
data: [],
timeout: null,
finished: false,
@ -7,25 +7,25 @@ const locale = {
};
function tooltipCreate() {
locale.el = document.createElement('div');
locale.el.className = 'tooltip';
locale.el.id = 'tooltip-container';
locale.el.innerText = 'this is a hint';
gradioApp().appendChild(locale.el);
if (window.opts.tooltips === 'None') locale.type = 0;
if (window.opts.tooltips === 'Browser default') locale.type = 1;
if (window.opts.tooltips === 'UI tooltips') locale.type = 2;
localeData.el = document.createElement('div');
localeData.el.className = 'tooltip';
localeData.el.id = 'tooltip-container';
localeData.el.innerText = 'this is a hint';
gradioApp().appendChild(localeData.el);
if (window.opts.tooltips === 'None') localeData.type = 0;
if (window.opts.tooltips === 'Browser default') localeData.type = 1;
if (window.opts.tooltips === 'UI tooltips') localeData.type = 2;
}
async function tooltipShow(e) {
if (e.target.dataset.hint) {
locale.el.classList.add('tooltip-show');
locale.el.innerHTML = `<b>${e.target.textContent}</b><br>${e.target.dataset.hint}`;
localeData.el.classList.add('tooltip-show');
localeData.el.innerHTML = `<b>${e.target.textContent}</b><br>${e.target.dataset.hint}`;
}
}
async function tooltipHide(e) {
locale.el.classList.remove('tooltip-show');
localeData.el.classList.remove('tooltip-show');
}
async function validateHints(elements, data) {
@ -47,11 +47,11 @@ async function validateHints(elements, data) {
}
async function setHints() {
if (locale.finished) return;
if (locale.data.length === 0) {
if (localeData.finished) return;
if (localeData.data.length === 0) {
const res = await fetch('/file=html/locale_en.json');
const json = await res.json();
locale.data = Object.values(json).flat();
localeData.data = Object.values(json).flat();
}
const elements = [
...Array.from(gradioApp().querySelectorAll('button')),
@ -59,22 +59,22 @@ async function setHints() {
];
if (elements.length === 0) return;
if (Object.keys(opts).length === 0) return;
if (!locale.el) tooltipCreate();
if (!localeData.el) tooltipCreate();
let localized = 0;
let hints = 0;
locale.finished = true;
localeData.finished = true;
const t0 = performance.now();
for (const el of elements) {
const found = locale.data.find((l) => l.label === el.textContent.trim());
const found = localeData.data.find((l) => l.label === el.textContent.trim());
if (found?.localized?.length > 0) {
localized++;
el.textContent = found.localized;
}
if (found?.hint?.length > 0) {
hints++;
if (locale.type === 1) {
if (localeData.type === 1) {
el.title = found.hint;
} else if (locale.type === 2) {
} else if (localeData.type === 2) {
el.dataset.hint = found.hint;
el.addEventListener('mouseover', tooltipShow);
el.addEventListener('mouseout', tooltipHide);
@ -84,12 +84,12 @@ async function setHints() {
}
}
const t1 = performance.now();
console.log('setHints', { type: locale.type, elements: elements.length, localized, hints, data: locale.data.length, time: t1 - t0 });
console.log('setHints', { type: localeData.type, elements: elements.length, localized, hints, data: localeData.data.length, time: t1 - t0 });
removeSplash();
// validateHints(elements, locale.data)
// validateHints(elements, localeData.data)
}
onAfterUiUpdate(async () => {
if (locale.timeout) clearTimeout(locale.timeout);
locale.timeout = setTimeout(setHints, 250);
if (localeData.timeout) clearTimeout(localeData.timeout);
localeData.timeout = setTimeout(setHints, 250);
});

View File

@ -355,7 +355,7 @@ div#extras_scale_to_tab div.form{
flex-direction: row;
}
.modalControls { display: flex; justify-content: space-evenly; background-color: transparent; position: absolute; width: -webkit-fill-available; z-index: 1; }
.modalControls { display: flex; justify-content: space-evenly; background-color: transparent; position: absolute; width: 99%; z-index: 1; }
.modalControls:hover { background-color: #50505050; }
.modalControls span { color: white; font-size: 2em; font-weight: bold; cursor: pointer; filter: grayscale(100%); }
.modalControls span:hover, .modalControls span:focus { color: var(--highlight-color); filter: none; }

View File

@ -221,12 +221,16 @@ class StableDiffusionProcessing:
image_conditioning = image_conditioning.to(device=shared.device, dtype=source_image.dtype)
return image_conditioning
def diffusers_image_conditioning(self, _source_image, latent_image, _image_mask=None):
# shared.log.warning('Diffusers not implemented: img2img_image_conditioning')
return latent_image.new_zeros(latent_image.shape[0], 5, 1, 1)
def img2img_image_conditioning(self, source_image, latent_image, image_mask=None):
source_image = devices.cond_cast_float(source_image)
# HACK: Using introspection as the Depth2Image model doesn't appear to uniquely
# identify itself with a field common to all models. The conditioning_key is also hybrid.
if shared.backend == shared.Backend.DIFFUSERS:
shared.log.warning('Diffusers not implemented: img2img_image_conditioning')
return self.diffusers_image_conditioning(source_image, latent_image, image_mask)
if isinstance(self.sd_model, LatentDepth2ImageDiffusion):
return self.depth2img_image_conditioning(source_image)
if hasattr(self.sd_model, 'cond_stage_key') and self.sd_model.cond_stage_key == "edit":
@ -992,20 +996,19 @@ class StableDiffusionProcessingImg2Img(StableDiffusionProcessing):
self.refiner_negative = refiner_negative
self.enable_hr = None
def init(self, all_prompts, all_seeds, all_subseeds):
image_mask = self.image_mask
if shared.backend == shared.Backend.DIFFUSERS and image_mask is None:
if shared.backend == shared.Backend.DIFFUSERS and self.image_mask is None:
sd_models.set_diffuser_pipe(self.sd_model, sd_models.DiffusersTaskType.IMAGE_2_IMAGE)
elif shared.backend == shared.Backend.DIFFUSERS and image_mask is not None:
elif shared.backend == shared.Backend.DIFFUSERS and self.image_mask is not None:
sd_models.set_diffuser_pipe(self.sd_model, sd_models.DiffusersTaskType.INPAINTING)
self.sd_model.dtype = self.sd_model.unet.dtype
force_latent_upscaler = shared.opts.data.get('force_latent_sampler')
if self.sampler_name in ['PLMS']:
self.sampler_name = force_latent_upscaler if force_latent_upscaler != 'None' else shared.opts.fallback_sampler # PLMS does not support img2img, use fallback instead
if self.sampler_name == "PLMS":
self.sampler_name = 'UniPC'
self.sampler = sd_samplers.create_sampler(self.sampler_name, self.sd_model)
crop_region = None
image_mask = self.image_mask
if image_mask is not None:
image_mask = image_mask.convert('L')
if self.inpainting_mask_invert:
@ -1043,11 +1046,13 @@ class StableDiffusionProcessingImg2Img(StableDiffusionProcessing):
if image_mask is not None:
image_masked = Image.new('RGBa', (image.width, image.height))
image_masked.paste(image.convert("RGBA").convert("RGBa"), mask=ImageOps.invert(self.mask_for_overlay.convert('L')))
self.mask = image_mask # assign early for diffusers
self.overlay_images.append(image_masked.convert('RGBA'))
# crop_region is not None if we are doing inpaint full res
if crop_region is not None:
image = image.crop(crop_region)
image = images.resize_image(2, image, self.width, self.height)
self.init_images = image # assign early for diffusers
if image_mask is not None:
if self.inpainting_fill != 1:
image = masking.fill(image, latent_mask)
@ -1067,16 +1072,14 @@ class StableDiffusionProcessingImg2Img(StableDiffusionProcessing):
batch_images = np.array(imgs)
else:
raise RuntimeError(f"bad number of images passed: {len(imgs)}; expecting {self.batch_size} or less")
if shared.backend == shared.Backend.DIFFUSERS:
# we've already set self.init_images and self.mask and we dont need any more processing
return
image = torch.from_numpy(batch_images)
image = 2. * image - 1.
image = image.to(device=shared.device, dtype=devices.dtype_vae)
if shared.backend == shared.Backend.ORIGINAL:
self.init_latent = self.sd_model.get_first_stage_encoding(self.sd_model.encode_first_stage(image))
else:
# TODO Diffusers don't pre-encode the latents for diffusers to allow the UI to stay general for different model types
self.init_latent = torch.Tensor(1)
self.init_latent = self.sd_model.get_first_stage_encoding(self.sd_model.encode_first_stage(image))
if self.resize_mode == 3:
self.init_latent = torch.nn.functional.interpolate(self.init_latent, size=(self.height // opt_f, self.width // opt_f), mode="bilinear")
if image_mask is not None:
@ -1084,11 +1087,10 @@ class StableDiffusionProcessingImg2Img(StableDiffusionProcessing):
latmask = init_mask.convert('RGB').resize((self.init_latent.shape[3], self.init_latent.shape[2]))
latmask = np.moveaxis(np.array(latmask, dtype=np.float32), 2, 0) / 255
latmask = latmask[0]
latmask = np.around(latmask)
latmask = np.tile(latmask[None], (4, 1, 1))
latmask = np.around(latmask)
self.mask = torch.asarray(1.0 - latmask).to(device=shared.device, dtype=self.sd_model.dtype)
self.nmask = torch.asarray(latmask).to(device=shared.device, dtype=self.sd_model.dtype)
# this needs to be fixed to be done in sample() using actual seeds for batches
if self.inpainting_fill == 2:
self.init_latent = self.init_latent * self.mask + create_random_tensors(self.init_latent.shape[1:], all_seeds[0:self.init_latent.shape[0]]) * self.nmask
elif self.inpainting_fill == 3:

View File

@ -4,6 +4,7 @@ import modules.devices as devices
import modules.shared as shared
import modules.sd_samplers as sd_samplers
import modules.sd_models as sd_models
import modules.images as images
from modules.lora_diffusers import lora_state, unload_diffusers_lora
from modules.processing import StableDiffusionProcessing
@ -16,12 +17,12 @@ def process_diffusers(p: StableDiffusionProcessing, seeds, prompts, negative_pro
shared.state.sampling_steps = p.steps
shared.state.current_latent = latents
def vae_decode(latents, model):
def vae_decode(latents, model, output_type='np'):
if hasattr(model, 'vae'):
shared.log.debug(f'Diffusers VAE decode: name={model.vae.config.get("_name_or_path", "default")} upcast={model.vae.config.get("force_upcast", None)}')
decoded = model.vae.decode(latents / model.vae.config.scaling_factor, return_dict=False)[0]
images = model.image_processor.postprocess(decoded, output_type='np')
return images
imgs = model.image_processor.postprocess(decoded, output_type=output_type)
return imgs
else:
return latents
@ -54,16 +55,23 @@ def process_diffusers(p: StableDiffusionProcessing, seeds, prompts, negative_pro
for arg in kwargs:
if arg in possible:
args[arg] = kwargs[arg]
shared.log.debug(f'Diffuser pipeline: {pipeline.__class__.__name__} possible={possible}')
else:
pass
# shared.log.debug(f'Diffuser not supported: pipeline={pipeline.__class__.__name__} task={sd_models.get_diffusers_task(model)} arg={arg}')
# shared.log.debug(f'Diffuser pipeline: {pipeline.__class__.__name__} possible={possible}')
clean = args.copy()
clean.pop('callback', None)
clean.pop('callback_steps', None)
clean.pop('image', None)
clean.pop('mask_image', None)
clean.pop('prompt', None)
clean.pop('negative_prompt', None)
if 'image' in clean:
clean['image'] = type(clean['image'])
if 'mask_image' in clean:
clean['mask_image'] = type(clean['mask_image'])
if 'prompt' in clean:
clean['prompt'] = len(clean['prompt'])
if 'negative_prompt' in clean:
clean['negative_prompt'] = len(clean['negative_prompt'])
clean['generator'] = generator_device
shared.log.debug(f'Diffuser pipeline: {pipeline.__class__.__name__} set={clean}')
shared.log.debug(f'Diffuser pipeline: {pipeline.__class__.__name__} task={sd_models.get_diffusers_task(model)} set={clean}')
return args
if (not hasattr(shared.sd_model.scheduler, 'name')) or (shared.sd_model.scheduler.name != p.sampler_name) and (p.sampler_name != 'Default'):
@ -79,10 +87,9 @@ def process_diffusers(p: StableDiffusionProcessing, seeds, prompts, negative_pro
if sd_models.get_diffusers_task(shared.sd_model) == sd_models.DiffusersTaskType.TEXT_2_IMAGE:
task_specific_kwargs = {"height": p.height, "width": p.width}
elif sd_models.get_diffusers_task(shared.sd_model) == sd_models.DiffusersTaskType.IMAGE_2_IMAGE:
task_specific_kwargs = {"image": p.init_images[0], "strength": p.denoising_strength}
task_specific_kwargs = {"image": p.init_images, "strength": p.denoising_strength}
elif sd_models.get_diffusers_task(shared.sd_model) == sd_models.DiffusersTaskType.INPAINTING:
# TODO(PVP): change out to latents once possible with `diffusers`
task_specific_kwargs = {"image": p.init_images[0], "mask_image": p.image_mask, "strength": p.denoising_strength}
task_specific_kwargs = {"image": p.init_images, "mask_image": p.mask, "strength": p.denoising_strength}
# TODO diffusers use transformers for prompt parsing
# from modules.prompt_parser import parse_prompt_attention
@ -124,13 +131,12 @@ def process_diffusers(p: StableDiffusionProcessing, seeds, prompts, negative_pro
for i in range(len(output.images)):
"""
# TODO save before refiner
if shared.opts.save and not p.do_not_save_samples and shared.opts.save_images_before_refiner and hasattr(shared.sd_model, 'vae'):
info=infotext(n, i)
image = decode_first_stage(shared.sd_model, output.images[i].to(dtype=devices.dtype_vae))
images.save_image(image, path=p.outpath_samples, basename="", seed=seeds[i], prompt=prompts[i], extension=shared.opts.samples_format, info=info, p=p, suffix="-before-refiner")
"""
from modules.processing import create_infotext
info=create_infotext(p, p.all_prompts, p.all_seeds, p.all_subseeds, [], iteration=p.iteration, position_in_batch=i)
decoded = vae_decode(output.images, shared.sd_model, output_type='pil')
for i in range(len(decoded)):
images.save_image(decoded[i], path=p.outpath_samples, basename="", seed=seeds[i], prompt=prompts[i], extension=shared.opts.samples_format, info=info, p=p, suffix="-before-refiner")
pipe_args = set_pipeline_args(
model=shared.sd_refiner,

View File

@ -455,7 +455,7 @@ options_templates.update(options_section(('saving-images', "Image Options"), {
"save_txt": OptionInfo(False, "Create text file next to every image with generation parameters"),
"save_log_fn": OptionInfo("", "Create JSON log file for each saved image", component_args=hide_dirs),
"save_images_before_highres_fix": OptionInfo(False, "Save copy of image before applying highres fix"),
# "save_images_before_refiner": OptionInfo(False, "Save copy of image before running refiner"),
"save_images_before_refiner": OptionInfo(False, "Save copy of image before running refiner"),
"save_images_before_face_restoration": OptionInfo(False, "Save copy of image before doing face restoration"),
"save_images_before_color_correction": OptionInfo(False, "Save copy of image before applying color correction"),
"save_mask": OptionInfo(False, "Save copy of the inpainting greyscale mask"),
@ -532,7 +532,7 @@ options_templates.update(options_section(('live-preview', "Live Previews"), {
options_templates.update(options_section(('sampler-params', "Sampler Settings"), {
"show_samplers": OptionInfo(["Default", "Euler a", "UniPC", "DEIS", "DDIM", "DPM 1S", "DPM 2M", "DPM++ 2M SDE", "DPM++ 2M SDE Karras", "DPM2 Karras", "DPM++ 2M Karras"], "Show samplers in user interface", gr.CheckboxGroup, lambda: {"choices": [x.name for x in list_samplers() if x.name != "PLMS"]}),
"fallback_sampler": OptionInfo("Euler a", "Secondary sampler", gr.Dropdown, lambda: {"choices": ["None"] + [x.name for x in list_samplers()]}),
# "fallback_sampler": OptionInfo("Euler a", "Secondary sampler", gr.Dropdown, lambda: {"choices": ["None"] + [x.name for x in list_samplers()]}),
# "force_latent_sampler": OptionInfo("None", "Force latent upscaler sampler", gr.Dropdown, lambda: {"choices": ["None"] + [x.name for x in list_samplers()]}),
'uni_pc_variant': OptionInfo("bh1", "UniPC variant", gr.Radio, {"choices": ["bh1", "bh2", "vary_coeff"]}),
'uni_pc_skip_type': OptionInfo("time_uniform", "UniPC skip type", gr.Radio, {"choices": ["time_uniform", "time_quadratic", "logSNR"]}),

View File

@ -383,7 +383,7 @@ def create_ui(startup_timer = None):
with FormGroup(visible=False, elem_id="txt2img_second_pass") as hr_options:
hr_second_pass_steps, latent_index = create_sampler_and_steps_selection(modules.sd_samplers.samplers, "txt2img", False)
with FormRow(elem_id="txt2img_hires_fix_row1", variant="compact"):
denoising_strength = gr.Slider(minimum=0.0, maximum=1.0, step=0.01, label='Denoising strength', value=0.3, elem_id="txt2img_denoising_strength")
denoising_strength = gr.Slider(minimum=0.05, maximum=1.0, step=0.01, label='Denoising strength', value=0.3, elem_id="txt2img_denoising_strength")
with FormRow():
hr_final_resolution = FormHTML(value="", elem_id="txtimg_hr_finalres", label="Upscaled resolution", interactive=False)
@ -666,7 +666,7 @@ def create_ui(startup_timer = None):
image_cfg_scale = gr.Slider(minimum=0, maximum=30.0, step=0.05, label='Image CFG Scale', value=1.5, elem_id="img2img_image_cfg_scale")
diffusers_guidance_rescale = gr.Slider(minimum=0.0, maximum=1.0, step=0.05, label='Guidance Rescale', value=0.7, elem_id="txt2img_image_cfg_rescale")
with FormRow():
denoising_strength = gr.Slider(minimum=0.0, maximum=1.0, step=0.01, label='Denoising strength', value=0.75, elem_id="img2img_denoising_strength")
denoising_strength = gr.Slider(minimum=0.05, maximum=1.0, step=0.01, label='Denoising strength', value=0.75, elem_id="img2img_denoising_strength")
refiner_denoise_start = gr.Slider(minimum=0.0, maximum=1.0, step=0.05, label='Denoise start', value=0.0, elem_id="txt2img_refiner_denoise_start")
refiner_denoise_end = gr.Slider(minimum=0.0, maximum=1.0, step=0.05, label='Denoise end', value=1.0, elem_id="txt2img_refiner_denoise_end")

View File

@ -13,15 +13,6 @@ class ExtraNetworksPageHypernetworks(ui_extra_networks.ExtraNetworksPage):
def list_items(self):
for name, path in shared.hypernetworks.items():
path, _ext = os.path.splitext(path)
print('HERE', {
"name": name,
"filename": path,
"preview": self.find_preview(path),
"description": self.find_description(path),
"search_term": self.search_terms_from_path(path),
"prompt": json.dumps(f"<hypernet:{name}:{shared.opts.extra_networks_default_multiplier}>"),
"local_preview": f"{path}.preview.{shared.opts.samples_format}",
})
yield {
"name": name,
"filename": path,

View File

@ -16,6 +16,7 @@
"url": "git+https://github.com/vladmandic/automatic.git"
},
"devDependencies": {
"esbuild": "^0.18.15",
"eslint": "^8.44.0",
"eslint-config-airbnb-base": "^15.0.0"
},