diff --git a/CHANGELOG.md b/CHANGELOG.md index dfee980b0..76324adc3 100644 --- a/CHANGELOG.md +++ b/CHANGELOG.md @@ -21,6 +21,9 @@ And it also includes fixes for all reported issues so far since hires is only used for txt2img, control reuses existing resize functionality any image size is used as txt2img target size but if resize scale is also set its used to additionally upscale image after initial txt2img and for hires pass + - add support for **scripts** and **extensions** + you can now combine control workflow with your favorite script or extension + *note* extensions that are hard-coded for txt2img or img2img tabs may not work until they are updated - add **marigold** depth map processor this is state-of-the-art depth estimation model, but its quite heavy on resources - add **openpose xl** controlnet @@ -30,6 +33,7 @@ And it also includes fixes for all reported issues so far - more compact unit layout - reduce usage of temp files - add context menu to action buttons + - move ip-adapter implementation to control tabs - resize by now applies to input image or frame individually allows for processing where input images are of different sizes - fix input image size @@ -38,15 +42,17 @@ And it also includes fixes for all reported issues so far - fix batch/folder/video modes - fix pipeline switching between different modes - [FaceID](https://huggingface.co/h94/IP-Adapter-FaceID) - full implementation for *SD15* and *SD-XL*, to use simply select from *Scripts* - - **Base** (93MB) uses *InsightFace* to generate face embeds and *OpenCLIP-ViT-H-14* (2.5GB) as image encoder - - **SXDL** (1022MB) uses *InsightFace* to generate face embeds and *OpenCLIP-ViT-bigG-14* (3.7GB) as image encoder - - **Plus** (150MB) uses *InsightFace* to generate face embeds and *CLIP-ViT-H-14-laion2B* (3.8GB) as image encoder - *note*: all models are downloaded on first use + - full implementation for *SD15* and *SD-XL*, to use simply select from *Scripts* + **Base** (93MB) uses *InsightFace* to generate face embeds and *OpenCLIP-ViT-H-14* (2.5GB) as image encoder + **SXDL** (1022MB) uses *InsightFace* to generate face embeds and *OpenCLIP-ViT-bigG-14* (3.7GB) as image encoder + **Plus** (150MB) uses *InsightFace* to generate face embeds and *CLIP-ViT-H-14-laion2B* (3.8GB) as image encoder + - *note*: all models are downloaded on first use + - enable use via api, thanks @trojaner - [IPAdapter](https://huggingface.co/h94/IP-Adapter) - additional models for *SD15* and *SD-XL*, to use simply select from *Scripts*: - - **SD15**: Base, Base ViT-G, Light, Plus, Plus Face, Full Face - - **SDXL**: Base SXDL, Base ViT-H SXDL, Plus ViT-H SXDL, Plus Face ViT-H SXDL + - additional models for *SD15* and *SD-XL*, to use simply select from *Scripts*: + **SD15**: Base, Base ViT-G, Light, Plus, Plus Face, Full Face + **SDXL**: Base SXDL, Base ViT-H SXDL, Plus ViT-H SXDL, Plus Face ViT-H SXDL + - enable use via api, thanks @trojaner - **Improvements** - **ui** - globally configurable font size @@ -111,6 +117,7 @@ And it also includes fixes for all reported issues so far - img2img: clip and blip interrogate - img2img: sampler selection offset - api: return current image in progress api if requested + - api: sanitize response object - sampler: guard against invalid sampler index - config: reset default cfg scale to 6.0 - processing: correct display metadata diff --git a/modules/control/run.py b/modules/control/run.py index 5f180bd52..5c5d6da30 100644 --- a/modules/control/run.py +++ b/modules/control/run.py @@ -14,8 +14,8 @@ from modules.control.units import xs # VisLearn ControlNet-XS from modules.control.units import lite # Kohya ControlLLLite from modules.control.units import t2iadapter # TencentARC T2I-Adapter from modules.control.units import reference # ControlNet-Reference -from modules.control.units import ipadapter # IP-Adapter -from modules import devices, shared, errors, processing, images, sd_models, scripts +from scripts import ipadapter # pylint: disable=no-name-in-module +from modules import devices, shared, errors, processing, images, sd_models, scripts # pylint: disable=ungrouped-imports debug = shared.log.trace if os.environ.get('SD_CONTROL_DEBUG', None) is not None else lambda *args, **kwargs: None @@ -82,7 +82,7 @@ def control_run(units: List[unit.Unit], inputs, inits, mask, unit_type: str, is_ resize_mode_after, resize_name_after, width_after, height_after, scale_by_after, selected_scale_tab_after, denoising_strength, batch_count, batch_size, mask_blur, mask_overlap, video_skip_frames, video_type, video_duration, video_loop, video_pad, video_interpolate, - ip_adapter, ip_scale, ip_image, ip_type, + ip_adapter, ip_scale, ip_image, *input_script_args ): global pipe, original_pipeline # pylint: disable=global-statement @@ -209,7 +209,7 @@ def control_run(units: List[unit.Unit], inputs, inits, mask, unit_type: str, is_ debug(f'Control: run type={unit_type} models={has_models}') if unit_type == 'adapter' and has_models: - p.extra_generation_params["Control mode"] = 'Adapter' + p.extra_generation_params["Control mode"] = 'T2I-Adapter' p.extra_generation_params["Control conditioning"] = use_conditioning p.task_args['adapter_conditioning_scale'] = use_conditioning instance = t2iadapter.AdapterPipeline(selected_models, shared.sd_model) @@ -255,11 +255,10 @@ def control_run(units: List[unit.Unit], inputs, inits, mask, unit_type: str, is_ pipe = instance.pipeline if inits is not None: shared.log.warning('Control: ControlNet-XS does not support separate init image') - else: # run in img2img mode + else: # run in txt2img/img2img mode if len(active_strength) > 0: p.strength = active_strength[0] pipe = diffusers.AutoPipelineForText2Image.from_pipe(shared.sd_model) # use set_diffuser_pipe - # pipe = diffusers.AutoPipelineForImage2Image.from_pipe(shared.sd_model) # use set_diffuser_pipe instance = None debug(f'Control pipeline: class={pipe.__class__} args={vars(p)}') @@ -280,9 +279,6 @@ def control_run(units: List[unit.Unit], inputs, inits, mask, unit_type: str, is_ debug(f'Control device={devices.device} dtype={devices.dtype}') sd_models.copy_diffuser_options(shared.sd_model, original_pipeline) # copy options from original pipeline sd_models.set_diffuser_options(shared.sd_model) - if ipadapter.apply_ip_adapter(shared.sd_model, p, ip_adapter, ip_scale, ip_image, reset=True): - original_pipeline.feature_extractor = shared.sd_model.feature_extractor - original_pipeline.image_encoder = shared.sd_model.image_encoder try: with devices.inference_context(): @@ -429,9 +425,6 @@ def control_run(units: List[unit.Unit], inputs, inits, mask, unit_type: str, is_ else: p.task_args['image'] = init_image - if ip_type == 1 and ip_adapter != 'none': - p.task_args['ip_adapter_image'] = input_image - if is_generator: image_txt = f'{processed_image.width}x{processed_image.height}' if processed_image is not None else 'None' msg = f'process | {index} of {frames if video is not None else len(inputs)} | {"Image" if video is None else "Frame"} {image_txt}' @@ -477,6 +470,11 @@ def control_run(units: List[unit.Unit], inputs, inits, mask, unit_type: str, is_ if unit_type == 'lite': instance.apply(selected_models, p.image, use_conditioning) + # ip adapter + if ipadapter.apply(shared.sd_model, p, ip_adapter, ip_scale, ip_image or input_image): + original_pipeline.feature_extractor = shared.sd_model.feature_extractor + original_pipeline.image_encoder = shared.sd_model.image_encoder + # pipeline output = None if pipe is not None: # run new pipeline @@ -485,7 +483,6 @@ def control_run(units: List[unit.Unit], inputs, inits, mask, unit_type: str, is_ debug(f'Control exec pipeline: args={p.task_args} image={p.task_args.get("image", None)} control={p.task_args.get("control_image", None)} mask={p.task_args.get("mask_image", None)} ref={p.task_args.get("ref_image", None)}') p.scripts = scripts.scripts_control p.script_args = input_script_args - print('HERE', p.script_args) processed = p.scripts.run(p, *input_script_args) if processed is None: processed: processing.Processed = processing.process_images(p) # run actual pipeline diff --git a/modules/control/unit.py b/modules/control/unit.py index 0865aba01..b4505e91d 100644 --- a/modules/control/unit.py +++ b/modules/control/unit.py @@ -119,6 +119,8 @@ class Unit(): # mashup of gradio controls and mapping to actual implementation c self.controlnet = lite.ControlLLLite(device=default_device, dtype=default_dtype) elif self.type == 'reference': pass + elif self.type == 'ip': + pass else: log.error(f'Control unknown type: unit={unit_type}') return diff --git a/modules/control/units/ipadapter.py b/modules/control/units/ipadapter.py deleted file mode 100644 index 7ea1cc814..000000000 --- a/modules/control/units/ipadapter.py +++ /dev/null @@ -1,88 +0,0 @@ -import time -from PIL import Image -from modules import shared, processing, devices - - -image_encoder = None -image_encoder_type = None -loaded = None -ADAPTERS = [ - 'none', - 'ip-adapter_sd15', - 'ip-adapter_sd15_light', - 'ip-adapter-plus_sd15', - 'ip-adapter-plus-face_sd15', - 'ip-adapter-full-face_sd15', - # 'models/ip-adapter_sd15_vit-G', # RuntimeError: mat1 and mat2 shapes cannot be multiplied (2x1024 and 1280x3072) - 'ip-adapter_sdxl', - # 'sdxl_models/ip-adapter_sdxl_vit-h', - # 'sdxl_models/ip-adapter-plus_sdxl_vit-h', - # 'sdxl_models/ip-adapter-plus-face_sdxl_vit-h', -] - - -def apply_ip_adapter(pipe, p: processing.StableDiffusionProcessing, adapter, scale, image, reset=False): # pylint: disable=arguments-differ - from transformers import CLIPVisionModelWithProjection - # overrides - if hasattr(p, 'ip_adapter_name'): - adapter = p.ip_adapter_name - if hasattr(p, 'ip_adapter_scale'): - scale = p.ip_adapter_scale - if hasattr(p, 'ip_adapter_image'): - image = p.ip_adapter_image - # init code - global loaded, image_encoder, image_encoder_type # pylint: disable=global-statement - if pipe is None: - return - if shared.backend != shared.Backend.DIFFUSERS: - shared.log.warning('IP adapter: not in diffusers mode') - return False - if adapter == 'none': - if hasattr(pipe, 'set_ip_adapter_scale'): - pipe.set_ip_adapter_scale(0) - if loaded is not None: - shared.log.debug('IP adapter: unload attention processor') - pipe.unet.config.encoder_hid_dim_type = None - loaded = None - return False - if image is None: - image = Image.new('RGB', (512, 512), (0, 0, 0)) - if not hasattr(pipe, 'load_ip_adapter'): - shared.log.error(f'IP adapter: pipeline not supported: {pipe.__class__.__name__}') - return False - if getattr(pipe, 'image_encoder', None) is None or getattr(pipe, 'image_encoder', None) == (None, None): - if shared.sd_model_type == 'sd': - subfolder = 'models/image_encoder' - elif shared.sd_model_type == 'sdxl': - subfolder = 'sdxl_models/image_encoder' - else: - shared.log.error(f'IP adapter: unsupported model type: {shared.sd_model_type}') - return False - if image_encoder is None or image_encoder_type != shared.sd_model_type: - try: - image_encoder = CLIPVisionModelWithProjection.from_pretrained("h94/IP-Adapter", subfolder=subfolder, torch_dtype=devices.dtype, cache_dir=shared.opts.diffusers_dir, use_safetensors=True).to(devices.device) - image_encoder_type = shared.sd_model_type - except Exception as e: - shared.log.error(f'IP adapter: failed to load image encoder: {e}') - return False - pipe.image_encoder = image_encoder - - # main code - subfolder = 'models' if 'sd15' in adapter else 'sdxl_models' - if adapter != loaded or getattr(pipe.unet.config, 'encoder_hid_dim_type', None) is None or reset: - t0 = time.time() - if loaded is not None: - # shared.log.debug('IP adapter: reset attention processor') - loaded = None - else: - shared.log.debug('IP adapter: load attention processor') - pipe.load_ip_adapter("h94/IP-Adapter", subfolder=subfolder, weight_name=f'{adapter}.safetensors') - t1 = time.time() - shared.log.info(f'IP adapter load: adapter="{adapter}" scale={scale} image={image} time={t1-t0:.2f}') - loaded = adapter - else: - shared.log.debug(f'IP adapter cache: adapter="{adapter}" scale={scale} image={image}') - pipe.set_ip_adapter_scale(scale) - p.task_args['ip_adapter_image'] = p.batch_size * [image] - p.extra_generation_params["IP Adapter"] = f'{adapter}:{scale}' - return True diff --git a/modules/hypernetworks/hypernetwork.py b/modules/hypernetworks/hypernetwork.py index 5e75d2440..f5d192b05 100644 --- a/modules/hypernetworks/hypernetwork.py +++ b/modules/hypernetworks/hypernetwork.py @@ -284,7 +284,7 @@ class Hypernetwork: def list_hypernetworks(path): res = {} def list_folder(folder): - for filename in shared.listdir(folder): + for filename in os.listdir(folder): fn = os.path.join(folder, filename) if os.path.isfile(fn) and fn.lower().endswith(".pt"): name = os.path.splitext(os.path.basename(fn))[0] diff --git a/modules/images.py b/modules/images.py index adf8d78b3..e117b6860 100644 --- a/modules/images.py +++ b/modules/images.py @@ -504,7 +504,7 @@ def get_next_sequence_number(path, basename): prefix_length = len(basename) if not os.path.isdir(path): return 0 - for p in shared.listdir(path): + for p in os.listdir(path): if p.startswith(basename): parts = os.path.splitext(p[prefix_length:])[0].split('-') # splits the filename (removing the basename first if one is defined, so the sequence number is always the first element) try: diff --git a/modules/img2img.py b/modules/img2img.py index a26995760..bb51119d3 100644 --- a/modules/img2img.py +++ b/modules/img2img.py @@ -22,10 +22,10 @@ def process_batch(p, input_files, input_dir, output_dir, inpaint_mask_dir, args) if not os.path.isdir(input_dir): shared.log.error(f"Process batch: directory not found: {input_dir}") return - image_files = shared.listdir(input_dir) + image_files = os.listdir(input_dir) is_inpaint_batch = False if inpaint_mask_dir: - inpaint_masks = shared.listdir(inpaint_mask_dir) + inpaint_masks = os.listdir(inpaint_mask_dir) is_inpaint_batch = len(inpaint_masks) > 0 if is_inpaint_batch: shared.log.info(f"Process batch: inpaint batch masks={len(inpaint_masks)}") diff --git a/modules/modelloader.py b/modules/modelloader.py index 32a945c3e..d88df6b09 100644 --- a/modules/modelloader.py +++ b/modules/modelloader.py @@ -272,7 +272,7 @@ def load_diffusers_models(model_path: str, command_path: str = None, clear=True) if not os.path.isfile(os.path.join(cache_path, "hidden")): output.append(str(r.repo_id)) """ - for folder in shared.listdir(place): + for folder in os.listdir(place): try: if "--" not in folder: continue @@ -282,7 +282,7 @@ def load_diffusers_models(model_path: str, command_path: str = None, clear=True) name = name.replace("--", "/") folder = os.path.join(place, folder) friendly = os.path.join(place, name) - snapshots = shared.listdir(os.path.join(folder, "snapshots")) + snapshots = os.listdir(os.path.join(folder, "snapshots")) if len(snapshots) == 0: shared.log.warning(f"Diffusers folder has no snapshots: location={place} folder={folder} name={name}") continue @@ -579,7 +579,7 @@ def move_files(src_path: str, dest_path: str, ext_filter: str = None): if not os.path.exists(dest_path): os.makedirs(dest_path) if os.path.exists(src_path): - for file in shared.listdir(src_path): + for file in os.listdir(src_path): fullpath = os.path.join(src_path, file) if os.path.isfile(fullpath): if ext_filter is not None: diff --git a/modules/postprocessing.py b/modules/postprocessing.py index b5df4868e..dc2ae3d45 100644 --- a/modules/postprocessing.py +++ b/modules/postprocessing.py @@ -38,7 +38,7 @@ def run_postprocessing(extras_mode, image, image_folder: List[tempfile.NamedTemp elif extras_mode == 2: assert not shared.cmd_opts.hide_ui_dir_config, '--hide-ui-dir-config option must be disabled' assert input_dir, 'input directory not selected' - image_list = shared.listdir(input_dir) + image_list = os.listdir(input_dir) for filename in image_list: try: image = Image.open(filename) diff --git a/modules/script_callbacks.py b/modules/script_callbacks.py index aa71f8870..6454db2f2 100644 --- a/modules/script_callbacks.py +++ b/modules/script_callbacks.py @@ -1,3 +1,4 @@ +import os import sys import time from collections import namedtuple @@ -113,12 +114,19 @@ callback_map = dict( callbacks_on_reload=[], ) - +timers = {} def timer(t0: float, script, callback: str): t1 = time.time() - s = round(t1 - t0, 2) - if s > 0.1: - errors.log.debug(f'Script: {s} {callback} {script}') + k = f'{os.path.basename(script)}:{callback}' + if k not in timers: + timers[k] = 0 + timers[k] += t1 - t0 + + +def print_timers(): + for k, v in timers.items(): + if v > 0.05: + errors.log.debug(f'Script: time={v:.2f} {k}') def clear_callbacks(): diff --git a/modules/scripts.py b/modules/scripts.py index b44bfe51e..d6d920cf1 100644 --- a/modules/scripts.py +++ b/modules/scripts.py @@ -11,6 +11,7 @@ from installer import log AlwaysVisible = object() time_component = {} time_setup = {} +debug = log.trace if os.environ.get('SD_SCRIPT_DEBUG', None) is not None else lambda *args, **kwargs: None class PostprocessImageArgs: @@ -24,6 +25,7 @@ class PostprocessBatchListArgs: class Script: + parent = None name = None filename = None args_from = None @@ -172,11 +174,8 @@ class Script: def elem_id(self, item_id): """helper function to generate id for a HTML element, constructs final id out of script name, tab and user-supplied item_id""" - need_tabname = self.show(True) == self.show(False) - tabkind = 'img2img' if self.is_img2img else 'txt2txt' - tabname = f"{tabkind}_" if need_tabname else "" title = re.sub(r'[^a-z_0-9]', '', re.sub(r'\s', '_', self.title().lower())) - return f'script_{tabname}{title}_{item_id}' + return f'script_{self.parent}_{title}_{item_id}' current_basedir = paths.script_path @@ -224,7 +223,7 @@ def list_scripts(scriptdirname, extension): else: priority = priority + script.priority priority_list.append(ScriptFile(script.basedir, script.filename, script.path, priority)) - # log.debug(f'Adding script: {script.basedir} {script.filename} {script.path} {priority}') + debug(f'Adding script: {script.basedir} {script.filename} {script.path} {priority}') priority_sort = sorted(priority_list, key=lambda item: item.priority + item.path.lower(), reverse=False) return priority_sort @@ -255,7 +254,7 @@ def load_scripts(): for script_class in module.__dict__.values(): if type(script_class) != type: continue - # log.debug(f'Registering script: {scriptfile.path}') + debug(f'Registering script: {scriptfile.path}') if issubclass(script_class, Script): scripts_data.append(ScriptClassData(script_class, scriptfile.path, scriptfile.basedir, module)) elif issubclass(script_class, scripts_postprocessing.ScriptPostprocessing): @@ -356,7 +355,7 @@ class ScriptRunner: log.error(f'Script initialize: {path} {e}') """ - def create_script_ui(self, script): # TODO this is legacy implementation + def create_script_ui(self, script): import modules.api.models as api_models script.args_from = len(self.inputs) script.args_to = len(self.inputs) @@ -401,67 +400,69 @@ class ScriptRunner: def prepare_ui(self): self.inputs = [None] - def create_script_ui(self, script, inputs = [], inputs_alwayson = []): # noqa + def setup_ui(self, parent='unknown', accordion=True): import modules.api.models as api_models - script.args_from = len(inputs) - script.args_to = len(inputs) - controls = wrap_call(script.ui, script.filename, "ui", script.is_img2img) - if controls is None: - return - script.name = wrap_call(script.title, script.filename, "title", default=script.filename).lower() - api_args = [] - for control in controls: - if not isinstance(control, gr.components.IOComponent): - log.error(f'Invalid script control: "{script.filename}" control={control}') - continue - control.custom_script_source = os.path.basename(script.filename) - arg_info = api_models.ScriptArg(label=control.label or "") - for field in ("value", "minimum", "maximum", "step", "choices"): - v = getattr(control, field, None) - if v is not None: - setattr(arg_info, field, v) - api_args.append(arg_info) - - script.api_info = api_models.ScriptInfo( - name=script.name, - is_img2img=script.is_img2img, - is_alwayson=script.alwayson, - args=api_args, - ) - if script.infotext_fields is not None: - self.infotext_fields += script.infotext_fields - if script.paste_field_names is not None: - self.paste_field_names += script.paste_field_names - inputs += controls - inputs_alwayson += [script.alwayson for _ in controls] - script.args_to = len(inputs) - - def select_script(self, script_index): - selected_script = self.selectable_scripts[script_index - 1] if script_index > 0 else None - return [gr.update(visible=selected_script == s) for s in self.selectable_scripts] - - def init_field(self, title): - if title == 'None': # called when an initial value is set from ui-config.json to show script's UI components - return - script_index = self.titles.index(title) - self.selectable_scripts[script_index].group.visible = True - - def setup_ui(self, accordion=True): self.titles = [wrap_call(script.title, script.filename, "title") or f"{script.filename} [error]" for script in self.selectable_scripts] inputs = [] inputs_alwayson = [True] - dropdown = gr.Dropdown(label="Script", elem_id="script_list", choices=["None"] + self.titles, value="None", type="index") + def create_script_ui(script: Script, inputs, inputs_alwayson): + script.parent = parent + script.args_from = len(inputs) + script.args_to = len(inputs) + controls = wrap_call(script.ui, script.filename, "ui", script.is_img2img) + if controls is None: + return + script.name = wrap_call(script.title, script.filename, "title", default=script.filename).lower() + api_args = [] + for control in controls: + debug(f'Script control: parent={script.parent} script="{script.name}" label="{control.label}" type={control} id={control.elem_id}') + if not isinstance(control, gr.components.IOComponent): + log.error(f'Invalid script control: "{script.filename}" control={control}') + continue + control.custom_script_source = os.path.basename(script.filename) + arg_info = api_models.ScriptArg(label=control.label or "") + for field in ("value", "minimum", "maximum", "step", "choices"): + v = getattr(control, field, None) + if v is not None: + setattr(arg_info, field, v) + api_args.append(arg_info) + + script.api_info = api_models.ScriptInfo( + name=script.name, + is_img2img=script.is_img2img, + is_alwayson=script.alwayson, + args=api_args, + ) + if script.infotext_fields is not None: + self.infotext_fields += script.infotext_fields + if script.paste_field_names is not None: + self.paste_field_names += script.paste_field_names + inputs += controls + inputs_alwayson += [script.alwayson for _ in controls] + script.args_to = len(inputs) + + dropdown = gr.Dropdown(label="Script", elem_id=f'{parent}_script_list', choices=["None"] + self.titles, value="None", type="index") inputs.insert(0, dropdown) for script in self.selectable_scripts: with gr.Group(visible=False) as group: t0 = time.time() - self.create_script_ui(script, inputs, inputs_alwayson) + create_script_ui(script, inputs, inputs_alwayson) time_setup[script.title()] = time_setup.get(script.title(), 0) + (time.time()-t0) script.group = group - dropdown.init_field = self.init_field - dropdown.change(fn=self.select_script, inputs=[dropdown], outputs=[script.group for script in self.selectable_scripts]) + def select_script(script_index): + selected_script = self.selectable_scripts[script_index - 1] if script_index > 0 else None + return [gr.update(visible=selected_script == s) for s in self.selectable_scripts] + + def init_field(title): + if title == 'None': # called when an initial value is set from ui-config.json to show script's UI components + return + script_index = self.titles.index(title) + self.selectable_scripts[script_index].group.visible = True + + dropdown.init_field = init_field + dropdown.change(fn=select_script, inputs=[dropdown], outputs=[script.group for script in self.selectable_scripts]) def onload_script_visibility(params): title = params.get('Script', None) @@ -473,13 +474,11 @@ class ScriptRunner: else: return gr.update(visible=False) - # with gr.Group(elem_id='scripts_alwayson_img2img' if self.is_img2img else 'scripts_alwayson_txt2img'): - with gr.Accordion(label="Extensions", elem_id='scripts_alwayson_img2img' if self.is_img2img else 'scripts_alwayson_txt2img') if accordion else gr.Group(): + with gr.Accordion(label="Extensions", elem_id=f'{parent}_script_alwayson') if accordion else gr.Group(): for script in self.alwayson_scripts: t0 = time.time() - elem_id = f'script_{"txt2img" if script.is_txt2img else "img2img"}_{script.title().lower().replace(" ", "_")}' - with gr.Group(elem_id=elem_id, elem_classes=['extension-script']) as group: - self.create_script_ui(script, inputs, inputs_alwayson) + with gr.Group(elem_id=f'{parent}_script_{script.title().lower().replace(" ", "_")}', elem_classes=['extension-script']) as group: + create_script_ui(script, inputs, inputs_alwayson) script.group = group time_setup[script.title()] = time_setup.get(script.title(), 0) + (time.time()-t0) diff --git a/modules/textual_inversion/preprocess.py b/modules/textual_inversion/preprocess.py index 220e23077..a2ef4c6e6 100644 --- a/modules/textual_inversion/preprocess.py +++ b/modules/textual_inversion/preprocess.py @@ -135,7 +135,7 @@ def preprocess_work(process_src, process_dst, process_width, process_height, pre overlap_ratio = max(0.0, min(0.9, overlap_ratio)) assert src != dst, 'same directory specified as source and destination' os.makedirs(dst, exist_ok=True) - files = shared.listdir(src) + files = os.listdir(src) shared.state.job = "preprocess" shared.state.textinfo = "Preprocessing..." shared.state.job_count = len(files) diff --git a/modules/ui_control.py b/modules/ui_control.py index 47de774ff..0c81c5070 100644 --- a/modules/ui_control.py +++ b/modules/ui_control.py @@ -11,9 +11,8 @@ from modules.control.units import xs # vislearn ControlNet-XS from modules.control.units import lite # vislearn ControlNet-XS from modules.control.units import t2iadapter # TencentARC T2I-Adapter from modules.control.units import reference # reference pipeline -from modules.control.units import ipadapter # reference pipeline -from modules import errors, shared, progress, sd_samplers, ui_components, ui_symbols, ui_common, ui_sections, generation_parameters_copypaste, call_queue, scripts -from modules.ui_components import FormGroup +from scripts import ipadapter # pylint: disable=no-name-in-module +from modules import errors, shared, progress, sd_samplers, ui_components, ui_symbols, ui_common, ui_sections, generation_parameters_copypaste, call_queue, scripts # pylint: disable=ungrouped-imports gr_height = 512 @@ -29,7 +28,7 @@ busy = False # used to synchronize select_input and generate_click def initialize(): from modules import devices - shared.log.debug(f'Control initialize: models={shared.opts.control_dir}') + shared.log.debug(f'UI initialize: control models={shared.opts.control_dir}') controlnet.cache_dir = os.path.join(shared.opts.control_dir, 'controlnet') xs.cache_dir = os.path.join(shared.opts.control_dir, 'xs') lite.cache_dir = os.path.join(shared.opts.control_dir, 'lite') @@ -44,7 +43,7 @@ def initialize(): os.makedirs(t2iadapter.cache_dir, exist_ok=True) os.makedirs(processors.cache_dir, exist_ok=True) scripts.scripts_current = scripts.scripts_control - scripts.scripts_control.initialize_scripts(is_img2img=True) + scripts.scripts_current.initialize_scripts(is_img2img=True) def return_controls(res): @@ -70,9 +69,7 @@ def generate_click(job_id: str, active_tab: str, *args): while busy: time.sleep(0.01) from modules.control.run import control_run - shared.log.debug(f'Control: tab={active_tab} job={job_id} args={args}') - if active_tab not in ['controlnet', 'xs', 'adapter', 'reference', 'lite']: - return None, None, None, None, f'Control: Unknown mode: {active_tab} args={args}' + shared.log.debug(f'Control: tab="{active_tab}" job={job_id} args={args}') shared.state.begin('control') progress.add_task_to_queue(job_id) with call_queue.queue_lock: @@ -310,8 +307,6 @@ def create_ui(_blocks: gr.Blocks=None): with gr.Row(elem_id='control_settings'): with gr.Accordion(open=False, label="Input", elem_id="control_input", elem_classes=["small-accordion"]): - with gr.Row(): - show_ip = gr.Checkbox(label="Enable IP adapter", value=False, elem_id="control_show_ip") with gr.Row(): show_preview = gr.Checkbox(label="Show preview", value=True, elem_id="control_show_preview") with gr.Row(): @@ -350,7 +345,7 @@ def create_ui(_blocks: gr.Blocks=None): video_type.change(fn=video_type_change, inputs=[video_type], outputs=[video_duration, video_loop, video_pad, video_interpolate]) with gr.Accordion(open=False, label="Extensions", elem_id="control_extensions", elem_classes=["small-accordion"]): - input_script_args = scripts.scripts_control.setup_ui(accordion=False) + input_script_args = scripts.scripts_current.setup_ui(parent='control', accordion=False) with gr.Row(): override_settings = ui_common.create_override_inputs('control') @@ -391,17 +386,6 @@ def create_ui(_blocks: gr.Blocks=None): init_batch = gr.File(label="Input", show_label=False, file_count='multiple', file_types=['image'], type='file', interactive=True, height=gr_height) with gr.Tab('Folder', id='init-folder') as tab_folder_init: init_folder = gr.File(label="Input", show_label=False, file_count='directory', file_types=['image'], type='file', interactive=True, height=gr_height) - with gr.Column(scale=9, elem_id='control-init-column', visible=False) as column_ip: - gr.HTML('IP Adapter

') - with gr.Tabs(elem_classes=['control-tabs'], elem_id='control-tab-ip'): - with gr.Tab('Image', id='init-image') as tab_image_init: - ip_image = gr.Image(label="Input", show_label=False, type="pil", source="upload", interactive=True, tool="editor", height=gr_height) - with gr.Row(): - ip_adapter = gr.Dropdown(label='Adapter', choices=ipadapter.ADAPTERS, value='none') - ip_scale = gr.Slider(label='Scale', minimum=0.0, maximum=1.0, step=0.01, value=0.5) - with gr.Row(): - ip_type = gr.Radio(label="Input type", choices=['Init image same as control', 'Separate init image'], value='Init image same as control', type='index', elem_id='control_ip_type') - ip_image.change(fn=lambda x: gr.update(value='Init image same as control' if x is None else 'Separate init image'), inputs=[ip_image], outputs=[ip_type]) with gr.Column(scale=9, elem_id='control-output-column', visible=True) as _column_output: gr.HTML('Output

') with gr.Tabs(elem_classes=['control-tabs'], elem_id='control-tab-output') as output_tabs: @@ -464,7 +448,16 @@ def create_ui(_blocks: gr.Blocks=None): units[-1].enabled = True # enable first unit in group num_controlnet_units.change(fn=display_units, inputs=[num_controlnet_units], outputs=controlnet_ui_units) - with gr.Tab('Adapter') as _tab_adapter: + with gr.Tab('IP Adapter') as _tab_ipadapter: + with gr.Row(): + with gr.Column(): + gr.HTML('T2I-Adapter') + ip_adapter = gr.Dropdown(label='Adapter', choices=ipadapter.ADAPTERS, value='none') + ip_scale = gr.Slider(label='Scale', minimum=0.0, maximum=1.0, step=0.01, value=0.5) + with gr.Column(): + ip_image = gr.Image(label="Input", show_label=False, type="pil", source="upload", interactive=True, tool="editor", height=256, width=256) + + with gr.Tab('T2I Adapter') as _tab_t2iadapter: gr.HTML('T2I-Adapter') with gr.Row(): extra_controls = [ @@ -680,7 +673,6 @@ def create_ui(_blocks: gr.Blocks=None): btn.click(fn=transfer_input, inputs=[btn], outputs=[input_image, input_resize, input_inpaint] + input_buttons) show_preview.change(fn=lambda x: gr.update(visible=x), inputs=[show_preview], outputs=[column_preview]) - show_ip.change(fn=lambda x: gr.update(visible=x), inputs=[show_ip], outputs=[column_ip]) input_type.change(fn=lambda x: gr.update(visible=x == 2), inputs=[input_type], outputs=[column_init]) btn_prompt_counter.click(fn=call_queue.wrap_queued_call(ui_common.update_token_counter), inputs=[prompt, steps], outputs=[prompt_counter]) btn_negative_counter.click(fn=call_queue.wrap_queued_call(ui_common.update_token_counter), inputs=[negative, steps], outputs=[negative_counter]) @@ -714,7 +706,7 @@ def create_ui(_blocks: gr.Blocks=None): resize_mode_after, resize_name_after, width_after, height_after, scale_by_after, selected_scale_tab_after, denoising_strength, batch_count, batch_size, mask_blur, mask_overlap, video_skip_frames, video_type, video_duration, video_loop, video_pad, video_interpolate, - ip_adapter, ip_scale, ip_image, ip_type, + ip_adapter, ip_scale, ip_image, ] output_fields = [ preview_process, diff --git a/modules/ui_extra_networks_textual_inversion.py b/modules/ui_extra_networks_textual_inversion.py index 3d9c17da1..d94ce39dc 100644 --- a/modules/ui_extra_networks_textual_inversion.py +++ b/modules/ui_extra_networks_textual_inversion.py @@ -48,7 +48,7 @@ class ExtraNetworksPageTextualInversion(ui_extra_networks.ExtraNetworksPage): def list_items(self): def list_folder(folder): - for filename in shared.listdir(folder): + for filename in os.listdir(folder): fn = os.path.join(folder, filename) if os.path.isfile(fn) and (fn.lower().endswith(".pt") or fn.lower().endswith(".safetensors")): embedding = Embedding(vec=0, name=os.path.basename(fn), filename=fn) diff --git a/modules/ui_img2img.py b/modules/ui_img2img.py index 7cf2e45b2..f3d29065a 100644 --- a/modules/ui_img2img.py +++ b/modules/ui_img2img.py @@ -20,7 +20,7 @@ def process_interrogate(interrogation_function, mode, ii_input_files, ii_input_d if not os.path.isdir(ii_input_dir): shared.log.error(f"Interrogate: Input directory not found: {ii_input_dir}") return [gr.update(), None] - images = shared.listdir(ii_input_dir) + images = os.listdir(ii_input_dir) if ii_output_dir != "": os.makedirs(ii_output_dir, exist_ok=True) else: @@ -48,6 +48,7 @@ def interrogate_deepbooru(image): def create_ui(): + shared.log.debug('UI initialize: img2img') import modules.img2img # pylint: disable=redefined-outer-name modules.scripts.scripts_current = modules.scripts.scripts_img2img modules.scripts.scripts_img2img.initialize_scripts(is_img2img=True) @@ -169,7 +170,7 @@ def create_ui(): override_settings = ui_common.create_override_inputs('img2img') with FormGroup(elem_id="img2img_script_container"): - img2img_script_inputs = modules.scripts.scripts_img2img.setup_ui() + img2img_script_inputs = modules.scripts.scripts_img2img.setup_ui(parent='img2img', accordion=True) img2img_gallery, img2img_generation_info, img2img_html_info, _img2img_html_info_formatted, img2img_html_log = ui_common.create_output_panel("img2img") diff --git a/modules/ui_interrogate.py b/modules/ui_interrogate.py index 196cfee68..98fbe9a2f 100644 --- a/modules/ui_interrogate.py +++ b/modules/ui_interrogate.py @@ -119,7 +119,7 @@ def batch_process(batch_files, batch_folder, batch_str, mode, clip_model, write) if batch_folder is not None: files += [f.name for f in batch_folder] if batch_str is not None and len(batch_str) > 0 and os.path.exists(batch_str) and os.path.isdir(batch_str): - files += [os.path.join(batch_str, f) for f in shared.listdir(batch_str) if f.lower().endswith(('.png', '.jpg', '.jpeg', '.webp'))] + files += [os.path.join(batch_str, f) for f in os.listdir(batch_str) if f.lower().endswith(('.png', '.jpg', '.jpeg', '.webp'))] if len(files) == 0: shared.log.error('Interrogate batch no images') return '' diff --git a/modules/ui_symbols.py b/modules/ui_symbols.py index 509df747b..2db3e6dc5 100644 --- a/modules/ui_symbols.py +++ b/modules/ui_symbols.py @@ -21,6 +21,10 @@ info = 'ℹ' # noqa reset = '🔄' upload = '⬆️' preview = '🔍' +mark_diag = '※' +mark_flag = '⁜' +int_clip = '✎' +int_blip = '✐' """ refresh = '🔄' close = '🛗' diff --git a/modules/ui_txt2img.py b/modules/ui_txt2img.py index eac47b4b7..224443f10 100644 --- a/modules/ui_txt2img.py +++ b/modules/ui_txt2img.py @@ -16,6 +16,7 @@ def calc_resolution_hires(width, height, hr_scale, hr_resize_x, hr_resize_y, hr_ def create_ui(): + shared.log.debug('UI initialize: txt2img') import modules.txt2img # pylint: disable=redefined-outer-name modules.scripts.scripts_current = modules.scripts.scripts_txt2img modules.scripts.scripts_txt2img.initialize_scripts(is_img2img=False) @@ -48,7 +49,7 @@ def create_ui(): enable_hr, hr_sampler_index, denoising_strength, hr_final_resolution, hr_upscaler, hr_force, hr_second_pass_steps, hr_scale, hr_resize_x, hr_resize_y, refiner_steps, refiner_start, refiner_prompt, refiner_negative = ui_sections.create_hires_inputs('txt2img') override_settings = ui_common.create_override_inputs('txt2img') - txt2img_script_inputs = modules.scripts.scripts_txt2img.setup_ui() + txt2img_script_inputs = modules.scripts.scripts_txt2img.setup_ui(parent='txt2img', accordion=True) hr_resolution_preview_inputs = [width, height, hr_scale, hr_resize_x, hr_resize_y, hr_upscaler] for preview_input in hr_resolution_preview_inputs: diff --git a/modules/upscaler.py b/modules/upscaler.py index 37b5fde93..3e982d1b0 100644 --- a/modules/upscaler.py +++ b/modules/upscaler.py @@ -52,7 +52,7 @@ class Upscaler: pass def find_folder(self, folder, scalers, loaded): - for fn in modules.shared.listdir(folder): # from folder + for fn in os.listdir(folder): # from folder file_name = os.path.join(folder, fn) if os.path.isdir(file_name): self.find_folder(file_name, scalers, loaded) diff --git a/scripts/ipadapter.py b/scripts/ipadapter.py index 3a77e2258..0527e1a59 100644 --- a/scripts/ipadapter.py +++ b/scripts/ipadapter.py @@ -34,6 +34,104 @@ ADAPTERS = { } +def apply(pipe, p: processing.StableDiffusionProcessing, adapter_name, scale, image): # pylint: disable=arguments-differ + # overrides + adapter = ADAPTERS.get(adapter_name, None) + if hasattr(p, 'ip_adapter_name'): + adapter = p.ip_adapter_name + if hasattr(p, 'ip_adapter_scale'): + scale = p.ip_adapter_scale + if hasattr(p, 'ip_adapter_image'): + image = p.ip_adapter_image + if adapter is None: + return False + # init code + global loaded, checkpoint, image_encoder, image_encoder_type, image_encoder_name # pylint: disable=global-statement + if pipe is None: + return False + if shared.backend != shared.Backend.DIFFUSERS: + shared.log.warning('IP adapter: not in diffusers mode') + return + if image is None and adapter != 'none': + shared.log.error('IP adapter: no image provided') + adapter = 'none' # unload adapter if previously loaded as it will cause runtime errors + if adapter == 'none': + if hasattr(pipe, 'set_ip_adapter_scale'): + pipe.set_ip_adapter_scale(0) + if loaded is not None: + shared.log.debug('IP adapter: unload attention processor') + pipe.unet.config.encoder_hid_dim_type = None + loaded = None + return False + if not hasattr(pipe, 'load_ip_adapter'): + import diffusers + diffusers.StableDiffusionPipeline.load_ip_adapter() + shared.log.error(f'IP adapter: pipeline not supported: {pipe.__class__.__name__}') + return False + + # which clip to use + if 'ViT' not in adapter_name: + clip_repo = base_repo + subfolder = 'models/image_encoder' if shared.sd_model_type == 'sd' else 'sdxl_models/image_encoder' # defaults per model + elif 'ViT-H' in adapter_name: + clip_repo = base_repo + subfolder = 'models/image_encoder' # this is vit-h + elif 'ViT-G' in adapter_name: + clip_repo = base_repo + subfolder = 'sdxl_models/image_encoder' # this is vit-g + else: + shared.log.error(f'IP adapter: unknown model type: {adapter_name}') + return False + + # load image encoder used by ip adapter + if getattr(pipe, 'image_encoder', None) is None or image_encoder_name != clip_repo + '/' + subfolder: + if image_encoder is None or image_encoder_type != shared.sd_model_type or checkpoint != shared.opts.sd_model_checkpoint or image_encoder_name != clip_repo + '/' + subfolder: + if shared.sd_model_type != 'sd' and shared.sd_model_type != 'sdxl': + shared.log.error(f'IP adapter: unsupported model type: {shared.sd_model_type}') + return False + try: + from transformers import CLIPVisionModelWithProjection + shared.log.debug(f'IP adapter: load image encoder: {clip_repo}/{subfolder}') + image_encoder = CLIPVisionModelWithProjection.from_pretrained(clip_repo, subfolder=subfolder, torch_dtype=devices.dtype, cache_dir=shared.opts.diffusers_dir, use_safetensors=True).to(devices.device) + image_encoder_type = shared.sd_model_type + image_encoder_name = clip_repo + '/' + subfolder + except Exception as e: + shared.log.error(f'IP adapter: failed to load image encoder: {e}') + return + if getattr(pipe, 'feature_extractor', None) is None: + from transformers import CLIPImageProcessor + shared.log.debug('IP adapter: load feature extractor') + pipe.feature_extractor = CLIPImageProcessor() + + # main code + # subfolder = 'models' if 'sd15' in adapter else 'sdxl_models' + if adapter != loaded or getattr(pipe.unet.config, 'encoder_hid_dim_type', None) is None or checkpoint != shared.opts.sd_model_checkpoint: + t0 = time.time() + if loaded is not None: + shared.log.debug('IP adapter: reset attention processor') + loaded = None + else: + shared.log.debug('IP adapter: load attention processor') + pipe.image_encoder = image_encoder + subfolder = 'models' if shared.sd_model_type == 'sd' else 'sdxl_models' + pipe.load_ip_adapter(base_repo, subfolder=subfolder, weight_name=adapter) + t1 = time.time() + shared.log.info(f'IP adapter load: adapter="{adapter}" scale={scale} image={image} time={t1-t0:.2f}') + loaded = adapter + checkpoint = shared.opts.sd_model_checkpoint + else: + shared.log.debug(f'IP adapter cache: adapter="{adapter}" scale={scale} image={image}') + pipe.set_ip_adapter_scale(scale) + + if isinstance(image, str): + from modules.api.api import decode_base64_to_image + image = decode_base64_to_image(image) + + p.task_args['ip_adapter_image'] = p.batch_size * [image] + p.extra_generation_params["IP Adapter"] = f'{adapter}:{scale}' + return True + + class Script(scripts.Script): def title(self): return 'IP Adapter' @@ -51,97 +149,4 @@ class Script(scripts.Script): return [adapter, scale, image] def process(self, p: processing.StableDiffusionProcessing, adapter_name, scale, image): # pylint: disable=arguments-differ - # overrides - adapter = ADAPTERS.get(adapter_name, None) - if hasattr(p, 'ip_adapter_name'): - adapter = p.ip_adapter_name - if hasattr(p, 'ip_adapter_scale'): - scale = p.ip_adapter_scale - if hasattr(p, 'ip_adapter_image'): - image = p.ip_adapter_image - if adapter is None: - return - # init code - global loaded, checkpoint, image_encoder, image_encoder_type, image_encoder_name # pylint: disable=global-statement - if shared.sd_model is None: - return - if shared.backend != shared.Backend.DIFFUSERS: - shared.log.warning('IP adapter: not in diffusers mode') - return - if image is None and adapter != 'none': - shared.log.error('IP adapter: no image provided') - adapter = 'none' # unload adapter if previously loaded as it will cause runtime errors - if adapter == 'none': - if hasattr(shared.sd_model, 'set_ip_adapter_scale'): - shared.sd_model.set_ip_adapter_scale(0) - if loaded is not None: - shared.log.debug('IP adapter: unload attention processor') - shared.sd_model.unet.config.encoder_hid_dim_type = None - loaded = None - return - if not hasattr(shared.sd_model, 'load_ip_adapter'): - import diffusers - diffusers.StableDiffusionPipeline.load_ip_adapter() - shared.log.error(f'IP adapter: pipeline not supported: {shared.sd_model.__class__.__name__}') - return - - # which clip to use - if 'ViT' not in adapter_name: - clip_repo = base_repo - subfolder = 'models/image_encoder' if shared.sd_model_type == 'sd' else 'sdxl_models/image_encoder' # defaults per model - elif 'ViT-H' in adapter_name: - clip_repo = base_repo - subfolder = 'models/image_encoder' # this is vit-h - elif 'ViT-G' in adapter_name: - clip_repo = base_repo - subfolder = 'sdxl_models/image_encoder' # this is vit-g - else: - shared.log.error(f'IP adapter: unknown model type: {adapter_name}') - return - - # load image encoder used by ip adapter - if getattr(shared.sd_model, 'image_encoder', None) is None or image_encoder_name != clip_repo + '/' + subfolder: - if image_encoder is None or image_encoder_type != shared.sd_model_type or checkpoint != shared.opts.sd_model_checkpoint or image_encoder_name != clip_repo + '/' + subfolder: - if shared.sd_model_type != 'sd' and shared.sd_model_type != 'sdxl': - shared.log.error(f'IP adapter: unsupported model type: {shared.sd_model_type}') - return - try: - from transformers import CLIPVisionModelWithProjection - shared.log.debug(f'IP adapter: load image encoder: {clip_repo}/{subfolder}') - image_encoder = CLIPVisionModelWithProjection.from_pretrained(clip_repo, subfolder=subfolder, torch_dtype=devices.dtype, cache_dir=shared.opts.diffusers_dir, use_safetensors=True).to(devices.device) - image_encoder_type = shared.sd_model_type - image_encoder_name = clip_repo + '/' + subfolder - except Exception as e: - shared.log.error(f'IP adapter: failed to load image encoder: {e}') - return - if getattr(shared.sd_model, 'feature_extractor', None) is None: - from transformers import CLIPImageProcessor - shared.log.debug('IP adapter: load feature extractor') - shared.sd_model.feature_extractor = CLIPImageProcessor() - - # main code - # subfolder = 'models' if 'sd15' in adapter else 'sdxl_models' - if adapter != loaded or getattr(shared.sd_model.unet.config, 'encoder_hid_dim_type', None) is None or checkpoint != shared.opts.sd_model_checkpoint: - t0 = time.time() - if loaded is not None: - shared.log.debug('IP adapter: reset attention processor') - loaded = None - else: - shared.log.debug('IP adapter: load attention processor') - shared.sd_model.image_encoder = image_encoder - subfolder = 'models' if shared.sd_model_type == 'sd' else 'sdxl_models' - shared.sd_model.load_ip_adapter(base_repo, subfolder=subfolder, weight_name=adapter) - t1 = time.time() - shared.log.info(f'IP adapter load: adapter="{adapter}" scale={scale} image={image} time={t1-t0:.2f}') - loaded = adapter - checkpoint = shared.opts.sd_model_checkpoint - else: - shared.log.debug(f'IP adapter cache: adapter="{adapter}" scale={scale} image={image}') - shared.sd_model.set_ip_adapter_scale(scale) - - if isinstance(image, str): - from modules.api.api import decode_base64_to_image - image = decode_base64_to_image(image) - - p.task_args['ip_adapter_image'] = p.batch_size * [image] - p.extra_generation_params["IP Adapter"] = f'{adapter}:{scale}' + apply(shared.sd_model, p, adapter_name, scale, image) diff --git a/webui.py b/webui.py index 000b33f7a..8b87989b8 100644 --- a/webui.py +++ b/webui.py @@ -311,13 +311,13 @@ def webui(restart=False): log.info(f"Startup time: {timer.startup.summary()}") debug = log.trace if os.environ.get('SD_SCRIPT_DEBUG', None) is not None else lambda *args, **kwargs: None debug('Trace: SCRIPTS') - debug('Loaded scripts:') for m in modules.scripts.scripts_data: debug(f' {m}') debug('Loaded postprocessing scripts:') for m in modules.scripts.postprocessing_scripts_data: debug(f' {m}') timer.startup.reset() + modules.script_callbacks.print_timers() if not restart: # override all loggers to use the same handlers as the main logger