fix folder enum and refactor control ipadapter

pull/2701/head
Vladimir Mandic 2024-01-07 16:06:03 -05:00
parent 119424399f
commit d2940a05d1
21 changed files with 237 additions and 309 deletions

View File

@ -21,6 +21,9 @@ And it also includes fixes for all reported issues so far
since hires is only used for txt2img, control reuses existing resize functionality
any image size is used as txt2img target size
but if resize scale is also set, it's used to additionally upscale the image after initial txt2img and for the hires pass
- add support for **scripts** and **extensions**
you can now combine control workflow with your favorite script or extension
*note* extensions that are hard-coded for txt2img or img2img tabs may not work until they are updated
- add **marigold** depth map processor
this is a state-of-the-art depth estimation model, but it's quite heavy on resources
- add **openpose xl** controlnet
@ -30,6 +33,7 @@ And it also includes fixes for all reported issues so far
- more compact unit layout
- reduce usage of temp files
- add context menu to action buttons
- move ip-adapter implementation to control tabs
- resize by now applies to input image or frame individually
allows for processing where input images are of different sizes
- fix input image size
@ -38,15 +42,17 @@ And it also includes fixes for all reported issues so far
- fix batch/folder/video modes
- fix pipeline switching between different modes
- [FaceID](https://huggingface.co/h94/IP-Adapter-FaceID)
full implementation for *SD15* and *SD-XL*, to use simply select from *Scripts*
- **Base** (93MB) uses *InsightFace* to generate face embeds and *OpenCLIP-ViT-H-14* (2.5GB) as image encoder
- **SXDL** (1022MB) uses *InsightFace* to generate face embeds and *OpenCLIP-ViT-bigG-14* (3.7GB) as image encoder
- **Plus** (150MB) uses *InsightFace* to generate face embeds and *CLIP-ViT-H-14-laion2B* (3.8GB) as image encoder
*note*: all models are downloaded on first use
- full implementation for *SD15* and *SD-XL*, to use simply select from *Scripts*
**Base** (93MB) uses *InsightFace* to generate face embeds and *OpenCLIP-ViT-H-14* (2.5GB) as image encoder
**SXDL** (1022MB) uses *InsightFace* to generate face embeds and *OpenCLIP-ViT-bigG-14* (3.7GB) as image encoder
**Plus** (150MB) uses *InsightFace* to generate face embeds and *CLIP-ViT-H-14-laion2B* (3.8GB) as image encoder
- *note*: all models are downloaded on first use
- enable use via api, thanks @trojaner
- [IPAdapter](https://huggingface.co/h94/IP-Adapter)
additional models for *SD15* and *SD-XL*, to use simply select from *Scripts*:
- **SD15**: Base, Base ViT-G, Light, Plus, Plus Face, Full Face
- **SDXL**: Base SXDL, Base ViT-H SXDL, Plus ViT-H SXDL, Plus Face ViT-H SXDL
- additional models for *SD15* and *SD-XL*, to use simply select from *Scripts*:
**SD15**: Base, Base ViT-G, Light, Plus, Plus Face, Full Face
**SDXL**: Base SXDL, Base ViT-H SXDL, Plus ViT-H SXDL, Plus Face ViT-H SXDL
- enable use via api, thanks @trojaner
- **Improvements**
- **ui**
- globally configurable font size
@ -111,6 +117,7 @@ And it also includes fixes for all reported issues so far
- img2img: clip and blip interrogate
- img2img: sampler selection offset
- api: return current image in progress api if requested
- api: sanitize response object
- sampler: guard against invalid sampler index
- config: reset default cfg scale to 6.0
- processing: correct display metadata

View File

@ -14,8 +14,8 @@ from modules.control.units import xs # VisLearn ControlNet-XS
from modules.control.units import lite # Kohya ControlLLLite
from modules.control.units import t2iadapter # TencentARC T2I-Adapter
from modules.control.units import reference # ControlNet-Reference
from modules.control.units import ipadapter # IP-Adapter
from modules import devices, shared, errors, processing, images, sd_models, scripts
from scripts import ipadapter # pylint: disable=no-name-in-module
from modules import devices, shared, errors, processing, images, sd_models, scripts # pylint: disable=ungrouped-imports
debug = shared.log.trace if os.environ.get('SD_CONTROL_DEBUG', None) is not None else lambda *args, **kwargs: None
@ -82,7 +82,7 @@ def control_run(units: List[unit.Unit], inputs, inits, mask, unit_type: str, is_
resize_mode_after, resize_name_after, width_after, height_after, scale_by_after, selected_scale_tab_after,
denoising_strength, batch_count, batch_size, mask_blur, mask_overlap,
video_skip_frames, video_type, video_duration, video_loop, video_pad, video_interpolate,
ip_adapter, ip_scale, ip_image, ip_type,
ip_adapter, ip_scale, ip_image,
*input_script_args
):
global pipe, original_pipeline # pylint: disable=global-statement
@ -209,7 +209,7 @@ def control_run(units: List[unit.Unit], inputs, inits, mask, unit_type: str, is_
debug(f'Control: run type={unit_type} models={has_models}')
if unit_type == 'adapter' and has_models:
p.extra_generation_params["Control mode"] = 'Adapter'
p.extra_generation_params["Control mode"] = 'T2I-Adapter'
p.extra_generation_params["Control conditioning"] = use_conditioning
p.task_args['adapter_conditioning_scale'] = use_conditioning
instance = t2iadapter.AdapterPipeline(selected_models, shared.sd_model)
@ -255,11 +255,10 @@ def control_run(units: List[unit.Unit], inputs, inits, mask, unit_type: str, is_
pipe = instance.pipeline
if inits is not None:
shared.log.warning('Control: ControlNet-XS does not support separate init image')
else: # run in img2img mode
else: # run in txt2img/img2img mode
if len(active_strength) > 0:
p.strength = active_strength[0]
pipe = diffusers.AutoPipelineForText2Image.from_pipe(shared.sd_model) # use set_diffuser_pipe
# pipe = diffusers.AutoPipelineForImage2Image.from_pipe(shared.sd_model) # use set_diffuser_pipe
instance = None
debug(f'Control pipeline: class={pipe.__class__} args={vars(p)}')
@ -280,9 +279,6 @@ def control_run(units: List[unit.Unit], inputs, inits, mask, unit_type: str, is_
debug(f'Control device={devices.device} dtype={devices.dtype}')
sd_models.copy_diffuser_options(shared.sd_model, original_pipeline) # copy options from original pipeline
sd_models.set_diffuser_options(shared.sd_model)
if ipadapter.apply_ip_adapter(shared.sd_model, p, ip_adapter, ip_scale, ip_image, reset=True):
original_pipeline.feature_extractor = shared.sd_model.feature_extractor
original_pipeline.image_encoder = shared.sd_model.image_encoder
try:
with devices.inference_context():
@ -429,9 +425,6 @@ def control_run(units: List[unit.Unit], inputs, inits, mask, unit_type: str, is_
else:
p.task_args['image'] = init_image
if ip_type == 1 and ip_adapter != 'none':
p.task_args['ip_adapter_image'] = input_image
if is_generator:
image_txt = f'{processed_image.width}x{processed_image.height}' if processed_image is not None else 'None'
msg = f'process | {index} of {frames if video is not None else len(inputs)} | {"Image" if video is None else "Frame"} {image_txt}'
@ -477,6 +470,11 @@ def control_run(units: List[unit.Unit], inputs, inits, mask, unit_type: str, is_
if unit_type == 'lite':
instance.apply(selected_models, p.image, use_conditioning)
# ip adapter
if ipadapter.apply(shared.sd_model, p, ip_adapter, ip_scale, ip_image or input_image):
original_pipeline.feature_extractor = shared.sd_model.feature_extractor
original_pipeline.image_encoder = shared.sd_model.image_encoder
# pipeline
output = None
if pipe is not None: # run new pipeline
@ -485,7 +483,6 @@ def control_run(units: List[unit.Unit], inputs, inits, mask, unit_type: str, is_
debug(f'Control exec pipeline: args={p.task_args} image={p.task_args.get("image", None)} control={p.task_args.get("control_image", None)} mask={p.task_args.get("mask_image", None)} ref={p.task_args.get("ref_image", None)}')
p.scripts = scripts.scripts_control
p.script_args = input_script_args
print('HERE', p.script_args)
processed = p.scripts.run(p, *input_script_args)
if processed is None:
processed: processing.Processed = processing.process_images(p) # run actual pipeline

View File

@ -119,6 +119,8 @@ class Unit(): # mashup of gradio controls and mapping to actual implementation c
self.controlnet = lite.ControlLLLite(device=default_device, dtype=default_dtype)
elif self.type == 'reference':
pass
elif self.type == 'ip':
pass
else:
log.error(f'Control unknown type: unit={unit_type}')
return

View File

@ -1,88 +0,0 @@
import time
from PIL import Image
from modules import shared, processing, devices
image_encoder = None
image_encoder_type = None
loaded = None
ADAPTERS = [
'none',
'ip-adapter_sd15',
'ip-adapter_sd15_light',
'ip-adapter-plus_sd15',
'ip-adapter-plus-face_sd15',
'ip-adapter-full-face_sd15',
# 'models/ip-adapter_sd15_vit-G', # RuntimeError: mat1 and mat2 shapes cannot be multiplied (2x1024 and 1280x3072)
'ip-adapter_sdxl',
# 'sdxl_models/ip-adapter_sdxl_vit-h',
# 'sdxl_models/ip-adapter-plus_sdxl_vit-h',
# 'sdxl_models/ip-adapter-plus-face_sdxl_vit-h',
]
def apply_ip_adapter(pipe, p: processing.StableDiffusionProcessing, adapter, scale, image, reset=False): # pylint: disable=arguments-differ
    """Load an IP-Adapter into the pipeline and register its image prompt for the current run.

    Args:
        pipe: active diffusers pipeline; must expose `load_ip_adapter`/`set_ip_adapter_scale`
        p: processing object; `ip_adapter_name`/`ip_adapter_scale`/`ip_adapter_image` attributes
           on it override the corresponding arguments (set by scripts/extensions or the api)
        adapter: adapter name from ADAPTERS, or 'none' to disable and unload
        scale: adapter conditioning scale
        image: image prompt; when None a black 512x512 placeholder is substituted
        reset: force reloading of the attention processor even if this adapter is cached
    Returns:
        True when the adapter was applied, False otherwise
    """
    from transformers import CLIPVisionModelWithProjection
    # overrides: per-run attributes on `p` take precedence over ui-supplied values
    if hasattr(p, 'ip_adapter_name'):
        adapter = p.ip_adapter_name
    if hasattr(p, 'ip_adapter_scale'):
        scale = p.ip_adapter_scale
    if hasattr(p, 'ip_adapter_image'):
        image = p.ip_adapter_image
    # init code
    global loaded, image_encoder, image_encoder_type # pylint: disable=global-statement
    if pipe is None:
        return False # fix: was a bare `return` (None); keep return type consistent with other failure paths
    if shared.backend != shared.Backend.DIFFUSERS:
        shared.log.warning('IP adapter: not in diffusers mode')
        return False
    if adapter == 'none':
        # disable: zero the scale and detach the attention processor if one was loaded
        if hasattr(pipe, 'set_ip_adapter_scale'):
            pipe.set_ip_adapter_scale(0)
        if loaded is not None:
            shared.log.debug('IP adapter: unload attention processor')
            pipe.unet.config.encoder_hid_dim_type = None
            loaded = None
        return False
    if image is None:
        image = Image.new('RGB', (512, 512), (0, 0, 0))
    if not hasattr(pipe, 'load_ip_adapter'):
        shared.log.error(f'IP adapter: pipeline not supported: {pipe.__class__.__name__}')
        return False
    # lazily create and cache a CLIP image encoder matching the base model type
    if getattr(pipe, 'image_encoder', None) is None or getattr(pipe, 'image_encoder', None) == (None, None):
        if shared.sd_model_type == 'sd':
            subfolder = 'models/image_encoder'
        elif shared.sd_model_type == 'sdxl':
            subfolder = 'sdxl_models/image_encoder'
        else:
            shared.log.error(f'IP adapter: unsupported model type: {shared.sd_model_type}')
            return False
        if image_encoder is None or image_encoder_type != shared.sd_model_type:
            try:
                image_encoder = CLIPVisionModelWithProjection.from_pretrained("h94/IP-Adapter", subfolder=subfolder, torch_dtype=devices.dtype, cache_dir=shared.opts.diffusers_dir, use_safetensors=True).to(devices.device)
                image_encoder_type = shared.sd_model_type
            except Exception as e:
                shared.log.error(f'IP adapter: failed to load image encoder: {e}')
                return False
        pipe.image_encoder = image_encoder
    # main code: (re)load attention processor weights when adapter changed, was reset, or unet lost them
    subfolder = 'models' if 'sd15' in adapter else 'sdxl_models'
    if adapter != loaded or getattr(pipe.unet.config, 'encoder_hid_dim_type', None) is None or reset:
        t0 = time.time()
        if loaded is not None:
            # shared.log.debug('IP adapter: reset attention processor')
            loaded = None
        else:
            shared.log.debug('IP adapter: load attention processor')
        pipe.load_ip_adapter("h94/IP-Adapter", subfolder=subfolder, weight_name=f'{adapter}.safetensors')
        t1 = time.time()
        shared.log.info(f'IP adapter load: adapter="{adapter}" scale={scale} image={image} time={t1-t0:.2f}')
        loaded = adapter
    else:
        shared.log.debug(f'IP adapter cache: adapter="{adapter}" scale={scale} image={image}')
    pipe.set_ip_adapter_scale(scale)
    # one image prompt per batch item; picked up by the pipeline via task_args
    p.task_args['ip_adapter_image'] = p.batch_size * [image]
    p.extra_generation_params["IP Adapter"] = f'{adapter}:{scale}'
    return True

View File

@ -284,7 +284,7 @@ class Hypernetwork:
def list_hypernetworks(path):
res = {}
def list_folder(folder):
for filename in shared.listdir(folder):
for filename in os.listdir(folder):
fn = os.path.join(folder, filename)
if os.path.isfile(fn) and fn.lower().endswith(".pt"):
name = os.path.splitext(os.path.basename(fn))[0]

View File

@ -504,7 +504,7 @@ def get_next_sequence_number(path, basename):
prefix_length = len(basename)
if not os.path.isdir(path):
return 0
for p in shared.listdir(path):
for p in os.listdir(path):
if p.startswith(basename):
parts = os.path.splitext(p[prefix_length:])[0].split('-') # splits the filename (removing the basename first if one is defined, so the sequence number is always the first element)
try:

View File

@ -22,10 +22,10 @@ def process_batch(p, input_files, input_dir, output_dir, inpaint_mask_dir, args)
if not os.path.isdir(input_dir):
shared.log.error(f"Process batch: directory not found: {input_dir}")
return
image_files = shared.listdir(input_dir)
image_files = os.listdir(input_dir)
is_inpaint_batch = False
if inpaint_mask_dir:
inpaint_masks = shared.listdir(inpaint_mask_dir)
inpaint_masks = os.listdir(inpaint_mask_dir)
is_inpaint_batch = len(inpaint_masks) > 0
if is_inpaint_batch:
shared.log.info(f"Process batch: inpaint batch masks={len(inpaint_masks)}")

View File

@ -272,7 +272,7 @@ def load_diffusers_models(model_path: str, command_path: str = None, clear=True)
if not os.path.isfile(os.path.join(cache_path, "hidden")):
output.append(str(r.repo_id))
"""
for folder in shared.listdir(place):
for folder in os.listdir(place):
try:
if "--" not in folder:
continue
@ -282,7 +282,7 @@ def load_diffusers_models(model_path: str, command_path: str = None, clear=True)
name = name.replace("--", "/")
folder = os.path.join(place, folder)
friendly = os.path.join(place, name)
snapshots = shared.listdir(os.path.join(folder, "snapshots"))
snapshots = os.listdir(os.path.join(folder, "snapshots"))
if len(snapshots) == 0:
shared.log.warning(f"Diffusers folder has no snapshots: location={place} folder={folder} name={name}")
continue
@ -579,7 +579,7 @@ def move_files(src_path: str, dest_path: str, ext_filter: str = None):
if not os.path.exists(dest_path):
os.makedirs(dest_path)
if os.path.exists(src_path):
for file in shared.listdir(src_path):
for file in os.listdir(src_path):
fullpath = os.path.join(src_path, file)
if os.path.isfile(fullpath):
if ext_filter is not None:

View File

@ -38,7 +38,7 @@ def run_postprocessing(extras_mode, image, image_folder: List[tempfile.NamedTemp
elif extras_mode == 2:
assert not shared.cmd_opts.hide_ui_dir_config, '--hide-ui-dir-config option must be disabled'
assert input_dir, 'input directory not selected'
image_list = shared.listdir(input_dir)
image_list = os.listdir(input_dir)
for filename in image_list:
try:
image = Image.open(filename)

View File

@ -1,3 +1,4 @@
import os
import sys
import time
from collections import namedtuple
@ -113,12 +114,19 @@ callback_map = dict(
callbacks_on_reload=[],
)
# accumulated callback durations, keyed by 'script-basename:callback-name'
timers = {}


def timer(t0: float, script, callback: str):
    """Accumulate time elapsed since *t0* for a script callback; log calls slower than 0.1s."""
    t1 = time.time()
    s = round(t1 - t0, 2)
    if s > 0.1:
        errors.log.debug(f'Script: {s} {callback} {script}')
    key = f'{os.path.basename(script)}:{callback}'
    timers[key] = timers.get(key, 0) + (t1 - t0)


def print_timers():
    """Emit a debug line for every callback whose accumulated time exceeds 0.05s."""
    for key, total in timers.items():
        if total > 0.05:
            errors.log.debug(f'Script: time={total:.2f} {key}')
def clear_callbacks():

View File

@ -11,6 +11,7 @@ from installer import log
AlwaysVisible = object()
time_component = {}
time_setup = {}
debug = log.trace if os.environ.get('SD_SCRIPT_DEBUG', None) is not None else lambda *args, **kwargs: None
class PostprocessImageArgs:
@ -24,6 +25,7 @@ class PostprocessBatchListArgs:
class Script:
parent = None
name = None
filename = None
args_from = None
@ -172,11 +174,8 @@ class Script:
def elem_id(self, item_id):
"""helper function to generate id for a HTML element, constructs final id out of script name, tab and user-supplied item_id"""
need_tabname = self.show(True) == self.show(False)
tabkind = 'img2img' if self.is_img2img else 'txt2txt'
tabname = f"{tabkind}_" if need_tabname else ""
title = re.sub(r'[^a-z_0-9]', '', re.sub(r'\s', '_', self.title().lower()))
return f'script_{tabname}{title}_{item_id}'
return f'script_{self.parent}_{title}_{item_id}'
current_basedir = paths.script_path
@ -224,7 +223,7 @@ def list_scripts(scriptdirname, extension):
else:
priority = priority + script.priority
priority_list.append(ScriptFile(script.basedir, script.filename, script.path, priority))
# log.debug(f'Adding script: {script.basedir} {script.filename} {script.path} {priority}')
debug(f'Adding script: {script.basedir} {script.filename} {script.path} {priority}')
priority_sort = sorted(priority_list, key=lambda item: item.priority + item.path.lower(), reverse=False)
return priority_sort
@ -255,7 +254,7 @@ def load_scripts():
for script_class in module.__dict__.values():
if type(script_class) != type:
continue
# log.debug(f'Registering script: {scriptfile.path}')
debug(f'Registering script: {scriptfile.path}')
if issubclass(script_class, Script):
scripts_data.append(ScriptClassData(script_class, scriptfile.path, scriptfile.basedir, module))
elif issubclass(script_class, scripts_postprocessing.ScriptPostprocessing):
@ -356,7 +355,7 @@ class ScriptRunner:
log.error(f'Script initialize: {path} {e}')
"""
def create_script_ui(self, script): # TODO this is legacy implementation
def create_script_ui(self, script):
import modules.api.models as api_models
script.args_from = len(self.inputs)
script.args_to = len(self.inputs)
@ -401,67 +400,69 @@ class ScriptRunner:
def prepare_ui(self):
self.inputs = [None]
def create_script_ui(self, script, inputs = [], inputs_alwayson = []): # noqa
def setup_ui(self, parent='unknown', accordion=True):
import modules.api.models as api_models
script.args_from = len(inputs)
script.args_to = len(inputs)
controls = wrap_call(script.ui, script.filename, "ui", script.is_img2img)
if controls is None:
return
script.name = wrap_call(script.title, script.filename, "title", default=script.filename).lower()
api_args = []
for control in controls:
if not isinstance(control, gr.components.IOComponent):
log.error(f'Invalid script control: "{script.filename}" control={control}')
continue
control.custom_script_source = os.path.basename(script.filename)
arg_info = api_models.ScriptArg(label=control.label or "")
for field in ("value", "minimum", "maximum", "step", "choices"):
v = getattr(control, field, None)
if v is not None:
setattr(arg_info, field, v)
api_args.append(arg_info)
script.api_info = api_models.ScriptInfo(
name=script.name,
is_img2img=script.is_img2img,
is_alwayson=script.alwayson,
args=api_args,
)
if script.infotext_fields is not None:
self.infotext_fields += script.infotext_fields
if script.paste_field_names is not None:
self.paste_field_names += script.paste_field_names
inputs += controls
inputs_alwayson += [script.alwayson for _ in controls]
script.args_to = len(inputs)
def select_script(self, script_index):
selected_script = self.selectable_scripts[script_index - 1] if script_index > 0 else None
return [gr.update(visible=selected_script == s) for s in self.selectable_scripts]
def init_field(self, title):
if title == 'None': # called when an initial value is set from ui-config.json to show script's UI components
return
script_index = self.titles.index(title)
self.selectable_scripts[script_index].group.visible = True
def setup_ui(self, accordion=True):
self.titles = [wrap_call(script.title, script.filename, "title") or f"{script.filename} [error]" for script in self.selectable_scripts]
inputs = []
inputs_alwayson = [True]
dropdown = gr.Dropdown(label="Script", elem_id="script_list", choices=["None"] + self.titles, value="None", type="index")
def create_script_ui(script: Script, inputs, inputs_alwayson):
script.parent = parent
script.args_from = len(inputs)
script.args_to = len(inputs)
controls = wrap_call(script.ui, script.filename, "ui", script.is_img2img)
if controls is None:
return
script.name = wrap_call(script.title, script.filename, "title", default=script.filename).lower()
api_args = []
for control in controls:
debug(f'Script control: parent={script.parent} script="{script.name}" label="{control.label}" type={control} id={control.elem_id}')
if not isinstance(control, gr.components.IOComponent):
log.error(f'Invalid script control: "{script.filename}" control={control}')
continue
control.custom_script_source = os.path.basename(script.filename)
arg_info = api_models.ScriptArg(label=control.label or "")
for field in ("value", "minimum", "maximum", "step", "choices"):
v = getattr(control, field, None)
if v is not None:
setattr(arg_info, field, v)
api_args.append(arg_info)
script.api_info = api_models.ScriptInfo(
name=script.name,
is_img2img=script.is_img2img,
is_alwayson=script.alwayson,
args=api_args,
)
if script.infotext_fields is not None:
self.infotext_fields += script.infotext_fields
if script.paste_field_names is not None:
self.paste_field_names += script.paste_field_names
inputs += controls
inputs_alwayson += [script.alwayson for _ in controls]
script.args_to = len(inputs)
dropdown = gr.Dropdown(label="Script", elem_id=f'{parent}_script_list', choices=["None"] + self.titles, value="None", type="index")
inputs.insert(0, dropdown)
for script in self.selectable_scripts:
with gr.Group(visible=False) as group:
t0 = time.time()
self.create_script_ui(script, inputs, inputs_alwayson)
create_script_ui(script, inputs, inputs_alwayson)
time_setup[script.title()] = time_setup.get(script.title(), 0) + (time.time()-t0)
script.group = group
dropdown.init_field = self.init_field
dropdown.change(fn=self.select_script, inputs=[dropdown], outputs=[script.group for script in self.selectable_scripts])
def select_script(script_index):
selected_script = self.selectable_scripts[script_index - 1] if script_index > 0 else None
return [gr.update(visible=selected_script == s) for s in self.selectable_scripts]
def init_field(title):
if title == 'None': # called when an initial value is set from ui-config.json to show script's UI components
return
script_index = self.titles.index(title)
self.selectable_scripts[script_index].group.visible = True
dropdown.init_field = init_field
dropdown.change(fn=select_script, inputs=[dropdown], outputs=[script.group for script in self.selectable_scripts])
def onload_script_visibility(params):
title = params.get('Script', None)
@ -473,13 +474,11 @@ class ScriptRunner:
else:
return gr.update(visible=False)
# with gr.Group(elem_id='scripts_alwayson_img2img' if self.is_img2img else 'scripts_alwayson_txt2img'):
with gr.Accordion(label="Extensions", elem_id='scripts_alwayson_img2img' if self.is_img2img else 'scripts_alwayson_txt2img') if accordion else gr.Group():
with gr.Accordion(label="Extensions", elem_id=f'{parent}_script_alwayson') if accordion else gr.Group():
for script in self.alwayson_scripts:
t0 = time.time()
elem_id = f'script_{"txt2img" if script.is_txt2img else "img2img"}_{script.title().lower().replace(" ", "_")}'
with gr.Group(elem_id=elem_id, elem_classes=['extension-script']) as group:
self.create_script_ui(script, inputs, inputs_alwayson)
with gr.Group(elem_id=f'{parent}_script_{script.title().lower().replace(" ", "_")}', elem_classes=['extension-script']) as group:
create_script_ui(script, inputs, inputs_alwayson)
script.group = group
time_setup[script.title()] = time_setup.get(script.title(), 0) + (time.time()-t0)

View File

@ -135,7 +135,7 @@ def preprocess_work(process_src, process_dst, process_width, process_height, pre
overlap_ratio = max(0.0, min(0.9, overlap_ratio))
assert src != dst, 'same directory specified as source and destination'
os.makedirs(dst, exist_ok=True)
files = shared.listdir(src)
files = os.listdir(src)
shared.state.job = "preprocess"
shared.state.textinfo = "Preprocessing..."
shared.state.job_count = len(files)

View File

@ -11,9 +11,8 @@ from modules.control.units import xs # vislearn ControlNet-XS
from modules.control.units import lite # vislearn ControlNet-XS
from modules.control.units import t2iadapter # TencentARC T2I-Adapter
from modules.control.units import reference # reference pipeline
from modules.control.units import ipadapter # reference pipeline
from modules import errors, shared, progress, sd_samplers, ui_components, ui_symbols, ui_common, ui_sections, generation_parameters_copypaste, call_queue, scripts
from modules.ui_components import FormGroup
from scripts import ipadapter # pylint: disable=no-name-in-module
from modules import errors, shared, progress, sd_samplers, ui_components, ui_symbols, ui_common, ui_sections, generation_parameters_copypaste, call_queue, scripts # pylint: disable=ungrouped-imports
gr_height = 512
@ -29,7 +28,7 @@ busy = False # used to synchronize select_input and generate_click
def initialize():
from modules import devices
shared.log.debug(f'Control initialize: models={shared.opts.control_dir}')
shared.log.debug(f'UI initialize: control models={shared.opts.control_dir}')
controlnet.cache_dir = os.path.join(shared.opts.control_dir, 'controlnet')
xs.cache_dir = os.path.join(shared.opts.control_dir, 'xs')
lite.cache_dir = os.path.join(shared.opts.control_dir, 'lite')
@ -44,7 +43,7 @@ def initialize():
os.makedirs(t2iadapter.cache_dir, exist_ok=True)
os.makedirs(processors.cache_dir, exist_ok=True)
scripts.scripts_current = scripts.scripts_control
scripts.scripts_control.initialize_scripts(is_img2img=True)
scripts.scripts_current.initialize_scripts(is_img2img=True)
def return_controls(res):
@ -70,9 +69,7 @@ def generate_click(job_id: str, active_tab: str, *args):
while busy:
time.sleep(0.01)
from modules.control.run import control_run
shared.log.debug(f'Control: tab={active_tab} job={job_id} args={args}')
if active_tab not in ['controlnet', 'xs', 'adapter', 'reference', 'lite']:
return None, None, None, None, f'Control: Unknown mode: {active_tab} args={args}'
shared.log.debug(f'Control: tab="{active_tab}" job={job_id} args={args}')
shared.state.begin('control')
progress.add_task_to_queue(job_id)
with call_queue.queue_lock:
@ -310,8 +307,6 @@ def create_ui(_blocks: gr.Blocks=None):
with gr.Row(elem_id='control_settings'):
with gr.Accordion(open=False, label="Input", elem_id="control_input", elem_classes=["small-accordion"]):
with gr.Row():
show_ip = gr.Checkbox(label="Enable IP adapter", value=False, elem_id="control_show_ip")
with gr.Row():
show_preview = gr.Checkbox(label="Show preview", value=True, elem_id="control_show_preview")
with gr.Row():
@ -350,7 +345,7 @@ def create_ui(_blocks: gr.Blocks=None):
video_type.change(fn=video_type_change, inputs=[video_type], outputs=[video_duration, video_loop, video_pad, video_interpolate])
with gr.Accordion(open=False, label="Extensions", elem_id="control_extensions", elem_classes=["small-accordion"]):
input_script_args = scripts.scripts_control.setup_ui(accordion=False)
input_script_args = scripts.scripts_current.setup_ui(parent='control', accordion=False)
with gr.Row():
override_settings = ui_common.create_override_inputs('control')
@ -391,17 +386,6 @@ def create_ui(_blocks: gr.Blocks=None):
init_batch = gr.File(label="Input", show_label=False, file_count='multiple', file_types=['image'], type='file', interactive=True, height=gr_height)
with gr.Tab('Folder', id='init-folder') as tab_folder_init:
init_folder = gr.File(label="Input", show_label=False, file_count='directory', file_types=['image'], type='file', interactive=True, height=gr_height)
with gr.Column(scale=9, elem_id='control-init-column', visible=False) as column_ip:
gr.HTML('<span id="control-init-button">IP Adapter</p>')
with gr.Tabs(elem_classes=['control-tabs'], elem_id='control-tab-ip'):
with gr.Tab('Image', id='init-image') as tab_image_init:
ip_image = gr.Image(label="Input", show_label=False, type="pil", source="upload", interactive=True, tool="editor", height=gr_height)
with gr.Row():
ip_adapter = gr.Dropdown(label='Adapter', choices=ipadapter.ADAPTERS, value='none')
ip_scale = gr.Slider(label='Scale', minimum=0.0, maximum=1.0, step=0.01, value=0.5)
with gr.Row():
ip_type = gr.Radio(label="Input type", choices=['Init image same as control', 'Separate init image'], value='Init image same as control', type='index', elem_id='control_ip_type')
ip_image.change(fn=lambda x: gr.update(value='Init image same as control' if x is None else 'Separate init image'), inputs=[ip_image], outputs=[ip_type])
with gr.Column(scale=9, elem_id='control-output-column', visible=True) as _column_output:
gr.HTML('<span id="control-output-button">Output</p>')
with gr.Tabs(elem_classes=['control-tabs'], elem_id='control-tab-output') as output_tabs:
@ -464,7 +448,16 @@ def create_ui(_blocks: gr.Blocks=None):
units[-1].enabled = True # enable first unit in group
num_controlnet_units.change(fn=display_units, inputs=[num_controlnet_units], outputs=controlnet_ui_units)
with gr.Tab('Adapter') as _tab_adapter:
with gr.Tab('IP Adapter') as _tab_ipadapter:
with gr.Row():
with gr.Column():
gr.HTML('<a href="https://github.com/TencentARC/T2I-Adapter">T2I-Adapter</a>')
ip_adapter = gr.Dropdown(label='Adapter', choices=ipadapter.ADAPTERS, value='none')
ip_scale = gr.Slider(label='Scale', minimum=0.0, maximum=1.0, step=0.01, value=0.5)
with gr.Column():
ip_image = gr.Image(label="Input", show_label=False, type="pil", source="upload", interactive=True, tool="editor", height=256, width=256)
with gr.Tab('T2I Adapter') as _tab_t2iadapter:
gr.HTML('<a href="https://github.com/TencentARC/T2I-Adapter">T2I-Adapter</a>')
with gr.Row():
extra_controls = [
@ -680,7 +673,6 @@ def create_ui(_blocks: gr.Blocks=None):
btn.click(fn=transfer_input, inputs=[btn], outputs=[input_image, input_resize, input_inpaint] + input_buttons)
show_preview.change(fn=lambda x: gr.update(visible=x), inputs=[show_preview], outputs=[column_preview])
show_ip.change(fn=lambda x: gr.update(visible=x), inputs=[show_ip], outputs=[column_ip])
input_type.change(fn=lambda x: gr.update(visible=x == 2), inputs=[input_type], outputs=[column_init])
btn_prompt_counter.click(fn=call_queue.wrap_queued_call(ui_common.update_token_counter), inputs=[prompt, steps], outputs=[prompt_counter])
btn_negative_counter.click(fn=call_queue.wrap_queued_call(ui_common.update_token_counter), inputs=[negative, steps], outputs=[negative_counter])
@ -714,7 +706,7 @@ def create_ui(_blocks: gr.Blocks=None):
resize_mode_after, resize_name_after, width_after, height_after, scale_by_after, selected_scale_tab_after,
denoising_strength, batch_count, batch_size, mask_blur, mask_overlap,
video_skip_frames, video_type, video_duration, video_loop, video_pad, video_interpolate,
ip_adapter, ip_scale, ip_image, ip_type,
ip_adapter, ip_scale, ip_image,
]
output_fields = [
preview_process,

View File

@ -48,7 +48,7 @@ class ExtraNetworksPageTextualInversion(ui_extra_networks.ExtraNetworksPage):
def list_items(self):
def list_folder(folder):
for filename in shared.listdir(folder):
for filename in os.listdir(folder):
fn = os.path.join(folder, filename)
if os.path.isfile(fn) and (fn.lower().endswith(".pt") or fn.lower().endswith(".safetensors")):
embedding = Embedding(vec=0, name=os.path.basename(fn), filename=fn)

View File

@ -20,7 +20,7 @@ def process_interrogate(interrogation_function, mode, ii_input_files, ii_input_d
if not os.path.isdir(ii_input_dir):
shared.log.error(f"Interrogate: Input directory not found: {ii_input_dir}")
return [gr.update(), None]
images = shared.listdir(ii_input_dir)
images = os.listdir(ii_input_dir)
if ii_output_dir != "":
os.makedirs(ii_output_dir, exist_ok=True)
else:
@ -48,6 +48,7 @@ def interrogate_deepbooru(image):
def create_ui():
shared.log.debug('UI initialize: img2img')
import modules.img2img # pylint: disable=redefined-outer-name
modules.scripts.scripts_current = modules.scripts.scripts_img2img
modules.scripts.scripts_img2img.initialize_scripts(is_img2img=True)
@ -169,7 +170,7 @@ def create_ui():
override_settings = ui_common.create_override_inputs('img2img')
with FormGroup(elem_id="img2img_script_container"):
img2img_script_inputs = modules.scripts.scripts_img2img.setup_ui()
img2img_script_inputs = modules.scripts.scripts_img2img.setup_ui(parent='img2img', accordion=True)
img2img_gallery, img2img_generation_info, img2img_html_info, _img2img_html_info_formatted, img2img_html_log = ui_common.create_output_panel("img2img")

View File

@ -119,7 +119,7 @@ def batch_process(batch_files, batch_folder, batch_str, mode, clip_model, write)
if batch_folder is not None:
files += [f.name for f in batch_folder]
if batch_str is not None and len(batch_str) > 0 and os.path.exists(batch_str) and os.path.isdir(batch_str):
files += [os.path.join(batch_str, f) for f in shared.listdir(batch_str) if f.lower().endswith(('.png', '.jpg', '.jpeg', '.webp'))]
files += [os.path.join(batch_str, f) for f in os.listdir(batch_str) if f.lower().endswith(('.png', '.jpg', '.jpeg', '.webp'))]
if len(files) == 0:
shared.log.error('Interrogate batch no images')
return ''

View File

@ -21,6 +21,10 @@ info = '' # noqa
reset = '🔄'
upload = '⬆️'
preview = '🔍'
mark_diag = ''
mark_flag = ''
int_clip = ''
int_blip = ''
"""
refresh = '🔄'
close = '🛗'

View File

@ -16,6 +16,7 @@ def calc_resolution_hires(width, height, hr_scale, hr_resize_x, hr_resize_y, hr_
def create_ui():
shared.log.debug('UI initialize: txt2img')
import modules.txt2img # pylint: disable=redefined-outer-name
modules.scripts.scripts_current = modules.scripts.scripts_txt2img
modules.scripts.scripts_txt2img.initialize_scripts(is_img2img=False)
@ -48,7 +49,7 @@ def create_ui():
enable_hr, hr_sampler_index, denoising_strength, hr_final_resolution, hr_upscaler, hr_force, hr_second_pass_steps, hr_scale, hr_resize_x, hr_resize_y, refiner_steps, refiner_start, refiner_prompt, refiner_negative = ui_sections.create_hires_inputs('txt2img')
override_settings = ui_common.create_override_inputs('txt2img')
txt2img_script_inputs = modules.scripts.scripts_txt2img.setup_ui()
txt2img_script_inputs = modules.scripts.scripts_txt2img.setup_ui(parent='txt2img', accordion=True)
hr_resolution_preview_inputs = [width, height, hr_scale, hr_resize_x, hr_resize_y, hr_upscaler]
for preview_input in hr_resolution_preview_inputs:

View File

@ -52,7 +52,7 @@ class Upscaler:
pass
def find_folder(self, folder, scalers, loaded):
for fn in modules.shared.listdir(folder): # from folder
for fn in os.listdir(folder): # from folder
file_name = os.path.join(folder, fn)
if os.path.isdir(file_name):
self.find_folder(file_name, scalers, loaded)

View File

@ -34,6 +34,104 @@ ADAPTERS = {
}
def apply(pipe, p: processing.StableDiffusionProcessing, adapter_name, scale, image): # pylint: disable=arguments-differ
    """Load and configure an IP-Adapter on `pipe` for the current generation.

    Args:
        pipe: diffusers pipeline to attach the adapter to; may be None.
        p: processing object; `ip_adapter_name` / `ip_adapter_scale` /
           `ip_adapter_image` attributes on it override the explicit arguments.
        adapter_name: key into module-level ADAPTERS selecting the adapter weights.
        scale: adapter strength passed to `set_ip_adapter_scale`.
        image: conditioning image (PIL image, or base64 string which gets decoded).
    Returns:
        True if an adapter is active for this run, False otherwise.
    """
    # overrides: attributes set on the processing object win over UI arguments
    adapter = ADAPTERS.get(adapter_name, None)
    if hasattr(p, 'ip_adapter_name'):
        adapter = p.ip_adapter_name
    if hasattr(p, 'ip_adapter_scale'):
        scale = p.ip_adapter_scale
    if hasattr(p, 'ip_adapter_image'):
        image = p.ip_adapter_image
    if adapter is None:
        return False
    # init code: cached encoder/adapter state lives at module level
    global loaded, checkpoint, image_encoder, image_encoder_type, image_encoder_name # pylint: disable=global-statement
    if pipe is None:
        return False
    if shared.backend != shared.Backend.DIFFUSERS:
        shared.log.warning('IP adapter: not in diffusers mode')
        return False # fix: was bare `return` (None) while every other path returns a bool
    if image is None and adapter != 'none':
        shared.log.error('IP adapter: no image provided')
        adapter = 'none' # unload adapter if previously loaded as it will cause runtime errors
    if adapter == 'none':
        if hasattr(pipe, 'set_ip_adapter_scale'):
            pipe.set_ip_adapter_scale(0)
        if loaded is not None:
            shared.log.debug('IP adapter: unload attention processor')
            pipe.unet.config.encoder_hid_dim_type = None
            loaded = None
        return False
    if not hasattr(pipe, 'load_ip_adapter'):
        # fix: removed stray `diffusers.StableDiffusionPipeline.load_ip_adapter()` debug call
        # which raised TypeError before the error below could ever be logged
        shared.log.error(f'IP adapter: pipeline not supported: {pipe.__class__.__name__}')
        return False
    # which clip to use
    # NOTE(review): selection keys off `adapter_name`, not the possibly-overridden `adapter`
    # from p.ip_adapter_name — confirm override callers always pair a matching encoder
    if 'ViT' not in adapter_name:
        clip_repo = base_repo
        subfolder = 'models/image_encoder' if shared.sd_model_type == 'sd' else 'sdxl_models/image_encoder' # defaults per model
    elif 'ViT-H' in adapter_name:
        clip_repo = base_repo
        subfolder = 'models/image_encoder' # this is vit-h
    elif 'ViT-G' in adapter_name:
        clip_repo = base_repo
        subfolder = 'sdxl_models/image_encoder' # this is vit-g
    else:
        shared.log.error(f'IP adapter: unknown model type: {adapter_name}')
        return False
    # load image encoder used by ip adapter; reload when model type, checkpoint or repo changed
    if getattr(pipe, 'image_encoder', None) is None or image_encoder_name != clip_repo + '/' + subfolder:
        if image_encoder is None or image_encoder_type != shared.sd_model_type or checkpoint != shared.opts.sd_model_checkpoint or image_encoder_name != clip_repo + '/' + subfolder:
            if shared.sd_model_type != 'sd' and shared.sd_model_type != 'sdxl':
                shared.log.error(f'IP adapter: unsupported model type: {shared.sd_model_type}')
                return False
            try:
                from transformers import CLIPVisionModelWithProjection
                shared.log.debug(f'IP adapter: load image encoder: {clip_repo}/{subfolder}')
                image_encoder = CLIPVisionModelWithProjection.from_pretrained(clip_repo, subfolder=subfolder, torch_dtype=devices.dtype, cache_dir=shared.opts.diffusers_dir, use_safetensors=True).to(devices.device)
                image_encoder_type = shared.sd_model_type
                image_encoder_name = clip_repo + '/' + subfolder
            except Exception as e:
                shared.log.error(f'IP adapter: failed to load image encoder: {e}')
                return False # fix: was bare `return` (None) while every other path returns a bool
    if getattr(pipe, 'feature_extractor', None) is None:
        from transformers import CLIPImageProcessor
        shared.log.debug('IP adapter: load feature extractor')
        pipe.feature_extractor = CLIPImageProcessor()
    # main code: (re)load adapter weights only when adapter or checkpoint changed
    if adapter != loaded or getattr(pipe.unet.config, 'encoder_hid_dim_type', None) is None or checkpoint != shared.opts.sd_model_checkpoint:
        t0 = time.time()
        if loaded is not None:
            shared.log.debug('IP adapter: reset attention processor')
            loaded = None
        else:
            shared.log.debug('IP adapter: load attention processor')
        pipe.image_encoder = image_encoder
        subfolder = 'models' if shared.sd_model_type == 'sd' else 'sdxl_models'
        pipe.load_ip_adapter(base_repo, subfolder=subfolder, weight_name=adapter)
        t1 = time.time()
        shared.log.info(f'IP adapter load: adapter="{adapter}" scale={scale} image={image} time={t1-t0:.2f}')
        loaded = adapter
        checkpoint = shared.opts.sd_model_checkpoint
    else:
        shared.log.debug(f'IP adapter cache: adapter="{adapter}" scale={scale} image={image}')
    pipe.set_ip_adapter_scale(scale)
    if isinstance(image, str):
        from modules.api.api import decode_base64_to_image
        image = decode_base64_to_image(image)
    p.task_args['ip_adapter_image'] = p.batch_size * [image]
    p.extra_generation_params["IP Adapter"] = f'{adapter}:{scale}'
    return True
class Script(scripts.Script):
def title(self):
    """Display name of this script as shown in the Scripts dropdown."""
    return 'IP Adapter'
@ -51,97 +149,4 @@ class Script(scripts.Script):
return [adapter, scale, image]
def process(self, p: processing.StableDiffusionProcessing, adapter_name, scale, image): # pylint: disable=arguments-differ
# overrides
adapter = ADAPTERS.get(adapter_name, None)
if hasattr(p, 'ip_adapter_name'):
adapter = p.ip_adapter_name
if hasattr(p, 'ip_adapter_scale'):
scale = p.ip_adapter_scale
if hasattr(p, 'ip_adapter_image'):
image = p.ip_adapter_image
if adapter is None:
return
# init code
global loaded, checkpoint, image_encoder, image_encoder_type, image_encoder_name # pylint: disable=global-statement
if shared.sd_model is None:
return
if shared.backend != shared.Backend.DIFFUSERS:
shared.log.warning('IP adapter: not in diffusers mode')
return
if image is None and adapter != 'none':
shared.log.error('IP adapter: no image provided')
adapter = 'none' # unload adapter if previously loaded as it will cause runtime errors
if adapter == 'none':
if hasattr(shared.sd_model, 'set_ip_adapter_scale'):
shared.sd_model.set_ip_adapter_scale(0)
if loaded is not None:
shared.log.debug('IP adapter: unload attention processor')
shared.sd_model.unet.config.encoder_hid_dim_type = None
loaded = None
return
if not hasattr(shared.sd_model, 'load_ip_adapter'):
import diffusers
diffusers.StableDiffusionPipeline.load_ip_adapter()
shared.log.error(f'IP adapter: pipeline not supported: {shared.sd_model.__class__.__name__}')
return
# which clip to use
if 'ViT' not in adapter_name:
clip_repo = base_repo
subfolder = 'models/image_encoder' if shared.sd_model_type == 'sd' else 'sdxl_models/image_encoder' # defaults per model
elif 'ViT-H' in adapter_name:
clip_repo = base_repo
subfolder = 'models/image_encoder' # this is vit-h
elif 'ViT-G' in adapter_name:
clip_repo = base_repo
subfolder = 'sdxl_models/image_encoder' # this is vit-g
else:
shared.log.error(f'IP adapter: unknown model type: {adapter_name}')
return
# load image encoder used by ip adapter
if getattr(shared.sd_model, 'image_encoder', None) is None or image_encoder_name != clip_repo + '/' + subfolder:
if image_encoder is None or image_encoder_type != shared.sd_model_type or checkpoint != shared.opts.sd_model_checkpoint or image_encoder_name != clip_repo + '/' + subfolder:
if shared.sd_model_type != 'sd' and shared.sd_model_type != 'sdxl':
shared.log.error(f'IP adapter: unsupported model type: {shared.sd_model_type}')
return
try:
from transformers import CLIPVisionModelWithProjection
shared.log.debug(f'IP adapter: load image encoder: {clip_repo}/{subfolder}')
image_encoder = CLIPVisionModelWithProjection.from_pretrained(clip_repo, subfolder=subfolder, torch_dtype=devices.dtype, cache_dir=shared.opts.diffusers_dir, use_safetensors=True).to(devices.device)
image_encoder_type = shared.sd_model_type
image_encoder_name = clip_repo + '/' + subfolder
except Exception as e:
shared.log.error(f'IP adapter: failed to load image encoder: {e}')
return
if getattr(shared.sd_model, 'feature_extractor', None) is None:
from transformers import CLIPImageProcessor
shared.log.debug('IP adapter: load feature extractor')
shared.sd_model.feature_extractor = CLIPImageProcessor()
# main code
# subfolder = 'models' if 'sd15' in adapter else 'sdxl_models'
if adapter != loaded or getattr(shared.sd_model.unet.config, 'encoder_hid_dim_type', None) is None or checkpoint != shared.opts.sd_model_checkpoint:
t0 = time.time()
if loaded is not None:
shared.log.debug('IP adapter: reset attention processor')
loaded = None
else:
shared.log.debug('IP adapter: load attention processor')
shared.sd_model.image_encoder = image_encoder
subfolder = 'models' if shared.sd_model_type == 'sd' else 'sdxl_models'
shared.sd_model.load_ip_adapter(base_repo, subfolder=subfolder, weight_name=adapter)
t1 = time.time()
shared.log.info(f'IP adapter load: adapter="{adapter}" scale={scale} image={image} time={t1-t0:.2f}')
loaded = adapter
checkpoint = shared.opts.sd_model_checkpoint
else:
shared.log.debug(f'IP adapter cache: adapter="{adapter}" scale={scale} image={image}')
shared.sd_model.set_ip_adapter_scale(scale)
if isinstance(image, str):
from modules.api.api import decode_base64_to_image
image = decode_base64_to_image(image)
p.task_args['ip_adapter_image'] = p.batch_size * [image]
p.extra_generation_params["IP Adapter"] = f'{adapter}:{scale}'
apply(shared.sd_model, p, adapter_name, scale, image)

View File

@ -311,13 +311,13 @@ def webui(restart=False):
log.info(f"Startup time: {timer.startup.summary()}")
debug = log.trace if os.environ.get('SD_SCRIPT_DEBUG', None) is not None else lambda *args, **kwargs: None
debug('Trace: SCRIPTS')
debug('Loaded scripts:')
for m in modules.scripts.scripts_data:
debug(f' {m}')
debug('Loaded postprocessing scripts:')
for m in modules.scripts.postprocessing_scripts_data:
debug(f' {m}')
timer.startup.reset()
modules.script_callbacks.print_timers()
if not restart:
# override all loggers to use the same handlers as the main logger