diff --git a/TODO.md b/TODO.md
index 389be8963..2b99c8c72 100644
--- a/TODO.md
+++ b/TODO.md
@@ -4,8 +4,10 @@ Main ToDo list can be found at [GitHub projects](https://github.com/users/vladma
 
 ## Future Candidates
 
-- Remote TE
-- Unified `CLIPTextModelWithProjection` loader
+- Remote TE
+- Mobile ModernUI
+- [Canvas](https://konvajs.org/)
+
 - [Modular pipelines and guiders](https://github.com/huggingface/diffusers/issues/11915)
 - Refactor: Sampler options
 - Refactor: [GGUF](https://huggingface.co/docs/diffusers/main/en/quantization/gguf)
@@ -40,7 +42,6 @@ Main ToDo list can be found at [GitHub projects](https://github.com/users/vladma
 - Remove: CodeFormer
 - Remove: GFPGAN
 - ModernUI: Lite vs Expert mode
-- [Canvas](https://konvajs.org/)
 
 ### Future Considerations
 - [TensorRT](https://github.com/huggingface/diffusers/pull/11173)
diff --git a/modules/modeldata.py b/modules/modeldata.py
index 0cdbcb19d..dced4f564 100644
--- a/modules/modeldata.py
+++ b/modules/modeldata.py
@@ -60,6 +60,8 @@ def get_model_type(pipe):
         model_type = 'bria'
     elif 'Qwen' in name:
         model_type = 'qwen'
+    elif 'NextStep' in name:
+        model_type = 'nextstep'
     # video models
     elif "CogVideo" in name:
         model_type = 'cogvideo'
diff --git a/modules/modelloader.py b/modules/modelloader.py
index c4a1d3e00..b185475e3 100644
--- a/modules/modelloader.py
+++ b/modules/modelloader.py
@@ -27,12 +27,13 @@ def hf_login(token=None):
         log.debug('HF login: no token provided')
         return False
     if os.environ.get('HUGGING_FACE_HUB_TOKEN', None) is not None:
-        log.warning('HF login: removing existing env variable: HUGGING_FACE_HUB_TOKEN')
+        # log.warning('HF login: removing existing env variable: HUGGING_FACE_HUB_TOKEN')
         del os.environ['HUGGING_FACE_HUB_TOKEN']
     if os.environ.get('HF_TOKEN', None) is not None:
-        log.warning('HF login: removing existing env variable: HF_TOKEN')
+        # log.warning('HF login: removing existing env variable: HF_TOKEN')
         del os.environ['HF_TOKEN']
     if loggedin != token:
+        os.environ.setdefault('HF_TOKEN', token)
         stdout = io.StringIO()
         with contextlib.redirect_stdout(stdout):
             hf.logout()
diff --git a/modules/postprocess/yolo.py b/modules/postprocess/yolo.py
index 35638df78..1cd7f552b 100644
--- a/modules/postprocess/yolo.py
+++ b/modules/postprocess/yolo.py
@@ -275,7 +275,7 @@ class YoloRestorer(Detailer):
         orig_negative: str = orig_p.get('all_negative_prompts', [''])[0]
         prompt: str = orig_p.get('detailer_prompt', '')
         negative: str = orig_p.get('detailer_negative', '')
-        if len(prompt) == 0:
+        if prompt is None or len(prompt) == 0:
             prompt = orig_prompt
         else:
             prompt = prompt.replace('[PROMPT]', orig_prompt)
diff --git a/modules/prompt_parser_diffusers.py b/modules/prompt_parser_diffusers.py
index 3f50c40d5..31947ee7b 100644
--- a/modules/prompt_parser_diffusers.py
+++ b/modules/prompt_parser_diffusers.py
@@ -182,6 +182,10 @@ class PromptEmbedder:
             self.negative_prompt_attention_masks[batchidx].append(self.negative_prompt_attention_masks[batchidx][idx])
 
     def encode(self, pipe, positive_prompt, negative_prompt, batchidx):
+        if positive_prompt is None:
+            positive_prompt = ''
+        if negative_prompt is None:
+            negative_prompt = ''
         global last_attention # pylint: disable=global-statement
         self.attention = shared.opts.prompt_attention
         last_attention = self.attention
@@ -543,6 +547,10 @@ def split_prompts(pipe, prompt, SD3 = False):
 
 def get_weighted_text_embeddings(pipe, prompt: str = "", neg_prompt: str = "", clip_skip: int = None):
     device = devices.device
+    if prompt is None:
+        prompt = ''
+    if neg_prompt is None:
+        neg_prompt = ''
     SD3 = bool(hasattr(pipe, 'text_encoder_3') and not hasattr(pipe, 'text_encoder_4'))
     prompt, prompt_2, prompt_3, prompt_4 = split_prompts(pipe, prompt, SD3)
     neg_prompt, neg_prompt_2, neg_prompt_3, neg_prompt_4 = split_prompts(pipe, neg_prompt, SD3)
diff --git a/modules/sd_detect.py b/modules/sd_detect.py
index 52d32f113..6aef51a0b 100644
--- a/modules/sd_detect.py
+++ b/modules/sd_detect.py
@@ -103,6 +103,8 @@ def guess_by_name(fn, current_guess):
         return 'Bria'
     elif 'qwen' in fn.lower():
         return 'Qwen'
+    elif 'nextstep' in fn.lower():
+        return 'NextStep'
     elif 'kandinsky-2-1' in fn.lower():
         return 'Kandinsky 2.1'
     elif 'kandinsky-2-2' in fn.lower():
diff --git a/modules/sd_models.py b/modules/sd_models.py
index f386fb66b..f772cf53b 100644
--- a/modules/sd_models.py
+++ b/modules/sd_models.py
@@ -402,6 +402,10 @@ def load_diffuser_force(model_type, checkpoint_info, diffusers_load_config, op='
             from pipelines.model_kandinsky import load_kandinsky3
             sd_model = load_kandinsky3(checkpoint_info, diffusers_load_config)
             allow_post_quant = False
+        elif model_type in ['NextStep']:
+            from pipelines.model_nextstep import load_nextstep
+            sd_model = load_nextstep(checkpoint_info, diffusers_load_config) # pylint: disable=assignment-from-none
+            allow_post_quant = False
     except Exception as e:
         shared.log.error(f'Load {op}: path="{checkpoint_info.path}" {e}')
     if debug_load:
diff --git a/modules/styles.py b/modules/styles.py
index f8bf3e916..cbfffbb39 100644
--- a/modules/styles.py
+++ b/modules/styles.py
@@ -97,7 +97,7 @@ def apply_file_wildcards(prompt, replaced = [], not_found = [], recursion=0, see
 
 
 def apply_wildcards_to_prompt(prompt, all_wildcards, seed=-1, silent=False):
-    if len(prompt) == 0:
+    if prompt is None or len(prompt) == 0:
         return prompt
     old_state = None
     if seed > 0 and len(all_wildcards) > 0:
diff --git a/pipelines/model_nextstep.py b/pipelines/model_nextstep.py
new file mode 100644
index 000000000..ba8a98382
--- /dev/null
+++ b/pipelines/model_nextstep.py
@@ -0,0 +1,63 @@
+# import transformers
+from modules import shared, devices, sd_models, model_quant # pylint: disable=unused-import
+from pipelines import generic # pylint: disable=unused-import
+
+
+def load_nextstep(checkpoint_info, diffusers_load_config={}): # pylint: disable=unused-argument
+    repo_id = sd_models.path_to_repo(checkpoint_info)
+    sd_models.hf_auth_check(checkpoint_info)
+
+    shared.log.error(f'Load model: type=NextStep model="{checkpoint_info.name}" repo="{repo_id}" not supported')
+
+    """
+    load_args, _quant_args = model_quant.get_dit_args(diffusers_load_config, module='Model')
+    shared.log.debug(f'Load model: type=NextStep model="{checkpoint_info.name}" repo="{repo_id}" offload={shared.opts.diffusers_offload_mode} dtype={devices.dtype} args={load_args}')
+
+    from pipelines.nextstep import NextStepPipeline, NextStep
+
+    def __call__(self,
+                 prompt = None,
+                 image = None,
+                 height = 1024,
+                 width = 1024,
+                 num_inference_steps: int = 20,
+                 guidance_scale: float = 1.0,
+                 generator = None,
+                 ):
+        return self.generate_image(self,
+                                   captions = prompt,
+                                   images = [image] if image is not None else None,
+                                   num_images_per_caption = 1,
+                                   positive_prompt = None,
+                                   negative_prompt = None,
+                                   hw = (height, width),
+                                   use_norm = False,
+                                   cfg = guidance_scale,
+                                   cfg_img = 1.0,
+                                   cfg_schedule = "constant", # "linear", "constant"
+                                   num_sampling_steps = num_inference_steps,
+                                   timesteps_shift = 1.0,
+                                   seed = generator.initial_seed(),
+                                   progress = True,
+                                   )
+
+    NextStepPipeline.__call__ = __call__
+
+    # tokenizer = transformers.AutoTokenizer.from_pretrained(HF_HUB, local_files_only=True, trust_remote_code=True)
+    model = generic.load_transformer(repo_id, cls_name=NextStep, load_config=diffusers_load_config)
+    pipe = NextStepPipeline(
+        repo_id,
+        model=model,
+        cache_dir=shared.opts.diffusers_dir,
+        **load_args,
+    )
+
+    from modules.video_models import video_vae
+    pipe.vae.orig_decode = pipe.vae.decode
+    pipe.vae.decode = video_vae.hijack_vae_decode
+
+    devices.torch_gc()
+    return pipe
+    """
+
+    return None