fix hidream loader

Signed-off-by: Vladimir Mandic <mandic00@live.com>
pull/3892/head
Vladimir Mandic 2025-04-22 11:04:35 -04:00
parent 397c69c8b0
commit a9969491ff
6 changed files with 37 additions and 11 deletions

View File

@@ -1,6 +1,6 @@
 # Change Log for SD.Next

-## Update for 2025-04-21
+## Update for 2025-04-22

 - **Features**
   - [Nunchaku](https://github.com/mit-han-lab/nunchaku) inference engine with custom **SVDQuant** 4-bit execution
@@ -22,7 +22,7 @@
     enable and configure in *settings -> pipeline modifiers -> cfg zero*
     experiment with CFGZero support in XYZ-grid
 - **Optimizations**
-  - **HiDream** optimized offloading and prompt-encode caching
+  - **HiDream-I1** optimized offloading and prompt-encode caching
     it now works in 12GB VRAM / 26GB RAM!
   - **CogView3** and **CogView4** model loader optimizations
   - **Sana** model loader optimizations
@@ -30,7 +30,10 @@
     configure in *settings -> text encoder -> offload*
 - **Other**
   - **HiDream-I1, FLUX.1, SD3.x** add HF gated access auth check
-  - **HiDream** add LLM into to metadata
+  - **HiDream-I1** LoRA support
+    currently limited to diffusers-only LoRAs, CivitAI LoRA support is TBD
+  - **HiDream-I1** add LLM info to image metadata
+  - add `model_type` as option for image filename pattern
   - add **UniPC FlowMatch** scheduler
   - add **LCM FlowMatch** scheduler
   - networks: set which networks to skip when scanning civitai
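Editor's note: the two FlowMatch scheduler entries correspond to diffusers schedulers configured with flow-matching sigmas. A hedged sketch of wiring one up; the `use_flow_sigmas` flag is an assumption based on recent diffusers releases (it may not exist in older versions), and the repo id is a placeholder:

```python
from diffusers import DiffusionPipeline, UniPCMultistepScheduler

pipe = DiffusionPipeline.from_pretrained('some/flow-match-model')  # placeholder repo id
pipe.scheduler = UniPCMultistepScheduler.from_config(
    pipe.scheduler.config,
    use_flow_sigmas=True,  # assumption: recent diffusers expose this to enable flow-matching sigmas
)
```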

View File

@@ -13,6 +13,7 @@ from transformers import (
 )
 from diffusers.image_processor import VaeImageProcessor
+from diffusers.loaders import HiDreamImageLoraLoaderMixin
 from diffusers.models import AutoencoderKL, HiDreamImageTransformer2DModel
 from diffusers.schedulers import FlowMatchEulerDiscreteScheduler, UniPCMultistepScheduler
 from diffusers.utils import is_torch_xla_available, logging
@@ -165,7 +166,7 @@ def retrieve_timesteps(
     return timesteps, num_inference_steps


-class HiDreamImageCFGZeroPipeline(DiffusionPipeline):
+class HiDreamImageCFGZeroPipeline(DiffusionPipeline, HiDreamImageLoraLoaderMixin):
     model_cpu_offload_seq = "text_encoder->text_encoder_2->text_encoder_3->text_encoder_4->transformer->vae"
     _callback_tensor_inputs = ["latents", "prompt_embeds"]
@@ -699,10 +700,11 @@
                 noise_pred = self.transformer(
                     hidden_states=latent_model_input,
                     timesteps=timestep,
-                    encoder_hidden_states=prompt_embeds,
+                    encoder_hidden_states_t5=prompt_embeds[0],
+                    encoder_hidden_states_llama3=prompt_embeds[1],
                     pooled_embeds=pooled_prompt_embeds,
-                    img_sizes=img_sizes,
-                    img_ids=img_ids,
+                    # img_sizes=img_sizes,
+                    # img_ids=img_ids,
                     return_dict=False,
                 )[0]
                 noise_pred = -noise_pred
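Editor's note: this pipeline is the CFGZero variant mentioned in the changelog. For readers unfamiliar with the technique, a standalone sketch of the CFG-Zero* guidance rescaling idea (project the conditional prediction onto the unconditional one to find a per-sample optimal scale before applying classifier-free guidance); variable names and the epsilon guard are illustrative assumptions, not this pipeline's exact code:

```python
import torch

def cfg_zero_star(noise_uncond: torch.Tensor, noise_cond: torch.Tensor, guidance_scale: float) -> torch.Tensor:
    flat_c = noise_cond.flatten(1)   # (batch, n)
    flat_u = noise_uncond.flatten(1)
    # per-sample optimal scale: alpha = <cond, uncond> / <uncond, uncond>
    alpha = (flat_c * flat_u).sum(dim=1, keepdim=True) / (flat_u.pow(2).sum(dim=1, keepdim=True) + 1e-8)
    alpha = alpha.view(-1, *([1] * (noise_cond.dim() - 1)))  # broadcast back to latent shape
    baseline = noise_uncond * alpha  # rescaled unconditional prediction
    return baseline + guidance_scale * (noise_cond - baseline)
```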

View File

@@ -37,6 +37,7 @@ class FilenameGenerator:
         'model': lambda self: shared.sd_model.sd_checkpoint_info.title if shared.sd_loaded and getattr(shared.sd_model, 'sd_checkpoint_info', None) is not None else '',
         'model_shortname': lambda self: shared.sd_model.sd_checkpoint_info.model_name if shared.sd_loaded and getattr(shared.sd_model, 'sd_checkpoint_info', None) is not None else '',
         'model_name': lambda self: shared.sd_model.sd_checkpoint_info.model_name if shared.sd_loaded and getattr(shared.sd_model, 'sd_checkpoint_info', None) is not None else '',
+        'model_type': lambda self: shared.sd_model_type if shared.sd_loaded else '',
         'model_hash': lambda self: shared.sd_model.sd_checkpoint_info.shorthash if shared.sd_loaded and getattr(shared.sd_model, 'sd_checkpoint_info', None) is not None else '',
         'prompt': lambda self: self.prompt_full(),
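Editor's note: the new `model_type` token slots into this replacement table. A self-contained illustration of how such a token table expands a filename pattern; the table, context dict, and values below are simplified stand-ins, since the real class resolves everything from `shared.sd_model`:

```python
import re

# simplified stand-in for the replacement table above
REPLACEMENTS = {
    'model_type': lambda ctx: ctx.get('model_type', ''),
    'model_name': lambda ctx: ctx.get('model_name', ''),
}

def apply_pattern(pattern: str, ctx: dict) -> str:
    def substitute(match: re.Match) -> str:
        fn = REPLACEMENTS.get(match.group(1))
        return fn(ctx) if fn else match.group(0)  # leave unknown tokens untouched
    return re.sub(r'\[(\w+)\]', substitute, pattern)

print(apply_pattern('[model_type]-[model_name]', {'model_type': 'sdxl', 'model_name': 'juggernaut'}))
# -> sdxl-juggernaut
```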

View File

@@ -139,5 +139,16 @@ except ImportError:
 except ImportError:
     pass # shrug...

+deprecate_diffusers = diffusers.utils.deprecation_utils.deprecate
+def deprecate_warn(*args, **kwargs):
+    try:
+        deprecate_diffusers(*args, **kwargs)
+    except Exception as e:
+        errors.log.warning(f'Deprecation: {e}')
+diffusers.utils.deprecation_utils.deprecate = deprecate_warn
+diffusers.utils.deprecate = deprecate_warn

 errors.log.info(f'Torch: torch=={torch.__version__} torchvision=={torchvision.__version__}')
 errors.log.info(f'Packages: diffusers=={diffusers.__version__} transformers=={transformers.__version__} accelerate=={accelerate.__version__} gradio=={gradio.__version__} pydantic=={pydantic.__version__}')
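Editor's note: the monkey-patch above downgrades diffusers deprecation errors to log warnings so version drift cannot abort startup. The same pattern as a generic standalone wrapper (logger name is a placeholder):

```python
import logging

log = logging.getLogger('sdnext-sketch')  # placeholder logger name

def warn_only(fn):
    # wrap fn so any exception it raises is logged as a warning instead of propagating
    def wrapper(*args, **kwargs):
        try:
            return fn(*args, **kwargs)
        except Exception as e:
            log.warning(f'Deprecation: {e}')
    return wrapper

# usage mirrors the patch above:
#   module.deprecate = warn_only(module.deprecate)
```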

View File

@@ -39,8 +39,13 @@ def load_diffusers(name, network_on_disk, lora_scale=shared.opts.extra_networks_
         errors.display(e, "LoRA")
         return None
     if name not in diffuser_loaded:
-        diffuser_loaded.append(name)
-        diffuser_scales.append(lora_scale)
+        list_adapters = shared.sd_model.get_list_adapters()
+        list_adapters = {adapter for adapters in list_adapters.values() for adapter in adapters}
+        if name not in list_adapters:
+            shared.log.error(f'Network load: type=LoRA name="{name}" adapters={list_adapters} not loaded')
+        else:
+            diffuser_loaded.append(name)
+            diffuser_scales.append(lora_scale)
     net = network.Network(name, network_on_disk)
     net.mtime = os.path.getmtime(network_on_disk.filename)
     l.timer.activate += time.time() - t0
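Editor's note: the added guard flattens the per-component adapter mapping before testing membership, so a LoRA that silently failed to attach is reported instead of being scaled later. A tiny standalone illustration of that flattening (the mapping literal is made up):

```python
# get_list_adapters() returns a dict of component -> adapter names, e.g.:
list_adapters = {'transformer': ['detail-lora'], 'text_encoder': ['detail-lora', 'style-lora']}  # made-up values
# flatten to a set of unique adapter names across all components
flat = {adapter for adapters in list_adapters.values() for adapter in adapters}
print(flat)  # {'detail-lora', 'style-lora'}
```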
@@ -269,6 +274,9 @@ def network_load(names, te_multipliers=None, unet_multipliers=None, dyn_dims=None):
     shared.log.debug(f'Network load: type=LoRA loaded={diffuser_loaded} available={shared.sd_model.get_list_adapters()} active={shared.sd_model.get_active_adapters()} scales={diffuser_scales}')
     try:
         t1 = time.time()
+        if l.debug:
+            shared.log.trace(f'Network load: type=LoRA list={shared.sd_model.get_list_adapters()}')
+            shared.log.trace(f'Network load: type=LoRA active={shared.sd_model.get_active_adapters()}')
         shared.sd_model.set_adapters(adapter_names=diffuser_loaded, adapter_weights=diffuser_scales)
         if shared.opts.lora_fuse_diffusers and not lora_overrides.check_fuse():
             shared.sd_model.fuse_lora(adapter_names=diffuser_loaded, lora_scale=1.0, fuse_unet=True, fuse_text_encoder=True) # diffusers with fuse uses fixed scale since later apply does the scaling
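Editor's note: the calls above follow the standard diffusers multi-adapter flow. A hedged sketch with placeholder repo ids; `fuse_lora` keyword names vary across diffusers versions, so treat the exact signature as an assumption:

```python
from diffusers import DiffusionPipeline

# placeholder ids; any LoRA-capable diffusers pipeline works the same way
pipe = DiffusionPipeline.from_pretrained('some/base-model')
pipe.load_lora_weights('some/lora-a', adapter_name='a')
pipe.load_lora_weights('some/lora-b', adapter_name='b')
# activate both adapters with per-adapter weights
pipe.set_adapters(adapter_names=['a', 'b'], adapter_weights=[0.8, 0.5])
# optionally bake the active adapters into the base weights for faster inference
pipe.fuse_lora(adapter_names=['a', 'b'], lora_scale=1.0)
```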

View File

@@ -58,8 +58,9 @@ def load_text_encoders(repo_id, diffusers_load_config={}):
     llama_repo = shared.opts.model_h1_llama_repo if shared.opts.model_h1_llama_repo != 'Default' else 'meta-llama/Meta-Llama-3.1-8B-Instruct'
     shared.log.debug(f'Load model: type=HiDream te4="{llama_repo}" quant="{model_quant.get_quant_type(quant_args)}" args={load_args}')
+    auth_check(llama_repo)
     text_encoder_4 = transformers.LlamaForCausalLM.from_pretrained(
-        shared.opts.model_h1_llama_repo,
+        llama_repo,
         output_hidden_states=True,
         output_attentions=True,
         cache_dir=shared.opts.hfcache_dir,
@@ -67,7 +68,7 @@ def load_text_encoders(repo_id, diffusers_load_config={}):
         **quant_args,
     )
     tokenizer_4 = transformers.PreTrainedTokenizerFast.from_pretrained(
-        shared.opts.model_h1_llama_repo,
+        llama_repo,
         cache_dir=shared.opts.hfcache_dir,
         **load_args,
     )
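Editor's note: the fix above resolves the configured repo once into `llama_repo` so the 'Default' sentinel can never reach `from_pretrained`, and the Llama checkpoint serves as HiDream's fourth text encoder, keeping hidden states for conditioning rather than generating text. A minimal standalone sketch of that load path (the repo is gated, so an accepted license and HF auth are required; `torch_dtype='auto'` is an assumption, not this loader's setting):

```python
import transformers

llama_repo = 'meta-llama/Meta-Llama-3.1-8B-Instruct'  # gated: needs an HF token with accepted license
tokenizer = transformers.PreTrainedTokenizerFast.from_pretrained(llama_repo)
text_encoder = transformers.LlamaForCausalLM.from_pretrained(
    llama_repo,
    output_hidden_states=True,   # per-layer hidden states are used as conditioning
    output_attentions=True,
    torch_dtype='auto',          # assumption; the real loader passes its own dtype/quant args
)
```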