pull/3221/head
Vladimir Mandic 2024-06-12 21:02:43 -04:00
parent 5f41181576
commit 6e01d510d4
41 changed files with 327666 additions and 44 deletions

View File

@ -1,15 +1,15 @@
# Change Log for SD.Next
## TODO
- StableDiffusion 3
## Update for 2024-06-11
*Note*: New features require `diffusers==0.29.0.dev`
## Update for 2024-06-12
### New Models
- [StabilityAI Stable Diffusion 3 Medium](https://stability.ai/news/stable-diffusion-3-medium)
yup, supported!
quote: "Stable Diffusion 3 Medium is a multimodal diffusion transformer (MMDiT) model that features improved performance in image quality, typography, complex prompt understanding, and resource-efficiency"
sdnext also supports switching optional T5 text encoder on-the-fly as well as loading model from either diffusers repo or safetensors single-file
for details, see [Wiki](https://github.com/vladmandic/automatic/wiki/SD3)
- [Tencent HunyuanDiT](https://github.com/Tencent/HunyuanDiT) bilingual English/Chinese diffusion transformer model
note: this is a very large model at ~17GB, but can be used with less VRAM using model offloading
simply select from networks -> models -> reference, model will be auto-downloaded on first use
@ -49,6 +49,7 @@
- add torch **full deterministic mode**
enable in settings -> compute -> use deterministic mode
typical differences are not large and it's disabled by default as it does have some performance impact
- new sampler: **Euler FlowMatch**
### Improvements

View File

@ -8,7 +8,6 @@ Main ToDo list can be found at [GitHub projects](https://github.com/users/vladma
- animatediff-sdxl <https://github.com/huggingface/diffusers/pull/6721>
- async lowvram: <https://github.com/AUTOMATIC1111/stable-diffusion-webui/pull/14855>
- fp8: <https://github.com/AUTOMATIC1111/stable-diffusion-webui/pull/14031>
- profiling: <https://github.com/lllyasviel/stable-diffusion-webui-forge/discussions/716>
- init latents: variations, img2img
- diffusers public callbacks
- include reference styles

View File

@ -0,0 +1,41 @@
{
"_class_name": "StableDiffusion3Pipeline",
"_diffusers_version": "0.29.0.dev0",
"_name_or_path": "stabilityai/stable-diffusion-3-medium",
"scheduler": [
"diffusers",
"FlowMatchEulerDiscreteScheduler"
],
"text_encoder": [
"transformers",
"CLIPTextModelWithProjection"
],
"text_encoder_2": [
"transformers",
"CLIPTextModelWithProjection"
],
"text_encoder_3": [
"transformers",
"T5EncoderModel"
],
"tokenizer": [
"transformers",
"CLIPTokenizer"
],
"tokenizer_2": [
"transformers",
"CLIPTokenizer"
],
"tokenizer_3": [
"transformers",
"T5TokenizerFast"
],
"transformer": [
"diffusers",
"SD3Transformer2DModel"
],
"vae": [
"diffusers",
"AutoencoderKL"
]
}

View File

@ -0,0 +1,6 @@
{
"_class_name": "FlowMatchEulerDiscreteScheduler",
"_diffusers_version": "0.29.0.dev0",
"num_train_timesteps": 1000,
"shift": 3.0
}

View File

@ -0,0 +1,25 @@
{
"_name_or_path": "/raid/.cache/huggingface/models--stabilityai--stable-diffusion-3-medium/snapshots/84a9ff37a0a30f7252e21daae69cfd0134198d27/text_encoder",
"architectures": [
"CLIPTextModelWithProjection"
],
"attention_dropout": 0.0,
"bos_token_id": 0,
"dropout": 0.0,
"eos_token_id": 2,
"hidden_act": "quick_gelu",
"hidden_size": 768,
"initializer_factor": 1.0,
"initializer_range": 0.02,
"intermediate_size": 3072,
"layer_norm_eps": 1e-05,
"max_position_embeddings": 77,
"model_type": "clip_text_model",
"num_attention_heads": 12,
"num_hidden_layers": 12,
"pad_token_id": 1,
"projection_dim": 768,
"torch_dtype": "float32",
"transformers_version": "4.41.0.dev0",
"vocab_size": 49408
}

View File

@ -0,0 +1,25 @@
{
"_name_or_path": "/raid/.cache/huggingface/models--stabilityai--stable-diffusion-3-medium/snapshots/84a9ff37a0a30f7252e21daae69cfd0134198d27/text_encoder_2",
"architectures": [
"CLIPTextModelWithProjection"
],
"attention_dropout": 0.0,
"bos_token_id": 0,
"dropout": 0.0,
"eos_token_id": 2,
"hidden_act": "gelu",
"hidden_size": 1280,
"initializer_factor": 1.0,
"initializer_range": 0.02,
"intermediate_size": 5120,
"layer_norm_eps": 1e-05,
"max_position_embeddings": 77,
"model_type": "clip_text_model",
"num_attention_heads": 20,
"num_hidden_layers": 32,
"pad_token_id": 1,
"projection_dim": 1280,
"torch_dtype": "float32",
"transformers_version": "4.41.0.dev0",
"vocab_size": 49408
}

View File

@ -0,0 +1,32 @@
{
"_name_or_path": "/raid/.cache/huggingface/models--stabilityai--stable-diffusion-3-medium/snapshots/84a9ff37a0a30f7252e21daae69cfd0134198d27/text_encoder_3",
"architectures": [
"T5EncoderModel"
],
"classifier_dropout": 0.0,
"d_ff": 10240,
"d_kv": 64,
"d_model": 4096,
"decoder_start_token_id": 0,
"dense_act_fn": "gelu_new",
"dropout_rate": 0.1,
"eos_token_id": 1,
"feed_forward_proj": "gated-gelu",
"initializer_factor": 1.0,
"is_encoder_decoder": true,
"is_gated_act": true,
"layer_norm_epsilon": 1e-06,
"model_type": "t5",
"num_decoder_layers": 24,
"num_heads": 64,
"num_layers": 24,
"output_past": true,
"pad_token_id": 0,
"relative_attention_max_distance": 128,
"relative_attention_num_buckets": 32,
"tie_word_embeddings": false,
"torch_dtype": "float32",
"transformers_version": "4.41.0.dev0",
"use_cache": true,
"vocab_size": 32128
}

File diff suppressed because it is too large Load Diff

View File

@ -0,0 +1,30 @@
{
"bos_token": {
"content": "<|startoftext|>",
"lstrip": false,
"normalized": true,
"rstrip": false,
"single_word": false
},
"eos_token": {
"content": "<|endoftext|>",
"lstrip": false,
"normalized": false,
"rstrip": false,
"single_word": false
},
"pad_token": {
"content": "<|endoftext|>",
"lstrip": false,
"normalized": false,
"rstrip": false,
"single_word": false
},
"unk_token": {
"content": "<|endoftext|>",
"lstrip": false,
"normalized": false,
"rstrip": false,
"single_word": false
}
}

View File

@ -0,0 +1,30 @@
{
"add_prefix_space": false,
"added_tokens_decoder": {
"49406": {
"content": "<|startoftext|>",
"lstrip": false,
"normalized": true,
"rstrip": false,
"single_word": false,
"special": true
},
"49407": {
"content": "<|endoftext|>",
"lstrip": false,
"normalized": false,
"rstrip": false,
"single_word": false,
"special": true
}
},
"bos_token": "<|startoftext|>",
"clean_up_tokenization_spaces": true,
"do_lower_case": true,
"eos_token": "<|endoftext|>",
"errors": "replace",
"model_max_length": 77,
"pad_token": "<|endoftext|>",
"tokenizer_class": "CLIPTokenizer",
"unk_token": "<|endoftext|>"
}

File diff suppressed because it is too large Load Diff

File diff suppressed because it is too large Load Diff

View File

@ -0,0 +1,30 @@
{
"bos_token": {
"content": "<|startoftext|>",
"lstrip": false,
"normalized": true,
"rstrip": false,
"single_word": false
},
"eos_token": {
"content": "<|endoftext|>",
"lstrip": false,
"normalized": false,
"rstrip": false,
"single_word": false
},
"pad_token": {
"content": "!",
"lstrip": false,
"normalized": false,
"rstrip": false,
"single_word": false
},
"unk_token": {
"content": "<|endoftext|>",
"lstrip": false,
"normalized": false,
"rstrip": false,
"single_word": false
}
}

View File

@ -0,0 +1,38 @@
{
"add_prefix_space": false,
"added_tokens_decoder": {
"0": {
"content": "!",
"lstrip": false,
"normalized": false,
"rstrip": false,
"single_word": false,
"special": true
},
"49406": {
"content": "<|startoftext|>",
"lstrip": false,
"normalized": true,
"rstrip": false,
"single_word": false,
"special": true
},
"49407": {
"content": "<|endoftext|>",
"lstrip": false,
"normalized": false,
"rstrip": false,
"single_word": false,
"special": true
}
},
"bos_token": "<|startoftext|>",
"clean_up_tokenization_spaces": true,
"do_lower_case": true,
"eos_token": "<|endoftext|>",
"errors": "replace",
"model_max_length": 77,
"pad_token": "!",
"tokenizer_class": "CLIPTokenizer",
"unk_token": "<|endoftext|>"
}

File diff suppressed because it is too large Load Diff

View File

@ -0,0 +1,125 @@
{
"additional_special_tokens": [
"<extra_id_0>",
"<extra_id_1>",
"<extra_id_2>",
"<extra_id_3>",
"<extra_id_4>",
"<extra_id_5>",
"<extra_id_6>",
"<extra_id_7>",
"<extra_id_8>",
"<extra_id_9>",
"<extra_id_10>",
"<extra_id_11>",
"<extra_id_12>",
"<extra_id_13>",
"<extra_id_14>",
"<extra_id_15>",
"<extra_id_16>",
"<extra_id_17>",
"<extra_id_18>",
"<extra_id_19>",
"<extra_id_20>",
"<extra_id_21>",
"<extra_id_22>",
"<extra_id_23>",
"<extra_id_24>",
"<extra_id_25>",
"<extra_id_26>",
"<extra_id_27>",
"<extra_id_28>",
"<extra_id_29>",
"<extra_id_30>",
"<extra_id_31>",
"<extra_id_32>",
"<extra_id_33>",
"<extra_id_34>",
"<extra_id_35>",
"<extra_id_36>",
"<extra_id_37>",
"<extra_id_38>",
"<extra_id_39>",
"<extra_id_40>",
"<extra_id_41>",
"<extra_id_42>",
"<extra_id_43>",
"<extra_id_44>",
"<extra_id_45>",
"<extra_id_46>",
"<extra_id_47>",
"<extra_id_48>",
"<extra_id_49>",
"<extra_id_50>",
"<extra_id_51>",
"<extra_id_52>",
"<extra_id_53>",
"<extra_id_54>",
"<extra_id_55>",
"<extra_id_56>",
"<extra_id_57>",
"<extra_id_58>",
"<extra_id_59>",
"<extra_id_60>",
"<extra_id_61>",
"<extra_id_62>",
"<extra_id_63>",
"<extra_id_64>",
"<extra_id_65>",
"<extra_id_66>",
"<extra_id_67>",
"<extra_id_68>",
"<extra_id_69>",
"<extra_id_70>",
"<extra_id_71>",
"<extra_id_72>",
"<extra_id_73>",
"<extra_id_74>",
"<extra_id_75>",
"<extra_id_76>",
"<extra_id_77>",
"<extra_id_78>",
"<extra_id_79>",
"<extra_id_80>",
"<extra_id_81>",
"<extra_id_82>",
"<extra_id_83>",
"<extra_id_84>",
"<extra_id_85>",
"<extra_id_86>",
"<extra_id_87>",
"<extra_id_88>",
"<extra_id_89>",
"<extra_id_90>",
"<extra_id_91>",
"<extra_id_92>",
"<extra_id_93>",
"<extra_id_94>",
"<extra_id_95>",
"<extra_id_96>",
"<extra_id_97>",
"<extra_id_98>",
"<extra_id_99>"
],
"eos_token": {
"content": "</s>",
"lstrip": false,
"normalized": false,
"rstrip": false,
"single_word": false
},
"pad_token": {
"content": "<pad>",
"lstrip": false,
"normalized": false,
"rstrip": false,
"single_word": false
},
"unk_token": {
"content": "<unk>",
"lstrip": false,
"normalized": false,
"rstrip": false,
"single_word": false
}
}

Binary file not shown.

View File

@ -0,0 +1,4 @@
[ZoneTransfer]
ZoneId=3
ReferrerUrl=https://huggingface.co/
HostUrl=https://cdn-lfs-us-1.huggingface.co/repos/87/5b/875b54ebbe0a756f9b6b63b20f1dfbf2c59e3f0504063ab6084e000dd99636cd/d60acb128cf7b7f2536e8f38a5b18a05535c9e14c7a355904270e15b0945ea86?response-content-disposition=attachment%3B+filename*%3DUTF-8%27%27spiece.model%3B+filename%3D%22spiece.model%22%3B&Expires=1718478490&Policy=eyJTdGF0ZW1lbnQiOlt7IkNvbmRpdGlvbiI6eyJEYXRlTGVzc1RoYW4iOnsiQVdTOkVwb2NoVGltZSI6MTcxODQ3ODQ5MH19LCJSZXNvdXJjZSI6Imh0dHBzOi8vY2RuLWxmcy11cy0xLmh1Z2dpbmdmYWNlLmNvL3JlcG9zLzg3LzViLzg3NWI1NGViYmUwYTc1NmY5YjZiNjNiMjBmMWRmYmYyYzU5ZTNmMDUwNDA2M2FiNjA4NGUwMDBkZDk5NjM2Y2QvZDYwYWNiMTI4Y2Y3YjdmMjUzNmU4ZjM4YTViMThhMDU1MzVjOWUxNGM3YTM1NTkwNDI3MGUxNWIwOTQ1ZWE4Nj9yZXNwb25zZS1jb250ZW50LWRpc3Bvc2l0aW9uPSoifV19&Signature=B4x0TP9UV0G-O57AIv6GSM2BApN7qq4Oxyseb0d7SpgCshJ8pD2JSCxAuNClH%7Ezogiwwh17Pl-0uqAr4yF2K%7EGjX0DkwvK9lmYq2N8qtzf5k0SIRD6v1tTrmquz%7EhTfPJrJBvviSZK9529-Vx3j1cnzEbfcuN7qSYgEKYEIxC1sS2rCpCbma8P0JE6C4oARC5kM-EoiQ0Ka4Efn6%7EnX8FgJ2C4ECmHs3QrBQ81M75Mmyl91MwydBD-tNKAlL-NyADaNwl1XQgE6n76vjrdARTKhRuf5J5vHO9vfirQnpYqKgzwMT6s9dkYuqB-3UxDUF0R3iPhrptz%7EORnOvHkojkA__&Key-Pair-Id=K2FPYV99P2N66Q

File diff suppressed because one or more lines are too long

View File

@ -0,0 +1,940 @@
{
"add_prefix_space": true,
"added_tokens_decoder": {
"0": {
"content": "<pad>",
"lstrip": false,
"normalized": false,
"rstrip": false,
"single_word": false,
"special": true
},
"1": {
"content": "</s>",
"lstrip": false,
"normalized": false,
"rstrip": false,
"single_word": false,
"special": true
},
"2": {
"content": "<unk>",
"lstrip": false,
"normalized": false,
"rstrip": false,
"single_word": false,
"special": true
},
"32000": {
"content": "<extra_id_99>",
"lstrip": true,
"normalized": false,
"rstrip": true,
"single_word": false,
"special": true
},
"32001": {
"content": "<extra_id_98>",
"lstrip": true,
"normalized": false,
"rstrip": true,
"single_word": false,
"special": true
},
"32002": {
"content": "<extra_id_97>",
"lstrip": true,
"normalized": false,
"rstrip": true,
"single_word": false,
"special": true
},
"32003": {
"content": "<extra_id_96>",
"lstrip": true,
"normalized": false,
"rstrip": true,
"single_word": false,
"special": true
},
"32004": {
"content": "<extra_id_95>",
"lstrip": true,
"normalized": false,
"rstrip": true,
"single_word": false,
"special": true
},
"32005": {
"content": "<extra_id_94>",
"lstrip": true,
"normalized": false,
"rstrip": true,
"single_word": false,
"special": true
},
"32006": {
"content": "<extra_id_93>",
"lstrip": true,
"normalized": false,
"rstrip": true,
"single_word": false,
"special": true
},
"32007": {
"content": "<extra_id_92>",
"lstrip": true,
"normalized": false,
"rstrip": true,
"single_word": false,
"special": true
},
"32008": {
"content": "<extra_id_91>",
"lstrip": true,
"normalized": false,
"rstrip": true,
"single_word": false,
"special": true
},
"32009": {
"content": "<extra_id_90>",
"lstrip": true,
"normalized": false,
"rstrip": true,
"single_word": false,
"special": true
},
"32010": {
"content": "<extra_id_89>",
"lstrip": true,
"normalized": false,
"rstrip": true,
"single_word": false,
"special": true
},
"32011": {
"content": "<extra_id_88>",
"lstrip": true,
"normalized": false,
"rstrip": true,
"single_word": false,
"special": true
},
"32012": {
"content": "<extra_id_87>",
"lstrip": true,
"normalized": false,
"rstrip": true,
"single_word": false,
"special": true
},
"32013": {
"content": "<extra_id_86>",
"lstrip": true,
"normalized": false,
"rstrip": true,
"single_word": false,
"special": true
},
"32014": {
"content": "<extra_id_85>",
"lstrip": true,
"normalized": false,
"rstrip": true,
"single_word": false,
"special": true
},
"32015": {
"content": "<extra_id_84>",
"lstrip": true,
"normalized": false,
"rstrip": true,
"single_word": false,
"special": true
},
"32016": {
"content": "<extra_id_83>",
"lstrip": true,
"normalized": false,
"rstrip": true,
"single_word": false,
"special": true
},
"32017": {
"content": "<extra_id_82>",
"lstrip": true,
"normalized": false,
"rstrip": true,
"single_word": false,
"special": true
},
"32018": {
"content": "<extra_id_81>",
"lstrip": true,
"normalized": false,
"rstrip": true,
"single_word": false,
"special": true
},
"32019": {
"content": "<extra_id_80>",
"lstrip": true,
"normalized": false,
"rstrip": true,
"single_word": false,
"special": true
},
"32020": {
"content": "<extra_id_79>",
"lstrip": true,
"normalized": false,
"rstrip": true,
"single_word": false,
"special": true
},
"32021": {
"content": "<extra_id_78>",
"lstrip": true,
"normalized": false,
"rstrip": true,
"single_word": false,
"special": true
},
"32022": {
"content": "<extra_id_77>",
"lstrip": true,
"normalized": false,
"rstrip": true,
"single_word": false,
"special": true
},
"32023": {
"content": "<extra_id_76>",
"lstrip": true,
"normalized": false,
"rstrip": true,
"single_word": false,
"special": true
},
"32024": {
"content": "<extra_id_75>",
"lstrip": true,
"normalized": false,
"rstrip": true,
"single_word": false,
"special": true
},
"32025": {
"content": "<extra_id_74>",
"lstrip": true,
"normalized": false,
"rstrip": true,
"single_word": false,
"special": true
},
"32026": {
"content": "<extra_id_73>",
"lstrip": true,
"normalized": false,
"rstrip": true,
"single_word": false,
"special": true
},
"32027": {
"content": "<extra_id_72>",
"lstrip": true,
"normalized": false,
"rstrip": true,
"single_word": false,
"special": true
},
"32028": {
"content": "<extra_id_71>",
"lstrip": true,
"normalized": false,
"rstrip": true,
"single_word": false,
"special": true
},
"32029": {
"content": "<extra_id_70>",
"lstrip": true,
"normalized": false,
"rstrip": true,
"single_word": false,
"special": true
},
"32030": {
"content": "<extra_id_69>",
"lstrip": true,
"normalized": false,
"rstrip": true,
"single_word": false,
"special": true
},
"32031": {
"content": "<extra_id_68>",
"lstrip": true,
"normalized": false,
"rstrip": true,
"single_word": false,
"special": true
},
"32032": {
"content": "<extra_id_67>",
"lstrip": true,
"normalized": false,
"rstrip": true,
"single_word": false,
"special": true
},
"32033": {
"content": "<extra_id_66>",
"lstrip": true,
"normalized": false,
"rstrip": true,
"single_word": false,
"special": true
},
"32034": {
"content": "<extra_id_65>",
"lstrip": true,
"normalized": false,
"rstrip": true,
"single_word": false,
"special": true
},
"32035": {
"content": "<extra_id_64>",
"lstrip": true,
"normalized": false,
"rstrip": true,
"single_word": false,
"special": true
},
"32036": {
"content": "<extra_id_63>",
"lstrip": true,
"normalized": false,
"rstrip": true,
"single_word": false,
"special": true
},
"32037": {
"content": "<extra_id_62>",
"lstrip": true,
"normalized": false,
"rstrip": true,
"single_word": false,
"special": true
},
"32038": {
"content": "<extra_id_61>",
"lstrip": true,
"normalized": false,
"rstrip": true,
"single_word": false,
"special": true
},
"32039": {
"content": "<extra_id_60>",
"lstrip": true,
"normalized": false,
"rstrip": true,
"single_word": false,
"special": true
},
"32040": {
"content": "<extra_id_59>",
"lstrip": true,
"normalized": false,
"rstrip": true,
"single_word": false,
"special": true
},
"32041": {
"content": "<extra_id_58>",
"lstrip": true,
"normalized": false,
"rstrip": true,
"single_word": false,
"special": true
},
"32042": {
"content": "<extra_id_57>",
"lstrip": true,
"normalized": false,
"rstrip": true,
"single_word": false,
"special": true
},
"32043": {
"content": "<extra_id_56>",
"lstrip": true,
"normalized": false,
"rstrip": true,
"single_word": false,
"special": true
},
"32044": {
"content": "<extra_id_55>",
"lstrip": true,
"normalized": false,
"rstrip": true,
"single_word": false,
"special": true
},
"32045": {
"content": "<extra_id_54>",
"lstrip": true,
"normalized": false,
"rstrip": true,
"single_word": false,
"special": true
},
"32046": {
"content": "<extra_id_53>",
"lstrip": true,
"normalized": false,
"rstrip": true,
"single_word": false,
"special": true
},
"32047": {
"content": "<extra_id_52>",
"lstrip": true,
"normalized": false,
"rstrip": true,
"single_word": false,
"special": true
},
"32048": {
"content": "<extra_id_51>",
"lstrip": true,
"normalized": false,
"rstrip": true,
"single_word": false,
"special": true
},
"32049": {
"content": "<extra_id_50>",
"lstrip": true,
"normalized": false,
"rstrip": true,
"single_word": false,
"special": true
},
"32050": {
"content": "<extra_id_49>",
"lstrip": true,
"normalized": false,
"rstrip": true,
"single_word": false,
"special": true
},
"32051": {
"content": "<extra_id_48>",
"lstrip": true,
"normalized": false,
"rstrip": true,
"single_word": false,
"special": true
},
"32052": {
"content": "<extra_id_47>",
"lstrip": true,
"normalized": false,
"rstrip": true,
"single_word": false,
"special": true
},
"32053": {
"content": "<extra_id_46>",
"lstrip": true,
"normalized": false,
"rstrip": true,
"single_word": false,
"special": true
},
"32054": {
"content": "<extra_id_45>",
"lstrip": true,
"normalized": false,
"rstrip": true,
"single_word": false,
"special": true
},
"32055": {
"content": "<extra_id_44>",
"lstrip": true,
"normalized": false,
"rstrip": true,
"single_word": false,
"special": true
},
"32056": {
"content": "<extra_id_43>",
"lstrip": true,
"normalized": false,
"rstrip": true,
"single_word": false,
"special": true
},
"32057": {
"content": "<extra_id_42>",
"lstrip": true,
"normalized": false,
"rstrip": true,
"single_word": false,
"special": true
},
"32058": {
"content": "<extra_id_41>",
"lstrip": true,
"normalized": false,
"rstrip": true,
"single_word": false,
"special": true
},
"32059": {
"content": "<extra_id_40>",
"lstrip": true,
"normalized": false,
"rstrip": true,
"single_word": false,
"special": true
},
"32060": {
"content": "<extra_id_39>",
"lstrip": true,
"normalized": false,
"rstrip": true,
"single_word": false,
"special": true
},
"32061": {
"content": "<extra_id_38>",
"lstrip": true,
"normalized": false,
"rstrip": true,
"single_word": false,
"special": true
},
"32062": {
"content": "<extra_id_37>",
"lstrip": true,
"normalized": false,
"rstrip": true,
"single_word": false,
"special": true
},
"32063": {
"content": "<extra_id_36>",
"lstrip": true,
"normalized": false,
"rstrip": true,
"single_word": false,
"special": true
},
"32064": {
"content": "<extra_id_35>",
"lstrip": true,
"normalized": false,
"rstrip": true,
"single_word": false,
"special": true
},
"32065": {
"content": "<extra_id_34>",
"lstrip": true,
"normalized": false,
"rstrip": true,
"single_word": false,
"special": true
},
"32066": {
"content": "<extra_id_33>",
"lstrip": true,
"normalized": false,
"rstrip": true,
"single_word": false,
"special": true
},
"32067": {
"content": "<extra_id_32>",
"lstrip": true,
"normalized": false,
"rstrip": true,
"single_word": false,
"special": true
},
"32068": {
"content": "<extra_id_31>",
"lstrip": true,
"normalized": false,
"rstrip": true,
"single_word": false,
"special": true
},
"32069": {
"content": "<extra_id_30>",
"lstrip": true,
"normalized": false,
"rstrip": true,
"single_word": false,
"special": true
},
"32070": {
"content": "<extra_id_29>",
"lstrip": true,
"normalized": false,
"rstrip": true,
"single_word": false,
"special": true
},
"32071": {
"content": "<extra_id_28>",
"lstrip": true,
"normalized": false,
"rstrip": true,
"single_word": false,
"special": true
},
"32072": {
"content": "<extra_id_27>",
"lstrip": true,
"normalized": false,
"rstrip": true,
"single_word": false,
"special": true
},
"32073": {
"content": "<extra_id_26>",
"lstrip": true,
"normalized": false,
"rstrip": true,
"single_word": false,
"special": true
},
"32074": {
"content": "<extra_id_25>",
"lstrip": true,
"normalized": false,
"rstrip": true,
"single_word": false,
"special": true
},
"32075": {
"content": "<extra_id_24>",
"lstrip": true,
"normalized": false,
"rstrip": true,
"single_word": false,
"special": true
},
"32076": {
"content": "<extra_id_23>",
"lstrip": true,
"normalized": false,
"rstrip": true,
"single_word": false,
"special": true
},
"32077": {
"content": "<extra_id_22>",
"lstrip": true,
"normalized": false,
"rstrip": true,
"single_word": false,
"special": true
},
"32078": {
"content": "<extra_id_21>",
"lstrip": true,
"normalized": false,
"rstrip": true,
"single_word": false,
"special": true
},
"32079": {
"content": "<extra_id_20>",
"lstrip": true,
"normalized": false,
"rstrip": true,
"single_word": false,
"special": true
},
"32080": {
"content": "<extra_id_19>",
"lstrip": true,
"normalized": false,
"rstrip": true,
"single_word": false,
"special": true
},
"32081": {
"content": "<extra_id_18>",
"lstrip": true,
"normalized": false,
"rstrip": true,
"single_word": false,
"special": true
},
"32082": {
"content": "<extra_id_17>",
"lstrip": true,
"normalized": false,
"rstrip": true,
"single_word": false,
"special": true
},
"32083": {
"content": "<extra_id_16>",
"lstrip": true,
"normalized": false,
"rstrip": true,
"single_word": false,
"special": true
},
"32084": {
"content": "<extra_id_15>",
"lstrip": true,
"normalized": false,
"rstrip": true,
"single_word": false,
"special": true
},
"32085": {
"content": "<extra_id_14>",
"lstrip": true,
"normalized": false,
"rstrip": true,
"single_word": false,
"special": true
},
"32086": {
"content": "<extra_id_13>",
"lstrip": true,
"normalized": false,
"rstrip": true,
"single_word": false,
"special": true
},
"32087": {
"content": "<extra_id_12>",
"lstrip": true,
"normalized": false,
"rstrip": true,
"single_word": false,
"special": true
},
"32088": {
"content": "<extra_id_11>",
"lstrip": true,
"normalized": false,
"rstrip": true,
"single_word": false,
"special": true
},
"32089": {
"content": "<extra_id_10>",
"lstrip": true,
"normalized": false,
"rstrip": true,
"single_word": false,
"special": true
},
"32090": {
"content": "<extra_id_9>",
"lstrip": true,
"normalized": false,
"rstrip": true,
"single_word": false,
"special": true
},
"32091": {
"content": "<extra_id_8>",
"lstrip": true,
"normalized": false,
"rstrip": true,
"single_word": false,
"special": true
},
"32092": {
"content": "<extra_id_7>",
"lstrip": true,
"normalized": false,
"rstrip": true,
"single_word": false,
"special": true
},
"32093": {
"content": "<extra_id_6>",
"lstrip": true,
"normalized": false,
"rstrip": true,
"single_word": false,
"special": true
},
"32094": {
"content": "<extra_id_5>",
"lstrip": true,
"normalized": false,
"rstrip": true,
"single_word": false,
"special": true
},
"32095": {
"content": "<extra_id_4>",
"lstrip": true,
"normalized": false,
"rstrip": true,
"single_word": false,
"special": true
},
"32096": {
"content": "<extra_id_3>",
"lstrip": true,
"normalized": false,
"rstrip": true,
"single_word": false,
"special": true
},
"32097": {
"content": "<extra_id_2>",
"lstrip": true,
"normalized": false,
"rstrip": true,
"single_word": false,
"special": true
},
"32098": {
"content": "<extra_id_1>",
"lstrip": true,
"normalized": false,
"rstrip": true,
"single_word": false,
"special": true
},
"32099": {
"content": "<extra_id_0>",
"lstrip": true,
"normalized": false,
"rstrip": true,
"single_word": false,
"special": true
}
},
"additional_special_tokens": [
"<extra_id_0>",
"<extra_id_1>",
"<extra_id_2>",
"<extra_id_3>",
"<extra_id_4>",
"<extra_id_5>",
"<extra_id_6>",
"<extra_id_7>",
"<extra_id_8>",
"<extra_id_9>",
"<extra_id_10>",
"<extra_id_11>",
"<extra_id_12>",
"<extra_id_13>",
"<extra_id_14>",
"<extra_id_15>",
"<extra_id_16>",
"<extra_id_17>",
"<extra_id_18>",
"<extra_id_19>",
"<extra_id_20>",
"<extra_id_21>",
"<extra_id_22>",
"<extra_id_23>",
"<extra_id_24>",
"<extra_id_25>",
"<extra_id_26>",
"<extra_id_27>",
"<extra_id_28>",
"<extra_id_29>",
"<extra_id_30>",
"<extra_id_31>",
"<extra_id_32>",
"<extra_id_33>",
"<extra_id_34>",
"<extra_id_35>",
"<extra_id_36>",
"<extra_id_37>",
"<extra_id_38>",
"<extra_id_39>",
"<extra_id_40>",
"<extra_id_41>",
"<extra_id_42>",
"<extra_id_43>",
"<extra_id_44>",
"<extra_id_45>",
"<extra_id_46>",
"<extra_id_47>",
"<extra_id_48>",
"<extra_id_49>",
"<extra_id_50>",
"<extra_id_51>",
"<extra_id_52>",
"<extra_id_53>",
"<extra_id_54>",
"<extra_id_55>",
"<extra_id_56>",
"<extra_id_57>",
"<extra_id_58>",
"<extra_id_59>",
"<extra_id_60>",
"<extra_id_61>",
"<extra_id_62>",
"<extra_id_63>",
"<extra_id_64>",
"<extra_id_65>",
"<extra_id_66>",
"<extra_id_67>",
"<extra_id_68>",
"<extra_id_69>",
"<extra_id_70>",
"<extra_id_71>",
"<extra_id_72>",
"<extra_id_73>",
"<extra_id_74>",
"<extra_id_75>",
"<extra_id_76>",
"<extra_id_77>",
"<extra_id_78>",
"<extra_id_79>",
"<extra_id_80>",
"<extra_id_81>",
"<extra_id_82>",
"<extra_id_83>",
"<extra_id_84>",
"<extra_id_85>",
"<extra_id_86>",
"<extra_id_87>",
"<extra_id_88>",
"<extra_id_89>",
"<extra_id_90>",
"<extra_id_91>",
"<extra_id_92>",
"<extra_id_93>",
"<extra_id_94>",
"<extra_id_95>",
"<extra_id_96>",
"<extra_id_97>",
"<extra_id_98>",
"<extra_id_99>"
],
"clean_up_tokenization_spaces": true,
"eos_token": "</s>",
"extra_ids": 100,
"legacy": true,
"model_max_length": 512,
"pad_token": "<pad>",
"sp_model_kwargs": {},
"tokenizer_class": "T5Tokenizer",
"unk_token": "<unk>"
}

View File

@ -0,0 +1,16 @@
{
"_class_name": "SD3Transformer2DModel",
"_diffusers_version": "0.29.0.dev0",
"_name_or_path": "/raid/.cache/huggingface/models--stabilityai--stable-diffusion-3-medium/snapshots/84a9ff37a0a30f7252e21daae69cfd0134198d27/transformer",
"attention_head_dim": 64,
"caption_projection_dim": 1536,
"in_channels": 16,
"joint_attention_dim": 4096,
"num_attention_heads": 24,
"num_layers": 24,
"out_channels": 16,
"patch_size": 2,
"pooled_projection_dim": 2048,
"pos_embed_max_size": 192,
"sample_size": 128
}

View File

@ -0,0 +1,37 @@
{
"_class_name": "AutoencoderKL",
"_diffusers_version": "0.29.0.dev0",
"_name_or_path": "/raid/.cache/huggingface/models--stabilityai--stable-diffusion-3-medium/snapshots/84a9ff37a0a30f7252e21daae69cfd0134198d27/vae",
"act_fn": "silu",
"block_out_channels": [
128,
256,
512,
512
],
"down_block_types": [
"DownEncoderBlock2D",
"DownEncoderBlock2D",
"DownEncoderBlock2D",
"DownEncoderBlock2D"
],
"force_upcast": true,
"in_channels": 3,
"latent_channels": 16,
"latents_mean": null,
"latents_std": null,
"layers_per_block": 2,
"norm_num_groups": 32,
"out_channels": 3,
"sample_size": 1024,
"scaling_factor": 1.5305,
"shift_factor": 0.0609,
"up_block_types": [
"UpDecoderBlock2D",
"UpDecoderBlock2D",
"UpDecoderBlock2D",
"UpDecoderBlock2D"
],
"use_post_quant_conv": false,
"use_quant_conv": false
}

View File

@ -103,6 +103,15 @@
"preview": "stabilityai--stable-cascade.jpg",
"extras": "width: 1024, height: 1024, sampler: Default, cfg_scale: 4.0, image_cfg_scale: 1.0"
},
"StabilityAI Stable Diffusion 3 Medium": {
"path": "huggingface/stabilityai/stable-diffusion-3-medium-diffusers",
"skip": true,
"variant": "fp16",
"te3": null,
"desc": "Stable Diffusion 3 Medium is a Multimodal Diffusion Transformer (MMDiT) text-to-image model that features greatly improved performance in image quality, typography, complex prompt understanding, and resource-efficiency",
"preview": "stabilityai--stable-diffusion-3.jpg",
"extras": "width: 1024, height: 1024, sampler: Default, cfg_scale: 7.0"
},
"Segmind Vega": {
"path": "huggingface/segmind/Segmind-Vega",

Binary file not shown.

After

Width:  |  Height:  |  Size: 137 KiB

156
modules/model_sd3.py Normal file
View File

@ -0,0 +1,156 @@
import os
import warnings
import torch
import diffusers
import transformers
import rich.traceback
# Pretty tracebacks for interactive debugging of this standalone test module
rich.traceback.install()
# Silence diffusers/transformers deprecation chatter during experimentation
warnings.filterwarnings(action="ignore", category=FutureWarning)
# Local huggingface cache directory used by all from_pretrained calls below
cache_dir = '/mnt/models/Diffusers'
# Default single-file SD3 checkpoint path; NOTE(review): machine-specific path,
# presumably only valid on the author's box — confirm before reuse
model_fn = '/mnt/models/stable-diffusion/sd3/sd3_medium_incl_clips.safetensors'
def load_sd3(te3=None, fn=None):
    """Build a StableDiffusion3 pipeline in fp16.

    Args:
        te3: optional T5 text-encoder-3 precision: 'fp16' loads the full
            encoder, 'fp8' loads it 8-bit quantized (bitsandbytes) with a
            balanced device map, anything else skips the T5 encoder entirely.
        fn: optional path to a local `.safetensors` single-file checkpoint;
            when absent (or not found) the hub repo is used instead.

    Returns:
        A configured `diffusers.StableDiffusion3Pipeline`.
    """
    repo_id = 'stabilityai/stable-diffusion-3-medium-diffusers'
    dtype = torch.float16
    # A local single-file checkpoint switches both the loader entry point and
    # the source; it also requires re-fetching the CLIP encoders afterwards.
    from_file = fn is not None and fn.endswith('.safetensors') and os.path.exists(fn)
    if from_file:
        source = fn
        loader = diffusers.StableDiffusion3Pipeline.from_single_file
    else:
        source = repo_id
        loader = diffusers.StableDiffusion3Pipeline.from_pretrained
    if te3 == 'fp16':
        text_encoder_3 = transformers.T5EncoderModel.from_pretrained(
            repo_id,
            subfolder='text_encoder_3',
            torch_dtype=dtype,
            cache_dir=cache_dir,
        )
        pipe = loader(
            source,
            torch_dtype=dtype,
            text_encoder_3=text_encoder_3,
            cache_dir=cache_dir,
        )
    elif te3 == 'fp8':
        quantization_config = transformers.BitsAndBytesConfig(load_in_8bit=True)
        text_encoder_3 = transformers.T5EncoderModel.from_pretrained(
            repo_id,
            subfolder='text_encoder_3',
            quantization_config=quantization_config,
            cache_dir=cache_dir,
        )
        pipe = loader(
            source,
            text_encoder_3=text_encoder_3,
            device_map='balanced',
            torch_dtype=dtype,
            cache_dir=cache_dir,
        )
    else:
        pipe = loader(
            source,
            torch_dtype=dtype,
            text_encoder_3=None,
            cache_dir=cache_dir,
        )
    if from_file:
        # Single-file checkpoints: refresh both CLIP text encoders from the
        # hub repo, matching the VAE's dtype.
        pipe.text_encoder = transformers.CLIPTextModelWithProjection.from_pretrained(
            repo_id,
            subfolder='text_encoder',
            cache_dir=cache_dir,
            torch_dtype=pipe.vae.dtype,
        )
        pipe.text_encoder_2 = transformers.CLIPTextModelWithProjection.from_pretrained(
            repo_id,
            subfolder='text_encoder_2',
            cache_dir=cache_dir,
            torch_dtype=pipe.vae.dtype,
        )
    return pipe
def load_te3(pipe, te3=None):
    """Load, quantize, or remove the optional T5 text-encoder-3 on an SD3 pipeline.

    Args:
        pipe: a StableDiffusion3 pipeline (or None). Objects without a
            `text_encoder_3` attribute are returned unchanged.
        te3: precision selector — a string containing 'fp16' loads the full
            encoder, one containing 'fp8' loads it 8-bit quantized via
            bitsandbytes; anything else (including None) removes the encoder.

    Returns:
        The same pipeline object, modified in place.
    """
    repo_id = 'stabilityai/stable-diffusion-3-medium-diffusers'
    if pipe is None or not hasattr(pipe, 'text_encoder_3'):
        return pipe
    # te3 defaults to None; the previous code crashed on te3.lower() here
    mode = (te3 or '').lower()
    if 'fp16' in mode:
        pipe.text_encoder_3 = transformers.T5EncoderModel.from_pretrained(
            repo_id,
            subfolder='text_encoder_3',
            cache_dir=cache_dir,
            torch_dtype=pipe.text_encoder.dtype,
        )
    elif 'fp8' in mode:
        from installer import install
        install('bitsandbytes', quiet=True)
        quantization_config = transformers.BitsAndBytesConfig(load_in_8bit=True)
        pipe.text_encoder_3 = transformers.T5EncoderModel.from_pretrained(
            repo_id,
            subfolder='text_encoder_3',
            quantization_config=quantization_config,
            cache_dir=cache_dir,
            torch_dtype=pipe.text_encoder.dtype,
        )
    else:
        pipe.text_encoder_3 = None
    # Single-file loads may leave tokenizer_3 unset even when the encoder exists
    if getattr(pipe, 'text_encoder_3', None) is not None and getattr(pipe, 'tokenizer_3', None) is None:
        pipe.tokenizer_3 = transformers.T5TokenizerFast.from_pretrained(
            repo_id,
            subfolder='tokenizer_3',
            cache_dir=cache_dir,
        )
    return pipe  # was missing: the modify path previously returned None
def stats():
    """Return a snapshot of CUDA memory usage as a nested dict.

    Combines driver-level totals from torch.cuda.mem_get_info with the
    allocator counters from torch.cuda.memory_stats, all values in bytes.
    """
    free, total = torch.cuda.mem_get_info()
    counters = dict(torch.cuda.memory_stats('cuda'))

    def current_and_peak(prefix):
        # each allocator metric exposes matching '<prefix>.all.current'/'...peak' keys
        return {
            'current': counters[f'{prefix}.all.current'],
            'peak': counters[f'{prefix}.all.peak'],
        }

    return {
        'system': { 'free': free, 'used': total - free, 'total': total },
        'active': current_and_peak('active_bytes'),
        'allocated': current_and_peak('allocated_bytes'),
        'reserved': current_and_peak('reserved_bytes'),
        'inactive': current_and_peak('inactive_split_bytes'),
    }
if __name__ == '__main__':
    # smoke-test entry point: load SD3 with the fp16 T5 encoder, render one
    # image, and log timing plus CUDA memory stats at each stage
    import time
    import logging

    logging.basicConfig(level=logging.INFO)
    log = logging.getLogger('sd')
    load_start = time.time()
    pipeline = load_sd3(te3='fp16', fn='')
    # pipeline.to('cuda')
    load_end = time.time()
    log.info(f'Loaded: time={load_end-load_start:.3f}')
    log.info(f'Stats: {stats()}')
    # pipeline.scheduler = diffusers.schedulers.EulerAncestralDiscreteScheduler.from_config(pipeline.scheduler.config)
    log.info(f'Scheduler, {pipeline.scheduler}')
    result = pipeline(
        prompt='a photo of a cute robot holding a sign above his head that says sdnext, high detailed',
        negative_prompt='',
        num_inference_steps=50,
        height=1024,
        width=1024,
        guidance_scale=7.0,
    )
    image = result.images[0]
    generate_end = time.time()
    log.info(f'Generated: time={generate_end-load_end:.3f}')
    log.info(f'Stats: {stats()}')
    image.save("/tmp/sd3.png")

View File

@ -76,38 +76,32 @@ def load_cascade_combined(checkpoint_info, diffusers_load_config):
from modules.sd_unet import unet_dict
diffusers_load_config.pop("vae", None)
if 'stabilityai' in checkpoint_info.name:
if 'cascade' in checkpoint_info.name.lower():
diffusers_load_config["variant"] = 'bf16'
if shared.opts.sd_unet != "None" or 'stabilityai' in checkpoint_info.name:
if 'stabilityai' in checkpoint_info.name and ('lite' in checkpoint_info.name or (checkpoint_info.hash is not None and 'abc818bb0d' in checkpoint_info.hash)):
if shared.opts.sd_unet != "None" or 'stabilityai' in checkpoint_info.name.lower():
if 'cascade' in checkpoint_info.name and ('lite' in checkpoint_info.name or (checkpoint_info.hash is not None and 'abc818bb0d' in checkpoint_info.hash)):
decoder_folder = 'decoder_lite'
prior_folder = 'prior_lite'
else:
decoder_folder = 'decoder'
prior_folder = 'prior'
if 'stabilityai' in checkpoint_info.name:
if 'cascade' in checkpoint_info.name.lower():
decoder_unet = StableCascadeUNet.from_pretrained("stabilityai/stable-cascade", subfolder=decoder_folder, cache_dir=shared.opts.diffusers_dir, **diffusers_load_config)
decoder = StableCascadeDecoderPipeline.from_pretrained("stabilityai/stable-cascade", cache_dir=shared.opts.diffusers_dir, decoder=decoder_unet, text_encoder=None, **diffusers_load_config)
else:
decoder = StableCascadeDecoderPipeline.from_pretrained(checkpoint_info.path, cache_dir=shared.opts.diffusers_dir, text_encoder=None, **diffusers_load_config)
shared.log.debug(f'StableCascade {decoder_folder}: scale={decoder.latent_dim_scale}')
prior_text_encoder = None
if shared.opts.sd_unet != "None":
prior_unet, prior_text_encoder = load_prior(unet_dict[shared.opts.sd_unet])
else:
prior_unet = StableCascadeUNet.from_pretrained("stabilityai/stable-cascade-prior", subfolder=prior_folder, cache_dir=shared.opts.diffusers_dir, **diffusers_load_config)
if prior_text_encoder is not None:
prior = StableCascadePriorPipeline.from_pretrained("stabilityai/stable-cascade-prior", cache_dir=shared.opts.diffusers_dir, prior=prior_unet, text_encoder=prior_text_encoder, image_encoder=None, feature_extractor=None, **diffusers_load_config)
else:
prior = StableCascadePriorPipeline.from_pretrained("stabilityai/stable-cascade-prior", cache_dir=shared.opts.diffusers_dir, prior=prior_unet, image_encoder=None, feature_extractor=None, **diffusers_load_config)
shared.log.debug(f'StableCascade {prior_folder}: scale={prior.resolution_multiple}')
sd_model = StableCascadeCombinedPipeline(
tokenizer=decoder.tokenizer,
text_encoder=None,

View File

@ -83,6 +83,8 @@ class Shared(sys.modules[__name__].__class__):
return model_type
if not shared.native:
model_type = 'ldm'
elif "StableDiffusion3" in self.sd_refiner.__class__.__name__:
model_type = 'sd3'
elif "StableDiffusionXL" in self.sd_model.__class__.__name__:
model_type = 'sdxl'
elif "StableDiffusion" in self.sd_model.__class__.__name__:
@ -112,6 +114,8 @@ class Shared(sys.modules[__name__].__class__):
return model_type
if not shared.native:
model_type = 'ldm'
elif "StableDiffusion3" in self.sd_refiner.__class__.__name__:
model_type = 'sd3'
elif "StableDiffusionXL" in self.sd_refiner.__class__.__name__:
model_type = 'sdxl'
elif "StableDiffusion" in self.sd_refiner.__class__.__name__:

View File

@ -289,8 +289,9 @@ def find_diffuser(name: str):
return None
def get_reference_opts(name: str):
def get_reference_opts(name: str, quiet=False):
model_opts = {}
name = name.replace('Diffusers/', 'huggingface/')
for k, v in shared.reference_models.items():
model_name = os.path.splitext(v.get('path', '').split('@')[0])[0]
if k == name or model_name == name:
@ -299,7 +300,8 @@ def get_reference_opts(name: str):
if not model_opts:
# shared.log.error(f'Reference: model="{name}" not found')
return {}
shared.log.debug(f'Reference: model="{name}" {model_opts.get("extras", None)}')
if not quiet:
shared.log.debug(f'Reference: model="{name}" {model_opts.get("extras", None)}')
return model_opts

View File

@ -106,7 +106,7 @@ def set_pipeline_args(p, model, prompts: list, negative_prompts: list, prompts_2
shared.log.error(f'Sampler timesteps: {e}')
else:
shared.log.warning(f'Sampler: sampler={model.scheduler.__class__.__name__} timesteps not supported')
if shared.opts.prompt_attention != 'Fixed attention' and ('StableDiffusion' in model.__class__.__name__ or 'StableCascade' in model.__class__.__name__) and 'Onnx' not in model.__class__.__name__:
if shared.opts.prompt_attention != 'Fixed attention' and ('StableDiffusion' in model.__class__.__name__ or 'StableCascade' in model.__class__.__name__) and 'Onnx' not in model.__class__.__name__ and 'StableDiffusion3' not in model.__class__.__name__:
try:
prompt_parser_diffusers.encode_prompts(model, p, prompts, negative_prompts, steps=steps, clip_skip=clip_skip)
parser = shared.opts.prompt_attention

View File

@ -85,7 +85,7 @@ def process_diffusers(p: processing.StableDiffusionProcessing):
shared.sd_model = update_pipeline(shared.sd_model, p)
shared.log.info(f'Base: class={shared.sd_model.__class__.__name__}')
update_sampler(p, shared.sd_model)
update_sampler(p, shared.sd_model) # TODO SD3
base_args = set_pipeline_args(
p=p,
model=shared.sd_model,

View File

@ -44,7 +44,7 @@ def full_vae_decode(latents, model):
upcast = (model.vae.dtype == torch.float16) and getattr(model.vae.config, 'force_upcast', False) and hasattr(model, 'upcast_vae')
if upcast: # this is done by diffusers automatically if output_type != 'latent'
model.upcast_vae()
if hasattr(model.vae, "post_quant_conv"):
if getattr(model.vae, "post_quant_conv", None) is not None:
latents = latents.to(next(iter(model.vae.post_quant_conv.parameters())).dtype)
# normalize latents

View File

@ -590,6 +590,10 @@ def detect_pipeline(f: str, op: str = 'model', warning=True):
if not shared.native:
warn(f'Model detected as Segmind Vega model, but attempting to load using backend=original: {op}={f} size={size} MB')
guess = 'Stable Diffusion XL'
elif size > 5692 and size < 5698:
if not shared.native:
warn(f'Model detected as Stable Diffusion 3 model, but attempting to load using backend=original: {op}={f} size={size} MB')
guess = 'Stable Diffusion 3'
# guess by name
"""
if 'LCM_' in f.upper() or 'LCM-' in f.upper() or '_LCM' in f.upper() or '-LCM' in f.upper():
@ -613,13 +617,17 @@ def detect_pipeline(f: str, op: str = 'model', warning=True):
if not shared.native:
warn(f'Model detected as PixArt Alpha model, but attempting to load using backend=original: {op}={f} size={size} MB')
guess = 'PixArt-Alpha'
if 'stable-diffusion-3' in f.lower():
if not shared.native:
warn(f'Model detected as Stable Diffusion 3 model, but attempting to load using backend=original: {op}={f} size={size} MB')
guess = 'Stable Diffusion 3'
if 'stable-cascade' in f.lower() or 'stablecascade' in f.lower() or 'wuerstchen3' in f.lower():
if not shared.native:
warn(f'Model detected as Stable Cascade model, but attempting to load using backend=original: {op}={f} size={size} MB')
if devices.dtype == torch.float16:
warn('Stable Cascade does not support Float16')
guess = 'Stable Cascade'
if 'pixart_sigma' in f.lower():
if 'pixart-sigma' in f.lower():
if not shared.native:
warn(f'Model detected as PixArt-Sigma model, but attempting to load using backend=original: {op}={f} size={size} MB')
guess = 'PixArt-Sigma'
@ -803,6 +811,8 @@ def move_model(model, device=None, force=False):
if os.environ.get('SD_MOVE_DEBUG', None):
shared.log.warning(f'Model move meta: module={module.__class__}')
module.to_empty(device=device)
elif 'enable_sequential_cpu_offload' in str(e0):
pass # ignore model move if sequential offload is enabled
else:
raise e0
if hasattr(model, "prior_pipe"):
@ -834,6 +844,8 @@ def get_load_config(model_file, model_type, config_type='yaml'):
return 'configs/sd15'
if model_type == 'Stable Diffusion XL':
return 'configs/sdxl'
if model_type == 'Stable Diffusion 3':
return 'configs/sd3'
return None
@ -954,7 +966,7 @@ def load_diffuser(checkpoint_info=None, already_loaded_state_dict=None, timer=No
diffusers_load_config['variant'] = 'fp16'
if model_type in ['Stable Cascade']: # forced pipeline
try:
from modules.sd_cascade import load_cascade_combined
from modules.model_stablecascade import load_cascade_combined
sd_model = load_cascade_combined(checkpoint_info, diffusers_load_config)
except Exception as e:
shared.log.error(f'Diffusers Failed loading {op}: {checkpoint_info.path} {e}')
@ -995,6 +1007,17 @@ def load_diffuser(checkpoint_info=None, already_loaded_state_dict=None, timer=No
if debug_load:
errors.display(e, 'Load')
return
elif model_type in ['Stable Diffusion 3']:
try:
from modules.model_sd3 import load_sd3
shared.log.debug('Loading: model="Stable Diffusion 3" variant=medium type=diffusers')
shared.opts.scheduler = 'Default'
sd_model = load_sd3()
except Exception as e:
shared.log.error(f'Diffusers Failed loading {op}: {checkpoint_info.path} {e}')
if debug_load:
errors.display(e, 'Load')
return
elif model_type is not None and pipeline is not None and 'ONNX' in model_type: # forced pipeline
try:
sd_model = pipeline.from_pretrained(checkpoint_info.path)
@ -1060,8 +1083,11 @@ def load_diffuser(checkpoint_info=None, already_loaded_state_dict=None, timer=No
diffusers_load_config['original_config_file'] = get_load_config(checkpoint_info.path, model_type, config_type='yaml')
else:
diffusers_load_config['config'] = get_load_config(checkpoint_info.path, model_type, config_type='json')
if hasattr(pipeline, 'from_single_file'):
diffusers.loaders.single_file_utils.CHECKPOINT_KEY_NAMES["clip"] = "cond_stage_model.transformer.text_model.embeddings.position_embedding.weight" # TODO patch for diffusers==0.28.0
if model_type.startswith('Stable Diffusion 3'):
from modules.model_sd3 import load_sd3
sd_model = load_sd3(fn=checkpoint_info.path)
elif hasattr(pipeline, 'from_single_file'):
diffusers.loaders.single_file_utils.CHECKPOINT_KEY_NAMES["clip"] = "cond_stage_model.transformer.text_model.embeddings.position_embedding.weight" # patch for diffusers==0.28.0
diffusers_load_config['use_safetensors'] = True
diffusers_load_config['cache_dir'] = shared.opts.hfcache_dir # use hfcache instead of diffusers dir as this is for config only in case of single-file
if shared.opts.disable_accelerate:
@ -1072,7 +1098,7 @@ def load_diffuser(checkpoint_info=None, already_loaded_state_dict=None, timer=No
else:
sd_hijack_accelerate.restore_accelerate()
sd_model = pipeline.from_single_file(checkpoint_info.path, **diffusers_load_config)
sd_model = patch_diffuser_config(sd_model, checkpoint_info.path)
# sd_model = patch_diffuser_config(sd_model, checkpoint_info.path)
elif hasattr(pipeline, 'from_ckpt'):
diffusers_load_config['cache_dir'] = shared.opts.hfcache_dir
sd_model = pipeline.from_ckpt(checkpoint_info.path, **diffusers_load_config)
@ -1116,8 +1142,7 @@ def load_diffuser(checkpoint_info=None, already_loaded_state_dict=None, timer=No
if hasattr(sd_model, "set_progress_bar_config"):
sd_model.set_progress_bar_config(bar_format='Progress {rate_fmt}{postfix} {bar} {percentage:3.0f}% {n_fmt}/{total_fmt} {elapsed} {remaining}', ncols=80, colour='#327fba')
if "StableCascade" not in sd_model.__class__.__name__:
sd_unet.load_unet(sd_model)
sd_unet.load_unet(sd_model)
timer.record("load")
if op == 'refiner':
@ -1140,6 +1165,8 @@ def load_diffuser(checkpoint_info=None, already_loaded_state_dict=None, timer=No
move_model(sd_model, devices.device)
timer.record("move")
reload_text_encoder()
if shared.opts.ipex_optimize:
sd_model = sd_models_compile.ipex_optimize(sd_model)
@ -1154,8 +1181,6 @@ def load_diffuser(checkpoint_info=None, already_loaded_state_dict=None, timer=No
shared.log.error("Failed to load diffusers model")
errors.display(e, "loading Diffusers model")
devices.torch_gc(force=True)
if shared.cmd_opts.profile:
errors.profile(pr, 'Load')
@ -1351,6 +1376,8 @@ def set_diffusers_attention(pipe):
modules = [getattr(pipe, n, None) for n in module_names]
modules = [m for m in modules if isinstance(m, torch.nn.Module) and hasattr(m, "set_attn_processor")]
for module in modules:
if 'SD3Transformer2DModel' in module.__class__.__name__: # TODO SD3
continue
module.set_attn_processor(attention)
if shared.opts.cross_attention_optimization == "Disabled":
@ -1494,6 +1521,13 @@ def load_model(checkpoint_info=None, already_loaded_state_dict=None, timer=None,
shared.log.info(f'Model load finished: {memory_stats()} cached={len(checkpoints_loaded.keys())}')
def reload_text_encoder():
if hasattr(shared.sd_model, 'text_encoder_3'):
from modules.model_sd3 import load_te3
shared.log.debug(f'Load: TE3={shared.opts.sd_te3}')
load_te3(shared.sd_model, shared.opts.sd_te3)
def reload_model_weights(sd_model=None, info=None, reuse_dict=False, op='model', force=False):
load_dict = shared.opts.sd_model_dict != model_data.sd_dict
from modules import lowvram, sd_hijack

View File

@ -32,6 +32,7 @@ try:
LMSDiscreteScheduler,
PNDMScheduler,
SASolverScheduler,
FlowMatchEulerDiscreteScheduler,
)
except Exception as e:
import diffusers
@ -65,6 +66,7 @@ config = {
'Euler EDM': { },
'DPM++ 2M EDM': { 'solver_order': 2, 'solver_type': 'midpoint', 'final_sigmas_type': 'zero', 'algorithm_type': 'dpmsolver++' },
'CMSI': { }, #{ 'sigma_min': 0.002, 'sigma_max': 80.0, 'sigma_data': 0.5, 's_noise': 1.0, 'rho': 7.0, 'clip_denoised': True },
'Euler FlowMatch': { },
'IPNDM': { },
}
@ -96,6 +98,7 @@ samplers_data_diffusers = [
sd_samplers_common.SamplerData('LCM', lambda model: DiffusionSampler('LCM', LCMScheduler, model), [], {}),
sd_samplers_common.SamplerData('TCD', lambda model: DiffusionSampler('TCD', TCDScheduler, model), [], {}),
sd_samplers_common.SamplerData('CMSI', lambda model: DiffusionSampler('CMSI', CMStochasticIterativeScheduler, model), [], {}),
sd_samplers_common.SamplerData('Euler FlowMatch', lambda model: DiffusionSampler('Euler FlowMatch', FlowMatchEulerDiscreteScheduler, model), [], {}),
sd_samplers_common.SamplerData('Same as primary', None, [], {}),
]
@ -179,7 +182,7 @@ class DiffusionSampler:
possible = signature.parameters.keys()
for key in self.config.copy().keys():
if key not in possible:
shared.log.warning(f'Sampler: sampler="{name}" config={self.config} invalid={key}')
# shared.log.warning(f'Sampler: sampler="{name}" config={self.config} invalid={key}')
del self.config[key]
debug(f'Sampler: name="{name}"')
debug(f'Sampler: config={self.config}')

View File

@ -8,6 +8,8 @@ unet_dict = {}
def load_unet(model):
if shared.opts.sd_unet == 'None':
return
if "StableCascade" in model.__class__.__name__:
return
if shared.opts.sd_unet not in list(unet_dict):
shared.log.error(f'UNet model not found: {shared.opts.sd_unet}')
return
@ -22,7 +24,7 @@ def load_unet(model):
config_file = 'default'
try:
if "StableCascade" in model.__class__.__name__:
from modules.sd_cascade import load_prior
from modules.model_stablecascade import load_prior
prior_unet, prior_text_encoder = load_prior(unet_dict[shared.opts.sd_unet], config_file=config_file)
model.prior_pipe.prior = model.prior_prior = None # Prevent OOM
model.prior_pipe.prior = model.prior_prior = prior_unet.to(devices.device, dtype=devices.dtype_unet)

View File

@ -390,6 +390,7 @@ options_templates.update(options_section(('sd', "Execution & Models"), {
"sd_model_refiner": OptionInfo('None', "Refiner model", gr.Dropdown, lambda: {"choices": ['None'] + list_checkpoint_tiles()}, refresh=refresh_checkpoints),
"sd_vae": OptionInfo("Automatic", "VAE model", gr.Dropdown, lambda: {"choices": shared_items.sd_vae_items()}, refresh=shared_items.refresh_vae_list),
"sd_unet": OptionInfo("None", "UNET model", gr.Dropdown, lambda: {"choices": shared_items.sd_unet_items()}, refresh=shared_items.refresh_unet_list),
"sd_te3": OptionInfo('None', "Text encoder model", gr.Dropdown, lambda: {"choices": ['None', 'T5 FP8', 'T5 FP16']}),
"sd_checkpoint_autoload": OptionInfo(True, "Model autoload on start"),
"sd_model_dict": OptionInfo('None', "Use separate base dict", gr.Dropdown, lambda: {"choices": ['None'] + list_checkpoint_tiles()}, refresh=refresh_checkpoints),
"stream_load": OptionInfo(False, "Load models using stream loading method", gr.Checkbox, {"visible": not native }),

View File

@ -92,6 +92,9 @@ def get_pipelines():
pipelines['PixArt-Sigma'] = getattr(diffusers, 'PixArtSigmaPipeline', None)
if hasattr(diffusers, 'HunyuanDiTPipeline'):
pipelines['HunyuanDiT'] = getattr(diffusers, 'HunyuanDiTPipeline', None)
if hasattr(diffusers, 'StableDiffusion3Pipeline'):
pipelines['Stable Diffusion 3'] = getattr(diffusers, 'StableDiffusion3Pipeline', None)
pipelines['Stable Diffusion 3 Img2Img'] = getattr(diffusers, 'StableDiffusion3Img2ImgPipeline', None)
for k, v in pipelines.items():
if k != 'Autodetect' and v is None:

View File

@ -28,12 +28,11 @@ class ExtraNetworksPageCheckpoints(ui_extra_networks.ExtraNetworksPage):
shared.log.debug(f'Extra networks experimental: model="{k}"')
else:
continue
name = os.path.join(reference_dir, k)
preview = v.get('preview', v['path'])
yield {
"type": 'Model',
"name": name,
"title": name,
"name": os.path.join(reference_dir, k),
"title": os.path.join(reference_dir, k),
"filename": url,
"preview": self.find_preview(os.path.join(reference_dir, preview)),
"local_preview": self.find_preview_file(os.path.join(reference_dir, preview)),

View File

@ -27,7 +27,7 @@ fasteners
orjson
invisible-watermark
pi-heif
diffusers==0.28.1
diffusers==0.29.0
safetensors==0.4.3
tensordict==0.1.2
peft==0.11.1
@ -53,7 +53,7 @@ pandas
protobuf==4.25.3
pytorch_lightning==1.9.4
tokenizers==0.19.1
transformers==4.41.1
transformers==4.41.2
urllib3==1.26.18
Pillow==10.3.0
timm==0.9.16

View File

@ -49,10 +49,7 @@ class Script(scripts.Script):
return 'MuLan'
def show(self, is_img2img):
if shared.cmd_opts.experimental:
return True if shared.native else False
else:
return False
return True if shared.native else False
def ui(self, _is_img2img):
with gr.Row():

View File

@ -169,6 +169,7 @@ def load_model():
thread_refiner.join()
shared.opts.onchange("sd_model_checkpoint", wrap_queued_call(lambda: modules.sd_models.reload_model_weights(op='model')), call=False)
shared.opts.onchange("sd_model_refiner", wrap_queued_call(lambda: modules.sd_models.reload_model_weights(op='refiner')), call=False)
shared.opts.onchange("sd_te3", wrap_queued_call(lambda: modules.sd_models.reload_text_encoder()), call=False)
shared.opts.onchange("sd_model_dict", wrap_queued_call(lambda: modules.sd_models.reload_model_weights(op='dict')), call=False)
shared.opts.onchange("sd_vae", wrap_queued_call(lambda: modules.sd_vae.reload_vae_weights()), call=False)
shared.opts.onchange("sd_backend", wrap_queued_call(lambda: modules.sd_models.change_backend()), call=False)

2
wiki

@ -1 +1 @@
Subproject commit 709796f9753081ebe74d723118b2482bf633fae5
Subproject commit ebe57463ba9b5f8055bd9f54f84c987a0b4ce4c2