diff --git a/.gitignore b/.gitignore index a29980f78..7141c13ec 100644 --- a/.gitignore +++ b/.gitignore @@ -37,6 +37,7 @@ package-lock.json *.rar *.7z *.pyc +*.out /*.bat /*.sh /*.txt diff --git a/CHANGELOG.md b/CHANGELOG.md index 21bf94b25..22d6ba5fc 100644 --- a/CHANGELOG.md +++ b/CHANGELOG.md @@ -3,6 +3,9 @@ ## Update for 2026-04-15 - **Models** + - [Baidu ERNIE-Image](https://huggingface.co/baidu/ERNIE-Image) text-to-image FlowMatch diffusion transformer model with Mistral3 text encoding + includes *ERNIE-Image* (base) and *ERNIE-Image-Turbo* (distilled) variants + uses *ErnieImageTransformer2DModel* with *AutoencoderKLFlux2* latent decoding at 1024px - [Zeta-Chroma](https://huggingface.co/lodestones/Zeta-Chroma) pixel-space diffusion transformer image model generates images directly in RGB space using NextDiT-style architecture *note*: requires large number of steps to achieve sane results @@ -72,10 +75,10 @@ - UI CSS fixes, thanks @awsr - detect/warn if space present in system path - add `ftfy` to requirements - - fix upscaler init error should not block server + - upscaler init error should not block server - improve torch nvidia arch detection - add torch amd arch detection - - fix prompt weighted lists and internal wildcards + - prompt weighted lists and internal wildcards - improve `path_to_repo` handling for custom paths - eliminate `api` auth security bypass - multiple `schedulers` signature corrections @@ -89,6 +92,7 @@ - patch `unipc` for timesteps device placement, thanks @resonantsky - `civitai` search and base-model discovery improvements - validate all `reference` jsons + - ui log formatting ## Update for 2026-04-01 diff --git a/data/reference-distilled.json b/data/reference-distilled.json index 7de9c1fbe..ab40f8fe6 100644 --- a/data/reference-distilled.json +++ b/data/reference-distilled.json @@ -56,6 +56,16 @@ "tags": "distilled", "date": "2025 August" }, + "Baidu ERNIE-Image-Turbo": { + "path": "baidu/ERNIE-Image-Turbo", + "preview": 
"baidu--ERNIE-Image-Turbo.jpg", + "desc": "ERNIE-Image-Turbo is a distilled ERNIE-Image variant optimized for fast generation with fewer denoising steps.", + "skip": true, + "extras": "sampler: Default, cfg_scale: 1.0, steps: 8", + "size": 0, + "tags": "distilled", + "date": "2026 April" + }, "Qwen-Image-Lightning-Edit": { "path": "vladmandic/Qwen-Lightning-Edit", "preview": "vladmandic--Qwen-Lightning-Edit.jpg", diff --git a/data/reference.json b/data/reference.json index 78ba210d9..ffe31b02a 100644 --- a/data/reference.json +++ b/data/reference.json @@ -162,6 +162,16 @@ "date": "2025 November" }, + "Baidu ERNIE-Image": { + "path": "baidu/ERNIE-Image", + "preview": "baidu--ERNIE-Image.jpg", + "desc": "ERNIE-Image is a text-to-image diffusion transformer model that combines a Mistral3 text encoder with a FlowMatch transformer and Flux2-style VAE for 1024px image generation.", + "skip": true, + "extras": "sampler: Default, cfg_scale: 4.0, steps: 50", + "size": 0, + "date": "2026 April" + }, + "Qwen-Image": { "path": "Qwen/Qwen-Image", "preview": "Qwen--Qwen-Image.jpg", diff --git a/javascript/logMonitor.js b/javascript/logMonitor.js index 70472ced0..186c9dc73 100644 --- a/javascript/logMonitor.js +++ b/javascript/logMonitor.js @@ -30,8 +30,9 @@ async function logMonitor() { const level = `${l.level}`; if (l.level === 'WARNING') logWarnings++; if (l.level === 'ERROR') logErrors++; - const module = `${l.module}`; - row.innerHTML = `${dateToStr(l.created)}${level}${l.facility}${module}${htmlEscape(l.msg)}`; + const module = `${l.module}`; + const facility = l.facility !== 'sd' ? 
`${l.facility}` : ''; + row.innerHTML = `${dateToStr(l.created)}${level}${facility}${module}${htmlEscape(l.msg)}`; logMonitorEl.appendChild(row); } catch (e) { error(`logMonitor: ${e}\n${line}`); diff --git a/models/Reference/baidu--ERNIE-Image-Turbo.jpg b/models/Reference/baidu--ERNIE-Image-Turbo.jpg new file mode 100644 index 000000000..e69de29bb diff --git a/models/Reference/baidu--ERNIE-Image.jpg b/models/Reference/baidu--ERNIE-Image.jpg new file mode 100644 index 000000000..e69de29bb diff --git a/modules/modeldata.py b/modules/modeldata.py index 7e2ece774..a4e47c625 100644 --- a/modules/modeldata.py +++ b/modules/modeldata.py @@ -88,6 +88,8 @@ def get_model_type(pipe): model_type = 'meissonic' elif 'Qwen' in name: model_type = 'qwen' + elif 'ErnieImage' in name or 'ERNIE-Image' in name: + model_type = 'ernieimage' elif 'NextStep' in name: model_type = 'nextstep' elif 'XOmni' in name or 'X-Omni' in name: diff --git a/modules/sd_detect.py b/modules/sd_detect.py index bde8e5891..d901ea0fe 100644 --- a/modules/sd_detect.py +++ b/modules/sd_detect.py @@ -142,6 +142,8 @@ def guess_by_name(fn, current_guess): new_guess = 'PRX' elif 'gemini-' in fn.lower() and 'image' in fn.lower(): new_guess = 'NanoBanana' + elif 'ernie-image' in fn.lower(): + new_guess = 'ERNIE-Image' elif 'z-image' in fn.lower() or 'z_image' in fn.lower(): new_guess = 'Z-Image' elif 'longcat-image' in fn.lower(): diff --git a/modules/sd_hijack_te.py b/modules/sd_hijack_te.py index 5b779ec28..14793f30d 100644 --- a/modules/sd_hijack_te.py +++ b/modules/sd_hijack_te.py @@ -4,6 +4,10 @@ from modules import shared, errors, timer, sd_models from modules.logger import log +debug_output = os.environ.get('SD_PROMPT_DEBUG', None) +debug = log.trace if debug_output is not None else lambda *args, **kwargs: None + + def hijack_encode_prompt(*args, **kwargs): jobid = shared.state.begin('TE Encode') t0 = time.time() @@ -18,8 +22,7 @@ def hijack_encode_prompt(*args, **kwargs): prompt = args_copy[0] patch_prompt = 
True res = prompt - if prompt is not None: - log.debug(f'Encode: prompt="{prompt}" hijack=True') + debug(f'Encode: prompt="{prompt}" hijack=True') if hasattr(shared.sd_model, 'before_prompt_encode'): log.debug(f'Encode: prompt="{prompt}" op=before') diff --git a/modules/sd_models.py b/modules/sd_models.py index b379ef03e..5bdc895a0 100644 --- a/modules/sd_models.py +++ b/modules/sd_models.py @@ -509,6 +509,10 @@ def load_diffuser_force(detected_model_type, checkpoint_info, diffusers_load_con from pipelines.model_prx import load_prx sd_model = load_prx(checkpoint_info, diffusers_load_config) allow_post_quant = False + elif model_type in ['ERNIE-Image']: + from pipelines.model_ernie import load_ernie_image + sd_model = load_ernie_image(checkpoint_info, diffusers_load_config) + allow_post_quant = False elif model_type in ['Z-Image']: from pipelines.model_z_image import load_z_image sd_model = load_z_image(checkpoint_info, diffusers_load_config) @@ -1293,10 +1297,12 @@ def set_diffuser_pipe(pipe, new_pipe_type): def add_noise_pred_to_diffusers_callback(pipe): if not hasattr(pipe, "_callback_tensor_inputs"): return pipe if pipe.__class__.__name__.startswith("Anima"): return pipe + if pipe.__class__.__name__.startswith("ErnieImage"): + return pipe if pipe.__class__.__name__.startswith("StableCascade") and ("predicted_image_embedding" not in pipe._callback_tensor_inputs): # pylint: disable=protected-access pipe.prior_pipe._callback_tensor_inputs.append("predicted_image_embedding") # pylint: disable=protected-access elif "noise_pred" not in pipe._callback_tensor_inputs: # pylint: disable=protected-access diff --git a/modules/shared_items.py b/modules/shared_items.py index 8d2c6af40..9f54d4a91 100644 --- a/modules/shared_items.py +++ b/modules/shared_items.py @@ -48,6 +48,7 @@ pipelines = { 'WanAI': getattr(diffusers, 'WanPipeline', None), 'Qwen': getattr(diffusers, 'QwenImagePipeline', None),
'HunyuanImage': getattr(diffusers, 'HunyuanImagePipeline', None), + 'ERNIE-Image': getattr(diffusers, 'ErnieImagePipeline', None), 'Z-Image': getattr(diffusers, 'ZImagePipeline', None), 'FLUX2': getattr(diffusers, 'Flux2Pipeline', None), 'FLUX2 Klein': getattr(diffusers, 'Flux2KleinPipeline', None), diff --git a/pipelines/model_ernie.py b/pipelines/model_ernie.py new file mode 100644 index 000000000..1b6cdb9de --- /dev/null +++ b/pipelines/model_ernie.py @@ -0,0 +1,45 @@ +import diffusers +import transformers +from modules import shared, devices, sd_models, model_quant, sd_hijack_te, sd_hijack_vae +from modules.logger import log +from pipelines import generic + + +def load_ernie_image(checkpoint_info, diffusers_load_config=None): + if diffusers_load_config is None: + diffusers_load_config = {} + repo_id = sd_models.path_to_repo(checkpoint_info) + sd_models.hf_auth_check(checkpoint_info) + + load_args, _quant_args = model_quant.get_dit_args(diffusers_load_config, allow_quant=False) + log.debug(f'Load model: type=ERNIE-Image repo="{repo_id}" offload={shared.opts.diffusers_offload_mode} dtype={devices.dtype} args={load_args}') + + transformer = generic.load_transformer( + repo_id, + cls_name=diffusers.ErnieImageTransformer2DModel, + load_config=diffusers_load_config, + ) + text_encoder = generic.load_text_encoder( + repo_id, + cls_name=transformers.Mistral3Model, + load_config=diffusers_load_config, + ) + + pipe = diffusers.ErnieImagePipeline.from_pretrained( + repo_id, + cache_dir=shared.opts.diffusers_dir, + transformer=transformer, + text_encoder=text_encoder, + **load_args, + ) + pipe.task_args = { + 'output_type': 'np', + } + + del transformer + del text_encoder + sd_hijack_te.init_hijack(pipe) + sd_hijack_vae.init_hijack(pipe) + + devices.torch_gc(force=True, reason='load') + return pipe