mirror of https://github.com/vladmandic/automatic
parent
d29c133891
commit
db8c6e9243
|
|
@ -37,6 +37,7 @@ package-lock.json
|
|||
*.rar
|
||||
*.7z
|
||||
*.pyc
|
||||
*.out
|
||||
/*.bat
|
||||
/*.sh
|
||||
/*.txt
|
||||
|
|
|
|||
|
|
@ -3,6 +3,9 @@
|
|||
## Update for 2026-04-15
|
||||
|
||||
- **Models**
|
||||
- [Baidu ERNIE-Image](https://huggingface.co/baidu/ERNIE-Image) text-to-image FlowMatch diffusion transformer model with Mistral3 text encoding
|
||||
includes *ERNIE-Image* (base) and *ERNIE-Image-Turbo* (distilled) variants
|
||||
uses *ErnieImageTransformer2DModel* with *AutoencoderKLFlux2* latent decoding at 1024px
|
||||
- [Zeta-Chroma](https://huggingface.co/lodestones/Zeta-Chroma) pixel-space diffusion transformer image model
|
||||
generates images directly in RGB space using NextDiT-style architecture
|
||||
*note*: requires a large number of steps to achieve sane results
|
||||
|
|
@ -72,10 +75,10 @@
|
|||
- UI CSS fixes, thanks @awsr
|
||||
- detect/warn if space present in system path
|
||||
- add `ftfy` to requirements
|
||||
- fix upscaler init error should not block server
|
||||
- upscaler init error should not block server
|
||||
- improve torch nvidia arch detection
|
||||
- add torch amd arch detection
|
||||
- fix prompt weighted lists and internal wildcards
|
||||
- prompt weighted lists and internal wildcards
|
||||
- improve `path_to_repo` handling for custom paths
|
||||
- eliminate `api` auth security bypass
|
||||
- multiple `schedulers` signature corrections
|
||||
|
|
@ -89,6 +92,7 @@
|
|||
- patch `unipc` for timesteps device placement, thanks @resonantsky
|
||||
- `civitai` search and base-model discovery improvements
|
||||
- validate all `reference` jsons
|
||||
- ui log formatting
|
||||
|
||||
## Update for 2026-04-01
|
||||
|
||||
|
|
|
|||
|
|
@ -56,6 +56,16 @@
|
|||
"tags": "distilled",
|
||||
"date": "2025 August"
|
||||
},
|
||||
"Baidu ERNIE-Image-Turbo": {
|
||||
"path": "baidu/ERNIE-Image-Turbo",
|
||||
"preview": "baidu--ERNIE-Image-Turbo.jpg",
|
||||
"desc": "ERNIE-Image-Turbo is a distilled ERNIE-Image variant optimized for fast generation with fewer denoising steps.",
|
||||
"skip": true,
|
||||
"extras": "sampler: Default, cfg_scale: 1.0, steps: 8",
|
||||
"size": 0,
|
||||
"tags": "distilled",
|
||||
"date": "2026 April"
|
||||
},
|
||||
"Qwen-Image-Lightning-Edit": {
|
||||
"path": "vladmandic/Qwen-Lightning-Edit",
|
||||
"preview": "vladmandic--Qwen-Lightning-Edit.jpg",
|
||||
|
|
|
|||
|
|
@ -162,6 +162,16 @@
|
|||
"date": "2025 November"
|
||||
},
|
||||
|
||||
"Baidu ERNIE-Image": {
|
||||
"path": "baidu/ERNIE-Image",
|
||||
"preview": "baidu--ERNIE-Image.jpg",
|
||||
"desc": "ERNIE-Image is a text-to-image diffusion transformer model that combines a Mistral3 text encoder with a FlowMatch transformer and Flux2-style VAE for 1024px image generation.",
|
||||
"skip": true,
|
||||
"extras": "sampler: Default, cfg_scale: 4.0, steps: 50",
|
||||
"size": 0,
|
||||
"date": "2026 April"
|
||||
},
|
||||
|
||||
"Qwen-Image": {
|
||||
"path": "Qwen/Qwen-Image",
|
||||
"preview": "Qwen--Qwen-Image.jpg",
|
||||
|
|
|
|||
|
|
@ -30,8 +30,9 @@ async function logMonitor() {
|
|||
const level = `<td style="color: var(--color-${l.level.toLowerCase()})">${l.level}</td>`;
|
||||
if (l.level === 'WARNING') logWarnings++;
|
||||
if (l.level === 'ERROR') logErrors++;
|
||||
const module = `<td style="color: var(--var(--neutral-400))">${l.module}</td>`;
|
||||
row.innerHTML = `<td>${dateToStr(l.created)}</td>${level}<td>${l.facility}</td>${module}<td>${htmlEscape(l.msg)}</td>`;
|
||||
const module = `<td style="color: var(--neutral-400)">${l.module}</td>`;
|
||||
const facility = l.facility !== 'sd' ? `<td>${l.facility}</td>` : '<td></td>';
|
||||
row.innerHTML = `<td>${dateToStr(l.created)}</td>${level}${facility}${module}<td>${htmlEscape(l.msg)}</td>`;
|
||||
logMonitorEl.appendChild(row);
|
||||
} catch (e) {
|
||||
error(`logMonitor: ${e}\n${line}`);
|
||||
|
|
|
|||
|
|
@ -88,6 +88,8 @@ def get_model_type(pipe):
|
|||
model_type = 'meissonic'
|
||||
elif 'Qwen' in name:
|
||||
model_type = 'qwen'
|
||||
elif 'ErnieImage' in name or 'ERNIE-Image' in name:
|
||||
model_type = 'ernieimage'
|
||||
elif 'NextStep' in name:
|
||||
model_type = 'nextstep'
|
||||
elif 'XOmni' in name or 'X-Omni' in name:
|
||||
|
|
|
|||
|
|
@ -142,6 +142,8 @@ def guess_by_name(fn, current_guess):
|
|||
new_guess = 'PRX'
|
||||
elif 'gemini-' in fn.lower() and 'image' in fn.lower():
|
||||
new_guess = 'NanoBanana'
|
||||
elif 'ernie-image' in fn.lower():
|
||||
new_guess = 'ERNIE-Image'
|
||||
elif 'z-image' in fn.lower() or 'z_image' in fn.lower():
|
||||
new_guess = 'Z-Image'
|
||||
elif 'longcat-image' in fn.lower():
|
||||
|
|
|
|||
|
|
@ -4,6 +4,10 @@ from modules import shared, errors, timer, sd_models
|
|||
from modules.logger import log
|
||||
|
||||
|
||||
debug_output = os.environ.get('SD_PROMPT_DEBUG', None)
|
||||
debug = log.trace if debug_output is not None else lambda *args, **kwargs: None
|
||||
|
||||
|
||||
def hijack_encode_prompt(*args, **kwargs):
|
||||
jobid = shared.state.begin('TE Encode')
|
||||
t0 = time.time()
|
||||
|
|
@ -18,8 +22,7 @@ def hijack_encode_prompt(*args, **kwargs):
|
|||
prompt = args_copy[0]
|
||||
patch_prompt = True
|
||||
res = prompt
|
||||
if prompt is not None:
|
||||
log.debug(f'Encode: prompt="{prompt}" hijack=True')
|
||||
debug(f'Encode: prompt="{prompt}" hijack=True')
|
||||
|
||||
if hasattr(shared.sd_model, 'before_prompt_encode'):
|
||||
log.debug(f'Encode: prompt="{prompt}" op=before')
|
||||
|
|
|
|||
|
|
@ -509,6 +509,10 @@ def load_diffuser_force(detected_model_type, checkpoint_info, diffusers_load_con
|
|||
from pipelines.model_prx import load_prx
|
||||
sd_model = load_prx(checkpoint_info, diffusers_load_config)
|
||||
allow_post_quant = False
|
||||
elif model_type in ['ERNIE-Image']:
|
||||
from pipelines.model_ernie import load_ernie_image
|
||||
sd_model = load_ernie_image(checkpoint_info, diffusers_load_config)
|
||||
allow_post_quant = False
|
||||
elif model_type in ['Z-Image']:
|
||||
from pipelines.model_z_image import load_z_image
|
||||
sd_model = load_z_image(checkpoint_info, diffusers_load_config)
|
||||
|
|
@ -1293,10 +1297,14 @@ def set_diffuser_pipe(pipe, new_pipe_type):
|
|||
|
||||
|
||||
def add_noise_pred_to_diffusers_callback(pipe):
|
||||
print('HERE1', hasattr(pipe, "_callback_tensor_inputs"))
|
||||
if not hasattr(pipe, "_callback_tensor_inputs"):
|
||||
return pipe
|
||||
if pipe.__class__.__name__.startswith("Anima"):
|
||||
return pipe
|
||||
if pipe.__class__.__name__.startswith("ErnieImage"):
|
||||
print('HERE2')
|
||||
return pipe
|
||||
if pipe.__class__.__name__.startswith("StableCascade") and ("predicted_image_embedding" not in pipe._callback_tensor_inputs): # pylint: disable=protected-access
|
||||
pipe.prior_pipe._callback_tensor_inputs.append("predicted_image_embedding") # pylint: disable=protected-access
|
||||
elif "noise_pred" not in pipe._callback_tensor_inputs: # pylint: disable=protected-access
|
||||
|
|
|
|||
|
|
@ -48,6 +48,7 @@ pipelines = {
|
|||
'WanAI': getattr(diffusers, 'WanPipeline', None),
|
||||
'Qwen': getattr(diffusers, 'QwenImagePipeline', None),
|
||||
'HunyuanImage': getattr(diffusers, 'HunyuanImagePipeline', None),
|
||||
'ERNIE-Image': getattr(diffusers, 'ErnieImagePipeline', None),
|
||||
'Z-Image': getattr(diffusers, 'ZImagePipeline', None),
|
||||
'FLUX2': getattr(diffusers, 'Flux2Pipeline', None),
|
||||
'FLUX2 Klein': getattr(diffusers, 'Flux2KleinPipeline', None),
|
||||
|
|
|
|||
|
|
@ -0,0 +1,45 @@
|
|||
import diffusers
|
||||
import transformers
|
||||
from modules import shared, devices, sd_models, model_quant, sd_hijack_te, sd_hijack_vae
|
||||
from modules.logger import log
|
||||
from pipelines import generic
|
||||
|
||||
|
||||
def load_ernie_image(checkpoint_info, diffusers_load_config=None):
    """Assemble the Baidu ERNIE-Image pipeline for a given checkpoint.

    Resolves the checkpoint to a HuggingFace repo, loads the DiT transformer
    and the Mistral3 text encoder through the generic component loaders, and
    combines them into a `diffusers.ErnieImagePipeline`.

    Args:
        checkpoint_info: checkpoint descriptor; resolved via `sd_models.path_to_repo`.
        diffusers_load_config: optional diffusers loading options; an empty
            dict is substituted when omitted.

    Returns:
        The assembled pipeline with TE/VAE hijacks installed and `task_args`
        forcing numpy (`'np'`) output.
    """
    config = {} if diffusers_load_config is None else diffusers_load_config
    repo_id = sd_models.path_to_repo(checkpoint_info)
    sd_models.hf_auth_check(checkpoint_info)

    # quantization is disabled for this model; the quant args are discarded
    pipe_args, _ = model_quant.get_dit_args(config, allow_quant=False)
    log.debug(f'Load model: type=ERNIE-Image repo="{repo_id}" offload={shared.opts.diffusers_offload_mode} dtype={devices.dtype} args={pipe_args}')

    # load the heavy components individually so the generic loaders can apply
    # their own dtype/placement handling before pipeline assembly
    dit = generic.load_transformer(
        repo_id,
        cls_name=diffusers.ErnieImageTransformer2DModel,
        load_config=config,
    )
    te = generic.load_text_encoder(
        repo_id,
        cls_name=transformers.Mistral3Model,
        load_config=config,
    )

    pipe = diffusers.ErnieImagePipeline.from_pretrained(
        repo_id,
        cache_dir=shared.opts.diffusers_dir,
        transformer=dit,
        text_encoder=te,
        **pipe_args,
    )
    pipe.task_args = {
        'output_type': 'np',
    }

    # the pipeline now owns the components; drop the local references
    del dit
    del te
    sd_hijack_te.init_hijack(pipe)
    sd_hijack_vae.init_hijack(pipe)

    devices.torch_gc(force=True, reason='load')
    return pipe
|
||||
Loading…
Reference in New Issue