mirror of https://github.com/vladmandic/automatic
parent
27e6d17c9a
commit
3a65d561a7
|
|
@ -9,8 +9,10 @@ Merge commit: `f903a36d9`
|
|||
### Highlights for 2025-12-09
|
||||
|
||||
New native [kanvas](https://vladmandic.github.io/sdnext-docs/Kanvas/) module for image manipulation that fully replaces *img2img*, *inpaint* and *outpaint* controls, massive update to **Captioning/VQA** models and features
|
||||
New generation of **Flux.2** large image model, new **Z-Image** model that is creating a lot of buzz, new **Kandinsky 5 Lite** image model and a first cloud model with **Google's Nano Banana** *2.5 Flash and 3.0 Pro*, new **Photoroom PRX** model
|
||||
Also new are **HunyuanVideo 1.5** and **Kandinsky 5 Pro** video models, plus a lot of internal improvements and fixes
|
||||
New generation of **Flux.2** large image model, new **Z-Image** model that is creating a lot of buzz, new **Kandinsky 5 Lite** image model and new **Photoroom PRX** model
|
||||
And first cloud models with **Google Nano Banana** *2.5 Flash and 3.0 Pro* and **Google Veo** *3.1* video model
|
||||
Also new are **HunyuanVideo 1.5** and **Kandinsky 5 Pro** video models
|
||||
Plus a lot of internal improvements and fixes
|
||||
|
||||

|
||||
|
||||
|
|
@ -42,6 +44,8 @@ Also new are **HunyuanVideo 1.5** and **Kandinsky 5 Pro** video models, plus a l
|
|||
distilled variants provide faster generation with slightly reduced quality
|
||||
- [Kandinsky 5.0 Pro Video](https://huggingface.co/kandinskylab/Kandinsky-5.0-T2V-Pro-sft-5s-Diffusers) in T2V and I2V variants
|
||||
larger (19B) and more powerful version of the previously released Lite 2B models
|
||||
- [Google Veo 3.1](https://gemini.google/us/overview/video-generation/) for T2V and I2V workflows
|
||||
*note*: need to set `GOOGLE_API_KEY` environment variable with your key to use this model
|
||||
- **Kanvas**: new module for native canvas-based image manipulation
|
||||
kanvas is a full replacement for *img2img, inpaint and outpaint* controls
|
||||
see [docs](https://vladmandic.github.io/sdnext-docs/Kanvas/) for details
|
||||
|
|
|
|||
|
|
@ -782,7 +782,7 @@
|
|||
"Kandinsky 5.0 T2I Lite": {
|
||||
"path": "kandinskylab/Kandinsky-5.0-T2I-Lite-sft-Diffusers",
|
||||
"desc": "Kandinsky 5.0 Image Lite is a 6B image generation models 1K resulution, high visual quality and strong text-writing",
|
||||
"preview": "kandinskylab--Kandinsky-5.0-T2I-Lite-sft-Diffusers.jpg",
|
||||
"preview": "kandinsky-community--kandinsky-3.jpg",
|
||||
"skip": true,
|
||||
"size": 33.20,
|
||||
"date": "2025 November"
|
||||
|
|
@ -790,7 +790,7 @@
|
|||
"Kandinsky 5.0 I2I Lite": {
|
||||
"path": "kandinskylab/Kandinsky-5.0-I2I-Lite-sft-Diffusers",
|
||||
"desc": "Kandinsky 5.0 Image Lite is a 6B image editing models 1K resulution, high visual quality and strong text-writing",
|
||||
"preview": "kandinskylab--Kandinsky-5.0-I2I-Lite-sft-Diffusers.jpg",
|
||||
"preview": "kandinsky-community--kandinsky-3.jpg",
|
||||
"skip": true,
|
||||
"size": 33.20,
|
||||
"date": "2025 November"
|
||||
|
|
|
|||
|
|
@ -79,7 +79,7 @@ def get_model_type(pipe):
|
|||
# video models
|
||||
elif "CogVideo" in name:
|
||||
model_type = 'cogvideo'
|
||||
elif 'HunyuanVideo15':
|
||||
elif 'HunyuanVideo15' in name:
|
||||
model_type = 'hunyuanvideo15'
|
||||
elif 'HunyuanVideoPipeline' in name or 'HunyuanSkyreels' in name:
|
||||
model_type = 'hunyuanvideo'
|
||||
|
|
@ -101,6 +101,8 @@ def get_model_type(pipe):
|
|||
elif 'HunyuanImage' in name:
|
||||
model_type = 'hunyuanimage'
|
||||
# cloud models
|
||||
elif 'GoogleVeo' in name:
|
||||
model_type = 'veo3'
|
||||
elif 'NanoBanana' in name:
|
||||
model_type = 'nanobanana'
|
||||
else:
|
||||
|
|
|
|||
|
|
@ -31,7 +31,7 @@ processed = None # last known processed results
|
|||
|
||||
|
||||
class Processed:
|
||||
def __init__(self, p: StableDiffusionProcessing, images_list, seed=-1, info=None, subseed=None, all_prompts=None, all_negative_prompts=None, all_seeds=None, all_subseeds=None, index_of_first_image=0, infotexts=None, comments=""):
|
||||
def __init__(self, p: StableDiffusionProcessing, images_list, seed=-1, info=None, subseed=None, all_prompts=None, all_negative_prompts=None, all_seeds=None, all_subseeds=None, index_of_first_image=0, infotexts=None, comments="", binary=None):
|
||||
self.sd_model_hash = getattr(shared.sd_model, 'sd_model_hash', '') if model_data.sd_model is not None else ''
|
||||
|
||||
self.prompt = p.prompt or ''
|
||||
|
|
@ -40,6 +40,7 @@ class Processed:
|
|||
self.negative_prompt = self.negative_prompt if type(self.negative_prompt) != list else self.negative_prompt[0]
|
||||
self.styles = p.styles
|
||||
|
||||
self.bytes = binary
|
||||
self.images = images_list
|
||||
self.width = p.width if hasattr(p, 'width') else (self.images[0].width if len(self.images) > 0 else 0)
|
||||
self.height = p.height if hasattr(p, 'height') else (self.images[0].height if len(self.images) > 0 else 0)
|
||||
|
|
@ -275,6 +276,8 @@ def process_init(p: StableDiffusionProcessing):
|
|||
def process_samples(p: StableDiffusionProcessing, samples):
|
||||
out_images = []
|
||||
out_infotexts = []
|
||||
if not isinstance(samples, list):
|
||||
return samples, []
|
||||
for i, sample in enumerate(samples):
|
||||
debug(f'Processing result: index={i+1}/{len(samples)}')
|
||||
p.batch_index = i
|
||||
|
|
@ -394,6 +397,7 @@ def process_images_inner(p: StableDiffusionProcessing) -> Processed:
|
|||
comments = {}
|
||||
infotexts = []
|
||||
output_images = []
|
||||
output_binary = None
|
||||
|
||||
process_init(p)
|
||||
if p.scripts is not None and isinstance(p.scripts, scripts_manager.ScriptRunner):
|
||||
|
|
@ -471,11 +475,14 @@ def process_images_inner(p: StableDiffusionProcessing) -> Processed:
|
|||
p.scripts.postprocess_batch_list(p, batch_params, batch_number=n)
|
||||
samples = batch_params.images
|
||||
|
||||
batch_images, batch_infotexts = process_samples(p, samples)
|
||||
for batch_image, batch_infotext in zip(batch_images, batch_infotexts):
|
||||
if batch_image is not None and batch_image not in output_images:
|
||||
output_images.append(batch_image)
|
||||
infotexts.append(batch_infotext)
|
||||
if hasattr(samples, 'bytes') and samples.bytes is not None:
|
||||
output_binary = samples.bytes
|
||||
else:
|
||||
batch_images, batch_infotexts = process_samples(p, samples)
|
||||
for batch_image, batch_infotext in zip(batch_images, batch_infotexts):
|
||||
if batch_image is not None and batch_image not in output_images:
|
||||
output_images.append(batch_image)
|
||||
infotexts.append(batch_infotext)
|
||||
|
||||
if shared.cmd_opts.lowvram:
|
||||
devices.torch_gc(force=True, reason='lowvram')
|
||||
|
|
@ -508,6 +515,7 @@ def process_images_inner(p: StableDiffusionProcessing) -> Processed:
|
|||
results = get_processed(
|
||||
p,
|
||||
images_list=output_images,
|
||||
binary=output_binary,
|
||||
seed=p.all_seeds[0],
|
||||
info=infotexts[0] if len(infotexts) > 0 else '',
|
||||
comments="\n".join(comments),
|
||||
|
|
|
|||
|
|
@ -435,6 +435,9 @@ def process_refine(p: processing.StableDiffusionProcessing, output):
|
|||
def process_decode(p: processing.StableDiffusionProcessing, output):
|
||||
shared.sd_model = sd_models.apply_balanced_offload(shared.sd_model, exclude=['vae'])
|
||||
if output is not None:
|
||||
if hasattr(output, 'bytes') and output.bytes is not None:
|
||||
shared.log.debug(f'Generated: bytes={len(output.bytes)}')
|
||||
return output
|
||||
if not hasattr(output, 'images') and hasattr(output, 'frames'):
|
||||
shared.log.debug(f'Generated: frames={len(output.frames[0])}')
|
||||
output.images = output.frames[0]
|
||||
|
|
@ -508,6 +511,8 @@ def validate_pipeline(p: processing.StableDiffusionProcessing):
|
|||
for m in video_models[family]:
|
||||
if m.repo_cls is not None:
|
||||
models_cls.append(m.repo_cls.__name__)
|
||||
if m.custom is not None:
|
||||
models_cls.append(m.custom)
|
||||
is_video_model = shared.sd_model.__class__.__name__ in models_cls
|
||||
override_video_pipelines = ['WanPipeline', 'WanImageToVideoPipeline', 'WanVACEPipeline']
|
||||
is_video_pipeline = ('video' in p.__class__.__name__.lower()) or (shared.sd_model.__class__.__name__ in override_video_pipelines)
|
||||
|
|
@ -569,7 +574,7 @@ def process_diffusers(p: processing.StableDiffusionProcessing):
|
|||
images, _index=shared.history.selected
|
||||
output = SimpleNamespace(images=images)
|
||||
|
||||
if (output is None or len(output.images) == 0) and has_images:
|
||||
if (output is None or (hasattr(output, 'images') and len(output.images) == 0)) and has_images:
|
||||
if output is not None:
|
||||
shared.log.debug('Processing: using input as base output')
|
||||
output.images = p.init_images
|
||||
|
|
|
|||
|
|
@ -0,0 +1,145 @@
|
|||
import io
|
||||
import os
|
||||
import time
|
||||
|
||||
import sys
|
||||
sys.path.append(os.path.abspath(os.path.join(os.path.dirname(__file__), '..', '..')))
|
||||
|
||||
from PIL import Image
|
||||
from installer import install, reload, log
|
||||
|
||||
|
||||
# resolution label -> total pixel count; used to pick the label closest to a requested width*height
image_size_buckets = {
    '720p': 1280*720,
    '1080p': 1920*1080,
}
# aspect-ratio label -> width/height ratio; used to pick the label closest to a requested width/height
aspect_ratios_buckets = {
    '1:1': 1/1,
    '2:3': 2/3,
    '3:2': 3/2,
    '4:3': 4/3,
    '3:4': 3/4,
    '4:5': 4/5,
    '5:4': 5/4,
    '16:9': 16/9,
    '9:16': 9/16,
    '21:9': 21/9,
    '9:21': 9/21,
}
|
||||
|
||||
|
||||
def google_requirements():
    """Install the pinned packages required by the Google cloud pipeline."""
    install('google-genai==1.52.0')
    install('pydantic==2.11.7', ignore=True, quiet=True)
    # NOTE(review): presumably re-imports pydantic so the pinned version is the one in use - confirm against installer.reload
    reload('pydantic', '2.11.7')
|
||||
|
||||
|
||||
def get_size_buckets(width: int, height: int) -> tuple[str, str]:
    """Map pixel dimensions to the closest supported size and aspect-ratio labels.

    Args:
        width: requested width in pixels.
        height: requested height in pixels; must be non-zero since it is used as a divisor.

    Returns:
        Tuple `(size, aspect_ratio)` where `size` is a key of `image_size_buckets`
        and `aspect_ratio` is a key of `aspect_ratios_buckets`.
    """
    aspect_ratio = width / height
    pixel_count = width * height
    # pick the bucket whose value has the smallest absolute distance to the requested one
    closest_size = min(image_size_buckets.items(), key=lambda x: abs(x[1] - pixel_count))[0]
    closest_aspect_ratio = min(aspect_ratios_buckets.items(), key=lambda x: abs(x[1] - aspect_ratio))[0]
    return closest_size, closest_aspect_ratio
|
||||
|
||||
|
||||
class GoogleVeoVideoPipeline():
    """Callable wrapper that generates a video through the Google GenAI `generate_videos` API.

    The API client is created lazily on the first call using the `GOOGLE_API_KEY`
    environment variable; the request config is rebuilt on every call.
    """

    def __init__(self, model_name: str):
        """Store the cloud model name and make sure the client packages are installed."""
        self.model = model_name
        self.client = None # created on first call once the api key is available
        self.config = None # set on every call from requested size and duration
        google_requirements()
        log.debug(f'Load model: type=GoogleVeo model="{model_name}"')

    def txt2vid(self, prompt):
        """Submit a text-to-video request and return the operation object from the client."""
        return self.client.models.generate_videos(
            model=self.model,
            prompt=prompt,
            config=self.config,
        )

    def img2vid(self, prompt, image):
        """Submit an image-to-video request and return the operation object from the client.

        The input image is sent JPEG-encoded; modes the JPEG encoder cannot store
        (e.g. RGBA or palette images loaded from PNG) are converted to RGB first.
        """
        from google import genai
        if image.mode not in ('L', 'RGB', 'CMYK'):
            image = image.convert('RGB') # avoids "cannot write mode ... as JPEG" on save
        image_bytes = io.BytesIO()
        image.save(image_bytes, format='JPEG')
        return self.client.models.generate_videos(
            model=self.model,
            prompt=prompt,
            config=self.config,
            image=genai.types.Image(image_bytes=image_bytes.getvalue(), mime_type='image/jpeg'),
        )

    def __call__(self, prompt: list[str], width: int, height: int, image: Image.Image = None, num_frames: int = 4*24):
        """Generate a video and return its encoded bytes.

        Args:
            prompt: prompt text; when a non-empty list is given only the first entry is used.
            width: requested width in pixels, mapped to the closest resolution/aspect-ratio bucket.
            height: requested height in pixels, mapped to the closest resolution/aspect-ratio bucket.
            image: optional init image; when set an image-to-video request is made.
            num_frames: requested frame count, converted to seconds at 24 frames per second.

        Returns:
            Dict with the video under `'bytes'` and an empty `'images'` list,
            or None when the api key is missing or the request/download fails.
        """
        from google import genai

        if isinstance(prompt, list) and len(prompt) > 0:
            prompt = prompt[0]
        if self.client is None:
            api_key = os.getenv("GOOGLE_API_KEY", None)
            if api_key is None:
                log.error(f'Cloud: model="{self.model}" GOOGLE_API_KEY environment variable not set')
                return None
            self.client = genai.Client(api_key=api_key, vertexai=False)

        resolution, aspect_ratio = get_size_buckets(width, height)
        # clamp duration to 4..8 seconds - NOTE(review): presumably the range accepted by the api, confirm
        duration = num_frames // 24
        if duration < 4:
            duration = 4
        if duration > 8:
            duration = 8
        self.config = genai.types.GenerateVideosConfig(
            # seed=42,
            # fps=24,
            duration_seconds=duration,
            aspect_ratio=aspect_ratio,
            resolution=resolution,
            # person_generation='ALLOW_ALL',
            # safety_filter_level='BLOCK_NONE',
            # negative_prompt=None,
            # enhance_prompt=True,
            # generate_audio=True,
        )
        log.debug(f'Cloud: prompt="{prompt}" size={resolution} ar={aspect_ratio} image={image} model="{self.model}" frames={num_frames} duration={duration}')

        operation = None
        try:
            if image is not None:
                operation = self.img2vid(prompt, image)
            else:
                operation = self.txt2vid(prompt)
            # generation is asynchronous: poll the operation every 10 seconds until it reports done
            while not operation.done:
                log.debug(f"Cloud processing: {operation}")
                time.sleep(10)
                operation = self.client.operations.get(operation)
        except Exception as e:
            log.error(f'Cloud video: model="{self.model}" {operation} {e}')
            return None

        if operation is None or operation.response is None or operation.response.generated_videos is None or len(operation.response.generated_videos) == 0:
            log.error(f'Cloud video: model="{self.model}" no response {operation}')
            return None
        try:
            response: genai.types.GeneratedVideo = operation.response.generated_videos[0]
            self.client.files.download(file=response.video) # video_bytes is read from the same object after download
            video_bytes = response.video.video_bytes
            return { 'bytes': video_bytes, 'images': [] }
        except Exception as e:
            log.error(f'Cloud download: model="{self.model}" {e}')
            return None
|
||||
|
||||
|
||||
def load_veo(model_name):
    """Create and return a `GoogleVeoVideoPipeline` for the given cloud model name."""
    return GoogleVeoVideoPipeline(model_name=model_name)
|
||||
|
||||
|
||||
if __name__ == "__main__":
    # manual smoke test: runs an image-to-video request and stores the result as veo.mp4
    from installer import setup_logging
    setup_logging()
    log.info('test')
    model = GoogleVeoVideoPipeline('veo-3.1-generate-preview')
    img = Image.open('C:\\Users\\mandi\\OneDrive\\Generative\\Samples\\cartoon.png')
    vid = model(['A beautiful young woman walking through the fantasy city'], 1280, 720, image=img)
    if vid is not None:
        with open("veo.mp4", "wb") as f:
            f.write(vid['bytes']) # the pipeline returns the encoded video under the 'bytes' key
|
||||
|
|
@ -10,6 +10,7 @@ class Model():
|
|||
name: str
|
||||
url: str = ''
|
||||
repo: str = None
|
||||
custom: str = None
|
||||
repo_cls: classmethod = None
|
||||
repo_revision: str = None
|
||||
dit: str = None
|
||||
|
|
@ -480,6 +481,22 @@ try:
|
|||
te_cls=getattr(transformers, 'Qwen2_5_VLForConditionalGeneration', None),
|
||||
dit_cls=getattr(diffusers, 'Kandinsky5Transformer3DModel', None)),
|
||||
],
|
||||
'Google Veo': [
|
||||
Model(name='Google Veo 3.1 T2V',
|
||||
url='https://gemini.google/overview/video-generation/',
|
||||
repo='veo-3.1-generate-preview',
|
||||
custom='GoogleVeoVideoPipeline',
|
||||
repo_cls=None,
|
||||
te_cls=None,
|
||||
dit_cls=None),
|
||||
Model(name='Google Veo 3.1 I2V',
|
||||
url='https://gemini.google/overview/video-generation/',
|
||||
repo='veo-3.1-generate-preview',
|
||||
custom='GoogleVeoVideoPipeline',
|
||||
repo_cls=None,
|
||||
te_cls=None,
|
||||
dit_cls=None),
|
||||
],
|
||||
}
|
||||
t1 = time.time()
|
||||
errors = 0
|
||||
|
|
@ -488,11 +505,14 @@ try:
|
|||
for m in model:
|
||||
if m.name == 'None':
|
||||
continue
|
||||
"""
|
||||
if (m.repo_cls is None) or (m.dit_cls is None) or (m.te_cls is None):
|
||||
log.error(f'Video: pipeline="{m.name}" not available')
|
||||
errors += 1
|
||||
else:
|
||||
total += 1
|
||||
"""
|
||||
total += 1
|
||||
log.info(f'Networks: type="video" engines={len(models)} models={total} errors={errors} time={t1 - t0:.2f}')
|
||||
except Exception as e:
|
||||
models = {}
|
||||
|
|
|
|||
|
|
@ -3,7 +3,7 @@ from modules import shared
|
|||
|
||||
|
||||
def apply_teacache_patch(cls):
|
||||
if shared.opts.teacache_enabled:
|
||||
if shared.opts.teacache_enabled and cls is not None:
|
||||
from modules import teacache
|
||||
shared.log.debug(f'Transformers cache: type=teacache patch=forward cls={cls.__name__}')
|
||||
if cls.__name__ == 'LTXVideoTransformer3DModel':
|
||||
|
|
|
|||
|
|
@ -20,15 +20,26 @@ def _loader(component):
|
|||
loaded_model = None
|
||||
|
||||
|
||||
def load_custom(model_name: str):
    """Load a pipeline that is not backed by a diffusers class.

    Returns the pipeline created by `load_veo` when `model_name` contains
    'veo-3.1', otherwise None.
    """
    shared.log.debug(f'Video load: module=pipe repo="{model_name}" cls=Custom')
    if 'veo-3.1' not in model_name:
        return None
    from modules.video_models.google_veo import load_veo
    return load_veo(model_name)
|
||||
|
||||
|
||||
def load_model(selected: models_def.Model):
|
||||
if selected is None or selected.te_cls is None or selected.dit_cls is None:
|
||||
if selected is None or selected.repo is None:
|
||||
return ''
|
||||
global loaded_model # pylint: disable=global-statement
|
||||
if not shared.sd_loaded:
|
||||
loaded_model = None
|
||||
if loaded_model == selected.name:
|
||||
return ''
|
||||
sd_models.unload_model_weights()
|
||||
if shared.sd_loaded:
|
||||
sd_models.unload_model_weights()
|
||||
|
||||
t0 = time.time()
|
||||
jobid = shared.state.begin('Load model')
|
||||
|
||||
|
|
@ -46,89 +57,99 @@ def load_model(selected: models_def.Model):
|
|||
kwargs = video_overrides.load_override(selected, **offline_args)
|
||||
|
||||
# text encoder
|
||||
try:
|
||||
load_args, quant_args = model_quant.get_dit_args({}, module='TE', device_map=True)
|
||||
if selected.te_cls is not None:
|
||||
try:
|
||||
load_args, quant_args = model_quant.get_dit_args({}, module='TE', device_map=True)
|
||||
|
||||
# loader deduplication of text-encoder models
|
||||
if selected.te_cls.__name__ == 'T5EncoderModel' and shared.opts.te_shared_t5:
|
||||
selected.te = 'Disty0/t5-xxl'
|
||||
selected.te_folder = ''
|
||||
selected.te_revision = None
|
||||
if selected.te_cls.__name__ == 'UMT5EncoderModel' and shared.opts.te_shared_t5:
|
||||
if 'SDNQ' in selected.name:
|
||||
selected.te = 'Disty0/Wan2.2-T2V-A14B-SDNQ-uint4-svd-r32'
|
||||
else:
|
||||
selected.te = 'Wan-AI/Wan2.2-TI2V-5B-Diffusers'
|
||||
selected.te_folder = 'text_encoder'
|
||||
selected.te_revision = None
|
||||
if selected.te_cls.__name__ == 'LlamaModel' and shared.opts.te_shared_t5:
|
||||
selected.te = 'hunyuanvideo-community/HunyuanVideo'
|
||||
selected.te_folder = 'text_encoder'
|
||||
selected.te_revision = None
|
||||
if selected.te_cls.__name__ == 'Qwen2_5_VLForConditionalGeneration' and shared.opts.te_shared_t5:
|
||||
selected.te = 'ai-forever/Kandinsky-5.0-T2V-Lite-sft-5s-Diffusers'
|
||||
selected.te_folder = 'text_encoder'
|
||||
selected.te_revision = None
|
||||
# loader deduplication of text-encoder models
|
||||
if selected.te_cls.__name__ == 'T5EncoderModel' and shared.opts.te_shared_t5:
|
||||
selected.te = 'Disty0/t5-xxl'
|
||||
selected.te_folder = ''
|
||||
selected.te_revision = None
|
||||
if selected.te_cls.__name__ == 'UMT5EncoderModel' and shared.opts.te_shared_t5:
|
||||
if 'SDNQ' in selected.name:
|
||||
selected.te = 'Disty0/Wan2.2-T2V-A14B-SDNQ-uint4-svd-r32'
|
||||
else:
|
||||
selected.te = 'Wan-AI/Wan2.2-TI2V-5B-Diffusers'
|
||||
selected.te_folder = 'text_encoder'
|
||||
selected.te_revision = None
|
||||
if selected.te_cls.__name__ == 'LlamaModel' and shared.opts.te_shared_t5:
|
||||
selected.te = 'hunyuanvideo-community/HunyuanVideo'
|
||||
selected.te_folder = 'text_encoder'
|
||||
selected.te_revision = None
|
||||
if selected.te_cls.__name__ == 'Qwen2_5_VLForConditionalGeneration' and shared.opts.te_shared_t5:
|
||||
selected.te = 'ai-forever/Kandinsky-5.0-T2V-Lite-sft-5s-Diffusers'
|
||||
selected.te_folder = 'text_encoder'
|
||||
selected.te_revision = None
|
||||
|
||||
shared.log.debug(f'Video load: module=te repo="{selected.te or selected.repo}" folder="{selected.te_folder}" cls={selected.te_cls.__name__} quant={model_quant.get_quant_type(quant_args)} loader={_loader("transformers")}')
|
||||
kwargs["text_encoder"] = selected.te_cls.from_pretrained(
|
||||
pretrained_model_name_or_path=selected.te or selected.repo,
|
||||
subfolder=selected.te_folder,
|
||||
revision=selected.te_revision or selected.repo_revision,
|
||||
cache_dir=shared.opts.hfcache_dir,
|
||||
**load_args,
|
||||
**quant_args,
|
||||
**offline_args,
|
||||
)
|
||||
except Exception as e:
|
||||
shared.log.error(f'video load: module=te cls={selected.te_cls.__name__} {e}')
|
||||
errors.display(e, 'video')
|
||||
shared.log.debug(f'Video load: module=te repo="{selected.te or selected.repo}" folder="{selected.te_folder}" cls={selected.te_cls.__name__} quant={model_quant.get_quant_type(quant_args)} loader={_loader("transformers")}')
|
||||
kwargs["text_encoder"] = selected.te_cls.from_pretrained(
|
||||
pretrained_model_name_or_path=selected.te or selected.repo,
|
||||
subfolder=selected.te_folder,
|
||||
revision=selected.te_revision or selected.repo_revision,
|
||||
cache_dir=shared.opts.hfcache_dir,
|
||||
**load_args,
|
||||
**quant_args,
|
||||
**offline_args,
|
||||
)
|
||||
except Exception as e:
|
||||
shared.log.error(f'video load: module=te cls={selected.te_cls.__name__} {e}')
|
||||
errors.display(e, 'video')
|
||||
|
||||
# transformer
|
||||
try:
|
||||
def load_dit_folder(dit_folder):
|
||||
if dit_folder is not None and dit_folder not in kwargs:
|
||||
# get a new quant arg on every loop to prevent the quant config classes getting entangled
|
||||
load_args, quant_args = model_quant.get_dit_args({}, module='Model', device_map=True)
|
||||
shared.log.debug(f'Video load: module=transformer repo="{selected.dit or selected.repo}" module="{dit_folder}" folder="{dit_folder}" cls={selected.dit_cls.__name__} quant={model_quant.get_quant_type(quant_args)} loader={_loader("diffusers")}')
|
||||
kwargs[dit_folder] = selected.dit_cls.from_pretrained(
|
||||
pretrained_model_name_or_path=selected.dit or selected.repo,
|
||||
subfolder=dit_folder,
|
||||
revision=selected.dit_revision or selected.repo_revision,
|
||||
cache_dir=shared.opts.hfcache_dir,
|
||||
**load_args,
|
||||
**quant_args,
|
||||
**offline_args,
|
||||
)
|
||||
else:
|
||||
shared.log.debug(f'Video load: module=transformer repo="{selected.dit or selected.repo}" module="{dit_folder}" folder="{dit_folder}" cls={selected.dit_cls.__name__} loader={_loader("diffusers")} skip')
|
||||
if selected.dit_cls is not None:
|
||||
try:
|
||||
def load_dit_folder(dit_folder):
|
||||
if dit_folder is not None and dit_folder not in kwargs:
|
||||
# get a new quant arg on every loop to prevent the quant config classes getting entangled
|
||||
load_args, quant_args = model_quant.get_dit_args({}, module='Model', device_map=True)
|
||||
shared.log.debug(f'Video load: module=transformer repo="{selected.dit or selected.repo}" module="{dit_folder}" folder="{dit_folder}" cls={selected.dit_cls.__name__} quant={model_quant.get_quant_type(quant_args)} loader={_loader("diffusers")}')
|
||||
kwargs[dit_folder] = selected.dit_cls.from_pretrained(
|
||||
pretrained_model_name_or_path=selected.dit or selected.repo,
|
||||
subfolder=dit_folder,
|
||||
revision=selected.dit_revision or selected.repo_revision,
|
||||
cache_dir=shared.opts.hfcache_dir,
|
||||
**load_args,
|
||||
**quant_args,
|
||||
**offline_args,
|
||||
)
|
||||
else:
|
||||
shared.log.debug(f'Video load: module=transformer repo="{selected.dit or selected.repo}" module="{dit_folder}" folder="{dit_folder}" cls={selected.dit_cls.__name__} loader={_loader("diffusers")} skip')
|
||||
|
||||
if selected.dit_folder is None:
|
||||
selected.dit_folder = ['transformer']
|
||||
if isinstance(selected.dit_folder, list) or isinstance(selected.dit_folder, tuple):
|
||||
for dit_folder in selected.dit_folder: # wan a14b has transformer and transformer_2
|
||||
load_dit_folder(dit_folder)
|
||||
else:
|
||||
load_dit_folder(selected.dit_folder)
|
||||
except Exception as e:
|
||||
shared.log.error(f'video load: module=transformer cls={selected.dit_cls.__name__} {e}')
|
||||
errors.display(e, 'video')
|
||||
if selected.dit_folder is None:
|
||||
selected.dit_folder = ['transformer']
|
||||
if isinstance(selected.dit_folder, list) or isinstance(selected.dit_folder, tuple):
|
||||
for dit_folder in selected.dit_folder: # wan a14b has transformer and transformer_2
|
||||
load_dit_folder(dit_folder)
|
||||
else:
|
||||
load_dit_folder(selected.dit_folder)
|
||||
except Exception as e:
|
||||
shared.log.error(f'video load: module=transformer cls={selected.dit_cls.__name__} {e}')
|
||||
errors.display(e, 'video')
|
||||
|
||||
# model
|
||||
try:
|
||||
shared.log.debug(f'Video load: module=pipe repo="{selected.repo}" cls={selected.repo_cls.__name__}')
|
||||
shared.sd_model = selected.repo_cls.from_pretrained(
|
||||
pretrained_model_name_or_path=selected.repo,
|
||||
revision=selected.repo_revision,
|
||||
cache_dir=shared.opts.hfcache_dir,
|
||||
torch_dtype=devices.dtype,
|
||||
**kwargs,
|
||||
**offline_args,
|
||||
)
|
||||
if selected.repo_cls is None:
|
||||
shared.sd_model = load_custom(selected.repo)
|
||||
else:
|
||||
shared.log.debug(f'Video load: module=pipe repo="{selected.repo}" cls={selected.repo_cls.__name__}')
|
||||
shared.sd_model = selected.repo_cls.from_pretrained(
|
||||
pretrained_model_name_or_path=selected.repo,
|
||||
revision=selected.repo_revision,
|
||||
cache_dir=shared.opts.hfcache_dir,
|
||||
torch_dtype=devices.dtype,
|
||||
**kwargs,
|
||||
**offline_args,
|
||||
)
|
||||
except Exception as e:
|
||||
shared.log.error(f'video load: module=pipe repo="{selected.repo}" cls={selected.repo_cls.__name__} {e}')
|
||||
errors.display(e, 'video')
|
||||
|
||||
if shared.sd_model is None:
|
||||
msg = f'Video load: model="{selected.name}" failed'
|
||||
shared.log.error(msg)
|
||||
return msg
|
||||
|
||||
t1 = time.time()
|
||||
if shared.sd_model.__class__.__name__.startswith("LTX"):
|
||||
shared.sd_model.scheduler.config.use_dynamic_shifting = False
|
||||
|
|
@ -138,7 +159,7 @@ def load_model(selected: models_def.Model):
|
|||
sd_models.set_diffuser_options(shared.sd_model, offload=False)
|
||||
|
||||
decode, text, image, slicing, tiling, framewise = False, False, False, False, False, False
|
||||
if selected.vae_hijack and hasattr(shared.sd_model.vae, 'decode'):
|
||||
if selected.vae_hijack and hasattr(shared.sd_model, 'vae') and hasattr(shared.sd_model.vae, 'decode'):
|
||||
sd_hijack_vae.init_hijack(shared.sd_model)
|
||||
decode = True
|
||||
if selected.te_hijack and hasattr(shared.sd_model, 'encode_prompt'):
|
||||
|
|
|
|||
|
|
@ -109,7 +109,7 @@ def generate(*args, **kwargs):
|
|||
orig_sampler_shift = shared.opts.schedulers_shift
|
||||
shared.opts.data['schedulers_dynamic_shift'] = dynamic_shift
|
||||
shared.opts.data['schedulers_shift'] = sampler_shift
|
||||
if hasattr(shared.sd_model.scheduler, 'config') and hasattr(shared.sd_model.scheduler, 'register_to_config'):
|
||||
if hasattr(shared.sd_model, 'scheduler') and hasattr(shared.sd_model.scheduler, 'config') and hasattr(shared.sd_model.scheduler, 'register_to_config'):
|
||||
if hasattr(shared.sd_model.scheduler.config, 'use_dynamic_shifting'):
|
||||
shared.sd_model.scheduler.config.use_dynamic_shifting = dynamic_shift
|
||||
shared.sd_model.scheduler.register_to_config(use_dynamic_shifting = dynamic_shift)
|
||||
|
|
@ -146,15 +146,18 @@ def generate(*args, **kwargs):
|
|||
# done
|
||||
if err:
|
||||
return video_utils.queue_err(err)
|
||||
if processed is None or len(processed.images) == 0:
|
||||
if processed is None or (len(processed.images) == 0 and processed.bytes is None):
|
||||
return video_utils.queue_err('processing failed')
|
||||
shared.log.info(f'Video: name="{selected.name}" cls={shared.sd_model.__class__.__name__} frames={len(processed.images)} time={t1-t0:.2f}')
|
||||
|
||||
# video_file = images.save_video(p, filename=None, images=processed.images, video_type=video_type, duration=video_duration, loop=video_loop, pad=video_pad, interpolate=video_interpolate) # legacy video save from list of images
|
||||
pixels = video_save.images_to_tensor(processed.images)
|
||||
if hasattr(processed, 'images') and processed.images is not None:
|
||||
pixels = video_save.images_to_tensor(processed.images)
|
||||
else:
|
||||
pixels = None
|
||||
_num_frames, video_file = video_save.save_video(
|
||||
p=p,
|
||||
pixels=pixels,
|
||||
binary=processed.bytes,
|
||||
mp4_fps=mp4_fps,
|
||||
mp4_codec=mp4_codec,
|
||||
mp4_opt=mp4_opt,
|
||||
|
|
|
|||
|
|
@ -107,7 +107,8 @@ def atomic_save_video(filename, tensor:torch.Tensor, fps:float=24, codec:str='li
|
|||
|
||||
def save_video(
|
||||
p:processing.StableDiffusionProcessingVideo,
|
||||
pixels:torch.Tensor,
|
||||
pixels:torch.Tensor=None,
|
||||
binary:bytes=None,
|
||||
mp4_fps:int=24,
|
||||
mp4_codec:str='libx264',
|
||||
mp4_opt:str='',
|
||||
|
|
@ -121,6 +122,23 @@ def save_video(
|
|||
pbar=None, # progress bar for video
|
||||
):
|
||||
output_video = None
|
||||
|
||||
if binary is not None:
|
||||
output_filename = get_video_filename(p)
|
||||
output_video = f'{output_filename}.{mp4_ext}'
|
||||
try:
|
||||
try:
|
||||
with open(output_video, 'wb') as f:
|
||||
f.write(binary)
|
||||
shared.log.info(f'Video output: file="{output_video}" size={len(binary)}')
|
||||
shared.state.outputs(output_video)
|
||||
except Exception as e:
|
||||
shared.log.error(f'Video output: file="{output_video}" {e}')
|
||||
except Exception as e:
|
||||
shared.log.error(f'Video output: file="{output_video}" write error {e}')
|
||||
errors.display(e, 'video')
|
||||
return 0, output_video
|
||||
|
||||
if pixels is None:
|
||||
return 0, output_video
|
||||
if not torch.is_tensor(pixels):
|
||||
|
|
|
|||
|
|
@ -9,6 +9,8 @@ vae_type = None
|
|||
def set_vae_params(p):
|
||||
global vae_type # pylint: disable=global-statement
|
||||
vae_type = p.vae_type
|
||||
if not hasattr(shared.sd_model, 'vae'):
|
||||
return
|
||||
if hasattr(shared.sd_model.vae, 'enable_slicing'):
|
||||
shared.sd_model.vae.enable_slicing()
|
||||
if p.frames > p.vae_tile_frames:
|
||||
|
|
|
|||
|
|
@ -24,6 +24,12 @@ aspect_ratios_buckets = {
|
|||
}
|
||||
|
||||
|
||||
def google_requirements():
    """Install the pinned packages required by the Google cloud pipeline."""
    install('google-genai==1.52.0')
    install('pydantic==2.11.7', ignore=True, quiet=True)
    # NOTE(review): presumably re-imports pydantic so the pinned version is the one in use - confirm against installer.reload
    reload('pydantic', '2.11.7')
|
||||
|
||||
|
||||
def get_size_buckets(width: int, height: int) -> str:
|
||||
aspect_ratio = width / height
|
||||
closest_aspect_ratio = min(aspect_ratios_buckets.items(), key=lambda x: abs(x[1] - aspect_ratio))[0]
|
||||
|
|
@ -38,9 +44,7 @@ class GoogleNanoBananaPipeline():
|
|||
self.model = model_name
|
||||
self.client = None
|
||||
self.config = None
|
||||
install('google-genai==1.52.0')
|
||||
install('pydantic==2.11.7', ignore=True, quiet=True)
|
||||
reload('pydantic', '2.11.7')
|
||||
google_requirements()
|
||||
log.debug(f'Load model: type=NanoBanana model="{model_name}"')
|
||||
|
||||
def txt2img(self, prompt):
|
||||
|
|
@ -81,7 +85,7 @@ class GoogleNanoBananaPipeline():
|
|||
response_modalities=["IMAGE"],
|
||||
image_config=image_config
|
||||
)
|
||||
log.debug(f'Cloud: prompt={prompt} size={image_size} ar={aspect_ratio} image={image} model="{self.model}"')
|
||||
log.debug(f'Cloud: prompt="{prompt}" size={image_size} ar={aspect_ratio} image={image} model="{self.model}"')
|
||||
# log.debug(f'Cloud: config={self.config}')
|
||||
|
||||
try:
|
||||
|
|
@ -114,6 +118,6 @@ if __name__ == "__main__":
|
|||
import sys
|
||||
sys.path.append(os.path.abspath(os.path.join(os.path.dirname(__file__), '..')))
|
||||
log.info('test')
|
||||
model =GoogleNanoBananaPipeline('gemini-3-pro-image-preview')
|
||||
model = GoogleNanoBananaPipeline('gemini-3-pro-image-preview')
|
||||
img = model(['A beautiful landscape with mountains and a river'], 1024, 1024)
|
||||
img.save('test.png')
|
||||
|
|
|
|||
2
wiki
2
wiki
|
|
@ -1 +1 @@
|
|||
Subproject commit f7289d6c03899f519de8692efe8ea2731779607c
|
||||
Subproject commit 2fb950abdeaad2d2a7976857f04646fc8c6963e1
|
||||
Loading…
Reference in New Issue