add google-veo-3.1

Signed-off-by: vladmandic <mandic00@live.com>
pull/4456/head
vladmandic 2025-12-09 19:14:08 +01:00
parent 27e6d17c9a
commit 3a65d561a7
14 changed files with 330 additions and 98 deletions

View File

@@ -9,8 +9,10 @@ Merge commit: `f903a36d9`
### Highlights for 2025-12-09
New native [kanvas](https://vladmandic.github.io/sdnext-docs/Kanvas/) module for image manipulation that fully replaces *img2img*, *inpaint* and *outpaint* controls, massive update to **Captioning/VQA** models and features
New generation of **Flux.2** large image model, new **Z-Image** model that is creating a lot of buzz, new **Kandinsky 5 Lite** image model and a first cloud model with **Google's Nano Banana** *2.5 Flash and 3.0 Pro*, new **Photoroom PRX** model
Also new are **HunyuanVideo 1.5** and **Kandinsky 5 Pro** video models, plus a lot of internal improvements and fixes
New generation of **Flux.2** large image model, new **Z-Image** model that is creating a lot of buzz, new **Kandinsky 5 Lite** image model and new **Photoroom PRX** model
And the first cloud models: **Google Nano Banana** *2.5 Flash and 3.0 Pro* for images and the **Google Veo** *3.1* video model
Also new are **HunyuanVideo 1.5** and **Kandinsky 5 Pro** video models
Plus a lot of internal improvements and fixes
![Screenshot](https://github.com/user-attachments/assets/54b25586-b611-4d70-a28f-ee3360944034)
@@ -42,6 +44,8 @@ Also new are **HunyuanVideo 1.5** and **Kandinsky 5 Pro** video models, plus a l
distilled variants provide faster generation with slightly reduced quality
- [Kandinsky 5.0 Pro Video](https://huggingface.co/kandinskylab/Kandinsky-5.0-T2V-Pro-sft-5s-Diffusers) in T2V and I2V variants
a larger and more powerful 19B version of the previously released 2B Lite models
- [Google Veo 3.1](https://gemini.google/us/overview/video-generation/) for T2V and I2V workflows
*note*: you need to set the `GOOGLE_API_KEY` environment variable with your key to use this model; see the usage sketch after this list
- **Kanvas**: new module for native canvas-based image manipulation
kanvas is a full replacement for *img2img, inpaint and outpaint* controls
see [docs](https://vladmandic.github.io/sdnext-docs/Kanvas/) for details
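A minimal usage sketch for the new Veo pipeline (it mirrors the test harness added in this commit; the prompt and output filename are illustrative):

```python
# minimal sketch: drive the new Veo 3.1 cloud pipeline directly
import os
os.environ['GOOGLE_API_KEY'] = '<your-key>'  # required, as noted above

from modules.video_models.google_veo import load_veo

pipe = load_veo('veo-3.1-generate-preview')  # repo id registered for both T2V and I2V variants
result = pipe(['A slow pan over a foggy mountain lake'], 1280, 720)  # T2V; pass image=... for I2V
if result is not None:
    with open('veo.mp4', 'wb') as f:
        f.write(result['bytes'])  # the pipeline returns raw video bytes, not PIL images
```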

View File

@@ -782,7 +782,7 @@
"Kandinsky 5.0 T2I Lite": {
"path": "kandinskylab/Kandinsky-5.0-T2I-Lite-sft-Diffusers",
"desc": "Kandinsky 5.0 Image Lite is a 6B image generation models 1K resulution, high visual quality and strong text-writing",
"preview": "kandinskylab--Kandinsky-5.0-T2I-Lite-sft-Diffusers.jpg",
"preview": "kandinsky-community--kandinsky-3.jpg",
"skip": true,
"size": 33.20,
"date": "2025 November"
@@ -790,7 +790,7 @@
"Kandinsky 5.0 I2I Lite": {
"path": "kandinskylab/Kandinsky-5.0-I2I-Lite-sft-Diffusers",
"desc": "Kandinsky 5.0 Image Lite is a 6B image editing models 1K resulution, high visual quality and strong text-writing",
"preview": "kandinskylab--Kandinsky-5.0-I2I-Lite-sft-Diffusers.jpg",
"preview": "kandinsky-community--kandinsky-3.jpg",
"skip": true,
"size": 33.20,
"date": "2025 November"

View File

@@ -79,7 +79,7 @@ def get_model_type(pipe):
# video models
elif "CogVideo" in name:
model_type = 'cogvideo'
elif 'HunyuanVideo15':
elif 'HunyuanVideo15' in name:
model_type = 'hunyuanvideo15'
elif 'HunyuanVideoPipeline' in name or 'HunyuanSkyreels' in name:
model_type = 'hunyuanvideo'
@@ -101,6 +101,8 @@ def get_model_type(pipe):
elif 'HunyuanImage' in name:
model_type = 'hunyuanimage'
# cloud models
elif 'GoogleVeo' in name:
model_type = 'veo3'
elif 'NanoBanana' in name:
model_type = 'nanobanana'
else:

View File

@@ -31,7 +31,7 @@ processed = None # last known processed results
class Processed:
def __init__(self, p: StableDiffusionProcessing, images_list, seed=-1, info=None, subseed=None, all_prompts=None, all_negative_prompts=None, all_seeds=None, all_subseeds=None, index_of_first_image=0, infotexts=None, comments=""):
def __init__(self, p: StableDiffusionProcessing, images_list, seed=-1, info=None, subseed=None, all_prompts=None, all_negative_prompts=None, all_seeds=None, all_subseeds=None, index_of_first_image=0, infotexts=None, comments="", binary=None):
self.sd_model_hash = getattr(shared.sd_model, 'sd_model_hash', '') if model_data.sd_model is not None else ''
self.prompt = p.prompt or ''
@@ -40,6 +40,7 @@ class Processed:
self.negative_prompt = self.negative_prompt if type(self.negative_prompt) != list else self.negative_prompt[0]
self.styles = p.styles
self.bytes = binary
self.images = images_list
self.width = p.width if hasattr(p, 'width') else (self.images[0].width if len(self.images) > 0 else 0)
self.height = p.height if hasattr(p, 'height') else (self.images[0].height if len(self.images) > 0 else 0)
@@ -275,6 +276,8 @@ def process_init(p: StableDiffusionProcessing):
def process_samples(p: StableDiffusionProcessing, samples):
out_images = []
out_infotexts = []
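# cloud pipelines can return a namespace carrying raw video bytes instead of a list of images; pass it through untouched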
if not isinstance(samples, list):
return samples, []
for i, sample in enumerate(samples):
debug(f'Processing result: index={i+1}/{len(samples)}')
p.batch_index = i
@@ -394,6 +397,7 @@ def process_images_inner(p: StableDiffusionProcessing) -> Processed:
comments = {}
infotexts = []
output_images = []
output_binary = None
process_init(p)
if p.scripts is not None and isinstance(p.scripts, scripts_manager.ScriptRunner):
@@ -471,11 +475,14 @@ def process_images_inner(p: StableDiffusionProcessing) -> Processed:
p.scripts.postprocess_batch_list(p, batch_params, batch_number=n)
samples = batch_params.images
batch_images, batch_infotexts = process_samples(p, samples)
for batch_image, batch_infotext in zip(batch_images, batch_infotexts):
if batch_image is not None and batch_image not in output_images:
output_images.append(batch_image)
infotexts.append(batch_infotext)
if hasattr(samples, 'bytes') and samples.bytes is not None:
output_binary = samples.bytes
else:
batch_images, batch_infotexts = process_samples(p, samples)
for batch_image, batch_infotext in zip(batch_images, batch_infotexts):
if batch_image is not None and batch_image not in output_images:
output_images.append(batch_image)
infotexts.append(batch_infotext)
if shared.cmd_opts.lowvram:
devices.torch_gc(force=True, reason='lowvram')
@@ -508,6 +515,7 @@ def process_images_inner(p: StableDiffusionProcessing) -> Processed:
results = get_processed(
p,
images_list=output_images,
binary=output_binary,
seed=p.all_seeds[0],
info=infotexts[0] if len(infotexts) > 0 else '',
comments="\n".join(comments),

View File

@@ -435,6 +435,9 @@ def process_refine(p: processing.StableDiffusionProcessing, output):
def process_decode(p: processing.StableDiffusionProcessing, output):
shared.sd_model = sd_models.apply_balanced_offload(shared.sd_model, exclude=['vae'])
if output is not None:
if hasattr(output, 'bytes') and output.bytes is not None:
shared.log.debug(f'Generated: bytes={len(output.bytes)}')
return output
if not hasattr(output, 'images') and hasattr(output, 'frames'):
shared.log.debug(f'Generated: frames={len(output.frames[0])}')
output.images = output.frames[0]
@@ -508,6 +511,8 @@ def validate_pipeline(p: processing.StableDiffusionProcessing):
for m in video_models[family]:
if m.repo_cls is not None:
models_cls.append(m.repo_cls.__name__)
if m.custom is not None:
models_cls.append(m.custom)
is_video_model = shared.sd_model.__class__.__name__ in models_cls
override_video_pipelines = ['WanPipeline', 'WanImageToVideoPipeline', 'WanVACEPipeline']
is_video_pipeline = ('video' in p.__class__.__name__.lower()) or (shared.sd_model.__class__.__name__ in override_video_pipelines)
@@ -569,7 +574,7 @@ def process_diffusers(p: processing.StableDiffusionProcessing):
images, _index=shared.history.selected
output = SimpleNamespace(images=images)
if (output is None or len(output.images) == 0) and has_images:
if (output is None or (hasattr(output, 'images') and len(output.images) == 0)) and has_images:
if output is not None:
shared.log.debug('Processing: using input as base output')
output.images = p.init_images

View File

@@ -0,0 +1,145 @@
import io
import os
import time
import sys
sys.path.append(os.path.abspath(os.path.join(os.path.dirname(__file__), '..', '..')))
from PIL import Image
from installer import install, reload, log
image_size_buckets = {
'720p': 1280*720,
'1080p': 1920*1080,
}
aspect_ratios_buckets = {
'1:1': 1/1,
'2:3': 2/3,
'3:2': 3/2,
'4:3': 4/3,
'3:4': 3/4,
'4:5': 4/5,
'5:4': 5/4,
'16:9': 16/9,
'9:16': 9/16,
'21:9': 21/9,
'9:21': 9/21,
}
def google_requirements():
install('google-genai==1.52.0')
install('pydantic==2.11.7', ignore=True, quiet=True)
reload('pydantic', '2.11.7')
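# map an arbitrary width/height onto the closest resolution and aspect-ratio buckets supported by the API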
def get_size_buckets(width: int, height: int) -> tuple[str, str]:
aspect_ratio = width / height
closest_aspect_ratio = min(aspect_ratios_buckets.items(), key=lambda x: abs(x[1] - aspect_ratio))[0]
pixel_count = width * height
closest_size = min(image_size_buckets.items(), key=lambda x: abs(x[1] - pixel_count))[0]
return closest_size, closest_aspect_ratio
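# e.g. get_size_buckets(1280, 720) -> ('720p', '16:9')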
class GoogleVeoVideoPipeline():
def __init__(self, model_name: str):
self.model = model_name
self.client = None
self.config = None
google_requirements()
log.debug(f'Load model: type=GoogleVeo model="{model_name}"')
def txt2vid(self, prompt):
return self.client.models.generate_videos(
model=self.model,
prompt=prompt,
config=self.config,
)
def img2vid(self, prompt, image):
from google import genai
image_bytes = io.BytesIO()
image.save(image_bytes, format='JPEG')
return self.client.models.generate_videos(
model=self.model,
prompt=prompt,
config=self.config,
image=genai.types.Image(image_bytes=image_bytes.getvalue(), mime_type='image/jpeg'),
)
def __call__(self, prompt: list[str], width: int, height: int, image: Image.Image = None, num_frames: int = 4*24):
from google import genai
if isinstance(prompt, list) and len(prompt) > 0:
prompt = prompt[0]
if self.client is None:
api_key = os.getenv("GOOGLE_API_KEY", None)
if api_key is None:
log.error(f'Cloud: model="{self.model}" GOOGLE_API_KEY environment variable not set')
return None
self.client = genai.Client(api_key=api_key, vertexai=False)
resolution, aspect_ratio = get_size_buckets(width, height)
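# convert the requested frame count to seconds at 24fps and clamp to the 4-8 second range used here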
duration = num_frames // 24
if duration < 4:
duration = 4
if duration > 8:
duration = 8
self.config = genai.types.GenerateVideosConfig(
# seed=42,
# fps=24,
duration_seconds=duration,
aspect_ratio=aspect_ratio,
resolution=resolution,
# person_generation='ALLOW_ALL',
# safety_filter_level='BLOCK_NONE',
# negative_prompt=None,
# enhance_prompt=True,
# generate_audio=True,
)
log.debug(f'Cloud: prompt="{prompt}" size={resolution} ar={aspect_ratio} image={image} model="{self.model}" frames={num_frames} duration={duration}')
operation = None
try:
if image is not None:
operation = self.img2vid(prompt, image)
else:
operation = self.txt2vid(prompt)
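# generate_videos returns a long-running operation; poll every 10s until it completes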
while not operation.done:
log.debug(f"Cloud processing: {operation}")
time.sleep(10)
operation = self.client.operations.get(operation)
except Exception as e:
log.error(f'Cloud video: model="{self.model}" {operation} {e}')
return None
if operation is None or operation.response is None or operation.response.generated_videos is None or len(operation.response.generated_videos) == 0:
log.error(f'Cloud video: model="{self.model}" no response {operation}')
return None
try:
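# download the finished clip and return its raw bytes; the caller is responsible for writing them to disk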
response: genai.types.GeneratedVideo = operation.response.generated_videos[0]
self.client.files.download(file=response.video)
video_bytes = response.video.video_bytes
return { 'bytes': video_bytes, 'images': [] }
except Exception as e:
log.error(f'Cloud download: model="{self.model}" {e}')
return None
def load_veo(model_name):
pipe = GoogleVeoVideoPipeline(model_name=model_name)
return pipe
if __name__ == "__main__":
from installer import setup_logging
setup_logging()
log.info('test')
model = GoogleVeoVideoPipeline('veo-3.1-generate-preview')
img = Image.open('C:\\Users\\mandi\\OneDrive\\Generative\\Samples\\cartoon.png')
vid = model(['A beautiful young woman walking through the fantasy city'], 1280, 720, image=img)
if vid is not None:
with open("veo.mp4", "wb") as f:
f.write(vid['bytes'])

View File

@@ -10,6 +10,7 @@ class Model():
name: str
url: str = ''
repo: str = None
custom: str = None
repo_cls: classmethod = None
repo_revision: str = None
dit: str = None
@@ -480,6 +481,22 @@ try:
te_cls=getattr(transformers, 'Qwen2_5_VLForConditionalGeneration', None),
dit_cls=getattr(diffusers, 'Kandinsky5Transformer3DModel', None)),
],
'Google Veo': [
Model(name='Google Veo 3.1 T2V',
url='https://gemini.google/overview/video-generation/',
repo='veo-3.1-generate-preview',
custom='GoogleVeoVideoPipeline',
repo_cls=None,
te_cls=None,
dit_cls=None),
Model(name='Google Veo 3.1 I2V',
url='https://gemini.google/overview/video-generation/',
repo='veo-3.1-generate-preview',
custom='GoogleVeoVideoPipeline',
repo_cls=None,
te_cls=None,
dit_cls=None),
],
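# cloud entries have no local classes to load; 'custom' names the pipeline class and the loader dispatches on repo_cls being None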
}
t1 = time.time()
errors = 0
@@ -488,11 +505,14 @@ try:
for m in model:
if m.name == 'None':
continue
"""
if (m.repo_cls is None) or (m.dit_cls is None) or (m.te_cls is None):
log.error(f'Video: pipeline="{m.name}" not available')
errors += 1
else:
total += 1
"""
total += 1
log.info(f'Networks: type="video" engines={len(models)} models={total} errors={errors} time={t1 - t0:.2f}')
except Exception as e:
models = {}

View File

@@ -3,7 +3,7 @@ from modules import shared
def apply_teacache_patch(cls):
if shared.opts.teacache_enabled:
if shared.opts.teacache_enabled and cls is not None:
from modules import teacache
shared.log.debug(f'Transformers cache: type=teacache patch=forward cls={cls.__name__}')
if cls.__name__ == 'LTXVideoTransformer3DModel':

View File

@@ -20,15 +20,26 @@ def _loader(component):
loaded_model = None
def load_custom(model_name: str):
shared.log.debug(f'Video load: module=pipe repo="{model_name}" cls=Custom')
if 'veo-3.1' in model_name:
from modules.video_models.google_veo import load_veo
pipe = load_veo(model_name)
return pipe
return None
def load_model(selected: models_def.Model):
if selected is None or selected.te_cls is None or selected.dit_cls is None:
if selected is None or selected.repo is None:
return ''
global loaded_model # pylint: disable=global-statement
if not shared.sd_loaded:
loaded_model = None
if loaded_model == selected.name:
return ''
sd_models.unload_model_weights()
if shared.sd_loaded:
sd_models.unload_model_weights()
t0 = time.time()
jobid = shared.state.begin('Load model')
@@ -46,89 +57,99 @@ def load_model(selected: models_def.Model):
kwargs = video_overrides.load_override(selected, **offline_args)
# text encoder
try:
load_args, quant_args = model_quant.get_dit_args({}, module='TE', device_map=True)
if selected.te_cls is not None:
try:
load_args, quant_args = model_quant.get_dit_args({}, module='TE', device_map=True)
# loader deduplication of text-encoder models
if selected.te_cls.__name__ == 'T5EncoderModel' and shared.opts.te_shared_t5:
selected.te = 'Disty0/t5-xxl'
selected.te_folder = ''
selected.te_revision = None
if selected.te_cls.__name__ == 'UMT5EncoderModel' and shared.opts.te_shared_t5:
if 'SDNQ' in selected.name:
selected.te = 'Disty0/Wan2.2-T2V-A14B-SDNQ-uint4-svd-r32'
else:
selected.te = 'Wan-AI/Wan2.2-TI2V-5B-Diffusers'
selected.te_folder = 'text_encoder'
selected.te_revision = None
if selected.te_cls.__name__ == 'LlamaModel' and shared.opts.te_shared_t5:
selected.te = 'hunyuanvideo-community/HunyuanVideo'
selected.te_folder = 'text_encoder'
selected.te_revision = None
if selected.te_cls.__name__ == 'Qwen2_5_VLForConditionalGeneration' and shared.opts.te_shared_t5:
selected.te = 'ai-forever/Kandinsky-5.0-T2V-Lite-sft-5s-Diffusers'
selected.te_folder = 'text_encoder'
selected.te_revision = None
# loader deduplication of text-encoder models
if selected.te_cls.__name__ == 'T5EncoderModel' and shared.opts.te_shared_t5:
selected.te = 'Disty0/t5-xxl'
selected.te_folder = ''
selected.te_revision = None
if selected.te_cls.__name__ == 'UMT5EncoderModel' and shared.opts.te_shared_t5:
if 'SDNQ' in selected.name:
selected.te = 'Disty0/Wan2.2-T2V-A14B-SDNQ-uint4-svd-r32'
else:
selected.te = 'Wan-AI/Wan2.2-TI2V-5B-Diffusers'
selected.te_folder = 'text_encoder'
selected.te_revision = None
if selected.te_cls.__name__ == 'LlamaModel' and shared.opts.te_shared_t5:
selected.te = 'hunyuanvideo-community/HunyuanVideo'
selected.te_folder = 'text_encoder'
selected.te_revision = None
if selected.te_cls.__name__ == 'Qwen2_5_VLForConditionalGeneration' and shared.opts.te_shared_t5:
selected.te = 'ai-forever/Kandinsky-5.0-T2V-Lite-sft-5s-Diffusers'
selected.te_folder = 'text_encoder'
selected.te_revision = None
shared.log.debug(f'Video load: module=te repo="{selected.te or selected.repo}" folder="{selected.te_folder}" cls={selected.te_cls.__name__} quant={model_quant.get_quant_type(quant_args)} loader={_loader("transformers")}')
kwargs["text_encoder"] = selected.te_cls.from_pretrained(
pretrained_model_name_or_path=selected.te or selected.repo,
subfolder=selected.te_folder,
revision=selected.te_revision or selected.repo_revision,
cache_dir=shared.opts.hfcache_dir,
**load_args,
**quant_args,
**offline_args,
)
except Exception as e:
shared.log.error(f'video load: module=te cls={selected.te_cls.__name__} {e}')
errors.display(e, 'video')
shared.log.debug(f'Video load: module=te repo="{selected.te or selected.repo}" folder="{selected.te_folder}" cls={selected.te_cls.__name__} quant={model_quant.get_quant_type(quant_args)} loader={_loader("transformers")}')
kwargs["text_encoder"] = selected.te_cls.from_pretrained(
pretrained_model_name_or_path=selected.te or selected.repo,
subfolder=selected.te_folder,
revision=selected.te_revision or selected.repo_revision,
cache_dir=shared.opts.hfcache_dir,
**load_args,
**quant_args,
**offline_args,
)
except Exception as e:
shared.log.error(f'video load: module=te cls={selected.te_cls.__name__} {e}')
errors.display(e, 'video')
# transformer
try:
def load_dit_folder(dit_folder):
if dit_folder is not None and dit_folder not in kwargs:
# get a new quant arg on every loop to prevent the quant config classes getting entangled
load_args, quant_args = model_quant.get_dit_args({}, module='Model', device_map=True)
shared.log.debug(f'Video load: module=transformer repo="{selected.dit or selected.repo}" module="{dit_folder}" folder="{dit_folder}" cls={selected.dit_cls.__name__} quant={model_quant.get_quant_type(quant_args)} loader={_loader("diffusers")}')
kwargs[dit_folder] = selected.dit_cls.from_pretrained(
pretrained_model_name_or_path=selected.dit or selected.repo,
subfolder=dit_folder,
revision=selected.dit_revision or selected.repo_revision,
cache_dir=shared.opts.hfcache_dir,
**load_args,
**quant_args,
**offline_args,
)
else:
shared.log.debug(f'Video load: module=transformer repo="{selected.dit or selected.repo}" module="{dit_folder}" folder="{dit_folder}" cls={selected.dit_cls.__name__} loader={_loader("diffusers")} skip')
if selected.dit_cls is not None:
try:
def load_dit_folder(dit_folder):
if dit_folder is not None and dit_folder not in kwargs:
# get a new quant arg on every loop to prevent the quant config classes getting entangled
load_args, quant_args = model_quant.get_dit_args({}, module='Model', device_map=True)
shared.log.debug(f'Video load: module=transformer repo="{selected.dit or selected.repo}" module="{dit_folder}" folder="{dit_folder}" cls={selected.dit_cls.__name__} quant={model_quant.get_quant_type(quant_args)} loader={_loader("diffusers")}')
kwargs[dit_folder] = selected.dit_cls.from_pretrained(
pretrained_model_name_or_path=selected.dit or selected.repo,
subfolder=dit_folder,
revision=selected.dit_revision or selected.repo_revision,
cache_dir=shared.opts.hfcache_dir,
**load_args,
**quant_args,
**offline_args,
)
else:
shared.log.debug(f'Video load: module=transformer repo="{selected.dit or selected.repo}" module="{dit_folder}" folder="{dit_folder}" cls={selected.dit_cls.__name__} loader={_loader("diffusers")} skip')
if selected.dit_folder is None:
selected.dit_folder = ['transformer']
if isinstance(selected.dit_folder, list) or isinstance(selected.dit_folder, tuple):
for dit_folder in selected.dit_folder: # wan a14b has transformer and transformer_2
load_dit_folder(dit_folder)
else:
load_dit_folder(selected.dit_folder)
except Exception as e:
shared.log.error(f'video load: module=transformer cls={selected.dit_cls.__name__} {e}')
errors.display(e, 'video')
if selected.dit_folder is None:
selected.dit_folder = ['transformer']
if isinstance(selected.dit_folder, list) or isinstance(selected.dit_folder, tuple):
for dit_folder in selected.dit_folder: # wan a14b has transformer and transformer_2
load_dit_folder(dit_folder)
else:
load_dit_folder(selected.dit_folder)
except Exception as e:
shared.log.error(f'video load: module=transformer cls={selected.dit_cls.__name__} {e}')
errors.display(e, 'video')
# model
try:
shared.log.debug(f'Video load: module=pipe repo="{selected.repo}" cls={selected.repo_cls.__name__}')
shared.sd_model = selected.repo_cls.from_pretrained(
pretrained_model_name_or_path=selected.repo,
revision=selected.repo_revision,
cache_dir=shared.opts.hfcache_dir,
torch_dtype=devices.dtype,
**kwargs,
**offline_args,
)
if selected.repo_cls is None:
shared.sd_model = load_custom(selected.repo)
else:
shared.log.debug(f'Video load: module=pipe repo="{selected.repo}" cls={selected.repo_cls.__name__}')
shared.sd_model = selected.repo_cls.from_pretrained(
pretrained_model_name_or_path=selected.repo,
revision=selected.repo_revision,
cache_dir=shared.opts.hfcache_dir,
torch_dtype=devices.dtype,
**kwargs,
**offline_args,
)
except Exception as e:
shared.log.error(f'video load: module=pipe repo="{selected.repo}" cls={selected.repo_cls.__name__ if selected.repo_cls is not None else "custom"} {e}')
errors.display(e, 'video')
if shared.sd_model is None:
msg = f'Video load: model="{selected.name}" failed'
shared.log.error(msg)
return msg
t1 = time.time()
if shared.sd_model.__class__.__name__.startswith("LTX"):
shared.sd_model.scheduler.config.use_dynamic_shifting = False
@@ -138,7 +159,7 @@ def load_model(selected: models_def.Model):
sd_models.set_diffuser_options(shared.sd_model, offload=False)
decode, text, image, slicing, tiling, framewise = False, False, False, False, False, False
if selected.vae_hijack and hasattr(shared.sd_model.vae, 'decode'):
if selected.vae_hijack and hasattr(shared.sd_model, 'vae') and hasattr(shared.sd_model.vae, 'decode'):
sd_hijack_vae.init_hijack(shared.sd_model)
decode = True
if selected.te_hijack and hasattr(shared.sd_model, 'encode_prompt'):

View File

@@ -109,7 +109,7 @@ def generate(*args, **kwargs):
orig_sampler_shift = shared.opts.schedulers_shift
shared.opts.data['schedulers_dynamic_shift'] = dynamic_shift
shared.opts.data['schedulers_shift'] = sampler_shift
if hasattr(shared.sd_model.scheduler, 'config') and hasattr(shared.sd_model.scheduler, 'register_to_config'):
if hasattr(shared.sd_model, 'scheduler') and hasattr(shared.sd_model.scheduler, 'config') and hasattr(shared.sd_model.scheduler, 'register_to_config'):
if hasattr(shared.sd_model.scheduler.config, 'use_dynamic_shifting'):
shared.sd_model.scheduler.config.use_dynamic_shifting = dynamic_shift
shared.sd_model.scheduler.register_to_config(use_dynamic_shifting = dynamic_shift)
@@ -146,15 +146,18 @@ def generate(*args, **kwargs):
# done
if err:
return video_utils.queue_err(err)
if processed is None or len(processed.images) == 0:
if processed is None or (len(processed.images) == 0 and processed.bytes is None):
return video_utils.queue_err('processing failed')
shared.log.info(f'Video: name="{selected.name}" cls={shared.sd_model.__class__.__name__} frames={len(processed.images)} time={t1-t0:.2f}')
# video_file = images.save_video(p, filename=None, images=processed.images, video_type=video_type, duration=video_duration, loop=video_loop, pad=video_pad, interpolate=video_interpolate) # legacy video save from list of images
pixels = video_save.images_to_tensor(processed.images)
if hasattr(processed, 'images') and processed.images is not None:
pixels = video_save.images_to_tensor(processed.images)
else:
pixels = None
_num_frames, video_file = video_save.save_video(
p=p,
pixels=pixels,
binary=processed.bytes,
mp4_fps=mp4_fps,
mp4_codec=mp4_codec,
mp4_opt=mp4_opt,

View File

@@ -107,7 +107,8 @@ def atomic_save_video(filename, tensor:torch.Tensor, fps:float=24, codec:str='li
def save_video(
p:processing.StableDiffusionProcessingVideo,
pixels:torch.Tensor,
pixels:torch.Tensor=None,
binary:bytes=None,
mp4_fps:int=24,
mp4_codec:str='libx264',
mp4_opt:str='',
@@ -121,6 +122,23 @@ def save_video(
pbar=None, # progress bar for video
):
output_video = None
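# cloud pipelines return an already-encoded video; write the raw bytes out verbatim and skip tensor encoding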
if binary is not None:
output_filename = get_video_filename(p)
output_video = f'{output_filename}.{mp4_ext}'
try:
with open(output_video, 'wb') as f:
f.write(binary)
shared.log.info(f'Video output: file="{output_video}" size={len(binary)}')
shared.state.outputs(output_video)
except Exception as e:
shared.log.error(f'Video output: file="{output_video}" {e}')
errors.display(e, 'video')
return 0, output_video
if pixels is None:
return 0, output_video
if not torch.is_tensor(pixels):

View File

@@ -9,6 +9,8 @@ vae_type = None
def set_vae_params(p):
global vae_type # pylint: disable=global-statement
vae_type = p.vae_type
if not hasattr(shared.sd_model, 'vae'):
return
if hasattr(shared.sd_model.vae, 'enable_slicing'):
shared.sd_model.vae.enable_slicing()
if p.frames > p.vae_tile_frames:

View File

@@ -24,6 +24,12 @@ aspect_ratios_buckets = {
}
def google_requirements():
install('google-genai==1.52.0')
install('pydantic==2.11.7', ignore=True, quiet=True)
reload('pydantic', '2.11.7')
def get_size_buckets(width: int, height: int) -> str:
aspect_ratio = width / height
closest_aspect_ratio = min(aspect_ratios_buckets.items(), key=lambda x: abs(x[1] - aspect_ratio))[0]
@@ -38,9 +44,7 @@ class GoogleNanoBananaPipeline():
self.model = model_name
self.client = None
self.config = None
install('google-genai==1.52.0')
install('pydantic==2.11.7', ignore=True, quiet=True)
reload('pydantic', '2.11.7')
google_requirements()
log.debug(f'Load model: type=NanoBanana model="{model_name}"')
def txt2img(self, prompt):
@@ -81,7 +85,7 @@ class GoogleNanoBananaPipeline():
response_modalities=["IMAGE"],
image_config=image_config
)
log.debug(f'Cloud: prompt={prompt} size={image_size} ar={aspect_ratio} image={image} model="{self.model}"')
log.debug(f'Cloud: prompt="{prompt}" size={image_size} ar={aspect_ratio} image={image} model="{self.model}"')
# log.debug(f'Cloud: config={self.config}')
try:
@@ -114,6 +118,6 @@ if __name__ == "__main__":
import sys
sys.path.append(os.path.abspath(os.path.join(os.path.dirname(__file__), '..')))
log.info('test')
model =GoogleNanoBananaPipeline('gemini-3-pro-image-preview')
model = GoogleNanoBananaPipeline('gemini-3-pro-image-preview')
img = model(['A beautiful landscape with mountains and a river'], 1024, 1024)
img.save('test.png')

wiki

@@ -1 +1 @@
Subproject commit f7289d6c03899f519de8692efe8ea2731779607c
Subproject commit 2fb950abdeaad2d2a7976857f04646fc8c6963e1