add nvidia-sana-video-2b

Signed-off-by: Vladimir Mandic <mandic00@live.com>
pull/4367/head
Vladimir Mandic 2025-11-06 08:40:10 -05:00
parent 21b3cfdb3c
commit a5cefc96b6
4 changed files with 15 additions and 2 deletions

View File

@ -6,6 +6,7 @@
Service pack release that handles critical issues and improvements for **ROCm-on-Windows** and **ZLUDA** backends
Also included are several new features, notably improvements to **detailer** and ability to run [SD.Next](https://github.com/vladmandic/sdnext) with specific modules disabled
Also adds a new video model, **nVidia SANA 2B**
![Screenshot](https://github.com/user-attachments/assets/d6119a63-6ee5-4597-95f6-29ed0701d3b5)
@ -13,6 +14,9 @@ Also included are several new features, notably improvements to **detailer** and
### Details for 2025-11-06
- **Models**
- [SANA Video_2B_480p T2V](https://huggingface.co/Efficient-Large-Model/SANA-Video_2B_480p_diffusers) is a small 2B ultra-efficient diffusion model
designed for rapid generation of high-quality videos and uses Gemma2 text encoder
- **Features**
- **ROCm for Windows** switch to using **TheRock** `torch` builds when available
it is recommended to run: `webui --use-rocm --reinstall`

View File

@ -613,7 +613,7 @@ def check_diffusers():
t_start = time.time()
if args.skip_all:
return
sha = 'dcfb18a2d340d8e1f0ff001b06d2931ffa8648da' # diffusers commit hash
sha = 'b3e9dfced7c9e8d00f646c710766b532383f04c6' # diffusers commit hash
# if args.use_rocm or args.use_zluda or args.use_directml:
# sha = '043ab2520f6a19fce78e6e060a68dbc947edb9f9' # lock diffusers versions for now
pkg = pkg_resources.working_set.by_key.get('diffusers', None)

View File

@ -367,6 +367,14 @@ try:
te_cls=getattr(transformers, 'T5EncoderModel', None),
dit_cls=getattr(diffusers, 'CosmosTransformer3DModel', None)),
],
'nVidia SANA': [
Model(name='SANA Video 2B 480p T2V',
url='https://huggingface.co/Efficient-Large-Model/SANA-Video_2B_480p_diffusers',
repo='Efficient-Large-Model/SANA-Video_2B_480p_diffusers',
repo_cls=getattr(diffusers, 'SanaVideoPipeline', None),
te_cls=getattr(transformers, 'Gemma2Model', None),
dit_cls=getattr(diffusers, 'SanaVideoTransformer3DModel', None)),
],
'Kandinsky': [
Model(name='Kandinsky 5.0 Lite 5s SFT T2V',
url='https://huggingface.co/ai-forever/Kandinsky-5.0-T2V-Lite-sft-5s-Diffusers',

View File

@ -1,6 +1,8 @@
import os
import copy
import time
import transformers # pylint: disable=unused-import
import diffusers
from modules import shared, errors, sd_models, sd_checkpoint, model_quant, devices, sd_hijack_te, sd_hijack_vae
from modules.video_models import models_def, video_utils, video_overrides, video_cache
@ -174,7 +176,6 @@ def load_upscale_vae():
shared.log.warning('Video decode: upscale VAE unsupported')
return
import diffusers
repo_id = 'spacepxl/Wan2.1-VAE-upscale2x'
subfolder = "diffusers/Wan2.1_VAE_upscale2x_imageonly_real_v1"
vae_decode = diffusers.AutoencoderKLWan.from_pretrained(repo_id, subfolder=subfolder, cache_dir=shared.opts.hfcache_dir)