add lumina2

Signed-off-by: Vladimir Mandic <mandic00@live.com>
pull/3754/merge
Vladimir Mandic 2025-02-12 08:54:00 -05:00
parent 1d533544d2
commit a7ccea60ff
13 changed files with 78 additions and 11 deletions

View File

@ -1,6 +1,6 @@
# Change Log for SD.Next
## Update for 2025-02-11
## Update for 2025-02-12
- **User Interface**
- **Hints**
@ -14,9 +14,13 @@
- **UI**:
- force browser cache-invalidate on page load
- **Models**
- [AlphaVLLM Lumina 2](https://github.com/Alpha-VLLM/Lumina-Image-2.0)
new foundation model for image generation based on Gemma-2-2B text encoder and a flow-based diffusion transformer
simply select from *networks -> models -> reference*
- [Ostris Flex.1-Alpha](https://huggingface.co/ostris/Flex.1-alpha)
originally based on Flux.1-Schnell, but retrained and with different architecture
result is model smaller than Flux.1-Dev, but with similar capabilities
originally based on *Flux.1-Schnell*, but retrained and with different architecture
result is a model smaller than *Flux.1-Dev*, but with similar capabilities
simply select from *networks -> models -> reference*
- **Docs**
- New [Outpaint](https://github.com/vladmandic/sdnext/wiki/Outpaint) step-by-step guide
- Updated [Docker](https://github.com/vladmandic/sdnext/wiki/Docker) guide

View File

@ -24,6 +24,8 @@
## SD.Next Features
All individual features are not listed here, instead check [ChangeLog](CHANGELOG.md) for full list of changes
- Fully localized:
**English | Chinese | Russian | Spanish | German | French | Italian | Portuguese | Japanese | Korean**
- Multiple UIs!
**Standard | Modern**
- Multiple [diffusion models](https://vladmandic.github.io/sdnext-docs/Model-Support/)!

View File

@ -309,6 +309,13 @@
"skip": true,
"extras": "sampler: Default"
},
"AlphaVLLM Lumina 2": {
"path": "Alpha-VLLM/Lumina-Image-2.0",
"desc": "A Unified and Efficient Image Generative Model. Lumina-Image-2.0 is a 2 billion parameter flow-based diffusion transformer capable of generating images from text descriptions.",
"preview": "Alpha-VLLM--Lumina-Image-2.0.jpg",
"skip": true,
"extras": "sampler: Default"
},
"Kwai Kolors": {
"path": "Kwai-Kolors/Kolors-diffusers",

View File

@ -531,7 +531,7 @@ def check_diffusers():
t_start = time.time()
if args.skip_all or args.skip_git:
return
sha = 'f63d32233f402bd603da8f3aa385aecb9c3d8809' # diffusers commit hash
sha = '067eab1b3aaf4d09f85edf21d8b147e0980c662a' # diffusers commit hash
pkg = pkg_resources.working_set.by_key.get('diffusers', None)
minor = int(pkg.version.split('.')[1] if pkg is not None else 0)
cur = opts.get('diffusers_version', '') if minor > 0 else ''

Binary file not shown.

After

Width:  |  Height:  |  Size: 37 KiB

View File

@ -1,3 +1,4 @@
import transformers
import diffusers
@ -22,3 +23,23 @@ def load_lumina(_checkpoint_info, diffusers_load_config={}):
)
devices.torch_gc()
return pipe
def load_lumina2(checkpoint_info, diffusers_load_config=None):
    """Load an AlphaVLLM Lumina-Image-2.0 pipeline, optionally with quantized submodules.

    Args:
        checkpoint_info: project checkpoint descriptor; `.name` resolves to the HF repo id
            and `.path` is passed to `from_pretrained`.
        diffusers_load_config: extra kwargs forwarded to the pipeline `from_pretrained`
            call (defaults to an empty dict).
    Returns:
        A `diffusers.Lumina2Text2ImgPipeline` instance.
    """
    from modules import shared, devices, sd_models, model_quant
    if diffusers_load_config is None:  # avoid shared mutable default argument
        diffusers_load_config = {}
    # Prefer bitsandbytes quantization; fall back to torchao when bnb is not configured.
    quant_args = model_quant.create_bnb_config({})
    if quant_args:
        model_quant.load_bnb(f'Load model: type=Lumina2 quant={quant_args}')  # was mislabeled 'Lumina'
    if not quant_args:
        quant_args = model_quant.create_ao_config(quant_args)
        if quant_args:
            model_quant.load_torchao(f'Load model: type=Lumina2 quant={quant_args}')  # was mislabeled 'Lumina'
    kwargs = {}
    repo_id = sd_models.path_to_repo(checkpoint_info.name)
    # Pre-load individual submodules only when the user opted them into quantization,
    # so the quant args apply per-component rather than to the whole pipeline.
    if 'Model' in shared.opts.bnb_quantization or 'Model' in shared.opts.torchao_quantization:
        kwargs['transformer'] = diffusers.Lumina2Transformer2DModel.from_pretrained(repo_id, subfolder="transformer", cache_dir=shared.opts.diffusers_dir, torch_dtype=devices.dtype, **quant_args)
    if 'Text Encoder' in shared.opts.bnb_quantization or 'Text Encoder' in shared.opts.torchao_quantization:
        kwargs['text_encoder'] = transformers.AutoModel.from_pretrained(repo_id, subfolder="text_encoder", cache_dir=shared.opts.diffusers_dir, torch_dtype=devices.dtype, **quant_args)
    sd_model = diffusers.Lumina2Text2ImgPipeline.from_pretrained(checkpoint_info.path, cache_dir=shared.opts.diffusers_dir, **diffusers_load_config, **quant_args, **kwargs)
    return sd_model

View File

@ -30,8 +30,6 @@ def get_quant(name):
return 'none'
def create_bnb_config(kwargs = None, allow_bnb: bool = True):
from modules import shared, devices
if len(shared.opts.bnb_quantization) > 0 and allow_bnb:

View File

@ -31,6 +31,8 @@ def get_model_type(pipe):
model_type = 'f1'
elif "Mochi" in name:
model_type = 'mochi'
elif "Lumina2" in name:
model_type = 'lumina2'
elif "Lumina" in name:
model_type = 'lumina'
elif "OmniGen" in name:

View File

@ -69,6 +69,8 @@ def detect_pipeline(f: str, op: str = 'model', warning=True, quiet=False):
guess = 'Sana'
if 'lumina-next' in f.lower():
guess = 'Lumina-Next'
if 'lumina-image-2' in f.lower():
guess = 'Lumina2'
if 'kolors' in f.lower():
guess = 'Kolors'
if 'auraflow' in f.lower():
@ -101,6 +103,8 @@ def detect_pipeline(f: str, op: str = 'model', warning=True, quiet=False):
guess = 'FLUX'
if 'StableDiffusion3' in pipeline.__name__:
guess = 'Stable Diffusion 3'
if 'Lumina2' in pipeline.__name__:
guess = 'Lumina 2'
# switch for specific variant
if guess == 'Stable Diffusion' and 'inpaint' in f.lower():
guess = 'Stable Diffusion Inpaint'

View File

@ -290,6 +290,9 @@ def load_diffuser_force(model_type, checkpoint_info, diffusers_load_config, op='
elif model_type in ['FLUX']:
from modules.model_flux import load_flux
sd_model = load_flux(checkpoint_info, diffusers_load_config)
elif model_type in ['Lumina 2']:
from modules.model_lumina import load_lumina2
sd_model = load_lumina2(checkpoint_info, diffusers_load_config)
elif model_type in ['Stable Diffusion 3']:
from modules.model_sd3 import load_sd3
shared.log.debug(f'Load {op}: model="Stable Diffusion 3"')
@ -314,7 +317,7 @@ def load_diffuser_folder(model_type, pipeline, checkpoint_info, diffusers_load_c
files = shared.walk_files(checkpoint_info.path, ['.safetensors', '.bin', '.ckpt'])
if 'variant' not in diffusers_load_config and any('diffusion_pytorch_model.fp16' in f for f in files): # deal with diffusers lack of variant fallback when loading
diffusers_load_config['variant'] = 'fp16'
if model_type is not None and pipeline is not None and 'ONNX' in model_type: # forced pipeline
if (model_type is not None) and (pipeline is not None) and ('ONNX' in model_type): # forced pipeline
try:
sd_model = pipeline.from_pretrained(checkpoint_info.path)
except Exception as e:

View File

@ -9,7 +9,7 @@ from modules import shared, devices, processing, images, sd_vae_approx, sd_vae_t
SamplerData = namedtuple('SamplerData', ['name', 'constructor', 'aliases', 'options'])
approximation_indexes = { "Simple": 0, "Approximate": 1, "TAESD": 2, "Full VAE": 3 }
flow_models = ['f1', 'sd3', 'lumina', 'auraflow', 'sana']
flow_models = ['f1', 'sd3', 'lumina', 'auraflow', 'sana', 'lumina2']
warned = False
queue_lock = threading.Lock()

View File

@ -306,6 +306,5 @@ class DiffusionSampler:
if name == 'DC Solver':
if not hasattr(self.sampler, 'dc_ratios'):
pass
# self.sampler.dc_ratios = self.sampler.cascade_polynomial_regression(test_CFG=6.0, test_NFE=10, cpr_path='tmp/sd2.1.npy')
# shared.log.debug_log(f'Sampler: class="{self.sampler.__class__.__name__}" config={self.sampler.config}')
self.sampler.name = name

View File

@ -1,6 +1,8 @@
import gradio as gr
from modules import scripts, processing, shared
from modules import scripts, processing, shared, sd_models
max_xtiles = 4
max_ytiles = 4
class Script(scripts.Script):
def __init__(self):
@ -13,9 +15,31 @@ class Script(scripts.Script):
def show(self, is_img2img):
    # Script is available (in both txt2img and img2img tabs) only when
    # running the native diffusers backend.
    return shared.native
def update_ui(self, x, y):
    """Toggle visibility of the per-tile prompt boxes to match the selected grid.

    Returns one `gr.update` per possible tile, in column-major order
    (outer loop over x, inner loop over y), matching the order the
    textboxes were registered in `ui`.
    """
    return [
        gr.update(visible=(col < x) and (row < y))
        for col in range(max_xtiles)
        for row in range(max_ytiles)
    ]
def ui(self, _is_img2img):  # ui elements
    """Build the Mixture-of-Diffusers script UI: grid-size sliders, tile geometry
    inputs, and one hidden prompt textbox per potential tile."""
    with gr.Row():
        gr.HTML('<a href="https://arxiv.org/abs/2302.02412">&nbsp Mixture-of-Diffusers</a><br>')
    with gr.Row():
        # NOTE(review): gr.Slider normally takes `value=`, not `default=` — confirm
        # against the gradio version pinned by the project before changing.
        x_tiles = gr.Slider(minimum=1, maximum=max_xtiles, default=1, label="X-axis tiles")
        y_tiles = gr.Slider(minimum=1, maximum=max_ytiles, default=1, label="Y-axis tiles")
    with gr.Row():
        tile_width = gr.Number(minimum=1, maximum=2048, value=1024, label="Tile width")
        tile_height = gr.Number(minimum=1, maximum=2048, value=1024, label="Tile height")
    with gr.Row():
        overlap_width = gr.Number(minimum=1, maximum=512, value=128, label="Overlap width")
        overlap_height = gr.Number(minimum=1, maximum=512, value=128, label="Overlap height")
    with gr.Row():
        prompts = []
        for i in range(max_xtiles*max_ytiles):
            # Row index is i // max_xtiles (original used max_ytiles, which only
            # worked because max_xtiles == max_ytiles); column index is i % max_xtiles.
            prompts.append(gr.Textbox('', label=f"Tile prompt: x={i%max_xtiles} y={i//max_xtiles}", placeholder='Prompt for tile', visible=False))
    # Re-evaluate prompt-box visibility whenever the grid dimensions change.
    x_tiles.change(fn=self.update_ui, inputs=[x_tiles, y_tiles], outputs=prompts)
    y_tiles.change(fn=self.update_ui, inputs=[x_tiles, y_tiles], outputs=prompts)
    return []
def run(self, p: processing.StableDiffusionProcessing): # pylint: disable=arguments-differ, unused-argument
@ -24,10 +48,13 @@ class Script(scripts.Script):
shared.log.warning(f'MoD: class={shared.sd_model.__class__.__name__} model={shared.sd_model_type} required={supported_model_list}')
return None
self.orig_pipe = shared.sd_model
from modules.mod import StableDiffusionXLTilingPipeline
shared.sd_model = sd_models.switch_pipe(StableDiffusionXLTilingPipeline, shared.sd_model)
sd_models.set_diffuser_options(shared.sd_model)
sd_models.apply_balanced_offload(shared.sd_model)
shared.log.info(f'MoD: ')
def after(self, p: processing.StableDiffusionProcessing, processed: processing.Processed): # pylint: disable=arguments-differ, unused-argument
if self.orig_pipe is None:
return processed