diff --git a/cli/test-all-models.py b/cli/test-all-models.py
index 163c193d6..a9853ebd1 100755
--- a/cli/test-all-models.py
+++ b/cli/test-all-models.py
@@ -1,14 +1,16 @@
 #!/usr/bin/env python
 """
-Warning:
+Warnings:
 - fal/AuraFlow-v0.3: layer_class_name=Linear layer_weight_shape=torch.Size([3072, 2, 1024]) weights_dtype=int8 unsupported
 - Kwai-Kolors/Kolors-diffusers: `set_input_embeddings` not auto‑handled for ChatGLMModel
-Error:
+- kandinsky-community/kandinsky-2-1: `get_input_embeddings` not auto‑handled for MultilingualCLIP
+Errors:
+- kandinsky-community/kandinsky-3: corrupt output
 - nvidia/Cosmos-Predict2-2B-Text2Image: mat1 and mat2 shapes cannot be multiplied (512x4096 and 1024x2048)
 - nvidia/Cosmos-Predict2-14B-Text2Image: mat1 and mat2 shapes cannot be multiplied (512x4096 and 1024x5120)
 - Tencent-Hunyuan/HunyuanDiT-v1.2-Diffusers: CUDA error: device-side assert triggered
 Other:
-- HiDream-ai/HiDream-I1-Full: 30+s/it
+- HiDream-ai/HiDream-I1-Full: very slow at 30+s/it
 """
 
 import io
@@ -54,10 +56,9 @@ models = {
     "OmniGen2/OmniGen2": {},
     # "HiDream-ai/HiDream-I1-Full": {},
     "Kwai-Kolors/Kolors-diffusers": {},
-    "lodestones/Chroma1-HD": {},
-    "vladmandic/chroma-unlocked-v50-annealed": {},
-    "vladmandic/chroma-unlocked-v48": {},
-    "vladmandic/chroma-unlocked-v48-detail-calibrated": {},
+    # "kandinsky-community/kandinsky-3": {},
+    "kandinsky-community/kandinsky-2-2-decoder": {},
+    "kandinsky-community/kandinsky-2-1": {},
     "Alpha-VLLM/Lumina-Next-SFT-diffusers": {},
     "Alpha-VLLM/Lumina-Image-2.0": {},
     "MeissonFlow/Meissonic": {},
@@ -68,14 +69,15 @@ models = {
     "Wan-AI/Wan2.1-T2V-1.3B-Diffusers": {},
     "Wan-AI/Wan2.1-T2V-14B-Diffusers": {},
     "stabilityai/stable-cascade": {},
+    "lodestones/Chroma1-HD": {},
+    "vladmandic/chroma-unlocked-v50-annealed": {},
+    "vladmandic/chroma-unlocked-v48": {},
+    "vladmandic/chroma-unlocked-v48-detail-calibrated": {},
 }
 models_tbd = [
     "black-forest-labs/FLUX.1-dev",
     "black-forest-labs/FLUX.1-Kontext-dev",
     "black-forest-labs/FLUX.1-Krea-dev",
-    "kandinsky-community/kandinsky-3", # TODO
-    "kandinsky-community/kandinsky-2-2-decoder",
-    "kandinsky-community/kandinsky-2-1",
 ]
 styles = [
     'Fixed Astronaut',
diff --git a/html/reference.json b/html/reference.json
index 873dcaefb..a8c3447bb 100644
--- a/html/reference.json
+++ b/html/reference.json
@@ -490,7 +490,7 @@
   },
   "Kandinsky 2.2": {
     "path": "kandinsky-community/kandinsky-2-2-decoder",
-    "desc": "Kandinsky 2.2 is a text-conditional diffusion model (+0.1!) based on unCLIP and latent diffusion, composed of a transformer-based image prior model, a unet diffusion model, and a decoder. Kandinsky 2.1 inherits best practices from Dall-E 2 and Latent diffusion while introducing some new ideas. It uses the CLIP model as a text and image encoder, and diffusion image prior (mapping) between latent spaces of CLIP modalities. This approach increases the visual performance of the model and unveils new horizons in blending images and text-guided image manipulation.",
+    "desc": "Kandinsky 2.2 is a text-conditional diffusion model (+0.1!) based on unCLIP and latent diffusion, composed of a transformer-based image prior model, a unet diffusion model, and a decoder. Kandinsky 2.2 inherits best practices from Dall-E 2 and Latent diffusion while introducing some new ideas. It uses the CLIP model as a text and image encoder, and diffusion image prior (mapping) between latent spaces of CLIP modalities. This approach increases the visual performance of the model and unveils new horizons in blending images and text-guided image manipulation.",
     "preview": "kandinsky-community--kandinsky-2-2-decoder.jpg",
     "extras": "width: 768, height: 768, sampler: Default"
   },
diff --git a/modules/sd_detect.py b/modules/sd_detect.py
index b5767956e..a6169e5c3 100644
--- a/modules/sd_detect.py
+++ b/modules/sd_detect.py
@@ -103,6 +103,12 @@ def guess_by_name(fn, current_guess):
         return 'Bria'
     elif 'qwen' in fn.lower():
         return 'Qwen'
+    elif 'kandinsky-2-1' in fn.lower():
+        return 'Kandinsky 2.1'
+    elif 'kandinsky-2-2' in fn.lower():
+        return 'Kandinsky 2.2'
+    elif 'kandinsky-3' in fn.lower():
+        return 'Kandinsky 3.0'
     return current_guess
 
 
diff --git a/modules/sd_models.py b/modules/sd_models.py
index da863a924..ec11f2c1e 100644
--- a/modules/sd_models.py
+++ b/modules/sd_models.py
@@ -381,6 +381,18 @@ def load_diffuser_force(model_type, checkpoint_info, diffusers_load_config, op='
             from pipelines.model_hunyuandit import load_hunyuandit
             sd_model = load_hunyuandit(checkpoint_info, diffusers_load_config)
             allow_post_quant = False
+        elif model_type in ['Kandinsky 2.1']:
+            from pipelines.model_kandinsky import load_kandinsky21
+            sd_model = load_kandinsky21(checkpoint_info, diffusers_load_config)
+            allow_post_quant = True
+        elif model_type in ['Kandinsky 2.2']:
+            from pipelines.model_kandinsky import load_kandinsky22
+            sd_model = load_kandinsky22(checkpoint_info, diffusers_load_config)
+            allow_post_quant = False
+        elif model_type in ['Kandinsky 3.0']:
+            from pipelines.model_kandinsky import load_kandinsky3
+            sd_model = load_kandinsky3(checkpoint_info, diffusers_load_config)
+            allow_post_quant = False
     except Exception as e:
         shared.log.error(f'Load {op}: path="{checkpoint_info.path}" {e}')
         if debug_load:
diff --git a/modules/sd_offload.py b/modules/sd_offload.py
index c6156a3e8..4a253a428 100644
--- a/modules/sd_offload.py
+++ b/modules/sd_offload.py
@@ -16,7 +16,7 @@ debug_move = log.trace if debug else lambda *args, **kwargs: None
 offload_warn = ['sc', 'sd3', 'f1', 'h1', 'hunyuandit', 'auraflow', 'omnigen', 'omnigen2', 'cogview4', 'cosmos', 'chroma']
 offload_post = ['h1']
 offload_hook_instance = None
-balanced_offload_exclude = ['CogView4Pipeline']
+balanced_offload_exclude = ['CogView4Pipeline', 'MeissonicPipeline']
 accelerate_dtype_byte_size = None
 
 
diff --git a/modules/sd_vae.py b/modules/sd_vae.py
index 2578ff196..7986b2568 100644
--- a/modules/sd_vae.py
+++ b/modules/sd_vae.py
@@ -34,7 +34,7 @@ def get_vae_scale_factor(model=None):
     elif hasattr(model, 'config') and hasattr(model.config, 'vae_scale_factor'):
         vae_scale_factor = model.config.vae_scale_factor
     else:
-        shared.log.warning(f'VAE: cls={model.__class__.__name__ if model else "None"} scale=unknown')
+        # shared.log.warning(f'VAE: cls={model.__class__.__name__ if model else "None"} scale=unknown')
         vae_scale_factor = 8
     if hasattr(model, 'patch_size'):
         patch_size = model.patch_size
diff --git a/modules/sd_vae_taesd.py b/modules/sd_vae_taesd.py
index 7fc9c35f7..a89dff777 100644
--- a/modules/sd_vae_taesd.py
+++ b/modules/sd_vae_taesd.py
@@ -45,7 +45,7 @@ def warn_once(msg, variant=None):
     global prev_warnings # pylint: disable=global-statement
     if not prev_warnings:
         prev_warnings = True
-        shared.log.error(f'Decode: type="taesd" variant="{variant}": {msg}')
+        shared.log.warning(f'Decode: type="taesd" variant="{variant}": {msg}')
     return Image.new('RGB', (8, 8), color = (0, 0, 0))
 
 
diff --git a/pipelines/generic.py b/pipelines/generic.py
index 3801963d5..102b5e6b8 100644
--- a/pipelines/generic.py
+++ b/pipelines/generic.py
@@ -8,9 +8,10 @@ from modules import shared, devices, sd_models, model_quant
 debug = shared.log.trace if os.environ.get('SD_LOAD_DEBUG', None) is not None else lambda *args, **kwargs: None
 
 
-def load_transformer(repo_id, cls_name, load_config={}, subfolder="transformer", allow_quant=True, variant=None):
+def load_transformer(repo_id, cls_name, load_config={}, subfolder="transformer", allow_quant=True, variant=None, dtype=None):
     load_args, quant_args = model_quant.get_dit_args(load_config, module='Model', device_map=True, allow_quant=allow_quant)
     quant_type = model_quant.get_quant_type(quant_args)
+    dtype = dtype or devices.dtype
 
     local_file = None
     if shared.opts.sd_unet is not None and shared.opts.sd_unet != 'Default':
@@ -27,7 +28,7 @@ def load_transformer(repo_id, cls_name, load_config={}, subfolder="transformer",
         loader = cls_name.from_single_file if hasattr(cls_name, 'from_single_file') else cls_name.from_pretrained
         transformer = loader(
             local_file,
-            quantization_config=diffusers.GGUFQuantizationConfig(compute_dtype=devices.dtype),
+            quantization_config=diffusers.GGUFQuantizationConfig(compute_dtype=dtype),
             cache_dir=shared.opts.hfcache_dir,
             **load_args,
         )
@@ -43,6 +44,8 @@ def load_transformer(repo_id, cls_name, load_config={}, subfolder="transformer",
         transformer = model_quant.do_post_load_quant(transformer, allow=quant_type is not None)
     else:
         shared.log.debug(f'Load model: transformer="{repo_id}" cls={cls_name.__name__} quant="{quant_type}" args={load_args}')
+        if dtype is not None:
+            load_args['torch_dtype'] = dtype
         if subfolder is not None:
             load_args['subfolder'] = subfolder
         if variant is not None:
@@ -58,10 +61,11 @@ def load_transformer(repo_id, cls_name, load_config={}, subfolder="transformer",
     return transformer
 
 
-def load_text_encoder(repo_id, cls_name, load_config={}, subfolder="text_encoder", allow_quant=True, allow_shared=True, variant=None):
+def load_text_encoder(repo_id, cls_name, load_config={}, subfolder="text_encoder", allow_quant=True, allow_shared=True, variant=None, dtype=None):
     load_args, quant_args = model_quant.get_dit_args(load_config, module='TE', device_map=True, allow_quant=allow_quant)
     quant_type = model_quant.get_quant_type(quant_args)
     text_encoder = None
+    dtype = dtype or devices.dtype
 
     # load from local file if specified
     local_file = None
@@ -79,7 +83,7 @@ def load_text_encoder(repo_id, cls_name, load_config={}, subfolder="text_encoder
         ggml.install_gguf()
         text_encoder = cls_name.from_pretrained(
             gguf_file=local_file,
-            quantization_config=diffusers.GGUFQuantizationConfig(compute_dtype=devices.dtype),
+            quantization_config=diffusers.GGUFQuantizationConfig(compute_dtype=dtype),
             cache_dir=shared.opts.hfcache_dir,
             **load_args,
         )
@@ -104,12 +108,14 @@ def load_text_encoder(repo_id, cls_name, load_config={}, subfolder="text_encoder
             shared.log.debug(f'Load model: text_encoder="{repo_id}" cls={cls_name.__name__} quant="SVDQuant"')
             text_encoder = nunchaku.NunchakuT5EncoderModel.from_pretrained(
                 repo_id,
-                torch_dtype=devices.dtype,
+                torch_dtype=dtype,
             )
             text_encoder.quantization_method = 'SVDQuant'
         elif shared.opts.te_shared_t5:
             repo_id = 'Disty0/t5-xxl'
             shared.log.debug(f'Load model: text_encoder="{repo_id}" cls={cls_name.__name__} quant="{quant_type}" shared={shared.opts.te_shared_t5}')
+            if dtype is not None:
+                load_args['torch_dtype'] = dtype
             text_encoder = cls_name.from_pretrained(
                 repo_id,
                 cache_dir=shared.opts.hfcache_dir,
@@ -120,6 +126,8 @@ def load_text_encoder(repo_id, cls_name, load_config={}, subfolder="text_encoder
     # load from repo
     if text_encoder is None:
         shared.log.debug(f'Load model: text_encoder="{repo_id}" cls={cls_name.__name__} quant="{quant_type}" shared={shared.opts.te_shared_t5}')
+        if dtype is not None:
+            load_args['torch_dtype'] = dtype
         if subfolder is not None:
             load_args['subfolder'] = subfolder
         if variant is not None:
diff --git a/pipelines/meissonic/pipeline.py b/pipelines/meissonic/pipeline.py
index 4f1bb05a2..34b894081 100644
--- a/pipelines/meissonic/pipeline.py
+++ b/pipelines/meissonic/pipeline.py
@@ -11,7 +11,6 @@
 # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 # See the License for the specific language governing permissions and
 # limitations under the License.
-import sys
 from typing import Any, Callable, Dict, List, Optional, Tuple, Union
 
 import torch
@@ -49,7 +48,7 @@ def _prepare_latent_image_ids(batch_size, height, width, device, dtype):
     return latent_image_ids.to(device=device, dtype=dtype)
 
 
-class Pipeline(DiffusionPipeline):
+class MeissonicPipeline(DiffusionPipeline):
     image_processor: VaeImageProcessor
     vqvae: VQModel
     tokenizer: CLIPTokenizer
@@ -212,27 +211,27 @@ class Pipeline(DiffusionPipeline):
             width = self.transformer.config.sample_size * self.vae_scale_factor
 
         if prompt_embeds is None:
-                input_ids = self.tokenizer(
-                    prompt,
-                    return_tensors="pt",
-                    padding="max_length",
-                    truncation=True,
-                    max_length=77, #self.tokenizer.model_max_length,
-                ).input_ids.to(self._execution_device)
-                # input_ids_t5 = self.tokenizer_t5(
-                #     prompt,
-                #     return_tensors="pt",
-                #     padding="max_length",
-                #     truncation=True,
-                #     max_length=512,
-                # ).input_ids.to(self._execution_device)
+            input_ids = self.tokenizer(
+                prompt,
+                return_tensors="pt",
+                padding="max_length",
+                truncation=True,
+                max_length=77, #self.tokenizer.model_max_length,
+            ).input_ids.to(self._execution_device)
+            # input_ids_t5 = self.tokenizer_t5(
+            #     prompt,
+            #     return_tensors="pt",
+            #     padding="max_length",
+            #     truncation=True,
+            #     max_length=512,
+            # ).input_ids.to(self._execution_device)
 
 
-                outputs = self.text_encoder(input_ids, return_dict=True, output_hidden_states=True)
-                # outputs_t5 = self.text_encoder_t5(input_ids_t5, decoder_input_ids = input_ids_t5 ,return_dict=True, output_hidden_states=True)
-                prompt_embeds = outputs.text_embeds
-                encoder_hidden_states = outputs.hidden_states[-2]
-                # encoder_hidden_states = outputs_t5.encoder_hidden_states[-2]
+            outputs = self.text_encoder(input_ids, return_dict=True, output_hidden_states=True)
+            # outputs_t5 = self.text_encoder_t5(input_ids_t5, decoder_input_ids = input_ids_t5 ,return_dict=True, output_hidden_states=True)
+            prompt_embeds = outputs.text_embeds
+            encoder_hidden_states = outputs.hidden_states[-2]
+            # encoder_hidden_states = outputs_t5.encoder_hidden_states[-2]
 
         prompt_embeds = prompt_embeds.repeat(num_images_per_prompt, 1)
         encoder_hidden_states = encoder_hidden_states.repeat(num_images_per_prompt, 1, 1)
diff --git a/pipelines/meissonic/pipeline_img2img.py b/pipelines/meissonic/pipeline_img2img.py
index 13e5c3717..2aaf9d987 100644
--- a/pipelines/meissonic/pipeline_img2img.py
+++ b/pipelines/meissonic/pipeline_img2img.py
@@ -46,7 +46,7 @@ def _prepare_latent_image_ids(batch_size, height, width, device, dtype):
     return latent_image_ids.to(device=device, dtype=dtype)
 
 
-class Img2ImgPipeline(DiffusionPipeline):
+class MeissonicImg2ImgPipeline(DiffusionPipeline):
     image_processor: VaeImageProcessor
     vqvae: VQModel
     tokenizer: CLIPTokenizer
diff --git a/pipelines/meissonic/pipeline_inpaint.py b/pipelines/meissonic/pipeline_inpaint.py
index d405afa53..aa352d9b4 100644
--- a/pipelines/meissonic/pipeline_inpaint.py
+++ b/pipelines/meissonic/pipeline_inpaint.py
@@ -43,7 +43,7 @@ def _prepare_latent_image_ids(batch_size, height, width, device, dtype):
     return latent_image_ids.to(device=device, dtype=dtype)
 
 
-class InpaintPipeline(DiffusionPipeline):
+class MeissonicInpaintPipeline(DiffusionPipeline):
     image_processor: VaeImageProcessor
     vqvae: VQModel
     tokenizer: CLIPTokenizer
diff --git a/pipelines/model_kandinsky.py b/pipelines/model_kandinsky.py
new file mode 100644
index 000000000..0d5ad0013
--- /dev/null
+++ b/pipelines/model_kandinsky.py
@@ -0,0 +1,83 @@
+import transformers
+import diffusers
+from modules import shared, sd_models, devices, model_quant, sd_hijack_te
+from pipelines import generic
+
+
+def _load_combined(checkpoint_info, diffusers_load_config, pipeline_cls, model_label):
+    """Load a Kandinsky 2.x combined pipeline from the checkpoint's repo.
+
+    Shared by `load_kandinsky21` and `load_kandinsky22`, which differ only in
+    the diffusers pipeline class and in the type label written to the debug log.
+
+    Args:
+        checkpoint_info: checkpoint descriptor; resolved to a repo id via `sd_models.path_to_repo`.
+        diffusers_load_config: optional dict of load options forwarded to `model_quant.get_dit_args`.
+        pipeline_cls: diffusers pipeline class whose `from_pretrained` is called.
+        model_label: value logged as `type=` in the debug message.
+
+    Returns:
+        The loaded pipeline, after `sd_hijack_te.init_hijack` has been applied to it.
+    """
+    diffusers_load_config = {} if diffusers_load_config is None else diffusers_load_config
+    repo_id = sd_models.path_to_repo(checkpoint_info)
+    sd_models.hf_auth_check(checkpoint_info)
+
+    load_args, _quant_args = model_quant.get_dit_args(diffusers_load_config)
+    shared.log.debug(f'Load model: type={model_label} repo="{repo_id}" config={diffusers_load_config} offload={shared.opts.diffusers_offload_mode} dtype={devices.dtype} args={load_args}')
+    pipe = pipeline_cls.from_pretrained(
+        repo_id,
+        cache_dir=shared.opts.diffusers_dir,
+        **load_args,
+    )
+    sd_hijack_te.init_hijack(pipe)
+    devices.torch_gc(force=True, reason='load')
+    return pipe
+
+
+def load_kandinsky21(checkpoint_info, diffusers_load_config=None):
+    """Load Kandinsky 2.1 as a `diffusers.KandinskyCombinedPipeline`."""
+    return _load_combined(checkpoint_info, diffusers_load_config, diffusers.KandinskyCombinedPipeline, 'Kandinsky21')
+
+
+def load_kandinsky22(checkpoint_info, diffusers_load_config=None):
+    """Load Kandinsky 2.2 as a `diffusers.KandinskyV22CombinedPipeline`."""
+    return _load_combined(checkpoint_info, diffusers_load_config, diffusers.KandinskyV22CombinedPipeline, 'Kandinsky22')
+
+
+def load_kandinsky3(checkpoint_info, diffusers_load_config=None):
+    """Load Kandinsky 3 as a `diffusers.Kandinsky3Pipeline`.
+
+    The UNet and the T5 text encoder are loaded separately through the
+    `generic` helpers and passed into the pipeline; all three loads request
+    the "fp16" weights variant.
+    """
+    diffusers_load_config = {} if diffusers_load_config is None else diffusers_load_config
+    repo_id = sd_models.path_to_repo(checkpoint_info)
+    sd_models.hf_auth_check(checkpoint_info)
+
+    load_args, _quant_args = model_quant.get_dit_args(diffusers_load_config)
+    shared.log.debug(f'Load model: type=Kandinsky30 repo="{repo_id}" config={diffusers_load_config} offload={shared.opts.diffusers_offload_mode} dtype={devices.dtype} args={load_args}')
+
+    unet = generic.load_transformer(repo_id, cls_name=diffusers.Kandinsky3UNet, load_config=diffusers_load_config, subfolder="unet", variant="fp16")
+    text_encoder = generic.load_text_encoder(repo_id, cls_name=transformers.T5EncoderModel, load_config=diffusers_load_config, subfolder="text_encoder", variant="fp16")
+
+    pipe = diffusers.Kandinsky3Pipeline.from_pretrained(
+        repo_id,
+        unet=unet,
+        text_encoder=text_encoder,
+        variant="fp16",
+        cache_dir=shared.opts.diffusers_dir,
+        **load_args,
+    )
+    # NOTE(review): presumably read at generation time to request numpy output - confirm against the consumer of task_args
+    pipe.task_args = {
+        'output_type': 'np',
+    }
+
+    # local names are no longer needed once the components have been handed to the pipeline
+    del text_encoder
+    del unet
+    sd_hijack_te.init_hijack(pipe)
+    devices.torch_gc(force=True, reason='load')
+    return pipe