diff --git a/html/locale_en.json b/html/locale_en.json
index cbfd34342..c7b5c77f3 100644
--- a/html/locale_en.json
+++ b/html/locale_en.json
@@ -562,18 +562,17 @@
   {"id":"","label":"Token merging ratio","localized":"","hint":"Enable redundant token merging via tomesd for speed and memory improvements, 0=disabled"},
   {"id":"","label":"Token merging ratio for img2img","localized":"","hint":"Enable redundant token merging for img2img via tomesd for speed and memory improvements, 0=disabled"},
   {"id":"","label":"Token merging ratio for hires pass","localized":"","hint":"Enable redundant token merging for hires pass via tomesd for speed and memory improvements, 0=disabled"},
-  {"id":"","label":"Diffusers allow loading from safetensors files","localized":"","hint":"Allow loading of safetensors files as diffuser models"},
   {"id":"","label":"Select diffuser pipeline when loading from safetensors","localized":"","hint":""},
   {"id":"","label":"Move base model to CPU when using refiner","localized":"","hint":""},
   {"id":"","label":"Move refiner model to CPU when not in use","localized":"","hint":""},
   {"id":"","label":"Move UNet to CPU while VAE decoding","localized":"","hint":""},
   {"id":"","label":"Use model EMA weights when possible","localized":"","hint":""},
   {"id":"","label":"Generator device","localized":"","hint":""},
-  {"id":"","label":"Enable sequential CPU offload","localized":"","hint":"Reduces GPU memory usage by transferring weights to the CPU. Increases inference time approximately 10%. Use with Enable Attention slicing for minimal memory consumption"},
-  {"id":"","label":"Enable model CPU offload","localized":"","hint":"Transferring of entire models to the CPU, negligible impact on inference time while still providing some memory savings. Use with Enable Attention slicing for additional memory savings"},
-  {"id":"","label":"Enable VAE slicing","localized":"","hint":"Decodes batch latents one image at a time with limited VRAM. Small performance boost in VAE decode on multi-image batches. Use with Enable Attention slicing"},
-  {"id":"","label":"Enable VAE tiling","localized":"","hint":"Divide large images into overlapping tiles with limited VRAM. Might result in a minor increase in processing time. Use with Enable Attention Slicing"},
-  {"id":"","label":"Enable attention slicing","localized":"","hint":"Performs attention computation in steps instead of all at once. 10% slower inference times. Greatly reduces memory usage. Best used, period"},
+  {"id":"","label":"Enable sequential CPU offload","localized":"","hint":"Reduces GPU memory usage by transferring weights to the CPU. Increases inference time by approximately 10%"},
+  {"id":"","label":"Enable model CPU offload","localized":"","hint":"Transfers entire models to the CPU. Negligible impact on inference time while still providing some memory savings"},
+  {"id":"","label":"Enable VAE slicing","localized":"","hint":"Decodes batched latents one image at a time to limit VRAM usage. Small performance boost in VAE decode on multi-image batches"},
+  {"id":"","label":"Enable VAE tiling","localized":"","hint":"Divides large images into overlapping tiles to limit VRAM usage. Results in a minor increase in processing time"},
+  {"id":"","label":"Enable attention slicing","localized":"","hint":"Performs attention computation in steps instead of all at once. Slower inference times, but greatly reduced memory usage"},
   {"id":"","label":"Diffusers model loading variant","localized":"","hint":""},
   {"id":"","label":"Diffusers VAE loading variant","localized":"","hint":""},
   {"id":"","label":"Diffusers LoRA loading variant","localized":"","hint":"'sequential apply' loads and applies each LoRA in order of appearance, 'merge and apply' loads all LoRAs and merges them in-memory before applying to model, 'diffusers default' uses single LoRA loading method"}
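Note: the trimmed hints above all describe standard diffusers memory optimizations. A minimal sketch of how such toggles typically map onto the real diffusers API; the `enable_*` methods exist on diffusers pipelines, while the helper and every flag name except `diffusers_seq_cpu_offload` (which appears in `modules/shared.py` later in this diff) are illustrative assumptions:

```python
from diffusers import DiffusionPipeline

def apply_memory_opts(pipe: DiffusionPipeline,
                      seq_cpu_offload: bool = False,
                      model_cpu_offload: bool = False,
                      vae_slicing: bool = True,
                      vae_tiling: bool = False,
                      attention_slicing: bool = True) -> None:
    # sequential and model offload are mutually exclusive: pick at most one
    if seq_cpu_offload:
        pipe.enable_sequential_cpu_offload()  # streams weights layer by layer; lowest VRAM, ~10% slower
    elif model_cpu_offload:
        pipe.enable_model_cpu_offload()       # moves whole submodels; some savings, near-zero slowdown
    if vae_slicing:
        pipe.enable_vae_slicing()             # decode batched latents one image at a time
    if vae_tiling:
        pipe.enable_vae_tiling()              # decode large images as overlapping tiles
    if attention_slicing:
        pipe.enable_attention_slicing()       # compute attention in steps; slower but much leaner
```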
diff --git a/html/locale_ko.json b/html/locale_ko.json
index 1dc3600e2..f85119fd0 100644
--- a/html/locale_ko.json
+++ b/html/locale_ko.json
@@ -562,7 +562,6 @@
   {"id":"","label":"Token merging ratio","localized":"토큰 병합 비율","hint":"속도와 메모리 절감을 위해 tomesd를 사용해 토큰 병합을 활성화한다. (0이면 비활성화)"},
   {"id":"","label":"Token merging ratio for img2img","localized":"이미지➠이미지 토큰 병합 비율","hint":"속도와 메모리 절감을 위해 이미지➠이미지에서 tomesd를 사용해 토큰 병합을 활성화한다. (0이면 비활성화)"},
   {"id":"","label":"Token merging ratio for hires pass","localized":"텍스트➠이미지 업스케일링(Hires fix) 토큰 병합 비율","hint":"속도와 메모리 절감을 위해 Hires fix에서 tomesd를 사용해 토큰 병합을 활성화한다. (0이면 비활성화)"},
-  {"id":"","label":"Diffusers allow loading from safetensors files","localized":"safetensors 파일에서 로드 허용","hint":"safetensors 파일을 Diffusers 모델로 로드할 수 있게 한다."},
   {"id":"","label":"Select diffuser pipeline when loading from safetensors","localized":"safetensors 파일에서 로드할 때 사용할 파이프라인 선택","hint":""},
   {"id":"","label":"Move base model to CPU when using refiner","localized":"리파이너를 사용 중일 때 base 모델을 CPU로 이동","hint":""},
   {"id":"","label":"Move refiner model to CPU when not in use","localized":"사용 중이지 않을 때 리파이너 모델을 CPU로 이동","hint":""},
diff --git a/installer.py b/installer.py
index 1376a5d33..b4bfb7444 100644
--- a/installer.py
+++ b/installer.py
@@ -186,8 +186,6 @@ def install(package, friendly: str = None, ignore: bool = False):
     if args.reinstall or args.upgrade:
         global quick_allowed # pylint: disable=global-statement
         quick_allowed = False
-    if args.use_ipex and "accelerate==" in package:
-        package = "accelerate==0.20.3"
     if args.reinstall or not installed(package, friendly):
         pip(f"install --upgrade {package}", ignore=ignore)
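Note: this removal pairs with the `requirements.txt` change at the end of this diff, which drops accelerate from 0.21.0 to 0.20.3 for every backend, so the IPEX-only override becomes dead code. A runnable illustration of the simplified flow; `installed()` and `pip()` are the installer.py helpers, stubbed here as assumptions so the sketch runs standalone:

```python
def installed(package: str) -> bool:  # stub: the real helper checks pkg==ver specs
    return False

def pip(cmd: str) -> None:  # stub: the real helper shells out to python -m pip
    print(f"pip {cmd}")

def install(package: str, reinstall: bool = False) -> None:
    # accelerate is no longer special-cased for IPEX: requirements.txt now
    # pins accelerate==0.20.3 for all backends
    if reinstall or not installed(package):
        pip(f"install --upgrade {package}")

install("accelerate==0.20.3")  # same pin for CUDA, ROCm and IPEX users alike
```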
diff --git a/modules/lora_diffusers.py b/modules/lora_diffusers.py
index f8ab54dde..b2fe8fc00 100644
--- a/modules/lora_diffusers.py
+++ b/modules/lora_diffusers.py
@@ -24,10 +24,10 @@ def unload_diffusers_lora():
     lora_state['all_loras'].reverse()
     lora_state['multiplier'].reverse()
     for i, lora_network in enumerate(lora_state['all_loras']):
-            if shared.opts.diffusers_lora_loader == "merge and apply":
-                lora_network.restore_from(multiplier=lora_state['multiplier'][i])
-            if shared.opts.diffusers_lora_loader == "sequential apply":
-                lora_network.unapply_to()
+        if shared.opts.diffusers_lora_loader == "merge and apply":
+            lora_network.restore_from(multiplier=lora_state['multiplier'][i])
+        if shared.opts.diffusers_lora_loader == "sequential apply":
+            lora_network.unapply_to()
     lora_state['active'] = False
     lora_state['loaded'] = 0
     lora_state['all_loras'] = []
@@ -45,7 +45,7 @@ def load_diffusers_lora(name, lora, strength = 1.0):
         lora_state['multiplier'].append(strength)
         if shared.opts.diffusers_lora_loader == "diffusers default":
            pipe.load_lora_weights(lora.filename, cache_dir=shared.opts.diffusers_dir, local_files_only=True, lora_scale=strength)
-            shared.log.info(f"Diffusers LoRA loaded: {name} {lora_state['multiplier']}")
+            shared.log.info(f"LoRA loaded: {name} {lora_state['multiplier']}")
         else:
             from safetensors.torch import load_file
             lora_sd = load_file(lora.filename)
@@ -61,7 +61,7 @@ def load_diffusers_lora(name, lora, strength = 1.0):
             lora_network.to(shared.device, dtype=pipe.unet.dtype)
             lora_network.apply_to(multiplier=strength)
             lora_state['all_loras'].append(lora_network)
-            shared.log.info(f"Diffusers LoRA loaded: {name} {strength}")
+            shared.log.info(f"LoRA loaded: {name}:{strength} loader={shared.opts.diffusers_lora_loader}")
     except Exception as e:
         shared.log.error(f"Diffusers LoRA loading failed: {name} {e}")

@@ -332,7 +332,7 @@ def merge_lora_weights(pipe, weights_sd: Dict, multiplier: float = 1.0):


 # block weightや学習に対応しない簡易版 / simple version without block weight and training
-class LoRANetwork(torch.nn.Module):
+class LoRANetwork(torch.nn.Module): # pylint: disable=abstract-method
     UNET_TARGET_REPLACE_MODULE = ["Transformer2DModel"]
     UNET_TARGET_REPLACE_MODULE_CONV2D_3X3 = ["ResnetBlock2D", "Downsample2D", "Upsample2D"]
     TEXT_ENCODER_TARGET_REPLACE_MODULE = ["CLIPAttention", "CLIPMLP"]
@@ -350,17 +350,17 @@ class LoRANetwork(torch.nn.Module):
         unet,
         multiplier: float = 1.0,
         modules_dim: Optional[Dict[str, int]] = None,
         modules_alpha: Optional[Dict[str, int]] = None,
-        varbose: Optional[bool] = False,
+        varbose: Optional[bool] = False, # pylint: disable=unused-argument
     ) -> None:
         super().__init__()
         self.multiplier = multiplier
-        shared.log.debug("create LoRA network from weights")
+        # shared.log.debug("create LoRA network from weights")

         # convert SDXL Stability AI's U-Net modules to Diffusers
         converted = self.convert_unet_modules(modules_dim, modules_alpha)
         if converted:
-            shared.log.debug(f"converted {converted} Stability AI's U-Net LoRA modules to Diffusers (SDXL)")
+            shared.log.debug(f"LoRA convert: modules={converted} SDXL SAI/SGM to Diffusers")

         # create module instances
         def create_modules(
@@ -422,18 +422,13 @@ class LoRANetwork(torch.nn.Module):
             text_encoder_loras, skipped = create_modules(False, index, text_encoder, LoRANetwork.TEXT_ENCODER_TARGET_REPLACE_MODULE)
             self.text_encoder_loras.extend(text_encoder_loras)
             skipped_te += skipped
-        shared.log.debug(f"create LoRA for Text Encoder: {len(self.text_encoder_loras)} modules.")
-        if len(skipped_te) > 0:
-            shared.log.debug(f"skipped {len(skipped_te)} modules because of missing weight.")

         # extend U-Net target modules to include Conv2d 3x3
         target_modules = LoRANetwork.UNET_TARGET_REPLACE_MODULE + LoRANetwork.UNET_TARGET_REPLACE_MODULE_CONV2D_3X3
         self.unet_loras: List[LoRAModule]
         self.unet_loras, skipped_un = create_modules(True, None, unet, target_modules)
-        shared.log.debug(f"create LoRA for U-Net: {len(self.unet_loras)} modules.")
-        if len(skipped_un) > 0:
-            shared.log.debug(f"skipped {len(skipped_un)} modules because of missing weight.")
+        shared.log.debug(f"LoRA modules loaded/skipped: te={len(self.text_encoder_loras)}/{len(skipped_te)} unet={len(self.unet_loras)}/{len(skipped_un)}")

         # assertion
         names = set()
         for lora in self.text_encoder_loras + self.unet_loras:
@@ -480,11 +475,11 @@ class LoRANetwork(torch.nn.Module):

     def apply_to(self, multiplier=1.0, apply_text_encoder=True, apply_unet=True):
         if apply_text_encoder:
-            shared.log.debug("enable LoRA for text encoder")
+            # shared.log.debug("LoRA apply for text encoder")
             for lora in self.text_encoder_loras:
                 lora.apply_to(multiplier)
         if apply_unet:
-            shared.log.debug("enable LoRA for U-Net")
+            # shared.log.debug("LoRA apply for U-Net")
             for lora in self.unet_loras:
                 lora.apply_to(multiplier)
@@ -493,16 +488,14 @@ class LoRANetwork(torch.nn.Module):
             lora.unapply_to()

     def merge_to(self, multiplier=1.0):
-        shared.log.debug("merge LoRA weights to original weights")
+        # shared.log.debug("LoRA merge weights")
         for lora in tqdm(self.text_encoder_loras + self.unet_loras):
             lora.merge_to(multiplier)
-        shared.log.debug("weights are merged")

     def restore_from(self, multiplier=1.0):
-        shared.log.debug("restore LoRA weights from original weights")
+        # shared.log.debug("LoRA restore weights")
         for lora in tqdm(self.text_encoder_loras + self.unet_loras):
             lora.restore_from(multiplier)
-        shared.log.debug("weights are restored")

     def load_state_dict(self, state_dict: Mapping[str, Any], strict: bool = True):
         # convert SDXL Stability AI's state dict to Diffusers' based state dict
@@ -527,4 +520,3 @@ class LoRANetwork(torch.nn.Module):
                     state_dict[key] = state_dict[key].view(my_state_dict[key].size())

         return super().load_state_dict(state_dict, strict)
-
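Note: the three `Diffusers LoRA loading variant` modes map onto distinct `LoRANetwork` code paths in the hunks above. A condensed restatement, assuming `network` is a `LoRANetwork` already built from the safetensors state dict (construction elided); `apply_lora` itself is a hypothetical wrapper, not SD.Next code:

```python
def apply_lora(pipe, network, filename: str, strength: float, loader: str) -> None:
    if loader == "diffusers default":
        # hand the file to diffusers itself; supports a single LoRA at a time
        pipe.load_lora_weights(filename, lora_scale=strength)
    elif loader == "sequential apply":
        # patch each target module in order of appearance; reversible
        network.apply_to(multiplier=strength)   # undo with network.unapply_to()
    elif loader == "merge and apply":
        # bake the LoRA deltas into the original weights in-memory
        network.merge_to(multiplier=strength)   # undo with network.restore_from(multiplier=strength)
```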
diff --git a/modules/processing_diffusers.py b/modules/processing_diffusers.py
index cf84055d2..66dfbf25f 100644
--- a/modules/processing_diffusers.py
+++ b/modules/processing_diffusers.py
@@ -52,6 +52,24 @@ def process_diffusers(p: StableDiffusionProcessing, seeds, prompts, negative_pro
         imgs = model.image_processor.postprocess(decoded, output_type=output_type)
         return imgs

+    def fix_prompts(prompts, negative_prompts, prompts_2, negative_prompts_2):
+        if type(prompts) is str:
+            prompts = [prompts]
+        if type(negative_prompts) is str:
+            negative_prompts = [negative_prompts]
+        while len(negative_prompts) < len(prompts):
+            negative_prompts.append(negative_prompts[-1])
+        if type(prompts_2) is str:
+            prompts_2 = [prompts_2]
+        if type(prompts_2) is list:
+            while len(prompts_2) < len(prompts):
+                prompts_2.append(prompts_2[-1])
+        if type(negative_prompts_2) is str:
+            negative_prompts_2 = [negative_prompts_2]
+        if type(negative_prompts_2) is list:
+            while len(negative_prompts_2) < len(prompts_2):
+                negative_prompts_2.append(negative_prompts_2[-1])
+        return prompts, negative_prompts, prompts_2, negative_prompts_2

     def set_pipeline_args(model, prompts: list, negative_prompts: list, prompts_2: typing.Optional[list]=None, negative_prompts_2: typing.Optional[list]=None, is_refiner: bool=False, **kwargs):
         args = {}
@@ -64,6 +82,7 @@ def process_diffusers(p: StableDiffusionProcessing, seeds, prompts, negative_pro
         prompt_embed = None
         pooled = None
         negative_embed = None
         negative_pooled = None
+        prompts, negative_prompts, prompts_2, negative_prompts_2 = fix_prompts(prompts, negative_prompts, prompts_2, negative_prompts_2)
         if shared.opts.data['prompt_attention'] in {'Compel parser', 'Full parser'}:
             prompt_embed, pooled, negative_embed, negative_pooled = prompt_parser_diffusers.compel_encode_prompts(model, prompts,
diff --git a/modules/prompt_parser_diffusers.py b/modules/prompt_parser_diffusers.py
index 559845059..eaa542c35 100644
--- a/modules/prompt_parser_diffusers.py
+++ b/modules/prompt_parser_diffusers.py
@@ -47,7 +47,12 @@ def compel_encode_prompts(
     negative_embeds = []
     negative_pooleds = []
     for i in range(len(prompts)):
-        prompt_embed, positive_pooled, negative_embed, negative_pooled = compel_encode_prompt(pipeline, prompts[i], negative_prompts[i], prompts_2[i], negative_prompts_2[i], is_refiner, clip_skip)
+        prompt_embed, positive_pooled, negative_embed, negative_pooled = compel_encode_prompt(pipeline,
+            prompts[i],
+            negative_prompts[i],
+            prompts_2[i] if prompts_2 is not None else None,
+            negative_prompts_2[i] if negative_prompts_2 is not None else None,
+            is_refiner, clip_skip)
         prompt_embeds.append(prompt_embed)
         positive_pooleds.append(positive_pooled)
         negative_embeds.append(negative_embed)
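Note: `fix_prompts` normalizes strings to lists and pads the shorter lists by repeating their last entry, so every prompt index is safe to access. A behavior check with the logic copied from the hunk above, trimmed to the first two arguments (the `*_2` handling follows the same pattern):

```python
def fix_prompts(prompts, negative_prompts):
    if type(prompts) is str:
        prompts = [prompts]
    if type(negative_prompts) is str:
        negative_prompts = [negative_prompts]
    while len(negative_prompts) < len(prompts):
        negative_prompts.append(negative_prompts[-1])  # pad by repeating last entry
    return prompts, negative_prompts

print(fix_prompts(["a cat", "a dog"], "blurry"))
# (['a cat', 'a dog'], ['blurry', 'blurry'])
```

Since `prompts_2` and `negative_prompts_2` can legitimately remain `None` after normalization, `compel_encode_prompts` now guards its per-index access with the `if ... is not None` checks shown above.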
diff --git a/modules/sd_models.py b/modules/sd_models.py
index 02a4383d6..48775d2e4 100644
--- a/modules/sd_models.py
+++ b/modules/sd_models.py
@@ -136,12 +136,9 @@ def list_models():
     checkpoints_list.clear()
     checkpoint_aliases.clear()
     ext_filter=[".safetensors"] if shared.opts.sd_disable_ckpt else [".ckpt", ".safetensors"]
-    model_list = []
-    if shared.backend == shared.Backend.ORIGINAL or shared.opts.diffusers_allow_safetensors:
-        model_list += modelloader.load_models(model_path=model_path, model_url=None, command_path=shared.opts.ckpt_dir, ext_filter=ext_filter, download_name=None, ext_blacklist=[".vae.ckpt", ".vae.safetensors"])
+    model_list = modelloader.load_models(model_path=model_path, model_url=None, command_path=shared.opts.ckpt_dir, ext_filter=ext_filter, download_name=None, ext_blacklist=[".vae.ckpt", ".vae.safetensors"])
     if shared.backend == shared.Backend.DIFFUSERS:
         model_list += modelloader.load_diffusers_models(model_path=os.path.join(models_path, 'Diffusers'), command_path=shared.opts.diffusers_dir)
-
     for filename in sorted(model_list, key=str.lower):
         checkpoint_info = CheckpointInfo(filename)
         if checkpoint_info.name is not None:
@@ -844,7 +841,6 @@ def set_diffuser_pipe(pipe, new_pipe_type):
         new_pipe = diffusers.AutoPipelineForImage2Image.from_pipe(pipe)
     elif new_pipe_type == DiffusersTaskType.INPAINTING:
         new_pipe = diffusers.AutoPipelineForInpainting.from_pipe(pipe)
-
     if pipe.__class__ == new_pipe.__class__:
         return

@@ -1030,20 +1026,35 @@ def reload_model_weights(sd_model=None, info=None, reuse_dict=False, op='model'):
     shared.log.info(f"Weights loaded in {timer.summary()}")


+def disable_offload(sd_model):
+    from accelerate.hooks import remove_hook_from_module
+    if not sd_model.has_accelerate:
+        return
+    for _name, model in sd_model.components.items():
+        if not isinstance(model, torch.nn.Module):
+            continue
+        remove_hook_from_module(model, recurse=True)
+
+
 def unload_model_weights(op='model'):
     from modules import sd_hijack
     if op == 'model' or op == 'dict':
         if model_data.sd_model:
-            model_data.sd_model.to(devices.cpu)
             if shared.backend == shared.Backend.ORIGINAL:
+                model_data.sd_model.to(devices.cpu)
                 sd_hijack.model_hijack.undo_hijack(model_data.sd_model)
+            else:
+                disable_offload(model_data.sd_model)
+                model_data.sd_model.to('meta')
             model_data.sd_model = None
             shared.log.debug(f'Unload weights {op}: {memory_stats()}')
     else:
         if model_data.sd_refiner:
-            model_data.sd_refiner.to(devices.cpu)
+            model_data.sd_refiner.to('meta')
             if shared.backend == shared.Backend.ORIGINAL:
                 sd_hijack.model_hijack.undo_hijack(model_data.sd_refiner)
+            else:
+                disable_offload(model_data.sd_refiner)
             model_data.sd_refiner = None
             shared.log.debug(f'Unload weights {op}: {memory_stats()}')
     devices.torch_gc(force=True)
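Note on the new unload path: a minimal demonstration of the two mechanics `disable_offload` and `unload_model_weights` rely on. The `Linear` layer here is a stand-in; in SD.Next the loop walks `sd_model.components`:

```python
import torch
from accelerate.hooks import remove_hook_from_module

net = torch.nn.Linear(4096, 4096)

# 1. strip any accelerate offload hooks first; their forward pre-hooks hold
#    references to the real weights (a no-op here, since none are attached)
remove_hook_from_module(net, recurse=True)

# 2. moving to the meta device drops parameter storage but keeps shape/dtype,
#    releasing RAM/VRAM without walking the module tree manually
net.to("meta")
print(net.weight.device)  # meta
print(net.weight.shape)   # torch.Size([4096, 4096]); metadata survives
```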
diff --git a/modules/shared.py b/modules/shared.py
index b156a4cb5..945f2a27f 100644
--- a/modules/shared.py
+++ b/modules/shared.py
@@ -395,11 +395,10 @@ options_templates.update(options_section(('cuda', "Compute Settings"), {
 }))

 options_templates.update(options_section(('diffusers', "Diffusers Settings"), {
-    "diffusers_allow_safetensors": OptionInfo(True, 'Diffusers allow loading from safetensors files'),
     "diffusers_pipeline": OptionInfo(pipelines[0], 'Diffusers pipeline', gr.Dropdown, lambda: {"choices": pipelines}),
     "diffusers_move_base": OptionInfo(False, "Move base model to CPU when using refiner"),
+    "diffusers_move_unet": OptionInfo(False, "Move UNet to CPU while VAE decoding"),
     "diffusers_move_refiner": OptionInfo(True, "Move refiner model to CPU when not in use"),
-    "diffusers_move_unet": OptionInfo(False, "Move UNet to CPU while VAE decoding"),
     "diffusers_extract_ema": OptionInfo(True, "Use model EMA weights when possible"),
     "diffusers_generator_device": OptionInfo("default", "Generator device", gr.Radio, lambda: {"choices": ["default", "cpu"]}),
     "diffusers_seq_cpu_offload": OptionInfo(False, "Enable sequential CPU offload"),
diff --git a/requirements.txt b/requirements.txt
index 7dac5b439..55da323bf 100644
--- a/requirements.txt
+++ b/requirements.txt
@@ -46,7 +46,7 @@ typing-extensions==4.7.1
antlr4-python3-runtime==4.9.3
requests==2.31.0
tqdm==4.65.0
-accelerate==0.21.0
+accelerate==0.20.3
opencv-python-headless==4.7.0.72
diffusers==0.19.3
einops==0.4.1
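Closing note on the Diffusers Settings above: the `Generator device` option (`default` or `cpu`) controls where the `torch.Generator` that drives sampling noise lives. A hypothetical helper (`make_generator` is not SD.Next code) showing why `cpu` is offered: CPU-generated noise is device-independent, so seeds reproduce identically across GPU models:

```python
import torch

def make_generator(seed: int, generator_device: str = "default") -> torch.Generator:
    # 'cpu' gives bit-identical noise on any hardware; 'default' keeps the
    # generator on the compute device for a slight convenience/speed edge
    if generator_device == "cpu":
        device = "cpu"
    else:
        device = "cuda" if torch.cuda.is_available() else "cpu"
    return torch.Generator(device=device).manual_seed(seed)

g = make_generator(42, generator_device="cpu")
# any diffusers pipeline accepts it: pipe(prompt, generator=g)
```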