mirror of https://github.com/vladmandic/automatic
OpenVINO fixes
parent 698c8d56cd
commit ebf7b90e3e
@@ -15,6 +15,7 @@ pnpm-lock.yaml
package-lock.json
venv
.history
cache

# all models and temp files
*.log
@@ -68,7 +68,8 @@ Additional models will be added as they become available and there is public interest
  Support will be extended to *Windows* once AMD releases ROCm for Windows
- Any GPU compatible with *DirectX* on *Windows* using **DirectML** libraries.
  This includes support for AMD GPUs that are not supported by native ROCm libraries
- *Intel Arc* GPUs using *Intel OneAPI* **Ipex/XPU** libraries
- *Intel* iGPUs using *Intel OneAPI* **OpenVINO** libraries
- *Apple M1/M2* on *OSX* using built-in support in Torch with **MPS** optimizations

## Install & Run
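As a side note (my addition, not part of this commit): a quick way to check which of the backends listed above is visible to the local Torch build. The DirectML check is omitted and the OpenVINO check assumes the `openvino` package is installed.

    import torch

    print("CUDA/ROCm:", torch.cuda.is_available())  # covers both nVidia CUDA and AMD ROCm builds of torch
    print("IPEX/XPU:", hasattr(torch, "xpu") and torch.xpu.is_available())  # Intel OneAPI
    print("MPS:", torch.backends.mps.is_available())  # Apple M1/M2

    try:
        from openvino.runtime import Core
        print("OpenVINO devices:", Core().available_devices)  # e.g. ['CPU', 'GPU']
    except ImportError:
        print("OpenVINO: not installed")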
@@ -94,8 +95,8 @@ Once SD.Next is installed, simply run `webui.bat` (*Windows*) or `webui.sh` (*Linux*)
Below is a partial list of all available parameters; run `webui --help` for the full list:

Setup options:
  --use-ipex        Use Intel OneAPI XPU backend, default: False
  --use-directml    Use DirectML if no compatible GPU is detected, default: False
  --use-ipex        Force use Intel OneAPI XPU backend, default: False
  --use-cuda        Force use nVidia CUDA backend, default: False
  --use-rocm        Force use AMD ROCm backend, default: False
  --skip-update     Skip update of extensions and submodules, default: False
@@ -458,6 +458,7 @@ def check_torch():
    install('hidet', 'hidet')
    if opts.get('cuda_compile_backend', '') == 'openvino_fx':
        install('openvino==2023.1.0.dev20230811', 'openvino')
        os.environ.setdefault('PYTORCH_TRACING_MODE', 'TORCHFX')
    if args.profile:
        print_profile(pr, 'Torch')
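For context (not part of the commit): `openvino_fx` is the TorchDynamo backend name registered by the module shown later in this diff, so once the `openvino` package above is installed the Torch 2.x compile API can target it by name. A minimal sketch under those assumptions:

    import torch

    # 'openvino_fx' is the name registered via @register_backend in the backend module below;
    # torch.compile looks registered backends up by string
    net = torch.nn.Conv2d(4, 4, kernel_size=3, padding=1)
    compiled_net = torch.compile(net, backend="openvino_fx")
    out = compiled_net(torch.randn(1, 4, 64, 64))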
@@ -829,8 +830,8 @@ def add_args(parser):
    group.add_argument('--upgrade', default = False, action='store_true', help = "Upgrade main repository to latest version, default: %(default)s")
    group.add_argument('--requirements', default = False, action='store_true', help = "Force re-check of requirements, default: %(default)s")
    group.add_argument('--quick', default = False, action='store_true', help = "Run with startup sequence only, default: %(default)s")
    group.add_argument("--use-ipex", default = False, action='store_true', help="Use Intel OneAPI XPU backend, default: %(default)s")
    group.add_argument('--use-directml', default = False, action='store_true', help = "Use DirectML if no compatible GPU is detected, default: %(default)s")
    group.add_argument("--use-ipex", default = False, action='store_true', help="Force use Intel OneAPI XPU backend, default: %(default)s")
    group.add_argument("--use-cuda", default=False, action='store_true', help="Force use nVidia CUDA backend, default: %(default)s")
    group.add_argument("--use-rocm", default=False, action='store_true', help="Force use AMD ROCm backend, default: %(default)s")
    group.add_argument('--skip-requirements', default = False, action='store_true', help = "Skips checking and installing requirements, default: %(default)s")
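The --use-cuda, --use-rocm and --use-ipex switches above are declared as force flags, but the logic that consumes them is outside this diff. A minimal sketch of how such flags are typically honored, purely as an illustration (not SD.Next's actual selection code):

    # hypothetical helper: map the force flags to a backend label, first match wins
    def select_backend(args):
        if args.use_cuda:
            return 'cuda'
        if args.use_rocm:
            return 'rocm'
        if args.use_ipex:
            return 'ipex'
        if args.use_directml:
            return 'directml'
        return 'autodetect'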
@@ -40,8 +40,8 @@ group.add_argument("--no-download", action='store_true', help="Disable download
group.add_argument("--profile", action='store_true', help="Run profiler, default: %(default)s")
group.add_argument("--disable-queue", action='store_true', help="Disable queues, default: %(default)s")
group.add_argument('--debug', default = False, action='store_true', help = "Run installer with debug logging, default: %(default)s")
group.add_argument("--use-ipex", default = False, action='store_true', help="Use Intel OneAPI XPU backend, default: %(default)s")
group.add_argument('--use-directml', default = False, action='store_true', help = "Use DirectML if no compatible GPU is detected, default: %(default)s")
group.add_argument("--use-ipex", default = False, action='store_true', help="Force use Intel OneAPI XPU backend, default: %(default)s")
group.add_argument("--use-cuda", default=False, action='store_true', help="Force use nVidia CUDA backend, default: %(default)s")
group.add_argument("--use-rocm", default=False, action='store_true', help="Force use AMD ROCm backend, default: %(default)s")
group.add_argument('--subpath', type=str, help='Customize the URL subpath for usage with reverse proxy')
@@ -2,23 +2,114 @@ import os
import torch
from openvino.frontend.pytorch.torchdynamo.execute import execute
from openvino.frontend.pytorch.torchdynamo.partition import Partitioner
from openvino.runtime import Core, Type, PartialShape
from torch._dynamo.backends.common import fake_tensor_unsupported
from torch._dynamo.backends.registry import register_backend
from torch.fx.experimental.proxy_tensor import make_fx
from torch._inductor.compile_fx import compile_fx
from hashlib import sha256
import modules.shared


class ModelState:
    def __init__(self):
        self.recompile = 1
        self.device = "CPU"
        self.height = 512
        self.width = 512
        self.batch_size = 1
        self.mode = 0
        self.partition_id = 0
        self.model_hash = ""


model_state = ModelState()


@register_backend
@fake_tensor_unsupported
def openvino_fx(subgraph, example_inputs):
    # default to the GPU plugin unless the user picked a device explicitly
    if os.getenv("OPENVINO_TORCH_BACKEND_DEVICE") is None:
        os.environ.setdefault("OPENVINO_TORCH_BACKEND_DEVICE", "GPU")
    try:
        executor_parameters = None
        core = Core()
        # model caching: hash the FX subgraph source so compiled blobs can be reused
        if os.getenv("OPENVINO_TORCH_MODEL_CACHING") != "0":
            os.environ.setdefault('OPENVINO_TORCH_MODEL_CACHING', "1")
            model_hash_str = sha256(subgraph.code.encode('utf-8')).hexdigest()
            model_hash_str_file = model_hash_str + str(model_state.partition_id)
            model_state.partition_id = model_state.partition_id + 1
            executor_parameters = {"model_hash_str": model_hash_str}

        example_inputs.reverse()
        cache_root = "./cache/"
        if os.getenv("OPENVINO_TORCH_CACHE_DIR") is not None:
            cache_root = os.getenv("OPENVINO_TORCH_CACHE_DIR")

        device = "GPU"

        if os.getenv("OPENVINO_TORCH_BACKEND_DEVICE") is not None:
            device = os.getenv("OPENVINO_TORCH_BACKEND_DEVICE")
            assert device in core.available_devices, "Specified device " + device + " is not in the list of OpenVINO Available Devices"
        else:
            os.environ.setdefault('OPENVINO_TORCH_BACKEND_DEVICE', device)

        file_name = get_cached_file_name(*example_inputs, model_hash_str=model_hash_str_file, device=device, cache_root=cache_root)

        # reuse a previously exported OpenVINO IR if it exists on disk
        if file_name is not None and os.path.isfile(file_name + ".xml") and os.path.isfile(file_name + ".bin"):
            om = core.read_model(file_name + ".xml")

            dtype_mapping = {
                torch.float32: Type.f32,
                torch.float64: Type.f64,
                torch.float16: Type.f16,
                torch.int64: Type.i64,
                torch.int32: Type.i32,
                torch.uint8: Type.u8,
                torch.int8: Type.i8,
                torch.bool: Type.boolean
            }

            for idx, input_data in enumerate(example_inputs):
                om.inputs[idx].get_node().set_element_type(dtype_mapping[input_data.dtype])
                om.inputs[idx].get_node().set_partial_shape(PartialShape(list(input_data.shape)))
            om.validate_nodes_and_infer_types()

            if model_hash_str is not None:
                core.set_property({'CACHE_DIR': cache_root + '/blob'})

            compiled_model = core.compile_model(om, device)

            def _call(*args):
                ov_inputs = [a.detach().cpu().numpy() for a in args]
                ov_inputs.reverse()
                res = compiled_model(ov_inputs)
                result = [torch.from_numpy(res[out]) for out in compiled_model.outputs]
                return result
            return _call
        else:
            # no cached IR: trace the subgraph with make_fx and let OpenVINO partition and execute it
            example_inputs.reverse()
            model = make_fx(subgraph)(*example_inputs)
            with torch.no_grad():
                model.eval()
            partitioner = Partitioner()
            compiled_model = partitioner.make_partitions(model)

            def _call(*args):
                res = execute(compiled_model, *args, executor="openvino",
                              executor_parameters=executor_parameters)
                return res
            return _call
    except Exception:
        # fall back to the default Inductor compiler if anything in the OpenVINO path fails
        return compile_fx(subgraph, example_inputs)


def get_cached_file_name(*args, model_hash_str, device, cache_root):
    file_name = None
    if model_hash_str is not None:
        model_cache_dir = cache_root + "/model/"
        try:
            os.makedirs(model_cache_dir, exist_ok=True)
            file_name = model_cache_dir + model_hash_str + "_" + device
            for input_data in args:
                if file_name is not None:
                    file_name += "_" + str(input_data.type()) + str(input_data.size())[11:-1].replace(" ", "")
        except OSError as error:
            print("Cache directory ", cache_root, " cannot be created. Model caching is disabled. Error: ", error)
            file_name = None
            model_hash_str = None
    return file_name
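To illustrate the cache layout produced by `get_cached_file_name` above (my example, with a made-up hash): each compiled partition is keyed by the subgraph hash plus partition id, the target device, and the dtype and shape of every example input, giving a path under `<cache_root>/model/` to which the `.xml`/`.bin` IR files are written.

    import torch

    x = torch.randn(1, 4, 64, 64)
    # 'deadbeef0' stands in for sha256(subgraph.code) + partition id
    name = get_cached_file_name(x, model_hash_str="deadbeef0", device="GPU", cache_root="./cache/")
    print(name)  # ./cache//model/deadbeef0_GPU_torch.FloatTensor[1,4,64,64]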