# mirror of https://github.com/vladmandic/automatic
#!/usr/bin/env python
# pylint: disable=cell-var-from-loop
"""
Test Torch Dynamo functionality and backends
"""
import json
|
|
import warnings
|
|
|
|
import numpy as np
|
|
import torch
|
|
from torchvision.models import resnet18
|
|
|
|
|
|
print('torch:', torch.__version__)
|
|
try:
|
|
# must be imported explicitly or namespace is not found
|
|
import torch._dynamo as dynamo # pylint: disable=ungrouped-imports
|
|
except Exception as err:
|
|
print('torch without dynamo support', err)
|
|
|
|
|
|
N_ITERS = 20
|
|
torch._dynamo.config.verbose=True # pylint: disable=protected-access
|
|
warnings.filterwarnings('ignore', category=UserWarning) # disable those for now as many backends reports tons
|
|
# torch.set_float32_matmul_precision('high') # enable to test in fp32
|
|
|
|
|
|
def timed(fn): # returns the result of running `fn()` and the time it took for `fn()` to run in ms using CUDA events
|
|
start = torch.cuda.Event(enable_timing=True)
|
|
end = torch.cuda.Event(enable_timing=True)
|
|
start.record()
|
|
result = fn()
|
|
end.record()
|
|
torch.cuda.synchronize()
|
|
return result, start.elapsed_time(end)
|
|
|
|
|
|
def generate_data(b):
|
|
return (
|
|
torch.randn(b, 3, 128, 128).to(torch.float32).cuda(),
|
|
torch.randint(1000, (b,)).cuda(),
|
|
)
|
|
|
|
|
|
def init_model():
|
|
return resnet18().to(torch.float32).cuda()
|
|
|
|
|
|
def evaluate(mod, val):
|
|
return mod(val)
|
|
|
|
|
|
if __name__ == '__main__':
|
|
# first pass, dynamo is going to be slower as it compiles
|
|
model = init_model()
|
|
inp = generate_data(16)[0]
|
|
|
|
# repeat test
|
|
results = {}
|
|
times = []
|
|
print('eager initial eval:', timed(lambda: evaluate(model, inp))[1])
|
|
for _i in range(N_ITERS):
|
|
inp = generate_data(16)[0]
|
|
_res, time = timed(lambda: evaluate(model, inp)) # noqa: B023
|
|
times.append(time)
|
|
results['default'] = np.median(times)
|
|
|
|
print('dynamo available backends:', dynamo.list_backends())
|
|
for backend in dynamo.list_backends():
|
|
try:
|
|
# required before changing backends
|
|
torch._dynamo.reset() # pylint: disable=protected-access
|
|
eval_dyn = dynamo.optimize(backend)(evaluate)
|
|
print('dynamo initial eval:', backend, timed(lambda: eval_dyn(model, inp))[1]) # noqa: B023
|
|
times = []
|
|
for _i in range(N_ITERS):
|
|
inp = generate_data(16)[0]
|
|
_res, time = timed(lambda: eval_dyn(model, inp)) # noqa: B023
|
|
times.append(time)
|
|
results[backend] = np.median(times)
|
|
except Exception as err:
|
|
lines = str(err).split('\n')
|
|
print('dyanmo backend failed:', backend, lines[0]) # print just first error line as backtraces can be quite long
|
|
results[backend] = 'error'
|
|
|
|
# print stats
|
|
print(json.dumps(results, indent = 4))
|
|
|
"""
Reference: <https://github.com/pytorch/pytorch/blob/4f4b62e4a255708e928445b6502139d5962974fa/docs/source/dynamo/get-started.rst>

Training & Inference backends:
  dynamo.optimize("inductor") - Uses TorchInductor backend with AotAutograd and cudagraphs by leveraging codegened Triton kernels
  dynamo.optimize("aot_nvfuser") - nvFuser with AotAutograd
  dynamo.optimize("aot_cudagraphs") - cudagraphs with AotAutograd

Inference-only backends:
  dynamo.optimize("ofi") - Uses Torchscript optimize_for_inference
  dynamo.optimize("fx2trt") - Uses Nvidia TensorRT for inference optimizations
  dynamo.optimize("onnxrt") - Uses ONNXRT for inference on CPU/GPU
"""