# mirror of https://github.com/vladmandic/automatic
#!/usr/bin/env python
# pylint: disable=cell-var-from-loop
"""
Test Torch Dynamo functionality and backends
"""
import json
|
|
import warnings
|
|
|
|
import numpy as np
|
|
import torch
|
|
from torchvision.models import resnet18
|
|
|
|
|
|
print('torch:', torch.__version__)
|
|
try:
|
|
# must be imported explicitly or namespace is not found
|
|
import torch._dynamo as dynamo # pylint: disable=ungrouped-imports
|
|
except Exception as err:
|
|
print('torch without dynamo support', err)
|
|
|
|
|
|
N_ITERS = 20
|
|
torch._dynamo.config.verbose=True # pylint: disable=protected-access
|
|
warnings.filterwarnings('ignore', category=UserWarning) # disable those for now as many backends reports tons
|
|
# torch.set_float32_matmul_precision('high') # enable to test in fp32
|
|
|
|
|
|
def timed(fn): # returns the result of running `fn()` and the time it took for `fn()` to run in ms using CUDA events
|
|
start = torch.cuda.Event(enable_timing=True)
|
|
end = torch.cuda.Event(enable_timing=True)
|
|
start.record()
|
|
result = fn()
|
|
end.record()
|
|
torch.cuda.synchronize()
|
|
return result, start.elapsed_time(end)
|
|
|
|
|
|
def generate_data(b):
|
|
return (
|
|
torch.randn(b, 3, 128, 128).to(torch.float32).cuda(),
|
|
torch.randint(1000, (b,)).cuda(),
|
|
)
|
|
|
|
|
|
def init_model():
|
|
return resnet18().to(torch.float32).cuda()
|
|
|
|
|
|
def evaluate(mod, val):
|
|
return mod(val)
|
|
|
|
|
|
if __name__ == '__main__':
|
|
# first pass, dynamo is going to be slower as it compiles
|
|
model = init_model()
|
|
inp = generate_data(16)[0]
|
|
|
|
# repeat test
|
|
results = {}
|
|
times = []
|
|
print('eager initial eval:', timed(lambda: evaluate(model, inp))[1])
|
|
for _i in range(N_ITERS):
|
|
inp = generate_data(16)[0]
|
|
_res, time = timed(lambda: evaluate(model, inp)) # noqa: B023
|
|
times.append(time)
|
|
results['default'] = np.median(times)
|
|
|
|
print('dynamo available backends:', dynamo.list_backends())
|
|
for backend in dynamo.list_backends():
|
|
try:
|
|
# required before changing backends
|
|
torch._dynamo.reset() # pylint: disable=protected-access
|
|
eval_dyn = dynamo.optimize(backend)(evaluate)
|
|
print('dynamo initial eval:', backend, timed(lambda: eval_dyn(model, inp))[1]) # noqa: B023
|
|
times = []
|
|
for _i in range(N_ITERS):
|
|
inp = generate_data(16)[0]
|
|
_res, time = timed(lambda: eval_dyn(model, inp)) # noqa: B023
|
|
times.append(time)
|
|
results[backend] = np.median(times)
|
|
except Exception as err:
|
|
lines = str(err).split('\n')
|
|
print('dyanmo backend failed:', backend, lines[0]) # print just first error line as backtraces can be quite long
|
|
results[backend] = 'error'
|
|
|
|
# print stats
|
|
print(json.dumps(results, indent = 4))
|
|
|
"""
Reference: <https://github.com/pytorch/pytorch/blob/4f4b62e4a255708e928445b6502139d5962974fa/docs/source/dynamo/get-started.rst>

Training & Inference backends:
  dynamo.optimize("inductor") - Uses TorchInductor backend with AotAutograd and cudagraphs by leveraging codegened Triton kernels
  dynamo.optimize("aot_nvfuser") - nvFuser with AotAutograd
  dynamo.optimize("aot_cudagraphs") - cudagraphs with AotAutograd

Inference-only backends:
  dynamo.optimize("ofi") - Uses Torchscript optimize_for_inference
  dynamo.optimize("fx2trt") - Uses Nvidia TensorRT for inference optimizations
  dynamo.optimize("onnxrt") - Uses ONNXRT for inference on CPU/GPU
"""