added option to mask body using ControlNet seg
parent d729c016fa
commit eccaca12ff
@@ -0,0 +1,53 @@
from annotator.uniformer.mmseg.apis import init_segmentor, inference_segmentor, show_result_pyplot
from annotator.uniformer.mmseg.core.evaluation import get_palette
from modules.shared import extensions
from modules.paths import models_path
from modules import devices
import os

modeldir = os.path.join(models_path, "uniformer")
checkpoint_file = "https://huggingface.co/lllyasviel/ControlNet/resolve/main/annotator/ckpts/upernet_global_small.pth"
config_file = os.path.join(os.path.dirname(os.path.realpath(__file__)), "exp", "upernet_global_small", "config.py")
old_modeldir = os.path.dirname(os.path.realpath(__file__))
model = None

def unload_uniformer_model():
    global model
    if model is not None:
        model = model.cpu()

def apply_uniformer(img):
    global model
    if model is None:
        modelpath = os.path.join(modeldir, "upernet_global_small.pth")
        old_modelpath = os.path.join(old_modeldir, "upernet_global_small.pth")
        if os.path.exists(old_modelpath):
            modelpath = old_modelpath
        elif not os.path.exists(modelpath):
            from basicsr.utils.download_util import load_file_from_url
            load_file_from_url(checkpoint_file, model_dir=modeldir)

        model = init_segmentor(config_file, modelpath)
        model = model.to(devices.get_device_for("controlnet"))

    if devices.get_device_for("controlnet").type == 'mps':
        # adaptive_avg_pool2d can fail on MPS, workaround with CPU
        import torch.nn.functional

        orig_adaptive_avg_pool2d = torch.nn.functional.adaptive_avg_pool2d
        def cpu_if_exception(input, *args, **kwargs):
            try:
                return orig_adaptive_avg_pool2d(input, *args, **kwargs)
            except Exception:
                return orig_adaptive_avg_pool2d(input.cpu(), *args, **kwargs).to(input.device)

        try:
            torch.nn.functional.adaptive_avg_pool2d = cpu_if_exception
            result = inference_segmentor(model, img)
        finally:
            torch.nn.functional.adaptive_avg_pool2d = orig_adaptive_avg_pool2d
    else:
        result = inference_segmentor(model, img)

    res_img = show_result_pyplot(model, img, result, get_palette('ade'), opacity=1)
    return res_img
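A minimal sketch of how the colour-coded map returned by apply_uniformer above could be turned into a body mask for the new masking option. It is illustrative only: person_mask_from_seg is a hypothetical helper, and the ADE20K palette colour assumed for the "person" class should be checked against get_palette('ade').

import numpy as np
from PIL import Image

def person_mask_from_seg(seg_img, person_color=(150, 5, 61)):
    # seg_img: HxWx3 colour-coded segmentation as returned by apply_uniformer.
    # person_color: assumed ADE20K palette colour for the "person" class.
    seg = np.asarray(seg_img)
    mask = np.all(seg == np.array(person_color), axis=-1)
    # 255 where the segmenter labelled a person, 0 elsewhere - usable as an inpainting mask.
    return Image.fromarray((mask * 255).astype(np.uint8))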
@@ -0,0 +1,54 @@
# dataset settings
dataset_type = 'ADE20KDataset'
data_root = 'data/ade/ADEChallengeData2016'
img_norm_cfg = dict(
    mean=[123.675, 116.28, 103.53], std=[58.395, 57.12, 57.375], to_rgb=True)
crop_size = (512, 512)
train_pipeline = [
    dict(type='LoadImageFromFile'),
    dict(type='LoadAnnotations', reduce_zero_label=True),
    dict(type='Resize', img_scale=(2048, 512), ratio_range=(0.5, 2.0)),
    dict(type='RandomCrop', crop_size=crop_size, cat_max_ratio=0.75),
    dict(type='RandomFlip', prob=0.5),
    dict(type='PhotoMetricDistortion'),
    dict(type='Normalize', **img_norm_cfg),
    dict(type='Pad', size=crop_size, pad_val=0, seg_pad_val=255),
    dict(type='DefaultFormatBundle'),
    dict(type='Collect', keys=['img', 'gt_semantic_seg']),
]
test_pipeline = [
    dict(type='LoadImageFromFile'),
    dict(
        type='MultiScaleFlipAug',
        img_scale=(2048, 512),
        # img_ratios=[0.5, 0.75, 1.0, 1.25, 1.5, 1.75],
        flip=False,
        transforms=[
            dict(type='Resize', keep_ratio=True),
            dict(type='RandomFlip'),
            dict(type='Normalize', **img_norm_cfg),
            dict(type='ImageToTensor', keys=['img']),
            dict(type='Collect', keys=['img']),
        ])
]
data = dict(
    samples_per_gpu=4,
    workers_per_gpu=4,
    train=dict(
        type=dataset_type,
        data_root=data_root,
        img_dir='images/training',
        ann_dir='annotations/training',
        pipeline=train_pipeline),
    val=dict(
        type=dataset_type,
        data_root=data_root,
        img_dir='images/validation',
        ann_dir='annotations/validation',
        pipeline=test_pipeline),
    test=dict(
        type=dataset_type,
        data_root=data_root,
        img_dir='images/validation',
        ann_dir='annotations/validation',
        pipeline=test_pipeline))
@@ -0,0 +1,59 @@
# dataset settings
dataset_type = 'ChaseDB1Dataset'
data_root = 'data/CHASE_DB1'
img_norm_cfg = dict(
    mean=[123.675, 116.28, 103.53], std=[58.395, 57.12, 57.375], to_rgb=True)
img_scale = (960, 999)
crop_size = (128, 128)
train_pipeline = [
    dict(type='LoadImageFromFile'),
    dict(type='LoadAnnotations'),
    dict(type='Resize', img_scale=img_scale, ratio_range=(0.5, 2.0)),
    dict(type='RandomCrop', crop_size=crop_size, cat_max_ratio=0.75),
    dict(type='RandomFlip', prob=0.5),
    dict(type='PhotoMetricDistortion'),
    dict(type='Normalize', **img_norm_cfg),
    dict(type='Pad', size=crop_size, pad_val=0, seg_pad_val=255),
    dict(type='DefaultFormatBundle'),
    dict(type='Collect', keys=['img', 'gt_semantic_seg'])
]
test_pipeline = [
    dict(type='LoadImageFromFile'),
    dict(
        type='MultiScaleFlipAug',
        img_scale=img_scale,
        # img_ratios=[0.5, 0.75, 1.0, 1.25, 1.5, 1.75, 2.0],
        flip=False,
        transforms=[
            dict(type='Resize', keep_ratio=True),
            dict(type='RandomFlip'),
            dict(type='Normalize', **img_norm_cfg),
            dict(type='ImageToTensor', keys=['img']),
            dict(type='Collect', keys=['img'])
        ])
]

data = dict(
    samples_per_gpu=4,
    workers_per_gpu=4,
    train=dict(
        type='RepeatDataset',
        times=40000,
        dataset=dict(
            type=dataset_type,
            data_root=data_root,
            img_dir='images/training',
            ann_dir='annotations/training',
            pipeline=train_pipeline)),
    val=dict(
        type=dataset_type,
        data_root=data_root,
        img_dir='images/validation',
        ann_dir='annotations/validation',
        pipeline=test_pipeline),
    test=dict(
        type=dataset_type,
        data_root=data_root,
        img_dir='images/validation',
        ann_dir='annotations/validation',
        pipeline=test_pipeline))
@@ -0,0 +1,54 @@
# dataset settings
dataset_type = 'CityscapesDataset'
data_root = 'data/cityscapes/'
img_norm_cfg = dict(
    mean=[123.675, 116.28, 103.53], std=[58.395, 57.12, 57.375], to_rgb=True)
crop_size = (512, 1024)
train_pipeline = [
    dict(type='LoadImageFromFile'),
    dict(type='LoadAnnotations'),
    dict(type='Resize', img_scale=(2048, 1024), ratio_range=(0.5, 2.0)),
    dict(type='RandomCrop', crop_size=crop_size, cat_max_ratio=0.75),
    dict(type='RandomFlip', prob=0.5),
    dict(type='PhotoMetricDistortion'),
    dict(type='Normalize', **img_norm_cfg),
    dict(type='Pad', size=crop_size, pad_val=0, seg_pad_val=255),
    dict(type='DefaultFormatBundle'),
    dict(type='Collect', keys=['img', 'gt_semantic_seg']),
]
test_pipeline = [
    dict(type='LoadImageFromFile'),
    dict(
        type='MultiScaleFlipAug',
        img_scale=(2048, 1024),
        # img_ratios=[0.5, 0.75, 1.0, 1.25, 1.5, 1.75],
        flip=False,
        transforms=[
            dict(type='Resize', keep_ratio=True),
            dict(type='RandomFlip'),
            dict(type='Normalize', **img_norm_cfg),
            dict(type='ImageToTensor', keys=['img']),
            dict(type='Collect', keys=['img']),
        ])
]
data = dict(
    samples_per_gpu=2,
    workers_per_gpu=2,
    train=dict(
        type=dataset_type,
        data_root=data_root,
        img_dir='leftImg8bit/train',
        ann_dir='gtFine/train',
        pipeline=train_pipeline),
    val=dict(
        type=dataset_type,
        data_root=data_root,
        img_dir='leftImg8bit/val',
        ann_dir='gtFine/val',
        pipeline=test_pipeline),
    test=dict(
        type=dataset_type,
        data_root=data_root,
        img_dir='leftImg8bit/val',
        ann_dir='gtFine/val',
        pipeline=test_pipeline))
@@ -0,0 +1,35 @@
_base_ = './cityscapes.py'
img_norm_cfg = dict(
    mean=[123.675, 116.28, 103.53], std=[58.395, 57.12, 57.375], to_rgb=True)
crop_size = (769, 769)
train_pipeline = [
    dict(type='LoadImageFromFile'),
    dict(type='LoadAnnotations'),
    dict(type='Resize', img_scale=(2049, 1025), ratio_range=(0.5, 2.0)),
    dict(type='RandomCrop', crop_size=crop_size, cat_max_ratio=0.75),
    dict(type='RandomFlip', prob=0.5),
    dict(type='PhotoMetricDistortion'),
    dict(type='Normalize', **img_norm_cfg),
    dict(type='Pad', size=crop_size, pad_val=0, seg_pad_val=255),
    dict(type='DefaultFormatBundle'),
    dict(type='Collect', keys=['img', 'gt_semantic_seg']),
]
test_pipeline = [
    dict(type='LoadImageFromFile'),
    dict(
        type='MultiScaleFlipAug',
        img_scale=(2049, 1025),
        # img_ratios=[0.5, 0.75, 1.0, 1.25, 1.5, 1.75],
        flip=False,
        transforms=[
            dict(type='Resize', keep_ratio=True),
            dict(type='RandomFlip'),
            dict(type='Normalize', **img_norm_cfg),
            dict(type='ImageToTensor', keys=['img']),
            dict(type='Collect', keys=['img']),
        ])
]
data = dict(
    train=dict(pipeline=train_pipeline),
    val=dict(pipeline=test_pipeline),
    test=dict(pipeline=test_pipeline))
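A side note on how these dataset files compose: a config that sets _base_, like the 769x769 variant above, is merged over its parent when loaded. A small sketch, assuming a standard mmcv (pre-2.0) installation and hypothetical local copies of the two files:

from mmcv import Config

# Resolves _base_ = './cityscapes.py' first, then overlays the 769x769 pipelines on top.
cfg = Config.fromfile('cityscapes_769x769.py')  # hypothetical filename for the config above
print(cfg.crop_size)             # (769, 769), overridden by the child config
print(cfg.data.samples_per_gpu)  # 2, inherited from the parent Cityscapes config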
@@ -0,0 +1,59 @@
# dataset settings
dataset_type = 'DRIVEDataset'
data_root = 'data/DRIVE'
img_norm_cfg = dict(
    mean=[123.675, 116.28, 103.53], std=[58.395, 57.12, 57.375], to_rgb=True)
img_scale = (584, 565)
crop_size = (64, 64)
train_pipeline = [
    dict(type='LoadImageFromFile'),
    dict(type='LoadAnnotations'),
    dict(type='Resize', img_scale=img_scale, ratio_range=(0.5, 2.0)),
    dict(type='RandomCrop', crop_size=crop_size, cat_max_ratio=0.75),
    dict(type='RandomFlip', prob=0.5),
    dict(type='PhotoMetricDistortion'),
    dict(type='Normalize', **img_norm_cfg),
    dict(type='Pad', size=crop_size, pad_val=0, seg_pad_val=255),
    dict(type='DefaultFormatBundle'),
    dict(type='Collect', keys=['img', 'gt_semantic_seg'])
]
test_pipeline = [
    dict(type='LoadImageFromFile'),
    dict(
        type='MultiScaleFlipAug',
        img_scale=img_scale,
        # img_ratios=[0.5, 0.75, 1.0, 1.25, 1.5, 1.75, 2.0],
        flip=False,
        transforms=[
            dict(type='Resize', keep_ratio=True),
            dict(type='RandomFlip'),
            dict(type='Normalize', **img_norm_cfg),
            dict(type='ImageToTensor', keys=['img']),
            dict(type='Collect', keys=['img'])
        ])
]

data = dict(
    samples_per_gpu=4,
    workers_per_gpu=4,
    train=dict(
        type='RepeatDataset',
        times=40000,
        dataset=dict(
            type=dataset_type,
            data_root=data_root,
            img_dir='images/training',
            ann_dir='annotations/training',
            pipeline=train_pipeline)),
    val=dict(
        type=dataset_type,
        data_root=data_root,
        img_dir='images/validation',
        ann_dir='annotations/validation',
        pipeline=test_pipeline),
    test=dict(
        type=dataset_type,
        data_root=data_root,
        img_dir='images/validation',
        ann_dir='annotations/validation',
        pipeline=test_pipeline))
@@ -0,0 +1,59 @@
# dataset settings
dataset_type = 'HRFDataset'
data_root = 'data/HRF'
img_norm_cfg = dict(
    mean=[123.675, 116.28, 103.53], std=[58.395, 57.12, 57.375], to_rgb=True)
img_scale = (2336, 3504)
crop_size = (256, 256)
train_pipeline = [
    dict(type='LoadImageFromFile'),
    dict(type='LoadAnnotations'),
    dict(type='Resize', img_scale=img_scale, ratio_range=(0.5, 2.0)),
    dict(type='RandomCrop', crop_size=crop_size, cat_max_ratio=0.75),
    dict(type='RandomFlip', prob=0.5),
    dict(type='PhotoMetricDistortion'),
    dict(type='Normalize', **img_norm_cfg),
    dict(type='Pad', size=crop_size, pad_val=0, seg_pad_val=255),
    dict(type='DefaultFormatBundle'),
    dict(type='Collect', keys=['img', 'gt_semantic_seg'])
]
test_pipeline = [
    dict(type='LoadImageFromFile'),
    dict(
        type='MultiScaleFlipAug',
        img_scale=img_scale,
        # img_ratios=[0.5, 0.75, 1.0, 1.25, 1.5, 1.75, 2.0],
        flip=False,
        transforms=[
            dict(type='Resize', keep_ratio=True),
            dict(type='RandomFlip'),
            dict(type='Normalize', **img_norm_cfg),
            dict(type='ImageToTensor', keys=['img']),
            dict(type='Collect', keys=['img'])
        ])
]

data = dict(
    samples_per_gpu=4,
    workers_per_gpu=4,
    train=dict(
        type='RepeatDataset',
        times=40000,
        dataset=dict(
            type=dataset_type,
            data_root=data_root,
            img_dir='images/training',
            ann_dir='annotations/training',
            pipeline=train_pipeline)),
    val=dict(
        type=dataset_type,
        data_root=data_root,
        img_dir='images/validation',
        ann_dir='annotations/validation',
        pipeline=test_pipeline),
    test=dict(
        type=dataset_type,
        data_root=data_root,
        img_dir='images/validation',
        ann_dir='annotations/validation',
        pipeline=test_pipeline))
@@ -0,0 +1,60 @@
# dataset settings
dataset_type = 'PascalContextDataset'
data_root = 'data/VOCdevkit/VOC2010/'
img_norm_cfg = dict(
    mean=[123.675, 116.28, 103.53], std=[58.395, 57.12, 57.375], to_rgb=True)

img_scale = (520, 520)
crop_size = (480, 480)

train_pipeline = [
    dict(type='LoadImageFromFile'),
    dict(type='LoadAnnotations'),
    dict(type='Resize', img_scale=img_scale, ratio_range=(0.5, 2.0)),
    dict(type='RandomCrop', crop_size=crop_size, cat_max_ratio=0.75),
    dict(type='RandomFlip', prob=0.5),
    dict(type='PhotoMetricDistortion'),
    dict(type='Normalize', **img_norm_cfg),
    dict(type='Pad', size=crop_size, pad_val=0, seg_pad_val=255),
    dict(type='DefaultFormatBundle'),
    dict(type='Collect', keys=['img', 'gt_semantic_seg']),
]
test_pipeline = [
    dict(type='LoadImageFromFile'),
    dict(
        type='MultiScaleFlipAug',
        img_scale=img_scale,
        # img_ratios=[0.5, 0.75, 1.0, 1.25, 1.5, 1.75],
        flip=False,
        transforms=[
            dict(type='Resize', keep_ratio=True),
            dict(type='RandomFlip'),
            dict(type='Normalize', **img_norm_cfg),
            dict(type='ImageToTensor', keys=['img']),
            dict(type='Collect', keys=['img']),
        ])
]
data = dict(
    samples_per_gpu=4,
    workers_per_gpu=4,
    train=dict(
        type=dataset_type,
        data_root=data_root,
        img_dir='JPEGImages',
        ann_dir='SegmentationClassContext',
        split='ImageSets/SegmentationContext/train.txt',
        pipeline=train_pipeline),
    val=dict(
        type=dataset_type,
        data_root=data_root,
        img_dir='JPEGImages',
        ann_dir='SegmentationClassContext',
        split='ImageSets/SegmentationContext/val.txt',
        pipeline=test_pipeline),
    test=dict(
        type=dataset_type,
        data_root=data_root,
        img_dir='JPEGImages',
        ann_dir='SegmentationClassContext',
        split='ImageSets/SegmentationContext/val.txt',
        pipeline=test_pipeline))
@@ -0,0 +1,60 @@
# dataset settings
dataset_type = 'PascalContextDataset59'
data_root = 'data/VOCdevkit/VOC2010/'
img_norm_cfg = dict(
    mean=[123.675, 116.28, 103.53], std=[58.395, 57.12, 57.375], to_rgb=True)

img_scale = (520, 520)
crop_size = (480, 480)

train_pipeline = [
    dict(type='LoadImageFromFile'),
    dict(type='LoadAnnotations', reduce_zero_label=True),
    dict(type='Resize', img_scale=img_scale, ratio_range=(0.5, 2.0)),
    dict(type='RandomCrop', crop_size=crop_size, cat_max_ratio=0.75),
    dict(type='RandomFlip', prob=0.5),
    dict(type='PhotoMetricDistortion'),
    dict(type='Normalize', **img_norm_cfg),
    dict(type='Pad', size=crop_size, pad_val=0, seg_pad_val=255),
    dict(type='DefaultFormatBundle'),
    dict(type='Collect', keys=['img', 'gt_semantic_seg']),
]
test_pipeline = [
    dict(type='LoadImageFromFile'),
    dict(
        type='MultiScaleFlipAug',
        img_scale=img_scale,
        # img_ratios=[0.5, 0.75, 1.0, 1.25, 1.5, 1.75],
        flip=False,
        transforms=[
            dict(type='Resize', keep_ratio=True),
            dict(type='RandomFlip'),
            dict(type='Normalize', **img_norm_cfg),
            dict(type='ImageToTensor', keys=['img']),
            dict(type='Collect', keys=['img']),
        ])
]
data = dict(
    samples_per_gpu=4,
    workers_per_gpu=4,
    train=dict(
        type=dataset_type,
        data_root=data_root,
        img_dir='JPEGImages',
        ann_dir='SegmentationClassContext',
        split='ImageSets/SegmentationContext/train.txt',
        pipeline=train_pipeline),
    val=dict(
        type=dataset_type,
        data_root=data_root,
        img_dir='JPEGImages',
        ann_dir='SegmentationClassContext',
        split='ImageSets/SegmentationContext/val.txt',
        pipeline=test_pipeline),
    test=dict(
        type=dataset_type,
        data_root=data_root,
        img_dir='JPEGImages',
        ann_dir='SegmentationClassContext',
        split='ImageSets/SegmentationContext/val.txt',
        pipeline=test_pipeline))
@@ -0,0 +1,57 @@
# dataset settings
dataset_type = 'PascalVOCDataset'
data_root = 'data/VOCdevkit/VOC2012'
img_norm_cfg = dict(
    mean=[123.675, 116.28, 103.53], std=[58.395, 57.12, 57.375], to_rgb=True)
crop_size = (512, 512)
train_pipeline = [
    dict(type='LoadImageFromFile'),
    dict(type='LoadAnnotations'),
    dict(type='Resize', img_scale=(2048, 512), ratio_range=(0.5, 2.0)),
    dict(type='RandomCrop', crop_size=crop_size, cat_max_ratio=0.75),
    dict(type='RandomFlip', prob=0.5),
    dict(type='PhotoMetricDistortion'),
    dict(type='Normalize', **img_norm_cfg),
    dict(type='Pad', size=crop_size, pad_val=0, seg_pad_val=255),
    dict(type='DefaultFormatBundle'),
    dict(type='Collect', keys=['img', 'gt_semantic_seg']),
]
test_pipeline = [
    dict(type='LoadImageFromFile'),
    dict(
        type='MultiScaleFlipAug',
        img_scale=(2048, 512),
        # img_ratios=[0.5, 0.75, 1.0, 1.25, 1.5, 1.75],
        flip=False,
        transforms=[
            dict(type='Resize', keep_ratio=True),
            dict(type='RandomFlip'),
            dict(type='Normalize', **img_norm_cfg),
            dict(type='ImageToTensor', keys=['img']),
            dict(type='Collect', keys=['img']),
        ])
]
data = dict(
    samples_per_gpu=4,
    workers_per_gpu=4,
    train=dict(
        type=dataset_type,
        data_root=data_root,
        img_dir='JPEGImages',
        ann_dir='SegmentationClass',
        split='ImageSets/Segmentation/train.txt',
        pipeline=train_pipeline),
    val=dict(
        type=dataset_type,
        data_root=data_root,
        img_dir='JPEGImages',
        ann_dir='SegmentationClass',
        split='ImageSets/Segmentation/val.txt',
        pipeline=test_pipeline),
    test=dict(
        type=dataset_type,
        data_root=data_root,
        img_dir='JPEGImages',
        ann_dir='SegmentationClass',
        split='ImageSets/Segmentation/val.txt',
        pipeline=test_pipeline))
@@ -0,0 +1,9 @@
_base_ = './pascal_voc12.py'
# dataset settings
data = dict(
    train=dict(
        ann_dir=['SegmentationClass', 'SegmentationClassAug'],
        split=[
            'ImageSets/Segmentation/train.txt',
            'ImageSets/Segmentation/aug.txt'
        ]))
@@ -0,0 +1,59 @@
# dataset settings
dataset_type = 'STAREDataset'
data_root = 'data/STARE'
img_norm_cfg = dict(
    mean=[123.675, 116.28, 103.53], std=[58.395, 57.12, 57.375], to_rgb=True)
img_scale = (605, 700)
crop_size = (128, 128)
train_pipeline = [
    dict(type='LoadImageFromFile'),
    dict(type='LoadAnnotations'),
    dict(type='Resize', img_scale=img_scale, ratio_range=(0.5, 2.0)),
    dict(type='RandomCrop', crop_size=crop_size, cat_max_ratio=0.75),
    dict(type='RandomFlip', prob=0.5),
    dict(type='PhotoMetricDistortion'),
    dict(type='Normalize', **img_norm_cfg),
    dict(type='Pad', size=crop_size, pad_val=0, seg_pad_val=255),
    dict(type='DefaultFormatBundle'),
    dict(type='Collect', keys=['img', 'gt_semantic_seg'])
]
test_pipeline = [
    dict(type='LoadImageFromFile'),
    dict(
        type='MultiScaleFlipAug',
        img_scale=img_scale,
        # img_ratios=[0.5, 0.75, 1.0, 1.25, 1.5, 1.75, 2.0],
        flip=False,
        transforms=[
            dict(type='Resize', keep_ratio=True),
            dict(type='RandomFlip'),
            dict(type='Normalize', **img_norm_cfg),
            dict(type='ImageToTensor', keys=['img']),
            dict(type='Collect', keys=['img'])
        ])
]

data = dict(
    samples_per_gpu=4,
    workers_per_gpu=4,
    train=dict(
        type='RepeatDataset',
        times=40000,
        dataset=dict(
            type=dataset_type,
            data_root=data_root,
            img_dir='images/training',
            ann_dir='annotations/training',
            pipeline=train_pipeline)),
    val=dict(
        type=dataset_type,
        data_root=data_root,
        img_dir='images/validation',
        ann_dir='annotations/validation',
        pipeline=test_pipeline),
    test=dict(
        type=dataset_type,
        data_root=data_root,
        img_dir='images/validation',
        ann_dir='annotations/validation',
        pipeline=test_pipeline))
@@ -0,0 +1,14 @@
# yapf:disable
log_config = dict(
    interval=50,
    hooks=[
        dict(type='TextLoggerHook', by_epoch=False),
        # dict(type='TensorboardLoggerHook')
    ])
# yapf:enable
dist_params = dict(backend='nccl')
log_level = 'INFO'
load_from = None
resume_from = None
workflow = [('train', 1)]
cudnn_benchmark = True
@@ -0,0 +1,46 @@
# model settings
norm_cfg = dict(type='SyncBN', requires_grad=True)
model = dict(
    type='EncoderDecoder',
    pretrained='open-mmlab://resnet50_v1c',
    backbone=dict(
        type='ResNetV1c',
        depth=50,
        num_stages=4,
        out_indices=(0, 1, 2, 3),
        dilations=(1, 1, 2, 4),
        strides=(1, 2, 1, 1),
        norm_cfg=norm_cfg,
        norm_eval=False,
        style='pytorch',
        contract_dilation=True),
    decode_head=dict(
        type='ANNHead',
        in_channels=[1024, 2048],
        in_index=[2, 3],
        channels=512,
        project_channels=256,
        query_scales=(1, ),
        key_pool_scales=(1, 3, 6, 8),
        dropout_ratio=0.1,
        num_classes=19,
        norm_cfg=norm_cfg,
        align_corners=False,
        loss_decode=dict(
            type='CrossEntropyLoss', use_sigmoid=False, loss_weight=1.0)),
    auxiliary_head=dict(
        type='FCNHead',
        in_channels=1024,
        in_index=2,
        channels=256,
        num_convs=1,
        concat_input=False,
        dropout_ratio=0.1,
        num_classes=19,
        norm_cfg=norm_cfg,
        align_corners=False,
        loss_decode=dict(
            type='CrossEntropyLoss', use_sigmoid=False, loss_weight=0.4)),
    # model training and testing settings
    train_cfg=dict(),
    test_cfg=dict(mode='whole'))
@@ -0,0 +1,44 @@
# model settings
norm_cfg = dict(type='SyncBN', requires_grad=True)
model = dict(
    type='EncoderDecoder',
    pretrained='open-mmlab://resnet50_v1c',
    backbone=dict(
        type='ResNetV1c',
        depth=50,
        num_stages=4,
        out_indices=(0, 1, 2, 3),
        dilations=(1, 1, 2, 4),
        strides=(1, 2, 1, 1),
        norm_cfg=norm_cfg,
        norm_eval=False,
        style='pytorch',
        contract_dilation=True),
    decode_head=dict(
        type='APCHead',
        in_channels=2048,
        in_index=3,
        channels=512,
        pool_scales=(1, 2, 3, 6),
        dropout_ratio=0.1,
        num_classes=19,
        norm_cfg=dict(type='SyncBN', requires_grad=True),
        align_corners=False,
        loss_decode=dict(
            type='CrossEntropyLoss', use_sigmoid=False, loss_weight=1.0)),
    auxiliary_head=dict(
        type='FCNHead',
        in_channels=1024,
        in_index=2,
        channels=256,
        num_convs=1,
        concat_input=False,
        dropout_ratio=0.1,
        num_classes=19,
        norm_cfg=norm_cfg,
        align_corners=False,
        loss_decode=dict(
            type='CrossEntropyLoss', use_sigmoid=False, loss_weight=0.4)),
    # model training and testing settings
    train_cfg=dict(),
    test_cfg=dict(mode='whole'))
@@ -0,0 +1,44 @@
# model settings
norm_cfg = dict(type='SyncBN', requires_grad=True)
model = dict(
    type='EncoderDecoder',
    pretrained='open-mmlab://resnet50_v1c',
    backbone=dict(
        type='ResNetV1c',
        depth=50,
        num_stages=4,
        out_indices=(0, 1, 2, 3),
        dilations=(1, 1, 2, 4),
        strides=(1, 2, 1, 1),
        norm_cfg=norm_cfg,
        norm_eval=False,
        style='pytorch',
        contract_dilation=True),
    decode_head=dict(
        type='CCHead',
        in_channels=2048,
        in_index=3,
        channels=512,
        recurrence=2,
        dropout_ratio=0.1,
        num_classes=19,
        norm_cfg=norm_cfg,
        align_corners=False,
        loss_decode=dict(
            type='CrossEntropyLoss', use_sigmoid=False, loss_weight=1.0)),
    auxiliary_head=dict(
        type='FCNHead',
        in_channels=1024,
        in_index=2,
        channels=256,
        num_convs=1,
        concat_input=False,
        dropout_ratio=0.1,
        num_classes=19,
        norm_cfg=norm_cfg,
        align_corners=False,
        loss_decode=dict(
            type='CrossEntropyLoss', use_sigmoid=False, loss_weight=0.4)),
    # model training and testing settings
    train_cfg=dict(),
    test_cfg=dict(mode='whole'))
@@ -0,0 +1,35 @@
# model settings
norm_cfg = dict(type='SyncBN', eps=1e-03, requires_grad=True)
model = dict(
    type='EncoderDecoder',
    backbone=dict(
        type='CGNet',
        norm_cfg=norm_cfg,
        in_channels=3,
        num_channels=(32, 64, 128),
        num_blocks=(3, 21),
        dilations=(2, 4),
        reductions=(8, 16)),
    decode_head=dict(
        type='FCNHead',
        in_channels=256,
        in_index=2,
        channels=256,
        num_convs=0,
        concat_input=False,
        dropout_ratio=0,
        num_classes=19,
        norm_cfg=norm_cfg,
        loss_decode=dict(
            type='CrossEntropyLoss',
            use_sigmoid=False,
            loss_weight=1.0,
            class_weight=[
                2.5959933, 6.7415504, 3.5354059, 9.8663225, 9.690899, 9.369352,
                10.289121, 9.953208, 4.3097677, 9.490387, 7.674431, 9.396905,
                10.347791, 6.3927646, 10.226669, 10.241062, 10.280587,
                10.396974, 10.055647
            ])),
    # model training and testing settings
    train_cfg=dict(sampler=None),
    test_cfg=dict(mode='whole'))
@@ -0,0 +1,44 @@
# model settings
norm_cfg = dict(type='SyncBN', requires_grad=True)
model = dict(
    type='EncoderDecoder',
    pretrained='open-mmlab://resnet50_v1c',
    backbone=dict(
        type='ResNetV1c',
        depth=50,
        num_stages=4,
        out_indices=(0, 1, 2, 3),
        dilations=(1, 1, 2, 4),
        strides=(1, 2, 1, 1),
        norm_cfg=norm_cfg,
        norm_eval=False,
        style='pytorch',
        contract_dilation=True),
    decode_head=dict(
        type='DAHead',
        in_channels=2048,
        in_index=3,
        channels=512,
        pam_channels=64,
        dropout_ratio=0.1,
        num_classes=19,
        norm_cfg=norm_cfg,
        align_corners=False,
        loss_decode=dict(
            type='CrossEntropyLoss', use_sigmoid=False, loss_weight=1.0)),
    auxiliary_head=dict(
        type='FCNHead',
        in_channels=1024,
        in_index=2,
        channels=256,
        num_convs=1,
        concat_input=False,
        dropout_ratio=0.1,
        num_classes=19,
        norm_cfg=norm_cfg,
        align_corners=False,
        loss_decode=dict(
            type='CrossEntropyLoss', use_sigmoid=False, loss_weight=0.4)),
    # model training and testing settings
    train_cfg=dict(),
    test_cfg=dict(mode='whole'))
@@ -0,0 +1,44 @@
# model settings
norm_cfg = dict(type='SyncBN', requires_grad=True)
model = dict(
    type='EncoderDecoder',
    pretrained='open-mmlab://resnet50_v1c',
    backbone=dict(
        type='ResNetV1c',
        depth=50,
        num_stages=4,
        out_indices=(0, 1, 2, 3),
        dilations=(1, 1, 2, 4),
        strides=(1, 2, 1, 1),
        norm_cfg=norm_cfg,
        norm_eval=False,
        style='pytorch',
        contract_dilation=True),
    decode_head=dict(
        type='ASPPHead',
        in_channels=2048,
        in_index=3,
        channels=512,
        dilations=(1, 12, 24, 36),
        dropout_ratio=0.1,
        num_classes=19,
        norm_cfg=norm_cfg,
        align_corners=False,
        loss_decode=dict(
            type='CrossEntropyLoss', use_sigmoid=False, loss_weight=1.0)),
    auxiliary_head=dict(
        type='FCNHead',
        in_channels=1024,
        in_index=2,
        channels=256,
        num_convs=1,
        concat_input=False,
        dropout_ratio=0.1,
        num_classes=19,
        norm_cfg=norm_cfg,
        align_corners=False,
        loss_decode=dict(
            type='CrossEntropyLoss', use_sigmoid=False, loss_weight=0.4)),
    # model training and testing settings
    train_cfg=dict(),
    test_cfg=dict(mode='whole'))
@@ -0,0 +1,50 @@
# model settings
norm_cfg = dict(type='SyncBN', requires_grad=True)
model = dict(
    type='EncoderDecoder',
    pretrained=None,
    backbone=dict(
        type='UNet',
        in_channels=3,
        base_channels=64,
        num_stages=5,
        strides=(1, 1, 1, 1, 1),
        enc_num_convs=(2, 2, 2, 2, 2),
        dec_num_convs=(2, 2, 2, 2),
        downsamples=(True, True, True, True),
        enc_dilations=(1, 1, 1, 1, 1),
        dec_dilations=(1, 1, 1, 1),
        with_cp=False,
        conv_cfg=None,
        norm_cfg=norm_cfg,
        act_cfg=dict(type='ReLU'),
        upsample_cfg=dict(type='InterpConv'),
        norm_eval=False),
    decode_head=dict(
        type='ASPPHead',
        in_channels=64,
        in_index=4,
        channels=16,
        dilations=(1, 12, 24, 36),
        dropout_ratio=0.1,
        num_classes=2,
        norm_cfg=norm_cfg,
        align_corners=False,
        loss_decode=dict(
            type='CrossEntropyLoss', use_sigmoid=False, loss_weight=1.0)),
    auxiliary_head=dict(
        type='FCNHead',
        in_channels=128,
        in_index=3,
        channels=64,
        num_convs=1,
        concat_input=False,
        dropout_ratio=0.1,
        num_classes=2,
        norm_cfg=norm_cfg,
        align_corners=False,
        loss_decode=dict(
            type='CrossEntropyLoss', use_sigmoid=False, loss_weight=0.4)),
    # model training and testing settings
    train_cfg=dict(),
    test_cfg=dict(mode='slide', crop_size=256, stride=170))
@@ -0,0 +1,46 @@
# model settings
norm_cfg = dict(type='SyncBN', requires_grad=True)
model = dict(
    type='EncoderDecoder',
    pretrained='open-mmlab://resnet50_v1c',
    backbone=dict(
        type='ResNetV1c',
        depth=50,
        num_stages=4,
        out_indices=(0, 1, 2, 3),
        dilations=(1, 1, 2, 4),
        strides=(1, 2, 1, 1),
        norm_cfg=norm_cfg,
        norm_eval=False,
        style='pytorch',
        contract_dilation=True),
    decode_head=dict(
        type='DepthwiseSeparableASPPHead',
        in_channels=2048,
        in_index=3,
        channels=512,
        dilations=(1, 12, 24, 36),
        c1_in_channels=256,
        c1_channels=48,
        dropout_ratio=0.1,
        num_classes=19,
        norm_cfg=norm_cfg,
        align_corners=False,
        loss_decode=dict(
            type='CrossEntropyLoss', use_sigmoid=False, loss_weight=1.0)),
    auxiliary_head=dict(
        type='FCNHead',
        in_channels=1024,
        in_index=2,
        channels=256,
        num_convs=1,
        concat_input=False,
        dropout_ratio=0.1,
        num_classes=19,
        norm_cfg=norm_cfg,
        align_corners=False,
        loss_decode=dict(
            type='CrossEntropyLoss', use_sigmoid=False, loss_weight=0.4)),
    # model training and testing settings
    train_cfg=dict(),
    test_cfg=dict(mode='whole'))
@@ -0,0 +1,44 @@
# model settings
norm_cfg = dict(type='SyncBN', requires_grad=True)
model = dict(
    type='EncoderDecoder',
    pretrained='open-mmlab://resnet50_v1c',
    backbone=dict(
        type='ResNetV1c',
        depth=50,
        num_stages=4,
        out_indices=(0, 1, 2, 3),
        dilations=(1, 1, 2, 4),
        strides=(1, 2, 1, 1),
        norm_cfg=norm_cfg,
        norm_eval=False,
        style='pytorch',
        contract_dilation=True),
    decode_head=dict(
        type='DMHead',
        in_channels=2048,
        in_index=3,
        channels=512,
        filter_sizes=(1, 3, 5, 7),
        dropout_ratio=0.1,
        num_classes=19,
        norm_cfg=dict(type='SyncBN', requires_grad=True),
        align_corners=False,
        loss_decode=dict(
            type='CrossEntropyLoss', use_sigmoid=False, loss_weight=1.0)),
    auxiliary_head=dict(
        type='FCNHead',
        in_channels=1024,
        in_index=2,
        channels=256,
        num_convs=1,
        concat_input=False,
        dropout_ratio=0.1,
        num_classes=19,
        norm_cfg=norm_cfg,
        align_corners=False,
        loss_decode=dict(
            type='CrossEntropyLoss', use_sigmoid=False, loss_weight=0.4)),
    # model training and testing settings
    train_cfg=dict(),
    test_cfg=dict(mode='whole'))
@@ -0,0 +1,46 @@
# model settings
norm_cfg = dict(type='SyncBN', requires_grad=True)
model = dict(
    type='EncoderDecoder',
    pretrained='open-mmlab://resnet50_v1c',
    backbone=dict(
        type='ResNetV1c',
        depth=50,
        num_stages=4,
        out_indices=(0, 1, 2, 3),
        dilations=(1, 1, 2, 4),
        strides=(1, 2, 1, 1),
        norm_cfg=norm_cfg,
        norm_eval=False,
        style='pytorch',
        contract_dilation=True),
    decode_head=dict(
        type='DNLHead',
        in_channels=2048,
        in_index=3,
        channels=512,
        dropout_ratio=0.1,
        reduction=2,
        use_scale=True,
        mode='embedded_gaussian',
        num_classes=19,
        norm_cfg=norm_cfg,
        align_corners=False,
        loss_decode=dict(
            type='CrossEntropyLoss', use_sigmoid=False, loss_weight=1.0)),
    auxiliary_head=dict(
        type='FCNHead',
        in_channels=1024,
        in_index=2,
        channels=256,
        num_convs=1,
        concat_input=False,
        dropout_ratio=0.1,
        num_classes=19,
        norm_cfg=norm_cfg,
        align_corners=False,
        loss_decode=dict(
            type='CrossEntropyLoss', use_sigmoid=False, loss_weight=0.4)),
    # model training and testing settings
    train_cfg=dict(),
    test_cfg=dict(mode='whole'))
@@ -0,0 +1,47 @@
# model settings
norm_cfg = dict(type='SyncBN', requires_grad=True)
model = dict(
    type='EncoderDecoder',
    pretrained='open-mmlab://resnet50_v1c',
    backbone=dict(
        type='ResNetV1c',
        depth=50,
        num_stages=4,
        out_indices=(0, 1, 2, 3),
        dilations=(1, 1, 2, 4),
        strides=(1, 2, 1, 1),
        norm_cfg=norm_cfg,
        norm_eval=False,
        style='pytorch',
        contract_dilation=True),
    decode_head=dict(
        type='EMAHead',
        in_channels=2048,
        in_index=3,
        channels=256,
        ema_channels=512,
        num_bases=64,
        num_stages=3,
        momentum=0.1,
        dropout_ratio=0.1,
        num_classes=19,
        norm_cfg=norm_cfg,
        align_corners=False,
        loss_decode=dict(
            type='CrossEntropyLoss', use_sigmoid=False, loss_weight=1.0)),
    auxiliary_head=dict(
        type='FCNHead',
        in_channels=1024,
        in_index=2,
        channels=256,
        num_convs=1,
        concat_input=False,
        dropout_ratio=0.1,
        num_classes=19,
        norm_cfg=norm_cfg,
        align_corners=False,
        loss_decode=dict(
            type='CrossEntropyLoss', use_sigmoid=False, loss_weight=0.4)),
    # model training and testing settings
    train_cfg=dict(),
    test_cfg=dict(mode='whole'))
@@ -0,0 +1,48 @@
# model settings
norm_cfg = dict(type='SyncBN', requires_grad=True)
model = dict(
    type='EncoderDecoder',
    pretrained='open-mmlab://resnet50_v1c',
    backbone=dict(
        type='ResNetV1c',
        depth=50,
        num_stages=4,
        out_indices=(0, 1, 2, 3),
        dilations=(1, 1, 2, 4),
        strides=(1, 2, 1, 1),
        norm_cfg=norm_cfg,
        norm_eval=False,
        style='pytorch',
        contract_dilation=True),
    decode_head=dict(
        type='EncHead',
        in_channels=[512, 1024, 2048],
        in_index=(1, 2, 3),
        channels=512,
        num_codes=32,
        use_se_loss=True,
        add_lateral=False,
        dropout_ratio=0.1,
        num_classes=19,
        norm_cfg=norm_cfg,
        align_corners=False,
        loss_decode=dict(
            type='CrossEntropyLoss', use_sigmoid=False, loss_weight=1.0),
        loss_se_decode=dict(
            type='CrossEntropyLoss', use_sigmoid=True, loss_weight=0.2)),
    auxiliary_head=dict(
        type='FCNHead',
        in_channels=1024,
        in_index=2,
        channels=256,
        num_convs=1,
        concat_input=False,
        dropout_ratio=0.1,
        num_classes=19,
        norm_cfg=norm_cfg,
        align_corners=False,
        loss_decode=dict(
            type='CrossEntropyLoss', use_sigmoid=False, loss_weight=0.4)),
    # model training and testing settings
    train_cfg=dict(),
    test_cfg=dict(mode='whole'))
@@ -0,0 +1,57 @@
# model settings
norm_cfg = dict(type='SyncBN', requires_grad=True, momentum=0.01)
model = dict(
    type='EncoderDecoder',
    backbone=dict(
        type='FastSCNN',
        downsample_dw_channels=(32, 48),
        global_in_channels=64,
        global_block_channels=(64, 96, 128),
        global_block_strides=(2, 2, 1),
        global_out_channels=128,
        higher_in_channels=64,
        lower_in_channels=128,
        fusion_out_channels=128,
        out_indices=(0, 1, 2),
        norm_cfg=norm_cfg,
        align_corners=False),
    decode_head=dict(
        type='DepthwiseSeparableFCNHead',
        in_channels=128,
        channels=128,
        concat_input=False,
        num_classes=19,
        in_index=-1,
        norm_cfg=norm_cfg,
        align_corners=False,
        loss_decode=dict(
            type='CrossEntropyLoss', use_sigmoid=True, loss_weight=0.4)),
    auxiliary_head=[
        dict(
            type='FCNHead',
            in_channels=128,
            channels=32,
            num_convs=1,
            num_classes=19,
            in_index=-2,
            norm_cfg=norm_cfg,
            concat_input=False,
            align_corners=False,
            loss_decode=dict(
                type='CrossEntropyLoss', use_sigmoid=True, loss_weight=0.4)),
        dict(
            type='FCNHead',
            in_channels=64,
            channels=32,
            num_convs=1,
            num_classes=19,
            in_index=-3,
            norm_cfg=norm_cfg,
            concat_input=False,
            align_corners=False,
            loss_decode=dict(
                type='CrossEntropyLoss', use_sigmoid=True, loss_weight=0.4)),
    ],
    # model training and testing settings
    train_cfg=dict(),
    test_cfg=dict(mode='whole'))
@@ -0,0 +1,52 @@
# model settings
norm_cfg = dict(type='SyncBN', requires_grad=True)
model = dict(
    type='EncoderDecoder',
    pretrained='open-mmlab://msra/hrnetv2_w18',
    backbone=dict(
        type='HRNet',
        norm_cfg=norm_cfg,
        norm_eval=False,
        extra=dict(
            stage1=dict(
                num_modules=1,
                num_branches=1,
                block='BOTTLENECK',
                num_blocks=(4, ),
                num_channels=(64, )),
            stage2=dict(
                num_modules=1,
                num_branches=2,
                block='BASIC',
                num_blocks=(4, 4),
                num_channels=(18, 36)),
            stage3=dict(
                num_modules=4,
                num_branches=3,
                block='BASIC',
                num_blocks=(4, 4, 4),
                num_channels=(18, 36, 72)),
            stage4=dict(
                num_modules=3,
                num_branches=4,
                block='BASIC',
                num_blocks=(4, 4, 4, 4),
                num_channels=(18, 36, 72, 144)))),
    decode_head=dict(
        type='FCNHead',
        in_channels=[18, 36, 72, 144],
        in_index=(0, 1, 2, 3),
        channels=sum([18, 36, 72, 144]),
        input_transform='resize_concat',
        kernel_size=1,
        num_convs=1,
        concat_input=False,
        dropout_ratio=-1,
        num_classes=19,
        norm_cfg=norm_cfg,
        align_corners=False,
        loss_decode=dict(
            type='CrossEntropyLoss', use_sigmoid=False, loss_weight=1.0)),
    # model training and testing settings
    train_cfg=dict(),
    test_cfg=dict(mode='whole'))
@@ -0,0 +1,45 @@
# model settings
norm_cfg = dict(type='SyncBN', requires_grad=True)
model = dict(
    type='EncoderDecoder',
    pretrained='open-mmlab://resnet50_v1c',
    backbone=dict(
        type='ResNetV1c',
        depth=50,
        num_stages=4,
        out_indices=(0, 1, 2, 3),
        dilations=(1, 1, 2, 4),
        strides=(1, 2, 1, 1),
        norm_cfg=norm_cfg,
        norm_eval=False,
        style='pytorch',
        contract_dilation=True),
    decode_head=dict(
        type='FCNHead',
        in_channels=2048,
        in_index=3,
        channels=512,
        num_convs=2,
        concat_input=True,
        dropout_ratio=0.1,
        num_classes=19,
        norm_cfg=norm_cfg,
        align_corners=False,
        loss_decode=dict(
            type='CrossEntropyLoss', use_sigmoid=False, loss_weight=1.0)),
    auxiliary_head=dict(
        type='FCNHead',
        in_channels=1024,
        in_index=2,
        channels=256,
        num_convs=1,
        concat_input=False,
        dropout_ratio=0.1,
        num_classes=19,
        norm_cfg=norm_cfg,
        align_corners=False,
        loss_decode=dict(
            type='CrossEntropyLoss', use_sigmoid=False, loss_weight=0.4)),
    # model training and testing settings
    train_cfg=dict(),
    test_cfg=dict(mode='whole'))
@@ -0,0 +1,51 @@
# model settings
norm_cfg = dict(type='SyncBN', requires_grad=True)
model = dict(
    type='EncoderDecoder',
    pretrained=None,
    backbone=dict(
        type='UNet',
        in_channels=3,
        base_channels=64,
        num_stages=5,
        strides=(1, 1, 1, 1, 1),
        enc_num_convs=(2, 2, 2, 2, 2),
        dec_num_convs=(2, 2, 2, 2),
        downsamples=(True, True, True, True),
        enc_dilations=(1, 1, 1, 1, 1),
        dec_dilations=(1, 1, 1, 1),
        with_cp=False,
        conv_cfg=None,
        norm_cfg=norm_cfg,
        act_cfg=dict(type='ReLU'),
        upsample_cfg=dict(type='InterpConv'),
        norm_eval=False),
    decode_head=dict(
        type='FCNHead',
        in_channels=64,
        in_index=4,
        channels=64,
        num_convs=1,
        concat_input=False,
        dropout_ratio=0.1,
        num_classes=2,
        norm_cfg=norm_cfg,
        align_corners=False,
        loss_decode=dict(
            type='CrossEntropyLoss', use_sigmoid=False, loss_weight=1.0)),
    auxiliary_head=dict(
        type='FCNHead',
        in_channels=128,
        in_index=3,
        channels=64,
        num_convs=1,
        concat_input=False,
        dropout_ratio=0.1,
        num_classes=2,
        norm_cfg=norm_cfg,
        align_corners=False,
        loss_decode=dict(
            type='CrossEntropyLoss', use_sigmoid=False, loss_weight=0.4)),
    # model training and testing settings
    train_cfg=dict(),
    test_cfg=dict(mode='slide', crop_size=256, stride=170))
@@ -0,0 +1,36 @@
# model settings
norm_cfg = dict(type='SyncBN', requires_grad=True)
model = dict(
    type='EncoderDecoder',
    pretrained='open-mmlab://resnet50_v1c',
    backbone=dict(
        type='ResNetV1c',
        depth=50,
        num_stages=4,
        out_indices=(0, 1, 2, 3),
        dilations=(1, 1, 1, 1),
        strides=(1, 2, 2, 2),
        norm_cfg=norm_cfg,
        norm_eval=False,
        style='pytorch',
        contract_dilation=True),
    neck=dict(
        type='FPN',
        in_channels=[256, 512, 1024, 2048],
        out_channels=256,
        num_outs=4),
    decode_head=dict(
        type='FPNHead',
        in_channels=[256, 256, 256, 256],
        in_index=[0, 1, 2, 3],
        feature_strides=[4, 8, 16, 32],
        channels=128,
        dropout_ratio=0.1,
        num_classes=19,
        norm_cfg=norm_cfg,
        align_corners=False,
        loss_decode=dict(
            type='CrossEntropyLoss', use_sigmoid=False, loss_weight=1.0)),
    # model training and testing settings
    train_cfg=dict(),
    test_cfg=dict(mode='whole'))
@@ -0,0 +1,35 @@
# model settings
norm_cfg = dict(type='SyncBN', requires_grad=True)
model = dict(
    type='EncoderDecoder',
    backbone=dict(
        type='UniFormer',
        embed_dim=[64, 128, 320, 512],
        layers=[3, 4, 8, 3],
        head_dim=64,
        mlp_ratio=4.,
        qkv_bias=True,
        drop_rate=0.,
        attn_drop_rate=0.,
        drop_path_rate=0.1),
    neck=dict(
        type='FPN',
        in_channels=[64, 128, 320, 512],
        out_channels=256,
        num_outs=4),
    decode_head=dict(
        type='FPNHead',
        in_channels=[256, 256, 256, 256],
        in_index=[0, 1, 2, 3],
        feature_strides=[4, 8, 16, 32],
        channels=128,
        dropout_ratio=0.1,
        num_classes=150,
        norm_cfg=norm_cfg,
        align_corners=False,
        loss_decode=dict(
            type='CrossEntropyLoss', use_sigmoid=False, loss_weight=1.0)),
    # model training and testing settings
    train_cfg=dict(),
    test_cfg=dict(mode='whole')
)
@@ -0,0 +1,46 @@
# model settings
norm_cfg = dict(type='SyncBN', requires_grad=True)
model = dict(
    type='EncoderDecoder',
    pretrained='open-mmlab://resnet50_v1c',
    backbone=dict(
        type='ResNetV1c',
        depth=50,
        num_stages=4,
        out_indices=(0, 1, 2, 3),
        dilations=(1, 1, 2, 4),
        strides=(1, 2, 1, 1),
        norm_cfg=norm_cfg,
        norm_eval=False,
        style='pytorch',
        contract_dilation=True),
    decode_head=dict(
        type='GCHead',
        in_channels=2048,
        in_index=3,
        channels=512,
        ratio=1 / 4.,
        pooling_type='att',
        fusion_types=('channel_add', ),
        dropout_ratio=0.1,
        num_classes=19,
        norm_cfg=norm_cfg,
        align_corners=False,
        loss_decode=dict(
            type='CrossEntropyLoss', use_sigmoid=False, loss_weight=1.0)),
    auxiliary_head=dict(
        type='FCNHead',
        in_channels=1024,
        in_index=2,
        channels=256,
        num_convs=1,
        concat_input=False,
        dropout_ratio=0.1,
        num_classes=19,
        norm_cfg=norm_cfg,
        align_corners=False,
        loss_decode=dict(
            type='CrossEntropyLoss', use_sigmoid=False, loss_weight=0.4)),
    # model training and testing settings
    train_cfg=dict(),
    test_cfg=dict(mode='whole'))
@@ -0,0 +1,25 @@
# model settings
norm_cfg = dict(type='SyncBN', eps=0.001, requires_grad=True)
model = dict(
    type='EncoderDecoder',
    backbone=dict(
        type='MobileNetV3',
        arch='large',
        out_indices=(1, 3, 16),
        norm_cfg=norm_cfg),
    decode_head=dict(
        type='LRASPPHead',
        in_channels=(16, 24, 960),
        in_index=(0, 1, 2),
        channels=128,
        input_transform='multiple_select',
        dropout_ratio=0.1,
        num_classes=19,
        norm_cfg=norm_cfg,
        act_cfg=dict(type='ReLU'),
        align_corners=False,
        loss_decode=dict(
            type='CrossEntropyLoss', use_sigmoid=False, loss_weight=1.0)),
    # model training and testing settings
    train_cfg=dict(),
    test_cfg=dict(mode='whole'))
@@ -0,0 +1,46 @@
# model settings
norm_cfg = dict(type='SyncBN', requires_grad=True)
model = dict(
    type='EncoderDecoder',
    pretrained='open-mmlab://resnet50_v1c',
    backbone=dict(
        type='ResNetV1c',
        depth=50,
        num_stages=4,
        out_indices=(0, 1, 2, 3),
        dilations=(1, 1, 2, 4),
        strides=(1, 2, 1, 1),
        norm_cfg=norm_cfg,
        norm_eval=False,
        style='pytorch',
        contract_dilation=True),
    decode_head=dict(
        type='NLHead',
        in_channels=2048,
        in_index=3,
        channels=512,
        dropout_ratio=0.1,
        reduction=2,
        use_scale=True,
        mode='embedded_gaussian',
        num_classes=19,
        norm_cfg=norm_cfg,
        align_corners=False,
        loss_decode=dict(
            type='CrossEntropyLoss', use_sigmoid=False, loss_weight=1.0)),
    auxiliary_head=dict(
        type='FCNHead',
        in_channels=1024,
        in_index=2,
        channels=256,
        num_convs=1,
        concat_input=False,
        dropout_ratio=0.1,
        num_classes=19,
        norm_cfg=norm_cfg,
        align_corners=False,
        loss_decode=dict(
            type='CrossEntropyLoss', use_sigmoid=False, loss_weight=0.4)),
    # model training and testing settings
    train_cfg=dict(),
    test_cfg=dict(mode='whole'))
@@ -0,0 +1,68 @@
# model settings
norm_cfg = dict(type='SyncBN', requires_grad=True)
model = dict(
    type='CascadeEncoderDecoder',
    num_stages=2,
    pretrained='open-mmlab://msra/hrnetv2_w18',
    backbone=dict(
        type='HRNet',
        norm_cfg=norm_cfg,
        norm_eval=False,
        extra=dict(
            stage1=dict(
                num_modules=1,
                num_branches=1,
                block='BOTTLENECK',
                num_blocks=(4, ),
                num_channels=(64, )),
            stage2=dict(
                num_modules=1,
                num_branches=2,
                block='BASIC',
                num_blocks=(4, 4),
                num_channels=(18, 36)),
            stage3=dict(
                num_modules=4,
                num_branches=3,
                block='BASIC',
                num_blocks=(4, 4, 4),
                num_channels=(18, 36, 72)),
            stage4=dict(
                num_modules=3,
                num_branches=4,
                block='BASIC',
                num_blocks=(4, 4, 4, 4),
                num_channels=(18, 36, 72, 144)))),
    decode_head=[
        dict(
            type='FCNHead',
            in_channels=[18, 36, 72, 144],
            channels=sum([18, 36, 72, 144]),
            in_index=(0, 1, 2, 3),
            input_transform='resize_concat',
            kernel_size=1,
            num_convs=1,
            concat_input=False,
            dropout_ratio=-1,
            num_classes=19,
            norm_cfg=norm_cfg,
            align_corners=False,
            loss_decode=dict(
                type='CrossEntropyLoss', use_sigmoid=False, loss_weight=0.4)),
        dict(
            type='OCRHead',
            in_channels=[18, 36, 72, 144],
            in_index=(0, 1, 2, 3),
            input_transform='resize_concat',
            channels=512,
            ocr_channels=256,
            dropout_ratio=-1,
            num_classes=19,
            norm_cfg=norm_cfg,
            align_corners=False,
            loss_decode=dict(
                type='CrossEntropyLoss', use_sigmoid=False, loss_weight=1.0)),
    ],
    # model training and testing settings
    train_cfg=dict(),
    test_cfg=dict(mode='whole'))
@@ -0,0 +1,47 @@
# model settings
norm_cfg = dict(type='SyncBN', requires_grad=True)
model = dict(
    type='CascadeEncoderDecoder',
    num_stages=2,
    pretrained='open-mmlab://resnet50_v1c',
    backbone=dict(
        type='ResNetV1c',
        depth=50,
        num_stages=4,
        out_indices=(0, 1, 2, 3),
        dilations=(1, 1, 2, 4),
        strides=(1, 2, 1, 1),
        norm_cfg=norm_cfg,
        norm_eval=False,
        style='pytorch',
        contract_dilation=True),
    decode_head=[
        dict(
            type='FCNHead',
            in_channels=1024,
            in_index=2,
            channels=256,
            num_convs=1,
            concat_input=False,
            dropout_ratio=0.1,
            num_classes=19,
            norm_cfg=norm_cfg,
            align_corners=False,
            loss_decode=dict(
                type='CrossEntropyLoss', use_sigmoid=False, loss_weight=0.4)),
        dict(
            type='OCRHead',
            in_channels=2048,
            in_index=3,
            channels=512,
            ocr_channels=256,
            dropout_ratio=0.1,
            num_classes=19,
            norm_cfg=norm_cfg,
            align_corners=False,
            loss_decode=dict(
                type='CrossEntropyLoss', use_sigmoid=False, loss_weight=1.0))
    ],
    # model training and testing settings
    train_cfg=dict(),
    test_cfg=dict(mode='whole'))
@@ -0,0 +1,56 @@
# model settings
norm_cfg = dict(type='SyncBN', requires_grad=True)
model = dict(
    type='CascadeEncoderDecoder',
    num_stages=2,
    pretrained='open-mmlab://resnet50_v1c',
    backbone=dict(
        type='ResNetV1c',
        depth=50,
        num_stages=4,
        out_indices=(0, 1, 2, 3),
        dilations=(1, 1, 1, 1),
        strides=(1, 2, 2, 2),
        norm_cfg=norm_cfg,
        norm_eval=False,
        style='pytorch',
        contract_dilation=True),
    neck=dict(
        type='FPN',
        in_channels=[256, 512, 1024, 2048],
        out_channels=256,
        num_outs=4),
    decode_head=[
        dict(
            type='FPNHead',
            in_channels=[256, 256, 256, 256],
            in_index=[0, 1, 2, 3],
            feature_strides=[4, 8, 16, 32],
            channels=128,
            dropout_ratio=-1,
            num_classes=19,
            norm_cfg=norm_cfg,
            align_corners=False,
            loss_decode=dict(
                type='CrossEntropyLoss', use_sigmoid=False, loss_weight=1.0)),
        dict(
            type='PointHead',
            in_channels=[256],
            in_index=[0],
            channels=256,
            num_fcs=3,
            coarse_pred_each_layer=True,
            dropout_ratio=-1,
            num_classes=19,
            align_corners=False,
            loss_decode=dict(
                type='CrossEntropyLoss', use_sigmoid=False, loss_weight=1.0))
    ],
    # model training and testing settings
    train_cfg=dict(
        num_points=2048, oversample_ratio=3, importance_sample_ratio=0.75),
    test_cfg=dict(
        mode='whole',
        subdivision_steps=2,
        subdivision_num_points=8196,
        scale_factor=2))
@@ -0,0 +1,49 @@
# model settings
norm_cfg = dict(type='SyncBN', requires_grad=True)
model = dict(
    type='EncoderDecoder',
    pretrained='open-mmlab://resnet50_v1c',
    backbone=dict(
        type='ResNetV1c',
        depth=50,
        num_stages=4,
        out_indices=(0, 1, 2, 3),
        dilations=(1, 1, 2, 4),
        strides=(1, 2, 1, 1),
        norm_cfg=norm_cfg,
        norm_eval=False,
        style='pytorch',
        contract_dilation=True),
    decode_head=dict(
        type='PSAHead',
        in_channels=2048,
        in_index=3,
        channels=512,
        mask_size=(97, 97),
        psa_type='bi-direction',
        compact=False,
        shrink_factor=2,
        normalization_factor=1.0,
        psa_softmax=True,
        dropout_ratio=0.1,
        num_classes=19,
        norm_cfg=norm_cfg,
        align_corners=False,
        loss_decode=dict(
            type='CrossEntropyLoss', use_sigmoid=False, loss_weight=1.0)),
    auxiliary_head=dict(
        type='FCNHead',
        in_channels=1024,
        in_index=2,
        channels=256,
        num_convs=1,
        concat_input=False,
        dropout_ratio=0.1,
        num_classes=19,
        norm_cfg=norm_cfg,
        align_corners=False,
        loss_decode=dict(
            type='CrossEntropyLoss', use_sigmoid=False, loss_weight=0.4)),
    # model training and testing settings
    train_cfg=dict(),
    test_cfg=dict(mode='whole'))
@@ -0,0 +1,44 @@
# model settings
norm_cfg = dict(type='SyncBN', requires_grad=True)
model = dict(
    type='EncoderDecoder',
    pretrained='open-mmlab://resnet50_v1c',
    backbone=dict(
        type='ResNetV1c',
        depth=50,
        num_stages=4,
        out_indices=(0, 1, 2, 3),
        dilations=(1, 1, 2, 4),
        strides=(1, 2, 1, 1),
        norm_cfg=norm_cfg,
        norm_eval=False,
        style='pytorch',
        contract_dilation=True),
    decode_head=dict(
        type='PSPHead',
        in_channels=2048,
        in_index=3,
        channels=512,
        pool_scales=(1, 2, 3, 6),
        dropout_ratio=0.1,
        num_classes=19,
        norm_cfg=norm_cfg,
        align_corners=False,
        loss_decode=dict(
            type='CrossEntropyLoss', use_sigmoid=False, loss_weight=1.0)),
    auxiliary_head=dict(
        type='FCNHead',
        in_channels=1024,
        in_index=2,
        channels=256,
        num_convs=1,
        concat_input=False,
        dropout_ratio=0.1,
        num_classes=19,
        norm_cfg=norm_cfg,
        align_corners=False,
        loss_decode=dict(
            type='CrossEntropyLoss', use_sigmoid=False, loss_weight=0.4)),
    # model training and testing settings
    train_cfg=dict(),
    test_cfg=dict(mode='whole'))
@@ -0,0 +1,50 @@
# model settings
norm_cfg = dict(type='SyncBN', requires_grad=True)
model = dict(
    type='EncoderDecoder',
    pretrained=None,
    backbone=dict(
        type='UNet',
        in_channels=3,
        base_channels=64,
        num_stages=5,
        strides=(1, 1, 1, 1, 1),
        enc_num_convs=(2, 2, 2, 2, 2),
        dec_num_convs=(2, 2, 2, 2),
        downsamples=(True, True, True, True),
        enc_dilations=(1, 1, 1, 1, 1),
        dec_dilations=(1, 1, 1, 1),
        with_cp=False,
        conv_cfg=None,
        norm_cfg=norm_cfg,
        act_cfg=dict(type='ReLU'),
        upsample_cfg=dict(type='InterpConv'),
        norm_eval=False),
    decode_head=dict(
        type='PSPHead',
        in_channels=64,
        in_index=4,
        channels=16,
        pool_scales=(1, 2, 3, 6),
        dropout_ratio=0.1,
        num_classes=2,
        norm_cfg=norm_cfg,
        align_corners=False,
        loss_decode=dict(
            type='CrossEntropyLoss', use_sigmoid=False, loss_weight=1.0)),
    auxiliary_head=dict(
        type='FCNHead',
        in_channels=128,
        in_index=3,
        channels=64,
        num_convs=1,
        concat_input=False,
        dropout_ratio=0.1,
        num_classes=2,
        norm_cfg=norm_cfg,
        align_corners=False,
        loss_decode=dict(
            type='CrossEntropyLoss', use_sigmoid=False, loss_weight=0.4)),
    # model training and testing settings
    train_cfg=dict(),
    test_cfg=dict(mode='slide', crop_size=256, stride=170))

@@ -0,0 +1,44 @@
# model settings
norm_cfg = dict(type='SyncBN', requires_grad=True)
model = dict(
    type='EncoderDecoder',
    pretrained='open-mmlab://resnet50_v1c',
    backbone=dict(
        type='ResNetV1c',
        depth=50,
        num_stages=4,
        out_indices=(0, 1, 2, 3),
        dilations=(1, 1, 1, 1),
        strides=(1, 2, 2, 2),
        norm_cfg=norm_cfg,
        norm_eval=False,
        style='pytorch',
        contract_dilation=True),
    decode_head=dict(
        type='UPerHead',
        in_channels=[256, 512, 1024, 2048],
        in_index=[0, 1, 2, 3],
        pool_scales=(1, 2, 3, 6),
        channels=512,
        dropout_ratio=0.1,
        num_classes=19,
        norm_cfg=norm_cfg,
        align_corners=False,
        loss_decode=dict(
            type='CrossEntropyLoss', use_sigmoid=False, loss_weight=1.0)),
    auxiliary_head=dict(
        type='FCNHead',
        in_channels=1024,
        in_index=2,
        channels=256,
        num_convs=1,
        concat_input=False,
        dropout_ratio=0.1,
        num_classes=19,
        norm_cfg=norm_cfg,
        align_corners=False,
        loss_decode=dict(
            type='CrossEntropyLoss', use_sigmoid=False, loss_weight=0.4)),
    # model training and testing settings
    train_cfg=dict(),
    test_cfg=dict(mode='whole'))

@@ -0,0 +1,43 @@
# model settings
norm_cfg = dict(type='BN', requires_grad=True)
model = dict(
    type='EncoderDecoder',
    pretrained=None,
    backbone=dict(
        type='UniFormer',
        embed_dim=[64, 128, 320, 512],
        layers=[3, 4, 8, 3],
        head_dim=64,
        mlp_ratio=4.,
        qkv_bias=True,
        drop_rate=0.,
        attn_drop_rate=0.,
        drop_path_rate=0.1),
    decode_head=dict(
        type='UPerHead',
        in_channels=[64, 128, 320, 512],
        in_index=[0, 1, 2, 3],
        pool_scales=(1, 2, 3, 6),
        channels=512,
        dropout_ratio=0.1,
        num_classes=19,
        norm_cfg=norm_cfg,
        align_corners=False,
        loss_decode=dict(
            type='CrossEntropyLoss', use_sigmoid=False, loss_weight=1.0)),
    auxiliary_head=dict(
        type='FCNHead',
        in_channels=320,
        in_index=2,
        channels=256,
        num_convs=1,
        concat_input=False,
        dropout_ratio=0.1,
        num_classes=19,
        norm_cfg=norm_cfg,
        align_corners=False,
        loss_decode=dict(
            type='CrossEntropyLoss', use_sigmoid=False, loss_weight=0.4)),
    # model training and testing settings
    train_cfg=dict(),
    test_cfg=dict(mode='whole'))

@@ -0,0 +1,9 @@
# optimizer
optimizer = dict(type='SGD', lr=0.01, momentum=0.9, weight_decay=0.0005)
optimizer_config = dict()
# learning policy
lr_config = dict(policy='poly', power=0.9, min_lr=1e-4, by_epoch=False)
# runtime settings
runner = dict(type='IterBasedRunner', max_iters=160000)
checkpoint_config = dict(by_epoch=False, interval=16000)
evaluation = dict(interval=16000, metric='mIoU')

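# Note (illustration only, not part of the committed files): this schedule, and the
# shorter ones that follow, all rely on mmcv's 'poly' learning-rate policy. A minimal
# sketch of that decay rule with this file's values:
#
#     def poly_lr(cur_iter, base_lr=0.01, max_iters=160000, power=0.9, min_lr=1e-4):
#         # decays from base_lr at iteration 0 toward min_lr at max_iters
#         coeff = (1 - cur_iter / max_iters) ** power
#         return (base_lr - min_lr) * coeff + min_lr
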
@@ -0,0 +1,9 @@
# optimizer
optimizer = dict(type='SGD', lr=0.01, momentum=0.9, weight_decay=0.0005)
optimizer_config = dict()
# learning policy
lr_config = dict(policy='poly', power=0.9, min_lr=1e-4, by_epoch=False)
# runtime settings
runner = dict(type='IterBasedRunner', max_iters=20000)
checkpoint_config = dict(by_epoch=False, interval=2000)
evaluation = dict(interval=2000, metric='mIoU')

@@ -0,0 +1,9 @@
# optimizer
optimizer = dict(type='SGD', lr=0.01, momentum=0.9, weight_decay=0.0005)
optimizer_config = dict()
# learning policy
lr_config = dict(policy='poly', power=0.9, min_lr=1e-4, by_epoch=False)
# runtime settings
runner = dict(type='IterBasedRunner', max_iters=40000)
checkpoint_config = dict(by_epoch=False, interval=4000)
evaluation = dict(interval=4000, metric='mIoU')

@@ -0,0 +1,9 @@
# optimizer
optimizer = dict(type='SGD', lr=0.01, momentum=0.9, weight_decay=0.0005)
optimizer_config = dict()
# learning policy
lr_config = dict(policy='poly', power=0.9, min_lr=1e-4, by_epoch=False)
# runtime settings
runner = dict(type='IterBasedRunner', max_iters=80000)
checkpoint_config = dict(by_epoch=False, interval=8000)
evaluation = dict(interval=8000, metric='mIoU')

@@ -0,0 +1,38 @@
_base_ = [
    '../../configs/_base_/models/upernet_uniformer.py',
    '../../configs/_base_/datasets/ade20k.py',
    '../../configs/_base_/default_runtime.py',
    '../../configs/_base_/schedules/schedule_160k.py'
]
model = dict(
    backbone=dict(
        type='UniFormer',
        embed_dim=[64, 128, 320, 512],
        layers=[3, 4, 8, 3],
        head_dim=64,
        drop_path_rate=0.25,
        windows=False,
        hybrid=False
    ),
    decode_head=dict(
        in_channels=[64, 128, 320, 512],
        num_classes=150
    ),
    auxiliary_head=dict(
        in_channels=320,
        num_classes=150
    ))

# AdamW optimizer, no weight decay for position embedding & layer norm in backbone
optimizer = dict(_delete_=True, type='AdamW', lr=0.00006, betas=(0.9, 0.999), weight_decay=0.01,
                 paramwise_cfg=dict(custom_keys={'absolute_pos_embed': dict(decay_mult=0.),
                                                 'relative_position_bias_table': dict(decay_mult=0.),
                                                 'norm': dict(decay_mult=0.)}))

lr_config = dict(_delete_=True, policy='poly',
                 warmup='linear',
                 warmup_iters=1500,
                 warmup_ratio=1e-6,
                 power=1.0, min_lr=0.0, by_epoch=False)

data=dict(samples_per_gpu=2)

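# Sketch (illustration only, not part of the diff): how a config like the one above
# composes with its _base_ files when loaded through mmcv; the path is a placeholder.
#
#     from mmcv import Config
#     cfg = Config.fromfile('config.py')        # placeholder path to the file above
#     print(cfg.model.backbone.type)            # 'UniFormer', inherited then kept
#     print(cfg.optimizer.type)                 # 'AdamW' (replaces the base SGD via _delete_=True)
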
@@ -0,0 +1,10 @@
#!/usr/bin/env bash

work_path=$(dirname $0)
PYTHONPATH="$(dirname $0)/../../":$PYTHONPATH \
python -m torch.distributed.launch --nproc_per_node=8 \
    tools/train.py ${work_path}/config.py \
    --launcher pytorch \
    --options model.backbone.pretrained_path='your_model_path/uniformer_small_in1k.pth' \
    --work-dir ${work_path}/ckpt \
    2>&1 | tee -a ${work_path}/log.txt

@@ -0,0 +1,10 @@
#!/usr/bin/env bash

work_path=$(dirname $0)
PYTHONPATH="$(dirname $0)/../../":$PYTHONPATH \
python -m torch.distributed.launch --nproc_per_node=8 \
    tools/test.py ${work_path}/test_config_h32.py \
    ${work_path}/ckpt/latest.pth \
    --launcher pytorch \
    --eval mIoU \
    2>&1 | tee -a ${work_path}/log.txt

@@ -0,0 +1,38 @@
_base_ = [
    '../../configs/_base_/models/upernet_uniformer.py',
    '../../configs/_base_/datasets/ade20k.py',
    '../../configs/_base_/default_runtime.py',
    '../../configs/_base_/schedules/schedule_160k.py'
]
model = dict(
    backbone=dict(
        type='UniFormer',
        embed_dim=[64, 128, 320, 512],
        layers=[3, 4, 8, 3],
        head_dim=64,
        drop_path_rate=0.25,
        windows=False,
        hybrid=False,
    ),
    decode_head=dict(
        in_channels=[64, 128, 320, 512],
        num_classes=150
    ),
    auxiliary_head=dict(
        in_channels=320,
        num_classes=150
    ))

# AdamW optimizer, no weight decay for position embedding & layer norm in backbone
optimizer = dict(_delete_=True, type='AdamW', lr=0.00006, betas=(0.9, 0.999), weight_decay=0.01,
                 paramwise_cfg=dict(custom_keys={'absolute_pos_embed': dict(decay_mult=0.),
                                                 'relative_position_bias_table': dict(decay_mult=0.),
                                                 'norm': dict(decay_mult=0.)}))

lr_config = dict(_delete_=True, policy='poly',
                 warmup='linear',
                 warmup_iters=1500,
                 warmup_ratio=1e-6,
                 power=1.0, min_lr=0.0, by_epoch=False)

data=dict(samples_per_gpu=2)

@@ -0,0 +1,39 @@
_base_ = [
    '../../configs/_base_/models/upernet_uniformer.py',
    '../../configs/_base_/datasets/ade20k.py',
    '../../configs/_base_/default_runtime.py',
    '../../configs/_base_/schedules/schedule_160k.py'
]
model = dict(
    backbone=dict(
        type='UniFormer',
        embed_dim=[64, 128, 320, 512],
        layers=[3, 4, 8, 3],
        head_dim=64,
        drop_path_rate=0.25,
        windows=False,
        hybrid=True,
        window_size=32
    ),
    decode_head=dict(
        in_channels=[64, 128, 320, 512],
        num_classes=150
    ),
    auxiliary_head=dict(
        in_channels=320,
        num_classes=150
    ))

# AdamW optimizer, no weight decay for position embedding & layer norm in backbone
optimizer = dict(_delete_=True, type='AdamW', lr=0.00006, betas=(0.9, 0.999), weight_decay=0.01,
                 paramwise_cfg=dict(custom_keys={'absolute_pos_embed': dict(decay_mult=0.),
                                                 'relative_position_bias_table': dict(decay_mult=0.),
                                                 'norm': dict(decay_mult=0.)}))

lr_config = dict(_delete_=True, policy='poly',
                 warmup='linear',
                 warmup_iters=1500,
                 warmup_ratio=1e-6,
                 power=1.0, min_lr=0.0, by_epoch=False)

data=dict(samples_per_gpu=2)

@@ -0,0 +1,39 @@
_base_ = [
    '../../configs/_base_/models/upernet_uniformer.py',
    '../../configs/_base_/datasets/ade20k.py',
    '../../configs/_base_/default_runtime.py',
    '../../configs/_base_/schedules/schedule_160k.py'
]
model = dict(
    backbone=dict(
        type='UniFormer',
        embed_dim=[64, 128, 320, 512],
        layers=[3, 4, 8, 3],
        head_dim=64,
        drop_path_rate=0.25,
        windows=True,
        hybrid=False,
        window_size=32
    ),
    decode_head=dict(
        in_channels=[64, 128, 320, 512],
        num_classes=150
    ),
    auxiliary_head=dict(
        in_channels=320,
        num_classes=150
    ))

# AdamW optimizer, no weight decay for position embedding & layer norm in backbone
optimizer = dict(_delete_=True, type='AdamW', lr=0.00006, betas=(0.9, 0.999), weight_decay=0.01,
                 paramwise_cfg=dict(custom_keys={'absolute_pos_embed': dict(decay_mult=0.),
                                                 'relative_position_bias_table': dict(decay_mult=0.),
                                                 'norm': dict(decay_mult=0.)}))

lr_config = dict(_delete_=True, policy='poly',
                 warmup='linear',
                 warmup_iters=1500,
                 warmup_ratio=1e-6,
                 power=1.0, min_lr=0.0, by_epoch=False)

data=dict(samples_per_gpu=2)

@@ -0,0 +1,15 @@
# Copyright (c) OpenMMLab. All rights reserved.
# flake8: noqa
from .arraymisc import *
from .fileio import *
from .image import *
from .utils import *
from .version import *
from .video import *
from .visualization import *

# The following modules are not imported to this level, so mmcv may be used
# without PyTorch.
# - runner
# - parallel
# - op

@@ -0,0 +1,4 @@
# Copyright (c) OpenMMLab. All rights reserved.
from .quantization import dequantize, quantize

__all__ = ['quantize', 'dequantize']

@@ -0,0 +1,55 @@
# Copyright (c) OpenMMLab. All rights reserved.
import numpy as np


def quantize(arr, min_val, max_val, levels, dtype=np.int64):
    """Quantize an array of (-inf, inf) to [0, levels-1].

    Args:
        arr (ndarray): Input array.
        min_val (scalar): Minimum value to be clipped.
        max_val (scalar): Maximum value to be clipped.
        levels (int): Quantization levels.
        dtype (np.type): The type of the quantized array.

    Returns:
        tuple: Quantized array.
    """
    if not (isinstance(levels, int) and levels > 1):
        raise ValueError(
            f'levels must be a positive integer, but got {levels}')
    if min_val >= max_val:
        raise ValueError(
            f'min_val ({min_val}) must be smaller than max_val ({max_val})')

    arr = np.clip(arr, min_val, max_val) - min_val
    quantized_arr = np.minimum(
        np.floor(levels * arr / (max_val - min_val)).astype(dtype), levels - 1)

    return quantized_arr


def dequantize(arr, min_val, max_val, levels, dtype=np.float64):
    """Dequantize an array.

    Args:
        arr (ndarray): Input array.
        min_val (scalar): Minimum value to be clipped.
        max_val (scalar): Maximum value to be clipped.
        levels (int): Quantization levels.
        dtype (np.type): The type of the dequantized array.

    Returns:
        tuple: Dequantized array.
    """
    if not (isinstance(levels, int) and levels > 1):
        raise ValueError(
            f'levels must be a positive integer, but got {levels}')
    if min_val >= max_val:
        raise ValueError(
            f'min_val ({min_val}) must be smaller than max_val ({max_val})')

    dequantized_arr = (arr + 0.5).astype(dtype) * (max_val -
                                                   min_val) / levels + min_val

    return dequantized_arr

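# Usage sketch for the pair above (illustration only, not part of the diff); the
# import path assumes the vendored annotator.uniformer.mmcv package from this commit.
#
#     import numpy as np
#     from annotator.uniformer.mmcv.arraymisc import quantize, dequantize
#     arr = np.random.uniform(-10, 10, size=(4, 4))
#     q = quantize(arr, min_val=-10, max_val=10, levels=256, dtype=np.uint8)
#     restored = dequantize(q, min_val=-10, max_val=10, levels=256)  # approximate inverse
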
@ -0,0 +1,41 @@
|
|||
# Copyright (c) OpenMMLab. All rights reserved.
|
||||
from .alexnet import AlexNet
|
||||
# yapf: disable
|
||||
from .bricks import (ACTIVATION_LAYERS, CONV_LAYERS, NORM_LAYERS,
|
||||
PADDING_LAYERS, PLUGIN_LAYERS, UPSAMPLE_LAYERS,
|
||||
ContextBlock, Conv2d, Conv3d, ConvAWS2d, ConvModule,
|
||||
ConvTranspose2d, ConvTranspose3d, ConvWS2d,
|
||||
DepthwiseSeparableConvModule, GeneralizedAttention,
|
||||
HSigmoid, HSwish, Linear, MaxPool2d, MaxPool3d,
|
||||
NonLocal1d, NonLocal2d, NonLocal3d, Scale, Swish,
|
||||
build_activation_layer, build_conv_layer,
|
||||
build_norm_layer, build_padding_layer, build_plugin_layer,
|
||||
build_upsample_layer, conv_ws_2d, is_norm)
|
||||
from .builder import MODELS, build_model_from_cfg
|
||||
# yapf: enable
|
||||
from .resnet import ResNet, make_res_layer
|
||||
from .utils import (INITIALIZERS, Caffe2XavierInit, ConstantInit, KaimingInit,
|
||||
NormalInit, PretrainedInit, TruncNormalInit, UniformInit,
|
||||
XavierInit, bias_init_with_prob, caffe2_xavier_init,
|
||||
constant_init, fuse_conv_bn, get_model_complexity_info,
|
||||
initialize, kaiming_init, normal_init, trunc_normal_init,
|
||||
uniform_init, xavier_init)
|
||||
from .vgg import VGG, make_vgg_layer
|
||||
|
||||
__all__ = [
|
||||
'AlexNet', 'VGG', 'make_vgg_layer', 'ResNet', 'make_res_layer',
|
||||
'constant_init', 'xavier_init', 'normal_init', 'trunc_normal_init',
|
||||
'uniform_init', 'kaiming_init', 'caffe2_xavier_init',
|
||||
'bias_init_with_prob', 'ConvModule', 'build_activation_layer',
|
||||
'build_conv_layer', 'build_norm_layer', 'build_padding_layer',
|
||||
'build_upsample_layer', 'build_plugin_layer', 'is_norm', 'NonLocal1d',
|
||||
'NonLocal2d', 'NonLocal3d', 'ContextBlock', 'HSigmoid', 'Swish', 'HSwish',
|
||||
'GeneralizedAttention', 'ACTIVATION_LAYERS', 'CONV_LAYERS', 'NORM_LAYERS',
|
||||
'PADDING_LAYERS', 'UPSAMPLE_LAYERS', 'PLUGIN_LAYERS', 'Scale',
|
||||
'get_model_complexity_info', 'conv_ws_2d', 'ConvAWS2d', 'ConvWS2d',
|
||||
'fuse_conv_bn', 'DepthwiseSeparableConvModule', 'Linear', 'Conv2d',
|
||||
'ConvTranspose2d', 'MaxPool2d', 'ConvTranspose3d', 'MaxPool3d', 'Conv3d',
|
||||
'initialize', 'INITIALIZERS', 'ConstantInit', 'XavierInit', 'NormalInit',
|
||||
'TruncNormalInit', 'UniformInit', 'KaimingInit', 'PretrainedInit',
|
||||
'Caffe2XavierInit', 'MODELS', 'build_model_from_cfg'
|
||||
]
|
||||
|
|
@ -0,0 +1,61 @@
|
|||
# Copyright (c) OpenMMLab. All rights reserved.
|
||||
import logging
|
||||
|
||||
import torch.nn as nn
|
||||
|
||||
|
||||
class AlexNet(nn.Module):
|
||||
"""AlexNet backbone.
|
||||
|
||||
Args:
|
||||
num_classes (int): number of classes for classification.
|
||||
"""
|
||||
|
||||
def __init__(self, num_classes=-1):
|
||||
super(AlexNet, self).__init__()
|
||||
self.num_classes = num_classes
|
||||
self.features = nn.Sequential(
|
||||
nn.Conv2d(3, 64, kernel_size=11, stride=4, padding=2),
|
||||
nn.ReLU(inplace=True),
|
||||
nn.MaxPool2d(kernel_size=3, stride=2),
|
||||
nn.Conv2d(64, 192, kernel_size=5, padding=2),
|
||||
nn.ReLU(inplace=True),
|
||||
nn.MaxPool2d(kernel_size=3, stride=2),
|
||||
nn.Conv2d(192, 384, kernel_size=3, padding=1),
|
||||
nn.ReLU(inplace=True),
|
||||
nn.Conv2d(384, 256, kernel_size=3, padding=1),
|
||||
nn.ReLU(inplace=True),
|
||||
nn.Conv2d(256, 256, kernel_size=3, padding=1),
|
||||
nn.ReLU(inplace=True),
|
||||
nn.MaxPool2d(kernel_size=3, stride=2),
|
||||
)
|
||||
if self.num_classes > 0:
|
||||
self.classifier = nn.Sequential(
|
||||
nn.Dropout(),
|
||||
nn.Linear(256 * 6 * 6, 4096),
|
||||
nn.ReLU(inplace=True),
|
||||
nn.Dropout(),
|
||||
nn.Linear(4096, 4096),
|
||||
nn.ReLU(inplace=True),
|
||||
nn.Linear(4096, num_classes),
|
||||
)
|
||||
|
||||
def init_weights(self, pretrained=None):
|
||||
if isinstance(pretrained, str):
|
||||
logger = logging.getLogger()
|
||||
from ..runner import load_checkpoint
|
||||
load_checkpoint(self, pretrained, strict=False, logger=logger)
|
||||
elif pretrained is None:
|
||||
# use default initializer
|
||||
pass
|
||||
else:
|
||||
raise TypeError('pretrained must be a str or None')
|
||||
|
||||
def forward(self, x):
|
||||
|
||||
x = self.features(x)
|
||||
if self.num_classes > 0:
|
||||
x = x.view(x.size(0), 256 * 6 * 6)
|
||||
x = self.classifier(x)
|
||||
|
||||
return x
|
||||
|
|
@ -0,0 +1,35 @@
|
|||
# Copyright (c) OpenMMLab. All rights reserved.
|
||||
from .activation import build_activation_layer
|
||||
from .context_block import ContextBlock
|
||||
from .conv import build_conv_layer
|
||||
from .conv2d_adaptive_padding import Conv2dAdaptivePadding
|
||||
from .conv_module import ConvModule
|
||||
from .conv_ws import ConvAWS2d, ConvWS2d, conv_ws_2d
|
||||
from .depthwise_separable_conv_module import DepthwiseSeparableConvModule
|
||||
from .drop import Dropout, DropPath
|
||||
from .generalized_attention import GeneralizedAttention
|
||||
from .hsigmoid import HSigmoid
|
||||
from .hswish import HSwish
|
||||
from .non_local import NonLocal1d, NonLocal2d, NonLocal3d
|
||||
from .norm import build_norm_layer, is_norm
|
||||
from .padding import build_padding_layer
|
||||
from .plugin import build_plugin_layer
|
||||
from .registry import (ACTIVATION_LAYERS, CONV_LAYERS, NORM_LAYERS,
|
||||
PADDING_LAYERS, PLUGIN_LAYERS, UPSAMPLE_LAYERS)
|
||||
from .scale import Scale
|
||||
from .swish import Swish
|
||||
from .upsample import build_upsample_layer
|
||||
from .wrappers import (Conv2d, Conv3d, ConvTranspose2d, ConvTranspose3d,
|
||||
Linear, MaxPool2d, MaxPool3d)
|
||||
|
||||
__all__ = [
|
||||
'ConvModule', 'build_activation_layer', 'build_conv_layer',
|
||||
'build_norm_layer', 'build_padding_layer', 'build_upsample_layer',
|
||||
'build_plugin_layer', 'is_norm', 'HSigmoid', 'HSwish', 'NonLocal1d',
|
||||
'NonLocal2d', 'NonLocal3d', 'ContextBlock', 'GeneralizedAttention',
|
||||
'ACTIVATION_LAYERS', 'CONV_LAYERS', 'NORM_LAYERS', 'PADDING_LAYERS',
|
||||
'UPSAMPLE_LAYERS', 'PLUGIN_LAYERS', 'Scale', 'ConvAWS2d', 'ConvWS2d',
|
||||
'conv_ws_2d', 'DepthwiseSeparableConvModule', 'Swish', 'Linear',
|
||||
'Conv2dAdaptivePadding', 'Conv2d', 'ConvTranspose2d', 'MaxPool2d',
|
||||
'ConvTranspose3d', 'MaxPool3d', 'Conv3d', 'Dropout', 'DropPath'
|
||||
]
|
||||
|
|
@ -0,0 +1,92 @@
|
|||
# Copyright (c) OpenMMLab. All rights reserved.
|
||||
import torch
|
||||
import torch.nn as nn
|
||||
import torch.nn.functional as F
|
||||
|
||||
from annotator.uniformer.mmcv.utils import TORCH_VERSION, build_from_cfg, digit_version
|
||||
from .registry import ACTIVATION_LAYERS
|
||||
|
||||
for module in [
|
||||
nn.ReLU, nn.LeakyReLU, nn.PReLU, nn.RReLU, nn.ReLU6, nn.ELU,
|
||||
nn.Sigmoid, nn.Tanh
|
||||
]:
|
||||
ACTIVATION_LAYERS.register_module(module=module)
|
||||
|
||||
|
||||
@ACTIVATION_LAYERS.register_module(name='Clip')
|
||||
@ACTIVATION_LAYERS.register_module()
|
||||
class Clamp(nn.Module):
|
||||
"""Clamp activation layer.
|
||||
|
||||
This activation function is to clamp the feature map value within
|
||||
:math:`[min, max]`. More details can be found in ``torch.clamp()``.
|
||||
|
||||
Args:
|
||||
min (Number | optional): Lower-bound of the range to be clamped to.
|
||||
Default to -1.
|
||||
max (Number | optional): Upper-bound of the range to be clamped to.
|
||||
Default to 1.
|
||||
"""
|
||||
|
||||
def __init__(self, min=-1., max=1.):
|
||||
super(Clamp, self).__init__()
|
||||
self.min = min
|
||||
self.max = max
|
||||
|
||||
def forward(self, x):
|
||||
"""Forward function.
|
||||
|
||||
Args:
|
||||
x (torch.Tensor): The input tensor.
|
||||
|
||||
Returns:
|
||||
torch.Tensor: Clamped tensor.
|
||||
"""
|
||||
return torch.clamp(x, min=self.min, max=self.max)
|
||||
|
||||
|
||||
class GELU(nn.Module):
|
||||
r"""Applies the Gaussian Error Linear Units function:
|
||||
|
||||
.. math::
|
||||
\text{GELU}(x) = x * \Phi(x)
|
||||
where :math:`\Phi(x)` is the Cumulative Distribution Function for
|
||||
Gaussian Distribution.
|
||||
|
||||
Shape:
|
||||
- Input: :math:`(N, *)` where `*` means, any number of additional
|
||||
dimensions
|
||||
- Output: :math:`(N, *)`, same shape as the input
|
||||
|
||||
.. image:: scripts/activation_images/GELU.png
|
||||
|
||||
Examples::
|
||||
|
||||
>>> m = nn.GELU()
|
||||
>>> input = torch.randn(2)
|
||||
>>> output = m(input)
|
||||
"""
|
||||
|
||||
def forward(self, input):
|
||||
return F.gelu(input)
|
||||
|
||||
|
||||
if (TORCH_VERSION == 'parrots'
|
||||
or digit_version(TORCH_VERSION) < digit_version('1.4')):
|
||||
ACTIVATION_LAYERS.register_module(module=GELU)
|
||||
else:
|
||||
ACTIVATION_LAYERS.register_module(module=nn.GELU)
|
||||
|
||||
|
||||
def build_activation_layer(cfg):
|
||||
"""Build activation layer.
|
||||
|
||||
Args:
|
||||
cfg (dict): The activation layer config, which should contain:
|
||||
- type (str): Layer type.
|
||||
- layer args: Args needed to instantiate an activation layer.
|
||||
|
||||
Returns:
|
||||
nn.Module: Created activation layer.
|
||||
"""
|
||||
return build_from_cfg(cfg, ACTIVATION_LAYERS)
|
||||
|
|
@ -0,0 +1,125 @@
|
|||
# Copyright (c) OpenMMLab. All rights reserved.
|
||||
import torch
|
||||
from torch import nn
|
||||
|
||||
from ..utils import constant_init, kaiming_init
|
||||
from .registry import PLUGIN_LAYERS
|
||||
|
||||
|
||||
def last_zero_init(m):
|
||||
if isinstance(m, nn.Sequential):
|
||||
constant_init(m[-1], val=0)
|
||||
else:
|
||||
constant_init(m, val=0)
|
||||
|
||||
|
||||
@PLUGIN_LAYERS.register_module()
|
||||
class ContextBlock(nn.Module):
|
||||
"""ContextBlock module in GCNet.
|
||||
|
||||
See 'GCNet: Non-local Networks Meet Squeeze-Excitation Networks and Beyond'
|
||||
(https://arxiv.org/abs/1904.11492) for details.
|
||||
|
||||
Args:
|
||||
in_channels (int): Channels of the input feature map.
|
||||
ratio (float): Ratio of channels of transform bottleneck
|
||||
pooling_type (str): Pooling method for context modeling.
|
||||
Options are 'att' and 'avg', stand for attention pooling and
|
||||
average pooling respectively. Default: 'att'.
|
||||
fusion_types (Sequence[str]): Fusion method for feature fusion,
|
||||
Options are 'channels_add', 'channel_mul', stand for channelwise
|
||||
addition and multiplication respectively. Default: ('channel_add',)
|
||||
"""
|
||||
|
||||
_abbr_ = 'context_block'
|
||||
|
||||
def __init__(self,
|
||||
in_channels,
|
||||
ratio,
|
||||
pooling_type='att',
|
||||
fusion_types=('channel_add', )):
|
||||
super(ContextBlock, self).__init__()
|
||||
assert pooling_type in ['avg', 'att']
|
||||
assert isinstance(fusion_types, (list, tuple))
|
||||
valid_fusion_types = ['channel_add', 'channel_mul']
|
||||
assert all([f in valid_fusion_types for f in fusion_types])
|
||||
assert len(fusion_types) > 0, 'at least one fusion should be used'
|
||||
self.in_channels = in_channels
|
||||
self.ratio = ratio
|
||||
self.planes = int(in_channels * ratio)
|
||||
self.pooling_type = pooling_type
|
||||
self.fusion_types = fusion_types
|
||||
if pooling_type == 'att':
|
||||
self.conv_mask = nn.Conv2d(in_channels, 1, kernel_size=1)
|
||||
self.softmax = nn.Softmax(dim=2)
|
||||
else:
|
||||
self.avg_pool = nn.AdaptiveAvgPool2d(1)
|
||||
if 'channel_add' in fusion_types:
|
||||
self.channel_add_conv = nn.Sequential(
|
||||
nn.Conv2d(self.in_channels, self.planes, kernel_size=1),
|
||||
nn.LayerNorm([self.planes, 1, 1]),
|
||||
nn.ReLU(inplace=True), # yapf: disable
|
||||
nn.Conv2d(self.planes, self.in_channels, kernel_size=1))
|
||||
else:
|
||||
self.channel_add_conv = None
|
||||
if 'channel_mul' in fusion_types:
|
||||
self.channel_mul_conv = nn.Sequential(
|
||||
nn.Conv2d(self.in_channels, self.planes, kernel_size=1),
|
||||
nn.LayerNorm([self.planes, 1, 1]),
|
||||
nn.ReLU(inplace=True), # yapf: disable
|
||||
nn.Conv2d(self.planes, self.in_channels, kernel_size=1))
|
||||
else:
|
||||
self.channel_mul_conv = None
|
||||
self.reset_parameters()
|
||||
|
||||
def reset_parameters(self):
|
||||
if self.pooling_type == 'att':
|
||||
kaiming_init(self.conv_mask, mode='fan_in')
|
||||
self.conv_mask.inited = True
|
||||
|
||||
if self.channel_add_conv is not None:
|
||||
last_zero_init(self.channel_add_conv)
|
||||
if self.channel_mul_conv is not None:
|
||||
last_zero_init(self.channel_mul_conv)
|
||||
|
||||
def spatial_pool(self, x):
|
||||
batch, channel, height, width = x.size()
|
||||
if self.pooling_type == 'att':
|
||||
input_x = x
|
||||
# [N, C, H * W]
|
||||
input_x = input_x.view(batch, channel, height * width)
|
||||
# [N, 1, C, H * W]
|
||||
input_x = input_x.unsqueeze(1)
|
||||
# [N, 1, H, W]
|
||||
context_mask = self.conv_mask(x)
|
||||
# [N, 1, H * W]
|
||||
context_mask = context_mask.view(batch, 1, height * width)
|
||||
# [N, 1, H * W]
|
||||
context_mask = self.softmax(context_mask)
|
||||
# [N, 1, H * W, 1]
|
||||
context_mask = context_mask.unsqueeze(-1)
|
||||
# [N, 1, C, 1]
|
||||
context = torch.matmul(input_x, context_mask)
|
||||
# [N, C, 1, 1]
|
||||
context = context.view(batch, channel, 1, 1)
|
||||
else:
|
||||
# [N, C, 1, 1]
|
||||
context = self.avg_pool(x)
|
||||
|
||||
return context
|
||||
|
||||
def forward(self, x):
|
||||
# [N, C, 1, 1]
|
||||
context = self.spatial_pool(x)
|
||||
|
||||
out = x
|
||||
if self.channel_mul_conv is not None:
|
||||
# [N, C, 1, 1]
|
||||
channel_mul_term = torch.sigmoid(self.channel_mul_conv(context))
|
||||
out = out * channel_mul_term
|
||||
if self.channel_add_conv is not None:
|
||||
# [N, C, 1, 1]
|
||||
channel_add_term = self.channel_add_conv(context)
|
||||
out = out + channel_add_term
|
||||
|
||||
return out
|
||||
|
|
@ -0,0 +1,44 @@
|
|||
# Copyright (c) OpenMMLab. All rights reserved.
|
||||
from torch import nn
|
||||
|
||||
from .registry import CONV_LAYERS
|
||||
|
||||
CONV_LAYERS.register_module('Conv1d', module=nn.Conv1d)
|
||||
CONV_LAYERS.register_module('Conv2d', module=nn.Conv2d)
|
||||
CONV_LAYERS.register_module('Conv3d', module=nn.Conv3d)
|
||||
CONV_LAYERS.register_module('Conv', module=nn.Conv2d)
|
||||
|
||||
|
||||
def build_conv_layer(cfg, *args, **kwargs):
|
||||
"""Build convolution layer.
|
||||
|
||||
Args:
|
||||
cfg (None or dict): The conv layer config, which should contain:
|
||||
- type (str): Layer type.
|
||||
- layer args: Args needed to instantiate an conv layer.
|
||||
args (argument list): Arguments passed to the `__init__`
|
||||
method of the corresponding conv layer.
|
||||
kwargs (keyword arguments): Keyword arguments passed to the `__init__`
|
||||
method of the corresponding conv layer.
|
||||
|
||||
Returns:
|
||||
nn.Module: Created conv layer.
|
||||
"""
|
||||
if cfg is None:
|
||||
cfg_ = dict(type='Conv2d')
|
||||
else:
|
||||
if not isinstance(cfg, dict):
|
||||
raise TypeError('cfg must be a dict')
|
||||
if 'type' not in cfg:
|
||||
raise KeyError('the cfg dict must contain the key "type"')
|
||||
cfg_ = cfg.copy()
|
||||
|
||||
layer_type = cfg_.pop('type')
|
||||
if layer_type not in CONV_LAYERS:
|
||||
raise KeyError(f'Unrecognized norm type {layer_type}')
|
||||
else:
|
||||
conv_layer = CONV_LAYERS.get(layer_type)
|
||||
|
||||
layer = conv_layer(*args, **kwargs, **cfg_)
|
||||
|
||||
return layer
|
||||
|
|
@ -0,0 +1,62 @@
|
|||
# Copyright (c) OpenMMLab. All rights reserved.
|
||||
import math
|
||||
|
||||
from torch import nn
|
||||
from torch.nn import functional as F
|
||||
|
||||
from .registry import CONV_LAYERS
|
||||
|
||||
|
||||
@CONV_LAYERS.register_module()
|
||||
class Conv2dAdaptivePadding(nn.Conv2d):
|
||||
"""Implementation of 2D convolution in tensorflow with `padding` as "same",
|
||||
which applies padding to input (if needed) so that input image gets fully
|
||||
covered by filter and stride you specified. For stride 1, this will ensure
|
||||
that output image size is same as input. For stride of 2, output dimensions
|
||||
will be half, for example.
|
||||
|
||||
Args:
|
||||
in_channels (int): Number of channels in the input image
|
||||
out_channels (int): Number of channels produced by the convolution
|
||||
kernel_size (int or tuple): Size of the convolving kernel
|
||||
stride (int or tuple, optional): Stride of the convolution. Default: 1
|
||||
padding (int or tuple, optional): Zero-padding added to both sides of
|
||||
the input. Default: 0
|
||||
dilation (int or tuple, optional): Spacing between kernel elements.
|
||||
Default: 1
|
||||
groups (int, optional): Number of blocked connections from input
|
||||
channels to output channels. Default: 1
|
||||
bias (bool, optional): If ``True``, adds a learnable bias to the
|
||||
output. Default: ``True``
|
||||
"""
|
||||
|
||||
def __init__(self,
|
||||
in_channels,
|
||||
out_channels,
|
||||
kernel_size,
|
||||
stride=1,
|
||||
padding=0,
|
||||
dilation=1,
|
||||
groups=1,
|
||||
bias=True):
|
||||
super().__init__(in_channels, out_channels, kernel_size, stride, 0,
|
||||
dilation, groups, bias)
|
||||
|
||||
def forward(self, x):
|
||||
img_h, img_w = x.size()[-2:]
|
||||
kernel_h, kernel_w = self.weight.size()[-2:]
|
||||
stride_h, stride_w = self.stride
|
||||
output_h = math.ceil(img_h / stride_h)
|
||||
output_w = math.ceil(img_w / stride_w)
|
||||
pad_h = (
|
||||
max((output_h - 1) * self.stride[0] +
|
||||
(kernel_h - 1) * self.dilation[0] + 1 - img_h, 0))
|
||||
pad_w = (
|
||||
max((output_w - 1) * self.stride[1] +
|
||||
(kernel_w - 1) * self.dilation[1] + 1 - img_w, 0))
|
||||
if pad_h > 0 or pad_w > 0:
|
||||
x = F.pad(x, [
|
||||
pad_w // 2, pad_w - pad_w // 2, pad_h // 2, pad_h - pad_h // 2
|
||||
])
|
||||
return F.conv2d(x, self.weight, self.bias, self.stride, self.padding,
|
||||
self.dilation, self.groups)
|
||||
|
|
@ -0,0 +1,206 @@
|
|||
# Copyright (c) OpenMMLab. All rights reserved.
|
||||
import warnings
|
||||
|
||||
import torch.nn as nn
|
||||
|
||||
from annotator.uniformer.mmcv.utils import _BatchNorm, _InstanceNorm
|
||||
from ..utils import constant_init, kaiming_init
|
||||
from .activation import build_activation_layer
|
||||
from .conv import build_conv_layer
|
||||
from .norm import build_norm_layer
|
||||
from .padding import build_padding_layer
|
||||
from .registry import PLUGIN_LAYERS
|
||||
|
||||
|
||||
@PLUGIN_LAYERS.register_module()
|
||||
class ConvModule(nn.Module):
|
||||
"""A conv block that bundles conv/norm/activation layers.
|
||||
|
||||
This block simplifies the usage of convolution layers, which are commonly
|
||||
used with a norm layer (e.g., BatchNorm) and activation layer (e.g., ReLU).
|
||||
It is based upon three build methods: `build_conv_layer()`,
|
||||
`build_norm_layer()` and `build_activation_layer()`.
|
||||
|
||||
Besides, we add some additional features in this module.
|
||||
1. Automatically set `bias` of the conv layer.
|
||||
2. Spectral norm is supported.
|
||||
3. More padding modes are supported. Before PyTorch 1.5, nn.Conv2d only
|
||||
supports zero and circular padding, and we add "reflect" padding mode.
|
||||
|
||||
Args:
|
||||
in_channels (int): Number of channels in the input feature map.
|
||||
Same as that in ``nn._ConvNd``.
|
||||
out_channels (int): Number of channels produced by the convolution.
|
||||
Same as that in ``nn._ConvNd``.
|
||||
kernel_size (int | tuple[int]): Size of the convolving kernel.
|
||||
Same as that in ``nn._ConvNd``.
|
||||
stride (int | tuple[int]): Stride of the convolution.
|
||||
Same as that in ``nn._ConvNd``.
|
||||
padding (int | tuple[int]): Zero-padding added to both sides of
|
||||
the input. Same as that in ``nn._ConvNd``.
|
||||
dilation (int | tuple[int]): Spacing between kernel elements.
|
||||
Same as that in ``nn._ConvNd``.
|
||||
groups (int): Number of blocked connections from input channels to
|
||||
output channels. Same as that in ``nn._ConvNd``.
|
||||
bias (bool | str): If specified as `auto`, it will be decided by the
|
||||
norm_cfg. Bias will be set as True if `norm_cfg` is None, otherwise
|
||||
False. Default: "auto".
|
||||
conv_cfg (dict): Config dict for convolution layer. Default: None,
|
||||
which means using conv2d.
|
||||
norm_cfg (dict): Config dict for normalization layer. Default: None.
|
||||
act_cfg (dict): Config dict for activation layer.
|
||||
Default: dict(type='ReLU').
|
||||
inplace (bool): Whether to use inplace mode for activation.
|
||||
Default: True.
|
||||
with_spectral_norm (bool): Whether use spectral norm in conv module.
|
||||
Default: False.
|
||||
padding_mode (str): If the `padding_mode` has not been supported by
|
||||
current `Conv2d` in PyTorch, we will use our own padding layer
|
||||
instead. Currently, we support ['zeros', 'circular'] with official
|
||||
implementation and ['reflect'] with our own implementation.
|
||||
Default: 'zeros'.
|
||||
order (tuple[str]): The order of conv/norm/activation layers. It is a
|
||||
sequence of "conv", "norm" and "act". Common examples are
|
||||
("conv", "norm", "act") and ("act", "conv", "norm").
|
||||
Default: ('conv', 'norm', 'act').
|
||||
"""
|
||||
|
||||
_abbr_ = 'conv_block'
|
||||
|
||||
def __init__(self,
|
||||
in_channels,
|
||||
out_channels,
|
||||
kernel_size,
|
||||
stride=1,
|
||||
padding=0,
|
||||
dilation=1,
|
||||
groups=1,
|
||||
bias='auto',
|
||||
conv_cfg=None,
|
||||
norm_cfg=None,
|
||||
act_cfg=dict(type='ReLU'),
|
||||
inplace=True,
|
||||
with_spectral_norm=False,
|
||||
padding_mode='zeros',
|
||||
order=('conv', 'norm', 'act')):
|
||||
super(ConvModule, self).__init__()
|
||||
assert conv_cfg is None or isinstance(conv_cfg, dict)
|
||||
assert norm_cfg is None or isinstance(norm_cfg, dict)
|
||||
assert act_cfg is None or isinstance(act_cfg, dict)
|
||||
official_padding_mode = ['zeros', 'circular']
|
||||
self.conv_cfg = conv_cfg
|
||||
self.norm_cfg = norm_cfg
|
||||
self.act_cfg = act_cfg
|
||||
self.inplace = inplace
|
||||
self.with_spectral_norm = with_spectral_norm
|
||||
self.with_explicit_padding = padding_mode not in official_padding_mode
|
||||
self.order = order
|
||||
assert isinstance(self.order, tuple) and len(self.order) == 3
|
||||
assert set(order) == set(['conv', 'norm', 'act'])
|
||||
|
||||
self.with_norm = norm_cfg is not None
|
||||
self.with_activation = act_cfg is not None
|
||||
# if the conv layer is before a norm layer, bias is unnecessary.
|
||||
if bias == 'auto':
|
||||
bias = not self.with_norm
|
||||
self.with_bias = bias
|
||||
|
||||
if self.with_explicit_padding:
|
||||
pad_cfg = dict(type=padding_mode)
|
||||
self.padding_layer = build_padding_layer(pad_cfg, padding)
|
||||
|
||||
# reset padding to 0 for conv module
|
||||
conv_padding = 0 if self.with_explicit_padding else padding
|
||||
# build convolution layer
|
||||
self.conv = build_conv_layer(
|
||||
conv_cfg,
|
||||
in_channels,
|
||||
out_channels,
|
||||
kernel_size,
|
||||
stride=stride,
|
||||
padding=conv_padding,
|
||||
dilation=dilation,
|
||||
groups=groups,
|
||||
bias=bias)
|
||||
# export the attributes of self.conv to a higher level for convenience
|
||||
self.in_channels = self.conv.in_channels
|
||||
self.out_channels = self.conv.out_channels
|
||||
self.kernel_size = self.conv.kernel_size
|
||||
self.stride = self.conv.stride
|
||||
self.padding = padding
|
||||
self.dilation = self.conv.dilation
|
||||
self.transposed = self.conv.transposed
|
||||
self.output_padding = self.conv.output_padding
|
||||
self.groups = self.conv.groups
|
||||
|
||||
if self.with_spectral_norm:
|
||||
self.conv = nn.utils.spectral_norm(self.conv)
|
||||
|
||||
# build normalization layers
|
||||
if self.with_norm:
|
||||
# norm layer is after conv layer
|
||||
if order.index('norm') > order.index('conv'):
|
||||
norm_channels = out_channels
|
||||
else:
|
||||
norm_channels = in_channels
|
||||
self.norm_name, norm = build_norm_layer(norm_cfg, norm_channels)
|
||||
self.add_module(self.norm_name, norm)
|
||||
if self.with_bias:
|
||||
if isinstance(norm, (_BatchNorm, _InstanceNorm)):
|
||||
warnings.warn(
|
||||
'Unnecessary conv bias before batch/instance norm')
|
||||
else:
|
||||
self.norm_name = None
|
||||
|
||||
# build activation layer
|
||||
if self.with_activation:
|
||||
act_cfg_ = act_cfg.copy()
|
||||
# nn.Tanh has no 'inplace' argument
|
||||
if act_cfg_['type'] not in [
|
||||
'Tanh', 'PReLU', 'Sigmoid', 'HSigmoid', 'Swish'
|
||||
]:
|
||||
act_cfg_.setdefault('inplace', inplace)
|
||||
self.activate = build_activation_layer(act_cfg_)
|
||||
|
||||
# Use msra init by default
|
||||
self.init_weights()
|
||||
|
||||
@property
|
||||
def norm(self):
|
||||
if self.norm_name:
|
||||
return getattr(self, self.norm_name)
|
||||
else:
|
||||
return None
|
||||
|
||||
def init_weights(self):
|
||||
# 1. It is mainly for customized conv layers with their own
|
||||
# initialization manners by calling their own ``init_weights()``,
|
||||
# and we do not want ConvModule to override the initialization.
|
||||
# 2. For customized conv layers without their own initialization
|
||||
# manners (that is, they don't have their own ``init_weights()``)
|
||||
# and PyTorch's conv layers, they will be initialized by
|
||||
# this method with default ``kaiming_init``.
|
||||
# Note: For PyTorch's conv layers, they will be overwritten by our
|
||||
# initialization implementation using default ``kaiming_init``.
|
||||
if not hasattr(self.conv, 'init_weights'):
|
||||
if self.with_activation and self.act_cfg['type'] == 'LeakyReLU':
|
||||
nonlinearity = 'leaky_relu'
|
||||
a = self.act_cfg.get('negative_slope', 0.01)
|
||||
else:
|
||||
nonlinearity = 'relu'
|
||||
a = 0
|
||||
kaiming_init(self.conv, a=a, nonlinearity=nonlinearity)
|
||||
if self.with_norm:
|
||||
constant_init(self.norm, 1, bias=0)
|
||||
|
||||
def forward(self, x, activate=True, norm=True):
|
||||
for layer in self.order:
|
||||
if layer == 'conv':
|
||||
if self.with_explicit_padding:
|
||||
x = self.padding_layer(x)
|
||||
x = self.conv(x)
|
||||
elif layer == 'norm' and norm and self.with_norm:
|
||||
x = self.norm(x)
|
||||
elif layer == 'act' and activate and self.with_activation:
|
||||
x = self.activate(x)
|
||||
return x
|
||||
|
|
@ -0,0 +1,148 @@
|
|||
# Copyright (c) OpenMMLab. All rights reserved.
|
||||
import torch
|
||||
import torch.nn as nn
|
||||
import torch.nn.functional as F
|
||||
|
||||
from .registry import CONV_LAYERS
|
||||
|
||||
|
||||
def conv_ws_2d(input,
|
||||
weight,
|
||||
bias=None,
|
||||
stride=1,
|
||||
padding=0,
|
||||
dilation=1,
|
||||
groups=1,
|
||||
eps=1e-5):
|
||||
c_in = weight.size(0)
|
||||
weight_flat = weight.view(c_in, -1)
|
||||
mean = weight_flat.mean(dim=1, keepdim=True).view(c_in, 1, 1, 1)
|
||||
std = weight_flat.std(dim=1, keepdim=True).view(c_in, 1, 1, 1)
|
||||
weight = (weight - mean) / (std + eps)
|
||||
return F.conv2d(input, weight, bias, stride, padding, dilation, groups)
|
||||
|
||||
|
||||
@CONV_LAYERS.register_module('ConvWS')
|
||||
class ConvWS2d(nn.Conv2d):
|
||||
|
||||
def __init__(self,
|
||||
in_channels,
|
||||
out_channels,
|
||||
kernel_size,
|
||||
stride=1,
|
||||
padding=0,
|
||||
dilation=1,
|
||||
groups=1,
|
||||
bias=True,
|
||||
eps=1e-5):
|
||||
super(ConvWS2d, self).__init__(
|
||||
in_channels,
|
||||
out_channels,
|
||||
kernel_size,
|
||||
stride=stride,
|
||||
padding=padding,
|
||||
dilation=dilation,
|
||||
groups=groups,
|
||||
bias=bias)
|
||||
self.eps = eps
|
||||
|
||||
def forward(self, x):
|
||||
return conv_ws_2d(x, self.weight, self.bias, self.stride, self.padding,
|
||||
self.dilation, self.groups, self.eps)
|
||||
|
||||
|
||||
@CONV_LAYERS.register_module(name='ConvAWS')
|
||||
class ConvAWS2d(nn.Conv2d):
|
||||
"""AWS (Adaptive Weight Standardization)
|
||||
|
||||
This is a variant of Weight Standardization
|
||||
(https://arxiv.org/pdf/1903.10520.pdf)
|
||||
It is used in DetectoRS to avoid NaN
|
||||
(https://arxiv.org/pdf/2006.02334.pdf)
|
||||
|
||||
Args:
|
||||
in_channels (int): Number of channels in the input image
|
||||
out_channels (int): Number of channels produced by the convolution
|
||||
kernel_size (int or tuple): Size of the conv kernel
|
||||
stride (int or tuple, optional): Stride of the convolution. Default: 1
|
||||
padding (int or tuple, optional): Zero-padding added to both sides of
|
||||
the input. Default: 0
|
||||
dilation (int or tuple, optional): Spacing between kernel elements.
|
||||
Default: 1
|
||||
groups (int, optional): Number of blocked connections from input
|
||||
channels to output channels. Default: 1
|
||||
bias (bool, optional): If set True, adds a learnable bias to the
|
||||
output. Default: True
|
||||
"""
|
||||
|
||||
def __init__(self,
|
||||
in_channels,
|
||||
out_channels,
|
||||
kernel_size,
|
||||
stride=1,
|
||||
padding=0,
|
||||
dilation=1,
|
||||
groups=1,
|
||||
bias=True):
|
||||
super().__init__(
|
||||
in_channels,
|
||||
out_channels,
|
||||
kernel_size,
|
||||
stride=stride,
|
||||
padding=padding,
|
||||
dilation=dilation,
|
||||
groups=groups,
|
||||
bias=bias)
|
||||
self.register_buffer('weight_gamma',
|
||||
torch.ones(self.out_channels, 1, 1, 1))
|
||||
self.register_buffer('weight_beta',
|
||||
torch.zeros(self.out_channels, 1, 1, 1))
|
||||
|
||||
def _get_weight(self, weight):
|
||||
weight_flat = weight.view(weight.size(0), -1)
|
||||
mean = weight_flat.mean(dim=1).view(-1, 1, 1, 1)
|
||||
std = torch.sqrt(weight_flat.var(dim=1) + 1e-5).view(-1, 1, 1, 1)
|
||||
weight = (weight - mean) / std
|
||||
weight = self.weight_gamma * weight + self.weight_beta
|
||||
return weight
|
||||
|
||||
def forward(self, x):
|
||||
weight = self._get_weight(self.weight)
|
||||
return F.conv2d(x, weight, self.bias, self.stride, self.padding,
|
||||
self.dilation, self.groups)
|
||||
|
||||
def _load_from_state_dict(self, state_dict, prefix, local_metadata, strict,
|
||||
missing_keys, unexpected_keys, error_msgs):
|
||||
"""Override default load function.
|
||||
|
||||
AWS overrides the function _load_from_state_dict to recover
|
||||
weight_gamma and weight_beta if they are missing. If weight_gamma and
|
||||
weight_beta are found in the checkpoint, this function will return
|
||||
after super()._load_from_state_dict. Otherwise, it will compute the
|
||||
mean and std of the pretrained weights and store them in weight_beta
|
||||
and weight_gamma.
|
||||
"""
|
||||
|
||||
self.weight_gamma.data.fill_(-1)
|
||||
local_missing_keys = []
|
||||
super()._load_from_state_dict(state_dict, prefix, local_metadata,
|
||||
strict, local_missing_keys,
|
||||
unexpected_keys, error_msgs)
|
||||
if self.weight_gamma.data.mean() > 0:
|
||||
for k in local_missing_keys:
|
||||
missing_keys.append(k)
|
||||
return
|
||||
weight = self.weight.data
|
||||
weight_flat = weight.view(weight.size(0), -1)
|
||||
mean = weight_flat.mean(dim=1).view(-1, 1, 1, 1)
|
||||
std = torch.sqrt(weight_flat.var(dim=1) + 1e-5).view(-1, 1, 1, 1)
|
||||
self.weight_beta.data.copy_(mean)
|
||||
self.weight_gamma.data.copy_(std)
|
||||
missing_gamma_beta = [
|
||||
k for k in local_missing_keys
|
||||
if k.endswith('weight_gamma') or k.endswith('weight_beta')
|
||||
]
|
||||
for k in missing_gamma_beta:
|
||||
local_missing_keys.remove(k)
|
||||
for k in local_missing_keys:
|
||||
missing_keys.append(k)
|
||||
|
|
@ -0,0 +1,96 @@
|
|||
# Copyright (c) OpenMMLab. All rights reserved.
|
||||
import torch.nn as nn
|
||||
|
||||
from .conv_module import ConvModule
|
||||
|
||||
|
||||
class DepthwiseSeparableConvModule(nn.Module):
|
||||
"""Depthwise separable convolution module.
|
||||
|
||||
See https://arxiv.org/pdf/1704.04861.pdf for details.
|
||||
|
||||
This module can replace a ConvModule with the conv block replaced by two
|
||||
conv block: depthwise conv block and pointwise conv block. The depthwise
|
||||
conv block contains depthwise-conv/norm/activation layers. The pointwise
|
||||
conv block contains pointwise-conv/norm/activation layers. It should be
|
||||
noted that there will be norm/activation layer in the depthwise conv block
|
||||
if `norm_cfg` and `act_cfg` are specified.
|
||||
|
||||
Args:
|
||||
in_channels (int): Number of channels in the input feature map.
|
||||
Same as that in ``nn._ConvNd``.
|
||||
out_channels (int): Number of channels produced by the convolution.
|
||||
Same as that in ``nn._ConvNd``.
|
||||
kernel_size (int | tuple[int]): Size of the convolving kernel.
|
||||
Same as that in ``nn._ConvNd``.
|
||||
stride (int | tuple[int]): Stride of the convolution.
|
||||
Same as that in ``nn._ConvNd``. Default: 1.
|
||||
padding (int | tuple[int]): Zero-padding added to both sides of
|
||||
the input. Same as that in ``nn._ConvNd``. Default: 0.
|
||||
dilation (int | tuple[int]): Spacing between kernel elements.
|
||||
Same as that in ``nn._ConvNd``. Default: 1.
|
||||
norm_cfg (dict): Default norm config for both depthwise ConvModule and
|
||||
pointwise ConvModule. Default: None.
|
||||
act_cfg (dict): Default activation config for both depthwise ConvModule
|
||||
and pointwise ConvModule. Default: dict(type='ReLU').
|
||||
dw_norm_cfg (dict): Norm config of depthwise ConvModule. If it is
|
||||
'default', it will be the same as `norm_cfg`. Default: 'default'.
|
||||
dw_act_cfg (dict): Activation config of depthwise ConvModule. If it is
|
||||
'default', it will be the same as `act_cfg`. Default: 'default'.
|
||||
pw_norm_cfg (dict): Norm config of pointwise ConvModule. If it is
|
||||
'default', it will be the same as `norm_cfg`. Default: 'default'.
|
||||
pw_act_cfg (dict): Activation config of pointwise ConvModule. If it is
|
||||
'default', it will be the same as `act_cfg`. Default: 'default'.
|
||||
kwargs (optional): Other shared arguments for depthwise and pointwise
|
||||
ConvModule. See ConvModule for ref.
|
||||
"""
|
||||
|
||||
def __init__(self,
|
||||
in_channels,
|
||||
out_channels,
|
||||
kernel_size,
|
||||
stride=1,
|
||||
padding=0,
|
||||
dilation=1,
|
||||
norm_cfg=None,
|
||||
act_cfg=dict(type='ReLU'),
|
||||
dw_norm_cfg='default',
|
||||
dw_act_cfg='default',
|
||||
pw_norm_cfg='default',
|
||||
pw_act_cfg='default',
|
||||
**kwargs):
|
||||
super(DepthwiseSeparableConvModule, self).__init__()
|
||||
assert 'groups' not in kwargs, 'groups should not be specified'
|
||||
|
||||
# if norm/activation config of depthwise/pointwise ConvModule is not
|
||||
# specified, use default config.
|
||||
dw_norm_cfg = dw_norm_cfg if dw_norm_cfg != 'default' else norm_cfg
|
||||
dw_act_cfg = dw_act_cfg if dw_act_cfg != 'default' else act_cfg
|
||||
pw_norm_cfg = pw_norm_cfg if pw_norm_cfg != 'default' else norm_cfg
|
||||
pw_act_cfg = pw_act_cfg if pw_act_cfg != 'default' else act_cfg
|
||||
|
||||
# depthwise convolution
|
||||
self.depthwise_conv = ConvModule(
|
||||
in_channels,
|
||||
in_channels,
|
||||
kernel_size,
|
||||
stride=stride,
|
||||
padding=padding,
|
||||
dilation=dilation,
|
||||
groups=in_channels,
|
||||
norm_cfg=dw_norm_cfg,
|
||||
act_cfg=dw_act_cfg,
|
||||
**kwargs)
|
||||
|
||||
self.pointwise_conv = ConvModule(
|
||||
in_channels,
|
||||
out_channels,
|
||||
1,
|
||||
norm_cfg=pw_norm_cfg,
|
||||
act_cfg=pw_act_cfg,
|
||||
**kwargs)
|
||||
|
||||
def forward(self, x):
|
||||
x = self.depthwise_conv(x)
|
||||
x = self.pointwise_conv(x)
|
||||
return x
|
||||
|
|
@ -0,0 +1,65 @@
|
|||
# Copyright (c) OpenMMLab. All rights reserved.
|
||||
import torch
|
||||
import torch.nn as nn
|
||||
|
||||
from annotator.uniformer.mmcv import build_from_cfg
|
||||
from .registry import DROPOUT_LAYERS
|
||||
|
||||
|
||||
def drop_path(x, drop_prob=0., training=False):
|
||||
"""Drop paths (Stochastic Depth) per sample (when applied in main path of
|
||||
residual blocks).
|
||||
|
||||
We follow the implementation
|
||||
https://github.com/rwightman/pytorch-image-models/blob/a2727c1bf78ba0d7b5727f5f95e37fb7f8866b1f/timm/models/layers/drop.py # noqa: E501
|
||||
"""
|
||||
if drop_prob == 0. or not training:
|
||||
return x
|
||||
keep_prob = 1 - drop_prob
|
||||
# handle tensors with different dimensions, not just 4D tensors.
|
||||
shape = (x.shape[0], ) + (1, ) * (x.ndim - 1)
|
||||
random_tensor = keep_prob + torch.rand(
|
||||
shape, dtype=x.dtype, device=x.device)
|
||||
output = x.div(keep_prob) * random_tensor.floor()
|
||||
return output
|
||||
|
||||
|
||||
@DROPOUT_LAYERS.register_module()
|
||||
class DropPath(nn.Module):
|
||||
"""Drop paths (Stochastic Depth) per sample (when applied in main path of
|
||||
residual blocks).
|
||||
|
||||
We follow the implementation
|
||||
https://github.com/rwightman/pytorch-image-models/blob/a2727c1bf78ba0d7b5727f5f95e37fb7f8866b1f/timm/models/layers/drop.py # noqa: E501
|
||||
|
||||
Args:
|
||||
drop_prob (float): Probability of the path to be zeroed. Default: 0.1
|
||||
"""
|
||||
|
||||
def __init__(self, drop_prob=0.1):
|
||||
super(DropPath, self).__init__()
|
||||
self.drop_prob = drop_prob
|
||||
|
||||
def forward(self, x):
|
||||
return drop_path(x, self.drop_prob, self.training)
|
||||
|
||||
|
||||
@DROPOUT_LAYERS.register_module()
|
||||
class Dropout(nn.Dropout):
|
||||
"""A wrapper for ``torch.nn.Dropout``, We rename the ``p`` of
|
||||
``torch.nn.Dropout`` to ``drop_prob`` so as to be consistent with
|
||||
``DropPath``
|
||||
|
||||
Args:
|
||||
drop_prob (float): Probability of the elements to be
|
||||
zeroed. Default: 0.5.
|
||||
inplace (bool): Do the operation inplace or not. Default: False.
|
||||
"""
|
||||
|
||||
def __init__(self, drop_prob=0.5, inplace=False):
|
||||
super().__init__(p=drop_prob, inplace=inplace)
|
||||
|
||||
|
||||
def build_dropout(cfg, default_args=None):
|
||||
"""Builder for drop out layers."""
|
||||
return build_from_cfg(cfg, DROPOUT_LAYERS, default_args)
|
||||
|
|
@ -0,0 +1,412 @@
|
|||
# Copyright (c) OpenMMLab. All rights reserved.
|
||||
import math
|
||||
|
||||
import numpy as np
|
||||
import torch
|
||||
import torch.nn as nn
|
||||
import torch.nn.functional as F
|
||||
|
||||
from ..utils import kaiming_init
|
||||
from .registry import PLUGIN_LAYERS
|
||||
|
||||
|
||||
@PLUGIN_LAYERS.register_module()
|
||||
class GeneralizedAttention(nn.Module):
|
||||
"""GeneralizedAttention module.
|
||||
|
||||
See 'An Empirical Study of Spatial Attention Mechanisms in Deep Networks'
|
||||
(https://arxiv.org/abs/1711.07971) for details.
|
||||
|
||||
Args:
|
||||
in_channels (int): Channels of the input feature map.
|
||||
spatial_range (int): The spatial range. -1 indicates no spatial range
|
||||
constraint. Default: -1.
|
||||
num_heads (int): The head number of empirical_attention module.
|
||||
Default: 9.
|
||||
position_embedding_dim (int): The position embedding dimension.
|
||||
Default: -1.
|
||||
position_magnitude (int): A multiplier acting on coord difference.
|
||||
Default: 1.
|
||||
kv_stride (int): The feature stride acting on key/value feature map.
|
||||
Default: 2.
|
||||
q_stride (int): The feature stride acting on query feature map.
|
||||
Default: 1.
|
||||
attention_type (str): A binary indicator string for indicating which
|
||||
items in generalized empirical_attention module are used.
|
||||
Default: '1111'.
|
||||
|
||||
- '1000' indicates 'query and key content' (appr - appr) item,
|
||||
- '0100' indicates 'query content and relative position'
|
||||
(appr - position) item,
|
||||
- '0010' indicates 'key content only' (bias - appr) item,
|
||||
- '0001' indicates 'relative position only' (bias - position) item.
|
||||
"""
|
||||
|
||||
_abbr_ = 'gen_attention_block'
|
||||
|
||||
def __init__(self,
|
||||
in_channels,
|
||||
spatial_range=-1,
|
||||
num_heads=9,
|
||||
position_embedding_dim=-1,
|
||||
position_magnitude=1,
|
||||
kv_stride=2,
|
||||
q_stride=1,
|
||||
attention_type='1111'):
|
||||
|
||||
super(GeneralizedAttention, self).__init__()
|
||||
|
||||
# hard range means local range for non-local operation
|
||||
self.position_embedding_dim = (
|
||||
position_embedding_dim
|
||||
if position_embedding_dim > 0 else in_channels)
|
||||
|
||||
self.position_magnitude = position_magnitude
|
||||
self.num_heads = num_heads
|
||||
self.in_channels = in_channels
|
||||
self.spatial_range = spatial_range
|
||||
self.kv_stride = kv_stride
|
||||
self.q_stride = q_stride
|
||||
self.attention_type = [bool(int(_)) for _ in attention_type]
|
||||
self.qk_embed_dim = in_channels // num_heads
|
||||
out_c = self.qk_embed_dim * num_heads
|
||||
|
||||
if self.attention_type[0] or self.attention_type[1]:
|
||||
self.query_conv = nn.Conv2d(
|
||||
in_channels=in_channels,
|
||||
out_channels=out_c,
|
||||
kernel_size=1,
|
||||
bias=False)
|
||||
self.query_conv.kaiming_init = True
|
||||
|
||||
if self.attention_type[0] or self.attention_type[2]:
|
||||
self.key_conv = nn.Conv2d(
|
||||
in_channels=in_channels,
|
||||
out_channels=out_c,
|
||||
kernel_size=1,
|
||||
bias=False)
|
||||
self.key_conv.kaiming_init = True
|
||||
|
||||
self.v_dim = in_channels // num_heads
|
||||
self.value_conv = nn.Conv2d(
|
||||
in_channels=in_channels,
|
||||
out_channels=self.v_dim * num_heads,
|
||||
kernel_size=1,
|
||||
bias=False)
|
||||
self.value_conv.kaiming_init = True
|
||||
|
||||
if self.attention_type[1] or self.attention_type[3]:
|
||||
self.appr_geom_fc_x = nn.Linear(
|
||||
self.position_embedding_dim // 2, out_c, bias=False)
|
||||
self.appr_geom_fc_x.kaiming_init = True
|
||||
|
||||
self.appr_geom_fc_y = nn.Linear(
|
||||
self.position_embedding_dim // 2, out_c, bias=False)
|
||||
self.appr_geom_fc_y.kaiming_init = True
|
||||
|
||||
if self.attention_type[2]:
|
||||
stdv = 1.0 / math.sqrt(self.qk_embed_dim * 2)
|
||||
appr_bias_value = -2 * stdv * torch.rand(out_c) + stdv
|
||||
self.appr_bias = nn.Parameter(appr_bias_value)
|
||||
|
||||
if self.attention_type[3]:
|
||||
stdv = 1.0 / math.sqrt(self.qk_embed_dim * 2)
|
||||
geom_bias_value = -2 * stdv * torch.rand(out_c) + stdv
|
||||
self.geom_bias = nn.Parameter(geom_bias_value)
|
||||
|
||||
self.proj_conv = nn.Conv2d(
|
||||
in_channels=self.v_dim * num_heads,
|
||||
out_channels=in_channels,
|
||||
kernel_size=1,
|
||||
bias=True)
|
||||
self.proj_conv.kaiming_init = True
|
||||
self.gamma = nn.Parameter(torch.zeros(1))
|
||||
|
||||
if self.spatial_range >= 0:
|
||||
# only works when non local is after 3*3 conv
|
||||
if in_channels == 256:
|
||||
max_len = 84
|
||||
elif in_channels == 512:
|
||||
max_len = 42
|
||||
|
||||
max_len_kv = int((max_len - 1.0) / self.kv_stride + 1)
|
||||
local_constraint_map = np.ones(
|
||||
                (max_len, max_len, max_len_kv, max_len_kv), dtype=int)
|
||||
for iy in range(max_len):
|
||||
for ix in range(max_len):
|
||||
local_constraint_map[
|
||||
iy, ix,
|
||||
max((iy - self.spatial_range) //
|
||||
self.kv_stride, 0):min((iy + self.spatial_range +
|
||||
1) // self.kv_stride +
|
||||
1, max_len),
|
||||
max((ix - self.spatial_range) //
|
||||
self.kv_stride, 0):min((ix + self.spatial_range +
|
||||
1) // self.kv_stride +
|
||||
1, max_len)] = 0
|
||||
|
||||
self.local_constraint_map = nn.Parameter(
|
||||
torch.from_numpy(local_constraint_map).byte(),
|
||||
requires_grad=False)
|
||||
|
||||
if self.q_stride > 1:
|
||||
self.q_downsample = nn.AvgPool2d(
|
||||
kernel_size=1, stride=self.q_stride)
|
||||
else:
|
||||
self.q_downsample = None
|
||||
|
||||
if self.kv_stride > 1:
|
||||
self.kv_downsample = nn.AvgPool2d(
|
||||
kernel_size=1, stride=self.kv_stride)
|
||||
else:
|
||||
self.kv_downsample = None
|
||||
|
||||
self.init_weights()
|
||||
|
||||
def get_position_embedding(self,
|
||||
h,
|
||||
w,
|
||||
h_kv,
|
||||
w_kv,
|
||||
q_stride,
|
||||
kv_stride,
|
||||
device,
|
||||
dtype,
|
||||
feat_dim,
|
||||
wave_length=1000):
|
||||
# the default type of Tensor is float32, leading to type mismatch
|
||||
# in fp16 mode. Cast it to support fp16 mode.
|
||||
h_idxs = torch.linspace(0, h - 1, h).to(device=device, dtype=dtype)
|
||||
h_idxs = h_idxs.view((h, 1)) * q_stride
|
||||
|
||||
w_idxs = torch.linspace(0, w - 1, w).to(device=device, dtype=dtype)
|
||||
w_idxs = w_idxs.view((w, 1)) * q_stride
|
||||
|
||||
h_kv_idxs = torch.linspace(0, h_kv - 1, h_kv).to(
|
||||
device=device, dtype=dtype)
|
||||
h_kv_idxs = h_kv_idxs.view((h_kv, 1)) * kv_stride
|
||||
|
||||
w_kv_idxs = torch.linspace(0, w_kv - 1, w_kv).to(
|
||||
device=device, dtype=dtype)
|
||||
w_kv_idxs = w_kv_idxs.view((w_kv, 1)) * kv_stride
|
||||
|
||||
# (h, h_kv, 1)
|
||||
h_diff = h_idxs.unsqueeze(1) - h_kv_idxs.unsqueeze(0)
|
||||
h_diff *= self.position_magnitude
|
||||
|
||||
# (w, w_kv, 1)
|
||||
w_diff = w_idxs.unsqueeze(1) - w_kv_idxs.unsqueeze(0)
|
||||
w_diff *= self.position_magnitude
|
||||
|
||||
feat_range = torch.arange(0, feat_dim / 4).to(
|
||||
device=device, dtype=dtype)
|
||||
|
||||
dim_mat = torch.Tensor([wave_length]).to(device=device, dtype=dtype)
|
||||
dim_mat = dim_mat**((4. / feat_dim) * feat_range)
|
||||
dim_mat = dim_mat.view((1, 1, -1))
|
||||
|
||||
embedding_x = torch.cat(
|
||||
((w_diff / dim_mat).sin(), (w_diff / dim_mat).cos()), dim=2)
|
||||
|
||||
embedding_y = torch.cat(
|
||||
((h_diff / dim_mat).sin(), (h_diff / dim_mat).cos()), dim=2)
|
||||
|
||||
return embedding_x, embedding_y
|
||||
|
||||
def forward(self, x_input):
|
||||
num_heads = self.num_heads
|
||||
|
||||
# use empirical_attention
|
||||
if self.q_downsample is not None:
|
||||
x_q = self.q_downsample(x_input)
|
||||
else:
|
||||
x_q = x_input
|
||||
n, _, h, w = x_q.shape
|
||||
|
||||
if self.kv_downsample is not None:
|
||||
x_kv = self.kv_downsample(x_input)
|
||||
else:
|
||||
x_kv = x_input
|
||||
_, _, h_kv, w_kv = x_kv.shape
|
||||
|
||||
if self.attention_type[0] or self.attention_type[1]:
|
||||
proj_query = self.query_conv(x_q).view(
|
||||
(n, num_heads, self.qk_embed_dim, h * w))
|
||||
proj_query = proj_query.permute(0, 1, 3, 2)
|
||||
|
||||
if self.attention_type[0] or self.attention_type[2]:
|
||||
proj_key = self.key_conv(x_kv).view(
|
||||
(n, num_heads, self.qk_embed_dim, h_kv * w_kv))
|
||||
|
||||
if self.attention_type[1] or self.attention_type[3]:
|
||||
position_embed_x, position_embed_y = self.get_position_embedding(
|
||||
h, w, h_kv, w_kv, self.q_stride, self.kv_stride,
|
||||
x_input.device, x_input.dtype, self.position_embedding_dim)
|
||||
# (n, num_heads, w, w_kv, dim)
|
||||
position_feat_x = self.appr_geom_fc_x(position_embed_x).\
|
||||
view(1, w, w_kv, num_heads, self.qk_embed_dim).\
|
||||
permute(0, 3, 1, 2, 4).\
|
||||
repeat(n, 1, 1, 1, 1)
|
||||
|
||||
# (n, num_heads, h, h_kv, dim)
|
||||
position_feat_y = self.appr_geom_fc_y(position_embed_y).\
|
||||
view(1, h, h_kv, num_heads, self.qk_embed_dim).\
|
||||
permute(0, 3, 1, 2, 4).\
|
||||
repeat(n, 1, 1, 1, 1)
|
||||
|
||||
position_feat_x /= math.sqrt(2)
|
||||
position_feat_y /= math.sqrt(2)
|
||||
|
||||
# accelerate for saliency only
|
||||
if (np.sum(self.attention_type) == 1) and self.attention_type[2]:
|
||||
appr_bias = self.appr_bias.\
|
||||
view(1, num_heads, 1, self.qk_embed_dim).\
|
||||
repeat(n, 1, 1, 1)
|
||||
|
||||
energy = torch.matmul(appr_bias, proj_key).\
|
||||
view(n, num_heads, 1, h_kv * w_kv)
|
||||
|
||||
h = 1
|
||||
w = 1
|
||||
else:
|
||||
# (n, num_heads, h*w, h_kv*w_kv), query before key, 540mb for
|
||||
if not self.attention_type[0]:
|
||||
energy = torch.zeros(
|
||||
n,
|
||||
num_heads,
|
||||
h,
|
||||
w,
|
||||
h_kv,
|
||||
w_kv,
|
||||
dtype=x_input.dtype,
|
||||
device=x_input.device)
|
||||
|
||||
# attention_type[0]: appr - appr
|
||||
# attention_type[1]: appr - position
|
||||
# attention_type[2]: bias - appr
|
||||
# attention_type[3]: bias - position
|
||||
if self.attention_type[0] or self.attention_type[2]:
|
||||
if self.attention_type[0] and self.attention_type[2]:
|
||||
appr_bias = self.appr_bias.\
|
||||
view(1, num_heads, 1, self.qk_embed_dim)
|
||||
energy = torch.matmul(proj_query + appr_bias, proj_key).\
|
||||
view(n, num_heads, h, w, h_kv, w_kv)
|
||||
|
||||
elif self.attention_type[0]:
|
||||
energy = torch.matmul(proj_query, proj_key).\
|
||||
view(n, num_heads, h, w, h_kv, w_kv)
|
||||
|
||||
elif self.attention_type[2]:
|
||||
appr_bias = self.appr_bias.\
|
||||
view(1, num_heads, 1, self.qk_embed_dim).\
|
||||
repeat(n, 1, 1, 1)
|
||||
|
||||
energy += torch.matmul(appr_bias, proj_key).\
|
||||
view(n, num_heads, 1, 1, h_kv, w_kv)
|
||||
|
||||
if self.attention_type[1] or self.attention_type[3]:
|
||||
if self.attention_type[1] and self.attention_type[3]:
|
||||
geom_bias = self.geom_bias.\
|
||||
view(1, num_heads, 1, self.qk_embed_dim)
|
||||
|
||||
proj_query_reshape = (proj_query + geom_bias).\
|
||||
view(n, num_heads, h, w, self.qk_embed_dim)
|
||||
|
||||
energy_x = torch.matmul(
|
||||
proj_query_reshape.permute(0, 1, 3, 2, 4),
|
||||
position_feat_x.permute(0, 1, 2, 4, 3))
|
||||
energy_x = energy_x.\
|
||||
permute(0, 1, 3, 2, 4).unsqueeze(4)
|
||||
|
||||
energy_y = torch.matmul(
|
||||
proj_query_reshape,
|
||||
position_feat_y.permute(0, 1, 2, 4, 3))
|
||||
energy_y = energy_y.unsqueeze(5)
|
||||
|
||||
energy += energy_x + energy_y
|
||||
|
||||
elif self.attention_type[1]:
|
||||
proj_query_reshape = proj_query.\
|
||||
view(n, num_heads, h, w, self.qk_embed_dim)
|
||||
proj_query_reshape = proj_query_reshape.\
|
||||
permute(0, 1, 3, 2, 4)
|
||||
position_feat_x_reshape = position_feat_x.\
|
||||
permute(0, 1, 2, 4, 3)
|
||||
position_feat_y_reshape = position_feat_y.\
|
||||
permute(0, 1, 2, 4, 3)
|
||||
|
||||
energy_x = torch.matmul(proj_query_reshape,
|
||||
position_feat_x_reshape)
|
||||
energy_x = energy_x.permute(0, 1, 3, 2, 4).unsqueeze(4)
|
||||
|
||||
energy_y = torch.matmul(proj_query_reshape,
|
||||
position_feat_y_reshape)
|
||||
energy_y = energy_y.unsqueeze(5)
|
||||
|
||||
energy += energy_x + energy_y
|
||||
|
||||
elif self.attention_type[3]:
|
||||
geom_bias = self.geom_bias.\
|
||||
view(1, num_heads, self.qk_embed_dim, 1).\
|
||||
repeat(n, 1, 1, 1)
|
||||
|
||||
position_feat_x_reshape = position_feat_x.\
|
||||
view(n, num_heads, w*w_kv, self.qk_embed_dim)
|
||||
|
||||
position_feat_y_reshape = position_feat_y.\
|
||||
view(n, num_heads, h * h_kv, self.qk_embed_dim)
|
||||
|
||||
energy_x = torch.matmul(position_feat_x_reshape, geom_bias)
|
||||
energy_x = energy_x.view(n, num_heads, 1, w, 1, w_kv)
|
||||
|
||||
energy_y = torch.matmul(position_feat_y_reshape, geom_bias)
|
||||
energy_y = energy_y.view(n, num_heads, h, 1, h_kv, 1)
|
||||
|
||||
energy += energy_x + energy_y
|
||||
|
||||
energy = energy.view(n, num_heads, h * w, h_kv * w_kv)
|
||||
|
||||
if self.spatial_range >= 0:
|
||||
cur_local_constraint_map = \
|
||||
self.local_constraint_map[:h, :w, :h_kv, :w_kv].\
|
||||
contiguous().\
|
||||
view(1, 1, h*w, h_kv*w_kv)
|
||||
|
||||
energy = energy.masked_fill_(cur_local_constraint_map,
|
||||
float('-inf'))
|
||||
|
||||
attention = F.softmax(energy, 3)
|
||||
|
||||
proj_value = self.value_conv(x_kv)
|
||||
proj_value_reshape = proj_value.\
|
||||
view((n, num_heads, self.v_dim, h_kv * w_kv)).\
|
||||
permute(0, 1, 3, 2)
|
||||
|
||||
out = torch.matmul(attention, proj_value_reshape).\
|
||||
permute(0, 1, 3, 2).\
|
||||
contiguous().\
|
||||
view(n, self.v_dim * self.num_heads, h, w)
|
||||
|
||||
out = self.proj_conv(out)
|
||||
|
||||
# output is downsampled, upsample back to input size
|
||||
if self.q_downsample is not None:
|
||||
out = F.interpolate(
|
||||
out,
|
||||
size=x_input.shape[2:],
|
||||
mode='bilinear',
|
||||
align_corners=False)
|
||||
|
||||
out = self.gamma * out + x_input
|
||||
return out
|
||||
|
||||
def init_weights(self):
|
||||
for m in self.modules():
|
||||
if hasattr(m, 'kaiming_init') and m.kaiming_init:
|
||||
kaiming_init(
|
||||
m,
|
||||
mode='fan_in',
|
||||
nonlinearity='leaky_relu',
|
||||
bias=0,
|
||||
distribution='uniform',
|
||||
a=1)
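A minimal usage sketch for the plugin above (illustrative only; the import path and the tensor shapes below are assumptions, not taken from this diff):

import torch

from annotator.uniformer.mmcv.cnn import GeneralizedAttention  # assumed export path

# 16 channels split across 8 heads; all four empirical-attention terms enabled.
attn = GeneralizedAttention(in_channels=16, num_heads=8, attention_type='1111')
x = torch.randn(2, 16, 20, 20)
out = attn(x)  # gamma * attention(x) + x, same shape as the input
assert out.shape == x.shape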
|
||||
|
|
@ -0,0 +1,34 @@
|
|||
# Copyright (c) OpenMMLab. All rights reserved.
import torch.nn as nn

from .registry import ACTIVATION_LAYERS


@ACTIVATION_LAYERS.register_module()
class HSigmoid(nn.Module):
    """Hard Sigmoid Module. Apply the hard sigmoid function:
    Hsigmoid(x) = min(max((x + bias) / divisor, min_value), max_value)
    Default: Hsigmoid(x) = min(max((x + 1) / 2, 0), 1)

    Args:
        bias (float): Bias of the input feature map. Default: 1.0.
        divisor (float): Divisor of the input feature map. Default: 2.0.
        min_value (float): Lower bound value. Default: 0.0.
        max_value (float): Upper bound value. Default: 1.0.

    Returns:
        Tensor: The output tensor.
    """

    def __init__(self, bias=1.0, divisor=2.0, min_value=0.0, max_value=1.0):
        super(HSigmoid, self).__init__()
        self.bias = bias
        self.divisor = divisor
        assert self.divisor != 0
        self.min_value = min_value
        self.max_value = max_value

    def forward(self, x):
        x = (x + self.bias) / self.divisor

        return x.clamp_(self.min_value, self.max_value)
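A usage sketch for the activation above, built through the ACTIVATION_LAYERS registry (illustrative only; `build_activation_layer` being exported from the vendored cnn package is an assumption):

import torch

from annotator.uniformer.mmcv.cnn import build_activation_layer  # assumed export path

act = build_activation_layer(dict(type='HSigmoid', bias=1.0, divisor=2.0))
x = torch.linspace(-3.0, 3.0, steps=7)
y = act(x)  # elementwise min(max((x + 1) / 2, 0), 1)
assert float(y.min()) >= 0.0 and float(y.max()) <= 1.0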
|
||||
|
|
@ -0,0 +1,29 @@
|
|||
# Copyright (c) OpenMMLab. All rights reserved.
import torch.nn as nn

from .registry import ACTIVATION_LAYERS


@ACTIVATION_LAYERS.register_module()
class HSwish(nn.Module):
    """Hard Swish Module.

    This module applies the hard swish function:

    .. math::
        Hswish(x) = x * ReLU6(x + 3) / 6

    Args:
        inplace (bool): can optionally do the operation in-place.
            Default: False.

    Returns:
        Tensor: The output tensor.
    """

    def __init__(self, inplace=False):
        super(HSwish, self).__init__()
        self.act = nn.ReLU6(inplace)

    def forward(self, x):
        return x * self.act(x + 3) / 6
|
||||
|
|
@ -0,0 +1,306 @@
|
|||
# Copyright (c) OpenMMLab. All rights reserved.
|
||||
from abc import ABCMeta
|
||||
|
||||
import torch
|
||||
import torch.nn as nn
|
||||
|
||||
from ..utils import constant_init, normal_init
|
||||
from .conv_module import ConvModule
|
||||
from .registry import PLUGIN_LAYERS
|
||||
|
||||
|
||||
class _NonLocalNd(nn.Module, metaclass=ABCMeta):
|
||||
"""Basic Non-local module.
|
||||
|
||||
This module is proposed in
|
||||
"Non-local Neural Networks"
|
||||
Paper reference: https://arxiv.org/abs/1711.07971
|
||||
Code reference: https://github.com/AlexHex7/Non-local_pytorch
|
||||
|
||||
Args:
|
||||
in_channels (int): Channels of the input feature map.
|
||||
reduction (int): Channel reduction ratio. Default: 2.
|
||||
use_scale (bool): Whether to scale pairwise_weight by
|
||||
`1/sqrt(inter_channels)` when the mode is `embedded_gaussian`.
|
||||
Default: True.
|
||||
conv_cfg (None | dict): The config dict for convolution layers.
|
||||
If not specified, it will use `nn.Conv2d` for convolution layers.
|
||||
Default: None.
|
||||
norm_cfg (None | dict): The config dict for normalization layers.
|
||||
Default: None. (This parameter is only applicable to conv_out.)
|
||||
mode (str): Options are `gaussian`, `concatenation`,
|
||||
`embedded_gaussian` and `dot_product`. Default: embedded_gaussian.
|
||||
"""
|
||||
|
||||
def __init__(self,
|
||||
in_channels,
|
||||
reduction=2,
|
||||
use_scale=True,
|
||||
conv_cfg=None,
|
||||
norm_cfg=None,
|
||||
mode='embedded_gaussian',
|
||||
**kwargs):
|
||||
super(_NonLocalNd, self).__init__()
|
||||
self.in_channels = in_channels
|
||||
self.reduction = reduction
|
||||
self.use_scale = use_scale
|
||||
self.inter_channels = max(in_channels // reduction, 1)
|
||||
self.mode = mode
|
||||
|
||||
if mode not in [
|
||||
'gaussian', 'embedded_gaussian', 'dot_product', 'concatenation'
|
||||
]:
|
||||
raise ValueError("Mode should be in 'gaussian', 'concatenation', "
|
||||
f"'embedded_gaussian' or 'dot_product', but got "
|
||||
f'{mode} instead.')
|
||||
|
||||
# g, theta, phi are defaulted as `nn.ConvNd`.
|
||||
# Here we use ConvModule for potential usage.
|
||||
self.g = ConvModule(
|
||||
self.in_channels,
|
||||
self.inter_channels,
|
||||
kernel_size=1,
|
||||
conv_cfg=conv_cfg,
|
||||
act_cfg=None)
|
||||
self.conv_out = ConvModule(
|
||||
self.inter_channels,
|
||||
self.in_channels,
|
||||
kernel_size=1,
|
||||
conv_cfg=conv_cfg,
|
||||
norm_cfg=norm_cfg,
|
||||
act_cfg=None)
|
||||
|
||||
if self.mode != 'gaussian':
|
||||
self.theta = ConvModule(
|
||||
self.in_channels,
|
||||
self.inter_channels,
|
||||
kernel_size=1,
|
||||
conv_cfg=conv_cfg,
|
||||
act_cfg=None)
|
||||
self.phi = ConvModule(
|
||||
self.in_channels,
|
||||
self.inter_channels,
|
||||
kernel_size=1,
|
||||
conv_cfg=conv_cfg,
|
||||
act_cfg=None)
|
||||
|
||||
if self.mode == 'concatenation':
|
||||
self.concat_project = ConvModule(
|
||||
self.inter_channels * 2,
|
||||
1,
|
||||
kernel_size=1,
|
||||
stride=1,
|
||||
padding=0,
|
||||
bias=False,
|
||||
act_cfg=dict(type='ReLU'))
|
||||
|
||||
self.init_weights(**kwargs)
|
||||
|
||||
def init_weights(self, std=0.01, zeros_init=True):
|
||||
if self.mode != 'gaussian':
|
||||
for m in [self.g, self.theta, self.phi]:
|
||||
normal_init(m.conv, std=std)
|
||||
else:
|
||||
normal_init(self.g.conv, std=std)
|
||||
if zeros_init:
|
||||
if self.conv_out.norm_cfg is None:
|
||||
constant_init(self.conv_out.conv, 0)
|
||||
else:
|
||||
constant_init(self.conv_out.norm, 0)
|
||||
else:
|
||||
if self.conv_out.norm_cfg is None:
|
||||
normal_init(self.conv_out.conv, std=std)
|
||||
else:
|
||||
normal_init(self.conv_out.norm, std=std)
|
||||
|
||||
def gaussian(self, theta_x, phi_x):
|
||||
# NonLocal1d pairwise_weight: [N, H, H]
|
||||
# NonLocal2d pairwise_weight: [N, HxW, HxW]
|
||||
# NonLocal3d pairwise_weight: [N, TxHxW, TxHxW]
|
||||
pairwise_weight = torch.matmul(theta_x, phi_x)
|
||||
pairwise_weight = pairwise_weight.softmax(dim=-1)
|
||||
return pairwise_weight
|
||||
|
||||
def embedded_gaussian(self, theta_x, phi_x):
|
||||
# NonLocal1d pairwise_weight: [N, H, H]
|
||||
# NonLocal2d pairwise_weight: [N, HxW, HxW]
|
||||
# NonLocal3d pairwise_weight: [N, TxHxW, TxHxW]
|
||||
pairwise_weight = torch.matmul(theta_x, phi_x)
|
||||
if self.use_scale:
|
||||
# theta_x.shape[-1] is `self.inter_channels`
|
||||
pairwise_weight /= theta_x.shape[-1]**0.5
|
||||
pairwise_weight = pairwise_weight.softmax(dim=-1)
|
||||
return pairwise_weight
|
||||
|
||||
def dot_product(self, theta_x, phi_x):
|
||||
# NonLocal1d pairwise_weight: [N, H, H]
|
||||
# NonLocal2d pairwise_weight: [N, HxW, HxW]
|
||||
# NonLocal3d pairwise_weight: [N, TxHxW, TxHxW]
|
||||
pairwise_weight = torch.matmul(theta_x, phi_x)
|
||||
pairwise_weight /= pairwise_weight.shape[-1]
|
||||
return pairwise_weight
|
||||
|
||||
def concatenation(self, theta_x, phi_x):
|
||||
# NonLocal1d pairwise_weight: [N, H, H]
|
||||
# NonLocal2d pairwise_weight: [N, HxW, HxW]
|
||||
# NonLocal3d pairwise_weight: [N, TxHxW, TxHxW]
|
||||
h = theta_x.size(2)
|
||||
w = phi_x.size(3)
|
||||
theta_x = theta_x.repeat(1, 1, 1, w)
|
||||
phi_x = phi_x.repeat(1, 1, h, 1)
|
||||
|
||||
concat_feature = torch.cat([theta_x, phi_x], dim=1)
|
||||
pairwise_weight = self.concat_project(concat_feature)
|
||||
n, _, h, w = pairwise_weight.size()
|
||||
pairwise_weight = pairwise_weight.view(n, h, w)
|
||||
pairwise_weight /= pairwise_weight.shape[-1]
|
||||
|
||||
return pairwise_weight
|
||||
|
||||
def forward(self, x):
|
||||
# Assume `reduction = 1`, then `inter_channels = C`
|
||||
# or `inter_channels = C` when `mode="gaussian"`
|
||||
|
||||
# NonLocal1d x: [N, C, H]
|
||||
# NonLocal2d x: [N, C, H, W]
|
||||
# NonLocal3d x: [N, C, T, H, W]
|
||||
n = x.size(0)
|
||||
|
||||
# NonLocal1d g_x: [N, H, C]
|
||||
# NonLocal2d g_x: [N, HxW, C]
|
||||
# NonLocal3d g_x: [N, TxHxW, C]
|
||||
g_x = self.g(x).view(n, self.inter_channels, -1)
|
||||
g_x = g_x.permute(0, 2, 1)
|
||||
|
||||
# NonLocal1d theta_x: [N, H, C], phi_x: [N, C, H]
|
||||
# NonLocal2d theta_x: [N, HxW, C], phi_x: [N, C, HxW]
|
||||
# NonLocal3d theta_x: [N, TxHxW, C], phi_x: [N, C, TxHxW]
|
||||
if self.mode == 'gaussian':
|
||||
theta_x = x.view(n, self.in_channels, -1)
|
||||
theta_x = theta_x.permute(0, 2, 1)
|
||||
if self.sub_sample:
|
||||
phi_x = self.phi(x).view(n, self.in_channels, -1)
|
||||
else:
|
||||
phi_x = x.view(n, self.in_channels, -1)
|
||||
elif self.mode == 'concatenation':
|
||||
theta_x = self.theta(x).view(n, self.inter_channels, -1, 1)
|
||||
phi_x = self.phi(x).view(n, self.inter_channels, 1, -1)
|
||||
else:
|
||||
theta_x = self.theta(x).view(n, self.inter_channels, -1)
|
||||
theta_x = theta_x.permute(0, 2, 1)
|
||||
phi_x = self.phi(x).view(n, self.inter_channels, -1)
|
||||
|
||||
pairwise_func = getattr(self, self.mode)
|
||||
# NonLocal1d pairwise_weight: [N, H, H]
|
||||
# NonLocal2d pairwise_weight: [N, HxW, HxW]
|
||||
# NonLocal3d pairwise_weight: [N, TxHxW, TxHxW]
|
||||
pairwise_weight = pairwise_func(theta_x, phi_x)
|
||||
|
||||
# NonLocal1d y: [N, H, C]
|
||||
# NonLocal2d y: [N, HxW, C]
|
||||
# NonLocal3d y: [N, TxHxW, C]
|
||||
y = torch.matmul(pairwise_weight, g_x)
|
||||
# NonLocal1d y: [N, C, H]
|
||||
# NonLocal2d y: [N, C, H, W]
|
||||
# NonLocal3d y: [N, C, T, H, W]
|
||||
y = y.permute(0, 2, 1).contiguous().reshape(n, self.inter_channels,
|
||||
*x.size()[2:])
|
||||
|
||||
output = x + self.conv_out(y)
|
||||
|
||||
return output
|
||||
|
||||
|
||||
class NonLocal1d(_NonLocalNd):
|
||||
"""1D Non-local module.
|
||||
|
||||
Args:
|
||||
in_channels (int): Same as `NonLocalND`.
|
||||
sub_sample (bool): Whether to apply max pooling after pairwise
|
||||
function (Note that the `sub_sample` is applied on spatial only).
|
||||
Default: False.
|
||||
conv_cfg (None | dict): Same as `NonLocalND`.
|
||||
Default: dict(type='Conv1d').
|
||||
"""
|
||||
|
||||
def __init__(self,
|
||||
in_channels,
|
||||
sub_sample=False,
|
||||
conv_cfg=dict(type='Conv1d'),
|
||||
**kwargs):
|
||||
super(NonLocal1d, self).__init__(
|
||||
in_channels, conv_cfg=conv_cfg, **kwargs)
|
||||
|
||||
self.sub_sample = sub_sample
|
||||
|
||||
if sub_sample:
|
||||
max_pool_layer = nn.MaxPool1d(kernel_size=2)
|
||||
self.g = nn.Sequential(self.g, max_pool_layer)
|
||||
if self.mode != 'gaussian':
|
||||
self.phi = nn.Sequential(self.phi, max_pool_layer)
|
||||
else:
|
||||
self.phi = max_pool_layer
|
||||
|
||||
|
||||
@PLUGIN_LAYERS.register_module()
|
||||
class NonLocal2d(_NonLocalNd):
|
||||
"""2D Non-local module.
|
||||
|
||||
Args:
|
||||
in_channels (int): Same as `NonLocalND`.
|
||||
sub_sample (bool): Whether to apply max pooling after pairwise
|
||||
function (Note that the `sub_sample` is applied on spatial only).
|
||||
Default: False.
|
||||
conv_cfg (None | dict): Same as `NonLocalND`.
|
||||
Default: dict(type='Conv2d').
|
||||
"""
|
||||
|
||||
_abbr_ = 'nonlocal_block'
|
||||
|
||||
def __init__(self,
|
||||
in_channels,
|
||||
sub_sample=False,
|
||||
conv_cfg=dict(type='Conv2d'),
|
||||
**kwargs):
|
||||
super(NonLocal2d, self).__init__(
|
||||
in_channels, conv_cfg=conv_cfg, **kwargs)
|
||||
|
||||
self.sub_sample = sub_sample
|
||||
|
||||
if sub_sample:
|
||||
max_pool_layer = nn.MaxPool2d(kernel_size=(2, 2))
|
||||
self.g = nn.Sequential(self.g, max_pool_layer)
|
||||
if self.mode != 'gaussian':
|
||||
self.phi = nn.Sequential(self.phi, max_pool_layer)
|
||||
else:
|
||||
self.phi = max_pool_layer
|
||||
|
||||
|
||||
class NonLocal3d(_NonLocalNd):
|
||||
"""3D Non-local module.
|
||||
|
||||
Args:
|
||||
in_channels (int): Same as `NonLocalND`.
|
||||
sub_sample (bool): Whether to apply max pooling after pairwise
|
||||
function (Note that the `sub_sample` is applied on spatial only).
|
||||
Default: False.
|
||||
conv_cfg (None | dict): Same as `NonLocalND`.
|
||||
Default: dict(type='Conv3d').
|
||||
"""
|
||||
|
||||
def __init__(self,
|
||||
in_channels,
|
||||
sub_sample=False,
|
||||
conv_cfg=dict(type='Conv3d'),
|
||||
**kwargs):
|
||||
super(NonLocal3d, self).__init__(
|
||||
in_channels, conv_cfg=conv_cfg, **kwargs)
|
||||
self.sub_sample = sub_sample
|
||||
|
||||
if sub_sample:
|
||||
max_pool_layer = nn.MaxPool3d(kernel_size=(1, 2, 2))
|
||||
self.g = nn.Sequential(self.g, max_pool_layer)
|
||||
if self.mode != 'gaussian':
|
||||
self.phi = nn.Sequential(self.phi, max_pool_layer)
|
||||
else:
|
||||
self.phi = max_pool_layer
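A usage sketch for the 2D variant above (illustrative only; the import path is an assumption):

import torch

from annotator.uniformer.mmcv.cnn import NonLocal2d  # assumed export path

block = NonLocal2d(in_channels=32, reduction=2, mode='embedded_gaussian',
                   sub_sample=True)
x = torch.randn(1, 32, 16, 16)
out = block(x)  # x + conv_out(pairwise-weighted values), same shape as x
assert out.shape == x.shape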
|
||||
|
|
@ -0,0 +1,144 @@
|
|||
# Copyright (c) OpenMMLab. All rights reserved.
|
||||
import inspect
|
||||
|
||||
import torch.nn as nn
|
||||
|
||||
from annotator.uniformer.mmcv.utils import is_tuple_of
|
||||
from annotator.uniformer.mmcv.utils.parrots_wrapper import SyncBatchNorm, _BatchNorm, _InstanceNorm
|
||||
from .registry import NORM_LAYERS
|
||||
|
||||
NORM_LAYERS.register_module('BN', module=nn.BatchNorm2d)
|
||||
NORM_LAYERS.register_module('BN1d', module=nn.BatchNorm1d)
|
||||
NORM_LAYERS.register_module('BN2d', module=nn.BatchNorm2d)
|
||||
NORM_LAYERS.register_module('BN3d', module=nn.BatchNorm3d)
|
||||
NORM_LAYERS.register_module('SyncBN', module=SyncBatchNorm)
|
||||
NORM_LAYERS.register_module('GN', module=nn.GroupNorm)
|
||||
NORM_LAYERS.register_module('LN', module=nn.LayerNorm)
|
||||
NORM_LAYERS.register_module('IN', module=nn.InstanceNorm2d)
|
||||
NORM_LAYERS.register_module('IN1d', module=nn.InstanceNorm1d)
|
||||
NORM_LAYERS.register_module('IN2d', module=nn.InstanceNorm2d)
|
||||
NORM_LAYERS.register_module('IN3d', module=nn.InstanceNorm3d)
|
||||
|
||||
|
||||
def infer_abbr(class_type):
|
||||
"""Infer abbreviation from the class name.
|
||||
|
||||
When we build a norm layer with `build_norm_layer()`, we want to preserve
|
||||
the norm type in variable names, e.g, self.bn1, self.gn. This method will
|
||||
infer the abbreviation to map class types to abbreviations.
|
||||
|
||||
Rule 1: If the class has the property "_abbr_", return the property.
|
||||
Rule 2: If the parent class is _BatchNorm, GroupNorm, LayerNorm or
|
||||
InstanceNorm, the abbreviation of this layer will be "bn", "gn", "ln" and
|
||||
"in" respectively.
|
||||
Rule 3: If the class name contains "batch", "group", "layer" or "instance",
|
||||
the abbreviation of this layer will be "bn", "gn", "ln" and "in"
|
||||
respectively.
|
||||
    Rule 4: Otherwise, the abbreviation falls back to "norm_layer".
|
||||
|
||||
Args:
|
||||
class_type (type): The norm layer type.
|
||||
|
||||
Returns:
|
||||
str: The inferred abbreviation.
|
||||
"""
|
||||
if not inspect.isclass(class_type):
|
||||
raise TypeError(
|
||||
f'class_type must be a type, but got {type(class_type)}')
|
||||
if hasattr(class_type, '_abbr_'):
|
||||
return class_type._abbr_
|
||||
if issubclass(class_type, _InstanceNorm): # IN is a subclass of BN
|
||||
return 'in'
|
||||
elif issubclass(class_type, _BatchNorm):
|
||||
return 'bn'
|
||||
elif issubclass(class_type, nn.GroupNorm):
|
||||
return 'gn'
|
||||
elif issubclass(class_type, nn.LayerNorm):
|
||||
return 'ln'
|
||||
else:
|
||||
class_name = class_type.__name__.lower()
|
||||
if 'batch' in class_name:
|
||||
return 'bn'
|
||||
elif 'group' in class_name:
|
||||
return 'gn'
|
||||
elif 'layer' in class_name:
|
||||
return 'ln'
|
||||
elif 'instance' in class_name:
|
||||
return 'in'
|
||||
else:
|
||||
return 'norm_layer'
|
||||
|
||||
|
||||
def build_norm_layer(cfg, num_features, postfix=''):
|
||||
"""Build normalization layer.
|
||||
|
||||
Args:
|
||||
cfg (dict): The norm layer config, which should contain:
|
||||
|
||||
- type (str): Layer type.
|
||||
- layer args: Args needed to instantiate a norm layer.
|
||||
- requires_grad (bool, optional): Whether stop gradient updates.
|
||||
num_features (int): Number of input channels.
|
||||
postfix (int | str): The postfix to be appended into norm abbreviation
|
||||
to create named layer.
|
||||
|
||||
Returns:
|
||||
(str, nn.Module): The first element is the layer name consisting of
|
||||
abbreviation and postfix, e.g., bn1, gn. The second element is the
|
||||
created norm layer.
|
||||
"""
|
||||
if not isinstance(cfg, dict):
|
||||
raise TypeError('cfg must be a dict')
|
||||
if 'type' not in cfg:
|
||||
raise KeyError('the cfg dict must contain the key "type"')
|
||||
cfg_ = cfg.copy()
|
||||
|
||||
layer_type = cfg_.pop('type')
|
||||
if layer_type not in NORM_LAYERS:
|
||||
raise KeyError(f'Unrecognized norm type {layer_type}')
|
||||
|
||||
norm_layer = NORM_LAYERS.get(layer_type)
|
||||
abbr = infer_abbr(norm_layer)
|
||||
|
||||
assert isinstance(postfix, (int, str))
|
||||
name = abbr + str(postfix)
|
||||
|
||||
requires_grad = cfg_.pop('requires_grad', True)
|
||||
cfg_.setdefault('eps', 1e-5)
|
||||
if layer_type != 'GN':
|
||||
layer = norm_layer(num_features, **cfg_)
|
||||
if layer_type == 'SyncBN' and hasattr(layer, '_specify_ddp_gpu_num'):
|
||||
layer._specify_ddp_gpu_num(1)
|
||||
else:
|
||||
assert 'num_groups' in cfg_
|
||||
layer = norm_layer(num_channels=num_features, **cfg_)
|
||||
|
||||
for param in layer.parameters():
|
||||
param.requires_grad = requires_grad
|
||||
|
||||
return name, layer
|
||||
|
||||
|
||||
def is_norm(layer, exclude=None):
|
||||
"""Check if a layer is a normalization layer.
|
||||
|
||||
Args:
|
||||
layer (nn.Module): The layer to be checked.
|
||||
exclude (type | tuple[type]): Types to be excluded.
|
||||
|
||||
Returns:
|
||||
bool: Whether the layer is a norm layer.
|
||||
"""
|
||||
if exclude is not None:
|
||||
if not isinstance(exclude, tuple):
|
||||
exclude = (exclude, )
|
||||
if not is_tuple_of(exclude, type):
|
||||
raise TypeError(
|
||||
f'"exclude" must be either None or type or a tuple of types, '
|
||||
f'but got {type(exclude)}: {exclude}')
|
||||
|
||||
if exclude and isinstance(layer, exclude):
|
||||
return False
|
||||
|
||||
all_norm_bases = (_BatchNorm, _InstanceNorm, nn.GroupNorm, nn.LayerNorm)
|
||||
return isinstance(layer, all_norm_bases)
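A usage sketch for the norm builder above (illustrative only; the import path is an assumption):

import torch.nn as nn

from annotator.uniformer.mmcv.cnn import build_norm_layer, is_norm  # assumed exports

name, gn = build_norm_layer(dict(type='GN', num_groups=8), num_features=32, postfix=1)
# name == 'gn1'; `eps` falls back to the 1e-5 default set above.
assert isinstance(gn, nn.GroupNorm) and is_norm(gn)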
|
||||
|
|
@ -0,0 +1,36 @@
|
|||
# Copyright (c) OpenMMLab. All rights reserved.
import torch.nn as nn

from .registry import PADDING_LAYERS

PADDING_LAYERS.register_module('zero', module=nn.ZeroPad2d)
PADDING_LAYERS.register_module('reflect', module=nn.ReflectionPad2d)
PADDING_LAYERS.register_module('replicate', module=nn.ReplicationPad2d)


def build_padding_layer(cfg, *args, **kwargs):
    """Build padding layer.

    Args:
        cfg (None or dict): The padding layer config, which should contain:
            - type (str): Layer type.
            - layer args: Args needed to instantiate a padding layer.

    Returns:
        nn.Module: Created padding layer.
    """
    if not isinstance(cfg, dict):
        raise TypeError('cfg must be a dict')
    if 'type' not in cfg:
        raise KeyError('the cfg dict must contain the key "type"')

    cfg_ = cfg.copy()
    padding_type = cfg_.pop('type')
    if padding_type not in PADDING_LAYERS:
        raise KeyError(f'Unrecognized padding type {padding_type}.')
    else:
        padding_layer = PADDING_LAYERS.get(padding_type)

    layer = padding_layer(*args, **kwargs, **cfg_)

    return layer
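A usage sketch for the padding builder above (illustrative only; the import path is an assumption):

import torch
import torch.nn as nn

from annotator.uniformer.mmcv.cnn import build_padding_layer  # assumed export path

pad = build_padding_layer(dict(type='reflect'), 2)  # -> nn.ReflectionPad2d(2)
assert isinstance(pad, nn.ReflectionPad2d)
out = pad(torch.randn(1, 3, 8, 8))  # 2 pixels of reflection padding on each side
assert out.shape == (1, 3, 12, 12)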
|
||||
|
|
@ -0,0 +1,88 @@
|
|||
import inspect
|
||||
import platform
|
||||
|
||||
from .registry import PLUGIN_LAYERS
|
||||
|
||||
if platform.system() == 'Windows':
|
||||
import regex as re
|
||||
else:
|
||||
import re
|
||||
|
||||
|
||||
def infer_abbr(class_type):
|
||||
"""Infer abbreviation from the class name.
|
||||
|
||||
This method will infer the abbreviation to map class types to
|
||||
abbreviations.
|
||||
|
||||
    Rule 1: If the class has the property "_abbr_", return the property.
|
||||
Rule 2: Otherwise, the abbreviation falls back to snake case of class
|
||||
name, e.g. the abbreviation of ``FancyBlock`` will be ``fancy_block``.
|
||||
|
||||
Args:
|
||||
class_type (type): The norm layer type.
|
||||
|
||||
Returns:
|
||||
str: The inferred abbreviation.
|
||||
"""
|
||||
|
||||
def camel2snack(word):
|
||||
"""Convert camel case word into snack case.
|
||||
|
||||
Modified from `inflection lib
|
||||
<https://inflection.readthedocs.io/en/latest/#inflection.underscore>`_.
|
||||
|
||||
Example::
|
||||
|
||||
>>> camel2snack("FancyBlock")
|
||||
'fancy_block'
|
||||
"""
|
||||
|
||||
word = re.sub(r'([A-Z]+)([A-Z][a-z])', r'\1_\2', word)
|
||||
word = re.sub(r'([a-z\d])([A-Z])', r'\1_\2', word)
|
||||
word = word.replace('-', '_')
|
||||
return word.lower()
|
||||
|
||||
if not inspect.isclass(class_type):
|
||||
raise TypeError(
|
||||
f'class_type must be a type, but got {type(class_type)}')
|
||||
if hasattr(class_type, '_abbr_'):
|
||||
return class_type._abbr_
|
||||
else:
|
||||
return camel2snack(class_type.__name__)
|
||||
|
||||
|
||||
def build_plugin_layer(cfg, postfix='', **kwargs):
|
||||
"""Build plugin layer.
|
||||
|
||||
Args:
|
||||
cfg (None or dict): cfg should contain:
|
||||
type (str): identify plugin layer type.
|
||||
layer args: args needed to instantiate a plugin layer.
|
||||
postfix (int, str): appended into norm abbreviation to
|
||||
create named layer. Default: ''.
|
||||
|
||||
Returns:
|
||||
tuple[str, nn.Module]:
|
||||
name (str): abbreviation + postfix
|
||||
layer (nn.Module): created plugin layer
|
||||
"""
|
||||
if not isinstance(cfg, dict):
|
||||
raise TypeError('cfg must be a dict')
|
||||
if 'type' not in cfg:
|
||||
raise KeyError('the cfg dict must contain the key "type"')
|
||||
cfg_ = cfg.copy()
|
||||
|
||||
layer_type = cfg_.pop('type')
|
||||
if layer_type not in PLUGIN_LAYERS:
|
||||
raise KeyError(f'Unrecognized plugin type {layer_type}')
|
||||
|
||||
plugin_layer = PLUGIN_LAYERS.get(layer_type)
|
||||
abbr = infer_abbr(plugin_layer)
|
||||
|
||||
assert isinstance(postfix, (int, str))
|
||||
name = abbr + str(postfix)
|
||||
|
||||
layer = plugin_layer(**kwargs, **cfg_)
|
||||
|
||||
return name, layer
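A usage sketch for the plugin builder above (illustrative only; the import path is an assumption). NonLocal2d is registered in PLUGIN_LAYERS and defines `_abbr_ = 'nonlocal_block'`, so Rule 1 of `infer_abbr` determines the returned name:

from annotator.uniformer.mmcv.cnn import build_plugin_layer  # assumed export path

name, layer = build_plugin_layer(dict(type='NonLocal2d', in_channels=32), postfix=1)
assert name == 'nonlocal_block1'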
|
||||
|
|
@ -0,0 +1,16 @@
|
|||
# Copyright (c) OpenMMLab. All rights reserved.
from annotator.uniformer.mmcv.utils import Registry

CONV_LAYERS = Registry('conv layer')
NORM_LAYERS = Registry('norm layer')
ACTIVATION_LAYERS = Registry('activation layer')
PADDING_LAYERS = Registry('padding layer')
UPSAMPLE_LAYERS = Registry('upsample layer')
PLUGIN_LAYERS = Registry('plugin layer')

DROPOUT_LAYERS = Registry('drop out layers')
POSITIONAL_ENCODING = Registry('position encoding')
ATTENTION = Registry('attention')
FEEDFORWARD_NETWORK = Registry('feed-forward Network')
TRANSFORMER_LAYER = Registry('transformerLayer')
TRANSFORMER_LAYER_SEQUENCE = Registry('transformer-layers sequence')
|
||||
|
|
@ -0,0 +1,21 @@
|
|||
# Copyright (c) OpenMMLab. All rights reserved.
import torch
import torch.nn as nn


class Scale(nn.Module):
    """A learnable scale parameter.

    This layer scales the input by a learnable factor. It multiplies a
    learnable scale parameter of shape (1,) with input of any shape.

    Args:
        scale (float): Initial value of scale factor. Default: 1.0
    """

    def __init__(self, scale=1.0):
        super(Scale, self).__init__()
        self.scale = nn.Parameter(torch.tensor(scale, dtype=torch.float))

    def forward(self, x):
        return x * self.scale
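A usage sketch for Scale (illustrative only; the import path is an assumption). The scale starts at the given value and is updated by the optimizer like any other parameter:

import torch

from annotator.uniformer.mmcv.cnn import Scale  # assumed export path

scale = Scale(scale=0.5)
y = scale(torch.ones(2, 3))
assert torch.allclose(y, torch.full((2, 3), 0.5))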
|
||||
|
|
@ -0,0 +1,25 @@
|
|||
# Copyright (c) OpenMMLab. All rights reserved.
import torch
import torch.nn as nn

from .registry import ACTIVATION_LAYERS


@ACTIVATION_LAYERS.register_module()
class Swish(nn.Module):
    """Swish Module.

    This module applies the swish function:

    .. math::
        Swish(x) = x * Sigmoid(x)

    Returns:
        Tensor: The output tensor.
    """

    def __init__(self):
        super(Swish, self).__init__()

    def forward(self, x):
        return x * torch.sigmoid(x)
|
||||
|
|
@ -0,0 +1,595 @@
|
|||
# Copyright (c) OpenMMLab. All rights reserved.
|
||||
import copy
|
||||
import warnings
|
||||
|
||||
import torch
|
||||
import torch.nn as nn
|
||||
|
||||
from annotator.uniformer.mmcv import ConfigDict, deprecated_api_warning
|
||||
from annotator.uniformer.mmcv.cnn import Linear, build_activation_layer, build_norm_layer
|
||||
from annotator.uniformer.mmcv.runner.base_module import BaseModule, ModuleList, Sequential
|
||||
from annotator.uniformer.mmcv.utils import build_from_cfg
|
||||
from .drop import build_dropout
|
||||
from .registry import (ATTENTION, FEEDFORWARD_NETWORK, POSITIONAL_ENCODING,
|
||||
TRANSFORMER_LAYER, TRANSFORMER_LAYER_SEQUENCE)
|
||||
|
||||
# Avoid BC-breaking of importing MultiScaleDeformableAttention from this file
|
||||
try:
|
||||
from annotator.uniformer.mmcv.ops.multi_scale_deform_attn import MultiScaleDeformableAttention # noqa F401
|
||||
warnings.warn(
|
||||
ImportWarning(
|
||||
'``MultiScaleDeformableAttention`` has been moved to '
|
||||
'``mmcv.ops.multi_scale_deform_attn``, please change original path ' # noqa E501
|
||||
'``from annotator.uniformer.mmcv.cnn.bricks.transformer import MultiScaleDeformableAttention`` ' # noqa E501
|
||||
'to ``from annotator.uniformer.mmcv.ops.multi_scale_deform_attn import MultiScaleDeformableAttention`` ' # noqa E501
|
||||
))
|
||||
|
||||
except ImportError:
|
||||
    warnings.warn('Failed to import ``MultiScaleDeformableAttention`` from '
|
||||
'``mmcv.ops.multi_scale_deform_attn``, '
|
||||
'You should install ``mmcv-full`` if you need this module. ')
|
||||
|
||||
|
||||
def build_positional_encoding(cfg, default_args=None):
|
||||
"""Builder for Position Encoding."""
|
||||
return build_from_cfg(cfg, POSITIONAL_ENCODING, default_args)
|
||||
|
||||
|
||||
def build_attention(cfg, default_args=None):
|
||||
"""Builder for attention."""
|
||||
return build_from_cfg(cfg, ATTENTION, default_args)
|
||||
|
||||
|
||||
def build_feedforward_network(cfg, default_args=None):
|
||||
"""Builder for feed-forward network (FFN)."""
|
||||
return build_from_cfg(cfg, FEEDFORWARD_NETWORK, default_args)
|
||||
|
||||
|
||||
def build_transformer_layer(cfg, default_args=None):
|
||||
"""Builder for transformer layer."""
|
||||
return build_from_cfg(cfg, TRANSFORMER_LAYER, default_args)
|
||||
|
||||
|
||||
def build_transformer_layer_sequence(cfg, default_args=None):
|
||||
"""Builder for transformer encoder and transformer decoder."""
|
||||
return build_from_cfg(cfg, TRANSFORMER_LAYER_SEQUENCE, default_args)
|
||||
|
||||
|
||||
@ATTENTION.register_module()
|
||||
class MultiheadAttention(BaseModule):
|
||||
"""A wrapper for ``torch.nn.MultiheadAttention``.
|
||||
|
||||
This module implements MultiheadAttention with identity connection,
|
||||
and positional encoding is also passed as input.
|
||||
|
||||
Args:
|
||||
embed_dims (int): The embedding dimension.
|
||||
num_heads (int): Parallel attention heads.
|
||||
attn_drop (float): A Dropout layer on attn_output_weights.
|
||||
Default: 0.0.
|
||||
proj_drop (float): A Dropout layer after `nn.MultiheadAttention`.
|
||||
Default: 0.0.
|
||||
dropout_layer (obj:`ConfigDict`): The dropout_layer used
|
||||
when adding the shortcut.
|
||||
init_cfg (obj:`mmcv.ConfigDict`): The Config for initialization.
|
||||
Default: None.
|
||||
batch_first (bool): When it is True, Key, Query and Value are shape of
|
||||
(batch, n, embed_dim), otherwise (n, batch, embed_dim).
|
||||
Default to False.
|
||||
"""
|
||||
|
||||
def __init__(self,
|
||||
embed_dims,
|
||||
num_heads,
|
||||
attn_drop=0.,
|
||||
proj_drop=0.,
|
||||
dropout_layer=dict(type='Dropout', drop_prob=0.),
|
||||
init_cfg=None,
|
||||
batch_first=False,
|
||||
**kwargs):
|
||||
super(MultiheadAttention, self).__init__(init_cfg)
|
||||
if 'dropout' in kwargs:
|
||||
            warnings.warn('The argument `dropout` in MultiheadAttention '
                          'has been deprecated; now you can separately '
|
||||
'set `attn_drop`(float), proj_drop(float), '
|
||||
'and `dropout_layer`(dict) ')
|
||||
attn_drop = kwargs['dropout']
|
||||
dropout_layer['drop_prob'] = kwargs.pop('dropout')
|
||||
|
||||
self.embed_dims = embed_dims
|
||||
self.num_heads = num_heads
|
||||
self.batch_first = batch_first
|
||||
|
||||
self.attn = nn.MultiheadAttention(embed_dims, num_heads, attn_drop,
|
||||
**kwargs)
|
||||
|
||||
self.proj_drop = nn.Dropout(proj_drop)
|
||||
self.dropout_layer = build_dropout(
|
||||
dropout_layer) if dropout_layer else nn.Identity()
|
||||
|
||||
@deprecated_api_warning({'residual': 'identity'},
|
||||
cls_name='MultiheadAttention')
|
||||
def forward(self,
|
||||
query,
|
||||
key=None,
|
||||
value=None,
|
||||
identity=None,
|
||||
query_pos=None,
|
||||
key_pos=None,
|
||||
attn_mask=None,
|
||||
key_padding_mask=None,
|
||||
**kwargs):
|
||||
"""Forward function for `MultiheadAttention`.
|
||||
|
||||
**kwargs allow passing a more general data flow when combining
|
||||
with other operations in `transformerlayer`.
|
||||
|
||||
Args:
|
||||
query (Tensor): The input query with shape [num_queries, bs,
|
||||
embed_dims] if self.batch_first is False, else
|
||||
                [bs, num_queries, embed_dims].
|
||||
key (Tensor): The key tensor with shape [num_keys, bs,
|
||||
embed_dims] if self.batch_first is False, else
|
||||
[bs, num_keys, embed_dims] .
|
||||
If None, the ``query`` will be used. Defaults to None.
|
||||
value (Tensor): The value tensor with same shape as `key`.
|
||||
Same in `nn.MultiheadAttention.forward`. Defaults to None.
|
||||
If None, the `key` will be used.
|
||||
identity (Tensor): This tensor, with the same shape as x,
|
||||
will be used for the identity link.
|
||||
If None, `x` will be used. Defaults to None.
|
||||
query_pos (Tensor): The positional encoding for query, with
|
||||
the same shape as `x`. If not None, it will
|
||||
be added to `x` before forward function. Defaults to None.
|
||||
key_pos (Tensor): The positional encoding for `key`, with the
|
||||
same shape as `key`. Defaults to None. If not None, it will
|
||||
be added to `key` before forward function. If None, and
|
||||
`query_pos` has the same shape as `key`, then `query_pos`
|
||||
will be used for `key_pos`. Defaults to None.
|
||||
attn_mask (Tensor): ByteTensor mask with shape [num_queries,
|
||||
num_keys]. Same in `nn.MultiheadAttention.forward`.
|
||||
Defaults to None.
|
||||
key_padding_mask (Tensor): ByteTensor with shape [bs, num_keys].
|
||||
Defaults to None.
|
||||
|
||||
Returns:
|
||||
Tensor: forwarded results with shape
|
||||
[num_queries, bs, embed_dims]
|
||||
if self.batch_first is False, else
|
||||
                [bs, num_queries, embed_dims].
|
||||
"""
|
||||
|
||||
if key is None:
|
||||
key = query
|
||||
if value is None:
|
||||
value = key
|
||||
if identity is None:
|
||||
identity = query
|
||||
if key_pos is None:
|
||||
if query_pos is not None:
|
||||
# use query_pos if key_pos is not available
|
||||
if query_pos.shape == key.shape:
|
||||
key_pos = query_pos
|
||||
else:
|
||||
warnings.warn(f'position encoding of key is'
|
||||
f'missing in {self.__class__.__name__}.')
|
||||
if query_pos is not None:
|
||||
query = query + query_pos
|
||||
if key_pos is not None:
|
||||
key = key + key_pos
|
||||
|
||||
# Because the dataflow('key', 'query', 'value') of
|
||||
# ``torch.nn.MultiheadAttention`` is (num_query, batch,
|
||||
# embed_dims), We should adjust the shape of dataflow from
|
||||
# batch_first (batch, num_query, embed_dims) to num_query_first
|
||||
# (num_query ,batch, embed_dims), and recover ``attn_output``
|
||||
# from num_query_first to batch_first.
|
||||
if self.batch_first:
|
||||
query = query.transpose(0, 1)
|
||||
key = key.transpose(0, 1)
|
||||
value = value.transpose(0, 1)
|
||||
|
||||
out = self.attn(
|
||||
query=query,
|
||||
key=key,
|
||||
value=value,
|
||||
attn_mask=attn_mask,
|
||||
key_padding_mask=key_padding_mask)[0]
|
||||
|
||||
if self.batch_first:
|
||||
out = out.transpose(0, 1)
|
||||
|
||||
return identity + self.dropout_layer(self.proj_drop(out))
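A usage sketch for the wrapper above (illustrative only; the import path is an assumption):

import torch

from annotator.uniformer.mmcv.cnn.bricks.transformer import MultiheadAttention  # assumed path

attn = MultiheadAttention(embed_dims=256, num_heads=8)
query = torch.randn(100, 2, 256)  # (num_queries, bs, embed_dims) with batch_first=False
out = attn(query)  # key/value default to query; the identity shortcut is added back
assert out.shape == query.shape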
|
||||
|
||||
|
||||
@FEEDFORWARD_NETWORK.register_module()
|
||||
class FFN(BaseModule):
|
||||
"""Implements feed-forward networks (FFNs) with identity connection.
|
||||
|
||||
Args:
|
||||
embed_dims (int): The feature dimension. Same as
|
||||
`MultiheadAttention`. Defaults: 256.
|
||||
feedforward_channels (int): The hidden dimension of FFNs.
|
||||
Defaults: 1024.
|
||||
num_fcs (int, optional): The number of fully-connected layers in
|
||||
FFNs. Default: 2.
|
||||
act_cfg (dict, optional): The activation config for FFNs.
|
||||
Default: dict(type='ReLU')
|
||||
ffn_drop (float, optional): Probability of an element to be
|
||||
zeroed in FFN. Default 0.0.
|
||||
add_identity (bool, optional): Whether to add the
|
||||
identity connection. Default: `True`.
|
||||
dropout_layer (obj:`ConfigDict`): The dropout_layer used
|
||||
when adding the shortcut.
|
||||
init_cfg (obj:`mmcv.ConfigDict`): The Config for initialization.
|
||||
Default: None.
|
||||
"""
|
||||
|
||||
@deprecated_api_warning(
|
||||
{
|
||||
'dropout': 'ffn_drop',
|
||||
'add_residual': 'add_identity'
|
||||
},
|
||||
cls_name='FFN')
|
||||
def __init__(self,
|
||||
embed_dims=256,
|
||||
feedforward_channels=1024,
|
||||
num_fcs=2,
|
||||
act_cfg=dict(type='ReLU', inplace=True),
|
||||
ffn_drop=0.,
|
||||
dropout_layer=None,
|
||||
add_identity=True,
|
||||
init_cfg=None,
|
||||
**kwargs):
|
||||
super(FFN, self).__init__(init_cfg)
|
||||
assert num_fcs >= 2, 'num_fcs should be no less ' \
|
||||
f'than 2. got {num_fcs}.'
|
||||
self.embed_dims = embed_dims
|
||||
self.feedforward_channels = feedforward_channels
|
||||
self.num_fcs = num_fcs
|
||||
self.act_cfg = act_cfg
|
||||
self.activate = build_activation_layer(act_cfg)
|
||||
|
||||
layers = []
|
||||
in_channels = embed_dims
|
||||
for _ in range(num_fcs - 1):
|
||||
layers.append(
|
||||
Sequential(
|
||||
Linear(in_channels, feedforward_channels), self.activate,
|
||||
nn.Dropout(ffn_drop)))
|
||||
in_channels = feedforward_channels
|
||||
layers.append(Linear(feedforward_channels, embed_dims))
|
||||
layers.append(nn.Dropout(ffn_drop))
|
||||
self.layers = Sequential(*layers)
|
||||
self.dropout_layer = build_dropout(
|
||||
dropout_layer) if dropout_layer else torch.nn.Identity()
|
||||
self.add_identity = add_identity
|
||||
|
||||
@deprecated_api_warning({'residual': 'identity'}, cls_name='FFN')
|
||||
def forward(self, x, identity=None):
|
||||
"""Forward function for `FFN`.
|
||||
|
||||
        The function adds ``x`` to the output tensor if ``identity`` is None.
|
||||
"""
|
||||
out = self.layers(x)
|
||||
if not self.add_identity:
|
||||
return self.dropout_layer(out)
|
||||
if identity is None:
|
||||
identity = x
|
||||
return identity + self.dropout_layer(out)
|
||||
|
||||
|
||||
@TRANSFORMER_LAYER.register_module()
|
||||
class BaseTransformerLayer(BaseModule):
|
||||
"""Base `TransformerLayer` for vision transformer.
|
||||
|
||||
It can be built from `mmcv.ConfigDict` and support more flexible
|
||||
customization, for example, using any number of `FFN or LN ` and
|
||||
use different kinds of `attention` by specifying a list of `ConfigDict`
|
||||
named `attn_cfgs`. It is worth mentioning that it supports `prenorm`
|
||||
when you specifying `norm` as the first element of `operation_order`.
|
||||
More details about the `prenorm`: `On Layer Normalization in the
|
||||
Transformer Architecture <https://arxiv.org/abs/2002.04745>`_ .
|
||||
|
||||
Args:
|
||||
attn_cfgs (list[`mmcv.ConfigDict`] | obj:`mmcv.ConfigDict` | None )):
|
||||
Configs for `self_attention` or `cross_attention` modules,
|
||||
The order of the configs in the list should be consistent with
|
||||
corresponding attentions in operation_order.
|
||||
If it is a dict, all of the attention modules in operation_order
|
||||
will be built with this config. Default: None.
|
||||
ffn_cfgs (list[`mmcv.ConfigDict`] | obj:`mmcv.ConfigDict` | None )):
|
||||
Configs for FFN, The order of the configs in the list should be
|
||||
consistent with corresponding ffn in operation_order.
|
||||
If it is a dict, all of the attention modules in operation_order
|
||||
will be built with this config.
|
||||
operation_order (tuple[str]): The execution order of operation
|
||||
in transformer. Such as ('self_attn', 'norm', 'ffn', 'norm').
|
||||
Support `prenorm` when you specifying first element as `norm`.
|
||||
Default:None.
|
||||
norm_cfg (dict): Config dict for normalization layer.
|
||||
Default: dict(type='LN').
|
||||
init_cfg (obj:`mmcv.ConfigDict`): The Config for initialization.
|
||||
Default: None.
|
||||
batch_first (bool): Key, Query and Value are shape
|
||||
of (batch, n, embed_dim)
|
||||
or (n, batch, embed_dim). Default to False.
|
||||
"""
|
||||
|
||||
def __init__(self,
|
||||
attn_cfgs=None,
|
||||
ffn_cfgs=dict(
|
||||
type='FFN',
|
||||
embed_dims=256,
|
||||
feedforward_channels=1024,
|
||||
num_fcs=2,
|
||||
ffn_drop=0.,
|
||||
act_cfg=dict(type='ReLU', inplace=True),
|
||||
),
|
||||
operation_order=None,
|
||||
norm_cfg=dict(type='LN'),
|
||||
init_cfg=None,
|
||||
batch_first=False,
|
||||
**kwargs):
|
||||
|
||||
deprecated_args = dict(
|
||||
feedforward_channels='feedforward_channels',
|
||||
ffn_dropout='ffn_drop',
|
||||
ffn_num_fcs='num_fcs')
|
||||
for ori_name, new_name in deprecated_args.items():
|
||||
if ori_name in kwargs:
|
||||
warnings.warn(
|
||||
f'The arguments `{ori_name}` in BaseTransformerLayer '
|
||||
f'has been deprecated, now you should set `{new_name}` '
|
||||
f'and other FFN related arguments '
|
||||
f'to a dict named `ffn_cfgs`. ')
|
||||
ffn_cfgs[new_name] = kwargs[ori_name]
|
||||
|
||||
super(BaseTransformerLayer, self).__init__(init_cfg)
|
||||
|
||||
self.batch_first = batch_first
|
||||
|
||||
assert set(operation_order) & set(
|
||||
['self_attn', 'norm', 'ffn', 'cross_attn']) == \
|
||||
set(operation_order), f'The operation_order of' \
|
||||
f' {self.__class__.__name__} should ' \
|
||||
            f'only contain operation types from ' \
|
||||
f"{['self_attn', 'norm', 'ffn', 'cross_attn']}"
|
||||
|
||||
num_attn = operation_order.count('self_attn') + operation_order.count(
|
||||
'cross_attn')
|
||||
if isinstance(attn_cfgs, dict):
|
||||
attn_cfgs = [copy.deepcopy(attn_cfgs) for _ in range(num_attn)]
|
||||
else:
|
||||
assert num_attn == len(attn_cfgs), f'The length ' \
|
||||
f'of attn_cfg {num_attn} is ' \
|
||||
f'not consistent with the number of attention' \
|
||||
f'in operation_order {operation_order}.'
|
||||
|
||||
self.num_attn = num_attn
|
||||
self.operation_order = operation_order
|
||||
self.norm_cfg = norm_cfg
|
||||
self.pre_norm = operation_order[0] == 'norm'
|
||||
self.attentions = ModuleList()
|
||||
|
||||
index = 0
|
||||
for operation_name in operation_order:
|
||||
if operation_name in ['self_attn', 'cross_attn']:
|
||||
if 'batch_first' in attn_cfgs[index]:
|
||||
assert self.batch_first == attn_cfgs[index]['batch_first']
|
||||
else:
|
||||
attn_cfgs[index]['batch_first'] = self.batch_first
|
||||
attention = build_attention(attn_cfgs[index])
|
||||
# Some custom attentions used as `self_attn`
|
||||
# or `cross_attn` can have different behavior.
|
||||
attention.operation_name = operation_name
|
||||
self.attentions.append(attention)
|
||||
index += 1
|
||||
|
||||
self.embed_dims = self.attentions[0].embed_dims
|
||||
|
||||
self.ffns = ModuleList()
|
||||
num_ffns = operation_order.count('ffn')
|
||||
if isinstance(ffn_cfgs, dict):
|
||||
ffn_cfgs = ConfigDict(ffn_cfgs)
|
||||
if isinstance(ffn_cfgs, dict):
|
||||
ffn_cfgs = [copy.deepcopy(ffn_cfgs) for _ in range(num_ffns)]
|
||||
assert len(ffn_cfgs) == num_ffns
|
||||
for ffn_index in range(num_ffns):
|
||||
if 'embed_dims' not in ffn_cfgs[ffn_index]:
|
||||
                ffn_cfgs[ffn_index]['embed_dims'] = self.embed_dims
|
||||
else:
|
||||
assert ffn_cfgs[ffn_index]['embed_dims'] == self.embed_dims
|
||||
self.ffns.append(
|
||||
build_feedforward_network(ffn_cfgs[ffn_index],
|
||||
dict(type='FFN')))
|
||||
|
||||
self.norms = ModuleList()
|
||||
num_norms = operation_order.count('norm')
|
||||
for _ in range(num_norms):
|
||||
self.norms.append(build_norm_layer(norm_cfg, self.embed_dims)[1])
|
||||
|
||||
def forward(self,
|
||||
query,
|
||||
key=None,
|
||||
value=None,
|
||||
query_pos=None,
|
||||
key_pos=None,
|
||||
attn_masks=None,
|
||||
query_key_padding_mask=None,
|
||||
key_padding_mask=None,
|
||||
**kwargs):
|
||||
"""Forward function for `TransformerDecoderLayer`.
|
||||
|
||||
**kwargs contains some specific arguments of attentions.
|
||||
|
||||
Args:
|
||||
query (Tensor): The input query with shape
|
||||
[num_queries, bs, embed_dims] if
|
||||
self.batch_first is False, else
|
||||
                [bs, num_queries, embed_dims].
|
||||
key (Tensor): The key tensor with shape [num_keys, bs,
|
||||
embed_dims] if self.batch_first is False, else
|
||||
[bs, num_keys, embed_dims] .
|
||||
value (Tensor): The value tensor with same shape as `key`.
|
||||
query_pos (Tensor): The positional encoding for `query`.
|
||||
Default: None.
|
||||
key_pos (Tensor): The positional encoding for `key`.
|
||||
Default: None.
|
||||
attn_masks (List[Tensor] | None): 2D Tensor used in
|
||||
calculation of corresponding attention. The length of
|
||||
it should equal to the number of `attention` in
|
||||
`operation_order`. Default: None.
|
||||
query_key_padding_mask (Tensor): ByteTensor for `query`, with
|
||||
shape [bs, num_queries]. Only used in `self_attn` layer.
|
||||
Defaults to None.
|
||||
key_padding_mask (Tensor): ByteTensor for `query`, with
|
||||
shape [bs, num_keys]. Default: None.
|
||||
|
||||
Returns:
|
||||
Tensor: forwarded results with shape [num_queries, bs, embed_dims].
|
||||
"""
|
||||
|
||||
norm_index = 0
|
||||
attn_index = 0
|
||||
ffn_index = 0
|
||||
identity = query
|
||||
if attn_masks is None:
|
||||
attn_masks = [None for _ in range(self.num_attn)]
|
||||
elif isinstance(attn_masks, torch.Tensor):
|
||||
attn_masks = [
|
||||
copy.deepcopy(attn_masks) for _ in range(self.num_attn)
|
||||
]
|
||||
warnings.warn(f'Use same attn_mask in all attentions in '
|
||||
f'{self.__class__.__name__} ')
|
||||
else:
|
||||
assert len(attn_masks) == self.num_attn, f'The length of ' \
|
||||
f'attn_masks {len(attn_masks)} must be equal ' \
|
||||
f'to the number of attention in ' \
|
||||
f'operation_order {self.num_attn}'
|
||||
|
||||
for layer in self.operation_order:
|
||||
if layer == 'self_attn':
|
||||
temp_key = temp_value = query
|
||||
query = self.attentions[attn_index](
|
||||
query,
|
||||
temp_key,
|
||||
temp_value,
|
||||
identity if self.pre_norm else None,
|
||||
query_pos=query_pos,
|
||||
key_pos=query_pos,
|
||||
attn_mask=attn_masks[attn_index],
|
||||
key_padding_mask=query_key_padding_mask,
|
||||
**kwargs)
|
||||
attn_index += 1
|
||||
identity = query
|
||||
|
||||
elif layer == 'norm':
|
||||
query = self.norms[norm_index](query)
|
||||
norm_index += 1
|
||||
|
||||
elif layer == 'cross_attn':
|
||||
query = self.attentions[attn_index](
|
||||
query,
|
||||
key,
|
||||
value,
|
||||
identity if self.pre_norm else None,
|
||||
query_pos=query_pos,
|
||||
key_pos=key_pos,
|
||||
attn_mask=attn_masks[attn_index],
|
||||
key_padding_mask=key_padding_mask,
|
||||
**kwargs)
|
||||
attn_index += 1
|
||||
identity = query
|
||||
|
||||
elif layer == 'ffn':
|
||||
query = self.ffns[ffn_index](
|
||||
query, identity if self.pre_norm else None)
|
||||
ffn_index += 1
|
||||
|
||||
return query
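A usage sketch for a post-norm encoder-style layer built from configs (illustrative only; the import path and the chosen dimensions are assumptions):

import torch

from annotator.uniformer.mmcv.cnn.bricks.transformer import BaseTransformerLayer  # assumed path

layer = BaseTransformerLayer(
    attn_cfgs=dict(type='MultiheadAttention', embed_dims=256, num_heads=8),
    ffn_cfgs=dict(type='FFN', embed_dims=256, feedforward_channels=512),
    operation_order=('self_attn', 'norm', 'ffn', 'norm'))
query = torch.randn(10, 2, 256)  # (num_queries, bs, embed_dims)
out = layer(query)  # self-attention, then FFN, each followed by LayerNorm
assert out.shape == query.shape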
|
||||
|
||||
|
||||
@TRANSFORMER_LAYER_SEQUENCE.register_module()
|
||||
class TransformerLayerSequence(BaseModule):
|
||||
"""Base class for TransformerEncoder and TransformerDecoder in vision
|
||||
transformer.
|
||||
|
||||
As base-class of Encoder and Decoder in vision transformer.
|
||||
Support customization such as specifying different kind
|
||||
of `transformer_layer` in `transformer_coder`.
|
||||
|
||||
Args:
|
||||
        transformerlayers (list[obj:`mmcv.ConfigDict`] |
|
||||
obj:`mmcv.ConfigDict`): Config of transformerlayer
|
||||
in TransformerCoder. If it is obj:`mmcv.ConfigDict`,
|
||||
it would be repeated `num_layer` times to a
|
||||
list[`mmcv.ConfigDict`]. Default: None.
|
||||
num_layers (int): The number of `TransformerLayer`. Default: None.
|
||||
init_cfg (obj:`mmcv.ConfigDict`): The Config for initialization.
|
||||
Default: None.
|
||||
"""
|
||||
|
||||
def __init__(self, transformerlayers=None, num_layers=None, init_cfg=None):
|
||||
super(TransformerLayerSequence, self).__init__(init_cfg)
|
||||
if isinstance(transformerlayers, dict):
|
||||
transformerlayers = [
|
||||
copy.deepcopy(transformerlayers) for _ in range(num_layers)
|
||||
]
|
||||
else:
|
||||
assert isinstance(transformerlayers, list) and \
|
||||
len(transformerlayers) == num_layers
|
||||
self.num_layers = num_layers
|
||||
self.layers = ModuleList()
|
||||
for i in range(num_layers):
|
||||
self.layers.append(build_transformer_layer(transformerlayers[i]))
|
||||
self.embed_dims = self.layers[0].embed_dims
|
||||
self.pre_norm = self.layers[0].pre_norm
|
||||
|
||||
def forward(self,
|
||||
query,
|
||||
key,
|
||||
value,
|
||||
query_pos=None,
|
||||
key_pos=None,
|
||||
attn_masks=None,
|
||||
query_key_padding_mask=None,
|
||||
key_padding_mask=None,
|
||||
**kwargs):
|
||||
"""Forward function for `TransformerCoder`.
|
||||
|
||||
Args:
|
||||
query (Tensor): Input query with shape
|
||||
`(num_queries, bs, embed_dims)`.
|
||||
key (Tensor): The key tensor with shape
|
||||
`(num_keys, bs, embed_dims)`.
|
||||
value (Tensor): The value tensor with shape
|
||||
`(num_keys, bs, embed_dims)`.
|
||||
query_pos (Tensor): The positional encoding for `query`.
|
||||
Default: None.
|
||||
key_pos (Tensor): The positional encoding for `key`.
|
||||
Default: None.
|
||||
attn_masks (List[Tensor], optional): Each element is 2D Tensor
|
||||
which is used in calculation of corresponding attention in
|
||||
operation_order. Default: None.
|
||||
query_key_padding_mask (Tensor): ByteTensor for `query`, with
|
||||
shape [bs, num_queries]. Only used in self-attention
|
||||
Default: None.
|
||||
key_padding_mask (Tensor): ByteTensor for `query`, with
|
||||
shape [bs, num_keys]. Default: None.
|
||||
|
||||
Returns:
|
||||
Tensor: results with shape [num_queries, bs, embed_dims].
|
||||
"""
|
||||
for layer in self.layers:
|
||||
query = layer(
|
||||
query,
|
||||
key,
|
||||
value,
|
||||
query_pos=query_pos,
|
||||
key_pos=key_pos,
|
||||
attn_masks=attn_masks,
|
||||
query_key_padding_mask=query_key_padding_mask,
|
||||
key_padding_mask=key_padding_mask,
|
||||
**kwargs)
|
||||
return query
|
||||
|
|
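A minimal usage sketch for the sequence above (not part of the patch): it assumes the full set of vendored mmcv transformer bricks (`build_transformer_layer_sequence`, `BaseTransformerLayer`, `MultiheadAttention`, `FFN`) is importable in this copy; the dimensions are illustrative only, and the single layer dict is deep-copied `num_layers` times as described in the docstring.

# hypothetical config; assumes build_transformer_layer_sequence is in scope
encoder_cfg = dict(
    type='TransformerLayerSequence',
    num_layers=6,
    transformerlayers=dict(
        type='BaseTransformerLayer',
        attn_cfgs=dict(type='MultiheadAttention', embed_dims=256, num_heads=8),
        ffn_cfgs=dict(type='FFN', embed_dims=256, feedforward_channels=1024),
        operation_order=('self_attn', 'norm', 'ffn', 'norm')))
encoder = build_transformer_layer_sequence(encoder_cfg)
# at call time query/key/value are (num_queries, bs, embed_dims) tensors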
@@ -0,0 +1,84 @@
|
|||
# Copyright (c) OpenMMLab. All rights reserved.
|
||||
import torch.nn as nn
|
||||
import torch.nn.functional as F
|
||||
|
||||
from ..utils import xavier_init
|
||||
from .registry import UPSAMPLE_LAYERS
|
||||
|
||||
UPSAMPLE_LAYERS.register_module('nearest', module=nn.Upsample)
|
||||
UPSAMPLE_LAYERS.register_module('bilinear', module=nn.Upsample)
|
||||
|
||||
|
||||
@UPSAMPLE_LAYERS.register_module(name='pixel_shuffle')
|
||||
class PixelShufflePack(nn.Module):
|
||||
"""Pixel Shuffle upsample layer.
|
||||
|
||||
This module packs `F.pixel_shuffle()` and a nn.Conv2d module together to
|
||||
achieve a simple upsampling with pixel shuffle.
|
||||
|
||||
Args:
|
||||
in_channels (int): Number of input channels.
|
||||
out_channels (int): Number of output channels.
|
||||
scale_factor (int): Upsample ratio.
|
||||
upsample_kernel (int): Kernel size of the conv layer to expand the
|
||||
channels.
|
||||
"""
|
||||
|
||||
def __init__(self, in_channels, out_channels, scale_factor,
|
||||
upsample_kernel):
|
||||
super(PixelShufflePack, self).__init__()
|
||||
self.in_channels = in_channels
|
||||
self.out_channels = out_channels
|
||||
self.scale_factor = scale_factor
|
||||
self.upsample_kernel = upsample_kernel
|
||||
self.upsample_conv = nn.Conv2d(
|
||||
self.in_channels,
|
||||
self.out_channels * scale_factor * scale_factor,
|
||||
self.upsample_kernel,
|
||||
padding=(self.upsample_kernel - 1) // 2)
|
||||
self.init_weights()
|
||||
|
||||
def init_weights(self):
|
||||
xavier_init(self.upsample_conv, distribution='uniform')
|
||||
|
||||
def forward(self, x):
|
||||
x = self.upsample_conv(x)
|
||||
x = F.pixel_shuffle(x, self.scale_factor)
|
||||
return x
|
||||
|
||||
|
||||
def build_upsample_layer(cfg, *args, **kwargs):
|
||||
"""Build upsample layer.
|
||||
|
||||
Args:
|
||||
cfg (dict): The upsample layer config, which should contain:
|
||||
|
||||
- type (str): Layer type.
|
||||
- scale_factor (int): Upsample ratio, which is not applicable to
|
||||
deconv.
|
||||
- layer args: Args needed to instantiate a upsample layer.
|
||||
args (argument list): Arguments passed to the ``__init__``
|
||||
method of the corresponding conv layer.
|
||||
kwargs (keyword arguments): Keyword arguments passed to the
|
||||
``__init__`` method of the corresponding conv layer.
|
||||
|
||||
Returns:
|
||||
nn.Module: Created upsample layer.
|
||||
"""
|
||||
if not isinstance(cfg, dict):
|
||||
raise TypeError(f'cfg must be a dict, but got {type(cfg)}')
|
||||
if 'type' not in cfg:
|
||||
raise KeyError(
|
||||
f'the cfg dict must contain the key "type", but got {cfg}')
|
||||
cfg_ = cfg.copy()
|
||||
|
||||
layer_type = cfg_.pop('type')
|
||||
if layer_type not in UPSAMPLE_LAYERS:
|
||||
raise KeyError(f'Unrecognized upsample type {layer_type}')
|
||||
else:
|
||||
upsample = UPSAMPLE_LAYERS.get(layer_type)
|
||||
|
||||
if upsample is nn.Upsample:
|
||||
cfg_['mode'] = layer_type
|
||||
layer = upsample(*args, **kwargs, **cfg_)
|
||||
return layer
|
||||
|
|
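A short usage sketch of the config-driven construction above (assuming `build_upsample_layer` from this file is in scope; channel sizes are arbitrary): 'pixel_shuffle' maps to PixelShufflePack, while 'nearest'/'bilinear' map to nn.Upsample with the mode taken from the type.

import torch

up = build_upsample_layer(
    dict(type='pixel_shuffle', in_channels=64, out_channels=64,
         scale_factor=2, upsample_kernel=3))
x = torch.randn(1, 64, 32, 32)
assert up(x).shape == (1, 64, 64, 64)       # conv expands channels, pixel_shuffle upsamples

nearest = build_upsample_layer(dict(type='nearest', scale_factor=2))
assert nearest(x).shape == (1, 64, 64, 64)  # nn.Upsample(mode='nearest', scale_factor=2)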
@@ -0,0 +1,180 @@
|
|||
# Copyright (c) OpenMMLab. All rights reserved.
|
||||
r"""Modified from https://github.com/facebookresearch/detectron2/blob/master/detectron2/layers/wrappers.py # noqa: E501
|
||||
|
||||
Wrap some nn modules to support empty tensor input. Currently, these wrappers
|
||||
are mainly used in mask heads like fcn_mask_head and maskiou_heads since mask
|
||||
heads are trained on only positive RoIs.
|
||||
"""
|
||||
import math
|
||||
|
||||
import torch
|
||||
import torch.nn as nn
|
||||
from torch.nn.modules.utils import _pair, _triple
|
||||
|
||||
from .registry import CONV_LAYERS, UPSAMPLE_LAYERS
|
||||
|
||||
if torch.__version__ == 'parrots':
|
||||
TORCH_VERSION = torch.__version__
|
||||
else:
|
||||
# torch.__version__ could be 1.3.1+cu92, we only need the first two
|
||||
# for comparison
|
||||
TORCH_VERSION = tuple(int(x) for x in torch.__version__.split('.')[:2])
|
||||
|
||||
|
||||
def obsolete_torch_version(torch_version, version_threshold):
|
||||
return torch_version == 'parrots' or torch_version <= version_threshold
|
||||
|
||||
|
||||
class NewEmptyTensorOp(torch.autograd.Function):
|
||||
|
||||
@staticmethod
|
||||
def forward(ctx, x, new_shape):
|
||||
ctx.shape = x.shape
|
||||
return x.new_empty(new_shape)
|
||||
|
||||
@staticmethod
|
||||
def backward(ctx, grad):
|
||||
shape = ctx.shape
|
||||
return NewEmptyTensorOp.apply(grad, shape), None
|
||||
|
||||
|
||||
@CONV_LAYERS.register_module('Conv', force=True)
|
||||
class Conv2d(nn.Conv2d):
|
||||
|
||||
def forward(self, x):
|
||||
if x.numel() == 0 and obsolete_torch_version(TORCH_VERSION, (1, 4)):
|
||||
out_shape = [x.shape[0], self.out_channels]
|
||||
for i, k, p, s, d in zip(x.shape[-2:], self.kernel_size,
|
||||
self.padding, self.stride, self.dilation):
|
||||
o = (i + 2 * p - (d * (k - 1) + 1)) // s + 1
|
||||
out_shape.append(o)
|
||||
empty = NewEmptyTensorOp.apply(x, out_shape)
|
||||
if self.training:
|
||||
# produce dummy gradient to avoid DDP warning.
|
||||
dummy = sum(x.view(-1)[0] for x in self.parameters()) * 0.0
|
||||
return empty + dummy
|
||||
else:
|
||||
return empty
|
||||
|
||||
return super().forward(x)
|
||||
|
||||
|
||||
@CONV_LAYERS.register_module('Conv3d', force=True)
|
||||
class Conv3d(nn.Conv3d):
|
||||
|
||||
def forward(self, x):
|
||||
if x.numel() == 0 and obsolete_torch_version(TORCH_VERSION, (1, 4)):
|
||||
out_shape = [x.shape[0], self.out_channels]
|
||||
for i, k, p, s, d in zip(x.shape[-3:], self.kernel_size,
|
||||
self.padding, self.stride, self.dilation):
|
||||
o = (i + 2 * p - (d * (k - 1) + 1)) // s + 1
|
||||
out_shape.append(o)
|
||||
empty = NewEmptyTensorOp.apply(x, out_shape)
|
||||
if self.training:
|
||||
# produce dummy gradient to avoid DDP warning.
|
||||
dummy = sum(x.view(-1)[0] for x in self.parameters()) * 0.0
|
||||
return empty + dummy
|
||||
else:
|
||||
return empty
|
||||
|
||||
return super().forward(x)
|
||||
|
||||
|
||||
@CONV_LAYERS.register_module()
|
||||
@CONV_LAYERS.register_module('deconv')
|
||||
@UPSAMPLE_LAYERS.register_module('deconv', force=True)
|
||||
class ConvTranspose2d(nn.ConvTranspose2d):
|
||||
|
||||
def forward(self, x):
|
||||
if x.numel() == 0 and obsolete_torch_version(TORCH_VERSION, (1, 4)):
|
||||
out_shape = [x.shape[0], self.out_channels]
|
||||
for i, k, p, s, d, op in zip(x.shape[-2:], self.kernel_size,
|
||||
self.padding, self.stride,
|
||||
self.dilation, self.output_padding):
|
||||
out_shape.append((i - 1) * s - 2 * p + (d * (k - 1) + 1) + op)
|
||||
empty = NewEmptyTensorOp.apply(x, out_shape)
|
||||
if self.training:
|
||||
# produce dummy gradient to avoid DDP warning.
|
||||
dummy = sum(x.view(-1)[0] for x in self.parameters()) * 0.0
|
||||
return empty + dummy
|
||||
else:
|
||||
return empty
|
||||
|
||||
return super().forward(x)
|
||||
|
||||
|
||||
@CONV_LAYERS.register_module()
|
||||
@CONV_LAYERS.register_module('deconv3d')
|
||||
@UPSAMPLE_LAYERS.register_module('deconv3d', force=True)
|
||||
class ConvTranspose3d(nn.ConvTranspose3d):
|
||||
|
||||
def forward(self, x):
|
||||
if x.numel() == 0 and obsolete_torch_version(TORCH_VERSION, (1, 4)):
|
||||
out_shape = [x.shape[0], self.out_channels]
|
||||
for i, k, p, s, d, op in zip(x.shape[-3:], self.kernel_size,
|
||||
self.padding, self.stride,
|
||||
self.dilation, self.output_padding):
|
||||
out_shape.append((i - 1) * s - 2 * p + (d * (k - 1) + 1) + op)
|
||||
empty = NewEmptyTensorOp.apply(x, out_shape)
|
||||
if self.training:
|
||||
# produce dummy gradient to avoid DDP warning.
|
||||
dummy = sum(x.view(-1)[0] for x in self.parameters()) * 0.0
|
||||
return empty + dummy
|
||||
else:
|
||||
return empty
|
||||
|
||||
return super().forward(x)
|
||||
|
||||
|
||||
class MaxPool2d(nn.MaxPool2d):
|
||||
|
||||
def forward(self, x):
|
||||
# PyTorch 1.9 does not support empty tensor inference yet
|
||||
if x.numel() == 0 and obsolete_torch_version(TORCH_VERSION, (1, 9)):
|
||||
out_shape = list(x.shape[:2])
|
||||
for i, k, p, s, d in zip(x.shape[-2:], _pair(self.kernel_size),
|
||||
_pair(self.padding), _pair(self.stride),
|
||||
_pair(self.dilation)):
|
||||
o = (i + 2 * p - (d * (k - 1) + 1)) / s + 1
|
||||
o = math.ceil(o) if self.ceil_mode else math.floor(o)
|
||||
out_shape.append(o)
|
||||
empty = NewEmptyTensorOp.apply(x, out_shape)
|
||||
return empty
|
||||
|
||||
return super().forward(x)
|
||||
|
||||
|
||||
class MaxPool3d(nn.MaxPool3d):
|
||||
|
||||
def forward(self, x):
|
||||
# PyTorch 1.9 does not support empty tensor inference yet
|
||||
if x.numel() == 0 and obsolete_torch_version(TORCH_VERSION, (1, 9)):
|
||||
out_shape = list(x.shape[:2])
|
||||
for i, k, p, s, d in zip(x.shape[-3:], _triple(self.kernel_size),
|
||||
_triple(self.padding),
|
||||
_triple(self.stride),
|
||||
_triple(self.dilation)):
|
||||
o = (i + 2 * p - (d * (k - 1) + 1)) / s + 1
|
||||
o = math.ceil(o) if self.ceil_mode else math.floor(o)
|
||||
out_shape.append(o)
|
||||
empty = NewEmptyTensorOp.apply(x, out_shape)
|
||||
return empty
|
||||
|
||||
return super().forward(x)
|
||||
|
||||
|
||||
class Linear(torch.nn.Linear):
|
||||
|
||||
def forward(self, x):
|
||||
# empty tensor forward of Linear layer is supported in PyTorch 1.6
|
||||
if x.numel() == 0 and obsolete_torch_version(TORCH_VERSION, (1, 5)):
|
||||
out_shape = [x.shape[0], self.out_features]
|
||||
empty = NewEmptyTensorOp.apply(x, out_shape)
|
||||
if self.training:
|
||||
# produce dummy gradient to avoid DDP warning.
|
||||
dummy = sum(x.view(-1)[0] for x in self.parameters()) * 0.0
|
||||
return empty + dummy
|
||||
else:
|
||||
return empty
|
||||
|
||||
return super().forward(x)
|
||||
|
|
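A brief sketch of what these wrappers buy on old PyTorch versions: a batch with zero RoIs flows through without shape errors. It assumes the wrapped Conv2d and Linear classes above are in scope; on recent PyTorch the plain nn modules already handle empty tensors and the wrappers fall through to super().forward().

import torch

conv = Conv2d(256, 256, kernel_size=3, padding=1)   # the wrapper, not nn.Conv2d
fc = Linear(256, 81)

empty_rois = torch.zeros(0, 256, 14, 14)            # no positive RoIs in this batch
feat = conv(empty_rois)                             # shape (0, 256, 14, 14), no crash
logits = fc(torch.zeros(0, 256))                    # shape (0, 81)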
@@ -0,0 +1,30 @@
|
|||
# Copyright (c) OpenMMLab. All rights reserved.
|
||||
from ..runner import Sequential
|
||||
from ..utils import Registry, build_from_cfg
|
||||
|
||||
|
||||
def build_model_from_cfg(cfg, registry, default_args=None):
|
||||
"""Build a PyTorch model from config dict(s). Different from
|
||||
``build_from_cfg``, if cfg is a list, a ``nn.Sequential`` will be built.
|
||||
|
||||
Args:
|
||||
cfg (dict, list[dict]): The config of modules; it is either a config
|
||||
dict or a list of config dicts. If cfg is a list,
|
||||
the built modules will be wrapped with ``nn.Sequential``.
|
||||
registry (:obj:`Registry`): A registry the module belongs to.
|
||||
default_args (dict, optional): Default arguments to build the module.
|
||||
Defaults to None.
|
||||
|
||||
Returns:
|
||||
nn.Module: A built nn module.
|
||||
"""
|
||||
if isinstance(cfg, list):
|
||||
modules = [
|
||||
build_from_cfg(cfg_, registry, default_args) for cfg_ in cfg
|
||||
]
|
||||
return Sequential(*modules)
|
||||
else:
|
||||
return build_from_cfg(cfg, registry, default_args)
|
||||
|
||||
|
||||
MODELS = Registry('model', build_func=build_model_from_cfg)
|
||||
|
|
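A quick sketch of the list-to-Sequential behaviour using a hypothetical registry entry (the module name and config are illustrative; it assumes the vendored Registry exposes .build(), otherwise build_model_from_cfg can be called directly).

import torch.nn as nn

@MODELS.register_module()               # hypothetical module, for illustration only
class TinyBlock(nn.Module):
    def __init__(self, channels):
        super().__init__()
        self.conv = nn.Conv2d(channels, channels, 3, padding=1)

    def forward(self, x):
        return self.conv(x)

single = MODELS.build(dict(type='TinyBlock', channels=8))         # one module
stacked = MODELS.build([dict(type='TinyBlock', channels=8)] * 2)  # nn.Sequential of two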
@@ -0,0 +1,316 @@
|
|||
# Copyright (c) OpenMMLab. All rights reserved.
|
||||
import logging
|
||||
|
||||
import torch.nn as nn
|
||||
import torch.utils.checkpoint as cp
|
||||
|
||||
from .utils import constant_init, kaiming_init
|
||||
|
||||
|
||||
def conv3x3(in_planes, out_planes, stride=1, dilation=1):
|
||||
"""3x3 convolution with padding."""
|
||||
return nn.Conv2d(
|
||||
in_planes,
|
||||
out_planes,
|
||||
kernel_size=3,
|
||||
stride=stride,
|
||||
padding=dilation,
|
||||
dilation=dilation,
|
||||
bias=False)
|
||||
|
||||
|
||||
class BasicBlock(nn.Module):
|
||||
expansion = 1
|
||||
|
||||
def __init__(self,
|
||||
inplanes,
|
||||
planes,
|
||||
stride=1,
|
||||
dilation=1,
|
||||
downsample=None,
|
||||
style='pytorch',
|
||||
with_cp=False):
|
||||
super(BasicBlock, self).__init__()
|
||||
assert style in ['pytorch', 'caffe']
|
||||
self.conv1 = conv3x3(inplanes, planes, stride, dilation)
|
||||
self.bn1 = nn.BatchNorm2d(planes)
|
||||
self.relu = nn.ReLU(inplace=True)
|
||||
self.conv2 = conv3x3(planes, planes)
|
||||
self.bn2 = nn.BatchNorm2d(planes)
|
||||
self.downsample = downsample
|
||||
self.stride = stride
|
||||
self.dilation = dilation
|
||||
assert not with_cp
|
||||
|
||||
def forward(self, x):
|
||||
residual = x
|
||||
|
||||
out = self.conv1(x)
|
||||
out = self.bn1(out)
|
||||
out = self.relu(out)
|
||||
|
||||
out = self.conv2(out)
|
||||
out = self.bn2(out)
|
||||
|
||||
if self.downsample is not None:
|
||||
residual = self.downsample(x)
|
||||
|
||||
out += residual
|
||||
out = self.relu(out)
|
||||
|
||||
return out
|
||||
|
||||
|
||||
class Bottleneck(nn.Module):
|
||||
expansion = 4
|
||||
|
||||
def __init__(self,
|
||||
inplanes,
|
||||
planes,
|
||||
stride=1,
|
||||
dilation=1,
|
||||
downsample=None,
|
||||
style='pytorch',
|
||||
with_cp=False):
|
||||
"""Bottleneck block.
|
||||
|
||||
If style is "pytorch", the stride-two layer is the 3x3 conv layer, if
|
||||
it is "caffe", the stride-two layer is the first 1x1 conv layer.
|
||||
"""
|
||||
super(Bottleneck, self).__init__()
|
||||
assert style in ['pytorch', 'caffe']
|
||||
if style == 'pytorch':
|
||||
conv1_stride = 1
|
||||
conv2_stride = stride
|
||||
else:
|
||||
conv1_stride = stride
|
||||
conv2_stride = 1
|
||||
self.conv1 = nn.Conv2d(
|
||||
inplanes, planes, kernel_size=1, stride=conv1_stride, bias=False)
|
||||
self.conv2 = nn.Conv2d(
|
||||
planes,
|
||||
planes,
|
||||
kernel_size=3,
|
||||
stride=conv2_stride,
|
||||
padding=dilation,
|
||||
dilation=dilation,
|
||||
bias=False)
|
||||
|
||||
self.bn1 = nn.BatchNorm2d(planes)
|
||||
self.bn2 = nn.BatchNorm2d(planes)
|
||||
self.conv3 = nn.Conv2d(
|
||||
planes, planes * self.expansion, kernel_size=1, bias=False)
|
||||
self.bn3 = nn.BatchNorm2d(planes * self.expansion)
|
||||
self.relu = nn.ReLU(inplace=True)
|
||||
self.downsample = downsample
|
||||
self.stride = stride
|
||||
self.dilation = dilation
|
||||
self.with_cp = with_cp
|
||||
|
||||
def forward(self, x):
|
||||
|
||||
def _inner_forward(x):
|
||||
residual = x
|
||||
|
||||
out = self.conv1(x)
|
||||
out = self.bn1(out)
|
||||
out = self.relu(out)
|
||||
|
||||
out = self.conv2(out)
|
||||
out = self.bn2(out)
|
||||
out = self.relu(out)
|
||||
|
||||
out = self.conv3(out)
|
||||
out = self.bn3(out)
|
||||
|
||||
if self.downsample is not None:
|
||||
residual = self.downsample(x)
|
||||
|
||||
out += residual
|
||||
|
||||
return out
|
||||
|
||||
if self.with_cp and x.requires_grad:
|
||||
out = cp.checkpoint(_inner_forward, x)
|
||||
else:
|
||||
out = _inner_forward(x)
|
||||
|
||||
out = self.relu(out)
|
||||
|
||||
return out
|
||||
|
||||
|
||||
def make_res_layer(block,
|
||||
inplanes,
|
||||
planes,
|
||||
blocks,
|
||||
stride=1,
|
||||
dilation=1,
|
||||
style='pytorch',
|
||||
with_cp=False):
|
||||
downsample = None
|
||||
if stride != 1 or inplanes != planes * block.expansion:
|
||||
downsample = nn.Sequential(
|
||||
nn.Conv2d(
|
||||
inplanes,
|
||||
planes * block.expansion,
|
||||
kernel_size=1,
|
||||
stride=stride,
|
||||
bias=False),
|
||||
nn.BatchNorm2d(planes * block.expansion),
|
||||
)
|
||||
|
||||
layers = []
|
||||
layers.append(
|
||||
block(
|
||||
inplanes,
|
||||
planes,
|
||||
stride,
|
||||
dilation,
|
||||
downsample,
|
||||
style=style,
|
||||
with_cp=with_cp))
|
||||
inplanes = planes * block.expansion
|
||||
for _ in range(1, blocks):
|
||||
layers.append(
|
||||
block(inplanes, planes, 1, dilation, style=style, with_cp=with_cp))
|
||||
|
||||
return nn.Sequential(*layers)
|
||||
|
||||
|
||||
class ResNet(nn.Module):
|
||||
"""ResNet backbone.
|
||||
|
||||
Args:
|
||||
depth (int): Depth of resnet, from {18, 34, 50, 101, 152}.
|
||||
num_stages (int): Resnet stages, normally 4.
|
||||
strides (Sequence[int]): Strides of the first block of each stage.
|
||||
dilations (Sequence[int]): Dilation of each stage.
|
||||
out_indices (Sequence[int]): Output from which stages.
|
||||
style (str): `pytorch` or `caffe`. If set to "pytorch", the stride-two
|
||||
layer is the 3x3 conv layer, otherwise the stride-two layer is
|
||||
the first 1x1 conv layer.
|
||||
frozen_stages (int): Stages to be frozen (all param fixed). -1 means
|
||||
not freezing any parameters.
|
||||
bn_eval (bool): Whether to set BN layers as eval mode, namely, freeze
|
||||
running stats (mean and var).
|
||||
bn_frozen (bool): Whether to freeze weight and bias of BN layers.
|
||||
with_cp (bool): Use checkpoint or not. Using checkpoint will save some
|
||||
memory while slowing down the training speed.
|
||||
"""
|
||||
|
||||
arch_settings = {
|
||||
18: (BasicBlock, (2, 2, 2, 2)),
|
||||
34: (BasicBlock, (3, 4, 6, 3)),
|
||||
50: (Bottleneck, (3, 4, 6, 3)),
|
||||
101: (Bottleneck, (3, 4, 23, 3)),
|
||||
152: (Bottleneck, (3, 8, 36, 3))
|
||||
}
|
||||
|
||||
def __init__(self,
|
||||
depth,
|
||||
num_stages=4,
|
||||
strides=(1, 2, 2, 2),
|
||||
dilations=(1, 1, 1, 1),
|
||||
out_indices=(0, 1, 2, 3),
|
||||
style='pytorch',
|
||||
frozen_stages=-1,
|
||||
bn_eval=True,
|
||||
bn_frozen=False,
|
||||
with_cp=False):
|
||||
super(ResNet, self).__init__()
|
||||
if depth not in self.arch_settings:
|
||||
raise KeyError(f'invalid depth {depth} for resnet')
|
||||
assert num_stages >= 1 and num_stages <= 4
|
||||
block, stage_blocks = self.arch_settings[depth]
|
||||
stage_blocks = stage_blocks[:num_stages]
|
||||
assert len(strides) == len(dilations) == num_stages
|
||||
assert max(out_indices) < num_stages
|
||||
|
||||
self.out_indices = out_indices
|
||||
self.style = style
|
||||
self.frozen_stages = frozen_stages
|
||||
self.bn_eval = bn_eval
|
||||
self.bn_frozen = bn_frozen
|
||||
self.with_cp = with_cp
|
||||
|
||||
self.inplanes = 64
|
||||
self.conv1 = nn.Conv2d(
|
||||
3, 64, kernel_size=7, stride=2, padding=3, bias=False)
|
||||
self.bn1 = nn.BatchNorm2d(64)
|
||||
self.relu = nn.ReLU(inplace=True)
|
||||
self.maxpool = nn.MaxPool2d(kernel_size=3, stride=2, padding=1)
|
||||
|
||||
self.res_layers = []
|
||||
for i, num_blocks in enumerate(stage_blocks):
|
||||
stride = strides[i]
|
||||
dilation = dilations[i]
|
||||
planes = 64 * 2**i
|
||||
res_layer = make_res_layer(
|
||||
block,
|
||||
self.inplanes,
|
||||
planes,
|
||||
num_blocks,
|
||||
stride=stride,
|
||||
dilation=dilation,
|
||||
style=self.style,
|
||||
with_cp=with_cp)
|
||||
self.inplanes = planes * block.expansion
|
||||
layer_name = f'layer{i + 1}'
|
||||
self.add_module(layer_name, res_layer)
|
||||
self.res_layers.append(layer_name)
|
||||
|
||||
self.feat_dim = block.expansion * 64 * 2**(len(stage_blocks) - 1)
|
||||
|
||||
def init_weights(self, pretrained=None):
|
||||
if isinstance(pretrained, str):
|
||||
logger = logging.getLogger()
|
||||
from ..runner import load_checkpoint
|
||||
load_checkpoint(self, pretrained, strict=False, logger=logger)
|
||||
elif pretrained is None:
|
||||
for m in self.modules():
|
||||
if isinstance(m, nn.Conv2d):
|
||||
kaiming_init(m)
|
||||
elif isinstance(m, nn.BatchNorm2d):
|
||||
constant_init(m, 1)
|
||||
else:
|
||||
raise TypeError('pretrained must be a str or None')
|
||||
|
||||
def forward(self, x):
|
||||
x = self.conv1(x)
|
||||
x = self.bn1(x)
|
||||
x = self.relu(x)
|
||||
x = self.maxpool(x)
|
||||
outs = []
|
||||
for i, layer_name in enumerate(self.res_layers):
|
||||
res_layer = getattr(self, layer_name)
|
||||
x = res_layer(x)
|
||||
if i in self.out_indices:
|
||||
outs.append(x)
|
||||
if len(outs) == 1:
|
||||
return outs[0]
|
||||
else:
|
||||
return tuple(outs)
|
||||
|
||||
def train(self, mode=True):
|
||||
super(ResNet, self).train(mode)
|
||||
if self.bn_eval:
|
||||
for m in self.modules():
|
||||
if isinstance(m, nn.BatchNorm2d):
|
||||
m.eval()
|
||||
if self.bn_frozen:
|
||||
for params in m.parameters():
|
||||
params.requires_grad = False
|
||||
if mode and self.frozen_stages >= 0:
|
||||
for param in self.conv1.parameters():
|
||||
param.requires_grad = False
|
||||
for param in self.bn1.parameters():
|
||||
param.requires_grad = False
|
||||
self.bn1.eval()
|
||||
self.bn1.weight.requires_grad = False
|
||||
self.bn1.bias.requires_grad = False
|
||||
for i in range(1, self.frozen_stages + 1):
|
||||
mod = getattr(self, f'layer{i}')
|
||||
mod.eval()
|
||||
for param in mod.parameters():
|
||||
param.requires_grad = False
|
||||
|
|
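A minimal sketch of instantiating this backbone (standard ResNet-50 settings, dummy input; assumes the ResNet class above is in scope).

import torch

backbone = ResNet(depth=50, out_indices=(0, 1, 2, 3), frozen_stages=1)
backbone.init_weights(pretrained=None)
backbone.train()                                  # conv1/bn1 and layer1 stay frozen

x = torch.randn(1, 3, 224, 224)
c2, c3, c4, c5 = backbone(x)
# strides 4/8/16/32 relative to the input:
# c2: (1, 256, 56, 56), c3: (1, 512, 28, 28),
# c4: (1, 1024, 14, 14), c5: (1, 2048, 7, 7)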
@@ -0,0 +1,19 @@
|
|||
# Copyright (c) OpenMMLab. All rights reserved.
|
||||
from .flops_counter import get_model_complexity_info
|
||||
from .fuse_conv_bn import fuse_conv_bn
|
||||
from .sync_bn import revert_sync_batchnorm
|
||||
from .weight_init import (INITIALIZERS, Caffe2XavierInit, ConstantInit,
|
||||
KaimingInit, NormalInit, PretrainedInit,
|
||||
TruncNormalInit, UniformInit, XavierInit,
|
||||
bias_init_with_prob, caffe2_xavier_init,
|
||||
constant_init, initialize, kaiming_init, normal_init,
|
||||
trunc_normal_init, uniform_init, xavier_init)
|
||||
|
||||
__all__ = [
|
||||
'get_model_complexity_info', 'bias_init_with_prob', 'caffe2_xavier_init',
|
||||
'constant_init', 'kaiming_init', 'normal_init', 'trunc_normal_init',
|
||||
'uniform_init', 'xavier_init', 'fuse_conv_bn', 'initialize',
|
||||
'INITIALIZERS', 'ConstantInit', 'XavierInit', 'NormalInit',
|
||||
'TruncNormalInit', 'UniformInit', 'KaimingInit', 'PretrainedInit',
|
||||
'Caffe2XavierInit', 'revert_sync_batchnorm'
|
||||
]
|
||||
|
|
@@ -0,0 +1,599 @@
|
|||
# Modified from flops-counter.pytorch by Vladislav Sovrasov
|
||||
# original repo: https://github.com/sovrasov/flops-counter.pytorch
|
||||
|
||||
# MIT License
|
||||
|
||||
# Copyright (c) 2018 Vladislav Sovrasov
|
||||
|
||||
# Permission is hereby granted, free of charge, to any person obtaining a copy
|
||||
# of this software and associated documentation files (the "Software"), to deal
|
||||
# in the Software without restriction, including without limitation the rights
|
||||
# to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
|
||||
# copies of the Software, and to permit persons to whom the Software is
|
||||
# furnished to do so, subject to the following conditions:
|
||||
|
||||
# The above copyright notice and this permission notice shall be included in
|
||||
# all copies or substantial portions of the Software.
|
||||
|
||||
# THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
|
||||
# IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
|
||||
# FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
|
||||
# AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
|
||||
# LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
|
||||
# OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
|
||||
# SOFTWARE.
|
||||
|
||||
import sys
|
||||
from functools import partial
|
||||
|
||||
import numpy as np
|
||||
import torch
|
||||
import torch.nn as nn
|
||||
|
||||
import annotator.uniformer.mmcv as mmcv
|
||||
|
||||
|
||||
def get_model_complexity_info(model,
|
||||
input_shape,
|
||||
print_per_layer_stat=True,
|
||||
as_strings=True,
|
||||
input_constructor=None,
|
||||
flush=False,
|
||||
ost=sys.stdout):
|
||||
"""Get complexity information of a model.
|
||||
|
||||
This method can calculate FLOPs and parameter counts of a model with
|
||||
corresponding input shape. It can also print complexity information for
|
||||
each layer in a model.
|
||||
|
||||
Supported layers are listed as below:
|
||||
- Convolutions: ``nn.Conv1d``, ``nn.Conv2d``, ``nn.Conv3d``.
|
||||
- Activations: ``nn.ReLU``, ``nn.PReLU``, ``nn.ELU``, ``nn.LeakyReLU``,
|
||||
``nn.ReLU6``.
|
||||
- Poolings: ``nn.MaxPool1d``, ``nn.MaxPool2d``, ``nn.MaxPool3d``,
|
||||
``nn.AvgPool1d``, ``nn.AvgPool2d``, ``nn.AvgPool3d``,
|
||||
``nn.AdaptiveMaxPool1d``, ``nn.AdaptiveMaxPool2d``,
|
||||
``nn.AdaptiveMaxPool3d``, ``nn.AdaptiveAvgPool1d``,
|
||||
``nn.AdaptiveAvgPool2d``, ``nn.AdaptiveAvgPool3d``.
|
||||
- BatchNorms: ``nn.BatchNorm1d``, ``nn.BatchNorm2d``,
|
||||
``nn.BatchNorm3d``, ``nn.GroupNorm``, ``nn.InstanceNorm1d``,
|
||||
``InstanceNorm2d``, ``InstanceNorm3d``, ``nn.LayerNorm``.
|
||||
- Linear: ``nn.Linear``.
|
||||
- Deconvolution: ``nn.ConvTranspose2d``.
|
||||
- Upsample: ``nn.Upsample``.
|
||||
|
||||
Args:
|
||||
model (nn.Module): The model for complexity calculation.
|
||||
input_shape (tuple): Input shape used for calculation.
|
||||
print_per_layer_stat (bool): Whether to print complexity information
|
||||
for each layer in a model. Default: True.
|
||||
as_strings (bool): Output FLOPs and params counts in a string form.
|
||||
Default: True.
|
||||
input_constructor (None | callable): If specified, it takes a callable
|
||||
method that generates input. Otherwise, it will generate a random
|
||||
tensor with input shape to calculate FLOPs. Default: None.
|
||||
flush (bool): same as that in :func:`print`. Default: False.
|
||||
ost (stream): same as ``file`` param in :func:`print`.
|
||||
Default: sys.stdout.
|
||||
|
||||
Returns:
|
||||
tuple[float | str]: If ``as_strings`` is set to True, it will return
|
||||
FLOPs and parameter counts in a string format. Otherwise, it will
|
||||
return those in a float number format.
|
||||
"""
|
||||
assert type(input_shape) is tuple
|
||||
assert len(input_shape) >= 1
|
||||
assert isinstance(model, nn.Module)
|
||||
flops_model = add_flops_counting_methods(model)
|
||||
flops_model.eval()
|
||||
flops_model.start_flops_count()
|
||||
if input_constructor:
|
||||
input = input_constructor(input_shape)
|
||||
_ = flops_model(**input)
|
||||
else:
|
||||
try:
|
||||
batch = torch.ones(()).new_empty(
|
||||
(1, *input_shape),
|
||||
dtype=next(flops_model.parameters()).dtype,
|
||||
device=next(flops_model.parameters()).device)
|
||||
except StopIteration:
|
||||
# Avoid StopIteration for models which have no parameters,
|
||||
# like `nn.Relu()`, `nn.AvgPool2d`, etc.
|
||||
batch = torch.ones(()).new_empty((1, *input_shape))
|
||||
|
||||
_ = flops_model(batch)
|
||||
|
||||
flops_count, params_count = flops_model.compute_average_flops_cost()
|
||||
if print_per_layer_stat:
|
||||
print_model_with_flops(
|
||||
flops_model, flops_count, params_count, ost=ost, flush=flush)
|
||||
flops_model.stop_flops_count()
|
||||
|
||||
if as_strings:
|
||||
return flops_to_string(flops_count), params_to_string(params_count)
|
||||
|
||||
return flops_count, params_count
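A short self-contained usage sketch of the function above (the tiny nn.Sequential is only for illustration; unsupported layers such as nn.Flatten simply contribute zero FLOPs, and the per-layer table is suppressed here).

import torch.nn as nn

net = nn.Sequential(
    nn.Conv2d(3, 16, 3, padding=1),
    nn.ReLU(),
    nn.AdaptiveAvgPool2d(1),
    nn.Flatten(),
    nn.Linear(16, 10))

flops, params = get_model_complexity_info(
    net, (3, 32, 32), print_per_layer_stat=False, as_strings=True)
print(flops, params)   # FLOPs and parameter counts as strings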
|
||||
|
||||
|
||||
def flops_to_string(flops, units='GFLOPs', precision=2):
|
||||
"""Convert FLOPs number into a string.
|
||||
|
||||
Note that here we take one multiply-add as one FLOP.
|
||||
|
||||
Args:
|
||||
flops (float): FLOPs number to be converted.
|
||||
units (str | None): Converted FLOPs units. Options are None, 'GFLOPs',
|
||||
'MFLOPs', 'KFLOPs', 'FLOPs'. If set to None, it will automatically
|
||||
choose the most suitable unit for FLOPs. Default: 'GFLOPs'.
|
||||
precision (int): Digit number after the decimal point. Default: 2.
|
||||
|
||||
Returns:
|
||||
str: The converted FLOPs number with units.
|
||||
|
||||
Examples:
|
||||
>>> flops_to_string(1e9)
|
||||
'1.0 GFLOPs'
|
||||
>>> flops_to_string(2e5, 'MFLOPs')
|
||||
'0.2 MFLOPs'
|
||||
>>> flops_to_string(3e-9, None)
|
||||
'3e-09 FLOPs'
|
||||
"""
|
||||
if units is None:
|
||||
if flops // 10**9 > 0:
|
||||
return str(round(flops / 10.**9, precision)) + ' GFLOPs'
|
||||
elif flops // 10**6 > 0:
|
||||
return str(round(flops / 10.**6, precision)) + ' MFLOPs'
|
||||
elif flops // 10**3 > 0:
|
||||
return str(round(flops / 10.**3, precision)) + ' KFLOPs'
|
||||
else:
|
||||
return str(flops) + ' FLOPs'
|
||||
else:
|
||||
if units == 'GFLOPs':
|
||||
return str(round(flops / 10.**9, precision)) + ' ' + units
|
||||
elif units == 'MFLOPs':
|
||||
return str(round(flops / 10.**6, precision)) + ' ' + units
|
||||
elif units == 'KFLOPs':
|
||||
return str(round(flops / 10.**3, precision)) + ' ' + units
|
||||
else:
|
||||
return str(flops) + ' FLOPs'
|
||||
|
||||
|
||||
def params_to_string(num_params, units=None, precision=2):
|
||||
"""Convert parameter number into a string.
|
||||
|
||||
Args:
|
||||
num_params (float): Parameter number to be converted.
|
||||
units (str | None): Converted FLOPs units. Options are None, 'M',
|
||||
'K' and ''. If set to None, it will automatically choose the most
|
||||
suitable unit for Parameter number. Default: None.
|
||||
precision (int): Digit number after the decimal point. Default: 2.
|
||||
|
||||
Returns:
|
||||
str: The converted parameter number with units.
|
||||
|
||||
Examples:
|
||||
>>> params_to_string(1e9)
|
||||
'1000.0 M'
|
||||
>>> params_to_string(2e5)
|
||||
'200.0 k'
|
||||
>>> params_to_string(3e-9)
|
||||
'3e-09'
|
||||
"""
|
||||
if units is None:
|
||||
if num_params // 10**6 > 0:
|
||||
return str(round(num_params / 10**6, precision)) + ' M'
|
||||
elif num_params // 10**3:
|
||||
return str(round(num_params / 10**3, precision)) + ' k'
|
||||
else:
|
||||
return str(num_params)
|
||||
else:
|
||||
if units == 'M':
|
||||
return str(round(num_params / 10.**6, precision)) + ' ' + units
|
||||
elif units == 'K':
|
||||
return str(round(num_params / 10.**3, precision)) + ' ' + units
|
||||
else:
|
||||
return str(num_params)
|
||||
|
||||
|
||||
def print_model_with_flops(model,
|
||||
total_flops,
|
||||
total_params,
|
||||
units='GFLOPs',
|
||||
precision=3,
|
||||
ost=sys.stdout,
|
||||
flush=False):
|
||||
"""Print a model with FLOPs for each layer.
|
||||
|
||||
Args:
|
||||
model (nn.Module): The model to be printed.
|
||||
total_flops (float): Total FLOPs of the model.
|
||||
total_params (float): Total parameter counts of the model.
|
||||
units (str | None): Converted FLOPs units. Default: 'GFLOPs'.
|
||||
precision (int): Digit number after the decimal point. Default: 3.
|
||||
ost (stream): same as `file` param in :func:`print`.
|
||||
Default: sys.stdout.
|
||||
flush (bool): same as that in :func:`print`. Default: False.
|
||||
|
||||
Example:
|
||||
>>> class ExampleModel(nn.Module):
|
||||
|
||||
>>> def __init__(self):
|
||||
>>> super().__init__()
|
||||
>>> self.conv1 = nn.Conv2d(3, 8, 3)
|
||||
>>> self.conv2 = nn.Conv2d(8, 256, 3)
|
||||
>>> self.conv3 = nn.Conv2d(256, 8, 3)
|
||||
>>> self.avg_pool = nn.AdaptiveAvgPool2d((1, 1))
|
||||
>>> self.flatten = nn.Flatten()
|
||||
>>> self.fc = nn.Linear(8, 1)
|
||||
|
||||
>>> def forward(self, x):
|
||||
>>> x = self.conv1(x)
|
||||
>>> x = self.conv2(x)
|
||||
>>> x = self.conv3(x)
|
||||
>>> x = self.avg_pool(x)
|
||||
>>> x = self.flatten(x)
|
||||
>>> x = self.fc(x)
|
||||
>>> return x
|
||||
|
||||
>>> model = ExampleModel()
|
||||
>>> x = (3, 16, 16)
|
||||
To print the complexity information for each layer, you can use
|
||||
>>> get_model_complexity_info(model, x)
|
||||
or directly use
|
||||
>>> print_model_with_flops(model, 4579784.0, 37361)
|
||||
ExampleModel(
|
||||
0.037 M, 100.000% Params, 0.005 GFLOPs, 100.000% FLOPs,
|
||||
(conv1): Conv2d(0.0 M, 0.600% Params, 0.0 GFLOPs, 0.959% FLOPs, 3, 8, kernel_size=(3, 3), stride=(1, 1)) # noqa: E501
|
||||
(conv2): Conv2d(0.019 M, 50.020% Params, 0.003 GFLOPs, 58.760% FLOPs, 8, 256, kernel_size=(3, 3), stride=(1, 1))
|
||||
(conv3): Conv2d(0.018 M, 49.356% Params, 0.002 GFLOPs, 40.264% FLOPs, 256, 8, kernel_size=(3, 3), stride=(1, 1))
|
||||
(avg_pool): AdaptiveAvgPool2d(0.0 M, 0.000% Params, 0.0 GFLOPs, 0.017% FLOPs, output_size=(1, 1))
|
||||
(flatten): Flatten(0.0 M, 0.000% Params, 0.0 GFLOPs, 0.000% FLOPs, )
|
||||
(fc): Linear(0.0 M, 0.024% Params, 0.0 GFLOPs, 0.000% FLOPs, in_features=8, out_features=1, bias=True)
|
||||
)
|
||||
"""
|
||||
|
||||
def accumulate_params(self):
|
||||
if is_supported_instance(self):
|
||||
return self.__params__
|
||||
else:
|
||||
sum = 0
|
||||
for m in self.children():
|
||||
sum += m.accumulate_params()
|
||||
return sum
|
||||
|
||||
def accumulate_flops(self):
|
||||
if is_supported_instance(self):
|
||||
return self.__flops__ / model.__batch_counter__
|
||||
else:
|
||||
sum = 0
|
||||
for m in self.children():
|
||||
sum += m.accumulate_flops()
|
||||
return sum
|
||||
|
||||
def flops_repr(self):
|
||||
accumulated_num_params = self.accumulate_params()
|
||||
accumulated_flops_cost = self.accumulate_flops()
|
||||
return ', '.join([
|
||||
params_to_string(
|
||||
accumulated_num_params, units='M', precision=precision),
|
||||
'{:.3%} Params'.format(accumulated_num_params / total_params),
|
||||
flops_to_string(
|
||||
accumulated_flops_cost, units=units, precision=precision),
|
||||
'{:.3%} FLOPs'.format(accumulated_flops_cost / total_flops),
|
||||
self.original_extra_repr()
|
||||
])
|
||||
|
||||
def add_extra_repr(m):
|
||||
m.accumulate_flops = accumulate_flops.__get__(m)
|
||||
m.accumulate_params = accumulate_params.__get__(m)
|
||||
flops_extra_repr = flops_repr.__get__(m)
|
||||
if m.extra_repr != flops_extra_repr:
|
||||
m.original_extra_repr = m.extra_repr
|
||||
m.extra_repr = flops_extra_repr
|
||||
assert m.extra_repr != m.original_extra_repr
|
||||
|
||||
def del_extra_repr(m):
|
||||
if hasattr(m, 'original_extra_repr'):
|
||||
m.extra_repr = m.original_extra_repr
|
||||
del m.original_extra_repr
|
||||
if hasattr(m, 'accumulate_flops'):
|
||||
del m.accumulate_flops
|
||||
|
||||
model.apply(add_extra_repr)
|
||||
print(model, file=ost, flush=flush)
|
||||
model.apply(del_extra_repr)
|
||||
|
||||
|
||||
def get_model_parameters_number(model):
|
||||
"""Calculate parameter number of a model.
|
||||
|
||||
Args:
|
||||
model (nn.module): The model for parameter number calculation.
|
||||
|
||||
Returns:
|
||||
float: Parameter number of the model.
|
||||
"""
|
||||
num_params = sum(p.numel() for p in model.parameters() if p.requires_grad)
|
||||
return num_params
|
||||
|
||||
|
||||
def add_flops_counting_methods(net_main_module):
|
||||
# adding additional methods to the existing module object,
|
||||
# this is done this way so that each function has access to self object
|
||||
net_main_module.start_flops_count = start_flops_count.__get__(
|
||||
net_main_module)
|
||||
net_main_module.stop_flops_count = stop_flops_count.__get__(
|
||||
net_main_module)
|
||||
net_main_module.reset_flops_count = reset_flops_count.__get__(
|
||||
net_main_module)
|
||||
net_main_module.compute_average_flops_cost = compute_average_flops_cost.__get__( # noqa: E501
|
||||
net_main_module)
|
||||
|
||||
net_main_module.reset_flops_count()
|
||||
|
||||
return net_main_module
|
||||
|
||||
|
||||
def compute_average_flops_cost(self):
|
||||
"""Compute average FLOPs cost.
|
||||
|
||||
A method to compute average FLOPs cost, which will be available after
|
||||
`add_flops_counting_methods()` is called on a desired net object.
|
||||
|
||||
Returns:
|
||||
float: Current mean flops consumption per image.
|
||||
"""
|
||||
batches_count = self.__batch_counter__
|
||||
flops_sum = 0
|
||||
for module in self.modules():
|
||||
if is_supported_instance(module):
|
||||
flops_sum += module.__flops__
|
||||
params_sum = get_model_parameters_number(self)
|
||||
return flops_sum / batches_count, params_sum
|
||||
|
||||
|
||||
def start_flops_count(self):
|
||||
"""Activate the computation of mean flops consumption per image.
|
||||
|
||||
A method to activate the computation of mean flops consumption per image,
|
||||
which will be available after ``add_flops_counting_methods()`` is called on
|
||||
a desired net object. It should be called before running the network.
|
||||
"""
|
||||
add_batch_counter_hook_function(self)
|
||||
|
||||
def add_flops_counter_hook_function(module):
|
||||
if is_supported_instance(module):
|
||||
if hasattr(module, '__flops_handle__'):
|
||||
return
|
||||
|
||||
else:
|
||||
handle = module.register_forward_hook(
|
||||
get_modules_mapping()[type(module)])
|
||||
|
||||
module.__flops_handle__ = handle
|
||||
|
||||
self.apply(partial(add_flops_counter_hook_function))
|
||||
|
||||
|
||||
def stop_flops_count(self):
|
||||
"""Stop computing the mean flops consumption per image.
|
||||
|
||||
A method to stop computing the mean flops consumption per image, which will
|
||||
be available after ``add_flops_counting_methods()`` is called on a desired
|
||||
net object. It can be called to pause the computation whenever.
|
||||
"""
|
||||
remove_batch_counter_hook_function(self)
|
||||
self.apply(remove_flops_counter_hook_function)
|
||||
|
||||
|
||||
def reset_flops_count(self):
|
||||
"""Reset statistics computed so far.
|
||||
|
||||
A method to reset computed statistics, which will be available after
|
||||
`add_flops_counting_methods()` is called on a desired net object.
|
||||
"""
|
||||
add_batch_counter_variables_or_reset(self)
|
||||
self.apply(add_flops_counter_variable_or_reset)
|
||||
|
||||
|
||||
# ---- Internal functions
|
||||
def empty_flops_counter_hook(module, input, output):
|
||||
module.__flops__ += 0
|
||||
|
||||
|
||||
def upsample_flops_counter_hook(module, input, output):
|
||||
output_size = output[0]
|
||||
batch_size = output_size.shape[0]
|
||||
output_elements_count = batch_size
|
||||
for val in output_size.shape[1:]:
|
||||
output_elements_count *= val
|
||||
module.__flops__ += int(output_elements_count)
|
||||
|
||||
|
||||
def relu_flops_counter_hook(module, input, output):
|
||||
active_elements_count = output.numel()
|
||||
module.__flops__ += int(active_elements_count)
|
||||
|
||||
|
||||
def linear_flops_counter_hook(module, input, output):
|
||||
input = input[0]
|
||||
output_last_dim = output.shape[
|
||||
-1] # pytorch checks dimensions, so here we don't care much
|
||||
module.__flops__ += int(np.prod(input.shape) * output_last_dim)
|
||||
|
||||
|
||||
def pool_flops_counter_hook(module, input, output):
|
||||
input = input[0]
|
||||
module.__flops__ += int(np.prod(input.shape))
|
||||
|
||||
|
||||
def norm_flops_counter_hook(module, input, output):
|
||||
input = input[0]
|
||||
|
||||
batch_flops = np.prod(input.shape)
|
||||
if (getattr(module, 'affine', False)
|
||||
or getattr(module, 'elementwise_affine', False)):
|
||||
batch_flops *= 2
|
||||
module.__flops__ += int(batch_flops)
|
||||
|
||||
|
||||
def deconv_flops_counter_hook(conv_module, input, output):
|
||||
# Can have multiple inputs, getting the first one
|
||||
input = input[0]
|
||||
|
||||
batch_size = input.shape[0]
|
||||
input_height, input_width = input.shape[2:]
|
||||
|
||||
kernel_height, kernel_width = conv_module.kernel_size
|
||||
in_channels = conv_module.in_channels
|
||||
out_channels = conv_module.out_channels
|
||||
groups = conv_module.groups
|
||||
|
||||
filters_per_channel = out_channels // groups
|
||||
conv_per_position_flops = (
|
||||
kernel_height * kernel_width * in_channels * filters_per_channel)
|
||||
|
||||
active_elements_count = batch_size * input_height * input_width
|
||||
overall_conv_flops = conv_per_position_flops * active_elements_count
|
||||
bias_flops = 0
|
||||
if conv_module.bias is not None:
|
||||
output_height, output_width = output.shape[2:]
|
||||
bias_flops = out_channels * batch_size * output_height * output_width
|
||||
overall_flops = overall_conv_flops + bias_flops
|
||||
|
||||
conv_module.__flops__ += int(overall_flops)
|
||||
|
||||
|
||||
def conv_flops_counter_hook(conv_module, input, output):
|
||||
# Can have multiple inputs, getting the first one
|
||||
input = input[0]
|
||||
|
||||
batch_size = input.shape[0]
|
||||
output_dims = list(output.shape[2:])
|
||||
|
||||
kernel_dims = list(conv_module.kernel_size)
|
||||
in_channels = conv_module.in_channels
|
||||
out_channels = conv_module.out_channels
|
||||
groups = conv_module.groups
|
||||
|
||||
filters_per_channel = out_channels // groups
|
||||
conv_per_position_flops = int(
|
||||
np.prod(kernel_dims)) * in_channels * filters_per_channel
|
||||
|
||||
active_elements_count = batch_size * int(np.prod(output_dims))
|
||||
|
||||
overall_conv_flops = conv_per_position_flops * active_elements_count
|
||||
|
||||
bias_flops = 0
|
||||
|
||||
if conv_module.bias is not None:
|
||||
|
||||
bias_flops = out_channels * active_elements_count
|
||||
|
||||
overall_flops = overall_conv_flops + bias_flops
|
||||
|
||||
conv_module.__flops__ += int(overall_flops)
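A worked numeric check of the hook's accounting (illustrative numbers, not from the patch): a 3x3 conv with 16 input and 32 output channels, no groups, on a batch of one 8x8 output map.

# kernel 3x3, in_channels=16, out_channels=32, groups=1, output 8x8, batch 1
conv_per_position_flops = 3 * 3 * 16 * (32 // 1)     # = 4608
active_elements_count = 1 * 8 * 8                    # = 64
bias_flops = 32 * 64                                 # = 2048 (only if bias is present)
total = conv_per_position_flops * active_elements_count + bias_flops  # = 296960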
|
||||
|
||||
|
||||
def batch_counter_hook(module, input, output):
|
||||
batch_size = 1
|
||||
if len(input) > 0:
|
||||
# Can have multiple inputs, getting the first one
|
||||
input = input[0]
|
||||
batch_size = len(input)
|
||||
else:
|
||||
print('Warning! No positional inputs found for a module, '
|
||||
'assuming batch size is 1.')
|
||||
module.__batch_counter__ += batch_size
|
||||
|
||||
|
||||
def add_batch_counter_variables_or_reset(module):
|
||||
|
||||
module.__batch_counter__ = 0
|
||||
|
||||
|
||||
def add_batch_counter_hook_function(module):
|
||||
if hasattr(module, '__batch_counter_handle__'):
|
||||
return
|
||||
|
||||
handle = module.register_forward_hook(batch_counter_hook)
|
||||
module.__batch_counter_handle__ = handle
|
||||
|
||||
|
||||
def remove_batch_counter_hook_function(module):
|
||||
if hasattr(module, '__batch_counter_handle__'):
|
||||
module.__batch_counter_handle__.remove()
|
||||
del module.__batch_counter_handle__
|
||||
|
||||
|
||||
def add_flops_counter_variable_or_reset(module):
|
||||
if is_supported_instance(module):
|
||||
if hasattr(module, '__flops__') or hasattr(module, '__params__'):
|
||||
print('Warning: variables __flops__ or __params__ are already '
|
||||
'defined for the module ' + type(module).__name__ +
|
||||
' ptflops can affect your code!')
|
||||
module.__flops__ = 0
|
||||
module.__params__ = get_model_parameters_number(module)
|
||||
|
||||
|
||||
def is_supported_instance(module):
|
||||
if type(module) in get_modules_mapping():
|
||||
return True
|
||||
return False
|
||||
|
||||
|
||||
def remove_flops_counter_hook_function(module):
|
||||
if is_supported_instance(module):
|
||||
if hasattr(module, '__flops_handle__'):
|
||||
module.__flops_handle__.remove()
|
||||
del module.__flops_handle__
|
||||
|
||||
|
||||
def get_modules_mapping():
|
||||
return {
|
||||
# convolutions
|
||||
nn.Conv1d: conv_flops_counter_hook,
|
||||
nn.Conv2d: conv_flops_counter_hook,
|
||||
mmcv.cnn.bricks.Conv2d: conv_flops_counter_hook,
|
||||
nn.Conv3d: conv_flops_counter_hook,
|
||||
mmcv.cnn.bricks.Conv3d: conv_flops_counter_hook,
|
||||
# activations
|
||||
nn.ReLU: relu_flops_counter_hook,
|
||||
nn.PReLU: relu_flops_counter_hook,
|
||||
nn.ELU: relu_flops_counter_hook,
|
||||
nn.LeakyReLU: relu_flops_counter_hook,
|
||||
nn.ReLU6: relu_flops_counter_hook,
|
||||
# poolings
|
||||
nn.MaxPool1d: pool_flops_counter_hook,
|
||||
nn.AvgPool1d: pool_flops_counter_hook,
|
||||
nn.AvgPool2d: pool_flops_counter_hook,
|
||||
nn.MaxPool2d: pool_flops_counter_hook,
|
||||
mmcv.cnn.bricks.MaxPool2d: pool_flops_counter_hook,
|
||||
nn.MaxPool3d: pool_flops_counter_hook,
|
||||
mmcv.cnn.bricks.MaxPool3d: pool_flops_counter_hook,
|
||||
nn.AvgPool3d: pool_flops_counter_hook,
|
||||
nn.AdaptiveMaxPool1d: pool_flops_counter_hook,
|
||||
nn.AdaptiveAvgPool1d: pool_flops_counter_hook,
|
||||
nn.AdaptiveMaxPool2d: pool_flops_counter_hook,
|
||||
nn.AdaptiveAvgPool2d: pool_flops_counter_hook,
|
||||
nn.AdaptiveMaxPool3d: pool_flops_counter_hook,
|
||||
nn.AdaptiveAvgPool3d: pool_flops_counter_hook,
|
||||
# normalizations
|
||||
nn.BatchNorm1d: norm_flops_counter_hook,
|
||||
nn.BatchNorm2d: norm_flops_counter_hook,
|
||||
nn.BatchNorm3d: norm_flops_counter_hook,
|
||||
nn.GroupNorm: norm_flops_counter_hook,
|
||||
nn.InstanceNorm1d: norm_flops_counter_hook,
|
||||
nn.InstanceNorm2d: norm_flops_counter_hook,
|
||||
nn.InstanceNorm3d: norm_flops_counter_hook,
|
||||
nn.LayerNorm: norm_flops_counter_hook,
|
||||
# FC
|
||||
nn.Linear: linear_flops_counter_hook,
|
||||
mmcv.cnn.bricks.Linear: linear_flops_counter_hook,
|
||||
# Upscale
|
||||
nn.Upsample: upsample_flops_counter_hook,
|
||||
# Deconvolution
|
||||
nn.ConvTranspose2d: deconv_flops_counter_hook,
|
||||
mmcv.cnn.bricks.ConvTranspose2d: deconv_flops_counter_hook,
|
||||
}
|
||||
|
|
@@ -0,0 +1,59 @@
|
|||
# Copyright (c) OpenMMLab. All rights reserved.
|
||||
import torch
|
||||
import torch.nn as nn
|
||||
|
||||
|
||||
def _fuse_conv_bn(conv, bn):
|
||||
"""Fuse conv and bn into one module.
|
||||
|
||||
Args:
|
||||
conv (nn.Module): Conv to be fused.
|
||||
bn (nn.Module): BN to be fused.
|
||||
|
||||
Returns:
|
||||
nn.Module: Fused module.
|
||||
"""
|
||||
conv_w = conv.weight
|
||||
conv_b = conv.bias if conv.bias is not None else torch.zeros_like(
|
||||
bn.running_mean)
|
||||
|
||||
factor = bn.weight / torch.sqrt(bn.running_var + bn.eps)
|
||||
conv.weight = nn.Parameter(conv_w *
|
||||
factor.reshape([conv.out_channels, 1, 1, 1]))
|
||||
conv.bias = nn.Parameter((conv_b - bn.running_mean) * factor + bn.bias)
|
||||
return conv
|
||||
|
||||
|
||||
def fuse_conv_bn(module):
|
||||
"""Recursively fuse conv and bn in a module.
|
||||
|
||||
During inference, the functionality of batch norm layers is turned off
|
||||
but only the mean and var along channels are used, which exposes the
|
||||
chance to fuse it with the preceding conv layers to save computations and
|
||||
simplify network structures.
|
||||
|
||||
Args:
|
||||
module (nn.Module): Module to be fused.
|
||||
|
||||
Returns:
|
||||
nn.Module: Fused module.
|
||||
"""
|
||||
last_conv = None
|
||||
last_conv_name = None
|
||||
|
||||
for name, child in module.named_children():
|
||||
if isinstance(child,
|
||||
(nn.modules.batchnorm._BatchNorm, nn.SyncBatchNorm)):
|
||||
if last_conv is None: # only fuse BN that is after Conv
|
||||
continue
|
||||
fused_conv = _fuse_conv_bn(last_conv, child)
|
||||
module._modules[last_conv_name] = fused_conv
|
||||
# To reduce changes, set BN as Identity instead of deleting it.
|
||||
module._modules[name] = nn.Identity()
|
||||
last_conv = None
|
||||
elif isinstance(child, nn.Conv2d):
|
||||
last_conv = child
|
||||
last_conv_name = name
|
||||
else:
|
||||
fuse_conv_bn(child)
|
||||
return module
|
||||
|
|
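A small sketch of the intended usage (assuming fuse_conv_bn from this file is in scope): fuse a conv+bn pair in eval mode and check the outputs stay numerically the same.

import torch
import torch.nn as nn

net = nn.Sequential(nn.Conv2d(3, 8, 3, padding=1), nn.BatchNorm2d(8), nn.ReLU())
net.eval()                       # fusion only makes sense with frozen BN statistics

x = torch.randn(1, 3, 16, 16)
ref = net(x)
fused = fuse_conv_bn(net)        # BN is folded into the conv and replaced by Identity
assert torch.allclose(ref, fused(x), atol=1e-5)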
@@ -0,0 +1,59 @@
|
|||
import torch
|
||||
|
||||
import annotator.uniformer.mmcv as mmcv
|
||||
|
||||
|
||||
class _BatchNormXd(torch.nn.modules.batchnorm._BatchNorm):
|
||||
"""A general BatchNorm layer without input dimension check.
|
||||
|
||||
Reproduced from @kapily's work:
|
||||
(https://github.com/pytorch/pytorch/issues/41081#issuecomment-783961547)
|
||||
The only difference between BatchNorm1d, BatchNorm2d, BatchNorm3d, etc
|
||||
is `_check_input_dim` that is designed for tensor sanity checks.
|
||||
The check has been bypassed in this class for the convenience of converting
|
||||
SyncBatchNorm.
|
||||
"""
|
||||
|
||||
def _check_input_dim(self, input):
|
||||
return
|
||||
|
||||
|
||||
def revert_sync_batchnorm(module):
|
||||
"""Helper function to convert all `SyncBatchNorm` (SyncBN) and
|
||||
`mmcv.ops.sync_bn.SyncBatchNorm` (MMSyncBN) layers in the model to
|
||||
`BatchNormXd` layers.
|
||||
|
||||
Adapted from @kapily's work:
|
||||
(https://github.com/pytorch/pytorch/issues/41081#issuecomment-783961547)
|
||||
|
||||
Args:
|
||||
module (nn.Module): The module containing `SyncBatchNorm` layers.
|
||||
|
||||
Returns:
|
||||
module_output: The converted module with `BatchNormXd` layers.
|
||||
"""
|
||||
module_output = module
|
||||
module_checklist = [torch.nn.modules.batchnorm.SyncBatchNorm]
|
||||
if hasattr(mmcv, 'ops'):
|
||||
module_checklist.append(mmcv.ops.SyncBatchNorm)
|
||||
if isinstance(module, tuple(module_checklist)):
|
||||
module_output = _BatchNormXd(module.num_features, module.eps,
|
||||
module.momentum, module.affine,
|
||||
module.track_running_stats)
|
||||
if module.affine:
|
||||
# no_grad() may not be needed here but
|
||||
# just to be consistent with `convert_sync_batchnorm()`
|
||||
with torch.no_grad():
|
||||
module_output.weight = module.weight
|
||||
module_output.bias = module.bias
|
||||
module_output.running_mean = module.running_mean
|
||||
module_output.running_var = module.running_var
|
||||
module_output.num_batches_tracked = module.num_batches_tracked
|
||||
module_output.training = module.training
|
||||
# qconfig exists in quantized models
|
||||
if hasattr(module, 'qconfig'):
|
||||
module_output.qconfig = module.qconfig
|
||||
for name, child in module.named_children():
|
||||
module_output.add_module(name, revert_sync_batchnorm(child))
|
||||
del module
|
||||
return module_output
|
||||
|
|
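A minimal sketch (assuming revert_sync_batchnorm from this file is in scope): convert a SyncBN model so it can run on a single device where the distributed process group required by SyncBatchNorm is unavailable.

import torch
import torch.nn as nn

model = nn.Sequential(nn.Conv2d(3, 8, 3), nn.SyncBatchNorm(8), nn.ReLU())
model = revert_sync_batchnorm(model)       # SyncBatchNorm -> _BatchNormXd
model.eval()
out = model(torch.randn(1, 3, 16, 16))     # runs without any process group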
@@ -0,0 +1,684 @@
|
|||
# Copyright (c) OpenMMLab. All rights reserved.
|
||||
import copy
|
||||
import math
|
||||
import warnings
|
||||
|
||||
import numpy as np
|
||||
import torch
|
||||
import torch.nn as nn
|
||||
from torch import Tensor
|
||||
|
||||
from annotator.uniformer.mmcv.utils import Registry, build_from_cfg, get_logger, print_log
|
||||
|
||||
INITIALIZERS = Registry('initializer')
|
||||
|
||||
|
||||
def update_init_info(module, init_info):
|
||||
"""Update the `_params_init_info` in the module if the value of parameters
|
||||
are changed.
|
||||
|
||||
Args:
|
||||
module (obj:`nn.Module`): The module of PyTorch with a user-defined
|
||||
attribute `_params_init_info` which records the initialization
|
||||
information.
|
||||
init_info (str): The string that describes the initialization.
|
||||
"""
|
||||
assert hasattr(
|
||||
module,
|
||||
'_params_init_info'), f'Can not find `_params_init_info` in {module}'
|
||||
for name, param in module.named_parameters():
|
||||
|
||||
assert param in module._params_init_info, (
|
||||
f'Find a new :obj:`Parameter` '
|
||||
f'named `{name}` during executing the '
|
||||
f'`init_weights` of '
|
||||
f'`{module.__class__.__name__}`. '
|
||||
f'Please do not add or '
|
||||
f'replace parameters during executing '
|
||||
f'the `init_weights`. ')
|
||||
|
||||
# The parameter has been changed during executing the
|
||||
# `init_weights` of module
|
||||
mean_value = param.data.mean()
|
||||
if module._params_init_info[param]['tmp_mean_value'] != mean_value:
|
||||
module._params_init_info[param]['init_info'] = init_info
|
||||
module._params_init_info[param]['tmp_mean_value'] = mean_value
|
||||
|
||||
|
||||
def constant_init(module, val, bias=0):
|
||||
if hasattr(module, 'weight') and module.weight is not None:
|
||||
nn.init.constant_(module.weight, val)
|
||||
if hasattr(module, 'bias') and module.bias is not None:
|
||||
nn.init.constant_(module.bias, bias)
|
||||
|
||||
|
||||
def xavier_init(module, gain=1, bias=0, distribution='normal'):
|
||||
assert distribution in ['uniform', 'normal']
|
||||
if hasattr(module, 'weight') and module.weight is not None:
|
||||
if distribution == 'uniform':
|
||||
nn.init.xavier_uniform_(module.weight, gain=gain)
|
||||
else:
|
||||
nn.init.xavier_normal_(module.weight, gain=gain)
|
||||
if hasattr(module, 'bias') and module.bias is not None:
|
||||
nn.init.constant_(module.bias, bias)
|
||||
|
||||
|
||||
def normal_init(module, mean=0, std=1, bias=0):
|
||||
if hasattr(module, 'weight') and module.weight is not None:
|
||||
nn.init.normal_(module.weight, mean, std)
|
||||
if hasattr(module, 'bias') and module.bias is not None:
|
||||
nn.init.constant_(module.bias, bias)
|
||||
|
||||
|
||||
def trunc_normal_init(module: nn.Module,
|
||||
mean: float = 0,
|
||||
std: float = 1,
|
||||
a: float = -2,
|
||||
b: float = 2,
|
||||
bias: float = 0) -> None:
|
||||
if hasattr(module, 'weight') and module.weight is not None:
|
||||
trunc_normal_(module.weight, mean, std, a, b) # type: ignore
|
||||
if hasattr(module, 'bias') and module.bias is not None:
|
||||
nn.init.constant_(module.bias, bias) # type: ignore
|
||||
|
||||
|
||||
def uniform_init(module, a=0, b=1, bias=0):
|
||||
if hasattr(module, 'weight') and module.weight is not None:
|
||||
nn.init.uniform_(module.weight, a, b)
|
||||
if hasattr(module, 'bias') and module.bias is not None:
|
||||
nn.init.constant_(module.bias, bias)
|
||||
|
||||
|
||||
def kaiming_init(module,
|
||||
a=0,
|
||||
mode='fan_out',
|
||||
nonlinearity='relu',
|
||||
bias=0,
|
||||
distribution='normal'):
|
||||
assert distribution in ['uniform', 'normal']
|
||||
if hasattr(module, 'weight') and module.weight is not None:
|
||||
if distribution == 'uniform':
|
||||
nn.init.kaiming_uniform_(
|
||||
module.weight, a=a, mode=mode, nonlinearity=nonlinearity)
|
||||
else:
|
||||
nn.init.kaiming_normal_(
|
||||
module.weight, a=a, mode=mode, nonlinearity=nonlinearity)
|
||||
if hasattr(module, 'bias') and module.bias is not None:
|
||||
nn.init.constant_(module.bias, bias)
|
||||
|
||||
|
||||
def caffe2_xavier_init(module, bias=0):
|
||||
# `XavierFill` in Caffe2 corresponds to `kaiming_uniform_` in PyTorch
|
||||
# Acknowledgment to FAIR's internal code
|
||||
kaiming_init(
|
||||
module,
|
||||
a=1,
|
||||
mode='fan_in',
|
||||
nonlinearity='leaky_relu',
|
||||
bias=bias,
|
||||
distribution='uniform')
|
||||
|
||||
|
||||
def bias_init_with_prob(prior_prob):
|
||||
"""initialize conv/fc bias value according to a given probability value."""
|
||||
bias_init = float(-np.log((1 - prior_prob) / prior_prob))
|
||||
return bias_init
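A quick numeric check of the formula above (focal-loss style prior): with prior_prob = 0.01 the bias comes out near -4.595, so a sigmoid over the initialized logits starts at roughly 1% positives.

import math

prior_prob = 0.01
bias = -math.log((1 - prior_prob) / prior_prob)    # ≈ -4.595
assert abs(1 / (1 + math.exp(-bias)) - prior_prob) < 1e-9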
|
||||
|
||||
|
||||
def _get_bases_name(m):
|
||||
return [b.__name__ for b in m.__class__.__bases__]
|
||||
|
||||
|
||||
class BaseInit(object):
|
||||
|
||||
def __init__(self, *, bias=0, bias_prob=None, layer=None):
|
||||
self.wholemodule = False
|
||||
if not isinstance(bias, (int, float)):
|
||||
raise TypeError(f'bias must be a number, but got a {type(bias)}')
|
||||
|
||||
if bias_prob is not None:
|
||||
if not isinstance(bias_prob, float):
|
||||
raise TypeError(f'bias_prob type must be float, \
|
||||
but got {type(bias_prob)}')
|
||||
|
||||
if layer is not None:
|
||||
if not isinstance(layer, (str, list)):
|
||||
raise TypeError(f'layer must be a str or a list of str, \
|
||||
but got a {type(layer)}')
|
||||
else:
|
||||
layer = []
|
||||
|
||||
if bias_prob is not None:
|
||||
self.bias = bias_init_with_prob(bias_prob)
|
||||
else:
|
||||
self.bias = bias
|
||||
self.layer = [layer] if isinstance(layer, str) else layer
|
||||
|
||||
def _get_init_info(self):
|
||||
info = f'{self.__class__.__name__}, bias={self.bias}'
|
||||
return info
|
||||
|
||||
|
||||
@INITIALIZERS.register_module(name='Constant')
|
||||
class ConstantInit(BaseInit):
|
||||
"""Initialize module parameters with constant values.
|
||||
|
||||
Args:
|
||||
val (int | float): the value to fill the weights in the module with
|
||||
bias (int | float): the value to fill the bias. Defaults to 0.
|
||||
bias_prob (float, optional): the probability for bias initialization.
|
||||
Defaults to None.
|
||||
layer (str | list[str], optional): the layer will be initialized.
|
||||
Defaults to None.
|
||||
"""
|
||||
|
||||
def __init__(self, val, **kwargs):
|
||||
super().__init__(**kwargs)
|
||||
self.val = val
|
||||
|
||||
def __call__(self, module):
|
||||
|
||||
def init(m):
|
||||
if self.wholemodule:
|
||||
constant_init(m, self.val, self.bias)
|
||||
else:
|
||||
layername = m.__class__.__name__
|
||||
basesname = _get_bases_name(m)
|
||||
if len(set(self.layer) & set([layername] + basesname)):
|
||||
constant_init(m, self.val, self.bias)
|
||||
|
||||
module.apply(init)
|
||||
if hasattr(module, '_params_init_info'):
|
||||
update_init_info(module, init_info=self._get_init_info())
|
||||
|
||||
def _get_init_info(self):
|
||||
info = f'{self.__class__.__name__}: val={self.val}, bias={self.bias}'
|
||||
return info
|
||||
|
||||
|
||||
@INITIALIZERS.register_module(name='Xavier')
|
||||
class XavierInit(BaseInit):
|
||||
r"""Initialize module parameters with values according to the method
|
||||
described in `Understanding the difficulty of training deep feedforward
|
||||
neural networks - Glorot, X. & Bengio, Y. (2010).
|
||||
<http://proceedings.mlr.press/v9/glorot10a/glorot10a.pdf>`_
|
||||
|
||||
Args:
|
||||
gain (int | float): an optional scaling factor. Defaults to 1.
|
||||
bias (int | float): the value to fill the bias. Defaults to 0.
|
||||
bias_prob (float, optional): the probability for bias initialization.
|
||||
Defaults to None.
|
||||
distribution (str): distribution either be ``'normal'``
|
||||
or ``'uniform'``. Defaults to ``'normal'``.
|
||||
layer (str | list[str], optional): the layer will be initialized.
|
||||
Defaults to None.
|
||||
"""
|
||||
|
||||
def __init__(self, gain=1, distribution='normal', **kwargs):
|
||||
super().__init__(**kwargs)
|
||||
self.gain = gain
|
||||
self.distribution = distribution
|
||||
|
||||
def __call__(self, module):
|
||||
|
||||
def init(m):
|
||||
if self.wholemodule:
|
||||
xavier_init(m, self.gain, self.bias, self.distribution)
|
||||
else:
|
||||
layername = m.__class__.__name__
|
||||
basesname = _get_bases_name(m)
|
||||
if len(set(self.layer) & set([layername] + basesname)):
|
||||
xavier_init(m, self.gain, self.bias, self.distribution)
|
||||
|
||||
module.apply(init)
|
||||
if hasattr(module, '_params_init_info'):
|
||||
update_init_info(module, init_info=self._get_init_info())
|
||||
|
||||
def _get_init_info(self):
|
||||
info = f'{self.__class__.__name__}: gain={self.gain}, ' \
|
||||
f'distribution={self.distribution}, bias={self.bias}'
|
||||
return info
|
||||
|
||||
|
||||
@INITIALIZERS.register_module(name='Normal')
|
||||
class NormalInit(BaseInit):
|
||||
r"""Initialize module parameters with the values drawn from the normal
|
||||
distribution :math:`\mathcal{N}(\text{mean}, \text{std}^2)`.
|
||||
|
||||
Args:
|
||||
mean (int | float): the mean of the normal distribution. Defaults to 0.
|
||||
std (int | float): the standard deviation of the normal distribution.
|
||||
Defaults to 1.
|
||||
bias (int | float): the value to fill the bias. Defaults to 0.
|
||||
bias_prob (float, optional): the probability for bias initialization.
|
||||
Defaults to None.
|
||||
layer (str | list[str], optional): the layer(s) to be initialized.
|
||||
Defaults to None.
|
||||
|
||||
"""
|
||||
|
||||
def __init__(self, mean=0, std=1, **kwargs):
|
||||
super().__init__(**kwargs)
|
||||
self.mean = mean
|
||||
self.std = std
|
||||
|
||||
def __call__(self, module):
|
||||
|
||||
def init(m):
|
||||
if self.wholemodule:
|
||||
normal_init(m, self.mean, self.std, self.bias)
|
||||
else:
|
||||
layername = m.__class__.__name__
|
||||
basesname = _get_bases_name(m)
|
||||
if len(set(self.layer) & set([layername] + basesname)):
|
||||
normal_init(m, self.mean, self.std, self.bias)
|
||||
|
||||
module.apply(init)
|
||||
if hasattr(module, '_params_init_info'):
|
||||
update_init_info(module, init_info=self._get_init_info())
|
||||
|
||||
def _get_init_info(self):
|
||||
info = f'{self.__class__.__name__}: mean={self.mean},' \
|
||||
f' std={self.std}, bias={self.bias}'
|
||||
return info
|
||||
|
||||
|
||||
@INITIALIZERS.register_module(name='TruncNormal')
|
||||
class TruncNormalInit(BaseInit):
|
||||
r"""Initialize module parameters with the values drawn from the normal
|
||||
distribution :math:`\mathcal{N}(\text{mean}, \text{std}^2)` with values
|
||||
outside :math:`[a, b]` redrawn until they are within the bounds.
|
||||
|
||||
Args:
|
||||
mean (float): the mean of the normal distribution. Defaults to 0.
|
||||
std (float): the standard deviation of the normal distribution.
|
||||
Defaults to 1.
|
||||
a (float): The minimum cutoff value.
|
||||
b (float): The maximum cutoff value.
|
||||
bias (float): the value to fill the bias. Defaults to 0.
|
||||
bias_prob (float, optional): the probability for bias initialization.
|
||||
Defaults to None.
|
||||
layer (str | list[str], optional): the layer(s) to be initialized.
|
||||
Defaults to None.
|
||||
|
||||
"""
|
||||
|
||||
def __init__(self,
|
||||
mean: float = 0,
|
||||
std: float = 1,
|
||||
a: float = -2,
|
||||
b: float = 2,
|
||||
**kwargs) -> None:
|
||||
super().__init__(**kwargs)
|
||||
self.mean = mean
|
||||
self.std = std
|
||||
self.a = a
|
||||
self.b = b
|
||||
|
||||
def __call__(self, module: nn.Module) -> None:
|
||||
|
||||
def init(m):
|
||||
if self.wholemodule:
|
||||
trunc_normal_init(m, self.mean, self.std, self.a, self.b,
|
||||
self.bias)
|
||||
else:
|
||||
layername = m.__class__.__name__
|
||||
basesname = _get_bases_name(m)
|
||||
if len(set(self.layer) & set([layername] + basesname)):
|
||||
trunc_normal_init(m, self.mean, self.std, self.a, self.b,
|
||||
self.bias)
|
||||
|
||||
module.apply(init)
|
||||
if hasattr(module, '_params_init_info'):
|
||||
update_init_info(module, init_info=self._get_init_info())
|
||||
|
||||
def _get_init_info(self):
|
||||
info = f'{self.__class__.__name__}: a={self.a}, b={self.b},' \
|
||||
f' mean={self.mean}, std={self.std}, bias={self.bias}'
|
||||
return info
|
||||
|
||||
|
||||
@INITIALIZERS.register_module(name='Uniform')
|
||||
class UniformInit(BaseInit):
|
||||
r"""Initialize module parameters with values drawn from the uniform
|
||||
distribution :math:`\mathcal{U}(a, b)`.
|
||||
|
||||
Args:
|
||||
a (int | float): the lower bound of the uniform distribution.
|
||||
Defaults to 0.
|
||||
b (int | float): the upper bound of the uniform distribution.
|
||||
Defaults to 1.
|
||||
bias (int | float): the value to fill the bias. Defaults to 0.
|
||||
bias_prob (float, optional): the probability for bias initialization.
|
||||
Defaults to None.
|
||||
layer (str | list[str], optional): the layer(s) to be initialized.
|
||||
Defaults to None.
|
||||
"""
|
||||
|
||||
def __init__(self, a=0, b=1, **kwargs):
|
||||
super().__init__(**kwargs)
|
||||
self.a = a
|
||||
self.b = b
|
||||
|
||||
def __call__(self, module):
|
||||
|
||||
def init(m):
|
||||
if self.wholemodule:
|
||||
uniform_init(m, self.a, self.b, self.bias)
|
||||
else:
|
||||
layername = m.__class__.__name__
|
||||
basesname = _get_bases_name(m)
|
||||
if len(set(self.layer) & set([layername] + basesname)):
|
||||
uniform_init(m, self.a, self.b, self.bias)
|
||||
|
||||
module.apply(init)
|
||||
if hasattr(module, '_params_init_info'):
|
||||
update_init_info(module, init_info=self._get_init_info())
|
||||
|
||||
def _get_init_info(self):
|
||||
info = f'{self.__class__.__name__}: a={self.a},' \
|
||||
f' b={self.b}, bias={self.bias}'
|
||||
return info
|
||||
|
||||
|
||||
@INITIALIZERS.register_module(name='Kaiming')
|
||||
class KaimingInit(BaseInit):
|
||||
r"""Initialize module parameters with the values according to the method
|
||||
described in `Delving deep into rectifiers: Surpassing human-level
|
||||
performance on ImageNet classification - He, K. et al. (2015).
|
||||
<https://www.cv-foundation.org/openaccess/content_iccv_2015/
|
||||
papers/He_Delving_Deep_into_ICCV_2015_paper.pdf>`_
|
||||
|
||||
Args:
|
||||
a (int | float): the negative slope of the rectifier used after this
|
||||
layer (only used with ``'leaky_relu'``). Defaults to 0.
|
||||
mode (str): either ``'fan_in'`` or ``'fan_out'``. Choosing
|
||||
``'fan_in'`` preserves the magnitude of the variance of the weights
|
||||
in the forward pass. Choosing ``'fan_out'`` preserves the
|
||||
magnitudes in the backwards pass. Defaults to ``'fan_out'``.
|
||||
nonlinearity (str): the non-linear function (`nn.functional` name),
|
||||
recommended to use only with ``'relu'`` or ``'leaky_relu'`` .
|
||||
Defaults to 'relu'.
|
||||
bias (int | float): the value to fill the bias. Defaults to 0.
|
||||
bias_prob (float, optional): the probability for bias initialization.
|
||||
Defaults to None.
|
||||
distribution (str): distribution either be ``'normal'`` or
|
||||
``'uniform'``. Defaults to ``'normal'``.
|
||||
layer (str | list[str], optional): the layer(s) to be initialized.
|
||||
Defaults to None.
|
||||
"""
|
||||
|
||||
def __init__(self,
|
||||
a=0,
|
||||
mode='fan_out',
|
||||
nonlinearity='relu',
|
||||
distribution='normal',
|
||||
**kwargs):
|
||||
super().__init__(**kwargs)
|
||||
self.a = a
|
||||
self.mode = mode
|
||||
self.nonlinearity = nonlinearity
|
||||
self.distribution = distribution
|
||||
|
||||
def __call__(self, module):
|
||||
|
||||
def init(m):
|
||||
if self.wholemodule:
|
||||
kaiming_init(m, self.a, self.mode, self.nonlinearity,
|
||||
self.bias, self.distribution)
|
||||
else:
|
||||
layername = m.__class__.__name__
|
||||
basesname = _get_bases_name(m)
|
||||
if len(set(self.layer) & set([layername] + basesname)):
|
||||
kaiming_init(m, self.a, self.mode, self.nonlinearity,
|
||||
self.bias, self.distribution)
|
||||
|
||||
module.apply(init)
|
||||
if hasattr(module, '_params_init_info'):
|
||||
update_init_info(module, init_info=self._get_init_info())
|
||||
|
||||
def _get_init_info(self):
|
||||
info = f'{self.__class__.__name__}: a={self.a}, mode={self.mode}, ' \
|
||||
f'nonlinearity={self.nonlinearity}, ' \
|
||||
f'distribution={self.distribution}, bias={self.bias}'
|
||||
return info
|
||||
|
||||
|
||||
@INITIALIZERS.register_module(name='Caffe2Xavier')
|
||||
class Caffe2XavierInit(KaimingInit):
|
||||
# `XavierFill` in Caffe2 corresponds to `kaiming_uniform_` in PyTorch
|
||||
# Acknowledgment to FAIR's internal code
|
||||
def __init__(self, **kwargs):
|
||||
super().__init__(
|
||||
a=1,
|
||||
mode='fan_in',
|
||||
nonlinearity='leaky_relu',
|
||||
distribution='uniform',
|
||||
**kwargs)
|
||||
|
||||
def __call__(self, module):
|
||||
super().__call__(module)
|
||||
|
||||
|
||||
@INITIALIZERS.register_module(name='Pretrained')
|
||||
class PretrainedInit(object):
|
||||
"""Initialize module by loading a pretrained model.
|
||||
|
||||
Args:
|
||||
checkpoint (str): the checkpoint file of the pretrained model to
|
||||
be loaded.
|
||||
prefix (str, optional): the prefix of a sub-module in the pretrained
|
||||
model. It is for loading a part of the pretrained model to
|
||||
initialize. For example, if we would like to only load the
|
||||
backbone of a detector model, we can set ``prefix='backbone.'``.
|
||||
Defaults to None.
|
||||
map_location (str): map tensors into proper locations.
|
||||
"""
|
||||
|
||||
def __init__(self, checkpoint, prefix=None, map_location=None):
|
||||
self.checkpoint = checkpoint
|
||||
self.prefix = prefix
|
||||
self.map_location = map_location
|
||||
|
||||
def __call__(self, module):
|
||||
from annotator.uniformer.mmcv.runner import (_load_checkpoint_with_prefix, load_checkpoint,
|
||||
load_state_dict)
|
||||
logger = get_logger('mmcv')
|
||||
if self.prefix is None:
|
||||
print_log(f'load model from: {self.checkpoint}', logger=logger)
|
||||
load_checkpoint(
|
||||
module,
|
||||
self.checkpoint,
|
||||
map_location=self.map_location,
|
||||
strict=False,
|
||||
logger=logger)
|
||||
else:
|
||||
print_log(
|
||||
f'load {self.prefix} in model from: {self.checkpoint}',
|
||||
logger=logger)
|
||||
state_dict = _load_checkpoint_with_prefix(
|
||||
self.prefix, self.checkpoint, map_location=self.map_location)
|
||||
load_state_dict(module, state_dict, strict=False, logger=logger)
|
||||
|
||||
if hasattr(module, '_params_init_info'):
|
||||
update_init_info(module, init_info=self._get_init_info())
|
||||
|
||||
def _get_init_info(self):
|
||||
info = f'{self.__class__.__name__}: load from {self.checkpoint}'
|
||||
return info
|
||||
|
||||
|
||||
def _initialize(module, cfg, wholemodule=False):
|
||||
func = build_from_cfg(cfg, INITIALIZERS)
|
||||
# wholemodule flag is for override mode, there is no layer key in override
|
||||
# and initializer will give init values for the whole module with the name
|
||||
# in override.
|
||||
func.wholemodule = wholemodule
|
||||
func(module)
|
||||
|
||||
|
||||
def _initialize_override(module, override, cfg):
|
||||
if not isinstance(override, (dict, list)):
|
||||
raise TypeError(f'override must be a dict or a list of dict, \
|
||||
but got {type(override)}')
|
||||
|
||||
override = [override] if isinstance(override, dict) else override
|
||||
|
||||
for override_ in override:
|
||||
|
||||
cp_override = copy.deepcopy(override_)
|
||||
name = cp_override.pop('name', None)
|
||||
if name is None:
|
||||
raise ValueError('`override` must contain the key "name",'
|
||||
f'but got {cp_override}')
|
||||
# if override only has name key, it means use args in init_cfg
|
||||
if not cp_override:
|
||||
cp_override.update(cfg)
|
||||
# if override has name key and other args except type key, it will
|
||||
# raise error
|
||||
elif 'type' not in cp_override.keys():
|
||||
raise ValueError(
|
||||
f'`override` need "type" key, but got {cp_override}')
|
||||
|
||||
if hasattr(module, name):
|
||||
_initialize(getattr(module, name), cp_override, wholemodule=True)
|
||||
else:
|
||||
raise RuntimeError(f'module did not have attribute {name}, '
|
||||
f'but init_cfg is {cp_override}.')
|
||||
|
||||
|
||||
def initialize(module, init_cfg):
|
||||
"""Initialize a module.
|
||||
|
||||
Args:
|
||||
module (``torch.nn.Module``): the module will be initialized.
|
||||
init_cfg (dict | list[dict]): initialization configuration dict to
|
||||
define initializer. OpenMMLab has implemented 8 initializers
|
||||
including ``Constant``, ``Xavier``, ``Normal``, ``Uniform``,
|
||||
``Kaiming``, ``TruncNormal``, ``Caffe2Xavier``, and ``Pretrained``.
|
||||
Example:
|
||||
>>> module = nn.Linear(2, 3, bias=True)
|
||||
>>> init_cfg = dict(type='Constant', layer='Linear', val =1 , bias =2)
|
||||
>>> initialize(module, init_cfg)
|
||||
|
||||
>>> module = nn.Sequential(nn.Conv1d(3, 1, 3), nn.Linear(1,2))
|
||||
>>> # define key ``'layer'`` for initializing layer with different
|
||||
>>> # configuration
|
||||
>>> init_cfg = [dict(type='Constant', layer='Conv1d', val=1),
|
||||
dict(type='Constant', layer='Linear', val=2)]
|
||||
>>> initialize(module, init_cfg)
|
||||
|
||||
>>> # define key``'override'`` to initialize some specific part in
|
||||
>>> # module
|
||||
>>> class FooNet(nn.Module):
|
||||
>>> def __init__(self):
|
||||
>>> super().__init__()
|
||||
>>> self.feat = nn.Conv2d(3, 16, 3)
|
||||
>>> self.reg = nn.Conv2d(16, 10, 3)
|
||||
>>> self.cls = nn.Conv2d(16, 5, 3)
|
||||
>>> model = FooNet()
|
||||
>>> init_cfg = dict(type='Constant', val=1, bias=2, layer='Conv2d',
|
||||
>>> override=dict(type='Constant', name='reg', val=3, bias=4))
|
||||
>>> initialize(model, init_cfg)
|
||||
|
||||
>>> model = ResNet(depth=50)
|
||||
>>> # Initialize weights with the pretrained model.
|
||||
>>> init_cfg = dict(type='Pretrained',
|
||||
checkpoint='torchvision://resnet50')
|
||||
>>> initialize(model, init_cfg)
|
||||
|
||||
>>> # Initialize weights of a sub-module with the specific part of
|
||||
>>> # a pretrained model by using "prefix".
|
||||
>>> url = 'http://download.openmmlab.com/mmdetection/v2.0/retinanet/'\
|
||||
>>> 'retinanet_r50_fpn_1x_coco/'\
|
||||
>>> 'retinanet_r50_fpn_1x_coco_20200130-c2398f9e.pth'
|
||||
>>> init_cfg = dict(type='Pretrained',
|
||||
checkpoint=url, prefix='backbone.')
|
||||
"""
|
||||
if not isinstance(init_cfg, (dict, list)):
|
||||
raise TypeError(f'init_cfg must be a dict or a list of dict, \
|
||||
but got {type(init_cfg)}')
|
||||
|
||||
if isinstance(init_cfg, dict):
|
||||
init_cfg = [init_cfg]
|
||||
|
||||
for cfg in init_cfg:
|
||||
# should deeply copy the original config because cfg may be used by
|
||||
# other modules, e.g., one init_cfg shared by multiple bottleneck
|
||||
# blocks, the expected cfg will be changed after pop and will change
|
||||
# the initialization behavior of other modules
|
||||
cp_cfg = copy.deepcopy(cfg)
|
||||
override = cp_cfg.pop('override', None)
|
||||
_initialize(module, cp_cfg)
|
||||
|
||||
if override is not None:
|
||||
cp_cfg.pop('layer', None)
|
||||
_initialize_override(module, override, cp_cfg)
|
||||
else:
|
||||
# All attributes in module have same initialization.
|
||||
pass
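A condensed, runnable version of the FooNet example from the docstring above (the layer names are illustrative):

import torch.nn as nn

model = nn.Sequential(nn.Conv2d(3, 16, 3), nn.Conv2d(16, 10, 3))
initialize(model, dict(type='Constant', layer='Conv2d', val=1, bias=2))
assert (model[1].weight == 1).all() and (model[1].bias == 2).all()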
|
||||
|
||||
|
||||
def _no_grad_trunc_normal_(tensor: Tensor, mean: float, std: float, a: float,
|
||||
b: float) -> Tensor:
|
||||
# Method based on
|
||||
# https://people.sc.fsu.edu/~jburkardt/presentations/truncated_normal.pdf
|
||||
# Modified from
|
||||
# https://github.com/pytorch/pytorch/blob/master/torch/nn/init.py
|
||||
def norm_cdf(x):
|
||||
# Computes standard normal cumulative distribution function
|
||||
return (1. + math.erf(x / math.sqrt(2.))) / 2.
|
||||
|
||||
if (mean < a - 2 * std) or (mean > b + 2 * std):
|
||||
warnings.warn(
|
||||
'mean is more than 2 std from [a, b] in nn.init.trunc_normal_. '
|
||||
'The distribution of values may be incorrect.',
|
||||
stacklevel=2)
|
||||
|
||||
with torch.no_grad():
|
||||
# Values are generated by using a truncated uniform distribution and
|
||||
# then using the inverse CDF for the normal distribution.
|
||||
# Get upper and lower cdf values
|
||||
lower = norm_cdf((a - mean) / std)
|
||||
upper = norm_cdf((b - mean) / std)
|
||||
|
||||
# Uniformly fill tensor with values from [lower, upper], then translate
|
||||
# to [2lower-1, 2upper-1].
|
||||
tensor.uniform_(2 * lower - 1, 2 * upper - 1)
|
||||
|
||||
# Use inverse cdf transform for normal distribution to get truncated
|
||||
# standard normal
|
||||
tensor.erfinv_()
|
||||
|
||||
# Transform to proper mean, std
|
||||
tensor.mul_(std * math.sqrt(2.))
|
||||
tensor.add_(mean)
|
||||
|
||||
# Clamp to ensure it's in the proper range
|
||||
tensor.clamp_(min=a, max=b)
|
||||
return tensor
|
||||
|
||||
|
||||
def trunc_normal_(tensor: Tensor,
|
||||
mean: float = 0.,
|
||||
std: float = 1.,
|
||||
a: float = -2.,
|
||||
b: float = 2.) -> Tensor:
|
||||
r"""Fills the input Tensor with values drawn from a truncated
|
||||
normal distribution. The values are effectively drawn from the
|
||||
normal distribution :math:`\mathcal{N}(\text{mean}, \text{std}^2)`
|
||||
with values outside :math:`[a, b]` redrawn until they are within
|
||||
the bounds. The method used for generating the random values works
|
||||
best when :math:`a \leq \text{mean} \leq b`.
|
||||
|
||||
Modified from
|
||||
https://github.com/pytorch/pytorch/blob/master/torch/nn/init.py
|
||||
|
||||
Args:
|
||||
tensor (``torch.Tensor``): an n-dimensional `torch.Tensor`.
|
||||
mean (float): the mean of the normal distribution.
|
||||
std (float): the standard deviation of the normal distribution.
|
||||
a (float): the minimum cutoff value.
|
||||
b (float): the maximum cutoff value.
|
||||
"""
|
||||
return _no_grad_trunc_normal_(tensor, mean, std, a, b)
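A quick sketch of trunc_normal_ as defined above: samples stay inside the cutoffs and the empirical std is slightly below the requested one because of the truncation.

import torch

w = torch.empty(256, 256)
trunc_normal_(w, mean=0., std=0.02, a=-0.04, b=0.04)
assert w.min() >= -0.04 and w.max() <= 0.04
print(float(w.mean()), float(w.std()))  # roughly 0 and a bit below 0.02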
|
||||
|
|
@@ -0,0 +1,175 @@
|
|||
# Copyright (c) OpenMMLab. All rights reserved.
|
||||
import logging
|
||||
|
||||
import torch.nn as nn
|
||||
|
||||
from .utils import constant_init, kaiming_init, normal_init
|
||||
|
||||
|
||||
def conv3x3(in_planes, out_planes, dilation=1):
|
||||
"""3x3 convolution with padding."""
|
||||
return nn.Conv2d(
|
||||
in_planes,
|
||||
out_planes,
|
||||
kernel_size=3,
|
||||
padding=dilation,
|
||||
dilation=dilation)
|
||||
|
||||
|
||||
def make_vgg_layer(inplanes,
|
||||
planes,
|
||||
num_blocks,
|
||||
dilation=1,
|
||||
with_bn=False,
|
||||
ceil_mode=False):
|
||||
layers = []
|
||||
for _ in range(num_blocks):
|
||||
layers.append(conv3x3(inplanes, planes, dilation))
|
||||
if with_bn:
|
||||
layers.append(nn.BatchNorm2d(planes))
|
||||
layers.append(nn.ReLU(inplace=True))
|
||||
inplanes = planes
|
||||
layers.append(nn.MaxPool2d(kernel_size=2, stride=2, ceil_mode=ceil_mode))
|
||||
|
||||
return layers
|
||||
|
||||
|
||||
class VGG(nn.Module):
|
||||
"""VGG backbone.
|
||||
|
||||
Args:
|
||||
depth (int): Depth of vgg, from {11, 13, 16, 19}.
|
||||
with_bn (bool): Use BatchNorm or not.
|
||||
num_classes (int): number of classes for classification.
|
||||
num_stages (int): VGG stages, normally 5.
|
||||
dilations (Sequence[int]): Dilation of each stage.
|
||||
out_indices (Sequence[int]): Output from which stages.
|
||||
frozen_stages (int): Stages to be frozen (all param fixed). -1 means
|
||||
not freezing any parameters.
|
||||
bn_eval (bool): Whether to set BN layers as eval mode, namely, freeze
|
||||
running stats (mean and var).
|
||||
bn_frozen (bool): Whether to freeze weight and bias of BN layers.
|
||||
"""
|
||||
|
||||
arch_settings = {
|
||||
11: (1, 1, 2, 2, 2),
|
||||
13: (2, 2, 2, 2, 2),
|
||||
16: (2, 2, 3, 3, 3),
|
||||
19: (2, 2, 4, 4, 4)
|
||||
}
|
||||
|
||||
def __init__(self,
|
||||
depth,
|
||||
with_bn=False,
|
||||
num_classes=-1,
|
||||
num_stages=5,
|
||||
dilations=(1, 1, 1, 1, 1),
|
||||
out_indices=(0, 1, 2, 3, 4),
|
||||
frozen_stages=-1,
|
||||
bn_eval=True,
|
||||
bn_frozen=False,
|
||||
ceil_mode=False,
|
||||
with_last_pool=True):
|
||||
super(VGG, self).__init__()
|
||||
if depth not in self.arch_settings:
|
||||
raise KeyError(f'invalid depth {depth} for vgg')
|
||||
assert num_stages >= 1 and num_stages <= 5
|
||||
stage_blocks = self.arch_settings[depth]
|
||||
self.stage_blocks = stage_blocks[:num_stages]
|
||||
assert len(dilations) == num_stages
|
||||
assert max(out_indices) <= num_stages
|
||||
|
||||
self.num_classes = num_classes
|
||||
self.out_indices = out_indices
|
||||
self.frozen_stages = frozen_stages
|
||||
self.bn_eval = bn_eval
|
||||
self.bn_frozen = bn_frozen
|
||||
|
||||
self.inplanes = 3
|
||||
start_idx = 0
|
||||
vgg_layers = []
|
||||
self.range_sub_modules = []
|
||||
for i, num_blocks in enumerate(self.stage_blocks):
|
||||
num_modules = num_blocks * (2 + with_bn) + 1
|
||||
end_idx = start_idx + num_modules
|
||||
dilation = dilations[i]
|
||||
planes = 64 * 2**i if i < 4 else 512
|
||||
vgg_layer = make_vgg_layer(
|
||||
self.inplanes,
|
||||
planes,
|
||||
num_blocks,
|
||||
dilation=dilation,
|
||||
with_bn=with_bn,
|
||||
ceil_mode=ceil_mode)
|
||||
vgg_layers.extend(vgg_layer)
|
||||
self.inplanes = planes
|
||||
self.range_sub_modules.append([start_idx, end_idx])
|
||||
start_idx = end_idx
|
||||
if not with_last_pool:
|
||||
vgg_layers.pop(-1)
|
||||
self.range_sub_modules[-1][1] -= 1
|
||||
self.module_name = 'features'
|
||||
self.add_module(self.module_name, nn.Sequential(*vgg_layers))
|
||||
|
||||
if self.num_classes > 0:
|
||||
self.classifier = nn.Sequential(
|
||||
nn.Linear(512 * 7 * 7, 4096),
|
||||
nn.ReLU(True),
|
||||
nn.Dropout(),
|
||||
nn.Linear(4096, 4096),
|
||||
nn.ReLU(True),
|
||||
nn.Dropout(),
|
||||
nn.Linear(4096, num_classes),
|
||||
)
|
||||
|
||||
def init_weights(self, pretrained=None):
|
||||
if isinstance(pretrained, str):
|
||||
logger = logging.getLogger()
|
||||
from ..runner import load_checkpoint
|
||||
load_checkpoint(self, pretrained, strict=False, logger=logger)
|
||||
elif pretrained is None:
|
||||
for m in self.modules():
|
||||
if isinstance(m, nn.Conv2d):
|
||||
kaiming_init(m)
|
||||
elif isinstance(m, nn.BatchNorm2d):
|
||||
constant_init(m, 1)
|
||||
elif isinstance(m, nn.Linear):
|
||||
normal_init(m, std=0.01)
|
||||
else:
|
||||
raise TypeError('pretrained must be a str or None')
|
||||
|
||||
def forward(self, x):
|
||||
outs = []
|
||||
vgg_layers = getattr(self, self.module_name)
|
||||
for i in range(len(self.stage_blocks)):
|
||||
for j in range(*self.range_sub_modules[i]):
|
||||
vgg_layer = vgg_layers[j]
|
||||
x = vgg_layer(x)
|
||||
if i in self.out_indices:
|
||||
outs.append(x)
|
||||
if self.num_classes > 0:
|
||||
x = x.view(x.size(0), -1)
|
||||
x = self.classifier(x)
|
||||
outs.append(x)
|
||||
if len(outs) == 1:
|
||||
return outs[0]
|
||||
else:
|
||||
return tuple(outs)
|
||||
|
||||
def train(self, mode=True):
|
||||
super(VGG, self).train(mode)
|
||||
if self.bn_eval:
|
||||
for m in self.modules():
|
||||
if isinstance(m, nn.BatchNorm2d):
|
||||
m.eval()
|
||||
if self.bn_frozen:
|
||||
for params in m.parameters():
|
||||
params.requires_grad = False
|
||||
vgg_layers = getattr(self, self.module_name)
|
||||
if mode and self.frozen_stages >= 0:
|
||||
for i in range(self.frozen_stages):
|
||||
for j in range(*self.range_sub_modules[i]):
|
||||
mod = vgg_layers[j]
|
||||
mod.eval()
|
||||
for param in mod.parameters():
|
||||
param.requires_grad = False
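A minimal smoke test for the VGG backbone above (a sketch, not part of the library); the output shapes assume a 224x224 input and the default five stages.

import torch

backbone = VGG(depth=16, out_indices=(2, 3, 4))
backbone.init_weights()  # Kaiming for conv, constant for BN, normal for Linear
feats = backbone(torch.randn(1, 3, 224, 224))
print([f.shape for f in feats])
# [torch.Size([1, 256, 28, 28]), torch.Size([1, 512, 14, 14]), torch.Size([1, 512, 7, 7])]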
|
||||
|
|
@@ -0,0 +1,8 @@
|
|||
# Copyright (c) OpenMMLab. All rights reserved.
|
||||
from .test import (collect_results_cpu, collect_results_gpu, multi_gpu_test,
|
||||
single_gpu_test)
|
||||
|
||||
__all__ = [
|
||||
'collect_results_cpu', 'collect_results_gpu', 'multi_gpu_test',
|
||||
'single_gpu_test'
|
||||
]
|
||||
|
|
@@ -0,0 +1,202 @@
|
|||
# Copyright (c) OpenMMLab. All rights reserved.
|
||||
import os.path as osp
|
||||
import pickle
|
||||
import shutil
|
||||
import tempfile
|
||||
import time
|
||||
|
||||
import torch
|
||||
import torch.distributed as dist
|
||||
|
||||
import annotator.uniformer.mmcv as mmcv
|
||||
from annotator.uniformer.mmcv.runner import get_dist_info
|
||||
|
||||
|
||||
def single_gpu_test(model, data_loader):
|
||||
"""Test model with a single gpu.
|
||||
|
||||
This method tests the model with a single gpu and displays a test progress bar.
|
||||
|
||||
Args:
|
||||
model (nn.Module): Model to be tested.
|
||||
data_loader (nn.Dataloader): Pytorch data loader.
|
||||
|
||||
Returns:
|
||||
list: The prediction results.
|
||||
"""
|
||||
model.eval()
|
||||
results = []
|
||||
dataset = data_loader.dataset
|
||||
prog_bar = mmcv.ProgressBar(len(dataset))
|
||||
for data in data_loader:
|
||||
with torch.no_grad():
|
||||
result = model(return_loss=False, **data)
|
||||
results.extend(result)
|
||||
|
||||
# Assume result has the same length as batch_size
|
||||
# refer to https://github.com/open-mmlab/mmcv/issues/985
|
||||
batch_size = len(result)
|
||||
for _ in range(batch_size):
|
||||
prog_bar.update()
|
||||
return results
|
||||
|
||||
|
||||
def multi_gpu_test(model, data_loader, tmpdir=None, gpu_collect=False):
|
||||
"""Test model with multiple gpus.
|
||||
|
||||
This method tests the model with multiple gpus and collects the results
|
||||
under two different modes: gpu and cpu modes. By setting
|
||||
``gpu_collect=True``, it encodes results to gpu tensors and uses gpu
|
||||
communication for results collection. On cpu mode it saves the results on
|
||||
different gpus to ``tmpdir`` and collects them by the rank 0 worker.
|
||||
|
||||
Args:
|
||||
model (nn.Module): Model to be tested.
|
||||
data_loader (nn.Dataloader): Pytorch data loader.
|
||||
tmpdir (str): Path of directory to save the temporary results from
|
||||
different gpus under cpu mode.
|
||||
gpu_collect (bool): Option to use either gpu or cpu to collect results.
|
||||
|
||||
Returns:
|
||||
list: The prediction results.
|
||||
"""
|
||||
model.eval()
|
||||
results = []
|
||||
dataset = data_loader.dataset
|
||||
rank, world_size = get_dist_info()
|
||||
if rank == 0:
|
||||
prog_bar = mmcv.ProgressBar(len(dataset))
|
||||
time.sleep(2) # This line can prevent a deadlock problem in some cases.
|
||||
for i, data in enumerate(data_loader):
|
||||
with torch.no_grad():
|
||||
result = model(return_loss=False, **data)
|
||||
results.extend(result)
|
||||
|
||||
if rank == 0:
|
||||
batch_size = len(result)
|
||||
batch_size_all = batch_size * world_size
|
||||
if batch_size_all + prog_bar.completed > len(dataset):
|
||||
batch_size_all = len(dataset) - prog_bar.completed
|
||||
for _ in range(batch_size_all):
|
||||
prog_bar.update()
|
||||
|
||||
# collect results from all ranks
|
||||
if gpu_collect:
|
||||
results = collect_results_gpu(results, len(dataset))
|
||||
else:
|
||||
results = collect_results_cpu(results, len(dataset), tmpdir)
|
||||
return results
|
||||
|
||||
|
||||
def collect_results_cpu(result_part, size, tmpdir=None):
|
||||
"""Collect results under cpu mode.
|
||||
|
||||
On cpu mode, this function will save the results on different gpus to
|
||||
``tmpdir`` and collect them by the rank 0 worker.
|
||||
|
||||
Args:
|
||||
result_part (list): Result list containing result parts
|
||||
to be collected.
|
||||
size (int): Size of the results, commonly equal to length of
|
||||
the results.
|
||||
tmpdir (str | None): temporary directory to store the collected
|
||||
results. If set to None, a random temporary directory will be
|
||||
created for it.
|
||||
|
||||
Returns:
|
||||
list: The collected results.
|
||||
"""
|
||||
rank, world_size = get_dist_info()
|
||||
# create a tmp dir if it is not specified
|
||||
if tmpdir is None:
|
||||
MAX_LEN = 512
|
||||
# 32 is whitespace
|
||||
dir_tensor = torch.full((MAX_LEN, ),
|
||||
32,
|
||||
dtype=torch.uint8,
|
||||
device='cuda')
|
||||
if rank == 0:
|
||||
mmcv.mkdir_or_exist('.dist_test')
|
||||
tmpdir = tempfile.mkdtemp(dir='.dist_test')
|
||||
tmpdir = torch.tensor(
|
||||
bytearray(tmpdir.encode()), dtype=torch.uint8, device='cuda')
|
||||
dir_tensor[:len(tmpdir)] = tmpdir
|
||||
dist.broadcast(dir_tensor, 0)
|
||||
tmpdir = dir_tensor.cpu().numpy().tobytes().decode().rstrip()
|
||||
else:
|
||||
mmcv.mkdir_or_exist(tmpdir)
|
||||
# dump the part result to the dir
|
||||
mmcv.dump(result_part, osp.join(tmpdir, f'part_{rank}.pkl'))
|
||||
dist.barrier()
|
||||
# collect all parts
|
||||
if rank != 0:
|
||||
return None
|
||||
else:
|
||||
# load results of all parts from tmp dir
|
||||
part_list = []
|
||||
for i in range(world_size):
|
||||
part_file = osp.join(tmpdir, f'part_{i}.pkl')
|
||||
part_result = mmcv.load(part_file)
|
||||
# When data is severely insufficient, an empty part_result
|
||||
# on a certain gpu could make the overall outputs empty.
|
||||
if part_result:
|
||||
part_list.append(part_result)
|
||||
# sort the results
|
||||
ordered_results = []
|
||||
for res in zip(*part_list):
|
||||
ordered_results.extend(list(res))
|
||||
# the dataloader may pad some samples
|
||||
ordered_results = ordered_results[:size]
|
||||
# remove tmp dir
|
||||
shutil.rmtree(tmpdir)
|
||||
return ordered_results
|
||||
|
||||
|
||||
def collect_results_gpu(result_part, size):
|
||||
"""Collect results under gpu mode.
|
||||
|
||||
On gpu mode, this function will encode results to gpu tensors and use gpu
|
||||
communication for results collection.
|
||||
|
||||
Args:
|
||||
result_part (list): Result list containing result parts
|
||||
to be collected.
|
||||
size (int): Size of the results, commonly equal to length of
|
||||
the results.
|
||||
|
||||
Returns:
|
||||
list: The collected results.
|
||||
"""
|
||||
rank, world_size = get_dist_info()
|
||||
# dump result part to tensor with pickle
|
||||
part_tensor = torch.tensor(
|
||||
bytearray(pickle.dumps(result_part)), dtype=torch.uint8, device='cuda')
|
||||
# gather all result part tensor shape
|
||||
shape_tensor = torch.tensor(part_tensor.shape, device='cuda')
|
||||
shape_list = [shape_tensor.clone() for _ in range(world_size)]
|
||||
dist.all_gather(shape_list, shape_tensor)
|
||||
# padding result part tensor to max length
|
||||
shape_max = torch.tensor(shape_list).max()
|
||||
part_send = torch.zeros(shape_max, dtype=torch.uint8, device='cuda')
|
||||
part_send[:shape_tensor[0]] = part_tensor
|
||||
part_recv_list = [
|
||||
part_tensor.new_zeros(shape_max) for _ in range(world_size)
|
||||
]
|
||||
# gather all result part
|
||||
dist.all_gather(part_recv_list, part_send)
|
||||
|
||||
if rank == 0:
|
||||
part_list = []
|
||||
for recv, shape in zip(part_recv_list, shape_list):
|
||||
part_result = pickle.loads(recv[:shape[0]].cpu().numpy().tobytes())
|
||||
# When data is severely insufficient, an empty part_result
|
||||
# on a certain gpu could make the overall outputs empty.
|
||||
if part_result:
|
||||
part_list.append(part_result)
|
||||
# sort the results
|
||||
ordered_results = []
|
||||
for res in zip(*part_list):
|
||||
ordered_results.extend(list(res))
|
||||
# the dataloader may pad some samples
|
||||
ordered_results = ordered_results[:size]
|
||||
return ordered_results
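The ordering step shared by both collectors interleaves the per-rank parts with zip and then truncates the padded tail; a pure-Python illustration of that logic (no distributed setup required):

part_list = [[0, 2, 4, 6], [1, 3, 5, 5]]  # results from rank 0 and rank 1; last item is padding
size = 7                                  # true dataset length
ordered_results = []
for res in zip(*part_list):
    ordered_results.extend(list(res))
print(ordered_results[:size])             # [0, 1, 2, 3, 4, 5, 6]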
|
||||
|
|
@@ -0,0 +1,11 @@
|
|||
# Copyright (c) OpenMMLab. All rights reserved.
|
||||
from .file_client import BaseStorageBackend, FileClient
|
||||
from .handlers import BaseFileHandler, JsonHandler, PickleHandler, YamlHandler
|
||||
from .io import dump, load, register_handler
|
||||
from .parse import dict_from_file, list_from_file
|
||||
|
||||
__all__ = [
|
||||
'BaseStorageBackend', 'FileClient', 'load', 'dump', 'register_handler',
|
||||
'BaseFileHandler', 'JsonHandler', 'PickleHandler', 'YamlHandler',
|
||||
'list_from_file', 'dict_from_file'
|
||||
]
|
||||
File diff suppressed because it is too large
|
|
@@ -0,0 +1,7 @@
|
|||
# Copyright (c) OpenMMLab. All rights reserved.
|
||||
from .base import BaseFileHandler
|
||||
from .json_handler import JsonHandler
|
||||
from .pickle_handler import PickleHandler
|
||||
from .yaml_handler import YamlHandler
|
||||
|
||||
__all__ = ['BaseFileHandler', 'JsonHandler', 'PickleHandler', 'YamlHandler']
|
||||
|
|
@@ -0,0 +1,30 @@
|
|||
# Copyright (c) OpenMMLab. All rights reserved.
|
||||
from abc import ABCMeta, abstractmethod
|
||||
|
||||
|
||||
class BaseFileHandler(metaclass=ABCMeta):
|
||||
# `str_like` is a flag to indicate whether the type of file object is
|
||||
# str-like object or bytes-like object. Pickle only processes bytes-like
|
||||
# objects but json only processes str-like object. If it is str-like
|
||||
# object, `StringIO` will be used to process the buffer.
|
||||
str_like = True
|
||||
|
||||
@abstractmethod
|
||||
def load_from_fileobj(self, file, **kwargs):
|
||||
pass
|
||||
|
||||
@abstractmethod
|
||||
def dump_to_fileobj(self, obj, file, **kwargs):
|
||||
pass
|
||||
|
||||
@abstractmethod
|
||||
def dump_to_str(self, obj, **kwargs):
|
||||
pass
|
||||
|
||||
def load_from_path(self, filepath, mode='r', **kwargs):
|
||||
with open(filepath, mode) as f:
|
||||
return self.load_from_fileobj(f, **kwargs)
|
||||
|
||||
def dump_to_path(self, obj, filepath, mode='w', **kwargs):
|
||||
with open(filepath, mode) as f:
|
||||
self.dump_to_fileobj(obj, f, **kwargs)
|
||||
|
|
@@ -0,0 +1,36 @@
|
|||
# Copyright (c) OpenMMLab. All rights reserved.
|
||||
import json
|
||||
|
||||
import numpy as np
|
||||
|
||||
from .base import BaseFileHandler
|
||||
|
||||
|
||||
def set_default(obj):
|
||||
"""Set default json values for non-serializable values.
|
||||
|
||||
It helps convert ``set``, ``range`` and ``np.ndarray`` data types to list.
|
||||
It also converts ``np.generic`` (including ``np.int32``, ``np.float32``,
|
||||
etc.) into plain numbers of python built-in types.
|
||||
"""
|
||||
if isinstance(obj, (set, range)):
|
||||
return list(obj)
|
||||
elif isinstance(obj, np.ndarray):
|
||||
return obj.tolist()
|
||||
elif isinstance(obj, np.generic):
|
||||
return obj.item()
|
||||
raise TypeError(f'{type(obj)} is unsupported for json dump')
|
||||
|
||||
|
||||
class JsonHandler(BaseFileHandler):
|
||||
|
||||
def load_from_fileobj(self, file):
|
||||
return json.load(file)
|
||||
|
||||
def dump_to_fileobj(self, obj, file, **kwargs):
|
||||
kwargs.setdefault('default', set_default)
|
||||
json.dump(obj, file, **kwargs)
|
||||
|
||||
def dump_to_str(self, obj, **kwargs):
|
||||
kwargs.setdefault('default', set_default)
|
||||
return json.dumps(obj, **kwargs)
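A small check of the behaviour documented for set_default above: numpy scalars, arrays, sets and ranges become JSON-serializable when it is passed as the ``default`` hook.

import json

import numpy as np

payload = {'ids': {1, 2, 3}, 'score': np.float32(0.5), 'mask': np.array([0, 1])}
print(json.dumps(payload, default=set_default, sort_keys=True))
# {"ids": [1, 2, 3], "mask": [0, 1], "score": 0.5}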
|
||||
|
|
@@ -0,0 +1,28 @@
|
|||
# Copyright (c) OpenMMLab. All rights reserved.
|
||||
import pickle
|
||||
|
||||
from .base import BaseFileHandler
|
||||
|
||||
|
||||
class PickleHandler(BaseFileHandler):
|
||||
|
||||
str_like = False
|
||||
|
||||
def load_from_fileobj(self, file, **kwargs):
|
||||
return pickle.load(file, **kwargs)
|
||||
|
||||
def load_from_path(self, filepath, **kwargs):
|
||||
return super(PickleHandler, self).load_from_path(
|
||||
filepath, mode='rb', **kwargs)
|
||||
|
||||
def dump_to_str(self, obj, **kwargs):
|
||||
kwargs.setdefault('protocol', 2)
|
||||
return pickle.dumps(obj, **kwargs)
|
||||
|
||||
def dump_to_fileobj(self, obj, file, **kwargs):
|
||||
kwargs.setdefault('protocol', 2)
|
||||
pickle.dump(obj, file, **kwargs)
|
||||
|
||||
def dump_to_path(self, obj, filepath, **kwargs):
|
||||
super(PickleHandler, self).dump_to_path(
|
||||
obj, filepath, mode='wb', **kwargs)
|
||||
|
|
@@ -0,0 +1,24 @@
|
|||
# Copyright (c) OpenMMLab. All rights reserved.
|
||||
import yaml
|
||||
|
||||
try:
|
||||
from yaml import CLoader as Loader, CDumper as Dumper
|
||||
except ImportError:
|
||||
from yaml import Loader, Dumper
|
||||
|
||||
from .base import BaseFileHandler # isort:skip
|
||||
|
||||
|
||||
class YamlHandler(BaseFileHandler):
|
||||
|
||||
def load_from_fileobj(self, file, **kwargs):
|
||||
kwargs.setdefault('Loader', Loader)
|
||||
return yaml.load(file, **kwargs)
|
||||
|
||||
def dump_to_fileobj(self, obj, file, **kwargs):
|
||||
kwargs.setdefault('Dumper', Dumper)
|
||||
yaml.dump(obj, file, **kwargs)
|
||||
|
||||
def dump_to_str(self, obj, **kwargs):
|
||||
kwargs.setdefault('Dumper', Dumper)
|
||||
return yaml.dump(obj, **kwargs)
|
||||
|
|
@@ -0,0 +1,151 @@
|
|||
# Copyright (c) OpenMMLab. All rights reserved.
|
||||
from io import BytesIO, StringIO
|
||||
from pathlib import Path
|
||||
|
||||
from ..utils import is_list_of, is_str
|
||||
from .file_client import FileClient
|
||||
from .handlers import BaseFileHandler, JsonHandler, PickleHandler, YamlHandler
|
||||
|
||||
file_handlers = {
|
||||
'json': JsonHandler(),
|
||||
'yaml': YamlHandler(),
|
||||
'yml': YamlHandler(),
|
||||
'pickle': PickleHandler(),
|
||||
'pkl': PickleHandler()
|
||||
}
|
||||
|
||||
|
||||
def load(file, file_format=None, file_client_args=None, **kwargs):
|
||||
"""Load data from json/yaml/pickle files.
|
||||
|
||||
This method provides a unified api for loading data from serialized files.
|
||||
|
||||
Note:
|
||||
In v1.3.16 and later, ``load`` supports loading data from serialized
|
||||
files that can be stored in different backends.
|
||||
|
||||
Args:
|
||||
file (str or :obj:`Path` or file-like object): Filename or a file-like
|
||||
object.
|
||||
file_format (str, optional): If not specified, the file format will be
|
||||
inferred from the file extension, otherwise use the specified one.
|
||||
Currently supported formats include "json", "yaml/yml" and
|
||||
"pickle/pkl".
|
||||
file_client_args (dict, optional): Arguments to instantiate a
|
||||
FileClient. See :class:`mmcv.fileio.FileClient` for details.
|
||||
Default: None.
|
||||
|
||||
Examples:
|
||||
>>> load('/path/of/your/file') # file is stored on disk
|
||||
>>> load('https://path/of/your/file') # file is stored on the Internet
|
||||
>>> load('s3://path/of/your/file') # file is stored in petrel
|
||||
|
||||
Returns:
|
||||
The content from the file.
|
||||
"""
|
||||
if isinstance(file, Path):
|
||||
file = str(file)
|
||||
if file_format is None and is_str(file):
|
||||
file_format = file.split('.')[-1]
|
||||
if file_format not in file_handlers:
|
||||
raise TypeError(f'Unsupported format: {file_format}')
|
||||
|
||||
handler = file_handlers[file_format]
|
||||
if is_str(file):
|
||||
file_client = FileClient.infer_client(file_client_args, file)
|
||||
if handler.str_like:
|
||||
with StringIO(file_client.get_text(file)) as f:
|
||||
obj = handler.load_from_fileobj(f, **kwargs)
|
||||
else:
|
||||
with BytesIO(file_client.get(file)) as f:
|
||||
obj = handler.load_from_fileobj(f, **kwargs)
|
||||
elif hasattr(file, 'read'):
|
||||
obj = handler.load_from_fileobj(file, **kwargs)
|
||||
else:
|
||||
raise TypeError('"file" must be a filepath str or a file-object')
|
||||
return obj
|
||||
|
||||
|
||||
def dump(obj, file=None, file_format=None, file_client_args=None, **kwargs):
|
||||
"""Dump data to json/yaml/pickle strings or files.
|
||||
|
||||
This method provides a unified api for dumping data as strings or to files,
|
||||
and also supports custom arguments for each file format.
|
||||
|
||||
Note:
|
||||
In v1.3.16 and later, ``dump`` supports dumping data as strings or to
|
||||
files which can be saved to different backends.
|
||||
|
||||
Args:
|
||||
obj (any): The python object to be dumped.
|
||||
file (str or :obj:`Path` or file-like object, optional): If not
|
||||
specified, then the object is dumped to a str, otherwise to a file
|
||||
specified by the filename or file-like object.
|
||||
file_format (str, optional): Same as :func:`load`.
|
||||
file_client_args (dict, optional): Arguments to instantiate a
|
||||
FileClient. See :class:`mmcv.fileio.FileClient` for details.
|
||||
Default: None.
|
||||
|
||||
Examples:
|
||||
>>> dump('hello world', '/path/of/your/file') # disk
|
||||
>>> dump('hello world', 's3://path/of/your/file') # ceph or petrel
|
||||
|
||||
Returns:
|
||||
bool: True for success, False otherwise.
|
||||
"""
|
||||
if isinstance(file, Path):
|
||||
file = str(file)
|
||||
if file_format is None:
|
||||
if is_str(file):
|
||||
file_format = file.split('.')[-1]
|
||||
elif file is None:
|
||||
raise ValueError(
|
||||
'file_format must be specified since file is None')
|
||||
if file_format not in file_handlers:
|
||||
raise TypeError(f'Unsupported format: {file_format}')
|
||||
|
||||
handler = file_handlers[file_format]
|
||||
if file is None:
|
||||
return handler.dump_to_str(obj, **kwargs)
|
||||
elif is_str(file):
|
||||
file_client = FileClient.infer_client(file_client_args, file)
|
||||
if handler.str_like:
|
||||
with StringIO() as f:
|
||||
handler.dump_to_fileobj(obj, f, **kwargs)
|
||||
file_client.put_text(f.getvalue(), file)
|
||||
else:
|
||||
with BytesIO() as f:
|
||||
handler.dump_to_fileobj(obj, f, **kwargs)
|
||||
file_client.put(f.getvalue(), file)
|
||||
elif hasattr(file, 'write'):
|
||||
handler.dump_to_fileobj(obj, file, **kwargs)
|
||||
else:
|
||||
raise TypeError('"file" must be a filename str or a file-object')
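Round-trip sketch with the two helpers above (a local path and the default disk backend are assumed; the file name is made up):

data = {'classes': ['cat', 'dog'], 'num': 2}
dump(data, 'meta.yaml')                # the format is inferred from the extension
assert load('meta.yaml') == data
print(dump(data, file_format='json'))  # returns a str when file is None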
|
||||
|
||||
|
||||
def _register_handler(handler, file_formats):
|
||||
"""Register a handler for some file extensions.
|
||||
|
||||
Args:
|
||||
handler (:obj:`BaseFileHandler`): Handler to be registered.
|
||||
file_formats (str or list[str]): File formats to be handled by this
|
||||
handler.
|
||||
"""
|
||||
if not isinstance(handler, BaseFileHandler):
|
||||
raise TypeError(
|
||||
f'handler must be a child of BaseFileHandler, not {type(handler)}')
|
||||
if isinstance(file_formats, str):
|
||||
file_formats = [file_formats]
|
||||
if not is_list_of(file_formats, str):
|
||||
raise TypeError('file_formats must be a str or a list of str')
|
||||
for ext in file_formats:
|
||||
file_handlers[ext] = handler
|
||||
|
||||
|
||||
def register_handler(file_formats, **kwargs):
|
||||
|
||||
def wrap(cls):
|
||||
_register_handler(cls(**kwargs), file_formats)
|
||||
return cls
|
||||
|
||||
return wrap
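Sketch of extending ``file_handlers`` through the decorator above; the 'txt' format name and the TxtHandler class are illustrative, not part of this diff.

@register_handler('txt')
class TxtHandler(BaseFileHandler):

    def load_from_fileobj(self, file):
        return file.read()

    def dump_to_fileobj(self, obj, file):
        file.write(str(obj))

    def dump_to_str(self, obj):
        return str(obj)

# afterwards load('notes.txt') and dump('hello', 'notes.txt') route through TxtHandler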
|
||||
|
|
@@ -0,0 +1,97 @@
|
|||
# Copyright (c) OpenMMLab. All rights reserved.
|
||||
|
||||
from io import StringIO
|
||||
|
||||
from .file_client import FileClient
|
||||
|
||||
|
||||
def list_from_file(filename,
|
||||
prefix='',
|
||||
offset=0,
|
||||
max_num=0,
|
||||
encoding='utf-8',
|
||||
file_client_args=None):
|
||||
"""Load a text file and parse the content as a list of strings.
|
||||
|
||||
Note:
|
||||
In v1.3.16 and later, ``list_from_file`` supports loading a text file
|
||||
which can be stored in different backends and parsing the content as
|
||||
a list of strings.
|
||||
|
||||
Args:
|
||||
filename (str): Filename.
|
||||
prefix (str): The prefix to be inserted to the beginning of each item.
|
||||
offset (int): The offset of lines.
|
||||
max_num (int): The maximum number of lines to be read,
|
||||
zeros and negatives mean no limitation.
|
||||
encoding (str): Encoding used to open the file. Default utf-8.
|
||||
file_client_args (dict, optional): Arguments to instantiate a
|
||||
FileClient. See :class:`mmcv.fileio.FileClient` for details.
|
||||
Default: None.
|
||||
|
||||
Examples:
|
||||
>>> list_from_file('/path/of/your/file') # disk
|
||||
['hello', 'world']
|
||||
>>> list_from_file('s3://path/of/your/file') # ceph or petrel
|
||||
['hello', 'world']
|
||||
|
||||
Returns:
|
||||
list[str]: A list of strings.
|
||||
"""
|
||||
cnt = 0
|
||||
item_list = []
|
||||
file_client = FileClient.infer_client(file_client_args, filename)
|
||||
with StringIO(file_client.get_text(filename, encoding)) as f:
|
||||
for _ in range(offset):
|
||||
f.readline()
|
||||
for line in f:
|
||||
if 0 < max_num <= cnt:
|
||||
break
|
||||
item_list.append(prefix + line.rstrip('\n\r'))
|
||||
cnt += 1
|
||||
return item_list
|
||||
|
||||
|
||||
def dict_from_file(filename,
|
||||
key_type=str,
|
||||
encoding='utf-8',
|
||||
file_client_args=None):
|
||||
"""Load a text file and parse the content as a dict.
|
||||
|
||||
Each line of the text file should contain two or more columns split by
|
||||
whitespaces or tabs. The first column will be parsed as dict keys, and
|
||||
the following columns will be parsed as dict values.
|
||||
|
||||
Note:
|
||||
In v1.3.16 and later, ``dict_from_file`` supports loading a text file
|
||||
which can be stored in different backends and parsing the content as
|
||||
a dict.
|
||||
|
||||
Args:
|
||||
filename (str): Filename.
|
||||
key_type (type): Type of the dict keys. str is used by default and
|
||||
type conversion will be performed if specified.
|
||||
encoding (str): Encoding used to open the file. Default utf-8.
|
||||
file_client_args (dict, optional): Arguments to instantiate a
|
||||
FileClient. See :class:`mmcv.fileio.FileClient` for details.
|
||||
Default: None.
|
||||
|
||||
Examples:
|
||||
>>> dict_from_file('/path/of/your/file') # disk
|
||||
{'key1': 'value1', 'key2': 'value2'}
|
||||
>>> dict_from_file('s3://path/of/your/file') # ceph or petrel
|
||||
{'key1': 'value1', 'key2': 'value2'}
|
||||
|
||||
Returns:
|
||||
dict: The parsed contents.
|
||||
"""
|
||||
mapping = {}
|
||||
file_client = FileClient.infer_client(file_client_args, filename)
|
||||
with StringIO(file_client.get_text(filename, encoding)) as f:
|
||||
for line in f:
|
||||
items = line.rstrip('\n').split()
|
||||
assert len(items) >= 2
|
||||
key = key_type(items[0])
|
||||
val = items[1:] if len(items) > 2 else items[1]
|
||||
mapping[key] = val
|
||||
return mapping
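A runnable sketch of the two parsers above on a local file (the file name is made up; the default disk backend is assumed):

with open('classes.txt', 'w') as f:
    f.write('1 cat\n2 dog cow\n')

print(dict_from_file('classes.txt', key_type=int))   # {1: 'cat', 2: ['dog', 'cow']}
print(list_from_file('classes.txt', prefix='cls_'))  # ['cls_1 cat', 'cls_2 dog cow']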
|
||||
|
|
@@ -0,0 +1,28 @@
|
|||
# Copyright (c) OpenMMLab. All rights reserved.
|
||||
from .colorspace import (bgr2gray, bgr2hls, bgr2hsv, bgr2rgb, bgr2ycbcr,
|
||||
gray2bgr, gray2rgb, hls2bgr, hsv2bgr, imconvert,
|
||||
rgb2bgr, rgb2gray, rgb2ycbcr, ycbcr2bgr, ycbcr2rgb)
|
||||
from .geometric import (cutout, imcrop, imflip, imflip_, impad,
|
||||
impad_to_multiple, imrescale, imresize, imresize_like,
|
||||
imresize_to_multiple, imrotate, imshear, imtranslate,
|
||||
rescale_size)
|
||||
from .io import imfrombytes, imread, imwrite, supported_backends, use_backend
|
||||
from .misc import tensor2imgs
|
||||
from .photometric import (adjust_brightness, adjust_color, adjust_contrast,
|
||||
adjust_lighting, adjust_sharpness, auto_contrast,
|
||||
clahe, imdenormalize, imequalize, iminvert,
|
||||
imnormalize, imnormalize_, lut_transform, posterize,
|
||||
solarize)
|
||||
|
||||
__all__ = [
|
||||
'bgr2gray', 'bgr2hls', 'bgr2hsv', 'bgr2rgb', 'gray2bgr', 'gray2rgb',
|
||||
'hls2bgr', 'hsv2bgr', 'imconvert', 'rgb2bgr', 'rgb2gray', 'imrescale',
|
||||
'imresize', 'imresize_like', 'imresize_to_multiple', 'rescale_size',
|
||||
'imcrop', 'imflip', 'imflip_', 'impad', 'impad_to_multiple', 'imrotate',
|
||||
'imfrombytes', 'imread', 'imwrite', 'supported_backends', 'use_backend',
|
||||
'imdenormalize', 'imnormalize', 'imnormalize_', 'iminvert', 'posterize',
|
||||
'solarize', 'rgb2ycbcr', 'bgr2ycbcr', 'ycbcr2rgb', 'ycbcr2bgr',
|
||||
'tensor2imgs', 'imshear', 'imtranslate', 'adjust_color', 'imequalize',
|
||||
'adjust_brightness', 'adjust_contrast', 'lut_transform', 'clahe',
|
||||
'adjust_sharpness', 'auto_contrast', 'cutout', 'adjust_lighting'
|
||||
]
|
||||
|
|
@@ -0,0 +1,306 @@
|
|||
# Copyright (c) OpenMMLab. All rights reserved.
|
||||
import cv2
|
||||
import numpy as np
|
||||
|
||||
|
||||
def imconvert(img, src, dst):
|
||||
"""Convert an image from the src colorspace to dst colorspace.
|
||||
|
||||
Args:
|
||||
img (ndarray): The input image.
|
||||
src (str): The source colorspace, e.g., 'rgb', 'hsv'.
|
||||
dst (str): The destination colorspace, e.g., 'rgb', 'hsv'.
|
||||
|
||||
Returns:
|
||||
ndarray: The converted image.
|
||||
"""
|
||||
code = getattr(cv2, f'COLOR_{src.upper()}2{dst.upper()}')
|
||||
out_img = cv2.cvtColor(img, code)
|
||||
return out_img
|
||||
|
||||
|
||||
def bgr2gray(img, keepdim=False):
|
||||
"""Convert a BGR image to grayscale image.
|
||||
|
||||
Args:
|
||||
img (ndarray): The input image.
|
||||
keepdim (bool): If False (by default), then return the grayscale image
|
||||
with 2 dims, otherwise 3 dims.
|
||||
|
||||
Returns:
|
||||
ndarray: The converted grayscale image.
|
||||
"""
|
||||
out_img = cv2.cvtColor(img, cv2.COLOR_BGR2GRAY)
|
||||
if keepdim:
|
||||
out_img = out_img[..., None]
|
||||
return out_img
|
||||
|
||||
|
||||
def rgb2gray(img, keepdim=False):
|
||||
"""Convert a RGB image to grayscale image.
|
||||
|
||||
Args:
|
||||
img (ndarray): The input image.
|
||||
keepdim (bool): If False (by default), then return the grayscale image
|
||||
with 2 dims, otherwise 3 dims.
|
||||
|
||||
Returns:
|
||||
ndarray: The converted grayscale image.
|
||||
"""
|
||||
out_img = cv2.cvtColor(img, cv2.COLOR_RGB2GRAY)
|
||||
if keepdim:
|
||||
out_img = out_img[..., None]
|
||||
return out_img
|
||||
|
||||
|
||||
def gray2bgr(img):
|
||||
"""Convert a grayscale image to BGR image.
|
||||
|
||||
Args:
|
||||
img (ndarray): The input image.
|
||||
|
||||
Returns:
|
||||
ndarray: The converted BGR image.
|
||||
"""
|
||||
img = img[..., None] if img.ndim == 2 else img
|
||||
out_img = cv2.cvtColor(img, cv2.COLOR_GRAY2BGR)
|
||||
return out_img
|
||||
|
||||
|
||||
def gray2rgb(img):
|
||||
"""Convert a grayscale image to RGB image.
|
||||
|
||||
Args:
|
||||
img (ndarray): The input image.
|
||||
|
||||
Returns:
|
||||
ndarray: The converted RGB image.
|
||||
"""
|
||||
img = img[..., None] if img.ndim == 2 else img
|
||||
out_img = cv2.cvtColor(img, cv2.COLOR_GRAY2RGB)
|
||||
return out_img
|
||||
|
||||
|
||||
def _convert_input_type_range(img):
|
||||
"""Convert the type and range of the input image.
|
||||
|
||||
It converts the input image to np.float32 type and range of [0, 1].
|
||||
It is mainly used for pre-processing the input image in colorspace
|
||||
conversion functions such as rgb2ycbcr and ycbcr2rgb.
|
||||
|
||||
Args:
|
||||
img (ndarray): The input image. It accepts:
|
||||
1. np.uint8 type with range [0, 255];
|
||||
2. np.float32 type with range [0, 1].
|
||||
|
||||
Returns:
|
||||
(ndarray): The converted image with type of np.float32 and range of
|
||||
[0, 1].
|
||||
"""
|
||||
img_type = img.dtype
|
||||
img = img.astype(np.float32)
|
||||
if img_type == np.float32:
|
||||
pass
|
||||
elif img_type == np.uint8:
|
||||
img /= 255.
|
||||
else:
|
||||
raise TypeError('The img type should be np.float32 or np.uint8, '
|
||||
f'but got {img_type}')
|
||||
return img
|
||||
|
||||
|
||||
def _convert_output_type_range(img, dst_type):
|
||||
"""Convert the type and range of the image according to dst_type.
|
||||
|
||||
It converts the image to desired type and range. If `dst_type` is np.uint8,
|
||||
images will be converted to np.uint8 type with range [0, 255]. If
|
||||
`dst_type` is np.float32, it converts the image to np.float32 type with
|
||||
range [0, 1].
|
||||
It is mainly used for post-processing images in colorspace conversion
|
||||
functions such as rgb2ycbcr and ycbcr2rgb.
|
||||
|
||||
Args:
|
||||
img (ndarray): The image to be converted with np.float32 type and
|
||||
range [0, 255].
|
||||
dst_type (np.uint8 | np.float32): If dst_type is np.uint8, it
|
||||
converts the image to np.uint8 type with range [0, 255]. If
|
||||
dst_type is np.float32, it converts the image to np.float32 type
|
||||
with range [0, 1].
|
||||
|
||||
Returns:
|
||||
(ndarray): The converted image with desired type and range.
|
||||
"""
|
||||
if dst_type not in (np.uint8, np.float32):
|
||||
raise TypeError('The dst_type should be np.float32 or np.uint8, '
|
||||
f'but got {dst_type}')
|
||||
if dst_type == np.uint8:
|
||||
img = img.round()
|
||||
else:
|
||||
img /= 255.
|
||||
return img.astype(dst_type)
|
||||
|
||||
|
||||
def rgb2ycbcr(img, y_only=False):
|
||||
"""Convert a RGB image to YCbCr image.
|
||||
|
||||
This function produces the same results as Matlab's `rgb2ycbcr` function.
|
||||
It implements the ITU-R BT.601 conversion for standard-definition
|
||||
television. See more details in
|
||||
https://en.wikipedia.org/wiki/YCbCr#ITU-R_BT.601_conversion.
|
||||
|
||||
It differs from a similar function in cv2.cvtColor: `RGB <-> YCrCb`.
|
||||
In OpenCV, it implements a JPEG conversion. See more details in
|
||||
https://en.wikipedia.org/wiki/YCbCr#JPEG_conversion.
|
||||
|
||||
Args:
|
||||
img (ndarray): The input image. It accepts:
|
||||
1. np.uint8 type with range [0, 255];
|
||||
2. np.float32 type with range [0, 1].
|
||||
y_only (bool): Whether to only return Y channel. Default: False.
|
||||
|
||||
Returns:
|
||||
ndarray: The converted YCbCr image. The output image has the same type
|
||||
and range as input image.
|
||||
"""
|
||||
img_type = img.dtype
|
||||
img = _convert_input_type_range(img)
|
||||
if y_only:
|
||||
out_img = np.dot(img, [65.481, 128.553, 24.966]) + 16.0
|
||||
else:
|
||||
out_img = np.matmul(
|
||||
img, [[65.481, -37.797, 112.0], [128.553, -74.203, -93.786],
|
||||
[24.966, 112.0, -18.214]]) + [16, 128, 128]
|
||||
out_img = _convert_output_type_range(out_img, img_type)
|
||||
return out_img
|
||||
|
||||
|
||||
def bgr2ycbcr(img, y_only=False):
|
||||
"""Convert a BGR image to YCbCr image.
|
||||
|
||||
The bgr version of rgb2ycbcr.
|
||||
It implements the ITU-R BT.601 conversion for standard-definition
|
||||
television. See more details in
|
||||
https://en.wikipedia.org/wiki/YCbCr#ITU-R_BT.601_conversion.
|
||||
|
||||
It differs from a similar function in cv2.cvtColor: `BGR <-> YCrCb`.
|
||||
In OpenCV, it implements a JPEG conversion. See more details in
|
||||
https://en.wikipedia.org/wiki/YCbCr#JPEG_conversion.
|
||||
|
||||
Args:
|
||||
img (ndarray): The input image. It accepts:
|
||||
1. np.uint8 type with range [0, 255];
|
||||
2. np.float32 type with range [0, 1].
|
||||
y_only (bool): Whether to only return Y channel. Default: False.
|
||||
|
||||
Returns:
|
||||
ndarray: The converted YCbCr image. The output image has the same type
|
||||
and range as input image.
|
||||
"""
|
||||
img_type = img.dtype
|
||||
img = _convert_input_type_range(img)
|
||||
if y_only:
|
||||
out_img = np.dot(img, [24.966, 128.553, 65.481]) + 16.0
|
||||
else:
|
||||
out_img = np.matmul(
|
||||
img, [[24.966, 112.0, -18.214], [128.553, -74.203, -93.786],
|
||||
[65.481, -37.797, 112.0]]) + [16, 128, 128]
|
||||
out_img = _convert_output_type_range(out_img, img_type)
|
||||
return out_img
|
||||
|
||||
|
||||
def ycbcr2rgb(img):
|
||||
"""Convert a YCbCr image to RGB image.
|
||||
|
||||
This function produces the same results as Matlab's ycbcr2rgb function.
|
||||
It implements the ITU-R BT.601 conversion for standard-definition
|
||||
television. See more details in
|
||||
https://en.wikipedia.org/wiki/YCbCr#ITU-R_BT.601_conversion.
|
||||
|
||||
It differs from a similar function in cv2.cvtColor: `YCrCb <-> RGB`.
|
||||
In OpenCV, it implements a JPEG conversion. See more details in
|
||||
https://en.wikipedia.org/wiki/YCbCr#JPEG_conversion.
|
||||
|
||||
Args:
|
||||
img (ndarray): The input image. It accepts:
|
||||
1. np.uint8 type with range [0, 255];
|
||||
2. np.float32 type with range [0, 1].
|
||||
|
||||
Returns:
|
||||
ndarray: The converted RGB image. The output image has the same type
|
||||
and range as input image.
|
||||
"""
|
||||
img_type = img.dtype
|
||||
img = _convert_input_type_range(img) * 255
|
||||
out_img = np.matmul(img, [[0.00456621, 0.00456621, 0.00456621],
|
||||
[0, -0.00153632, 0.00791071],
|
||||
[0.00625893, -0.00318811, 0]]) * 255.0 + [
|
||||
-222.921, 135.576, -276.836
|
||||
]
|
||||
out_img = _convert_output_type_range(out_img, img_type)
|
||||
return out_img
|
||||
|
||||
|
||||
def ycbcr2bgr(img):
|
||||
"""Convert a YCbCr image to BGR image.
|
||||
|
||||
The bgr version of ycbcr2rgb.
|
||||
It implements the ITU-R BT.601 conversion for standard-definition
|
||||
television. See more details in
|
||||
https://en.wikipedia.org/wiki/YCbCr#ITU-R_BT.601_conversion.
|
||||
|
||||
It differs from a similar function in cv2.cvtColor: `YCrCb <-> BGR`.
|
||||
In OpenCV, it implements a JPEG conversion. See more details in
|
||||
https://en.wikipedia.org/wiki/YCbCr#JPEG_conversion.
|
||||
|
||||
Args:
|
||||
img (ndarray): The input image. It accepts:
|
||||
1. np.uint8 type with range [0, 255];
|
||||
2. np.float32 type with range [0, 1].
|
||||
|
||||
Returns:
|
||||
ndarray: The converted BGR image. The output image has the same type
|
||||
and range as input image.
|
||||
"""
|
||||
img_type = img.dtype
|
||||
img = _convert_input_type_range(img) * 255
|
||||
out_img = np.matmul(img, [[0.00456621, 0.00456621, 0.00456621],
|
||||
[0.00791071, -0.00153632, 0],
|
||||
[0, -0.00318811, 0.00625893]]) * 255.0 + [
|
||||
-276.836, 135.576, -222.921
|
||||
]
|
||||
out_img = _convert_output_type_range(out_img, img_type)
|
||||
return out_img
|
||||
|
||||
|
||||
def convert_color_factory(src, dst):
|
||||
|
||||
code = getattr(cv2, f'COLOR_{src.upper()}2{dst.upper()}')
|
||||
|
||||
def convert_color(img):
|
||||
out_img = cv2.cvtColor(img, code)
|
||||
return out_img
|
||||
|
||||
convert_color.__doc__ = f"""Convert a {src.upper()} image to {dst.upper()}
|
||||
image.
|
||||
|
||||
Args:
|
||||
img (ndarray or str): The input image.
|
||||
|
||||
Returns:
|
||||
ndarray: The converted {dst.upper()} image.
|
||||
"""
|
||||
|
||||
return convert_color
|
||||
|
||||
|
||||
bgr2rgb = convert_color_factory('bgr', 'rgb')
|
||||
|
||||
rgb2bgr = convert_color_factory('rgb', 'bgr')
|
||||
|
||||
bgr2hsv = convert_color_factory('bgr', 'hsv')
|
||||
|
||||
hsv2bgr = convert_color_factory('hsv', 'bgr')
|
||||
|
||||
bgr2hls = convert_color_factory('bgr', 'hls')
|
||||
|
||||
hls2bgr = convert_color_factory('hls', 'bgr')
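A round-trip sanity check for the converters defined above (a sketch; the input image is random):

import numpy as np

img_bgr = np.random.randint(0, 256, (32, 32, 3), dtype=np.uint8)
img_rgb = bgr2rgb(img_bgr)
assert np.array_equal(rgb2bgr(img_rgb), img_bgr)

y = rgb2ycbcr(img_rgb, y_only=True)  # uint8 in, uint8 out, single channel
print(y.shape, y.dtype)              # (32, 32) uint8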
|
||||