mirror of https://github.com/vladmandic/automatic
parent a1d46b3ecd
commit 8ea9d428d5
@ -41,6 +41,7 @@ TBD
- ui: **themes** add *CTD-NT64Light*, *CTD-NT64Medium* and *CTD-NT64Dark*, thanks @resonantsky
- ui: **gallery** add option to auto-refresh gallery, thanks @awsr
- **Internal**
  - `python==3.13` full support
  - `python==3.14` initial support
    see [docs](https://vladmandic.github.io/sdnext-docs/Python/) for details
  - remove hard-dependencies:
@ -64,9 +65,11 @@ TBD
  - use `threading` for deferrable operations
  - use `threading` for io-independent parallel operations (a minimal sketch follows after this changelog fragment)
  - remove requirements: `clip`, `open-clip`
  - remove `normalbae` pre-processor
  - captioning part-2, thanks @CalamitousFelicitousness
  - add new build of `insightface`, thanks @hameerabbasi
- **Obsolete**
  - remove `normalbae` pre-processor
  - remove `dwpose` pre-processor
- **Checks**
  - switch to `pyproject.toml` for tool configs
  - update `lint` rules, thanks @awsr
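The `threading` entry above refers to running io-independent work on background threads so it does not block the main loop; a minimal sketch of the pattern, with illustrative task names (not SD.Next APIs):

```python
import threading

def refresh_gallery():
    pass  # placeholder for an io-independent task

def warm_caches():
    pass  # placeholder for another independent task

# launch deferrable operations in parallel, then wait for both
threads = [threading.Thread(target=fn, daemon=True) for fn in (refresh_gallery, warm_caches)]
for t in threads:
    t.start()
for t in threads:
    t.join()
```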
@ -1,149 +0,0 @@
# Openpose
# Original from CMU https://github.com/CMU-Perceptual-Computing-Lab/openpose
# 2nd Edited by https://github.com/Hzzone/pytorch-openpose
# 3rd Edited by ControlNet
# 4th Edited by ControlNet (added face and correct hands)

from typing import Type, Optional, Union, List
import os
os.environ["KMP_DUPLICATE_LIB_OK"] = "TRUE"
import cv2
import numpy as np
from PIL import Image
from installer import installed, pip
from modules.logger import log
from modules.control.util import HWC3, resize_image
from .draw import draw_bodypose, draw_handpose, draw_facepose

checked_ok = False
busy = False


def _register_module(self, module: Type, module_name: Optional[Union[str, List[str]]] = None, force: bool = False) -> None:
    if not callable(module):
        raise TypeError(f'module must be Callable, but got {type(module)}')
    if module_name is None:
        module_name = module.__name__
    if isinstance(module_name, str):
        module_name = [module_name]
    for name in module_name:
        if not force and name in self._module_dict:  # pylint: disable=protected-access
            pass  # patch for 'Adafactor is already registered in optimizer at torch.optim'
        self._module_dict[name] = module  # pylint: disable=protected-access


def check_dependencies():
    global checked_ok, busy  # pylint: disable=global-statement
    busy = True
    debug = log.trace if os.environ.get('SD_DWPOSE_DEBUG', None) is not None else lambda *args, **kwargs: None
    # pip install --upgrade --no-deps --force-reinstall termcolor xtcocotools terminaltables pycocotools munkres shapely openmim==0.3.9 mmengine==0.10.5 mmcv==2.1.0 mmpose==1.3.2 mmdet==3.3.0
    packages = [
        'termcolor',
        'xtcocotools',
        'terminaltables',
        'pycocotools',
        'munkres',
        'shapely',
        'openmim==0.3.9',
        'mmengine==0.10.5',
        'mmcv==2.1.0',
        'mmpose==1.3.2',
        'mmdet==3.3.0',
    ]
    status = [installed(p, quiet=True) for p in packages]
    debug(f'DWPose required={packages} status={status}')
    if not all(status):
        log.info(f'Installing dependencies: for=dwpose packages={packages}')
        cmd = 'install --upgrade --no-deps --force-reinstall '
        pkgs = ' '.join(packages)
        pip(cmd + pkgs, ignore=False, quiet=True, uv=False)
    try:
        import mmcv  # pylint: disable=unused-import
        import mmengine  # pylint: disable=unused-import
        from mmengine.registry import Registry
        Registry._register_module = _register_module  # pylint: disable=protected-access
        import mmpose  # pylint: disable=unused-import
        import mmdet  # pylint: disable=unused-import
        debug('DWPose import ok')
        checked_ok = True
    except Exception as e:
        log.error(f'DWPose: {e}')
        # from modules import errors
        # errors.display(e, 'DWPose')
    busy = False
    return checked_ok


def draw_pose(pose, H, W):
    bodies = pose['bodies']
    faces = pose['faces']
    hands = pose['hands']
    candidate = bodies['candidate']
    subset = bodies['subset']

    canvas = np.zeros(shape=(H, W, 3), dtype=np.uint8)
    canvas = draw_bodypose(canvas, candidate, subset)
    canvas = draw_handpose(canvas, hands)
    canvas = draw_facepose(canvas, faces)
    return canvas


class DWposeDetector:
    def __init__(self, det_config=None, det_ckpt=None, pose_config=None, pose_ckpt=None, device="cpu"):
        self.pose_estimation = None
        if not checked_ok:
            if not check_dependencies():
                return
        Wholebody = None
        try:
            from .wholebody import Wholebody
        except Exception as e:
            log.error(f'DWPose: {e}')
        if Wholebody is not None:
            self.pose_estimation = Wholebody(det_config, det_ckpt, pose_config, pose_ckpt, device)

    def to(self, device):
        self.pose_estimation.to(device)
        return self

    def __call__(self, input_image, detect_resolution=512, image_resolution=512, output_type="pil", min_confidence=0.3, **kwargs):
        if self.pose_estimation is None:
            log.error("DWPose: not loaded")
            return None
        input_image = cv2.cvtColor(np.array(input_image, dtype=np.uint8), cv2.COLOR_RGB2BGR)

        input_image = HWC3(input_image)
        input_image = resize_image(input_image, detect_resolution)
        H, W, _C = input_image.shape

        candidate, subset = self.pose_estimation(input_image)
        if candidate is None:
            return Image.fromarray(input_image)
        nums, _keys, locs = candidate.shape
        candidate[..., 0] /= float(W)
        candidate[..., 1] /= float(H)
        body = candidate[:, :18].copy()
        body = body.reshape(nums * 18, locs)
        score = subset[:, :18]

        for i in range(len(score)):
            for j in range(len(score[i])):
                if score[i][j] > min_confidence:
                    score[i][j] = int(18 * i + j)
                else:
                    score[i][j] = -1
        un_visible = subset < min_confidence
        candidate[un_visible] = -1
        _foot = candidate[:, 18:24]
        faces = candidate[:, 24:92]
        hands = candidate[:, 92:113]
        hands = np.vstack([hands, candidate[:, 113:]])
        bodies = dict(candidate=body, subset=score)
        pose = dict(bodies=bodies, hands=hands, faces=faces)
        detected_map = draw_pose(pose, H, W)
        detected_map = HWC3(detected_map)
        img = resize_image(input_image, image_resolution)
        H, W, _C = img.shape
        detected_map = cv2.resize(detected_map, (W, H), interpolation=cv2.INTER_LINEAR)
        if output_type == "pil":
            detected_map = Image.fromarray(detected_map)
        return detected_map
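For reference, a minimal sketch of how the detector class deleted above was typically driven; the input path is illustrative and the import assumes the module's pre-removal location:

```python
from PIL import Image
from modules.control.proc.dwpose import DWposeDetector  # pre-removal module path

detector = DWposeDetector()  # installs/validates OpenMMLab dependencies on first use
image = Image.open('person.jpg')  # illustrative input
pose_map = detector(image, detect_resolution=512, image_resolution=512, min_confidence=0.3)
if pose_map is not None:  # None when dependencies failed to load
    pose_map.save('pose.png')
```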
@ -1,257 +0,0 @@
# runtime
max_epochs = 270
stage2_num_epochs = 30
base_lr = 4e-3

train_cfg = dict(max_epochs=max_epochs, val_interval=10)
randomness = dict(seed=21)

# optimizer
optim_wrapper = dict(
    type='OptimWrapper',
    optimizer=dict(type='AdamW', lr=base_lr, weight_decay=0.05),
    paramwise_cfg=dict(
        norm_decay_mult=0, bias_decay_mult=0, bypass_duplicate=True))

# learning rate
param_scheduler = [
    dict(
        type='LinearLR',
        start_factor=1.0e-5,
        by_epoch=False,
        begin=0,
        end=1000),
    dict(
        # use cosine lr from epoch 135 to 270 (max_epochs // 2 to max_epochs)
        type='CosineAnnealingLR',
        eta_min=base_lr * 0.05,
        begin=max_epochs // 2,
        end=max_epochs,
        T_max=max_epochs // 2,
        by_epoch=True,
        convert_to_iter_based=True),
]

# automatically scaling LR based on the actual training batch size
auto_scale_lr = dict(base_batch_size=512)

# codec settings
codec = dict(
    type='SimCCLabel',
    input_size=(288, 384),
    sigma=(6., 6.93),
    simcc_split_ratio=2.0,
    normalize=False,
    use_dark=False)

# model settings
model = dict(
    type='TopdownPoseEstimator',
    data_preprocessor=dict(
        type='PoseDataPreprocessor',
        mean=[123.675, 116.28, 103.53],
        std=[58.395, 57.12, 57.375],
        bgr_to_rgb=True),
    backbone=dict(
        _scope_='mmdet',
        type='CSPNeXt',
        arch='P5',
        expand_ratio=0.5,
        deepen_factor=1.,
        widen_factor=1.,
        out_indices=(4, ),
        channel_attention=True,
        norm_cfg=dict(type='SyncBN'),
        act_cfg=dict(type='SiLU'),
        init_cfg=dict(
            type='Pretrained',
            prefix='backbone.',
            checkpoint='https://download.openmmlab.com/mmpose/v1/projects/'
            'rtmpose/cspnext-l_udp-aic-coco_210e-256x192-273b7631_20230130.pth'
        )),
    head=dict(
        type='RTMCCHead',
        in_channels=1024,
        out_channels=133,
        input_size=codec['input_size'],
        in_featuremap_size=(9, 12),
        simcc_split_ratio=codec['simcc_split_ratio'],
        final_layer_kernel_size=7,
        gau_cfg=dict(
            hidden_dims=256,
            s=128,
            expansion_factor=2,
            dropout_rate=0.,
            drop_path=0.,
            act_fn='SiLU',
            use_rel_bias=False,
            pos_enc=False),
        loss=dict(
            type='KLDiscretLoss',
            use_target_weight=True,
            beta=10.,
            label_softmax=True),
        decoder=codec),
    test_cfg=dict(flip_test=True))

# base dataset settings
dataset_type = 'CocoWholeBodyDataset'
data_mode = 'topdown'
data_root = '/data/'

backend_args = dict(backend='local')
# backend_args = dict(
#     backend='petrel',
#     path_mapping=dict({
#         f'{data_root}': 's3://openmmlab/datasets/detection/coco/',
#         f'{data_root}': 's3://openmmlab/datasets/detection/coco/'
#     }))

# pipelines
train_pipeline = [
    dict(type='LoadImage', backend_args=backend_args),
    dict(type='GetBBoxCenterScale'),
    dict(type='RandomFlip', direction='horizontal'),
    dict(type='RandomHalfBody'),
    dict(
        type='RandomBBoxTransform', scale_factor=[0.6, 1.4], rotate_factor=80),
    dict(type='TopdownAffine', input_size=codec['input_size']),
    dict(type='mmdet.YOLOXHSVRandomAug'),
    dict(
        type='Albumentation',
        transforms=[
            dict(type='Blur', p=0.1),
            dict(type='MedianBlur', p=0.1),
            dict(
                type='CoarseDropout',
                max_holes=1,
                max_height=0.4,
                max_width=0.4,
                min_holes=1,
                min_height=0.2,
                min_width=0.2,
                p=1.0),
        ]),
    dict(type='GenerateTarget', encoder=codec),
    dict(type='PackPoseInputs')
]
val_pipeline = [
    dict(type='LoadImage', backend_args=backend_args),
    dict(type='GetBBoxCenterScale'),
    dict(type='TopdownAffine', input_size=codec['input_size']),
    dict(type='PackPoseInputs')
]

train_pipeline_stage2 = [
    dict(type='LoadImage', backend_args=backend_args),
    dict(type='GetBBoxCenterScale'),
    dict(type='RandomFlip', direction='horizontal'),
    dict(type='RandomHalfBody'),
    dict(
        type='RandomBBoxTransform',
        shift_factor=0.,
        scale_factor=[0.75, 1.25],
        rotate_factor=60),
    dict(type='TopdownAffine', input_size=codec['input_size']),
    dict(type='mmdet.YOLOXHSVRandomAug'),
    dict(
        type='Albumentation',
        transforms=[
            dict(type='Blur', p=0.1),
            dict(type='MedianBlur', p=0.1),
            dict(
                type='CoarseDropout',
                max_holes=1,
                max_height=0.4,
                max_width=0.4,
                min_holes=1,
                min_height=0.2,
                min_width=0.2,
                p=0.5),
        ]),
    dict(type='GenerateTarget', encoder=codec),
    dict(type='PackPoseInputs')
]

datasets = []
dataset_coco = dict(
    type=dataset_type,
    data_root=data_root,
    data_mode=data_mode,
    ann_file='coco/annotations/coco_wholebody_train_v1.0.json',
    data_prefix=dict(img='coco/train2017/'),
    pipeline=[],
)
datasets.append(dataset_coco)

scene = ['Magic_show', 'Entertainment', 'ConductMusic', 'Online_class',
         'TalkShow', 'Speech', 'Fitness', 'Interview', 'Olympic', 'TVShow',
         'Singing', 'SignLanguage', 'Movie', 'LiveVlog', 'VideoConference']

for i in range(len(scene)):
    datasets.append(
        dict(
            type=dataset_type,
            data_root=data_root,
            data_mode=data_mode,
            ann_file='UBody/annotations/' + scene[i] + '/keypoint_annotation.json',
            data_prefix=dict(img='UBody/images/' + scene[i] + '/'),
            pipeline=[],
        )
    )

# data loaders
train_dataloader = dict(
    batch_size=32,
    num_workers=10,
    persistent_workers=True,
    sampler=dict(type='DefaultSampler', shuffle=True),
    dataset=dict(
        type='CombinedDataset',
        metainfo=dict(from_file='configs/_base_/datasets/coco_wholebody.py'),
        datasets=datasets,
        pipeline=train_pipeline,
        test_mode=False,
    ))
val_dataloader = dict(
    batch_size=32,
    num_workers=10,
    persistent_workers=True,
    drop_last=False,
    sampler=dict(type='DefaultSampler', shuffle=False, round_up=False),
    dataset=dict(
        type=dataset_type,
        data_root=data_root,
        data_mode=data_mode,
        ann_file='coco/annotations/coco_wholebody_val_v1.0.json',
        bbox_file=f'{data_root}coco/person_detection_results/'
        'COCO_val2017_detections_AP_H_56_person.json',
        data_prefix=dict(img='coco/val2017/'),
        test_mode=True,
        pipeline=val_pipeline,
    ))
test_dataloader = val_dataloader

# hooks
default_hooks = dict(
    checkpoint=dict(
        save_best='coco-wholebody/AP', rule='greater', max_keep_ckpts=1))

custom_hooks = [
    dict(
        type='EMAHook',
        ema_type='ExpMomentumEMA',
        momentum=0.0002,
        update_buffers=True,
        priority=49),
    dict(
        type='mmdet.PipelineSwitchHook',
        switch_epoch=max_epochs - stage2_num_epochs,
        switch_pipeline=train_pipeline_stage2)
]

# evaluators
val_evaluator = dict(
    type='CocoWholeBodyMetric',
    ann_file=data_root + 'coco/annotations/coco_wholebody_val_v1.0.json')
test_evaluator = val_evaluator
@ -1,259 +0,0 @@
# _base_ = ['../../../_base_/default_runtime.py']

# runtime
max_epochs = 270
stage2_num_epochs = 30
base_lr = 4e-3

train_cfg = dict(max_epochs=max_epochs, val_interval=10)
randomness = dict(seed=21)

# optimizer
optim_wrapper = dict(
    type='OptimWrapper',
    optimizer=dict(type='AdamW', lr=base_lr, weight_decay=0.05),
    paramwise_cfg=dict(
        norm_decay_mult=0, bias_decay_mult=0, bypass_duplicate=True))

# learning rate
param_scheduler = [
    dict(
        type='LinearLR',
        start_factor=1.0e-5,
        by_epoch=False,
        begin=0,
        end=1000),
    dict(
        # use cosine lr from epoch 135 to 270 (max_epochs // 2 to max_epochs)
        type='CosineAnnealingLR',
        eta_min=base_lr * 0.05,
        begin=max_epochs // 2,
        end=max_epochs,
        T_max=max_epochs // 2,
        by_epoch=True,
        convert_to_iter_based=True),
]

# automatically scaling LR based on the actual training batch size
auto_scale_lr = dict(base_batch_size=512)

# codec settings
codec = dict(
    type='SimCCLabel',
    input_size=(288, 384),
    sigma=(6., 6.93),
    simcc_split_ratio=2.0,
    normalize=False,
    use_dark=False)

# model settings
model = dict(
    type='TopdownPoseEstimator',
    data_preprocessor=dict(
        type='PoseDataPreprocessor',
        mean=[123.675, 116.28, 103.53],
        std=[58.395, 57.12, 57.375],
        bgr_to_rgb=True),
    backbone=dict(
        _scope_='mmdet',
        type='CSPNeXt',
        arch='P5',
        expand_ratio=0.5,
        deepen_factor=1.,
        widen_factor=1.,
        out_indices=(4, ),
        channel_attention=True,
        norm_cfg=dict(type='SyncBN'),
        act_cfg=dict(type='SiLU'),
        init_cfg=dict(
            type='Pretrained',
            prefix='backbone.',
            checkpoint='https://download.openmmlab.com/mmpose/v1/projects/'
            'rtmpose/cspnext-l_udp-aic-coco_210e-256x192-273b7631_20230130.pth'
        )),
    head=dict(
        type='RTMCCHead',
        in_channels=1024,
        out_channels=133,
        input_size=codec['input_size'],
        in_featuremap_size=(9, 12),
        simcc_split_ratio=codec['simcc_split_ratio'],
        final_layer_kernel_size=7,
        gau_cfg=dict(
            hidden_dims=256,
            s=128,
            expansion_factor=2,
            dropout_rate=0.,
            drop_path=0.,
            act_fn='SiLU',
            use_rel_bias=False,
            pos_enc=False),
        loss=dict(
            type='KLDiscretLoss',
            use_target_weight=True,
            beta=10.,
            label_softmax=True),
        decoder=codec),
    test_cfg=dict(flip_test=True))

# base dataset settings
dataset_type = 'CocoWholeBodyDataset'
data_mode = 'topdown'
data_root = 'data/'

backend_args = dict(backend='local')
# backend_args = dict(
#     backend='petrel',
#     path_mapping=dict({
#         f'{data_root}': 's3://openmmlab/datasets/detection/coco/',
#         f'{data_root}': 's3://openmmlab/datasets/detection/coco/'
#     }))

# pipelines
train_pipeline = [
    dict(type='LoadImage', backend_args=backend_args),
    dict(type='GetBBoxCenterScale'),
    dict(type='RandomFlip', direction='horizontal'),
    dict(type='RandomHalfBody'),
    dict(
        type='RandomBBoxTransform', scale_factor=[0.6, 1.4], rotate_factor=80),
    dict(type='TopdownAffine', input_size=codec['input_size']),
    dict(type='mmdet.YOLOXHSVRandomAug'),
    dict(
        type='Albumentation',
        transforms=[
            dict(type='Blur', p=0.1),
            dict(type='MedianBlur', p=0.1),
            dict(
                type='CoarseDropout',
                max_holes=1,
                max_height=0.4,
                max_width=0.4,
                min_holes=1,
                min_height=0.2,
                min_width=0.2,
                p=1.0),
        ]),
    dict(type='GenerateTarget', encoder=codec),
    dict(type='PackPoseInputs')
]
val_pipeline = [
    dict(type='LoadImage', backend_args=backend_args),
    dict(type='GetBBoxCenterScale'),
    dict(type='TopdownAffine', input_size=codec['input_size']),
    dict(type='PackPoseInputs')
]

train_pipeline_stage2 = [
    dict(type='LoadImage', backend_args=backend_args),
    dict(type='GetBBoxCenterScale'),
    dict(type='RandomFlip', direction='horizontal'),
    dict(type='RandomHalfBody'),
    dict(
        type='RandomBBoxTransform',
        shift_factor=0.,
        scale_factor=[0.75, 1.25],
        rotate_factor=60),
    dict(type='TopdownAffine', input_size=codec['input_size']),
    dict(type='mmdet.YOLOXHSVRandomAug'),
    dict(
        type='Albumentation',
        transforms=[
            dict(type='Blur', p=0.1),
            dict(type='MedianBlur', p=0.1),
            dict(
                type='CoarseDropout',
                max_holes=1,
                max_height=0.4,
                max_width=0.4,
                min_holes=1,
                min_height=0.2,
                min_width=0.2,
                p=0.5),
        ]),
    dict(type='GenerateTarget', encoder=codec),
    dict(type='PackPoseInputs')
]

datasets = []
dataset_coco = dict(
    type=dataset_type,
    data_root=data_root,
    data_mode=data_mode,
    ann_file='coco/annotations/coco_wholebody_train_v1.0.json',
    data_prefix=dict(img='coco/train2017/'),
    pipeline=[],
)
datasets.append(dataset_coco)

scene = ['Magic_show', 'Entertainment', 'ConductMusic', 'Online_class',
         'TalkShow', 'Speech', 'Fitness', 'Interview', 'Olympic', 'TVShow',
         'Singing', 'SignLanguage', 'Movie', 'LiveVlog', 'VideoConference']

for i in range(len(scene)):
    datasets.append(
        dict(
            type=dataset_type,
            data_root=data_root,
            data_mode=data_mode,
            ann_file='UBody/annotations/' + scene[i] + '/keypoint_annotation.json',
            data_prefix=dict(img='UBody/images/' + scene[i] + '/'),
            pipeline=[],
        )
    )

# data loaders
train_dataloader = dict(
    batch_size=32,
    num_workers=10,
    persistent_workers=True,
    sampler=dict(type='DefaultSampler', shuffle=True),
    dataset=dict(
        type='CombinedDataset',
        metainfo=dict(from_file='configs/_base_/datasets/coco_wholebody.py'),
        datasets=datasets,
        pipeline=train_pipeline,
        test_mode=False,
    ))
val_dataloader = dict(
    batch_size=32,
    num_workers=10,
    persistent_workers=True,
    drop_last=False,
    sampler=dict(type='DefaultSampler', shuffle=False, round_up=False),
    dataset=dict(
        type=dataset_type,
        data_root=data_root,
        data_mode=data_mode,
        ann_file='coco/annotations/coco_wholebody_val_v1.0.json',
        bbox_file=f'{data_root}coco/person_detection_results/'
        'COCO_val2017_detections_AP_H_56_person.json',
        data_prefix=dict(img='coco/val2017/'),
        test_mode=True,
        pipeline=val_pipeline,
    ))
test_dataloader = val_dataloader

# hooks
default_hooks = dict(
    checkpoint=dict(
        save_best='coco-wholebody/AP', rule='greater', max_keep_ckpts=1))

custom_hooks = [
    dict(
        type='EMAHook',
        ema_type='ExpMomentumEMA',
        momentum=0.0002,
        update_buffers=True,
        priority=49),
    dict(
        type='mmdet.PipelineSwitchHook',
        switch_epoch=max_epochs - stage2_num_epochs,
        switch_pipeline=train_pipeline_stage2)
]

# evaluators
val_evaluator = dict(
    type='CocoWholeBodyMetric',
    ann_file=data_root + 'coco/annotations/coco_wholebody_val_v1.0.json')
test_evaluator = val_evaluator
@ -1,259 +0,0 @@
# _base_ = ['../../../_base_/default_runtime.py']

# runtime
max_epochs = 270
stage2_num_epochs = 30
base_lr = 4e-3

train_cfg = dict(max_epochs=max_epochs, val_interval=10)
randomness = dict(seed=21)

# optimizer
optim_wrapper = dict(
    type='OptimWrapper',
    optimizer=dict(type='AdamW', lr=base_lr, weight_decay=0.05),
    paramwise_cfg=dict(
        norm_decay_mult=0, bias_decay_mult=0, bypass_duplicate=True))

# learning rate
param_scheduler = [
    dict(
        type='LinearLR',
        start_factor=1.0e-5,
        by_epoch=False,
        begin=0,
        end=1000),
    dict(
        # use cosine lr from epoch 135 to 270 (max_epochs // 2 to max_epochs)
        type='CosineAnnealingLR',
        eta_min=base_lr * 0.05,
        begin=max_epochs // 2,
        end=max_epochs,
        T_max=max_epochs // 2,
        by_epoch=True,
        convert_to_iter_based=True),
]

# automatically scaling LR based on the actual training batch size
auto_scale_lr = dict(base_batch_size=512)

# codec settings
codec = dict(
    type='SimCCLabel',
    input_size=(192, 256),
    sigma=(4.9, 5.66),
    simcc_split_ratio=2.0,
    normalize=False,
    use_dark=False)

# model settings
model = dict(
    type='TopdownPoseEstimator',
    data_preprocessor=dict(
        type='PoseDataPreprocessor',
        mean=[123.675, 116.28, 103.53],
        std=[58.395, 57.12, 57.375],
        bgr_to_rgb=True),
    backbone=dict(
        _scope_='mmdet',
        type='CSPNeXt',
        arch='P5',
        expand_ratio=0.5,
        deepen_factor=0.67,
        widen_factor=0.75,
        out_indices=(4, ),
        channel_attention=True,
        norm_cfg=dict(type='SyncBN'),
        act_cfg=dict(type='SiLU'),
        init_cfg=dict(
            type='Pretrained',
            prefix='backbone.',
            checkpoint='https://download.openmmlab.com/mmpose/v1/projects/'
            'rtmpose/cspnext-m_udp-aic-coco_210e-256x192-f2f7d6f6_20230130.pth'
        )),
    head=dict(
        type='RTMCCHead',
        in_channels=768,
        out_channels=133,
        input_size=codec['input_size'],
        in_featuremap_size=(6, 8),
        simcc_split_ratio=codec['simcc_split_ratio'],
        final_layer_kernel_size=7,
        gau_cfg=dict(
            hidden_dims=256,
            s=128,
            expansion_factor=2,
            dropout_rate=0.,
            drop_path=0.,
            act_fn='SiLU',
            use_rel_bias=False,
            pos_enc=False),
        loss=dict(
            type='KLDiscretLoss',
            use_target_weight=True,
            beta=10.,
            label_softmax=True),
        decoder=codec),
    test_cfg=dict(flip_test=True))

# base dataset settings
dataset_type = 'CocoWholeBodyDataset'
data_mode = 'topdown'
data_root = 'data/'

backend_args = dict(backend='local')
# backend_args = dict(
#     backend='petrel',
#     path_mapping=dict({
#         f'{data_root}': 's3://openmmlab/datasets/detection/coco/',
#         f'{data_root}': 's3://openmmlab/datasets/detection/coco/'
#     }))

# pipelines
train_pipeline = [
    dict(type='LoadImage', backend_args=backend_args),
    dict(type='GetBBoxCenterScale'),
    dict(type='RandomFlip', direction='horizontal'),
    dict(type='RandomHalfBody'),
    dict(
        type='RandomBBoxTransform', scale_factor=[0.6, 1.4], rotate_factor=80),
    dict(type='TopdownAffine', input_size=codec['input_size']),
    dict(type='mmdet.YOLOXHSVRandomAug'),
    dict(
        type='Albumentation',
        transforms=[
            dict(type='Blur', p=0.1),
            dict(type='MedianBlur', p=0.1),
            dict(
                type='CoarseDropout',
                max_holes=1,
                max_height=0.4,
                max_width=0.4,
                min_holes=1,
                min_height=0.2,
                min_width=0.2,
                p=1.0),
        ]),
    dict(type='GenerateTarget', encoder=codec),
    dict(type='PackPoseInputs')
]
val_pipeline = [
    dict(type='LoadImage', backend_args=backend_args),
    dict(type='GetBBoxCenterScale'),
    dict(type='TopdownAffine', input_size=codec['input_size']),
    dict(type='PackPoseInputs')
]

train_pipeline_stage2 = [
    dict(type='LoadImage', backend_args=backend_args),
    dict(type='GetBBoxCenterScale'),
    dict(type='RandomFlip', direction='horizontal'),
    dict(type='RandomHalfBody'),
    dict(
        type='RandomBBoxTransform',
        shift_factor=0.,
        scale_factor=[0.75, 1.25],
        rotate_factor=60),
    dict(type='TopdownAffine', input_size=codec['input_size']),
    dict(type='mmdet.YOLOXHSVRandomAug'),
    dict(
        type='Albumentation',
        transforms=[
            dict(type='Blur', p=0.1),
            dict(type='MedianBlur', p=0.1),
            dict(
                type='CoarseDropout',
                max_holes=1,
                max_height=0.4,
                max_width=0.4,
                min_holes=1,
                min_height=0.2,
                min_width=0.2,
                p=0.5),
        ]),
    dict(type='GenerateTarget', encoder=codec),
    dict(type='PackPoseInputs')
]

datasets = []
dataset_coco = dict(
    type=dataset_type,
    data_root=data_root,
    data_mode=data_mode,
    ann_file='coco/annotations/coco_wholebody_train_v1.0.json',
    data_prefix=dict(img='coco/train2017/'),
    pipeline=[],
)
datasets.append(dataset_coco)

scene = ['Magic_show', 'Entertainment', 'ConductMusic', 'Online_class',
         'TalkShow', 'Speech', 'Fitness', 'Interview', 'Olympic', 'TVShow',
         'Singing', 'SignLanguage', 'Movie', 'LiveVlog', 'VideoConference']

for i in range(len(scene)):
    datasets.append(
        dict(
            type=dataset_type,
            data_root=data_root,
            data_mode=data_mode,
            ann_file='UBody/annotations/' + scene[i] + '/keypoint_annotation.json',
            data_prefix=dict(img='UBody/images/' + scene[i] + '/'),
            pipeline=[],
        )
    )

# data loaders
train_dataloader = dict(
    batch_size=64,
    num_workers=10,
    persistent_workers=True,
    sampler=dict(type='DefaultSampler', shuffle=True),
    dataset=dict(
        type='CombinedDataset',
        metainfo=dict(from_file='configs/_base_/datasets/coco_wholebody.py'),
        datasets=datasets,
        pipeline=train_pipeline,
        test_mode=False,
    ))
val_dataloader = dict(
    batch_size=32,
    num_workers=10,
    persistent_workers=True,
    drop_last=False,
    sampler=dict(type='DefaultSampler', shuffle=False, round_up=False),
    dataset=dict(
        type=dataset_type,
        data_root=data_root,
        data_mode=data_mode,
        ann_file='coco/annotations/coco_wholebody_val_v1.0.json',
        bbox_file=f'{data_root}coco/person_detection_results/'
        'COCO_val2017_detections_AP_H_56_person.json',
        data_prefix=dict(img='coco/val2017/'),
        test_mode=True,
        pipeline=val_pipeline,
    ))
test_dataloader = val_dataloader

# hooks
default_hooks = dict(
    checkpoint=dict(
        save_best='coco-wholebody/AP', rule='greater', max_keep_ckpts=1))

custom_hooks = [
    dict(
        type='EMAHook',
        ema_type='ExpMomentumEMA',
        momentum=0.0002,
        update_buffers=True,
        priority=49),
    dict(
        type='mmdet.PipelineSwitchHook',
        switch_epoch=max_epochs - stage2_num_epochs,
        switch_pipeline=train_pipeline_stage2)
]

# evaluators
val_evaluator = dict(
    type='CocoWholeBodyMetric',
    ann_file=data_root + 'coco/annotations/coco_wholebody_val_v1.0.json')
test_evaluator = val_evaluator
@ -1,259 +0,0 @@
# _base_ = ['../../../_base_/default_runtime.py']

# runtime
max_epochs = 270
stage2_num_epochs = 30
base_lr = 4e-3

train_cfg = dict(max_epochs=max_epochs, val_interval=10)
randomness = dict(seed=21)

# optimizer
optim_wrapper = dict(
    type='OptimWrapper',
    optimizer=dict(type='AdamW', lr=base_lr, weight_decay=0.05),
    paramwise_cfg=dict(
        norm_decay_mult=0, bias_decay_mult=0, bypass_duplicate=True))

# learning rate
param_scheduler = [
    dict(
        type='LinearLR',
        start_factor=1.0e-5,
        by_epoch=False,
        begin=0,
        end=1000),
    dict(
        # use cosine lr from epoch 135 to 270 (max_epochs // 2 to max_epochs)
        type='CosineAnnealingLR',
        eta_min=base_lr * 0.05,
        begin=max_epochs // 2,
        end=max_epochs,
        T_max=max_epochs // 2,
        by_epoch=True,
        convert_to_iter_based=True),
]

# automatically scaling LR based on the actual training batch size
auto_scale_lr = dict(base_batch_size=512)

# codec settings
codec = dict(
    type='SimCCLabel',
    input_size=(192, 256),
    sigma=(4.9, 5.66),
    simcc_split_ratio=2.0,
    normalize=False,
    use_dark=False)

# model settings
model = dict(
    type='TopdownPoseEstimator',
    data_preprocessor=dict(
        type='PoseDataPreprocessor',
        mean=[123.675, 116.28, 103.53],
        std=[58.395, 57.12, 57.375],
        bgr_to_rgb=True),
    backbone=dict(
        _scope_='mmdet',
        type='CSPNeXt',
        arch='P5',
        expand_ratio=0.5,
        deepen_factor=0.167,
        widen_factor=0.375,
        out_indices=(4, ),
        channel_attention=True,
        norm_cfg=dict(type='SyncBN'),
        act_cfg=dict(type='SiLU'),
        init_cfg=dict(
            type='Pretrained',
            prefix='backbone.',
            checkpoint='https://download.openmmlab.com/mmpose/v1/projects/'
            'rtmpose/cspnext-tiny_udp-aic-coco_210e-256x192-cbed682d_20230130.pth'
        )),
    head=dict(
        type='RTMCCHead',
        in_channels=384,
        out_channels=133,
        input_size=codec['input_size'],
        in_featuremap_size=(6, 8),
        simcc_split_ratio=codec['simcc_split_ratio'],
        final_layer_kernel_size=7,
        gau_cfg=dict(
            hidden_dims=256,
            s=128,
            expansion_factor=2,
            dropout_rate=0.,
            drop_path=0.,
            act_fn='SiLU',
            use_rel_bias=False,
            pos_enc=False),
        loss=dict(
            type='KLDiscretLoss',
            use_target_weight=True,
            beta=10.,
            label_softmax=True),
        decoder=codec),
    test_cfg=dict(flip_test=True))

# base dataset settings
dataset_type = 'CocoWholeBodyDataset'
data_mode = 'topdown'
data_root = 'data/'

backend_args = dict(backend='local')
# backend_args = dict(
#     backend='petrel',
#     path_mapping=dict({
#         f'{data_root}': 's3://openmmlab/datasets/detection/coco/',
#         f'{data_root}': 's3://openmmlab/datasets/detection/coco/'
#     }))

# pipelines
train_pipeline = [
    dict(type='LoadImage', backend_args=backend_args),
    dict(type='GetBBoxCenterScale'),
    dict(type='RandomFlip', direction='horizontal'),
    dict(type='RandomHalfBody'),
    dict(
        type='RandomBBoxTransform', scale_factor=[0.6, 1.4], rotate_factor=80),
    dict(type='TopdownAffine', input_size=codec['input_size']),
    dict(type='mmdet.YOLOXHSVRandomAug'),
    dict(
        type='Albumentation',
        transforms=[
            dict(type='Blur', p=0.1),
            dict(type='MedianBlur', p=0.1),
            dict(
                type='CoarseDropout',
                max_holes=1,
                max_height=0.4,
                max_width=0.4,
                min_holes=1,
                min_height=0.2,
                min_width=0.2,
                p=1.0),
        ]),
    dict(type='GenerateTarget', encoder=codec),
    dict(type='PackPoseInputs')
]
val_pipeline = [
    dict(type='LoadImage', backend_args=backend_args),
    dict(type='GetBBoxCenterScale'),
    dict(type='TopdownAffine', input_size=codec['input_size']),
    dict(type='PackPoseInputs')
]

train_pipeline_stage2 = [
    dict(type='LoadImage', backend_args=backend_args),
    dict(type='GetBBoxCenterScale'),
    dict(type='RandomFlip', direction='horizontal'),
    dict(type='RandomHalfBody'),
    dict(
        type='RandomBBoxTransform',
        shift_factor=0.,
        scale_factor=[0.75, 1.25],
        rotate_factor=60),
    dict(type='TopdownAffine', input_size=codec['input_size']),
    dict(type='mmdet.YOLOXHSVRandomAug'),
    dict(
        type='Albumentation',
        transforms=[
            dict(type='Blur', p=0.1),
            dict(type='MedianBlur', p=0.1),
            dict(
                type='CoarseDropout',
                max_holes=1,
                max_height=0.4,
                max_width=0.4,
                min_holes=1,
                min_height=0.2,
                min_width=0.2,
                p=0.5),
        ]),
    dict(type='GenerateTarget', encoder=codec),
    dict(type='PackPoseInputs')
]

datasets = []
dataset_coco = dict(
    type=dataset_type,
    data_root=data_root,
    data_mode=data_mode,
    ann_file='coco/annotations/coco_wholebody_train_v1.0.json',
    data_prefix=dict(img='coco/train2017/'),
    pipeline=[],
)
datasets.append(dataset_coco)

scene = ['Magic_show', 'Entertainment', 'ConductMusic', 'Online_class',
         'TalkShow', 'Speech', 'Fitness', 'Interview', 'Olympic', 'TVShow',
         'Singing', 'SignLanguage', 'Movie', 'LiveVlog', 'VideoConference']

for i in range(len(scene)):
    datasets.append(
        dict(
            type=dataset_type,
            data_root=data_root,
            data_mode=data_mode,
            ann_file='UBody/annotations/' + scene[i] + '/keypoint_annotation.json',
            data_prefix=dict(img='UBody/images/' + scene[i] + '/'),
            pipeline=[],
        )
    )

# data loaders
train_dataloader = dict(
    batch_size=64,
    num_workers=10,
    persistent_workers=True,
    sampler=dict(type='DefaultSampler', shuffle=True),
    dataset=dict(
        type='CombinedDataset',
        metainfo=dict(from_file='configs/_base_/datasets/coco_wholebody.py'),
        datasets=datasets,
        pipeline=train_pipeline,
        test_mode=False,
    ))
val_dataloader = dict(
    batch_size=32,
    num_workers=10,
    persistent_workers=True,
    drop_last=False,
    sampler=dict(type='DefaultSampler', shuffle=False, round_up=False),
    dataset=dict(
        type=dataset_type,
        data_root=data_root,
        data_mode=data_mode,
        ann_file='coco/annotations/coco_wholebody_val_v1.0.json',
        bbox_file=f'{data_root}coco/person_detection_results/'
        'COCO_val2017_detections_AP_H_56_person.json',
        data_prefix=dict(img='coco/val2017/'),
        test_mode=True,
        pipeline=val_pipeline,
    ))
test_dataloader = val_dataloader

# hooks
default_hooks = dict(
    checkpoint=dict(
        save_best='coco-wholebody/AP', rule='greater', max_keep_ckpts=1))

custom_hooks = [
    dict(
        type='EMAHook',
        ema_type='ExpMomentumEMA',
        momentum=0.0002,
        update_buffers=True,
        priority=49),
    dict(
        type='mmdet.PipelineSwitchHook',
        switch_epoch=max_epochs - stage2_num_epochs,
        switch_pipeline=train_pipeline_stage2)
]

# evaluators
val_evaluator = dict(
    type='CocoWholeBodyMetric',
    ann_file=data_root + 'coco/annotations/coco_wholebody_val_v1.0.json')
test_evaluator = val_evaluator
@ -1,244 +0,0 @@
img_scale = (640, 640)  # width, height

# model settings
model = dict(
    type='YOLOX',
    data_preprocessor=dict(
        type='DetDataPreprocessor',
        pad_size_divisor=32,
        batch_augments=[
            dict(
                type='BatchSyncRandomResize',
                random_size_range=(480, 800),
                size_divisor=32,
                interval=10)
        ]),
    backbone=dict(
        type='CSPDarknet',
        deepen_factor=1.0,
        widen_factor=1.0,
        out_indices=(2, 3, 4),
        use_depthwise=False,
        spp_kernal_sizes=(5, 9, 13),
        norm_cfg=dict(type='BN', momentum=0.03, eps=0.001),
        act_cfg=dict(type='Swish'),
    ),
    neck=dict(
        type='YOLOXPAFPN',
        in_channels=[256, 512, 1024],
        out_channels=256,
        num_csp_blocks=3,
        use_depthwise=False,
        upsample_cfg=dict(scale_factor=2, mode='nearest'),
        norm_cfg=dict(type='BN', momentum=0.03, eps=0.001),
        act_cfg=dict(type='Swish')),
    bbox_head=dict(
        type='YOLOXHead',
        num_classes=80,
        in_channels=256,
        feat_channels=256,
        stacked_convs=2,
        strides=(8, 16, 32),
        use_depthwise=False,
        norm_cfg=dict(type='BN', momentum=0.03, eps=0.001),
        act_cfg=dict(type='Swish'),
        loss_cls=dict(
            type='CrossEntropyLoss',
            use_sigmoid=True,
            reduction='sum',
            loss_weight=1.0),
        loss_bbox=dict(
            type='IoULoss',
            mode='square',
            eps=1e-16,
            reduction='sum',
            loss_weight=5.0),
        loss_obj=dict(
            type='CrossEntropyLoss',
            use_sigmoid=True,
            reduction='sum',
            loss_weight=1.0),
        loss_l1=dict(type='L1Loss', reduction='sum', loss_weight=1.0)),
    train_cfg=dict(assigner=dict(type='SimOTAAssigner', center_radius=2.5)),
    # In order to align the source code, the threshold of the val phase is
    # 0.01, and the threshold of the test phase is 0.001.
    test_cfg=dict(score_thr=0.01, nms=dict(type='nms', iou_threshold=0.65)))

# dataset settings
data_root = 'data/coco/'
dataset_type = 'CocoDataset'

# Example to use different file client
# Method 1: simply set the data root and let the file I/O module
# automatically infer from prefix (not support LMDB and Memcache yet)

# data_root = 's3://openmmlab/datasets/detection/coco/'

# Method 2: Use `backend_args`, `file_client_args` in versions before 3.0.0rc6
# backend_args = dict(
#     backend='petrel',
#     path_mapping=dict({
#         './data/': 's3://openmmlab/datasets/detection/',
#         'data/': 's3://openmmlab/datasets/detection/'
#     }))
backend_args = None

train_pipeline = [
    dict(type='Mosaic', img_scale=img_scale, pad_val=114.0),
    dict(
        type='RandomAffine',
        scaling_ratio_range=(0.1, 2),
        # img_scale is (width, height)
        border=(-img_scale[0] // 2, -img_scale[1] // 2)),
    dict(
        type='MixUp',
        img_scale=img_scale,
        ratio_range=(0.8, 1.6),
        pad_val=114.0),
    dict(type='YOLOXHSVRandomAug'),
    dict(type='RandomFlip', prob=0.5),
    # According to the official implementation, multi-scale
    # training is not considered here but in the
    # 'mmdet/models/detectors/yolox.py'.
    # Resize and Pad are for the last 15 epochs when Mosaic,
    # RandomAffine, and MixUp are closed by YOLOXModeSwitchHook.
    dict(type='Resize', scale=img_scale, keep_ratio=True),
    dict(
        type='Pad',
        pad_to_square=True,
        # If the image is three-channel, the pad value needs
        # to be set separately for each channel.
        pad_val=dict(img=(114.0, 114.0, 114.0))),
    dict(type='FilterAnnotations', min_gt_bbox_wh=(1, 1), keep_empty=False),
    dict(type='PackDetInputs')
]

train_dataset = dict(
    # use MultiImageMixDataset wrapper to support mosaic and mixup
    type='MultiImageMixDataset',
    dataset=dict(
        type=dataset_type,
        data_root=data_root,
        ann_file='annotations/instances_train2017.json',
        data_prefix=dict(img='train2017/'),
        pipeline=[
            dict(type='LoadImageFromFile', backend_args=backend_args),
            dict(type='LoadAnnotations', with_bbox=True)
        ],
        filter_cfg=dict(filter_empty_gt=False, min_size=32),
        backend_args=backend_args),
    pipeline=train_pipeline)

test_pipeline = [
    dict(type='LoadImageFromFile', backend_args=backend_args),
    dict(type='Resize', scale=img_scale, keep_ratio=True),
    dict(
        type='Pad',
        pad_to_square=True,
        pad_val=dict(img=(114.0, 114.0, 114.0))),
    dict(type='LoadAnnotations', with_bbox=True),
    dict(
        type='PackDetInputs',
        meta_keys=('img_id', 'img_path', 'ori_shape', 'img_shape',
                   'scale_factor'))
]

train_dataloader = dict(
    batch_size=8,
    num_workers=4,
    persistent_workers=True,
    sampler=dict(type='DefaultSampler', shuffle=True),
    dataset=train_dataset)
val_dataloader = dict(
    batch_size=8,
    num_workers=4,
    persistent_workers=True,
    drop_last=False,
    sampler=dict(type='DefaultSampler', shuffle=False),
    dataset=dict(
        type=dataset_type,
        data_root=data_root,
        ann_file='annotations/instances_val2017.json',
        data_prefix=dict(img='val2017/'),
        test_mode=True,
        pipeline=test_pipeline,
        backend_args=backend_args))
test_dataloader = val_dataloader

val_evaluator = dict(
    type='CocoMetric',
    ann_file=data_root + 'annotations/instances_val2017.json',
    metric='bbox',
    backend_args=backend_args)
test_evaluator = val_evaluator

# training settings
max_epochs = 300
num_last_epochs = 15
interval = 10

train_cfg = dict(max_epochs=max_epochs, val_interval=interval)

# optimizer
# default 8 gpu
base_lr = 0.01
optim_wrapper = dict(
    type='OptimWrapper',
    optimizer=dict(
        type='SGD', lr=base_lr, momentum=0.9, weight_decay=5e-4,
        nesterov=True),
    paramwise_cfg=dict(norm_decay_mult=0., bias_decay_mult=0.))

# learning rate
param_scheduler = [
    dict(
        # use quadratic formula to warm up 5 epochs
        # and lr is updated by iteration
        type='mmdet.QuadraticWarmupLR',
        by_epoch=True,
        begin=0,
        end=5,
        convert_to_iter_based=True),
    dict(
        # use cosine lr from 5 to 285 epoch
        type='CosineAnnealingLR',
        eta_min=base_lr * 0.05,
        begin=5,
        T_max=max_epochs - num_last_epochs,
        end=max_epochs - num_last_epochs,
        by_epoch=True,
        convert_to_iter_based=True),
    dict(
        # use fixed lr during last 15 epochs
        type='ConstantLR',
        by_epoch=True,
        factor=1,
        begin=max_epochs - num_last_epochs,
        end=max_epochs,
    )
]

default_hooks = dict(
    checkpoint=dict(
        interval=interval,
        max_keep_ckpts=3  # only keep latest 3 checkpoints
    ))

custom_hooks = [
    dict(
        type='YOLOXModeSwitchHook',
        num_last_epochs=num_last_epochs,
        priority=48),
    dict(type='SyncNormHook', priority=48),
    dict(
        type='EMAHook',
        ema_type='ExpMomentumEMA',
        momentum=0.0001,
        update_buffers=True,
        priority=49)
]

# NOTE: `auto_scale_lr` is for automatically scaling LR,
# USER SHOULD NOT CHANGE ITS VALUES.
# base_batch_size = (8 GPUs) x (8 samples per GPU)
auto_scale_lr = dict(base_batch_size=64)
@ -1,307 +0,0 @@
import math
import numpy as np
import cv2


eps = 0.01


def smart_resize(x, s):
    Ht, Wt = s
    if x.ndim == 2:
        Ho, Wo = x.shape
        Co = 1
    else:
        Ho, Wo, Co = x.shape
    if Co == 3 or Co == 1:
        k = float(Ht + Wt) / float(Ho + Wo)
        return cv2.resize(x, (int(Wt), int(Ht)), interpolation=cv2.INTER_AREA if k < 1 else cv2.INTER_LANCZOS4)
    else:
        return np.stack([smart_resize(x[:, :, i], s) for i in range(Co)], axis=2)


def smart_resize_k(x, fx, fy):
    if x.ndim == 2:
        Ho, Wo = x.shape
        Co = 1
    else:
        Ho, Wo, Co = x.shape
    Ht, Wt = Ho * fy, Wo * fx
    if Co == 3 or Co == 1:
        k = float(Ht + Wt) / float(Ho + Wo)
        return cv2.resize(x, (int(Wt), int(Ht)), interpolation=cv2.INTER_AREA if k < 1 else cv2.INTER_LANCZOS4)
    else:
        return np.stack([smart_resize_k(x[:, :, i], fx, fy) for i in range(Co)], axis=2)


def padRightDownCorner(img, stride, padValue):
    h = img.shape[0]
    w = img.shape[1]

    pad = 4 * [None]
    pad[0] = 0  # up
    pad[1] = 0  # left
    pad[2] = 0 if (h % stride == 0) else stride - (h % stride)  # down
    pad[3] = 0 if (w % stride == 0) else stride - (w % stride)  # right

    img_padded = img
    pad_up = np.tile(img_padded[0:1, :, :] * 0 + padValue, (pad[0], 1, 1))
    img_padded = np.concatenate((pad_up, img_padded), axis=0)
    pad_left = np.tile(img_padded[:, 0:1, :] * 0 + padValue, (1, pad[1], 1))
    img_padded = np.concatenate((pad_left, img_padded), axis=1)
    pad_down = np.tile(img_padded[-2:-1, :, :] * 0 + padValue, (pad[2], 1, 1))
    img_padded = np.concatenate((img_padded, pad_down), axis=0)
    pad_right = np.tile(img_padded[:, -2:-1, :] * 0 + padValue, (1, pad[3], 1))
    img_padded = np.concatenate((img_padded, pad_right), axis=1)

    return img_padded, pad


def transfer(model, model_weights):
    transfered_model_weights = {}
    for weights_name in model.state_dict().keys():
        transfered_model_weights[weights_name] = model_weights['.'.join(weights_name.split('.')[1:])]
    return transfered_model_weights


def draw_bodypose(canvas, candidate, subset):
    H, W, _C = canvas.shape
    candidate = np.array(candidate)
    subset = np.array(subset)

    stickwidth = 4

    limbSeq = [[2, 3], [2, 6], [3, 4], [4, 5], [6, 7], [7, 8], [2, 9], [9, 10],
               [10, 11], [2, 12], [12, 13], [13, 14], [2, 1], [1, 15], [15, 17],
               [1, 16], [16, 18], [3, 17], [6, 18]]

    colors = [[255, 0, 0], [255, 85, 0], [255, 170, 0], [255, 255, 0], [170, 255, 0], [85, 255, 0], [0, 255, 0],
              [0, 255, 85], [0, 255, 170], [0, 255, 255], [0, 170, 255], [0, 85, 255], [0, 0, 255], [85, 0, 255],
              [170, 0, 255], [255, 0, 255], [255, 0, 170], [255, 0, 85]]

    for i in range(17):
        for n in range(len(subset)):
            index = subset[n][np.array(limbSeq[i]) - 1]
            if -1 in index:
                continue
            Y = candidate[index.astype(int), 0] * float(W)
            X = candidate[index.astype(int), 1] * float(H)
            mX = np.mean(X)
            mY = np.mean(Y)
            length = ((X[0] - X[1]) ** 2 + (Y[0] - Y[1]) ** 2) ** 0.5
            angle = math.degrees(math.atan2(X[0] - X[1], Y[0] - Y[1]))
            polygon = cv2.ellipse2Poly((int(mY), int(mX)), (int(length / 2), stickwidth), int(angle), 0, 360, 1)
            cv2.fillConvexPoly(canvas, polygon, colors[i])

    canvas = (canvas * 0.6).astype(np.uint8)

    for i in range(18):
        for n in range(len(subset)):
            index = int(subset[n][i])
            if index == -1:
                continue
            x, y = candidate[index][0:2]
            x = int(x * W)
            y = int(y * H)
            cv2.circle(canvas, (int(x), int(y)), 4, colors[i], thickness=-1)

    return canvas


def draw_handpose(canvas, all_hand_peaks):
    import matplotlib as mpl

    H, W, _C = canvas.shape

    edges = [[0, 1], [1, 2], [2, 3], [3, 4], [0, 5], [5, 6], [6, 7], [7, 8], [0, 9], [9, 10],
             [10, 11], [11, 12], [0, 13], [13, 14], [14, 15], [15, 16], [0, 17], [17, 18], [18, 19], [19, 20]]

    # (person_number*2, 21, 2)
    for i in range(len(all_hand_peaks)):
        peaks = all_hand_peaks[i]
        peaks = np.array(peaks)

        for ie, e in enumerate(edges):
            x1, y1 = peaks[e[0]]
            x2, y2 = peaks[e[1]]

            x1 = int(x1 * W)
            y1 = int(y1 * H)
            x2 = int(x2 * W)
            y2 = int(y2 * H)
            if x1 > eps and y1 > eps and x2 > eps and y2 > eps:
                cv2.line(canvas, (x1, y1), (x2, y2), mpl.colors.hsv_to_rgb([ie / float(len(edges)), 1.0, 1.0]) * 255, thickness=2)

        for _, keypoint in enumerate(peaks):
            x, y = keypoint

            x = int(x * W)
            y = int(y * H)
            if x > eps and y > eps:
                cv2.circle(canvas, (x, y), 4, (0, 0, 255), thickness=-1)
    return canvas


def draw_facepose(canvas, all_lmks):
    H, W, _C = canvas.shape
    for lmks in all_lmks:
        lmks = np.array(lmks)
        for lmk in lmks:
            x, y = lmk
            x = int(x * W)
            y = int(y * H)
            if x > eps and y > eps:
                cv2.circle(canvas, (x, y), 3, (255, 255, 255), thickness=-1)
    return canvas


# detect hand according to body pose keypoints
# please refer to https://github.com/CMU-Perceptual-Computing-Lab/openpose/blob/master/src/openpose/hand/handDetector.cpp
def handDetect(candidate, subset, oriImg):
    # right hand: wrist 4, elbow 3, shoulder 2
    # left hand: wrist 7, elbow 6, shoulder 5
    ratioWristElbow = 0.33
    detect_result = []
    image_height, image_width = oriImg.shape[0:2]
    for person in subset.astype(int):
        # skip a side if any of its three joints is not detected
        has_left = np.sum(person[[5, 6, 7]] == -1) == 0
        has_right = np.sum(person[[2, 3, 4]] == -1) == 0
        if not (has_left or has_right):
            continue
        hands = []
        # left hand
        if has_left:
            left_shoulder_index, left_elbow_index, left_wrist_index = person[[5, 6, 7]]
            x1, y1 = candidate[left_shoulder_index][:2]
            x2, y2 = candidate[left_elbow_index][:2]
            x3, y3 = candidate[left_wrist_index][:2]
            hands.append([x1, y1, x2, y2, x3, y3, True])
        # right hand
        if has_right:
            right_shoulder_index, right_elbow_index, right_wrist_index = person[[2, 3, 4]]
            x1, y1 = candidate[right_shoulder_index][:2]
            x2, y2 = candidate[right_elbow_index][:2]
            x3, y3 = candidate[right_wrist_index][:2]
            hands.append([x1, y1, x2, y2, x3, y3, False])

        for x1, y1, x2, y2, x3, y3, is_left in hands:
            # pos_hand = pos_wrist + ratio * (pos_wrist - pos_elbow) = (1 + ratio) * pos_wrist - ratio * pos_elbow
            # handRectangle.x = posePtr[wrist*3] + ratioWristElbow * (posePtr[wrist*3] - posePtr[elbow*3]);
            # handRectangle.y = posePtr[wrist*3+1] + ratioWristElbow * (posePtr[wrist*3+1] - posePtr[elbow*3+1]);
            # const auto distanceWristElbow = getDistance(poseKeypoints, person, wrist, elbow);
            # const auto distanceElbowShoulder = getDistance(poseKeypoints, person, elbow, shoulder);
            # handRectangle.width = 1.5f * fastMax(distanceWristElbow, 0.9f * distanceElbowShoulder);
            x = x3 + ratioWristElbow * (x3 - x2)
            y = y3 + ratioWristElbow * (y3 - y2)
            distanceWristElbow = math.sqrt((x3 - x2) ** 2 + (y3 - y2) ** 2)
            distanceElbowShoulder = math.sqrt((x2 - x1) ** 2 + (y2 - y1) ** 2)
            width = 1.5 * max(distanceWristElbow, 0.9 * distanceElbowShoulder)
            # x-y refers to the center --> offset to topLeft point
            # handRectangle.x -= handRectangle.width / 2.f;
            # handRectangle.y -= handRectangle.height / 2.f;
            x -= width / 2
            y -= width / 2  # width = height
            # clamp boxes that overflow the image
            if x < 0:
                x = 0
            if y < 0:
                y = 0
            width1 = width
            width2 = width
            if x + width > image_width:
                width1 = image_width - x
            if y + width > image_height:
                width2 = image_height - y
            width = min(width1, width2)
            # discard hand boxes smaller than 20 pixels
            if width >= 20:
                detect_result.append([int(x), int(y), int(width), is_left])

    '''
    return value: [[x, y, w, True if left hand else False]].
    width=height since the network requires squared input.
    x, y is the coordinate of the top-left corner.
    '''
    return detect_result


# Written by Lvmin
def faceDetect(candidate, subset, oriImg):
    # left/right eye: 14, 15; left/right ear: 16, 17
    detect_result = []
    image_height, image_width = oriImg.shape[0:2]
    for person in subset.astype(int):
        has_head = person[0] > -1
        if not has_head:
            continue

        has_left_eye = person[14] > -1
        has_right_eye = person[15] > -1
        has_left_ear = person[16] > -1
        has_right_ear = person[17] > -1

        if not (has_left_eye or has_right_eye or has_left_ear or has_right_ear):
            continue

        head, left_eye, right_eye, left_ear, right_ear = person[[0, 14, 15, 16, 17]]

        width = 0.0
        x0, y0 = candidate[head][:2]

        if has_left_eye:
            x1, y1 = candidate[left_eye][:2]
            d = max(abs(x0 - x1), abs(y0 - y1))
            width = max(width, d * 3.0)

        if has_right_eye:
            x1, y1 = candidate[right_eye][:2]
            d = max(abs(x0 - x1), abs(y0 - y1))
            width = max(width, d * 3.0)

        if has_left_ear:
            x1, y1 = candidate[left_ear][:2]
            d = max(abs(x0 - x1), abs(y0 - y1))
            width = max(width, d * 1.5)

        if has_right_ear:
            x1, y1 = candidate[right_ear][:2]
            d = max(abs(x0 - x1), abs(y0 - y1))
            width = max(width, d * 1.5)

        x, y = x0, y0

        x -= width
        y -= width

        if x < 0:
            x = 0

        if y < 0:
            y = 0

        width1 = width * 2
        width2 = width * 2

        if x + width > image_width:
            width1 = image_width - x

        if y + width > image_height:
            width2 = image_height - y

        width = min(width1, width2)

        if width >= 20:
            detect_result.append([int(x), int(y), int(width)])

    return detect_result


# get max index of 2d array
def npmax(array):
    arrayindex = array.argmax(1)
    arrayvalue = array.max(1)
    i = arrayvalue.argmax()
    j = arrayindex[i]
    return i, j
@ -1,111 +0,0 @@
# Copyright (c) OpenMMLab. All rights reserved.
import os
import numpy as np
from modules.shared import log

mmok = True

try:
    import mmcv  # pylint: disable=unused-import
except ImportError as e:
    mmok = False
    log.error(f"Control processor DWPose: {e}")
try:
    from mmpose.apis import inference_topdown
    from mmpose.apis import init_model as init_pose_estimator
    from mmpose.evaluation.functional import nms
    from mmpose.utils import adapt_mmdet_pipeline
    from mmpose.structures import merge_data_samples
except ImportError as e:
    mmok = False
    log.error(f"Control processor DWPose: {e}")

try:
    from mmdet.apis import inference_detector, init_detector
except ImportError as e:
    mmok = False
    log.error(f"Control processor DWPose: {e}")

    def inference_detector(*args, **kwargs):  # fallback stub when mmdet is missing
        return lambda *args, **kwargs: None

if not mmok:
    log.error('Control processor DWPose: OpenMMLab is not installed')


class Wholebody:
    def __init__(self, det_config=None, det_ckpt=None, pose_config=None, pose_ckpt=None, device="cpu"):
        if not mmok:
            self.detector = lambda *args, **kwargs: None
            return
        prefix = os.path.dirname(__file__)
        if det_config is None:
            det_config = "config/yolox_l_8xb8-300e_coco.py"
        if pose_config is None:
            pose_config = "config/dwpose-l_384x288.py"
        if not det_config.startswith(prefix):
            det_config = os.path.join(prefix, det_config)
        if not pose_config.startswith(prefix):
            pose_config = os.path.join(prefix, pose_config)
        if det_ckpt is None:
            det_ckpt = 'https://download.openmmlab.com/mmdetection/v2.0/yolox/yolox_l_8x8_300e_coco/yolox_l_8x8_300e_coco_20211126_140236-d3bd2b23.pth'
        if pose_ckpt is None:
            pose_ckpt = "https://huggingface.co/wanghaofan/dw-ll_ucoco_384/resolve/main/dw-ll_ucoco_384.pth"
        # build detector
        self.detector = init_detector(det_config, det_ckpt, device=device)
        self.detector.cfg = adapt_mmdet_pipeline(self.detector.cfg)
        # build pose estimator
        self.pose_estimator = init_pose_estimator(
            pose_config,
            pose_ckpt,
            device=device)

    def to(self, device):
        self.detector.to(device)
        self.pose_estimator.to(device)
        return self

    def __call__(self, oriImg):
        if not mmok:
            return None, None
        # predict bbox
        det_result = inference_detector(self.detector, oriImg)
        pred_instance = det_result.pred_instances.cpu().numpy()
        bboxes = np.concatenate((pred_instance.bboxes, pred_instance.scores[:, None]), axis=1)
        bboxes = bboxes[np.logical_and(pred_instance.labels == 0, pred_instance.scores > 0.5)]
        # set NMS threshold
        bboxes = bboxes[nms(bboxes, 0.7), :4]
        # predict keypoints
        if len(bboxes) == 0:
            pose_results = inference_topdown(self.pose_estimator, oriImg)
        else:
            pose_results = inference_topdown(self.pose_estimator, oriImg, bboxes)
        preds = merge_data_samples(pose_results)
        preds = preds.pred_instances
        # preds = pose_results[0].pred_instances
        keypoints = preds.get('transformed_keypoints', preds.keypoints)
        if 'keypoint_scores' in preds:
            scores = preds.keypoint_scores
        else:
            scores = np.ones(keypoints.shape[:-1])
        if 'keypoints_visible' in preds:
            visible = preds.keypoints_visible
        else:
            visible = np.ones(keypoints.shape[:-1])
        keypoints_info = np.concatenate(
            (keypoints, scores[..., None], visible[..., None]),
            axis=-1)
        # compute neck joint as the mean of the two shoulders
        neck = np.mean(keypoints_info[:, [5, 6]], axis=1)
        # neck score when visualizing pred
        neck[:, 2:4] = np.logical_and(
            keypoints_info[:, 5, 2:4] > 0.3,
            keypoints_info[:, 6, 2:4] > 0.3).astype(int)
        new_keypoints_info = np.insert(
            keypoints_info, 17, neck, axis=1)
        mmpose_idx = [17, 6, 8, 10, 7, 9, 12, 14, 16, 13, 15, 2, 1, 4, 3]
        openpose_idx = [1, 2, 3, 4, 6, 7, 8, 9, 10, 12, 13, 14, 15, 16, 17]
        new_keypoints_info[:, openpose_idx] = new_keypoints_info[:, mmpose_idx]
        keypoints_info = new_keypoints_info
        keypoints, scores, visible = keypoints_info[..., :2], keypoints_info[..., 2], keypoints_info[..., 3]
        return keypoints, scores
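A toy numpy check of the neck synthesis and mmpose-to-openpose index remap performed in `__call__` above, run on random data (shapes only):

```python
import numpy as np

keypoints_info = np.random.rand(1, 133, 4)  # (person, keypoint, x/y/score/visible)
# neck is synthesized as the mean of the two shoulders (COCO indices 5 and 6)
neck = np.mean(keypoints_info[:, [5, 6]], axis=1)
new_info = np.insert(keypoints_info, 17, neck, axis=1)  # insert neck at index 17
mmpose_idx = [17, 6, 8, 10, 7, 9, 12, 14, 16, 13, 15, 2, 1, 4, 3]
openpose_idx = [1, 2, 3, 4, 6, 7, 8, 9, 10, 12, 13, 14, 15, 16, 17]
new_info[:, openpose_idx] = new_info[:, mmpose_idx]  # permute into openpose order
assert new_info.shape == (1, 134, 4)
```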
@ -60,7 +60,6 @@ def delay_load_config():
    from modules.control.proc.midas import MidasDetector
    from modules.control.proc.mlsd import MLSDdetector
    from modules.control.proc.openpose import OpenposeDetector
    from modules.control.proc.dwpose import DWposeDetector
    from modules.control.proc.segment_anything import SamDetector
    from modules.control.proc.zoe import ZoeDetector
    from modules.control.proc.marigold import MarigoldDetector
@ -73,7 +72,6 @@ def delay_load_config():
        'None': {},
        # pose models
        'OpenPose': {'class': OpenposeDetector, 'checkpoint': True, 'params': {'include_body': True, 'include_hand': False, 'include_face': False}},
        'DWPose': {'class': DWposeDetector, 'checkpoint': False, 'model': 'Tiny', 'params': {'min_confidence': 0.3}},
        'MediaPipe Face': {'class': MediapipeFaceDetector, 'checkpoint': False, 'params': {'max_faces': 1, 'min_confidence': 0.5}},
        # outline models
        'Canny': {'class': CannyDetector, 'checkpoint': False, 'params': {'low_threshold': 100, 'high_threshold': 200}},