remove dwpose

Signed-off-by: Vladimir Mandic <mandic00@live.com>
pull/4663/head
Vladimir Mandic 2026-02-22 09:21:24 +01:00
parent a1d46b3ecd
commit 8ea9d428d5
10 changed files with 4 additions and 1848 deletions

View File

@ -41,6 +41,7 @@ TBD
- ui: **themes** add *CTD-NT64Light*, *CTD-NT64Medium* and *CTD-NT64Dark*, thanks @resonantsky
- ui: **gallery** add option to auto-refresh gallery, thanks @awsr
- **Internal**
- `python==3.13` full support
- `python==3.14` initial support
see [docs](https://vladmandic.github.io/sdnext-docs/Python/) for details
- remove hard-dependnecies:
@ -64,9 +65,11 @@ TBD
- use `threading` for deferable operatios
- use `threading` for io-independent parallel operations
- remove requirements: `clip`, `open-clip`
- remove `normalbae` pre-processor
- captioning part-2, thanks @CalamitousFelicitousness
- add new build of `insightface`, thanks @hameerabbasi
- **Obsolete**
- remove `normalbae` pre-processor
- remove `dwpose` pre-processor
- **Checks**
- switch to `pyproject.toml` for tool configs
- update `lint` rules, thanks @awsr

View File

@ -1,149 +0,0 @@
# Openpose
# Original from CMU https://github.com/CMU-Perceptual-Computing-Lab/openpose
# 2nd Edited by https://github.com/Hzzone/pytorch-openpose
# 3rd Edited by ControlNet
# 4th Edited by ControlNet (added face and correct hands)
from typing import Type, Optional, Union, List
import os
os.environ["KMP_DUPLICATE_LIB_OK"]="TRUE"
import cv2
import numpy as np
from PIL import Image
from installer import installed, pip
from modules.logger import log
from modules.control.util import HWC3, resize_image
from .draw import draw_bodypose, draw_handpose, draw_facepose
checked_ok = False
busy = False
def _register_module(self, module: Type, module_name: Optional[Union[str, List[str]]] = None, force: bool = False) -> None:
if not callable(module):
raise TypeError(f'module must be Callable, but got {type(module)}')
if module_name is None:
module_name = module.__name__
if isinstance(module_name, str):
module_name = [module_name]
for name in module_name:
if not force and name in self._module_dict: # pylint: disable=protected-access
pass # patch for 'Adafactor is already registered in optimizer at torch.optim'
self._module_dict[name] = module # pylint: disable=protected-access
def check_dependencies():
global checked_ok, busy # pylint: disable=global-statement
busy = True
debug = log.trace if os.environ.get('SD_DWPOSE_DEBUG', None) is not None else lambda *args, **kwargs: None
# pip install --upgrade --no-deps --force-reinstall termcolor xtcocotools terminaltables pycocotools munkres shapely openmim==0.3.9 mmengine==0.10.5 mmcv==2.1.0 mmpose==1.3.2 mmdet==3.3.0
packages = [
'termcolor',
'xtcocotools',
'terminaltables',
'pycocotools',
'munkres',
'shapely',
'openmim==0.3.9',
'mmengine==0.10.5',
'mmcv==2.1.0',
'mmpose==1.3.2',
'mmdet==3.3.0',
]
status = [installed(p, quiet=True) for p in packages]
debug(f'DWPose required={packages} status={status}')
if not all(status):
log.info(f'Installing dependencies: for=dwpose packages={packages}')
cmd = 'install --upgrade --no-deps --force-reinstall '
pkgs = ' '.join(packages)
pip(cmd + pkgs, ignore=False, quiet=True, uv=False)
try:
import mmcv # pylint: disable=unused-import
import mmengine # pylint: disable=unused-import
from mmengine.registry import Registry
Registry._register_module = _register_module # pylint: disable=protected-access
import mmpose # pylint: disable=unused-import
import mmdet # pylint: disable=unused-import
debug('DWPose import ok')
checked_ok = True
except Exception as e:
log.error(f'DWPose: {e}')
# from modules import errors
# errors.display(e, 'DWPose')
busy = False
return checked_ok
def draw_pose(pose, H, W):
bodies = pose['bodies']
faces = pose['faces']
hands = pose['hands']
candidate = bodies['candidate']
subset = bodies['subset']
canvas = np.zeros(shape=(H, W, 3), dtype=np.uint8)
canvas = draw_bodypose(canvas, candidate, subset)
canvas = draw_handpose(canvas, hands)
canvas = draw_facepose(canvas, faces)
return canvas
class DWposeDetector:
def __init__(self, det_config=None, det_ckpt=None, pose_config=None, pose_ckpt=None, device="cpu"):
self.pose_estimation = None
if not checked_ok:
if not check_dependencies():
return
Wholebody = None
try:
from .wholebody import Wholebody
except Exception as e:
log.error(f'DWPose: {e}')
if Wholebody is not None:
self.pose_estimation = Wholebody(det_config, det_ckpt, pose_config, pose_ckpt, device)
def to(self, device):
self.pose_estimation.to(device)
return self
def __call__(self, input_image, detect_resolution=512, image_resolution=512, output_type="pil", min_confidence=0.3, **kwargs):
if self.pose_estimation is None:
log.error("DWPose: not loaded")
return None
input_image = cv2.cvtColor(np.array(input_image, dtype=np.uint8), cv2.COLOR_RGB2BGR)
input_image = HWC3(input_image)
input_image = resize_image(input_image, detect_resolution)
H, W, _C = input_image.shape
candidate, subset = self.pose_estimation(input_image)
if candidate is None:
return Image.fromarray(input_image)
nums, _keys, locs = candidate.shape
candidate[..., 0] /= float(W)
candidate[..., 1] /= float(H)
body = candidate[:,:18].copy()
body = body.reshape(nums*18, locs)
score = subset[:,:18]
for i in range(len(score)):
for j in range(len(score[i])):
if score[i][j] > min_confidence:
score[i][j] = int(18*i+j)
else:
score[i][j] = -1
un_visible = subset < min_confidence
candidate[un_visible] = -1
_foot = candidate[:,18:24]
faces = candidate[:,24:92]
hands = candidate[:,92:113]
hands = np.vstack([hands, candidate[:,113:]])
bodies = dict(candidate=body, subset=score)
pose = dict(bodies=bodies, hands=hands, faces=faces)
detected_map = draw_pose(pose, H, W)
detected_map = HWC3(detected_map)
img = resize_image(input_image, image_resolution)
H, W, _C = img.shape
detected_map = cv2.resize(detected_map, (W, H), interpolation=cv2.INTER_LINEAR)
if output_type == "pil":
detected_map = Image.fromarray(detected_map)
return detected_map

View File

@ -1,257 +0,0 @@
# runtime
max_epochs = 270
stage2_num_epochs = 30
base_lr = 4e-3
train_cfg = dict(max_epochs=max_epochs, val_interval=10)
randomness = dict(seed=21)
# optimizer
optim_wrapper = dict(
type='OptimWrapper',
optimizer=dict(type='AdamW', lr=base_lr, weight_decay=0.05),
paramwise_cfg=dict(
norm_decay_mult=0, bias_decay_mult=0, bypass_duplicate=True))
# learning rate
param_scheduler = [
dict(
type='LinearLR',
start_factor=1.0e-5,
by_epoch=False,
begin=0,
end=1000),
dict(
# use cosine lr from 150 to 300 epoch
type='CosineAnnealingLR',
eta_min=base_lr * 0.05,
begin=max_epochs // 2,
end=max_epochs,
T_max=max_epochs // 2,
by_epoch=True,
convert_to_iter_based=True),
]
# automatically scaling LR based on the actual training batch size
auto_scale_lr = dict(base_batch_size=512)
# codec settings
codec = dict(
type='SimCCLabel',
input_size=(288, 384),
sigma=(6., 6.93),
simcc_split_ratio=2.0,
normalize=False,
use_dark=False)
# model settings
model = dict(
type='TopdownPoseEstimator',
data_preprocessor=dict(
type='PoseDataPreprocessor',
mean=[123.675, 116.28, 103.53],
std=[58.395, 57.12, 57.375],
bgr_to_rgb=True),
backbone=dict(
_scope_='mmdet',
type='CSPNeXt',
arch='P5',
expand_ratio=0.5,
deepen_factor=1.,
widen_factor=1.,
out_indices=(4, ),
channel_attention=True,
norm_cfg=dict(type='SyncBN'),
act_cfg=dict(type='SiLU'),
init_cfg=dict(
type='Pretrained',
prefix='backbone.',
checkpoint='https://download.openmmlab.com/mmpose/v1/projects/'
'rtmpose/cspnext-l_udp-aic-coco_210e-256x192-273b7631_20230130.pth'
)),
head=dict(
type='RTMCCHead',
in_channels=1024,
out_channels=133,
input_size=codec['input_size'],
in_featuremap_size=(9, 12),
simcc_split_ratio=codec['simcc_split_ratio'],
final_layer_kernel_size=7,
gau_cfg=dict(
hidden_dims=256,
s=128,
expansion_factor=2,
dropout_rate=0.,
drop_path=0.,
act_fn='SiLU',
use_rel_bias=False,
pos_enc=False),
loss=dict(
type='KLDiscretLoss',
use_target_weight=True,
beta=10.,
label_softmax=True),
decoder=codec),
test_cfg=dict(flip_test=True, ))
# base dataset settings
dataset_type = 'CocoWholeBodyDataset'
data_mode = 'topdown'
data_root = '/data/'
backend_args = dict(backend='local')
# backend_args = dict(
# backend='petrel',
# path_mapping=dict({
# f'{data_root}': 's3://openmmlab/datasets/detection/coco/',
# f'{data_root}': 's3://openmmlab/datasets/detection/coco/'
# }))
# pipelines
train_pipeline = [
dict(type='LoadImage', backend_args=backend_args),
dict(type='GetBBoxCenterScale'),
dict(type='RandomFlip', direction='horizontal'),
dict(type='RandomHalfBody'),
dict(
type='RandomBBoxTransform', scale_factor=[0.6, 1.4], rotate_factor=80),
dict(type='TopdownAffine', input_size=codec['input_size']),
dict(type='mmdet.YOLOXHSVRandomAug'),
dict(
type='Albumentation',
transforms=[
dict(type='Blur', p=0.1),
dict(type='MedianBlur', p=0.1),
dict(
type='CoarseDropout',
max_holes=1,
max_height=0.4,
max_width=0.4,
min_holes=1,
min_height=0.2,
min_width=0.2,
p=1.0),
]),
dict(type='GenerateTarget', encoder=codec),
dict(type='PackPoseInputs')
]
val_pipeline = [
dict(type='LoadImage', backend_args=backend_args),
dict(type='GetBBoxCenterScale'),
dict(type='TopdownAffine', input_size=codec['input_size']),
dict(type='PackPoseInputs')
]
train_pipeline_stage2 = [
dict(type='LoadImage', backend_args=backend_args),
dict(type='GetBBoxCenterScale'),
dict(type='RandomFlip', direction='horizontal'),
dict(type='RandomHalfBody'),
dict(
type='RandomBBoxTransform',
shift_factor=0.,
scale_factor=[0.75, 1.25],
rotate_factor=60),
dict(type='TopdownAffine', input_size=codec['input_size']),
dict(type='mmdet.YOLOXHSVRandomAug'),
dict(
type='Albumentation',
transforms=[
dict(type='Blur', p=0.1),
dict(type='MedianBlur', p=0.1),
dict(
type='CoarseDropout',
max_holes=1,
max_height=0.4,
max_width=0.4,
min_holes=1,
min_height=0.2,
min_width=0.2,
p=0.5),
]),
dict(type='GenerateTarget', encoder=codec),
dict(type='PackPoseInputs')
]
datasets = []
dataset_coco=dict(
type=dataset_type,
data_root=data_root,
data_mode=data_mode,
ann_file='coco/annotations/coco_wholebody_train_v1.0.json',
data_prefix=dict(img='coco/train2017/'),
pipeline=[],
)
datasets.append(dataset_coco)
scene = ['Magic_show', 'Entertainment', 'ConductMusic', 'Online_class',
'TalkShow', 'Speech', 'Fitness', 'Interview', 'Olympic', 'TVShow',
'Singing', 'SignLanguage', 'Movie', 'LiveVlog', 'VideoConference']
for i in range(len(scene)):
datasets.append(
dict(
type=dataset_type,
data_root=data_root,
data_mode=data_mode,
ann_file='UBody/annotations/'+scene[i]+'/keypoint_annotation.json',
data_prefix=dict(img='UBody/images/'+scene[i]+'/'),
pipeline=[],
)
)
# data loaders
train_dataloader = dict(
batch_size=32,
num_workers=10,
persistent_workers=True,
sampler=dict(type='DefaultSampler', shuffle=True),
dataset=dict(
type='CombinedDataset',
metainfo=dict(from_file='configs/_base_/datasets/coco_wholebody.py'),
datasets=datasets,
pipeline=train_pipeline,
test_mode=False,
))
val_dataloader = dict(
batch_size=32,
num_workers=10,
persistent_workers=True,
drop_last=False,
sampler=dict(type='DefaultSampler', shuffle=False, round_up=False),
dataset=dict(
type=dataset_type,
data_root=data_root,
data_mode=data_mode,
ann_file='coco/annotations/coco_wholebody_val_v1.0.json',
bbox_file=f'{data_root}coco/person_detection_results/'
'COCO_val2017_detections_AP_H_56_person.json',
data_prefix=dict(img='coco/val2017/'),
test_mode=True,
pipeline=val_pipeline,
))
test_dataloader = val_dataloader
# hooks
default_hooks = dict(
checkpoint=dict(
save_best='coco-wholebody/AP', rule='greater', max_keep_ckpts=1))
custom_hooks = [
dict(
type='EMAHook',
ema_type='ExpMomentumEMA',
momentum=0.0002,
update_buffers=True,
priority=49),
dict(
type='mmdet.PipelineSwitchHook',
switch_epoch=max_epochs - stage2_num_epochs,
switch_pipeline=train_pipeline_stage2)
]
# evaluators
val_evaluator = dict(
type='CocoWholeBodyMetric',
ann_file=data_root + 'coco/annotations/coco_wholebody_val_v1.0.json')
test_evaluator = val_evaluator

View File

@ -1,259 +0,0 @@
# _base_ = ['../../../_base_/default_runtime.py']
# runtime
max_epochs = 270
stage2_num_epochs = 30
base_lr = 4e-3
train_cfg = dict(max_epochs=max_epochs, val_interval=10)
randomness = dict(seed=21)
# optimizer
optim_wrapper = dict(
type='OptimWrapper',
optimizer=dict(type='AdamW', lr=base_lr, weight_decay=0.05),
paramwise_cfg=dict(
norm_decay_mult=0, bias_decay_mult=0, bypass_duplicate=True))
# learning rate
param_scheduler = [
dict(
type='LinearLR',
start_factor=1.0e-5,
by_epoch=False,
begin=0,
end=1000),
dict(
# use cosine lr from 150 to 300 epoch
type='CosineAnnealingLR',
eta_min=base_lr * 0.05,
begin=max_epochs // 2,
end=max_epochs,
T_max=max_epochs // 2,
by_epoch=True,
convert_to_iter_based=True),
]
# automatically scaling LR based on the actual training batch size
auto_scale_lr = dict(base_batch_size=512)
# codec settings
codec = dict(
type='SimCCLabel',
input_size=(288, 384),
sigma=(6., 6.93),
simcc_split_ratio=2.0,
normalize=False,
use_dark=False)
# model settings
model = dict(
type='TopdownPoseEstimator',
data_preprocessor=dict(
type='PoseDataPreprocessor',
mean=[123.675, 116.28, 103.53],
std=[58.395, 57.12, 57.375],
bgr_to_rgb=True),
backbone=dict(
_scope_='mmdet',
type='CSPNeXt',
arch='P5',
expand_ratio=0.5,
deepen_factor=1.,
widen_factor=1.,
out_indices=(4, ),
channel_attention=True,
norm_cfg=dict(type='SyncBN'),
act_cfg=dict(type='SiLU'),
init_cfg=dict(
type='Pretrained',
prefix='backbone.',
checkpoint='https://download.openmmlab.com/mmpose/v1/projects/'
'rtmpose/cspnext-l_udp-aic-coco_210e-256x192-273b7631_20230130.pth'
)),
head=dict(
type='RTMCCHead',
in_channels=1024,
out_channels=133,
input_size=codec['input_size'],
in_featuremap_size=(9, 12),
simcc_split_ratio=codec['simcc_split_ratio'],
final_layer_kernel_size=7,
gau_cfg=dict(
hidden_dims=256,
s=128,
expansion_factor=2,
dropout_rate=0.,
drop_path=0.,
act_fn='SiLU',
use_rel_bias=False,
pos_enc=False),
loss=dict(
type='KLDiscretLoss',
use_target_weight=True,
beta=10.,
label_softmax=True),
decoder=codec),
test_cfg=dict(flip_test=True, ))
# base dataset settings
dataset_type = 'CocoWholeBodyDataset'
data_mode = 'topdown'
data_root = 'data/'
backend_args = dict(backend='local')
# backend_args = dict(
# backend='petrel',
# path_mapping=dict({
# f'{data_root}': 's3://openmmlab/datasets/detection/coco/',
# f'{data_root}': 's3://openmmlab/datasets/detection/coco/'
# }))
# pipelines
train_pipeline = [
dict(type='LoadImage', backend_args=backend_args),
dict(type='GetBBoxCenterScale'),
dict(type='RandomFlip', direction='horizontal'),
dict(type='RandomHalfBody'),
dict(
type='RandomBBoxTransform', scale_factor=[0.6, 1.4], rotate_factor=80),
dict(type='TopdownAffine', input_size=codec['input_size']),
dict(type='mmdet.YOLOXHSVRandomAug'),
dict(
type='Albumentation',
transforms=[
dict(type='Blur', p=0.1),
dict(type='MedianBlur', p=0.1),
dict(
type='CoarseDropout',
max_holes=1,
max_height=0.4,
max_width=0.4,
min_holes=1,
min_height=0.2,
min_width=0.2,
p=1.0),
]),
dict(type='GenerateTarget', encoder=codec),
dict(type='PackPoseInputs')
]
val_pipeline = [
dict(type='LoadImage', backend_args=backend_args),
dict(type='GetBBoxCenterScale'),
dict(type='TopdownAffine', input_size=codec['input_size']),
dict(type='PackPoseInputs')
]
train_pipeline_stage2 = [
dict(type='LoadImage', backend_args=backend_args),
dict(type='GetBBoxCenterScale'),
dict(type='RandomFlip', direction='horizontal'),
dict(type='RandomHalfBody'),
dict(
type='RandomBBoxTransform',
shift_factor=0.,
scale_factor=[0.75, 1.25],
rotate_factor=60),
dict(type='TopdownAffine', input_size=codec['input_size']),
dict(type='mmdet.YOLOXHSVRandomAug'),
dict(
type='Albumentation',
transforms=[
dict(type='Blur', p=0.1),
dict(type='MedianBlur', p=0.1),
dict(
type='CoarseDropout',
max_holes=1,
max_height=0.4,
max_width=0.4,
min_holes=1,
min_height=0.2,
min_width=0.2,
p=0.5),
]),
dict(type='GenerateTarget', encoder=codec),
dict(type='PackPoseInputs')
]
datasets = []
dataset_coco=dict(
type=dataset_type,
data_root=data_root,
data_mode=data_mode,
ann_file='coco/annotations/coco_wholebody_train_v1.0.json',
data_prefix=dict(img='coco/train2017/'),
pipeline=[],
)
datasets.append(dataset_coco)
scene = ['Magic_show', 'Entertainment', 'ConductMusic', 'Online_class',
'TalkShow', 'Speech', 'Fitness', 'Interview', 'Olympic', 'TVShow',
'Singing', 'SignLanguage', 'Movie', 'LiveVlog', 'VideoConference']
for i in range(len(scene)):
datasets.append(
dict(
type=dataset_type,
data_root=data_root,
data_mode=data_mode,
ann_file='UBody/annotations/'+scene[i]+'/keypoint_annotation.json',
data_prefix=dict(img='UBody/images/'+scene[i]+'/'),
pipeline=[],
)
)
# data loaders
train_dataloader = dict(
batch_size=32,
num_workers=10,
persistent_workers=True,
sampler=dict(type='DefaultSampler', shuffle=True),
dataset=dict(
type='CombinedDataset',
metainfo=dict(from_file='configs/_base_/datasets/coco_wholebody.py'),
datasets=datasets,
pipeline=train_pipeline,
test_mode=False,
))
val_dataloader = dict(
batch_size=32,
num_workers=10,
persistent_workers=True,
drop_last=False,
sampler=dict(type='DefaultSampler', shuffle=False, round_up=False),
dataset=dict(
type=dataset_type,
data_root=data_root,
data_mode=data_mode,
ann_file='coco/annotations/coco_wholebody_val_v1.0.json',
bbox_file=f'{data_root}coco/person_detection_results/'
'COCO_val2017_detections_AP_H_56_person.json',
data_prefix=dict(img='coco/val2017/'),
test_mode=True,
pipeline=val_pipeline,
))
test_dataloader = val_dataloader
# hooks
default_hooks = dict(
checkpoint=dict(
save_best='coco-wholebody/AP', rule='greater', max_keep_ckpts=1))
custom_hooks = [
dict(
type='EMAHook',
ema_type='ExpMomentumEMA',
momentum=0.0002,
update_buffers=True,
priority=49),
dict(
type='mmdet.PipelineSwitchHook',
switch_epoch=max_epochs - stage2_num_epochs,
switch_pipeline=train_pipeline_stage2)
]
# evaluators
val_evaluator = dict(
type='CocoWholeBodyMetric',
ann_file=data_root + 'coco/annotations/coco_wholebody_val_v1.0.json')
test_evaluator = val_evaluator

View File

@ -1,259 +0,0 @@
# _base_ = ['../../../_base_/default_runtime.py']
# runtime
max_epochs = 270
stage2_num_epochs = 30
base_lr = 4e-3
train_cfg = dict(max_epochs=max_epochs, val_interval=10)
randomness = dict(seed=21)
# optimizer
optim_wrapper = dict(
type='OptimWrapper',
optimizer=dict(type='AdamW', lr=base_lr, weight_decay=0.05),
paramwise_cfg=dict(
norm_decay_mult=0, bias_decay_mult=0, bypass_duplicate=True))
# learning rate
param_scheduler = [
dict(
type='LinearLR',
start_factor=1.0e-5,
by_epoch=False,
begin=0,
end=1000),
dict(
# use cosine lr from 150 to 300 epoch
type='CosineAnnealingLR',
eta_min=base_lr * 0.05,
begin=max_epochs // 2,
end=max_epochs,
T_max=max_epochs // 2,
by_epoch=True,
convert_to_iter_based=True),
]
# automatically scaling LR based on the actual training batch size
auto_scale_lr = dict(base_batch_size=512)
# codec settings
codec = dict(
type='SimCCLabel',
input_size=(192, 256),
sigma=(4.9, 5.66),
simcc_split_ratio=2.0,
normalize=False,
use_dark=False)
# model settings
model = dict(
type='TopdownPoseEstimator',
data_preprocessor=dict(
type='PoseDataPreprocessor',
mean=[123.675, 116.28, 103.53],
std=[58.395, 57.12, 57.375],
bgr_to_rgb=True),
backbone=dict(
_scope_='mmdet',
type='CSPNeXt',
arch='P5',
expand_ratio=0.5,
deepen_factor=0.67,
widen_factor=0.75,
out_indices=(4, ),
channel_attention=True,
norm_cfg=dict(type='SyncBN'),
act_cfg=dict(type='SiLU'),
init_cfg=dict(
type='Pretrained',
prefix='backbone.',
checkpoint='https://download.openmmlab.com/mmpose/v1/projects/'
'rtmpose/cspnext-m_udp-aic-coco_210e-256x192-f2f7d6f6_20230130.pth'
)),
head=dict(
type='RTMCCHead',
in_channels=768,
out_channels=133,
input_size=codec['input_size'],
in_featuremap_size=(6, 8),
simcc_split_ratio=codec['simcc_split_ratio'],
final_layer_kernel_size=7,
gau_cfg=dict(
hidden_dims=256,
s=128,
expansion_factor=2,
dropout_rate=0.,
drop_path=0.,
act_fn='SiLU',
use_rel_bias=False,
pos_enc=False),
loss=dict(
type='KLDiscretLoss',
use_target_weight=True,
beta=10.,
label_softmax=True),
decoder=codec),
test_cfg=dict(flip_test=True, ))
# base dataset settings
dataset_type = 'CocoWholeBodyDataset'
data_mode = 'topdown'
data_root = 'data/'
backend_args = dict(backend='local')
# backend_args = dict(
# backend='petrel',
# path_mapping=dict({
# f'{data_root}': 's3://openmmlab/datasets/detection/coco/',
# f'{data_root}': 's3://openmmlab/datasets/detection/coco/'
# }))
# pipelines
train_pipeline = [
dict(type='LoadImage', backend_args=backend_args),
dict(type='GetBBoxCenterScale'),
dict(type='RandomFlip', direction='horizontal'),
dict(type='RandomHalfBody'),
dict(
type='RandomBBoxTransform', scale_factor=[0.6, 1.4], rotate_factor=80),
dict(type='TopdownAffine', input_size=codec['input_size']),
dict(type='mmdet.YOLOXHSVRandomAug'),
dict(
type='Albumentation',
transforms=[
dict(type='Blur', p=0.1),
dict(type='MedianBlur', p=0.1),
dict(
type='CoarseDropout',
max_holes=1,
max_height=0.4,
max_width=0.4,
min_holes=1,
min_height=0.2,
min_width=0.2,
p=1.0),
]),
dict(type='GenerateTarget', encoder=codec),
dict(type='PackPoseInputs')
]
val_pipeline = [
dict(type='LoadImage', backend_args=backend_args),
dict(type='GetBBoxCenterScale'),
dict(type='TopdownAffine', input_size=codec['input_size']),
dict(type='PackPoseInputs')
]
train_pipeline_stage2 = [
dict(type='LoadImage', backend_args=backend_args),
dict(type='GetBBoxCenterScale'),
dict(type='RandomFlip', direction='horizontal'),
dict(type='RandomHalfBody'),
dict(
type='RandomBBoxTransform',
shift_factor=0.,
scale_factor=[0.75, 1.25],
rotate_factor=60),
dict(type='TopdownAffine', input_size=codec['input_size']),
dict(type='mmdet.YOLOXHSVRandomAug'),
dict(
type='Albumentation',
transforms=[
dict(type='Blur', p=0.1),
dict(type='MedianBlur', p=0.1),
dict(
type='CoarseDropout',
max_holes=1,
max_height=0.4,
max_width=0.4,
min_holes=1,
min_height=0.2,
min_width=0.2,
p=0.5),
]),
dict(type='GenerateTarget', encoder=codec),
dict(type='PackPoseInputs')
]
datasets = []
dataset_coco=dict(
type=dataset_type,
data_root=data_root,
data_mode=data_mode,
ann_file='coco/annotations/coco_wholebody_train_v1.0.json',
data_prefix=dict(img='coco/train2017/'),
pipeline=[],
)
datasets.append(dataset_coco)
scene = ['Magic_show', 'Entertainment', 'ConductMusic', 'Online_class',
'TalkShow', 'Speech', 'Fitness', 'Interview', 'Olympic', 'TVShow',
'Singing', 'SignLanguage', 'Movie', 'LiveVlog', 'VideoConference']
for i in range(len(scene)):
datasets.append(
dict(
type=dataset_type,
data_root=data_root,
data_mode=data_mode,
ann_file='UBody/annotations/'+scene[i]+'/keypoint_annotation.json',
data_prefix=dict(img='UBody/images/'+scene[i]+'/'),
pipeline=[],
)
)
# data loaders
train_dataloader = dict(
batch_size=64,
num_workers=10,
persistent_workers=True,
sampler=dict(type='DefaultSampler', shuffle=True),
dataset=dict(
type='CombinedDataset',
metainfo=dict(from_file='configs/_base_/datasets/coco_wholebody.py'),
datasets=datasets,
pipeline=train_pipeline,
test_mode=False,
))
val_dataloader = dict(
batch_size=32,
num_workers=10,
persistent_workers=True,
drop_last=False,
sampler=dict(type='DefaultSampler', shuffle=False, round_up=False),
dataset=dict(
type=dataset_type,
data_root=data_root,
data_mode=data_mode,
ann_file='coco/annotations/coco_wholebody_val_v1.0.json',
bbox_file=f'{data_root}coco/person_detection_results/'
'COCO_val2017_detections_AP_H_56_person.json',
data_prefix=dict(img='coco/val2017/'),
test_mode=True,
pipeline=val_pipeline,
))
test_dataloader = val_dataloader
# hooks
default_hooks = dict(
checkpoint=dict(
save_best='coco-wholebody/AP', rule='greater', max_keep_ckpts=1))
custom_hooks = [
dict(
type='EMAHook',
ema_type='ExpMomentumEMA',
momentum=0.0002,
update_buffers=True,
priority=49),
dict(
type='mmdet.PipelineSwitchHook',
switch_epoch=max_epochs - stage2_num_epochs,
switch_pipeline=train_pipeline_stage2)
]
# evaluators
val_evaluator = dict(
type='CocoWholeBodyMetric',
ann_file=data_root + 'coco/annotations/coco_wholebody_val_v1.0.json')
test_evaluator = val_evaluator

View File

@ -1,259 +0,0 @@
# _base_ = ['../../../_base_/default_runtime.py']
# runtime
max_epochs = 270
stage2_num_epochs = 30
base_lr = 4e-3
train_cfg = dict(max_epochs=max_epochs, val_interval=10)
randomness = dict(seed=21)
# optimizer
optim_wrapper = dict(
type='OptimWrapper',
optimizer=dict(type='AdamW', lr=base_lr, weight_decay=0.05),
paramwise_cfg=dict(
norm_decay_mult=0, bias_decay_mult=0, bypass_duplicate=True))
# learning rate
param_scheduler = [
dict(
type='LinearLR',
start_factor=1.0e-5,
by_epoch=False,
begin=0,
end=1000),
dict(
# use cosine lr from 150 to 300 epoch
type='CosineAnnealingLR',
eta_min=base_lr * 0.05,
begin=max_epochs // 2,
end=max_epochs,
T_max=max_epochs // 2,
by_epoch=True,
convert_to_iter_based=True),
]
# automatically scaling LR based on the actual training batch size
auto_scale_lr = dict(base_batch_size=512)
# codec settings
codec = dict(
type='SimCCLabel',
input_size=(192, 256),
sigma=(4.9, 5.66),
simcc_split_ratio=2.0,
normalize=False,
use_dark=False)
# model settings
model = dict(
type='TopdownPoseEstimator',
data_preprocessor=dict(
type='PoseDataPreprocessor',
mean=[123.675, 116.28, 103.53],
std=[58.395, 57.12, 57.375],
bgr_to_rgb=True),
backbone=dict(
_scope_='mmdet',
type='CSPNeXt',
arch='P5',
expand_ratio=0.5,
deepen_factor=0.167,
widen_factor=0.375,
out_indices=(4, ),
channel_attention=True,
norm_cfg=dict(type='SyncBN'),
act_cfg=dict(type='SiLU'),
init_cfg=dict(
type='Pretrained',
prefix='backbone.',
checkpoint='https://download.openmmlab.com/mmpose/v1/projects/'
'rtmpose/cspnext-tiny_udp-aic-coco_210e-256x192-cbed682d_20230130.pth'
)),
head=dict(
type='RTMCCHead',
in_channels=384,
out_channels=133,
input_size=codec['input_size'],
in_featuremap_size=(6, 8),
simcc_split_ratio=codec['simcc_split_ratio'],
final_layer_kernel_size=7,
gau_cfg=dict(
hidden_dims=256,
s=128,
expansion_factor=2,
dropout_rate=0.,
drop_path=0.,
act_fn='SiLU',
use_rel_bias=False,
pos_enc=False),
loss=dict(
type='KLDiscretLoss',
use_target_weight=True,
beta=10.,
label_softmax=True),
decoder=codec),
test_cfg=dict(flip_test=True, ))
# base dataset settings
dataset_type = 'CocoWholeBodyDataset'
data_mode = 'topdown'
data_root = 'data/'
backend_args = dict(backend='local')
# backend_args = dict(
# backend='petrel',
# path_mapping=dict({
# f'{data_root}': 's3://openmmlab/datasets/detection/coco/',
# f'{data_root}': 's3://openmmlab/datasets/detection/coco/'
# }))
# pipelines
train_pipeline = [
dict(type='LoadImage', backend_args=backend_args),
dict(type='GetBBoxCenterScale'),
dict(type='RandomFlip', direction='horizontal'),
dict(type='RandomHalfBody'),
dict(
type='RandomBBoxTransform', scale_factor=[0.6, 1.4], rotate_factor=80),
dict(type='TopdownAffine', input_size=codec['input_size']),
dict(type='mmdet.YOLOXHSVRandomAug'),
dict(
type='Albumentation',
transforms=[
dict(type='Blur', p=0.1),
dict(type='MedianBlur', p=0.1),
dict(
type='CoarseDropout',
max_holes=1,
max_height=0.4,
max_width=0.4,
min_holes=1,
min_height=0.2,
min_width=0.2,
p=1.0),
]),
dict(type='GenerateTarget', encoder=codec),
dict(type='PackPoseInputs')
]
val_pipeline = [
dict(type='LoadImage', backend_args=backend_args),
dict(type='GetBBoxCenterScale'),
dict(type='TopdownAffine', input_size=codec['input_size']),
dict(type='PackPoseInputs')
]
train_pipeline_stage2 = [
dict(type='LoadImage', backend_args=backend_args),
dict(type='GetBBoxCenterScale'),
dict(type='RandomFlip', direction='horizontal'),
dict(type='RandomHalfBody'),
dict(
type='RandomBBoxTransform',
shift_factor=0.,
scale_factor=[0.75, 1.25],
rotate_factor=60),
dict(type='TopdownAffine', input_size=codec['input_size']),
dict(type='mmdet.YOLOXHSVRandomAug'),
dict(
type='Albumentation',
transforms=[
dict(type='Blur', p=0.1),
dict(type='MedianBlur', p=0.1),
dict(
type='CoarseDropout',
max_holes=1,
max_height=0.4,
max_width=0.4,
min_holes=1,
min_height=0.2,
min_width=0.2,
p=0.5),
]),
dict(type='GenerateTarget', encoder=codec),
dict(type='PackPoseInputs')
]
datasets = []
dataset_coco=dict(
type=dataset_type,
data_root=data_root,
data_mode=data_mode,
ann_file='coco/annotations/coco_wholebody_train_v1.0.json',
data_prefix=dict(img='coco/train2017/'),
pipeline=[],
)
datasets.append(dataset_coco)
scene = ['Magic_show', 'Entertainment', 'ConductMusic', 'Online_class',
'TalkShow', 'Speech', 'Fitness', 'Interview', 'Olympic', 'TVShow',
'Singing', 'SignLanguage', 'Movie', 'LiveVlog', 'VideoConference']
for i in range(len(scene)):
datasets.append(
dict(
type=dataset_type,
data_root=data_root,
data_mode=data_mode,
ann_file='UBody/annotations/'+scene[i]+'/keypoint_annotation.json',
data_prefix=dict(img='UBody/images/'+scene[i]+'/'),
pipeline=[],
)
)
# data loaders
train_dataloader = dict(
batch_size=64,
num_workers=10,
persistent_workers=True,
sampler=dict(type='DefaultSampler', shuffle=True),
dataset=dict(
type='CombinedDataset',
metainfo=dict(from_file='configs/_base_/datasets/coco_wholebody.py'),
datasets=datasets,
pipeline=train_pipeline,
test_mode=False,
))
val_dataloader = dict(
batch_size=32,
num_workers=10,
persistent_workers=True,
drop_last=False,
sampler=dict(type='DefaultSampler', shuffle=False, round_up=False),
dataset=dict(
type=dataset_type,
data_root=data_root,
data_mode=data_mode,
ann_file='coco/annotations/coco_wholebody_val_v1.0.json',
bbox_file=f'{data_root}coco/person_detection_results/'
'COCO_val2017_detections_AP_H_56_person.json',
data_prefix=dict(img='coco/val2017/'),
test_mode=True,
pipeline=val_pipeline,
))
test_dataloader = val_dataloader
# hooks
default_hooks = dict(
checkpoint=dict(
save_best='coco-wholebody/AP', rule='greater', max_keep_ckpts=1))
custom_hooks = [
dict(
type='EMAHook',
ema_type='ExpMomentumEMA',
momentum=0.0002,
update_buffers=True,
priority=49),
dict(
type='mmdet.PipelineSwitchHook',
switch_epoch=max_epochs - stage2_num_epochs,
switch_pipeline=train_pipeline_stage2)
]
# evaluators
val_evaluator = dict(
type='CocoWholeBodyMetric',
ann_file=data_root + 'coco/annotations/coco_wholebody_val_v1.0.json')
test_evaluator = val_evaluator

View File

@ -1,244 +0,0 @@
img_scale = (640, 640) # width, height
# model settings
model = dict(
type='YOLOX',
data_preprocessor=dict(
type='DetDataPreprocessor',
pad_size_divisor=32,
batch_augments=[
dict(
type='BatchSyncRandomResize',
random_size_range=(480, 800),
size_divisor=32,
interval=10)
]),
backbone=dict(
type='CSPDarknet',
deepen_factor=1.0,
widen_factor=1.0,
out_indices=(2, 3, 4),
use_depthwise=False,
spp_kernal_sizes=(5, 9, 13),
norm_cfg=dict(type='BN', momentum=0.03, eps=0.001),
act_cfg=dict(type='Swish'),
),
neck=dict(
type='YOLOXPAFPN',
in_channels=[256, 512, 1024],
out_channels=256,
num_csp_blocks=3,
use_depthwise=False,
upsample_cfg=dict(scale_factor=2, mode='nearest'),
norm_cfg=dict(type='BN', momentum=0.03, eps=0.001),
act_cfg=dict(type='Swish')),
bbox_head=dict(
type='YOLOXHead',
num_classes=80,
in_channels=256,
feat_channels=256,
stacked_convs=2,
strides=(8, 16, 32),
use_depthwise=False,
norm_cfg=dict(type='BN', momentum=0.03, eps=0.001),
act_cfg=dict(type='Swish'),
loss_cls=dict(
type='CrossEntropyLoss',
use_sigmoid=True,
reduction='sum',
loss_weight=1.0),
loss_bbox=dict(
type='IoULoss',
mode='square',
eps=1e-16,
reduction='sum',
loss_weight=5.0),
loss_obj=dict(
type='CrossEntropyLoss',
use_sigmoid=True,
reduction='sum',
loss_weight=1.0),
loss_l1=dict(type='L1Loss', reduction='sum', loss_weight=1.0)),
train_cfg=dict(assigner=dict(type='SimOTAAssigner', center_radius=2.5)),
# In order to align the source code, the threshold of the val phase is
# 0.01, and the threshold of the test phase is 0.001.
test_cfg=dict(score_thr=0.01, nms=dict(type='nms', iou_threshold=0.65)))
# dataset settings
data_root = 'data/coco/'
dataset_type = 'CocoDataset'
# Example to use different file client
# Method 1: simply set the data root and let the file I/O module
# automatically infer from prefix (not support LMDB and Memcache yet)
# data_root = 's3://openmmlab/datasets/detection/coco/'
# Method 2: Use `backend_args`, `file_client_args` in versions before 3.0.0rc6
# backend_args = dict(
# backend='petrel',
# path_mapping=dict({
# './data/': 's3://openmmlab/datasets/detection/',
# 'data/': 's3://openmmlab/datasets/detection/'
# }))
backend_args = None
train_pipeline = [
dict(type='Mosaic', img_scale=img_scale, pad_val=114.0),
dict(
type='RandomAffine',
scaling_ratio_range=(0.1, 2),
# img_scale is (width, height)
border=(-img_scale[0] // 2, -img_scale[1] // 2)),
dict(
type='MixUp',
img_scale=img_scale,
ratio_range=(0.8, 1.6),
pad_val=114.0),
dict(type='YOLOXHSVRandomAug'),
dict(type='RandomFlip', prob=0.5),
# According to the official implementation, multi-scale
# training is not considered here but in the
# 'mmdet/models/detectors/yolox.py'.
# Resize and Pad are for the last 15 epochs when Mosaic,
# RandomAffine, and MixUp are closed by YOLOXModeSwitchHook.
dict(type='Resize', scale=img_scale, keep_ratio=True),
dict(
type='Pad',
pad_to_square=True,
# If the image is three-channel, the pad value needs
# to be set separately for each channel.
pad_val=dict(img=(114.0, 114.0, 114.0))),
dict(type='FilterAnnotations', min_gt_bbox_wh=(1, 1), keep_empty=False),
dict(type='PackDetInputs')
]
train_dataset = dict(
# use MultiImageMixDataset wrapper to support mosaic and mixup
type='MultiImageMixDataset',
dataset=dict(
type=dataset_type,
data_root=data_root,
ann_file='annotations/instances_train2017.json',
data_prefix=dict(img='train2017/'),
pipeline=[
dict(type='LoadImageFromFile', backend_args=backend_args),
dict(type='LoadAnnotations', with_bbox=True)
],
filter_cfg=dict(filter_empty_gt=False, min_size=32),
backend_args=backend_args),
pipeline=train_pipeline)
test_pipeline = [
dict(type='LoadImageFromFile', backend_args=backend_args),
dict(type='Resize', scale=img_scale, keep_ratio=True),
dict(
type='Pad',
pad_to_square=True,
pad_val=dict(img=(114.0, 114.0, 114.0))),
dict(type='LoadAnnotations', with_bbox=True),
dict(
type='PackDetInputs',
meta_keys=('img_id', 'img_path', 'ori_shape', 'img_shape',
'scale_factor'))
]
train_dataloader = dict(
batch_size=8,
num_workers=4,
persistent_workers=True,
sampler=dict(type='DefaultSampler', shuffle=True),
dataset=train_dataset)
val_dataloader = dict(
batch_size=8,
num_workers=4,
persistent_workers=True,
drop_last=False,
sampler=dict(type='DefaultSampler', shuffle=False),
dataset=dict(
type=dataset_type,
data_root=data_root,
ann_file='annotations/instances_val2017.json',
data_prefix=dict(img='val2017/'),
test_mode=True,
pipeline=test_pipeline,
backend_args=backend_args))
test_dataloader = val_dataloader
val_evaluator = dict(
type='CocoMetric',
ann_file=data_root + 'annotations/instances_val2017.json',
metric='bbox',
backend_args=backend_args)
test_evaluator = val_evaluator
# training settings
max_epochs = 300
num_last_epochs = 15
interval = 10
train_cfg = dict(max_epochs=max_epochs, val_interval=interval)
# optimizer
# default 8 gpu
base_lr = 0.01
optim_wrapper = dict(
type='OptimWrapper',
optimizer=dict(
type='SGD', lr=base_lr, momentum=0.9, weight_decay=5e-4,
nesterov=True),
paramwise_cfg=dict(norm_decay_mult=0., bias_decay_mult=0.))
# learning rate
param_scheduler = [
dict(
# use quadratic formula to warm up 5 epochs
# and lr is updated by iteration
type='mmdet.QuadraticWarmupLR',
by_epoch=True,
begin=0,
end=5,
convert_to_iter_based=True),
dict(
# use cosine lr from 5 to 285 epoch
type='CosineAnnealingLR',
eta_min=base_lr * 0.05,
begin=5,
T_max=max_epochs - num_last_epochs,
end=max_epochs - num_last_epochs,
by_epoch=True,
convert_to_iter_based=True),
dict(
# use fixed lr during last 15 epochs
type='ConstantLR',
by_epoch=True,
factor=1,
begin=max_epochs - num_last_epochs,
end=max_epochs,
)
]
default_hooks = dict(
checkpoint=dict(
interval=interval,
max_keep_ckpts=3 # only keep latest 3 checkpoints
))
custom_hooks = [
dict(
type='YOLOXModeSwitchHook',
num_last_epochs=num_last_epochs,
priority=48),
dict(type='SyncNormHook', priority=48),
dict(
type='EMAHook',
ema_type='ExpMomentumEMA',
momentum=0.0001,
update_buffers=True,
priority=49)
]
# NOTE: `auto_scale_lr` is for automatically scaling LR,
# USER SHOULD NOT CHANGE ITS VALUES.
# base_batch_size = (8 GPUs) x (8 samples per GPU)
auto_scale_lr = dict(base_batch_size=64)

View File

@ -1,307 +0,0 @@
import math
import numpy as np
import cv2
eps = 0.01
def smart_resize(x, s):
Ht, Wt = s
if x.ndim == 2:
Ho, Wo = x.shape
Co = 1
else:
Ho, Wo, Co = x.shape
if Co == 3 or Co == 1:
k = float(Ht + Wt) / float(Ho + Wo)
return cv2.resize(x, (int(Wt), int(Ht)), interpolation=cv2.INTER_AREA if k < 1 else cv2.INTER_LANCZOS4)
else:
return np.stack([smart_resize(x[:, :, i], s) for i in range(Co)], axis=2)
def smart_resize_k(x, fx, fy):
if x.ndim == 2:
Ho, Wo = x.shape
Co = 1
else:
Ho, Wo, Co = x.shape
Ht, Wt = Ho * fy, Wo * fx
if Co == 3 or Co == 1:
k = float(Ht + Wt) / float(Ho + Wo)
return cv2.resize(x, (int(Wt), int(Ht)), interpolation=cv2.INTER_AREA if k < 1 else cv2.INTER_LANCZOS4)
else:
return np.stack([smart_resize_k(x[:, :, i], fx, fy) for i in range(Co)], axis=2)
def padRightDownCorner(img, stride, padValue):
h = img.shape[0]
w = img.shape[1]
pad = 4 * [None]
pad[0] = 0 # up
pad[1] = 0 # left
pad[2] = 0 if (h % stride == 0) else stride - (h % stride) # down
pad[3] = 0 if (w % stride == 0) else stride - (w % stride) # right
img_padded = img
pad_up = np.tile(img_padded[0:1, :, :]*0 + padValue, (pad[0], 1, 1))
img_padded = np.concatenate((pad_up, img_padded), axis=0)
pad_left = np.tile(img_padded[:, 0:1, :]*0 + padValue, (1, pad[1], 1))
img_padded = np.concatenate((pad_left, img_padded), axis=1)
pad_down = np.tile(img_padded[-2:-1, :, :]*0 + padValue, (pad[2], 1, 1))
img_padded = np.concatenate((img_padded, pad_down), axis=0)
pad_right = np.tile(img_padded[:, -2:-1, :]*0 + padValue, (1, pad[3], 1))
img_padded = np.concatenate((img_padded, pad_right), axis=1)
return img_padded, pad
def transfer(model, model_weights):
transfered_model_weights = {}
for weights_name in model.state_dict().keys():
transfered_model_weights[weights_name] = model_weights['.'.join(weights_name.split('.')[1:])]
return transfered_model_weights
def draw_bodypose(canvas, candidate, subset):
H, W, _C = canvas.shape
candidate = np.array(candidate)
subset = np.array(subset)
stickwidth = 4
limbSeq = [[2, 3], [2, 6], [3, 4], [4, 5], [6, 7], [7, 8], [2, 9], [9, 10], \
[10, 11], [2, 12], [12, 13], [13, 14], [2, 1], [1, 15], [15, 17], \
[1, 16], [16, 18], [3, 17], [6, 18]]
colors = [[255, 0, 0], [255, 85, 0], [255, 170, 0], [255, 255, 0], [170, 255, 0], [85, 255, 0], [0, 255, 0], \
[0, 255, 85], [0, 255, 170], [0, 255, 255], [0, 170, 255], [0, 85, 255], [0, 0, 255], [85, 0, 255], \
[170, 0, 255], [255, 0, 255], [255, 0, 170], [255, 0, 85]]
for i in range(17):
for n in range(len(subset)):
index = subset[n][np.array(limbSeq[i]) - 1]
if -1 in index:
continue
Y = candidate[index.astype(int), 0] * float(W)
X = candidate[index.astype(int), 1] * float(H)
mX = np.mean(X)
mY = np.mean(Y)
length = ((X[0] - X[1]) ** 2 + (Y[0] - Y[1]) ** 2) ** 0.5
angle = math.degrees(math.atan2(X[0] - X[1], Y[0] - Y[1]))
polygon = cv2.ellipse2Poly((int(mY), int(mX)), (int(length / 2), stickwidth), int(angle), 0, 360, 1)
cv2.fillConvexPoly(canvas, polygon, colors[i])
canvas = (canvas * 0.6).astype(np.uint8)
for i in range(18):
for n in range(len(subset)):
index = int(subset[n][i])
if index == -1:
continue
x, y = candidate[index][0:2]
x = int(x * W)
y = int(y * H)
cv2.circle(canvas, (int(x), int(y)), 4, colors[i], thickness=-1)
return canvas
def draw_handpose(canvas, all_hand_peaks):
import matplotlib as mpl
H, W, _C = canvas.shape
edges = [[0, 1], [1, 2], [2, 3], [3, 4], [0, 5], [5, 6], [6, 7], [7, 8], [0, 9], [9, 10], \
[10, 11], [11, 12], [0, 13], [13, 14], [14, 15], [15, 16], [0, 17], [17, 18], [18, 19], [19, 20]]
# (person_number*2, 21, 2)
for i in range(len(all_hand_peaks)):
peaks = all_hand_peaks[i]
peaks = np.array(peaks)
for ie, e in enumerate(edges):
x1, y1 = peaks[e[0]]
x2, y2 = peaks[e[1]]
x1 = int(x1 * W)
y1 = int(y1 * H)
x2 = int(x2 * W)
y2 = int(y2 * H)
if x1 > eps and y1 > eps and x2 > eps and y2 > eps:
cv2.line(canvas, (x1, y1), (x2, y2), mpl.colors.hsv_to_rgb([ie / float(len(edges)), 1.0, 1.0]) * 255, thickness=2)
for _, keyponit in enumerate(peaks):
x, y = keyponit
x = int(x * W)
y = int(y * H)
if x > eps and y > eps:
cv2.circle(canvas, (x, y), 4, (0, 0, 255), thickness=-1)
return canvas
def draw_facepose(canvas, all_lmks):
H, W, _C = canvas.shape
for lmks in all_lmks:
lmks = np.array(lmks)
for lmk in lmks:
x, y = lmk
x = int(x * W)
y = int(y * H)
if x > eps and y > eps:
cv2.circle(canvas, (x, y), 3, (255, 255, 255), thickness=-1)
return canvas
# detect hand according to body pose keypoints
# please refer to https://github.com/CMU-Perceptual-Computing-Lab/openpose/blob/master/src/openpose/hand/handDetector.cpp
def handDetect(candidate, subset, oriImg):
# right hand: wrist 4, elbow 3, shoulder 2
# left hand: wrist 7, elbow 6, shoulder 5
ratioWristElbow = 0.33
detect_result = []
image_height, image_width = oriImg.shape[0:2]
for person in subset.astype(int):
# if any of three not detected
has_left = np.sum(person[[5, 6, 7]] == -1) == 0
has_right = np.sum(person[[2, 3, 4]] == -1) == 0
if not (has_left or has_right):
continue
hands = []
#left hand
if has_left:
left_shoulder_index, left_elbow_index, left_wrist_index = person[[5, 6, 7]]
x1, y1 = candidate[left_shoulder_index][:2]
x2, y2 = candidate[left_elbow_index][:2]
x3, y3 = candidate[left_wrist_index][:2]
hands.append([x1, y1, x2, y2, x3, y3, True])
# right hand
if has_right:
right_shoulder_index, right_elbow_index, right_wrist_index = person[[2, 3, 4]]
x1, y1 = candidate[right_shoulder_index][:2]
x2, y2 = candidate[right_elbow_index][:2]
x3, y3 = candidate[right_wrist_index][:2]
hands.append([x1, y1, x2, y2, x3, y3, False])
for x1, y1, x2, y2, x3, y3, is_left in hands:
# pos_hand = pos_wrist + ratio * (pos_wrist - pos_elbox) = (1 + ratio) * pos_wrist - ratio * pos_elbox
# handRectangle.x = posePtr[wrist*3] + ratioWristElbow * (posePtr[wrist*3] - posePtr[elbow*3]);
# handRectangle.y = posePtr[wrist*3+1] + ratioWristElbow * (posePtr[wrist*3+1] - posePtr[elbow*3+1]);
# const auto distanceWristElbow = getDistance(poseKeypoints, person, wrist, elbow);
# const auto distanceElbowShoulder = getDistance(poseKeypoints, person, elbow, shoulder);
# handRectangle.width = 1.5f * fastMax(distanceWristElbow, 0.9f * distanceElbowShoulder);
x = x3 + ratioWristElbow * (x3 - x2)
y = y3 + ratioWristElbow * (y3 - y2)
distanceWristElbow = math.sqrt((x3 - x2) ** 2 + (y3 - y2) ** 2)
distanceElbowShoulder = math.sqrt((x2 - x1) ** 2 + (y2 - y1) ** 2)
width = 1.5 * max(distanceWristElbow, 0.9 * distanceElbowShoulder)
# x-y refers to the center --> offset to topLeft point
# handRectangle.x -= handRectangle.width / 2.f;
# handRectangle.y -= handRectangle.height / 2.f;
x -= width / 2
y -= width / 2 # width = height
# overflow the image
if x < 0:
x = 0
if y < 0:
y = 0
width1 = width
width2 = width
if x + width > image_width:
width1 = image_width - x
if y + width > image_height:
width2 = image_height - y
width = min(width1, width2)
# the max hand box value is 20 pixels
if width >= 20:
detect_result.append([int(x), int(y), int(width), is_left])
'''
return value: [[x, y, w, True if left hand else False]].
width=height since the network require squared input.
x, y is the coordinate of top left
'''
return detect_result
# Written by Lvmin
def faceDetect(candidate, subset, oriImg):
# left right eye ear 14 15 16 17
detect_result = []
image_height, image_width = oriImg.shape[0:2]
for person in subset.astype(int):
has_head = person[0] > -1
if not has_head:
continue
has_left_eye = person[14] > -1
has_right_eye = person[15] > -1
has_left_ear = person[16] > -1
has_right_ear = person[17] > -1
if not (has_left_eye or has_right_eye or has_left_ear or has_right_ear):
continue
head, left_eye, right_eye, left_ear, right_ear = person[[0, 14, 15, 16, 17]]
width = 0.0
x0, y0 = candidate[head][:2]
if has_left_eye:
x1, y1 = candidate[left_eye][:2]
d = max(abs(x0 - x1), abs(y0 - y1))
width = max(width, d * 3.0)
if has_right_eye:
x1, y1 = candidate[right_eye][:2]
d = max(abs(x0 - x1), abs(y0 - y1))
width = max(width, d * 3.0)
if has_left_ear:
x1, y1 = candidate[left_ear][:2]
d = max(abs(x0 - x1), abs(y0 - y1))
width = max(width, d * 1.5)
if has_right_ear:
x1, y1 = candidate[right_ear][:2]
d = max(abs(x0 - x1), abs(y0 - y1))
width = max(width, d * 1.5)
x, y = x0, y0
x -= width
y -= width
if x < 0:
x = 0
if y < 0:
y = 0
width1 = width * 2
width2 = width * 2
if x + width > image_width:
width1 = image_width - x
if y + width > image_height:
width2 = image_height - y
width = min(width1, width2)
if width >= 20:
detect_result.append([int(x), int(y), int(width)])
return detect_result
# get max index of 2d array
def npmax(array):
arrayindex = array.argmax(1)
arrayvalue = array.max(1)
i = arrayvalue.argmax()
j = arrayindex[i]
return i, j

View File

@ -1,111 +0,0 @@
# Copyright (c) OpenMMLab. All rights reserved.
import os
import numpy as np
from modules.shared import log
mmok = True
try:
import mmcv # pylint: disable=unused-import
except ImportError as e:
mmok = False
log.error(f"Control processor DWPose: {e}")
try:
from mmpose.apis import inference_topdown
from mmpose.apis import init_model as init_pose_estimator
from mmpose.evaluation.functional import nms
from mmpose.utils import adapt_mmdet_pipeline
from mmpose.structures import merge_data_samples
except ImportError as e:
mmok = False
log.error(f"Control processor DWPose: {e}")
try:
from mmdet.apis import inference_detector, init_detector
except ImportError as e:
mmok = False
log.error(f"Control processor DWPose: {e}")
def inference_detector(*args, **kwargs):
return lambda *args, **kwargs: None
if not mmok:
log.error('Control processor DWPose: OpenMMLab is not installed')
class Wholebody:
def __init__(self, det_config=None, det_ckpt=None, pose_config=None, pose_ckpt=None, device="cpu"):
if not mmok:
self.detector = lambda *args, **kwargs: None
return None
prefix = os.path.dirname(__file__)
if det_config is None:
det_config = "config/yolox_l_8xb8-300e_coco.py"
if pose_config is None:
pose_config = "config/dwpose-l_384x288.py"
if not det_config.startswith('prefix'):
det_config = os.path.join(prefix, det_config)
if not pose_config.startswith('prefix'):
pose_config = os.path.join(prefix, pose_config)
if det_ckpt is None:
det_ckpt = 'https://download.openmmlab.com/mmdetection/v2.0/yolox/yolox_l_8x8_300e_coco/yolox_l_8x8_300e_coco_20211126_140236-d3bd2b23.pth'
if pose_ckpt is None:
pose_ckpt = "https://huggingface.co/wanghaofan/dw-ll_ucoco_384/resolve/main/dw-ll_ucoco_384.pth"
# build detector
self.detector = init_detector(det_config, det_ckpt, device=device)
self.detector.cfg = adapt_mmdet_pipeline(self.detector.cfg)
# build pose estimator
self.pose_estimator = init_pose_estimator(
pose_config,
pose_ckpt,
device=device)
def to(self, device):
self.detector.to(device)
self.pose_estimator.to(device)
return self
def __call__(self, oriImg):
if not mmok:
return None, None
# predict bbox
det_result = inference_detector(self.detector, oriImg)
pred_instance = det_result.pred_instances.cpu().numpy()
bboxes = np.concatenate((pred_instance.bboxes, pred_instance.scores[:, None]), axis=1)
bboxes = bboxes[np.logical_and(pred_instance.labels == 0, pred_instance.scores > 0.5)]
# set NMS threshold
bboxes = bboxes[nms(bboxes, 0.7), :4]
# predict keypoints
if len(bboxes) == 0:
pose_results = inference_topdown(self.pose_estimator, oriImg)
else:
pose_results = inference_topdown(self.pose_estimator, oriImg, bboxes)
preds = merge_data_samples(pose_results)
preds = preds.pred_instances
# preds = pose_results[0].pred_instances
keypoints = preds.get('transformed_keypoints', preds.keypoints)
if 'keypoint_scores' in preds:
scores = preds.keypoint_scores
else:
scores = np.ones(keypoints.shape[:-1])
if 'keypoints_visible' in preds:
visible = preds.keypoints_visible
else:
visible = np.ones(keypoints.shape[:-1])
keypoints_info = np.concatenate(
(keypoints, scores[..., None], visible[..., None]),
axis=-1)
# compute neck joint
neck = np.mean(keypoints_info[:, [5, 6]], axis=1)
# neck score when visualizing pred
neck[:, 2:4] = np.logical_and(
keypoints_info[:, 5, 2:4] > 0.3,
keypoints_info[:, 6, 2:4] > 0.3).astype(int)
new_keypoints_info = np.insert(
keypoints_info, 17, neck, axis=1)
mmpose_idx = [17, 6, 8, 10, 7, 9, 12, 14, 16, 13, 15, 2, 1, 4, 3]
openpose_idx = [1, 2, 3, 4, 6, 7, 8, 9, 10, 12, 13, 14, 15, 16, 17]
new_keypoints_info[:, openpose_idx] = new_keypoints_info[:, mmpose_idx]
keypoints_info = new_keypoints_info
keypoints, scores, visible = keypoints_info[..., :2], keypoints_info[..., 2], keypoints_info[..., 3]
return keypoints, scores

View File

@ -60,7 +60,6 @@ def delay_load_config():
from modules.control.proc.midas import MidasDetector
from modules.control.proc.mlsd import MLSDdetector
from modules.control.proc.openpose import OpenposeDetector
from modules.control.proc.dwpose import DWposeDetector
from modules.control.proc.segment_anything import SamDetector
from modules.control.proc.zoe import ZoeDetector
from modules.control.proc.marigold import MarigoldDetector
@ -73,7 +72,6 @@ def delay_load_config():
'None': {},
# pose models
'OpenPose': {'class': OpenposeDetector, 'checkpoint': True, 'params': {'include_body': True, 'include_hand': False, 'include_face': False}},
'DWPose': {'class': DWposeDetector, 'checkpoint': False, 'model': 'Tiny', 'params': {'min_confidence': 0.3}},
'MediaPipe Face': {'class': MediapipeFaceDetector, 'checkpoint': False, 'params': {'max_faces': 1, 'min_confidence': 0.5}},
# outline models
'Canny': {'class': CannyDetector, 'checkpoint': False, 'params': {'low_threshold': 100, 'high_threshold': 200}},