automatic/modules/control/proc/rtmlib_pose/__init__.py

import os
import numpy as np
from PIL import Image
from modules.logger import log


# ---------------------------------------------------------------------------
# COCO-WholeBody 133-keypoint layout
#   0-16:    body (17 COCO keypoints)
#   17-22:   feet (6 points)
#   23-90:   face (68 landmarks)
#   91-111:  left hand (21 points)
#   112-132: right hand (21 points)
# ---------------------------------------------------------------------------

BODY_SKELETON = [
    [15, 13], [13, 11], [16, 14], [14, 12], [11, 12], [5, 11], [6, 12],
    [5, 6], [5, 7], [6, 8], [7, 9], [8, 10], [1, 2], [0, 1], [0, 2],
    [1, 3], [2, 4], [3, 5], [4, 6],
]

BODY_COLORS = [
    (255, 0, 0), (255, 85, 0), (255, 170, 0), (255, 255, 0), (170, 255, 0),
    (85, 255, 0), (0, 255, 0), (0, 255, 85), (0, 255, 170), (0, 255, 255),
    (0, 170, 255), (0, 85, 255), (0, 0, 255), (85, 0, 255), (170, 0, 255),
    (255, 0, 255), (255, 0, 170), (255, 0, 85), (255, 0, 0),
]

FOOT_SKELETON = [
    [0, 1], [1, 2],   # left ankle -> big toe -> small toe
    [3, 4], [4, 5],   # right ankle -> big toe -> small toe
]
FOOT_OFFSET = 17

HAND_EDGES = [
    [0, 1], [1, 2], [2, 3], [3, 4],
    [0, 5], [5, 6], [6, 7], [7, 8],
    [0, 9], [9, 10], [10, 11], [11, 12],
    [0, 13], [13, 14], [14, 15], [15, 16],
    [0, 17], [17, 18], [18, 19], [19, 20],
]
LEFT_HAND_OFFSET = 91
RIGHT_HAND_OFFSET = 112
FACE_OFFSET = 23
FACE_COUNT = 68


def _hsv_to_rgb(h, s, v):
    """Convert HSV [0-1] to BGR tuple for cv2."""
    import colorsys
    r, g, b = colorsys.hsv_to_rgb(h, s, v)
    return (int(b * 255), int(g * 255), int(r * 255))


def draw_body(canvas, keypoints, scores, min_conf):
    import cv2
    n_kps = min(len(keypoints), 17)
    for i in range(n_kps):
        if scores[i] >= min_conf:
            x, y = int(keypoints[i][0]), int(keypoints[i][1])
            color = BODY_COLORS[i % len(BODY_COLORS)]
            cv2.circle(canvas, (x, y), 4, color, -1)
    for idx, (start, end) in enumerate(BODY_SKELETON):
        if start < n_kps and end < n_kps:
            if scores[start] >= min_conf and scores[end] >= min_conf:
                pt1 = (int(keypoints[start][0]), int(keypoints[start][1]))
                pt2 = (int(keypoints[end][0]), int(keypoints[end][1]))
                color = BODY_COLORS[idx % len(BODY_COLORS)]
                cv2.line(canvas, pt1, pt2, color, 2)


def draw_feet(canvas, keypoints, scores, min_conf):
    import cv2
    for start_local, end_local in FOOT_SKELETON:
        start = start_local + FOOT_OFFSET
        end = end_local + FOOT_OFFSET
        if start < len(keypoints) and end < len(keypoints):
            if scores[start] >= min_conf and scores[end] >= min_conf:
                pt1 = (int(keypoints[start][0]), int(keypoints[start][1]))
                pt2 = (int(keypoints[end][0]), int(keypoints[end][1]))
                cv2.line(canvas, pt1, pt2, (0, 255, 170), 2)
    for i in range(FOOT_OFFSET, min(FOOT_OFFSET + 6, len(keypoints))):
        if scores[i] >= min_conf:
            cv2.circle(canvas, (int(keypoints[i][0]), int(keypoints[i][1])), 3, (0, 255, 170), -1)


def draw_hand(canvas, keypoints, scores, offset, min_conf):
    import cv2
    n_edges = len(HAND_EDGES)
    for ie, (start_local, end_local) in enumerate(HAND_EDGES):
        start = start_local + offset
        end = end_local + offset
        if start < len(keypoints) and end < len(keypoints):
            if scores[start] >= min_conf and scores[end] >= min_conf:
                pt1 = (int(keypoints[start][0]), int(keypoints[start][1]))
                pt2 = (int(keypoints[end][0]), int(keypoints[end][1]))
                color = _hsv_to_rgb(ie / n_edges, 1.0, 1.0)
                cv2.line(canvas, pt1, pt2, color, 2)
    for i in range(offset, min(offset + 21, len(keypoints))):
        if scores[i] >= min_conf:
            cv2.circle(canvas, (int(keypoints[i][0]), int(keypoints[i][1])), 3, (0, 0, 255), -1)


def draw_hands(canvas, keypoints, scores, min_conf):
    draw_hand(canvas, keypoints, scores, LEFT_HAND_OFFSET, min_conf)
    draw_hand(canvas, keypoints, scores, RIGHT_HAND_OFFSET, min_conf)


def draw_face(canvas, keypoints, scores, min_conf):
    import cv2
    for i in range(FACE_OFFSET, min(FACE_OFFSET + FACE_COUNT, len(keypoints))):
        if scores[i] >= min_conf:
            cv2.circle(canvas, (int(keypoints[i][0]), int(keypoints[i][1])), 2, (255, 255, 255), -1)


def draw_skeleton(canvas, keypoints, scores, min_conf):
    """Draw a full pose skeleton for one person. Used by ViTPoseDetector."""
    is_wholebody = len(keypoints) >= 133
    draw_body(canvas, keypoints, scores, min_conf)
    if is_wholebody:
        draw_feet(canvas, keypoints, scores, min_conf)
        draw_hands(canvas, keypoints, scores, min_conf)
        draw_face(canvas, keypoints, scores, min_conf)
    return canvas


class RtmlibPoseDetector:
    def __init__(self, pose_model, mode, openpose=True):
        self.pose_model = pose_model
        self.mode = mode
        self.openpose = openpose

    @classmethod
    def from_pretrained(cls, pretrained_model_or_path="DWPose", cache_dir=None, local_files_only=False, **kwargs):
        from installer import install
        install('rtmlib', quiet=True)
        # rtmlib reads TORCH_HOME to locate its cache at <TORCH_HOME>/hub/checkpoints
        old_torch_home = os.environ.get('TORCH_HOME')
        if cache_dir:
            rtmlib_cache = os.path.join(cache_dir, 'rtmlib')
            os.makedirs(rtmlib_cache, exist_ok=True)
            os.environ['TORCH_HOME'] = rtmlib_cache
        try:
            import rtmlib
            mode = pretrained_model_or_path
            model_map = {
                'DWPose': ('RTMPose', {'to_openpose': True}),
                'RTMW-l': ('RTMW', {'to_openpose': True}),
                'RTMO-l': ('RTMO', {'to_openpose': True}),
            }
            if mode not in model_map:
                log.warning(f'RtmlibPose: unknown mode "{mode}", falling back to DWPose')
                mode = 'DWPose'
            model_name, model_kwargs = model_map[mode]
            if model_name == 'RTMPose':
                body = rtmlib.Body(mode='lightweight', backend='onnxruntime', device='cpu', **model_kwargs)
            elif model_name == 'RTMW':
                body = rtmlib.Wholebody(mode='lightweight', backend='onnxruntime', device='cpu', to_openpose=True)
            elif model_name == 'RTMO':
                body = rtmlib.Body(mode='balanced', backend='onnxruntime', device='cpu', **model_kwargs)
            else:
                body = rtmlib.Body(mode='lightweight', backend='onnxruntime', device='cpu')
        finally:
            if old_torch_home is not None:
                os.environ['TORCH_HOME'] = old_torch_home
            elif 'TORCH_HOME' in os.environ:
                del os.environ['TORCH_HOME']
        return cls(body, mode)

    def __call__(self, image, min_confidence=0.3, draw_body_pose=True, draw_hand_pose=True, draw_face_pose=True, output_type="pil", **kwargs):
        if isinstance(image, Image.Image):
            image = np.array(image)
        if image.ndim == 3 and image.shape[2] == 4:
            image = image[:, :, :3]
        h, w = image.shape[:2]
        keypoints, scores = self.pose_model(image)
        canvas = np.zeros((h, w, 3), dtype=np.uint8)
        if keypoints is not None and len(keypoints) > 0:
            import rtmlib
            canvas = rtmlib.draw_skeleton(canvas, keypoints, scores, openpose_skeleton=self.openpose, kpt_thr=min_confidence)
        if output_type == "pil":
            canvas = Image.fromarray(canvas)
        return canvas