zhengchun
/
PaddleX


			
							123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130131132133134135136137138139140141142143144145146147148149150151152153154155156157158159160161162163164165166167168169170171172173174175176177178179180181182183184185186187188189190191192193194195196197198199200201202203204205206207208209210211212213214215216217218219220221222223224225226227228229230231232233234235236237238239240241242243244245246247248249250251252253254255256257258259260261262263264265266267268269270271272273274275276277278279280281282283284285286287288289290291292293294295296297298299300301302303304305306307308309310311312313314315316317318319320321322323324325326327328329330331332333334335336337338339340341342343344345346347348349350351352353354355356357358359360361362363364365366367368369370371372373374375376377378379380381382383384385386387388389390391392393394395396397398399400401402403404405406407408409410411412413414415416417418419420421422423424425426427428429430431432433434435436437438439440441442443444445446447
							# copyright (c) 2024 PaddlePaddle Authors. All Rights Reserve.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
#    http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.

import os

import numpy as np
import math
import PIL
from PIL import Image, ImageDraw, ImageFont

from .keys import DetKeys as K
from ...base import BaseTransform
from ...base.predictor.io import ImageWriter, ImageReader
from ...base.predictor.transforms import image_functions as F
from ...base.predictor.transforms.image_common import _BaseResize, _check_image_size
from ....utils.fonts import PINGFANG_FONT_FILE_PATH
from ....utils import logging

# Names exported by a star-import of this module.
# NOTE(review): `Pad` is defined in this module but is not listed here —
# confirm whether leaving it out of the public API is intentional.
__all__ = ["SaveDetResults", "PadStride", "DetResize", "PrintResult"]


def get_color_map_list(num_classes):
    """
    Build a deterministic palette holding one RGB triple per class index.

    The bits of the class index are consumed three at a time (lowest first)
    and written to the R, G and B channels, starting at each channel's most
    significant bit and moving down one bit per group.

    Args:
        num_classes (int): number of classes
    Returns:
        color_map (list): `num_classes` entries, each an [R, G, B] list
    """
    palette = []
    for class_id in range(num_classes):
        red = green = blue = 0
        shift = 7  # bit position written in each channel for the current group
        remaining = class_id
        while remaining:
            red |= (remaining & 1) << shift
            green |= ((remaining >> 1) & 1) << shift
            blue |= ((remaining >> 2) & 1) << shift
            shift -= 1
            remaining >>= 3
        palette.append([red, green, blue])
    return palette


def colormap(rgb=False):
    """
    Return the fixed 20-entry box color table as an int32 array of shape (20, 3).

    The code of this function is copied from
    https://github.com/facebookresearch/Detectron/blob/main/detectron/utils/colormap.py

    Args:
        rgb (bool): if true, each row is ordered (R, G, B); otherwise the
            channel order of every row is reversed.
    Returns:
        np.ndarray: int32 color table, one row per color
    """
    palette = np.array(
        [
            [0xFF, 0x00, 0x00],
            [0xCC, 0xFF, 0x00],
            [0x00, 0xFF, 0x66],
            [0x00, 0x66, 0xFF],
            [0xCC, 0x00, 0xFF],
            [0xFF, 0x4D, 0x00],
            [0x80, 0xFF, 0x00],
            [0x00, 0xFF, 0xB2],
            [0x00, 0x1A, 0xFF],
            [0xFF, 0x00, 0xE5],
            [0xFF, 0x99, 0x00],
            [0x33, 0xFF, 0x00],
            [0x00, 0xFF, 0xFF],
            [0x33, 0x00, 0xFF],
            [0xFF, 0x00, 0x99],
            [0xFF, 0xE5, 0x00],
            [0x00, 0xFF, 0x1A],
            [0x00, 0xB2, 0xFF],
            [0x80, 0x00, 0xFF],
            [0xFF, 0x00, 0x4D],
        ],
        dtype=np.float32,
    )
    # Flip the channel axis when the caller did not ask for RGB order.
    ordered = palette if rgb else palette[:, ::-1]
    return ordered.astype("int32")


def font_colormap(color_index):
    """
    Return the caption text color paired with a colormap index.

    Args:
        color_index (int): index into the table returned by `colormap`
    Returns:
        np.ndarray: int32 triple; white for the indices listed below and a
            dark color for every other index
    """
    # Colormap entries that get white text; all others get dark text.
    white_text_indices = (0, 3, 4, 8, 9, 13, 14, 18, 19)
    triple = (
        [0xFF, 0xFF, 0xFF] if color_index in white_text_indices else [0x14, 0x0E, 0x35]
    )
    return np.array(triple).astype("int32")


def draw_box(img, np_boxes, labels, threshold=0.5):
    """
    Draw detection boxes and their "<label> <score>" captions onto an image.

    Args:
        img (PIL.Image.Image): PIL image; it is drawn on in place
        np_boxes (np.ndarray): shape:[N,6], N: number of box,
                               matrix element:[class, score, x_min, y_min, x_max, y_max]
        labels (list): labels:['class1', ..., 'classn'], indexed by class id
        threshold (float): only boxes whose score is strictly greater than
                           this value (and whose class id is > -1) are drawn
    Returns:
        img (PIL.Image.Image): visualized image (the same object as `img`)
    """
    font_size = int(0.024 * int(img.width)) + 2
    font = ImageFont.truetype(PINGFANG_FONT_FILE_PATH, font_size, encoding="utf-8")

    draw_thickness = int(max(img.size) * 0.005)
    draw = ImageDraw.Draw(img)
    # `ImageDraw.textsize` no longer exists from Pillow 10.0.0 on, so comparing
    # the version with `<= (10, 0, 0)` crashed on exactly that release (and the
    # int() parse failed on non-numeric version components). Detect the method
    # instead: releases that still have `textsize` keep using it, all others
    # fall back to `textbbox`.
    has_textsize = hasattr(draw, "textsize")
    clsid2color = {}
    catid2fontcolor = {}
    color_list = colormap(rgb=True)
    expect_boxes = (np_boxes[:, 1] > threshold) & (np_boxes[:, 0] > -1)
    np_boxes = np_boxes[expect_boxes, :]

    for i, dt in enumerate(np_boxes):
        clsid, bbox, score = int(dt[0]), dt[2:], dt[1]
        if clsid not in clsid2color:
            # A class gets the palette entry of the first kept box that uses it,
            # and keeps that color for all of its later boxes.
            color_index = i % len(color_list)
            clsid2color[clsid] = color_list[color_index]
            catid2fontcolor[clsid] = font_colormap(color_index)
        color = tuple(clsid2color[clsid])
        font_color = tuple(catid2fontcolor[clsid])

        xmin, ymin, xmax, ymax = bbox
        # draw bbox as a closed polyline
        draw.line(
            [(xmin, ymin), (xmin, ymax), (xmax, ymax), (xmax, ymin), (xmin, ymin)],
            width=draw_thickness,
            fill=color,
        )

        # draw label
        text = "{} {:.2f}".format(labels[clsid], score)
        if has_textsize:
            tw, th = draw.textsize(text, font=font)
        else:
            left, top, right, bottom = draw.textbbox((0, 0), text, font)
            tw, th = right - left, bottom - top
        if ymin < th:
            # Not enough room above the box: put the caption just inside its top edge.
            draw.rectangle([(xmin, ymin), (xmin + tw + 4, ymin + th + 1)], fill=color)
            draw.text((xmin + 2, ymin - 2), text, fill=font_color, font=font)
        else:
            # Caption sits directly above the box.
            draw.rectangle([(xmin, ymin - th), (xmin + tw + 4, ymin + 1)], fill=color)
            draw.text((xmin + 2, ymin - th - 2), text, fill=font_color, font=font)

    return img


def draw_mask(im, np_boxes, np_masks, labels, threshold=0.5):
    """
    Alpha-blend a per-class color over every kept instance mask.

    Args:
        im (PIL.Image.Image): PIL image
        np_boxes (np.ndarray): shape:[N,6], N: number of box,
            matrix element:[class, score, x_min, y_min, x_max, y_max]
        np_masks (np.ndarray): shape:[N, im_h, im_w]
        labels (list): labels:['class1', ..., 'classn']
        threshold (float): only instances whose score is strictly greater
            than this value (and whose class id is > -1) are drawn
    Returns:
        im (PIL.Image.Image): visualized image (a new uint8 image)
    """
    color_list = get_color_map_list(len(labels))
    w_ratio = 0.4  # share of white mixed into the class color
    alpha = 0.7  # opacity of the overlay on masked pixels
    im = np.array(im).astype("float32")
    clsid2color = {}
    expect_boxes = (np_boxes[:, 1] > threshold) & (np_boxes[:, 0] > -1)
    np_boxes = np_boxes[expect_boxes, :]
    np_masks = np_masks[expect_boxes, :, :]
    im_h, im_w = im.shape[:2]
    # Crop the masks to the image extent in case they are larger.
    np_masks = np_masks[:, :im_h, :im_w]
    for box, mask in zip(np_boxes, np_masks):
        clsid = int(box[0])
        if clsid not in clsid2color:
            # Lighten the class color once and cache the result. Previously the
            # cached list was lightened in place on every iteration, so each
            # further instance of a class was drawn paler than the one before.
            base_color = np.array(color_list[clsid])
            clsid2color[clsid] = base_color * (1 - w_ratio) + w_ratio * 255
        color_mask = clsid2color[clsid]
        idx = np.nonzero(mask)
        im[idx[0], idx[1], :] *= 1.0 - alpha
        im[idx[0], idx[1], :] += alpha * color_mask
    return Image.fromarray(im.astype("uint8"))


class SaveDetResults(BaseTransform):
    """Transform that draws detection results on the source image and saves it."""

    def __init__(self, save_dir, threshold=0.5, labels=None):
        """
        Args:
            save_dir (str): directory the visualized images are written to
            threshold (float): score threshold forwarded to the drawing helpers
            labels (list|None): class names forwarded to the drawing helpers
        """
        super().__init__()
        self.save_dir = save_dir
        self.threshold = threshold
        self.labels = labels

        # We use pillow backend to save both numpy arrays and PIL Image objects
        self._writer = ImageWriter(backend="pillow")

    def apply(self, data):
        """Render masks (when present) and boxes, write the image, return `data`."""
        src_path = data[K.IM_PATH]
        # The output keeps the input's file name, placed under `save_dir`.
        dst_path = os.path.join(self.save_dir, os.path.basename(src_path))

        canvas = ImageReader(backend="pil").read(src_path)
        if K.MASKS in data:
            canvas = draw_mask(
                canvas,
                data[K.BOXES],
                data[K.MASKS],
                labels=self.labels,
                threshold=self.threshold,
            )
        canvas = draw_box(
            canvas, data[K.BOXES], labels=self.labels, threshold=self.threshold
        )

        self._write_image(dst_path, canvas)
        return data

    def _write_image(self, path, image):
        """Write `image` to `path`, warning first if the file already exists."""
        already_present = os.path.exists(path)
        if already_present:
            logging.warning(f"{path} already exists. Overwriting it.")
        self._writer.write(path, image)

    @classmethod
    def get_input_keys(cls):
        """Keys that must be present in `data`."""
        return [K.IM_PATH, K.BOXES]

    @classmethod
    def get_output_keys(cls):
        """This transform adds no keys to `data`."""
        return []


class PadStride(BaseTransform):
    """padding image for model with FPN , instead PadBatch(pad_to_stride, pad_gt) in original config

    The image is zero-padded so that its height and width become multiples of
    `stride`.

    Args:
        stride (int): model with FPN need image shape % stride == 0.
            A value <= 0 disables padding.
    """

    def __init__(self, stride=0):
        # Initialize the base transform like the other transforms in this
        # module do; this call was previously missing.
        super().__init__()
        self.coarsest_stride = stride

    def apply(self, data):
        """
        Pad `data[K.IMAGE]` up to the next multiple of the stride.

        Args:
            data (dict): must hold the image under `K.IMAGE`; the image is
                unpacked as (channels, height, width)
        Returns:
            data (dict): same dict; when padding is enabled `K.IMAGE` is
                replaced by a new float32 array with the original image in
                the top-left corner and zeros elsewhere
        """
        im = data[K.IMAGE]
        coarsest_stride = self.coarsest_stride
        if coarsest_stride <= 0:
            return data
        im_c, im_h, im_w = im.shape
        # Round each spatial size up to the nearest multiple of the stride.
        pad_h = int(np.ceil(float(im_h) / coarsest_stride) * coarsest_stride)
        pad_w = int(np.ceil(float(im_w) / coarsest_stride) * coarsest_stride)
        padding_im = np.zeros((im_c, pad_h, pad_w), dtype=np.float32)
        padding_im[:, :im_h, :im_w] = im
        data[K.IMAGE] = padding_im
        return data

    @classmethod
    def get_input_keys(cls):
        """get input keys"""
        return [K.IMAGE]

    @classmethod
    def get_output_keys(cls):
        """get output keys"""
        return [K.IMAGE]


class Pad(BaseTransform):
    """Pad an image at the bottom/right up to a fixed size with a constant color."""

    def __init__(self, size, fill_value=(114.0, 114.0, 114.0)):
        """
        Pad image to a specified size.
        Args:
            size (int|list[int]): image target size as [height, width]; a
                single int is used for both
            fill_value (list[float]): rgb value of pad area, default (114.0, 114.0, 114.0)
        """
        super(Pad, self).__init__()
        if isinstance(size, int):
            size = [size, size]
        self.size = size
        # The default is an immutable tuple and the value is copied, so
        # instances never share (or mutate) one list object.
        self.fill_value = list(fill_value)

    def apply(self, data):
        """
        Place `data[K.IMAGE]` in the top-left corner of a filled canvas.

        Args:
            data (dict): must hold the image under `K.IMAGE`; its first two
                dimensions are read as (height, width)
        Returns:
            data (dict): same dict; `K.IMAGE` is replaced by a float32
                (h, w, 3) canvas unless the image already has the target size
        """
        im = data[K.IMAGE]
        im_h, im_w = im.shape[:2]
        h, w = self.size
        if h == im_h and w == im_w:
            # NOTE: the image is returned untouched here (it is not cast to
            # float32, unlike the padded path below).
            return data

        fill = np.array(self.fill_value, dtype=np.float32)
        canvas = np.full((h, w, 3), fill, dtype=np.float32)
        canvas[0:im_h, 0:im_w, :] = im.astype(np.float32)
        data[K.IMAGE] = canvas
        return data

    @classmethod
    def get_input_keys(cls):
        """get input keys"""
        return [K.IMAGE]

    @classmethod
    def get_output_keys(cls):
        """get output keys"""
        return [K.IMAGE]


class DetResize(_BaseResize):
    """
    Resize the image and record the resulting size and scale factors.

    Args:
        target_hw (list|tuple|int): Target height and width. A single int is
            used for both.
        keep_ratio (bool, optional): Whether to keep the aspect ratio of resized
            image. Default: False.
        size_divisor (int|None, optional): Divisor of resized image size.
            Default: None.
        interp (str, optional): Interpolation method. Choices are 'NEAREST',
            'LINEAR', 'CUBIC', 'AREA', and 'LANCZOS4'. Default: 'LINEAR'.
    """

    def __init__(self, target_hw, keep_ratio=False, size_divisor=None, interp="LINEAR"):
        super().__init__(size_divisor=size_divisor, interp=interp)

        hw = [target_hw, target_hw] if isinstance(target_hw, int) else target_hw
        _check_image_size(hw)
        self.target_hw = hw

        self.keep_ratio = keep_ratio

    def apply(self, data):
        """Resize `data["image"]` and store the new size and scale factors."""
        image = data["image"]
        src_h, src_w = image.shape[:2]

        if self.keep_ratio:
            out_hw, _ = self._rescale_size((src_h, src_w), self.target_hw)
        else:
            out_hw = self.target_hw

        divisor = self.size_divisor
        if divisor:
            # Round each side up to the next multiple of the divisor.
            out_hw = [math.ceil(side / divisor) * divisor for side in out_hw]

        scale_w = out_hw[1] / src_w
        scale_h = out_hw[0] / src_h
        # The target is stored as (h, w); it is passed to the resize helper reversed.
        resized = F.resize(image, out_hw[::-1], interp=self.interp)

        data["image"] = resized
        data["image_size"] = [resized.shape[1], resized.shape[0]]
        data["scale_factors"] = [scale_w, scale_h]
        return data

    @classmethod
    def get_input_keys(cls):
        """Keys read from `data`."""
        # image: Image in hw or hwc format.
        return ["image"]

    @classmethod
    def get_output_keys(cls):
        """Keys written to `data`."""
        # image: Image in hw or hwc format.
        # image_size: Width and height of the image.
        # scale_factors: Scale factors for image width and height.
        return ["image", "image_size", "scale_factors"]


class PrintResult(BaseTransform):
    """Transform that logs the predicted boxes."""

    def apply(self, data):
        """Log a header line followed by `data[K.BOXES]`, then return `data`."""
        logging.info("The prediction result is:")
        # Looked up only after the header is logged, as before.
        boxes = data[K.BOXES]
        logging.info(boxes)
        return data

    @classmethod
    def get_input_keys(cls):
        """Keys that must be present in `data`."""
        return [K.BOXES]

    @classmethod
    def get_output_keys(cls):
        """This transform adds no keys to `data`."""
        return []