|
|
@@ -0,0 +1,543 @@
|
|
|
+# copyright (c) 2024 PaddlePaddle Authors. All Rights Reserved.
|
|
|
+#
|
|
|
+# Licensed under the Apache License, Version 2.0 (the "License");
|
|
|
+# you may not use this file except in compliance with the License.
|
|
|
+# You may obtain a copy of the License at
|
|
|
+#
|
|
|
+# http://www.apache.org/licenses/LICENSE-2.0
|
|
|
+#
|
|
|
+# Unless required by applicable law or agreed to in writing, software
|
|
|
+# distributed under the License is distributed on an "AS IS" BASIS,
|
|
|
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
|
|
+# See the License for the specific language governing permissions and
|
|
|
+# limitations under the License.
|
|
|
+
|
|
|
+
|
|
|
+import os
|
|
|
+import sys
|
|
|
+import cv2
|
|
|
+import copy
|
|
|
+import math
|
|
|
+import pyclipper
|
|
|
+import numpy as np
|
|
|
+from PIL import Image
|
|
|
+from shapely.geometry import Polygon
|
|
|
+
|
|
|
+from ...utils.io import ImageReader
|
|
|
+from ....utils import logging
|
|
|
+from ...results import TextDetResult
|
|
|
+from ..base import BaseComponent
|
|
|
+
|
|
|
+
|
|
|
+__all__ = ["DetResizeForTest", "NormalizeImage", "DBPostProcess", "CropByPolys"]
|
|
|
+
|
|
|
+
|
|
|
class DetResizeForTest(BaseComponent):
    """Resize an image before it is fed to the text detection model.

    The resize strategy is selected from the keyword arguments given to the
    constructor (checked in this order):

    * ``image_shape`` -> type 1: resize to a fixed ``(h, w)``; with
      ``keep_ratio=True`` the width follows the aspect ratio and is rounded
      up to a multiple of 32.
    * ``limit_side_len`` (optionally ``limit_type``) -> type 0: limit one
      side and round both sides to a multiple of 32.
    * ``resize_long`` -> type 2: scale the longer side to ``resize_long`` and
      round both sides up to a multiple of 128.
    * nothing -> type 0 with ``limit_side_len=736`` and ``limit_type="min"``.
    """

    INPUT_KEYS = ["img"]
    OUTPUT_KEYS = ["img", "img_shape"]
    DEAULT_INPUTS = {"img": "img"}
    DEAULT_OUTPUTS = {"img": "img", "img_shape": "img_shape"}

    def __init__(self, **kwargs):
        super().__init__()
        self.resize_type = 0
        self.keep_ratio = False
        if "image_shape" in kwargs:
            self.image_shape = kwargs["image_shape"]
            self.resize_type = 1
            if "keep_ratio" in kwargs:
                self.keep_ratio = kwargs["keep_ratio"]
        elif "limit_side_len" in kwargs:
            self.limit_side_len = kwargs["limit_side_len"]
            self.limit_type = kwargs.get("limit_type", "min")
        elif "resize_long" in kwargs:
            self.resize_type = 2
            self.resize_long = kwargs.get("resize_long", 960)
        else:
            self.limit_side_len = 736
            self.limit_type = "min"

    def apply(self, img):
        """Resize ``img`` with the configured strategy.

        Args:
            img: image array with shape ``[h, w, c]``.

        Returns:
            dict: ``"img"`` is the resized image and ``"img_shape"`` is
            ``np.array([src_h, src_w, ratio_h, ratio_w])`` where ``src_*`` is
            the size of the input before padding/resizing.
        """
        src_h, src_w, _ = img.shape
        # Very small inputs are zero-padded up to at least 32x32 first.
        if sum([src_h, src_w]) < 64:
            img = self.image_padding(img)

        if self.resize_type == 0:
            img, [ratio_h, ratio_w] = self.resize_image_type0(img)
        elif self.resize_type == 2:
            img, [ratio_h, ratio_w] = self.resize_image_type2(img)
        else:
            img, [ratio_h, ratio_w] = self.resize_image_type1(img)
        return {"img": img, "img_shape": np.array([src_h, src_w, ratio_h, ratio_w])}

    def image_padding(self, im, value=0):
        """Pad ``im`` at the bottom/right with ``value`` to at least 32x32."""
        h, w, c = im.shape
        im_pad = np.zeros((max(32, h), max(32, w), c), np.uint8) + value
        im_pad[:h, :w, :] = im
        return im_pad

    def resize_image_type1(self, img):
        """Resize to ``self.image_shape``, optionally keeping the aspect ratio.

        Args:
            img: image array with shape ``[h, w, ...]``.

        Returns:
            tuple: ``(img, [ratio_h, ratio_w])``.
        """
        resize_h, resize_w = self.image_shape
        ori_h, ori_w = img.shape[:2]  # (h, w, c)
        if self.keep_ratio is True:
            # Derive the width from the target height, then round it up to
            # the next multiple of 32.
            resize_w = ori_w * resize_h / ori_h
            N = math.ceil(resize_w / 32)
            resize_w = N * 32
        ratio_h = float(resize_h) / ori_h
        ratio_w = float(resize_w) / ori_w
        img = cv2.resize(img, (int(resize_w), int(resize_h)))
        return img, [ratio_h, ratio_w]

    def resize_image_type0(self, img):
        """Resize so both sides are multiples of 32, as the network requires.

        Args:
            img: image array with shape ``[h, w, c]``.

        Returns:
            tuple: ``(img, [ratio_h, ratio_w])``.

        Raises:
            Exception: if ``self.limit_type`` is not one of ``"max"``,
                ``"min"`` or ``"resize_long"``; any error raised by
                ``cv2.resize`` is logged and re-raised.
        """
        limit_side_len = self.limit_side_len
        h, w, c = img.shape

        if self.limit_type == "max":
            # Shrink only when the longer side exceeds the limit.
            if max(h, w) > limit_side_len:
                if h > w:
                    ratio = float(limit_side_len) / h
                else:
                    ratio = float(limit_side_len) / w
            else:
                ratio = 1.0
        elif self.limit_type == "min":
            # Enlarge only when the shorter side is below the limit.
            if min(h, w) < limit_side_len:
                if h < w:
                    ratio = float(limit_side_len) / h
                else:
                    ratio = float(limit_side_len) / w
            else:
                ratio = 1.0
        elif self.limit_type == "resize_long":
            ratio = float(limit_side_len) / max(h, w)
        else:
            raise Exception("not support limit type, image ")
        resize_h = int(h * ratio)
        resize_w = int(w * ratio)

        # Round to the nearest multiple of 32, never below 32, so the target
        # size is always strictly positive.
        resize_h = max(int(round(resize_h / 32) * 32), 32)
        resize_w = max(int(round(resize_w / 32) * 32), 32)

        try:
            img = cv2.resize(img, (int(resize_w), int(resize_h)))
        except Exception:
            # Report the failing sizes and let the caller see the error
            # instead of terminating the whole process.
            logging.info(
                "Failed to resize image with shape {} to (w={}, h={})".format(
                    img.shape, resize_w, resize_h
                )
            )
            raise
        ratio_h = resize_h / float(h)
        ratio_w = resize_w / float(w)
        return img, [ratio_h, ratio_w]

    def resize_image_type2(self, img):
        """Scale the longer side to ``self.resize_long``.

        Both sides are then rounded up to a multiple of 128.

        Args:
            img: image array with shape ``[h, w, c]``.

        Returns:
            tuple: ``(img, [ratio_h, ratio_w])``.
        """
        h, w, _ = img.shape

        resize_w = w
        resize_h = h

        if resize_h > resize_w:
            ratio = float(self.resize_long) / resize_h
        else:
            ratio = float(self.resize_long) / resize_w

        resize_h = int(resize_h * ratio)
        resize_w = int(resize_w * ratio)

        max_stride = 128
        resize_h = (resize_h + max_stride - 1) // max_stride * max_stride
        resize_w = (resize_w + max_stride - 1) // max_stride * max_stride
        img = cv2.resize(img, (int(resize_w), int(resize_h)))
        ratio_h = resize_h / float(h)
        ratio_w = resize_w / float(w)

        return img, [ratio_h, ratio_w]
|
|
|
+
|
|
|
+
|
|
|
class NormalizeImage(BaseComponent):
    """Normalize an image: multiply by a scale, subtract mean, divide by std."""

    INPUT_KEYS = ["img"]
    OUTPUT_KEYS = ["img"]
    DEAULT_INPUTS = {"img": "img"}
    DEAULT_OUTPUTS = {"img": "img"}

    def __init__(self, scale=None, mean=None, std=None, order="chw", **kwargs):
        """Store the normalization constants as float32 arrays.

        Args:
            scale: multiplier applied to the pixels; a string is evaluated as
                a Python expression. Defaults to ``1.0 / 255.0``.
            mean: per-channel mean, defaults to ``[0.485, 0.456, 0.406]``.
            std: per-channel std, defaults to ``[0.229, 0.224, 0.225]``.
            order: ``"chw"`` stores mean/std with shape ``(3, 1, 1)``; any
                other value uses ``(1, 1, 3)``.
        """
        super().__init__()
        if isinstance(scale, str):
            # NOTE(review): eval() executes arbitrary code; this is only safe
            # while `scale` comes from a trusted configuration.
            scale = eval(scale)
        if scale is None:
            scale = 1.0 / 255.0
        self.scale = np.float32(scale)

        if mean is None:
            mean = [0.485, 0.456, 0.406]
        if std is None:
            std = [0.229, 0.224, 0.225]

        if order == "chw":
            stat_shape = (3, 1, 1)
        else:
            stat_shape = (1, 1, 3)
        mean_arr = np.array(mean).reshape(stat_shape)
        std_arr = np.array(std).reshape(stat_shape)
        self.mean = mean_arr.astype("float32")
        self.std = std_arr.astype("float32")

    def apply(self, img):
        """Return ``{"img": (img * scale - mean) / std}`` computed in float32.

        A PIL image is converted to a numpy array first.
        """
        from PIL import Image

        if isinstance(img, Image.Image):
            img = np.array(img)
        assert isinstance(img, np.ndarray), "invalid input 'img' in NormalizeImage"
        scaled = img.astype("float32") * self.scale
        normalized = (scaled - self.mean) / self.std
        return {"img": normalized}
|
|
|
+
|
|
|
+
|
|
|
class DBPostProcess(BaseComponent):
    """
    The post process for Differentiable Binarization (DB).

    Thresholds the predicted probability map, extracts contours from the
    resulting binary map, scores and expands ("unclips") each candidate and
    rescales the surviving boxes/polygons to the source image size.
    """

    INPUT_KEYS = ["pred", "img_shape", "img_path"]
    OUTPUT_KEYS = ["text_det_res"]
    DEAULT_INPUTS = {"pred": "pred", "img_shape": "img_shape", "img_path": "img_path"}
    DEAULT_OUTPUTS = {"text_det_res": "text_det_res"}

    def __init__(
        self,
        thresh=0.3,
        box_thresh=0.7,
        max_candidates=1000,
        unclip_ratio=2.0,
        use_dilation=False,
        score_mode="fast",
        box_type="quad",
        **kwargs
    ):
        """Configure the post process.

        Args:
            thresh: binarization threshold applied to the prediction map.
            box_thresh: candidates scoring below this value are dropped.
            max_candidates: maximum number of contours that are examined.
            unclip_ratio: expansion ratio used by :meth:`unclip`.
            use_dilation: dilate the binary map with a 2x2 kernel first.
            score_mode: ``"fast"`` or ``"slow"`` (used for quad boxes).
            box_type: ``"quad"`` or ``"poly"``.
        """
        super().__init__()
        self.thresh = thresh
        self.box_thresh = box_thresh
        self.max_candidates = max_candidates
        self.unclip_ratio = unclip_ratio
        self.min_size = 3
        self.score_mode = score_mode
        self.box_type = box_type
        assert score_mode in [
            "slow",
            "fast",
        ], "Score mode must be in [slow, fast] but got: {}".format(score_mode)

        self.dilation_kernel = None if not use_dilation else np.array([[1, 1], [1, 1]])

    def _find_contours(self, bitmap):
        """Return the contours of ``bitmap`` for any OpenCV return convention.

        ``cv2.findContours`` returns either ``(contours, hierarchy)`` or
        ``(image, contours, hierarchy)``; the contours are the second-to-last
        element in both cases.
        """
        outs = cv2.findContours(
            (bitmap * 255).astype(np.uint8), cv2.RETR_LIST, cv2.CHAIN_APPROX_SIMPLE
        )
        return outs[-2]

    def polygons_from_bitmap(self, pred, _bitmap, dest_width, dest_height):
        """Extract polygons from a binary map.

        Args:
            pred: prediction map the scores are computed on.
            _bitmap: 2-D map (H, W) whose values are binarized as {0, 1}.
            dest_width: width the polygon coordinates are rescaled to.
            dest_height: height the polygon coordinates are rescaled to.

        Returns:
            tuple: ``(boxes, scores)`` where ``boxes`` is a list of polygons
            (each a list of ``[x, y]`` points) and ``scores`` the matching
            list of scores.
        """
        bitmap = _bitmap
        height, width = bitmap.shape

        boxes = []
        scores = []

        contours = self._find_contours(bitmap)

        for contour in contours[: self.max_candidates]:
            epsilon = 0.002 * cv2.arcLength(contour, True)
            approx = cv2.approxPolyDP(contour, epsilon, True)
            points = approx.reshape((-1, 2))
            if points.shape[0] < 4:
                continue

            score = self.box_score_fast(pred, points.reshape(-1, 2))
            if self.box_thresh > score:
                continue

            box = self.unclip(points, self.unclip_ratio)
            # Keep only candidates whose expansion yields exactly one
            # polygon; an empty or multi-polygon result is discarded.
            if len(box) != 1:
                continue
            box = box.reshape(-1, 2)

            _, sside = self.get_mini_boxes(box.reshape((-1, 1, 2)))
            if sside < self.min_size + 2:
                continue

            box = np.array(box)
            box[:, 0] = np.clip(np.round(box[:, 0] / width * dest_width), 0, dest_width)
            box[:, 1] = np.clip(
                np.round(box[:, 1] / height * dest_height), 0, dest_height
            )
            boxes.append(box.tolist())
            scores.append(score)
        return boxes, scores

    def boxes_from_bitmap(self, pred, _bitmap, dest_width, dest_height):
        """Extract quadrilateral boxes from a binary map.

        Args:
            pred: prediction map the scores are computed on.
            _bitmap: 2-D map (H, W) whose values are binarized as {0, 1}.
            dest_width: width the box coordinates are rescaled to.
            dest_height: height the box coordinates are rescaled to.

        Returns:
            tuple: ``(boxes, scores)`` where ``boxes`` is an ``int16`` array
            of 4-point boxes and ``scores`` the matching list of scores.
        """
        bitmap = _bitmap
        height, width = bitmap.shape

        contours = self._find_contours(bitmap)
        num_contours = min(len(contours), self.max_candidates)

        boxes = []
        scores = []
        for index in range(num_contours):
            contour = contours[index]
            points, sside = self.get_mini_boxes(contour)
            if sside < self.min_size:
                continue
            points = np.array(points)
            if self.score_mode == "fast":
                score = self.box_score_fast(pred, points.reshape(-1, 2))
            else:
                score = self.box_score_slow(pred, contour)
            if self.box_thresh > score:
                continue

            box = self.unclip(points, self.unclip_ratio).reshape(-1, 1, 2)
            box, sside = self.get_mini_boxes(box)
            if sside < self.min_size + 2:
                continue
            box = np.array(box)

            box[:, 0] = np.clip(np.round(box[:, 0] / width * dest_width), 0, dest_width)
            box[:, 1] = np.clip(
                np.round(box[:, 1] / height * dest_height), 0, dest_height
            )
            boxes.append(box.astype(np.int16))
            scores.append(score)
        return np.array(boxes, dtype=np.int16), scores

    def unclip(self, box, unclip_ratio):
        """Expand ``box`` outward by ``area * unclip_ratio / perimeter``.

        Returns:
            np.ndarray: the polygon(s) produced by the offset operation. When
            the result holds several polygons with different point counts an
            object array is returned so callers can still inspect its length.
        """
        poly = Polygon(box)
        distance = poly.area * unclip_ratio / poly.length
        offset = pyclipper.PyclipperOffset()
        offset.AddPath(box, pyclipper.JT_ROUND, pyclipper.ET_CLOSEDPOLYGON)
        paths = offset.Execute(distance)
        try:
            expanded = np.array(paths)
        except ValueError:
            # Ragged result: recent NumPy refuses to build a regular array.
            expanded = np.array(paths, dtype=object)
        return expanded

    def get_mini_boxes(self, contour):
        """Return the minimum-area rectangle of ``contour`` and its short side.

        The four corners are ordered by first sorting on x, then taking the
        upper/lower point of the left pair and of the right pair.

        Returns:
            tuple: ``(box, short_side)`` with ``box`` a list of four points.
        """
        bounding_box = cv2.minAreaRect(contour)
        points = sorted(list(cv2.boxPoints(bounding_box)), key=lambda x: x[0])

        index_1, index_2, index_3, index_4 = 0, 1, 2, 3
        if points[1][1] > points[0][1]:
            index_1 = 0
            index_4 = 1
        else:
            index_1 = 1
            index_4 = 0
        if points[3][1] > points[2][1]:
            index_2 = 2
            index_3 = 3
        else:
            index_2 = 3
            index_3 = 2

        box = [points[index_1], points[index_2], points[index_3], points[index_4]]
        return box, min(bounding_box[1])

    def box_score_fast(self, bitmap, _box):
        """box_score_fast: use bbox mean score as the mean score"""
        h, w = bitmap.shape[:2]
        box = _box.copy()
        xmin = np.clip(np.floor(box[:, 0].min()).astype("int"), 0, w - 1)
        xmax = np.clip(np.ceil(box[:, 0].max()).astype("int"), 0, w - 1)
        ymin = np.clip(np.floor(box[:, 1].min()).astype("int"), 0, h - 1)
        ymax = np.clip(np.ceil(box[:, 1].max()).astype("int"), 0, h - 1)

        # Rasterize the box into a mask local to its bounding rectangle.
        mask = np.zeros((ymax - ymin + 1, xmax - xmin + 1), dtype=np.uint8)
        box[:, 0] = box[:, 0] - xmin
        box[:, 1] = box[:, 1] - ymin
        cv2.fillPoly(mask, box.reshape(1, -1, 2).astype(np.int32), 1)
        return cv2.mean(bitmap[ymin : ymax + 1, xmin : xmax + 1], mask)[0]

    def box_score_slow(self, bitmap, contour):
        """box_score_slow: use polygon mean score as the mean score"""
        h, w = bitmap.shape[:2]
        contour = contour.copy()
        contour = np.reshape(contour, (-1, 2))

        xmin = np.clip(np.min(contour[:, 0]), 0, w - 1)
        xmax = np.clip(np.max(contour[:, 0]), 0, w - 1)
        ymin = np.clip(np.min(contour[:, 1]), 0, h - 1)
        ymax = np.clip(np.max(contour[:, 1]), 0, h - 1)

        mask = np.zeros((ymax - ymin + 1, xmax - xmin + 1), dtype=np.uint8)

        contour[:, 0] = contour[:, 0] - xmin
        contour[:, 1] = contour[:, 1] - ymin

        cv2.fillPoly(mask, contour.reshape(1, -1, 2).astype(np.int32), 1)
        return cv2.mean(bitmap[ymin : ymax + 1, xmin : xmax + 1], mask)[0]

    def apply(self, pred, img_shape, img_path):
        """Turn a prediction map into a ``TextDetResult``.

        Args:
            pred: prediction whose first channel (``pred[0]``) is used.
            img_shape: ``(src_h, src_w, ratio_h, ratio_w)``.
            img_path: stored in the result under ``"img_path"``.

        Returns:
            dict: ``{"text_det_res": TextDetResult}`` holding ``img_path``,
            ``dt_polys`` and ``dt_scores``.

        Raises:
            ValueError: if ``self.box_type`` is neither ``"quad"`` nor
                ``"poly"``.
        """
        pred = pred[0, :, :]
        segmentation = pred > self.thresh

        src_h, src_w, ratio_h, ratio_w = img_shape
        if self.dilation_kernel is not None:
            mask = cv2.dilate(
                np.array(segmentation).astype(np.uint8),
                self.dilation_kernel,
            )
        else:
            mask = segmentation
        if self.box_type == "poly":
            boxes, scores = self.polygons_from_bitmap(pred, mask, src_w, src_h)
        elif self.box_type == "quad":
            boxes, scores = self.boxes_from_bitmap(pred, mask, src_w, src_h)
        else:
            raise ValueError("box_type can only be one of ['quad', 'poly']")

        text_det_res = TextDetResult(
            {"img_path": img_path, "dt_polys": boxes, "dt_scores": scores}
        )
        return {"text_det_res": text_det_res}
|
|
|
+
|
|
|
+
|
|
|
class CropByPolys(BaseComponent):
    """Crop Image by Polys"""

    INPUT_KEYS = ["img_path", "dt_polys"]
    OUTPUT_KEYS = ["img"]
    DEAULT_INPUTS = {"img_path": "img_path", "dt_polys": "dt_polys"}
    DEAULT_OUTPUTS = {"img": "img"}

    def __init__(self, det_box_type="quad"):
        """Create the cropper.

        Args:
            det_box_type: ``"quad"`` crops each 4-point box directly; any
                other value crops the minimum-area rectangle of each polygon.
        """
        super().__init__()
        self.det_box_type = det_box_type
        self._reader = ImageReader(backend="opencv")

    def apply(self, img_path, dt_polys):
        """Read the image and crop one patch per detected polygon.

        Args:
            img_path: passed to the image reader.
            dt_polys: sequence of polygons, each a sequence of ``[x, y]``.

        Returns:
            list: one dict per polygon with ``"img"`` (the crop) and
            ``"img_size"`` (``[width, height]`` of the crop).
        """
        img = self._reader.read(img_path)
        # TODO: the polygons are not sorted with `sorted_boxes` yet.
        output_list = []
        for poly in dt_polys:
            # Convert each polygon on its own: polygons may have different
            # point counts, so they cannot always be stacked into one array.
            # np.array also copies, leaving the caller's data untouched.
            tmp_box = np.array(poly)
            if self.det_box_type == "quad":
                img_crop = self.get_rotate_crop_image(img, tmp_box)
            else:
                img_crop = self.get_minarea_rect_crop(img, tmp_box)
            output_list.append(
                {"img": img_crop, "img_size": [img_crop.shape[1], img_crop.shape[0]]}
            )
        return output_list

    def sorted_boxes(self, dt_boxes):
        """
        Sort text boxes in order from top to bottom, left to right
        args:
            dt_boxes(array): detected text boxes, each with shape [4, 2]
        return:
            list of sorted boxes, each with shape [4, 2]
        """
        dt_boxes = np.array(dt_boxes)
        num_boxes = dt_boxes.shape[0]
        _boxes = sorted(dt_boxes, key=lambda x: (x[0][1], x[0][0]))

        # Boxes whose first points are less than 10 apart vertically are
        # treated as one line and reordered left to right.
        for i in range(num_boxes - 1):
            for j in range(i, -1, -1):
                if abs(_boxes[j + 1][0][1] - _boxes[j][0][1]) < 10 and (
                    _boxes[j + 1][0][0] < _boxes[j][0][0]
                ):
                    _boxes[j], _boxes[j + 1] = _boxes[j + 1], _boxes[j]
                else:
                    break
        return _boxes

    def get_minarea_rect_crop(self, img, points):
        """Crop the minimum-area rectangle enclosing ``points`` from ``img``."""
        bounding_box = cv2.minAreaRect(np.array(points).astype(np.int32))
        points = sorted(list(cv2.boxPoints(bounding_box)), key=lambda x: x[0])

        # Order the corners: upper/lower of the left pair, then of the right
        # pair, so the box can be handed to get_rotate_crop_image.
        index_a, index_b, index_c, index_d = 0, 1, 2, 3
        if points[1][1] > points[0][1]:
            index_a = 0
            index_d = 1
        else:
            index_a = 1
            index_d = 0
        if points[3][1] > points[2][1]:
            index_b = 2
            index_c = 3
        else:
            index_b = 3
            index_c = 2

        box = [points[index_a], points[index_b], points[index_c], points[index_d]]
        crop_img = self.get_rotate_crop_image(img, np.array(box))
        return crop_img

    def get_rotate_crop_image(self, img, points):
        """Warp the quadrilateral ``points`` of ``img`` into an upright crop.

        Args:
            img: source image.
            points: array-like with four ``[x, y]`` corners; corner 0 is
                mapped to the top-left of the crop, 1 to the top-right, 2 to
                the bottom-right and 3 to the bottom-left.

        Returns:
            The cropped image. When its height is at least 1.5 times its
            width it is rotated by 90 degrees with ``np.rot90``.
        """
        assert len(points) == 4, "shape of points must be 4*2"
        # cv2.getPerspectiveTransform only accepts float32 points; integer
        # boxes (e.g. int16 detection output) are converted here.
        points = np.asarray(points, dtype=np.float32)
        img_crop_width = int(
            max(
                np.linalg.norm(points[0] - points[1]),
                np.linalg.norm(points[2] - points[3]),
            )
        )
        img_crop_height = int(
            max(
                np.linalg.norm(points[0] - points[3]),
                np.linalg.norm(points[1] - points[2]),
            )
        )
        pts_std = np.float32(
            [
                [0, 0],
                [img_crop_width, 0],
                [img_crop_width, img_crop_height],
                [0, img_crop_height],
            ]
        )
        M = cv2.getPerspectiveTransform(points, pts_std)
        dst_img = cv2.warpPerspective(
            img,
            M,
            (img_crop_width, img_crop_height),
            borderMode=cv2.BORDER_REPLICATE,
            flags=cv2.INTER_CUBIC,
        )
        dst_img_height, dst_img_width = dst_img.shape[0:2]
        if dst_img_height * 1.0 / dst_img_width >= 1.5:
            dst_img = np.rot90(dst_img)
        return dst_img
|