# copyright (c) 2024 PaddlePaddle Authors. All Rights Reserve.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
#    http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.

from typing import Dict
import math
import random
from pathlib import Path
import copy
import numpy as np
import cv2
import PIL
from PIL import Image, ImageDraw, ImageFont
from ....utils.fonts import PINGFANG_FONT_FILE_PATH, create_font
from ...common.result import BaseCVResult, StrMixin, JsonMixin


class DocPreprocessorResult(BaseCVResult):
    """Result of the document preprocessor.

    The methods below read the keys ``input_path``, ``input_img``,
    ``rot_img``, ``output_img``, ``angle`` and ``model_settings`` from the
    instance via item access.
    """

    def _summary_data(self) -> Dict:
        """Collect the fields shared by the string and JSON representations.

        Returns:
            Dict: A plain dict holding ``input_path``, ``model_settings`` and
                ``angle`` (in that order), copied by reference from the
                result.

        Raises:
            KeyError: If any of the three keys is missing from the result.
        """
        return {
            "input_path": self["input_path"],
            "model_settings": self["model_settings"],
            "angle": self["angle"],
        }

    def _to_img(self) -> Dict[str, Image.Image]:
        """Render the original, rotated and unwarped images side by side.

        The three images are pasted left to right onto a white canvas, and a
        caption is drawn in a strip below each one. The captions for the
        rotated and unwarped panels include the corresponding model setting
        (and, for the rotated panel, the angle).

        Returns:
            Dict[str, Image.Image]: A single-entry dict whose
                ``"preprocessed_img"`` value is the combined image.
        """
        # Layout constants (pixels).
        caption_strip_h = 25  # extra canvas height reserved for the captions
        caption_font_h = 20  # height of the box handed to create_font
        caption_left_pad = 10  # horizontal inset of each caption in its panel
        caption_top_pad = 2  # gap between the image area and the caption

        # The channel axis is reversed before handing the arrays to PIL
        # (presumably BGR -> RGB; confirm against the producer of these keys).
        image = self["input_img"][:, :, ::-1]
        rot_img = self["rot_img"][:, :, ::-1]
        angle = self["angle"]
        output_img = self["output_img"][:, :, ::-1]

        model_settings = self["model_settings"]
        use_doc_orientation_classify = model_settings["use_doc_orientation_classify"]
        use_doc_unwarping = model_settings["use_doc_unwarping"]

        h1, w1 = image.shape[0:2]
        h2, w2 = rot_img.shape[0:2]
        h3, w3 = output_img.shape[0:2]
        h = max(h1, h2, h3)

        img_show = Image.new(
            "RGB", (w1 + w2 + w3, h + caption_strip_h), (255, 255, 255)
        )
        img_show.paste(Image.fromarray(image), (0, 0, w1, h1))
        img_show.paste(Image.fromarray(rot_img), (w1, 0, w1 + w2, h2))
        img_show.paste(Image.fromarray(output_img), (w1 + w2, 0, w1 + w2 + w3, h3))

        captions = [
            "Original Image",
            f"Rotated Image ({use_doc_orientation_classify}, {angle})",
            f"Unwarping Image ({use_doc_unwarping})",
        ]
        region_widths = [w1, w2, w3]
        region_starts = [0, w1, w1 + w2]

        draw_text = ImageDraw.Draw(img_show)
        for txt, region_w, beg_w in zip(captions, region_widths, region_starts):
            # The font is created per caption from the panel width and the
            # fixed caption height.
            font = create_font(txt, (region_w, caption_font_h), PINGFANG_FONT_FILE_PATH)
            draw_text.text(
                [caption_left_pad + beg_w, h + caption_top_pad],
                txt,
                fill=(0, 0, 0),
                font=font,
            )

        return {"preprocessed_img": img_show}

    def _to_str(self, *args, **kwargs) -> Dict[str, str]:
        """Build the string representation of the result summary.

        Only ``input_path``, ``model_settings`` and ``angle`` are included;
        the image arrays are left out.

        Args:
            *args: Additional positional arguments forwarded to
                ``StrMixin._to_str``.
            **kwargs: Additional keyword arguments forwarded to
                ``StrMixin._to_str``.

        Returns:
            Dict[str, str]: Whatever ``StrMixin._to_str`` returns for the
                summary dict.
        """
        # The mixin implementation is called unbound, with the summary dict
        # standing in for ``self``.
        return StrMixin._to_str(self._summary_data(), *args, **kwargs)

    def _to_json(self, *args, **kwargs) -> Dict[str, str]:
        """Build the JSON representation of the result summary.

        Only ``input_path``, ``model_settings`` and ``angle`` are included;
        the image arrays are left out.

        Args:
            *args: Positional arguments forwarded to ``JsonMixin._to_json``.
            **kwargs: Keyword arguments forwarded to ``JsonMixin._to_json``.

        Returns:
            Dict[str, str]: Whatever ``JsonMixin._to_json`` returns for the
                summary dict.
        """
        # The mixin implementation is called unbound, with the summary dict
        # standing in for ``self``.
        return JsonMixin._to_json(self._summary_data(), *args, **kwargs)