| 123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104 |
- # Copyright (c) 2024 PaddlePaddle Authors. All Rights Reserved.
- #
- # Licensed under the Apache License, Version 2.0 (the "License");
- # you may not use this file except in compliance with the License.
- # You may obtain a copy of the License at
- #
- # http://www.apache.org/licenses/LICENSE-2.0
- #
- # Unless required by applicable law or agreed to in writing, software
- # distributed under the License is distributed on an "AS IS" BASIS,
- # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
- # See the License for the specific language governing permissions and
- # limitations under the License.
- __all__ = ["cal_ocr_word_box"]
- import numpy as np
- # from .convert_points_and_boxes import convert_points_to_boxes
def _word_cell(x_start, x_end, y_start, y_end):
    """Return the four corners of the axis-aligned rectangle over the given x and y ranges."""
    return (
        (x_start, y_start),
        (x_end, y_start),
        (x_end, y_end),
        (x_start, y_end),
    )


def cal_ocr_word_box(rec_str, box, rec_word_info):
    """Calculate the detection frame for each word based on the results of recognition and detection of ocr.

    The text-line box is split horizontally into ``col_num`` equal-width
    columns. Words whose state is ``"cn"`` get one box per column index they
    occupy (one box per character); every other word gets a single box
    spanning its first to last column.

    Args:
        rec_str: Recognised text of the line. Only its length is used, as the
            divisor for the fallback character width when no multi-column
            ``"cn"`` word is available to estimate it.
        box: Corner points of the text line, as an ndarray or any nested
            sequence. ``box[0]`` and ``box[1]`` supply the x range,
            ``box[0]`` and ``box[2]`` the y range; each point is ``(x, y)``.
        rec_word_info: ``(col_num, word_list, word_col_list, state_list)``.
            The three lists are iterated in lockstep; ``word_col_list[i]``
            holds the column indices of ``word_list[i]``.

    Returns:
        A ``(word_box_content_list, word_box_list)`` tuple. Contents are in
        input order (single characters for ``"cn"`` words, joined strings
        otherwise); boxes are 4-point tuples ordered by ``sort_boxes``.
    """
    col_num, word_list, word_col_list, state_list = rec_word_info
    # np.asarray makes plain nested lists/tuples work as well as ndarrays.
    box = np.asarray(box).tolist()
    bbox_x_start = box[0][0]
    bbox_x_end = box[1][0]
    bbox_y_start = box[0][1]
    bbox_y_end = box[2][1]

    # Nothing recognised: return before dividing by col_num, which may be 0.
    if len(word_list) == 0:
        return [], []

    cell_width = (bbox_x_end - bbox_x_start) / col_num

    word_box_list = []
    word_box_content_list = []
    cn_width_list = []
    cn_col_list = []
    for word, word_col, state in zip(word_list, word_col_list, state_list):
        if state == "cn":
            # A width estimate needs at least two columns; a single column
            # would make the (len - 1) divisor zero.
            if len(word_col) != 1:
                char_seq_length = (word_col[-1] - word_col[0] + 1) * cell_width
                char_width = char_seq_length / (len(word_col) - 1)
                cn_width_list.append(char_width)
            # Boxes for these columns are built later, once the average
            # character width is known.
            cn_col_list.extend(word_col)
            word_box_content_list.extend(word)
        else:
            cell_x_start = bbox_x_start + int(word_col[0] * cell_width)
            cell_x_end = bbox_x_start + int((word_col[-1] + 1) * cell_width)
            word_box_list.append(
                _word_cell(cell_x_start, cell_x_end, bbox_y_start, bbox_y_end)
            )
            word_box_content_list.append("".join(word))

    if cn_col_list:
        if cn_width_list:
            avg_char_width = np.mean(cn_width_list)
        else:
            # Only single-column "cn" words were seen: spread the line width
            # evenly over the recognised characters instead.
            avg_char_width = (bbox_x_end - bbox_x_start) / len(rec_str)
        half_width = avg_char_width / 2
        line_width = bbox_x_end - bbox_x_start
        for center_idx in cn_col_list:
            center_x = (center_idx + 0.5) * cell_width
            # Clamp to the line box before shifting into image coordinates.
            cell_x_start = max(int(center_x - half_width), 0) + bbox_x_start
            cell_x_end = min(int(center_x + half_width), line_width) + bbox_x_start
            word_box_list.append(
                _word_cell(cell_x_start, cell_x_end, bbox_y_start, bbox_y_end)
            )

    word_box_list = sort_boxes(word_box_list, y_thresh=12)
    return word_box_content_list, word_box_list
def sort_boxes(boxes, y_thresh=10):
    """Arrange boxes row by row, left to right inside each row.

    Each box is reduced to its centre (the mean of its points). Boxes are
    walked in ascending centre-y order; a box joins the current row when its
    centre-y is within ``y_thresh`` of the previous box's centre-y, otherwise
    it opens a new row. Rows are then ordered by centre-x.

    Args:
        boxes: Sequence of boxes, each a sequence of ``(x, y)`` points.
        y_thresh: Centre-y difference below which two consecutive boxes count
            as the same row.

    Returns:
        A list holding the same box objects in row-major order.
    """
    centroids = [np.mean(candidate, axis=0) for candidate in boxes]
    by_height = sorted(zip(boxes, centroids), key=lambda pair: pair[1][1])

    rows = []
    prev_y = None
    for pair in by_height:
        center_y = pair[1][1]
        # Compare against the previous box, not the row's first box.
        opens_new_row = prev_y is not None and not (abs(center_y - prev_y) < y_thresh)
        if opens_new_row or not rows:
            rows.append([])
        rows[-1].append(pair)
        prev_y = center_y

    ordered = []
    for row in rows:
        left_to_right = sorted(row, key=lambda pair: pair[1][0])
        ordered += [candidate for candidate, _ in left_to_right]
    return ordered
|