|
|
@@ -324,7 +324,7 @@ def remove_overlaps_low_confidence_blocks(combined_res_list, overlap_threshold=0
|
|
|
marked_indices.add(i) # 标记当前索引为已处理
|
|
|
return blocks_to_remove
|
|
|
|
|
|
-
|
|
|
+# @todo 这个方法以后需要重构
|
|
|
def get_res_list_from_layout_res(layout_res, iou_threshold=0.7, overlap_threshold=0.8, area_threshold=0.8):
|
|
|
"""Extract OCR, table and other regions from layout results."""
|
|
|
ocr_res_list = []
|
|
|
@@ -358,6 +358,24 @@ def get_res_list_from_layout_res(layout_res, iou_threshold=0.7, overlap_threshol
|
|
|
filtered_table_res_list = filter_nested_tables(
|
|
|
table_res_list, overlap_threshold, area_threshold)
|
|
|
|
|
|
+ for table_res in filtered_table_res_list:
|
|
|
+ table_res['bbox'] = [int(table_res['poly'][0]), int(table_res['poly'][1]), int(table_res['poly'][4]), int(table_res['poly'][5])]
|
|
|
+
|
|
|
+ filtered_table_res_list, table_need_remove = remove_overlaps_min_blocks(filtered_table_res_list)
|
|
|
+
|
|
|
+ for res in filtered_table_res_list:
|
|
|
+ # 将res的poly使用bbox重构
|
|
|
+ res['poly'] = [res['bbox'][0], res['bbox'][1], res['bbox'][2], res['bbox'][1],
|
|
|
+ res['bbox'][2], res['bbox'][3], res['bbox'][0], res['bbox'][3]]
|
|
|
+ # 删除res的bbox
|
|
|
+ del res['bbox']
|
|
|
+
|
|
|
+ if len(table_need_remove) > 0:
|
|
|
+ for res in table_need_remove:
|
|
|
+ del res['bbox']
|
|
|
+ if res in layout_res:
|
|
|
+ layout_res.remove(res)
|
|
|
+
|
|
|
# Remove filtered out tables from layout_res
|
|
|
if len(filtered_table_res_list) < len(table_res_list):
|
|
|
kept_tables = set(id(table) for table in filtered_table_res_list)
|
|
|
@@ -381,7 +399,8 @@ def get_res_list_from_layout_res(layout_res, iou_threshold=0.7, overlap_threshol
|
|
|
if len(need_remove) > 0:
|
|
|
for res in need_remove:
|
|
|
del res['bbox']
|
|
|
- layout_res.remove(res)
|
|
|
+ if res in layout_res:
|
|
|
+ layout_res.remove(res)
|
|
|
|
|
|
# 检测大block内部是否包含多个小block, 合并ocr和table列表进行检测
|
|
|
combined_res_list = ocr_res_list + filtered_table_res_list
|