Browse Source

feat: refactor table image cropping and processing for improved clarity and functionality

myhloli 2 months ago
parent
commit
bae254fa72

+ 11 - 10
mineru/backend/pipeline/batch_analyze.py

@@ -93,18 +93,19 @@ class BatchAnalyze:
                                           })
 
             for table_res in table_res_list:
-                # table_img, _ = crop_img(table_res, pil_img)
-                # bbox = (241, 208, 1475, 2019)
-                scale = 10/3
-                # scale = 1
-                crop_xmin, crop_ymin = int(table_res['poly'][0]), int(table_res['poly'][1])
-                crop_xmax, crop_ymax = int(table_res['poly'][4]), int(table_res['poly'][5])
-                bbox = (int(crop_xmin/scale), int(crop_ymin/scale), int(crop_xmax/scale), int(crop_ymax/scale))
-                table_img = get_crop_np_img(bbox, np_img, scale=scale)
+                def get_crop_table_img(scale):
+                    crop_xmin, crop_ymin = int(table_res['poly'][0]), int(table_res['poly'][1])
+                    crop_xmax, crop_ymax = int(table_res['poly'][4]), int(table_res['poly'][5])
+                    bbox = (int(crop_xmin / scale), int(crop_ymin / scale), int(crop_xmax / scale), int(crop_ymax / scale))
+                    return get_crop_np_img(bbox, np_img, scale=scale)
+
+                wireless_table_img = get_crop_table_img(scale = 1)
+                wired_table_img = get_crop_table_img(scale = 10/3)
 
                 table_res_list_all_page.append({'table_res':table_res,
                                                 'lang':_lang,
-                                                'table_img':table_img,
+                                                'table_img':wireless_table_img,
+                                                'wired_table_img':wired_table_img,
                                               })
 
         # 表格识别 table recognition
@@ -207,7 +208,7 @@ class BatchAnalyze:
                         lang=table_res_dict["lang"],
                     )
                     table_res_dict["table_res"]["html"] = wired_table_model.predict(
-                        table_res_dict["table_img"],
+                        table_res_dict["wired_table_img"],
                         table_res_dict["ocr_result"],
                         table_res_dict["table_res"].get("html", None)
                     )

+ 8 - 0
mineru/model/ori_cls/paddle_ori_cls.py

@@ -260,11 +260,19 @@ class PaddleOrientationClsModel:
                                 np.asarray(img_info["table_img"]),
                                 cv2.ROTATE_90_CLOCKWISE,
                             )
+                            img_info["wired_table_img"] = cv2.rotate(
+                                np.asarray(img_info["wired_table_img"]),
+                                cv2.ROTATE_90_CLOCKWISE,
+                            )
                         elif label == "90":
                             img_info["table_img"] = cv2.rotate(
                                 np.asarray(img_info["table_img"]),
                                 cv2.ROTATE_90_COUNTERCLOCKWISE,
                             )
+                            img_info["wired_table_img"] = cv2.rotate(
+                                np.asarray(img_info["wired_table_img"]),
+                                cv2.ROTATE_90_COUNTERCLOCKWISE,
+                            )
                         else:
                             # 180度和0度不做处理
                             pass

+ 3 - 2
mineru/model/table/rec/unet_table/main.py

@@ -310,8 +310,9 @@ class UnetTableModel:
 
             # 判断是否使用无线表格模型的结果
             if (
-                (int(wireless_len * 0.04) <= wired_len <= int(wireless_len * 0.62)+1 and wireless_blank_count <= wired_blank_count+50)
-                or int(wireless_len * 0.04) <= wired_len <= int(wireless_len * 0.55)+1 # 有线模型检测到的单元格数太少(低于无线模型的55%)
+                # (int(wireless_len * 0.04) <= wired_len <= int(wireless_len * 0.62)+1 and wireless_blank_count <= wired_blank_count+50)
+                # or int(wireless_len * 0.04) <= wired_len <= int(wireless_len * 0.55)+1 # 有线模型检测到的单元格数太少(低于无线模型的55%)
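+                (int(wireless_len * 0.04) <= (wired_len-wired_blank_count) <= int((wireless_len-wireless_blank_count) * 0.76) and wired_len <= int(wireless_len * 0.5)) # wired model's non-blank cell count is at most 76% of the wireless model's non-blank count (and at least 4% of the wireless total), and its total cell count is at most 50% of the wireless total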
                 or (0 <= gap_of_len <= 5 and wired_len <= round(wireless_len * 0.75))  # 两者相差不大但有线模型结果较少
                 or (gap_of_len == 0 and wired_len <= 4)  # 单元格数量完全相等且总量小于等于4
                 or (wired_text_count <= wireless_text_count * 0.6 and  wireless_text_count >=10) # 有线模型填入的文字明显少于无线模型