zhouchangda committed 5 months ago — parent commit: df905a1bfa

+ 2 - 2
paddlex/inference/pipelines/layout_parsing/layout_objects.py

@@ -396,7 +396,7 @@ class LayoutBlock(object):
         self.seg_end_coordinate = float("-inf")
         self.width = bbox[2] - bbox[0]
         self.height = bbox[3] - bbox[1]
-        self.area = self.width * self.height
+        self.area = float(self.width) * float(self.height)
         self.num_of_lines = 1
         self.image = None
         self.index = None
@@ -827,7 +827,7 @@ class LayoutRegion(LayoutBlock):
                 caculate_euclidean_dist((block.bbox[2], block.bbox[1]), ref_point)
                 for block in blocks
             ]
-        self.euclidean_distance = min(block_distance)
+        self.euclidean_distance = min(block_distance) if len(block_distance) > 0 else 0
 
     def update_direction(self, direction=None):
         """

+ 12 - 6
paddlex/inference/pipelines/layout_parsing/pipeline_v2.py

@@ -423,9 +423,12 @@ class _LayoutParsingPipelineV2(BasePipeline):
                         else:
                             # the other matched ocr be appended to the overall ocr result
                             overall_ocr_res["dt_polys"].append(crop_img_dt_poly)
-                            overall_ocr_res["rec_boxes"] = np.vstack(
-                                (overall_ocr_res["rec_boxes"], crop_box)
-                            )
+                            if len(overall_ocr_res["rec_boxes"]) == 0:
+                                overall_ocr_res["rec_boxes"] = np.array([crop_box])
+                            else:
+                                overall_ocr_res["rec_boxes"] = np.vstack(
+                                    (overall_ocr_res["rec_boxes"], crop_box)
+                                )
                             overall_ocr_res["rec_polys"].append(crop_img_dt_poly)
                             overall_ocr_res["rec_scores"].append(crop_img_rec_score)
                             overall_ocr_res["rec_texts"].append(crop_img_rec_text)
@@ -460,9 +463,12 @@ class _LayoutParsingPipelineV2(BasePipeline):
                     else (self.general_ocr_pipeline.text_rec_score_thresh)
                 )
                 if crop_img_rec_score >= text_rec_score_thresh:
-                    overall_ocr_res["rec_boxes"] = np.vstack(
-                        (overall_ocr_res["rec_boxes"], crop_box)
-                    )
+                    if len(overall_ocr_res["rec_boxes"]) == 0:
+                        overall_ocr_res["rec_boxes"] = np.array([crop_box])
+                    else:
+                        overall_ocr_res["rec_boxes"] = np.vstack(
+                            (overall_ocr_res["rec_boxes"], crop_box)
+                        )
                     overall_ocr_res["rec_polys"].append(crop_img_dt_poly)
                     overall_ocr_res["rec_scores"].append(crop_img_rec_score)
                     overall_ocr_res["rec_texts"].append(crop_img_rec_text)

+ 2 - 4
paddlex/inference/pipelines/layout_parsing/utils.py

@@ -508,7 +508,7 @@ def shrink_supplement_region_bbox(
         (x2 - x2_prime) / image_width,
         (y2 - y2_prime) / image_height,
     ]
-    edge_distance_list_tmp = edge_distance_list[:]
+    edge_distance_list_tmp = deepcopy(edge_distance_list)
     min_distance = min(edge_distance_list)
     src_index = index_conversion_map[edge_distance_list.index(min_distance)]
     if len(block_idxes_set) == 0:
@@ -561,9 +561,7 @@ def shrink_supplement_region_bbox(
             supplement_region_bbox = calculate_minimum_enclosing_bbox(matched_bboxes)
             break
         else:
-            edge_distance_list_tmp = [
-                x for x in edge_distance_list_tmp if x != min_distance
-            ]
+            edge_distance_list_tmp.remove(min_distance)
             min_distance = min(edge_distance_list_tmp)
             src_index = index_conversion_map[edge_distance_list.index(min_distance)]
     return supplement_region_bbox, iner_block_idxes

+ 2 - 2
paddlex/inference/pipelines/ocr/result.py

@@ -207,10 +207,10 @@ def draw_box_txt_fine(
         np.ndarray: An image with the text drawn in the specified box.
     """
     box_height = int(
-        math.sqrt((box[0][0] - box[3][0]) ** 2 + (box[0][1] - box[3][1]) ** 2)
+        math.sqrt(float(box[0][0] - box[3][0]) ** 2 + float(box[0][1] - box[3][1]) ** 2)
     )
     box_width = int(
-        math.sqrt((box[0][0] - box[1][0]) ** 2 + (box[0][1] - box[1][1]) ** 2)
+        math.sqrt(float(box[0][0] - box[1][0]) ** 2 + float(box[0][1] - box[1][1]) ** 2)
     )
 
     if box_height > 2 * box_width and box_height > 30: